blob: a372a60c3367f672a21efb56c8098482db98e82d [file] [log] [blame]
// Bundles everything that can be derived from a vector type
// (NumElts x EltVT): mask register classes, memory operands, load fragments
// and so on.  Multiclasses take one of these records as a single template
// argument instead of a long list of individual arguments.
//
//   NumElts - number of vector elements.
//   EltVT   - scalar element type.
//   rc      - vector register class holding the value.
//   suffix  - optional mnemonic suffix (defaults to the empty string).
class X86VectorVTInfo<int NumElts, ValueType EltVT, RegisterClass rc,
                      string suffix = ""> {
  // Vector register class, exactly as passed in.
  RegisterClass RC = rc;

  // Mask register class looked up by name: "VK<NumElts>".
  RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);

  // Write-mask register class looked up by name: "VK<NumElts>WM".
  RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");

  // GPR register class that can hold the write mask: GR8 when there are
  // fewer than 8 elements, GR<NumElts> otherwise.  TableGen has no !lt, so
  // "NumElts < 8" is spelled as (NumElts >> 3) == 0.
  RegisterClass MRC =
    !cast<RegisterClass>("GR" #
                         !if(!eq(!srl(NumElts, 3), 0), 8, NumElts));

  // Suffix used in the instruction mnemonic.
  string Suffix = suffix;

  // Name of the vector type, e.g. "v16i32".
  string VTName = "v" # NumElts # EltVT;

  // The vector VT record named by VTName.
  ValueType VT = !cast<ValueType>(VTName);

  // Element type name, e.g. "i32".
  string EltTypeName = !cast<string>(EltVT);

  // Element size in bits, as a string (EltTypeName with "i"/"f" removed)
  // and as an integer, e.g. 32 for v16i32.
  string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
  int EltSize = EltVT.Size;

  // "i" for integer element types, "f" for floating-point ones; obtained by
  // removing the size digits from the element type name.
  string TypeVariantName = !subst(EltSizeName, "", EltTypeName);

  // Size of the whole vector type in bits, e.g. 512 for v16i32.
  int Size = VT.Size;

  // Full-width memory operand, e.g. i512mem, and the memory operand for a
  // single element, e.g. i32mem.
  X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
  X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");

  // Load fragments.
  // Note: 128/256-bit integer types use loadv2i64/loadv4i64 because of load
  // promotion during legalization; everything else uses "load" # VTName.
  PatFrag LdFrag = !cast<PatFrag>("load" #
                                  !if(!eq(TypeVariantName, "i"),
                                      !if(!eq(Size, 128), "v2i64",
                                          !if(!eq(Size, 256), "v4i64",
                                              VTName)),
                                      VTName));
  PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);

  // The floating-point type of the same shape, e.g. v16f32 for v16i32.
  // Note: for EltSize < 32 (tested as (EltSize >> 5) == 0) that type is
  // illegal and TableGen fails to compile, so FloatVT falls back to VT.
  ValueType FloatVT = !cast<ValueType>(
                        !if(!eq(!srl(EltSize, 5), 0),
                            VTName,
                            !if(!eq(TypeVariantName, "i"),
                                "v" # NumElts # "f" # EltSize,
                                VTName)));

  // Assembly string that requests an embedded broadcast, e.g. "{1to16}".
  string BroadcastStr = "{1to" # NumElts # "}";
}
68
// 512-bit integer vector types, held in VR512.
def v64i8_info   : X86VectorVTInfo<64, i8,  VR512, "b">;
def v32i16_info  : X86VectorVTInfo<32, i16, VR512, "w">;
def v16i32_info  : X86VectorVTInfo<16, i32, VR512, "d">;
def v8i64_info   : X86VectorVTInfo<8,  i64, VR512, "q">;

// 256-bit integer vector types.  The trailing "x" in the record name
// (e.g. v32i8x_info) means RC = VR256X.
def v32i8x_info  : X86VectorVTInfo<32, i8,  VR256X, "b">;
def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
def v8i32x_info  : X86VectorVTInfo<8,  i32, VR256X, "d">;
def v4i64x_info  : X86VectorVTInfo<4,  i64, VR256X, "q">;

// 128-bit integer vector types, held in VR128X.
def v16i8x_info  : X86VectorVTInfo<16, i8,  VR128X, "b">;
def v8i16x_info  : X86VectorVTInfo<8,  i16, VR128X, "w">;
def v4i32x_info  : X86VectorVTInfo<4,  i32, VR128X, "d">;
def v2i64x_info  : X86VectorVTInfo<2,  i64, VR128X, "q">;
84
// Groups the X86VectorVTInfo records of one element type at the three
// vector widths so that they can be passed around as a single argument.
//   i512 / i256 / i128 - info records for the 512-, 256- and 128-bit forms.
class AVX512VLVectorVTInfo<X86VectorVTInfo i512,
                           X86VectorVTInfo i256,
                           X86VectorVTInfo i128> {
  X86VectorVTInfo info512 = i512;
  X86VectorVTInfo info256 = i256;
  X86VectorVTInfo info128 = i128;
}
91
// One width-triple (512/256/128-bit) per integer element type.
def avx512vl_i8_info
  : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info, v16i8x_info>;
def avx512vl_i16_info
  : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info, v8i16x_info>;
def avx512vl_i32_info
  : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info, v4i32x_info>;
def avx512vl_i64_info
  : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info, v2i64x_info>;
100
101
// Shared implementation behind AVX512_masking and AVX512_masking_3src.
// Emits three records per instantiation:
//   NAME   - the unmasked instruction, selecting RHS;
//   NAMEk  - the merge-masking form, selecting MaskingRHS;
//   NAMEkz - the zero-masking form, selecting RHS under $mask and an
//            all-zeros vector elsewhere.
// Ins / MaskingIns / ZeroMaskingIns are the operand lists of the three forms
// and MaskingConstraint is the operand constraint applied to NAMEk.
multiclass AVX512_masking_common<bits<8> O, Format F, dag Outs, dag Ins,
                                 dag MaskingIns, dag ZeroMaskingIns,
                                 string OpcodeStr,
                                 string AttSrcAsm, string IntelSrcAsm,
                                 dag RHS, dag MaskingRHS, ValueType OpVT,
                                 RegisterClass RC, RegisterClass KRC,
                                 string MaskingConstraint = ""> {
  // Unmasked form.
  def NAME : AVX512<O, F, Outs, Ins,
                    !strconcat(OpcodeStr, " \t{", AttSrcAsm,
                               ", $dst|$dst, ", IntelSrcAsm, "}"),
                    [(set RC:$dst, RHS)]>;

  // Merge-masking form.  The higher complexity makes it preferred over the
  // VMOV*rrk Pat<>.
  let AddedComplexity = 20 in {
    def NAME#k : AVX512<O, F, Outs, MaskingIns,
                        !strconcat(OpcodeStr, " \t{", AttSrcAsm,
                                   ", $dst {${mask}}|$dst {${mask}}, ",
                                   IntelSrcAsm, "}"),
                        [(set RC:$dst, MaskingRHS)]>,
                 EVEX_K {
      // In case of the 3src subclass this is overridden with a let.
      string Constraints = MaskingConstraint;
    }
  }

  // Zero-masking form.  The higher complexity makes it preferred over the
  // VMOV*rrkz Pat<>.  The zero vector is written as a bitconvert of a
  // v16i32 all-zeros value to OpVT.
  let AddedComplexity = 30 in {
    def NAME#kz : AVX512<O, F, Outs, ZeroMaskingIns,
                         !strconcat(OpcodeStr, " \t{", AttSrcAsm,
                                    ", $dst {${mask}} {z}|"
                                    "$dst {${mask}} {z}, ",
                                    IntelSrcAsm, "}"),
                         [(set RC:$dst,
                               (vselect KRC:$mask, RHS,
                                        (OpVT (bitconvert
                                                (v16i32 immAllZerosV)))))]>,
                  EVEX_KZ;
  }
}
135
// Generates the unconditional (non-masking), the masking and the
// zero-masking variants of an instruction.  In the masking variant the
// preserved vector elements come from a new dummy input operand, $src0,
// which is tied to $dst.
multiclass AVX512_masking<bits<8> O, Format F, dag Outs, dag Ins,
                          string OpcodeStr,
                          string AttSrcAsm, string IntelSrcAsm,
                          dag RHS, ValueType OpVT, RegisterClass RC,
                          RegisterClass KRC>
  : AVX512_masking_common<O, F, Outs,
                          // Unmasked operands.
                          Ins,
                          // Masking operands: pass-through value and mask
                          // precede the regular inputs.
                          !con((ins RC:$src0, KRC:$mask), Ins),
                          // Zero-masking operands: only the mask is added.
                          !con((ins KRC:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          RHS,
                          (vselect KRC:$mask, RHS, RC:$src0),
                          OpVT, RC, KRC,
                          "$src0 = $dst">;
151
// Like AVX512_masking, but for instructions where one source operand
// ($src1) is already tied to $dst; that operand supplies the preserved
// vector elements, so no extra dummy input is added.  NOTE: NonTiedIns (the
// ins dag) must not contain $src1 -- it is prepended here.  No masking
// constraint is passed down, so the common multiclass default ("") applies.
multiclass AVX512_masking_3src<bits<8> O, Format F, dag Outs, dag NonTiedIns,
                               string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, ValueType OpVT,
                               RegisterClass RC, RegisterClass KRC>
  : AVX512_masking_common<O, F, Outs,
                          // Unmasked operands.
                          !con((ins RC:$src1), NonTiedIns),
                          // Masking and zero-masking operands are the same:
                          // $src1, the mask, then the remaining inputs.
                          !con((ins RC:$src1, KRC:$mask), NonTiedIns),
                          !con((ins RC:$src1, KRC:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          RHS,
                          (vselect KRC:$mask, RHS, RC:$src1),
                          OpVT, RC, KRC>;
169
// Bitcasts between vector types of the same width need no instruction: each
// pattern below simply re-types the source register.  All of them are
// guarded by HasAVX512.
let Predicates = [HasAVX512] in {
  // Bitcasts between 512-bit vector types. Return the original type since
  // no instruction is needed for the conversion
  def : Pat<(v8f64  (bitconvert (v8i64  VR512:$src))), (v8f64  VR512:$src)>;
  def : Pat<(v8f64  (bitconvert (v16i32 VR512:$src))), (v8f64  VR512:$src)>;
  def : Pat<(v8f64  (bitconvert (v32i16 VR512:$src))), (v8f64  VR512:$src)>;
  def : Pat<(v8f64  (bitconvert (v64i8  VR512:$src))), (v8f64  VR512:$src)>;
  def : Pat<(v8f64  (bitconvert (v16f32 VR512:$src))), (v8f64  VR512:$src)>;
  def : Pat<(v16f32 (bitconvert (v8i64  VR512:$src))), (v16f32 VR512:$src)>;
  def : Pat<(v16f32 (bitconvert (v16i32 VR512:$src))), (v16f32 VR512:$src)>;
  def : Pat<(v16f32 (bitconvert (v32i16 VR512:$src))), (v16f32 VR512:$src)>;
  def : Pat<(v16f32 (bitconvert (v64i8  VR512:$src))), (v16f32 VR512:$src)>;
  def : Pat<(v16f32 (bitconvert (v8f64  VR512:$src))), (v16f32 VR512:$src)>;
  def : Pat<(v8i64  (bitconvert (v16i32 VR512:$src))), (v8i64  VR512:$src)>;
  def : Pat<(v8i64  (bitconvert (v32i16 VR512:$src))), (v8i64  VR512:$src)>;
  def : Pat<(v8i64  (bitconvert (v64i8  VR512:$src))), (v8i64  VR512:$src)>;
  def : Pat<(v8i64  (bitconvert (v8f64  VR512:$src))), (v8i64  VR512:$src)>;
  def : Pat<(v8i64  (bitconvert (v16f32 VR512:$src))), (v8i64  VR512:$src)>;
  def : Pat<(v16i32 (bitconvert (v8i64  VR512:$src))), (v16i32 VR512:$src)>;
  def : Pat<(v16i32 (bitconvert (v16f32 VR512:$src))), (v16i32 VR512:$src)>;
  def : Pat<(v16i32 (bitconvert (v32i16 VR512:$src))), (v16i32 VR512:$src)>;
  def : Pat<(v16i32 (bitconvert (v64i8  VR512:$src))), (v16i32 VR512:$src)>;
  def : Pat<(v16i32 (bitconvert (v8f64  VR512:$src))), (v16i32 VR512:$src)>;
  // The v32i16 <- v16f32 pattern used to be listed twice; one copy suffices.
  def : Pat<(v32i16 (bitconvert (v8i64  VR512:$src))), (v32i16 VR512:$src)>;
  def : Pat<(v32i16 (bitconvert (v16i32 VR512:$src))), (v32i16 VR512:$src)>;
  def : Pat<(v32i16 (bitconvert (v64i8  VR512:$src))), (v32i16 VR512:$src)>;
  def : Pat<(v32i16 (bitconvert (v8f64  VR512:$src))), (v32i16 VR512:$src)>;
  def : Pat<(v32i16 (bitconvert (v16f32 VR512:$src))), (v32i16 VR512:$src)>;
  def : Pat<(v64i8  (bitconvert (v8i64  VR512:$src))), (v64i8  VR512:$src)>;
  def : Pat<(v64i8  (bitconvert (v16i32 VR512:$src))), (v64i8  VR512:$src)>;
  def : Pat<(v64i8  (bitconvert (v32i16 VR512:$src))), (v64i8  VR512:$src)>;
  def : Pat<(v64i8  (bitconvert (v8f64  VR512:$src))), (v64i8  VR512:$src)>;
  def : Pat<(v64i8  (bitconvert (v16f32 VR512:$src))), (v64i8  VR512:$src)>;

  // Bitcasts between 128-bit vector types. Return the original type since
  // no instruction is needed for the conversion
  def : Pat<(v2i64 (bitconvert (v4i32 VR128X:$src))), (v2i64 VR128X:$src)>;
  def : Pat<(v2i64 (bitconvert (v8i16 VR128X:$src))), (v2i64 VR128X:$src)>;
  def : Pat<(v2i64 (bitconvert (v16i8 VR128X:$src))), (v2i64 VR128X:$src)>;
  def : Pat<(v2i64 (bitconvert (v2f64 VR128X:$src))), (v2i64 VR128X:$src)>;
  def : Pat<(v2i64 (bitconvert (v4f32 VR128X:$src))), (v2i64 VR128X:$src)>;
  def : Pat<(v4i32 (bitconvert (v2i64 VR128X:$src))), (v4i32 VR128X:$src)>;
  def : Pat<(v4i32 (bitconvert (v8i16 VR128X:$src))), (v4i32 VR128X:$src)>;
  def : Pat<(v4i32 (bitconvert (v16i8 VR128X:$src))), (v4i32 VR128X:$src)>;
  def : Pat<(v4i32 (bitconvert (v2f64 VR128X:$src))), (v4i32 VR128X:$src)>;
  def : Pat<(v4i32 (bitconvert (v4f32 VR128X:$src))), (v4i32 VR128X:$src)>;
  def : Pat<(v8i16 (bitconvert (v2i64 VR128X:$src))), (v8i16 VR128X:$src)>;
  def : Pat<(v8i16 (bitconvert (v4i32 VR128X:$src))), (v8i16 VR128X:$src)>;
  def : Pat<(v8i16 (bitconvert (v16i8 VR128X:$src))), (v8i16 VR128X:$src)>;
  def : Pat<(v8i16 (bitconvert (v2f64 VR128X:$src))), (v8i16 VR128X:$src)>;
  def : Pat<(v8i16 (bitconvert (v4f32 VR128X:$src))), (v8i16 VR128X:$src)>;
  def : Pat<(v16i8 (bitconvert (v2i64 VR128X:$src))), (v16i8 VR128X:$src)>;
  def : Pat<(v16i8 (bitconvert (v4i32 VR128X:$src))), (v16i8 VR128X:$src)>;
  def : Pat<(v16i8 (bitconvert (v8i16 VR128X:$src))), (v16i8 VR128X:$src)>;
  def : Pat<(v16i8 (bitconvert (v2f64 VR128X:$src))), (v16i8 VR128X:$src)>;
  def : Pat<(v16i8 (bitconvert (v4f32 VR128X:$src))), (v16i8 VR128X:$src)>;
  def : Pat<(v4f32 (bitconvert (v2i64 VR128X:$src))), (v4f32 VR128X:$src)>;
  def : Pat<(v4f32 (bitconvert (v4i32 VR128X:$src))), (v4f32 VR128X:$src)>;
  def : Pat<(v4f32 (bitconvert (v8i16 VR128X:$src))), (v4f32 VR128X:$src)>;
  def : Pat<(v4f32 (bitconvert (v16i8 VR128X:$src))), (v4f32 VR128X:$src)>;
  def : Pat<(v4f32 (bitconvert (v2f64 VR128X:$src))), (v4f32 VR128X:$src)>;
  def : Pat<(v2f64 (bitconvert (v2i64 VR128X:$src))), (v2f64 VR128X:$src)>;
  def : Pat<(v2f64 (bitconvert (v4i32 VR128X:$src))), (v2f64 VR128X:$src)>;
  def : Pat<(v2f64 (bitconvert (v8i16 VR128X:$src))), (v2f64 VR128X:$src)>;
  def : Pat<(v2f64 (bitconvert (v16i8 VR128X:$src))), (v2f64 VR128X:$src)>;
  def : Pat<(v2f64 (bitconvert (v4f32 VR128X:$src))), (v2f64 VR128X:$src)>;

  // Bitcasts between 256-bit vector types. Return the original type since
  // no instruction is needed for the conversion
  def : Pat<(v4f64  (bitconvert (v8f32  VR256X:$src))), (v4f64  VR256X:$src)>;
  def : Pat<(v4f64  (bitconvert (v8i32  VR256X:$src))), (v4f64  VR256X:$src)>;
  def : Pat<(v4f64  (bitconvert (v4i64  VR256X:$src))), (v4f64  VR256X:$src)>;
  def : Pat<(v4f64  (bitconvert (v16i16 VR256X:$src))), (v4f64  VR256X:$src)>;
  def : Pat<(v4f64  (bitconvert (v32i8  VR256X:$src))), (v4f64  VR256X:$src)>;
  def : Pat<(v8f32  (bitconvert (v8i32  VR256X:$src))), (v8f32  VR256X:$src)>;
  def : Pat<(v8f32  (bitconvert (v4i64  VR256X:$src))), (v8f32  VR256X:$src)>;
  def : Pat<(v8f32  (bitconvert (v4f64  VR256X:$src))), (v8f32  VR256X:$src)>;
  def : Pat<(v8f32  (bitconvert (v32i8  VR256X:$src))), (v8f32  VR256X:$src)>;
  def : Pat<(v8f32  (bitconvert (v16i16 VR256X:$src))), (v8f32  VR256X:$src)>;
  def : Pat<(v4i64  (bitconvert (v8f32  VR256X:$src))), (v4i64  VR256X:$src)>;
  def : Pat<(v4i64  (bitconvert (v8i32  VR256X:$src))), (v4i64  VR256X:$src)>;
  def : Pat<(v4i64  (bitconvert (v4f64  VR256X:$src))), (v4i64  VR256X:$src)>;
  def : Pat<(v4i64  (bitconvert (v32i8  VR256X:$src))), (v4i64  VR256X:$src)>;
  def : Pat<(v4i64  (bitconvert (v16i16 VR256X:$src))), (v4i64  VR256X:$src)>;
  def : Pat<(v32i8  (bitconvert (v4f64  VR256X:$src))), (v32i8  VR256X:$src)>;
  def : Pat<(v32i8  (bitconvert (v4i64  VR256X:$src))), (v32i8  VR256X:$src)>;
  def : Pat<(v32i8  (bitconvert (v8f32  VR256X:$src))), (v32i8  VR256X:$src)>;
  def : Pat<(v32i8  (bitconvert (v8i32  VR256X:$src))), (v32i8  VR256X:$src)>;
  def : Pat<(v32i8  (bitconvert (v16i16 VR256X:$src))), (v32i8  VR256X:$src)>;
  def : Pat<(v8i32  (bitconvert (v32i8  VR256X:$src))), (v8i32  VR256X:$src)>;
  def : Pat<(v8i32  (bitconvert (v16i16 VR256X:$src))), (v8i32  VR256X:$src)>;
  def : Pat<(v8i32  (bitconvert (v8f32  VR256X:$src))), (v8i32  VR256X:$src)>;
  def : Pat<(v8i32  (bitconvert (v4i64  VR256X:$src))), (v8i32  VR256X:$src)>;
  def : Pat<(v8i32  (bitconvert (v4f64  VR256X:$src))), (v8i32  VR256X:$src)>;
  def : Pat<(v16i16 (bitconvert (v8f32  VR256X:$src))), (v16i16 VR256X:$src)>;
  def : Pat<(v16i16 (bitconvert (v8i32  VR256X:$src))), (v16i16 VR256X:$src)>;
  def : Pat<(v16i16 (bitconvert (v4i64  VR256X:$src))), (v16i16 VR256X:$src)>;
  def : Pat<(v16i16 (bitconvert (v4f64  VR256X:$src))), (v16i16 VR256X:$src)>;
  def : Pat<(v16i16 (bitconvert (v32i8  VR256X:$src))), (v16i16 VR256X:$src)>;
}
269
//
// AVX-512: the VPXOR instruction writes zeros to the upper part of its
// destination, so it is safe to use it to build an all-zeros vector.
//

// Pseudo instruction producing a 512-bit all-zeros register.  It takes no
// inputs, has an empty asm string, and is flagged rematerializable, as cheap
// as a move and foldable as a load.  Its own pattern covers v16f32.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512] in {
  def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
                          [(set VR512:$dst, (v16f32 immAllZerosV))]>;
}

// Route all-zeros vectors of these other 512-bit types to the same pseudo.
let Predicates = [HasAVX512] in {
  def : Pat<(v8i64  immAllZerosV), (AVX512_512_SET0)>;
  def : Pat<(v16i32 immAllZerosV), (AVX512_512_SET0)>;
  def : Pat<(v8f64  immAllZerosV), (AVX512_512_SET0)>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000285
//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
//
// Each form has a register (rr) and a memory (rm) variant.  None carries a
// selection pattern; the variants are flagged as free of side effects and
// the memory variants as loads.

// -- 32x4 fp form --
let hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
  def VINSERTF32x4rr : AVX512AIi8<0x18, MRMSrcReg, (outs VR512:$dst),
            (ins VR512:$src1, VR128X:$src2, i8imm:$src3),
            "vinsertf32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
            []>, EVEX_4V, EVEX_V512;
  let mayLoad = 1 in {
    def VINSERTF32x4rm : AVX512AIi8<0x18, MRMSrcMem, (outs VR512:$dst),
              (ins VR512:$src1, f128mem:$src2, i8imm:$src3),
              "vinsertf32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
              []>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VT4>;
  }
}

// -- 64x4 fp form --
let hasSideEffects = 0, ExeDomain = SSEPackedDouble in {
  def VINSERTF64x4rr : AVX512AIi8<0x1a, MRMSrcReg, (outs VR512:$dst),
            (ins VR512:$src1, VR256X:$src2, i8imm:$src3),
            "vinsertf64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
            []>, EVEX_4V, EVEX_V512, VEX_W;
  let mayLoad = 1 in {
    def VINSERTF64x4rm : AVX512AIi8<0x1a, MRMSrcMem, (outs VR512:$dst),
              (ins VR512:$src1, i256mem:$src2, i8imm:$src3),
              "vinsertf64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
              []>, EVEX_4V, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
  }
}

// Integer forms (no explicit execution domain is set for these).
let hasSideEffects = 0 in {
  // -- 32x4 integer form --
  def VINSERTI32x4rr : AVX512AIi8<0x38, MRMSrcReg, (outs VR512:$dst),
            (ins VR512:$src1, VR128X:$src2, i8imm:$src3),
            "vinserti32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
            []>, EVEX_4V, EVEX_V512;
  let mayLoad = 1 in {
    def VINSERTI32x4rm : AVX512AIi8<0x38, MRMSrcMem, (outs VR512:$dst),
              (ins VR512:$src1, i128mem:$src2, i8imm:$src3),
              "vinserti32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
              []>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VT4>;
  }

  // -- 64x4 integer form --
  def VINSERTI64x4rr : AVX512AIi8<0x3a, MRMSrcReg, (outs VR512:$dst),
            (ins VR512:$src1, VR256X:$src2, i8imm:$src3),
            "vinserti64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
            []>, EVEX_4V, EVEX_V512, VEX_W;
  let mayLoad = 1 in {
    def VINSERTI64x4rm : AVX512AIi8<0x3a, MRMSrcMem, (outs VR512:$dst),
              (ins VR512:$src1, i256mem:$src2, i8imm:$src3),
              "vinserti64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
              []>, EVEX_4V, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
  }
}
339
// Selection patterns for subvector inserts into a 512-bit register.  The
// matched fragment ($ins) is handed to INSERT_get_vinsert{128,256}_imm to
// produce the instruction's immediate operand.

// 128-bit insert, register source.
def : Pat<(vinsert128_insert:$ins (v16f32 VR512:$src1), (v4f32 VR128X:$src2),
           (iPTR imm)), (VINSERTF32x4rr VR512:$src1, VR128X:$src2,
           (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8f64 VR512:$src1), (v2f64 VR128X:$src2),
           (iPTR imm)), (VINSERTF32x4rr VR512:$src1, VR128X:$src2,
           (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1), (v2i64 VR128X:$src2),
           (iPTR imm)), (VINSERTI32x4rr VR512:$src1, VR128X:$src2,
           (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1), (v4i32 VR128X:$src2),
           (iPTR imm)), (VINSERTI32x4rr VR512:$src1, VR128X:$src2,
           (INSERT_get_vinsert128_imm VR512:$ins))>;

// 128-bit insert, memory source.
def : Pat<(vinsert128_insert:$ins (v16f32 VR512:$src1), (loadv4f32 addr:$src2),
           (iPTR imm)), (VINSERTF32x4rm VR512:$src1, addr:$src2,
           (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1),
           (bc_v4i32 (loadv2i64 addr:$src2)),
           (iPTR imm)), (VINSERTI32x4rm VR512:$src1, addr:$src2,
           (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8f64 VR512:$src1), (loadv2f64 addr:$src2),
           (iPTR imm)), (VINSERTF32x4rm VR512:$src1, addr:$src2,
           (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1), (loadv2i64 addr:$src2),
           (iPTR imm)), (VINSERTI32x4rm VR512:$src1, addr:$src2,
           (INSERT_get_vinsert128_imm VR512:$ins))>;

// 256-bit insert, register source.  The two integer patterns insert a
// VR256X value and compute a 256-bit immediate, so they must match with
// vinsert256_insert like every other 256-bit pattern here (they previously
// used vinsert128_insert).
def : Pat<(vinsert256_insert:$ins (v16f32 VR512:$src1), (v8f32 VR256X:$src2),
           (iPTR imm)), (VINSERTF64x4rr VR512:$src1, VR256X:$src2,
           (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v8f64 VR512:$src1), (v4f64 VR256X:$src2),
           (iPTR imm)), (VINSERTF64x4rr VR512:$src1, VR256X:$src2,
           (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v8i64 VR512:$src1), (v4i64 VR256X:$src2),
           (iPTR imm)), (VINSERTI64x4rr VR512:$src1, VR256X:$src2,
           (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v16i32 VR512:$src1), (v8i32 VR256X:$src2),
           (iPTR imm)), (VINSERTI64x4rr VR512:$src1, VR256X:$src2,
           (INSERT_get_vinsert256_imm VR512:$ins))>;

// 256-bit insert, memory source.
def : Pat<(vinsert256_insert:$ins (v16f32 VR512:$src1), (loadv8f32 addr:$src2),
           (iPTR imm)), (VINSERTF64x4rm VR512:$src1, addr:$src2,
           (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v8f64 VR512:$src1), (loadv4f64 addr:$src2),
           (iPTR imm)), (VINSERTF64x4rm VR512:$src1, addr:$src2,
           (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v8i64 VR512:$src1), (loadv4i64 addr:$src2),
           (iPTR imm)), (VINSERTI64x4rm VR512:$src1, addr:$src2,
           (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v16i32 VR512:$src1),
           (bc_v8i32 (loadv4i64 addr:$src2)),
           (iPTR imm)), (VINSERTI64x4rm VR512:$src1, addr:$src2,
           (INSERT_get_vinsert256_imm VR512:$ins))>;
393
// vinsertps - insert f32 to XMM
// Register form: selected for X86insertps of two XMM registers.
def VINSERTPSzrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
      (ins VR128X:$src1, VR128X:$src2, u32u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst,
            (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>,
      EVEX_4V;

// Memory form: the second source is an f32 load wrapped in scalar_to_vector.
def VINSERTPSzrm : AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
      (ins VR128X:$src1, f32mem:$src2, u32u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst,
            (X86insertps VR128X:$src1,
                         (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                         imm:$src3))]>,
      EVEX_4V, EVEX_CD8<32, CD8VT1>;
406
//===----------------------------------------------------------------------===//
// AVX-512 VECTOR EXTRACT
//---
// Each form has a register-destination (rr) and a memory-destination (mr)
// variant.  None carries a selection pattern, so the memory side effect of
// the mr variants cannot be inferred and must be declared with mayStore.
// The 32x4 mr variants previously lacked it, unlike their 64x4 siblings.
//
// NOTE(review): the 64x4 fp form sits under ExeDomain = SSEPackedSingle here,
// whereas VINSERTF64x4 uses SSEPackedDouble -- confirm whether that is
// intended.
let hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
// -- 32x4 form --
def VEXTRACTF32x4rr : AVX512AIi8<0x19, MRMDestReg, (outs VR128X:$dst),
          (ins VR512:$src1, i8imm:$src2),
          "vextractf32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          []>, EVEX, EVEX_V512;
let mayStore = 1 in
def VEXTRACTF32x4mr : AVX512AIi8<0x19, MRMDestMem, (outs),
          (ins f128mem:$dst, VR512:$src1, i8imm:$src2),
          "vextractf32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          []>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VT4>;

// -- 64x4 form --
def VEXTRACTF64x4rr : AVX512AIi8<0x1b, MRMDestReg, (outs VR256X:$dst),
          (ins VR512:$src1, i8imm:$src2),
          "vextractf64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          []>, EVEX, EVEX_V512, VEX_W;
let mayStore = 1 in
def VEXTRACTF64x4mr : AVX512AIi8<0x1b, MRMDestMem, (outs),
          (ins f256mem:$dst, VR512:$src1, i8imm:$src2),
          "vextractf64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          []>, EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
}

let hasSideEffects = 0 in {
// -- 32x4 form --
def VEXTRACTI32x4rr : AVX512AIi8<0x39, MRMDestReg, (outs VR128X:$dst),
          (ins VR512:$src1, i8imm:$src2),
          "vextracti32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          []>, EVEX, EVEX_V512;
let mayStore = 1 in
def VEXTRACTI32x4mr : AVX512AIi8<0x39, MRMDestMem, (outs),
          (ins i128mem:$dst, VR512:$src1, i8imm:$src2),
          "vextracti32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          []>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VT4>;

// -- 64x4 form --
def VEXTRACTI64x4rr : AVX512AIi8<0x3b, MRMDestReg, (outs VR256X:$dst),
          (ins VR512:$src1, i8imm:$src2),
          "vextracti64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          []>, EVEX, EVEX_V512, VEX_W;
let mayStore = 1 in
def VEXTRACTI64x4mr : AVX512AIi8<0x3b, MRMDestMem, (outs),
          (ins i256mem:$dst, VR512:$src1, i8imm:$src2),
          "vextracti64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          []>, EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
}
455
// Selection patterns for subvector extracts from a 512-bit register.  The
// matched fragment ($ext) is handed to EXTRACT_get_vextract{128,256}_imm to
// produce the instruction's immediate operand.

// 128-bit extracts.
def : Pat<(vextract128_extract:$ext (v16f32 VR512:$src1), (iPTR imm)),
          (v4f32 (VEXTRACTF32x4rr
                    VR512:$src1,
                    (EXTRACT_get_vextract128_imm VR128X:$ext)))>;

// NOTE(review): this pattern leaves the source type unspecified and selects
// the fp instruction VEXTRACTF32x4rr for a v4i32 result, while the v8i64
// pattern below uses VEXTRACTI32x4rr -- confirm that this is intentional.
def : Pat<(vextract128_extract:$ext VR512:$src1, (iPTR imm)),
          (v4i32 (VEXTRACTF32x4rr
                    VR512:$src1,
                    (EXTRACT_get_vextract128_imm VR128X:$ext)))>;

def : Pat<(vextract128_extract:$ext (v8f64 VR512:$src1), (iPTR imm)),
          (v2f64 (VEXTRACTF32x4rr
                    VR512:$src1,
                    (EXTRACT_get_vextract128_imm VR128X:$ext)))>;

def : Pat<(vextract128_extract:$ext (v8i64 VR512:$src1), (iPTR imm)),
          (v2i64 (VEXTRACTI32x4rr
                    VR512:$src1,
                    (EXTRACT_get_vextract128_imm VR128X:$ext)))>;

// 256-bit extracts.
def : Pat<(vextract256_extract:$ext (v16f32 VR512:$src1), (iPTR imm)),
          (v8f32 (VEXTRACTF64x4rr
                    VR512:$src1,
                    (EXTRACT_get_vextract256_imm VR256X:$ext)))>;

def : Pat<(vextract256_extract:$ext (v16i32 VR512:$src1), (iPTR imm)),
          (v8i32 (VEXTRACTI64x4rr
                    VR512:$src1,
                    (EXTRACT_get_vextract256_imm VR256X:$ext)))>;

def : Pat<(vextract256_extract:$ext (v8f64 VR512:$src1), (iPTR imm)),
          (v4f64 (VEXTRACTF64x4rr
                    VR512:$src1,
                    (EXTRACT_get_vextract256_imm VR256X:$ext)))>;

def : Pat<(vextract256_extract:$ext (v8i64 VR512:$src1), (iPTR imm)),
          (v4i64 (VEXTRACTI64x4rr
                    VR512:$src1,
                    (EXTRACT_get_vextract256_imm VR256X:$ext)))>;
488
// Extracting the subvector at index 0 of a 512-bit vector is just a
// subregister copy and needs no instruction.

// zmm -> ymm (256-bit result, sub_ymm).
def : Pat<(v8i32 (extract_subvector (v16i32 VR512:$src), (iPTR 0))),
          (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm))>;
def : Pat<(v8f32 (extract_subvector (v16f32 VR512:$src), (iPTR 0))),
          (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm))>;
def : Pat<(v4i64 (extract_subvector (v8i64 VR512:$src), (iPTR 0))),
          (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm))>;
def : Pat<(v4f64 (extract_subvector (v8f64 VR512:$src), (iPTR 0))),
          (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm))>;

// zmm -> xmm (128-bit result, sub_xmm).
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 0))),
          (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm))>;
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 0))),
          (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 0))),
          (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 0))),
          (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
509
510
// Inserting a subvector at index 0 of an undef 512-bit vector is just a
// subregister copy and needs no instruction.

// xmm -> zmm: the 128-bit value is first placed in an undefined 256-bit
// register (sub_xmm), which is then placed in an undefined 512-bit register
// (sub_ymm).
def : Pat<(insert_subvector undef, (v2i64 VR128X:$src), (iPTR 0)),
          (INSERT_SUBREG
             (v8i64 (IMPLICIT_DEF)),
             (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
             sub_ymm)>;
def : Pat<(insert_subvector undef, (v2f64 VR128X:$src), (iPTR 0)),
          (INSERT_SUBREG
             (v8f64 (IMPLICIT_DEF)),
             (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
             sub_ymm)>;
def : Pat<(insert_subvector undef, (v4i32 VR128X:$src), (iPTR 0)),
          (INSERT_SUBREG
             (v16i32 (IMPLICIT_DEF)),
             (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
             sub_ymm)>;
def : Pat<(insert_subvector undef, (v4f32 VR128X:$src), (iPTR 0)),
          (INSERT_SUBREG
             (v16f32 (IMPLICIT_DEF)),
             (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
             sub_ymm)>;

// ymm -> zmm: a single INSERT_SUBREG through sub_ymm.
def : Pat<(insert_subvector undef, (v4i64 VR256X:$src), (iPTR 0)),
          (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
def : Pat<(insert_subvector undef, (v4f64 VR256X:$src), (iPTR 0)),
          (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
def : Pat<(insert_subvector undef, (v8i32 VR256X:$src), (iPTR 0)),
          (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
def : Pat<(insert_subvector undef, (v8f32 VR256X:$src), (iPTR 0)),
          (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
538
// vextractps - extract 32 bits from XMM
// Register form: the selected element, viewed as an i32 lane of the v4f32
// source, is written to a GR32.
def VEXTRACTPSzrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
      (ins VR128X:$src1, u32u8imm:$src2),
      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(set GR32:$dst,
            (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
      EVEX;

// Memory form: the same extracted element is stored to $dst.
def VEXTRACTPSzmr : AVX512AIi8<0x17, MRMDestMem, (outs),
      (ins f32mem:$dst, VR128X:$src1, u32u8imm:$src2),
      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
              addr:$dst)]>,
      EVEX, EVEX_CD8<32, CD8VT1>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000551
//===---------------------------------------------------------------------===//
// AVX-512 BROADCAST
//---
// Defines the register-source (rr) and memory-source (rm) forms of a
// floating-point broadcast.  Neither form carries a selection pattern.
//   opc      - opcode byte.
//   DestRC   - destination register class.
//   SrcRC    - source register class of the rr form.
//   x86memop - memory operand of the rm form.
multiclass avx512_fp_broadcast<bits<8> opc, string OpcodeStr,
                               RegisterClass DestRC,
                               RegisterClass SrcRC, X86MemOperand x86memop> {
  def rr : AVX5128I<opc, MRMSrcReg, (outs DestRC:$dst), (ins SrcRC:$src),
                    OpcodeStr # " \t{$src, $dst|$dst, $src}",
                    []>,
           EVEX;
  def rm : AVX5128I<opc, MRMSrcMem, (outs DestRC:$dst), (ins x86memop:$src),
                    OpcodeStr # " \t{$src, $dst|$dst, $src}",
                    []>,
           EVEX;
}
// Single-precision broadcast into a 512-bit register.
let ExeDomain = SSEPackedSingle in
defm VBROADCASTSSZ : avx512_fp_broadcast<0x18, "vbroadcastss", VR512,
                                         VR128X, f32mem>,
                     EVEX_V512, EVEX_CD8<32, CD8VT1>;

// Double-precision broadcast into a 512-bit register.
let ExeDomain = SSEPackedDouble in
defm VBROADCASTSDZ : avx512_fp_broadcast<0x19, "vbroadcastsd", VR512,
                                         VR128X, f64mem>,
                     EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
575
576def : Pat<(v16f32 (X86VBroadcast (loadf32 addr:$src))),
577 (VBROADCASTSSZrm addr:$src)>;
578def : Pat<(v8f64 (X86VBroadcast (loadf64 addr:$src))),
579 (VBROADCASTSDZrm addr:$src)>;
580
Quentin Colombet4bf1c282013-10-25 17:47:18 +0000581def : Pat<(int_x86_avx512_vbroadcast_ss_512 addr:$src),
582 (VBROADCASTSSZrm addr:$src)>;
583def : Pat<(int_x86_avx512_vbroadcast_sd_512 addr:$src),
584 (VBROADCASTSDZrm addr:$src)>;
585
// Integer broadcast from a general-purpose register into a 512-bit vector.
// Emits an unmasked form (Zrr) and a zero-masked form (Zkrr); neither has an
// inline selection pattern.
//   SrcRC - GPR class of the scalar source
//   KRC   - mask register class used by the Zkrr form
multiclass avx512_int_broadcast_reg<bits<8> opc, string OpcodeStr,
                                    RegisterClass SrcRC, RegisterClass KRC> {
  def Zrr : AVX5128I<opc, MRMSrcReg,
                     (outs VR512:$dst), (ins SrcRC:$src),
                     OpcodeStr # " \t{$src, $dst|$dst, $src}",
                     []>,
            EVEX, EVEX_V512;
  def Zkrr : AVX5128I<opc, MRMSrcReg,
                      (outs VR512:$dst), (ins KRC:$mask, SrcRC:$src),
                      OpcodeStr #
                      " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                      []>,
             EVEX, EVEX_V512, EVEX_KZ;
}
597
// GPR-source integer broadcasts (dword and qword element sizes).
defm VPBROADCASTDr
    : avx512_int_broadcast_reg<0x7C, "vpbroadcastd", GR32, VK16WM>;
defm VPBROADCASTQr
    : avx512_int_broadcast_reg<0x7C, "vpbroadcastq", GR64, VK8WM>,
      VEX_W;

// Zero-extending a mask to a vector: zero-masked broadcast of the constant 1.
def : Pat<(v16i32 (X86vzext VK16WM:$mask)),
          (VPBROADCASTDrZkrr VK16WM:$mask, (i32 (MOV32ri 0x1)))>;

def : Pat<(v8i64 (X86vzext VK8WM:$mask)),
          (VPBROADCASTQrZkrr VK8WM:$mask, (i64 (MOV64ri 0x1)))>;

// Plain and masked broadcast nodes with a GPR source.
def : Pat<(v16i32 (X86VBroadcast (i32 GR32:$src))),
          (VPBROADCASTDrZrr GR32:$src)>;
def : Pat<(v16i32 (X86VBroadcastm VK16WM:$mask, (i32 GR32:$src))),
          (VPBROADCASTDrZkrr VK16WM:$mask, GR32:$src)>;
def : Pat<(v8i64 (X86VBroadcast (i64 GR64:$src))),
          (VPBROADCASTQrZrr GR64:$src)>;
def : Pat<(v8i64 (X86VBroadcastm VK8WM:$mask, (i64 GR64:$src))),
          (VPBROADCASTQrZkrr VK8WM:$mask, GR64:$src)>;

// Unmasked intrinsics.
def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_i32_512 (i32 GR32:$src))),
          (VPBROADCASTDrZrr GR32:$src)>;
def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_i64_512 (i64 GR64:$src))),
          (VPBROADCASTQrZrr GR64:$src)>;

// Masked intrinsics with an all-zeros pass-through operand: the GPR mask is
// moved into the write-mask register class and the zero-masked form is used.
def : Pat<(v16i32 (int_x86_avx512_mask_pbroadcast_d_gpr_512
                     (i32 GR32:$src),
                     (v16i32 immAllZerosV),
                     (i16 GR16:$mask))),
          (VPBROADCASTDrZkrr (COPY_TO_REGCLASS GR16:$mask, VK16WM),
                             GR32:$src)>;
def : Pat<(v8i64 (int_x86_avx512_mask_pbroadcast_q_gpr_512
                    (i64 GR64:$src),
                    (bc_v8i64 (v16i32 immAllZerosV)),
                    (i8 GR8:$mask))),
          (VPBROADCASTQrZkrr (COPY_TO_REGCLASS GR8:$mask, VK8WM),
                             GR64:$src)>;
628
// Integer broadcast from an XMM register or from memory, with unmasked and
// zero-masked variants (rr, krr, rm, krm). All four carry selection patterns
// built on X86VBroadcast / X86VBroadcastm.
//   x86memop - memory operand of the rm/krm forms
//   ld_frag  - load fragment matched by the rm/krm patterns
//   DstRC    - destination register class
//   OpVT     - result vector type
//   SrcVT    - type of the VR128X source in the rr/krr patterns
//   KRC      - mask register class of the krr/krm forms
multiclass avx512_int_broadcast_rm<bits<8> opc, string OpcodeStr,
                                   X86MemOperand x86memop, PatFrag ld_frag,
                                   RegisterClass DstRC, ValueType OpVT,
                                   ValueType SrcVT, RegisterClass KRC> {
  def rr : AVX5128I<opc, MRMSrcReg,
                    (outs DstRC:$dst), (ins VR128X:$src),
                    OpcodeStr # " \t{$src, $dst|$dst, $src}",
                    [(set DstRC:$dst,
                          (OpVT (X86VBroadcast (SrcVT VR128X:$src))))]>,
           EVEX;

  def krr : AVX5128I<opc, MRMSrcReg,
                     (outs DstRC:$dst), (ins KRC:$mask, VR128X:$src),
                     OpcodeStr #
                     " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
                     [(set DstRC:$dst,
                           (OpVT (X86VBroadcastm KRC:$mask,
                                                 (SrcVT VR128X:$src))))]>,
            EVEX, EVEX_KZ;

  let mayLoad = 1 in
  def rm : AVX5128I<opc, MRMSrcMem,
                    (outs DstRC:$dst), (ins x86memop:$src),
                    OpcodeStr # " \t{$src, $dst|$dst, $src}",
                    [(set DstRC:$dst,
                          (OpVT (X86VBroadcast (ld_frag addr:$src))))]>,
           EVEX;

  let mayLoad = 1 in
  def krm : AVX5128I<opc, MRMSrcMem,
                     (outs DstRC:$dst), (ins KRC:$mask, x86memop:$src),
                     OpcodeStr #
                     " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
                     [(set DstRC:$dst,
                           (OpVT (X86VBroadcastm KRC:$mask,
                                                 (ld_frag addr:$src))))]>,
            EVEX, EVEX_KZ;
}
657
// 512-bit integer broadcasts from XMM / memory: 16 x i32 and 8 x i64.
defm VPBROADCASTDZ
    : avx512_int_broadcast_rm<0x58, "vpbroadcastd", i32mem, loadi32,
                              VR512, v16i32, v4i32, VK16WM>,
      EVEX_V512, EVEX_CD8<32, CD8VT1>;
defm VPBROADCASTQZ
    : avx512_int_broadcast_rm<0x59, "vpbroadcastq", i64mem, loadi64,
                              VR512, v8i64, v2i64, VK8WM>,
      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
664
// Sub-vector broadcast from memory into a 512-bit register: an unmasked (rm)
// and a zero-masked (krm) form, both without selection patterns.
// Note: ld_frag is accepted but not referenced by either definition.
multiclass avx512_int_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
                                          X86MemOperand x86memop,
                                          PatFrag ld_frag,
                                          RegisterClass KRC> {
  let mayLoad = 1 in
  def rm : AVX5128I<opc, MRMSrcMem,
                    (outs VR512:$dst), (ins x86memop:$src),
                    OpcodeStr # " \t{$src, $dst|$dst, $src}",
                    []>,
           EVEX;

  let mayLoad = 1 in
  def krm : AVX5128I<opc, MRMSrcMem,
                     (outs VR512:$dst), (ins KRC:$mask, x86memop:$src),
                     OpcodeStr #
                     " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
                     []>,
            EVEX, EVEX_KZ;
}
679
// Sub-vector integer broadcasts into a 512-bit register.
//
// The mask register class must match the number of result elements:
// 16 dword lanes for the 32x4 form, 8 qword lanes for the 64x4 form (the same
// VK16WM / VK8WM split used by VPBROADCASTDZ / VPBROADCASTQZ).
defm VBROADCASTI32X4
    : avx512_int_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                                     i128mem, loadv2i64, VK16WM>,
      EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTI64X4
    : avx512_int_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
                                     i256mem, loadv4i64, VK8WM>,
      VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
686
// Intrinsics taking an XMM source map to the register forms.
def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_512 (v4i32 VR128X:$src))),
          (VPBROADCASTDZrr VR128X:$src)>;
def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_512 (v2i64 VR128X:$src))),
          (VPBROADCASTQZrr VR128X:$src)>;

// FP broadcast nodes with an XMM source.
def : Pat<(v16f32 (X86VBroadcast (v4f32 VR128X:$src))),
          (VBROADCASTSSZrr VR128X:$src)>;
def : Pat<(v8f64 (X86VBroadcast (v2f64 VR128X:$src))),
          (VBROADCASTSDZrr VR128X:$src)>;

// FP broadcast intrinsics with an XMM source.
def : Pat<(v16f32 (int_x86_avx512_vbroadcast_ss_ps_512 (v4f32 VR128X:$src))),
          (VBROADCASTSSZrr VR128X:$src)>;
def : Pat<(v8f64 (int_x86_avx512_vbroadcast_sd_pd_512 (v2f64 VR128X:$src))),
          (VBROADCASTSDZrr VR128X:$src)>;

// Fallback for when the load node used in the memory-form patterns has
// additional users, which prevents those patterns from being selected:
// broadcast from the scalar FP register instead.
def : Pat<(v16f32 (X86VBroadcast FR32X:$src)),
          (VBROADCASTSSZrr (COPY_TO_REGCLASS FR32X:$src, VR128X))>;
def : Pat<(v8f64 (X86VBroadcast FR64X:$src)),
          (VBROADCASTSDZrr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;


// 256-bit masked broadcast of a loaded i32: performed with the 512-bit
// zero-masked memory form, then the low YMM half is extracted.
let Predicates = [HasAVX512] in
def : Pat<(v8i32 (X86VBroadcastm (v8i1 VK8WM:$mask), (loadi32 addr:$src))),
          (EXTRACT_SUBREG
             (v16i32 (VPBROADCASTDZkrm
                        (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
                        addr:$src)),
             sub_ymm)>;
716//===----------------------------------------------------------------------===//
717// AVX-512 BROADCAST MASK TO VECTOR REGISTER
718//---
719
// Broadcast of a mask register into a vector register.
//
// The destination vector register is encoded in ModRM.reg and the mask
// source in ModRM.r/m, so the instruction form must be MRMSrcReg (MRMDestReg
// would swap the two fields and produce a wrong encoding).
//   DstRC - destination vector register class
//   KRC   - source mask register class
//   OpVT, SrcVT - accepted for interface compatibility; not referenced, since
//                 the definition has no selection pattern.
multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
                                 RegisterClass DstRC, RegisterClass KRC,
                                 ValueType OpVT, ValueType SrcVT> {
  def rr : AVX512XS8I<opc, MRMSrcReg,
                      (outs DstRC:$dst), (ins KRC:$src),
                      !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
                      []>,
           EVEX;
}
727
// Mask-to-vector broadcasts; guarded by the HasCDI predicate.
let Predicates = [HasCDI] in {
  defm VPBROADCASTMW2D
      : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d", VR512,
                              VK16, v16i32, v16i1>,
        EVEX_V512;
  defm VPBROADCASTMB2Q
      : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q", VR512,
                              VK8, v8i64, v8i1>,
        EVEX_V512, VEX_W;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000734
735//===----------------------------------------------------------------------===//
736// AVX-512 - VPERM
737//
738// -- immediate form --
// Permute controlled by an 8-bit immediate: register-source (ri) and
// memory-source (mi) forms, both selected from OpNode(src, imm8).
//   RC       - vector register class of source and destination
//   OpNode   - DAG node matched by the patterns
//   mem_frag - load fragment matched by the mi form
//   x86memop - memory operand of the mi form
//   OpVT     - result vector type
multiclass avx512_perm_imm<bits<8> opc, string OpcodeStr, RegisterClass RC,
                           SDNode OpNode, PatFrag mem_frag,
                           X86MemOperand x86memop, ValueType OpVT> {
  def ri : AVX512AIi8<opc, MRMSrcReg,
                      (outs RC:$dst), (ins RC:$src1, i8imm:$src2),
                      OpcodeStr #
                      " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set RC:$dst,
                            (OpVT (OpNode RC:$src1, (i8 imm:$src2))))]>,
           EVEX;
  def mi : AVX512AIi8<opc, MRMSrcMem,
                      (outs RC:$dst), (ins x86memop:$src1, i8imm:$src2),
                      OpcodeStr #
                      " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set RC:$dst,
                            (OpVT (OpNode (mem_frag addr:$src1),
                                          (i8 imm:$src2))))]>,
           EVEX;
}
757
// Immediate-controlled 64-bit element permutes (integer and double).
defm VPERMQZ
    : avx512_perm_imm<0x00, "vpermq", VR512, X86VPermi, memopv8i64,
                      i512mem, v8i64>,
      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

let ExeDomain = SSEPackedDouble in
defm VPERMPDZ
    : avx512_perm_imm<0x01, "vpermpd", VR512, X86VPermi, memopv8f64,
                      f512mem, v8f64>,
      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
763
764// -- VPERM - register form --
// Two-operand permute selected from X86VPermv: register/register (rr) and
// register/memory (rm) forms.
//   RC       - vector register class of all register operands
//   mem_frag - load fragment matched by the rm form
//   x86memop - memory operand of the rm form
//   OpVT     - result vector type
multiclass avx512_perm<bits<8> opc, string OpcodeStr, RegisterClass RC,
                       PatFrag mem_frag, X86MemOperand x86memop,
                       ValueType OpVT> {

  def rr : AVX5128I<opc, MRMSrcReg,
                    (outs RC:$dst), (ins RC:$src1, RC:$src2),
                    OpcodeStr #
                    " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set RC:$dst,
                          (OpVT (X86VPermv RC:$src1, RC:$src2)))]>,
           EVEX_4V;

  def rm : AVX5128I<opc, MRMSrcMem,
                    (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
                    OpcodeStr #
                    " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set RC:$dst,
                          (OpVT (X86VPermv RC:$src1,
                                           (mem_frag addr:$src2))))]>,
           EVEX_4V;
}
783
// Register-controlled 512-bit permutes for each element type.
defm VPERMDZ
    : avx512_perm<0x36, "vpermd", VR512, memopv16i32, i512mem, v16i32>,
      EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPERMQZ
    : avx512_perm<0x36, "vpermq", VR512, memopv8i64, i512mem, v8i64>,
      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

let ExeDomain = SSEPackedSingle in
defm VPERMPSZ
    : avx512_perm<0x16, "vpermps", VR512, memopv16f32, f512mem, v16f32>,
      EVEX_V512, EVEX_CD8<32, CD8VF>;

let ExeDomain = SSEPackedDouble in
defm VPERMPDZ
    : avx512_perm<0x16, "vpermpd", VR512, memopv8f64, f512mem, v8f64>,
      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
794
795// -- VPERM2I - 3 source operands form --
// Three-source permute. $src1 is tied to $dst, so it does not appear in the
// assembly strings. Six forms are produced:
//   rr / rm     - unmasked
//   rrk / rmk   - merge-masked: vselect(mask, op, $src1)
//   rrkz / rmkz - zero-masked:  vselect(mask, op, all-zeros)
// Parameters:
//   RC       - vector register class
//   mem_frag - load fragment matched by the memory forms
//   x86memop - memory operand of the memory forms
//   OpNode   - DAG node for the permute
//   OpVT     - result vector type
//   KRC      - write-mask register class
multiclass avx512_perm_3src<bits<8> opc, string OpcodeStr, RegisterClass RC,
                            PatFrag mem_frag, X86MemOperand x86memop,
                            SDNode OpNode, ValueType OpVT, RegisterClass KRC> {
let Constraints = "$src1 = $dst" in {
  def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
                    (ins RC:$src1, RC:$src2, RC:$src3),
                    !strconcat(OpcodeStr,
                       " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                    [(set RC:$dst,
                       (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>,
           EVEX_4V;

  def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
                     (ins RC:$src1, KRC:$mask, RC:$src2, RC:$src3),
                     !strconcat(OpcodeStr,
                        " \t{$src3, $src2, $dst {${mask}}|"
                        "$dst {${mask}}, $src2, $src3}"),
                     [(set RC:$dst, (OpVT (vselect KRC:$mask,
                                             (OpNode RC:$src1, RC:$src2,
                                                     RC:$src3),
                                             RC:$src1)))]>,
            EVEX_4V, EVEX_K;

  // The AT&T half of the string must end at "{z}" with no trailing blank,
  // exactly like the rmkz form below.
  let AddedComplexity = 30 in // Prefer over VMOV*rrkz Pat<>
  def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
                      (ins RC:$src1, KRC:$mask, RC:$src2, RC:$src3),
                      !strconcat(OpcodeStr,
                         " \t{$src3, $src2, $dst {${mask}} {z}|"
                         "$dst {${mask}} {z}, $src2, $src3}"),
                      [(set RC:$dst, (OpVT (vselect KRC:$mask,
                                              (OpNode RC:$src1, RC:$src2,
                                                      RC:$src3),
                                              (OpVT (bitconvert
                                                 (v16i32 immAllZerosV))))))]>,
             EVEX_4V, EVEX_KZ;

  def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                    (ins RC:$src1, RC:$src2, x86memop:$src3),
                    !strconcat(OpcodeStr,
                       " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                    [(set RC:$dst,
                       (OpVT (OpNode RC:$src1, RC:$src2,
                                     (mem_frag addr:$src3))))]>,
           EVEX_4V;

  def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                     (ins RC:$src1, KRC:$mask, RC:$src2, x86memop:$src3),
                     !strconcat(OpcodeStr,
                        " \t{$src3, $src2, $dst {${mask}}|"
                        "$dst {${mask}}, $src2, $src3}"),
                     [(set RC:$dst,
                        (OpVT (vselect KRC:$mask,
                                 (OpNode RC:$src1, RC:$src2,
                                         (mem_frag addr:$src3)),
                                 RC:$src1)))]>,
            EVEX_4V, EVEX_K;

  let AddedComplexity = 10 in // Prefer over the rrkz variant
  def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                      (ins RC:$src1, KRC:$mask, RC:$src2, x86memop:$src3),
                      !strconcat(OpcodeStr,
                         " \t{$src3, $src2, $dst {${mask}} {z}|"
                         "$dst {${mask}} {z}, $src2, $src3}"),
                      [(set RC:$dst,
                         (OpVT (vselect KRC:$mask,
                                  (OpNode RC:$src1, RC:$src2,
                                          (mem_frag addr:$src3)),
                                  (OpVT (bitconvert
                                     (v16i32 immAllZerosV))))))]>,
             EVEX_4V, EVEX_KZ;
  }
}
// VPERMI2* instantiations: all use the X86VPermiv3 node on 512-bit vectors.
defm VPERMI2D
    : avx512_perm_3src<0x76, "vpermi2d", VR512, memopv16i32, i512mem,
                       X86VPermiv3, v16i32, VK16WM>,
      EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPERMI2Q
    : avx512_perm_3src<0x76, "vpermi2q", VR512, memopv8i64, i512mem,
                       X86VPermiv3, v8i64, VK8WM>,
      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMI2PS
    : avx512_perm_3src<0x77, "vpermi2ps", VR512, memopv16f32, i512mem,
                       X86VPermiv3, v16f32, VK16WM>,
      EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPERMI2PD
    : avx512_perm_3src<0x77, "vpermi2pd", VR512, memopv8f64, i512mem,
                       X86VPermiv3, v8f64, VK8WM>,
      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000879
// VPERMT2* wrapper around avx512_perm_3src. The mnemonic is "vpermt2" followed
// by Suffix, and two intrinsic patterns are added for
// int_x86_avx512_mask_vpermt_<Suffix>_512:
//   * mask operand == -1  -> unmasked rr form
//   * mask in a GPR (MRC) -> merge-masked rrk form, with the GPR copied into
//                            KRC
// In both, the intrinsic's first operand ($idx) becomes the instruction's
// second operand and $src1 becomes the first.
multiclass avx512_perm_table_3src<bits<8> opc, string Suffix, RegisterClass RC,
                                  PatFrag mem_frag, X86MemOperand x86memop,
                                  SDNode OpNode, ValueType OpVT,
                                  RegisterClass KRC, ValueType MaskVT,
                                  RegisterClass MRC>
    : avx512_perm_3src<opc, "vpermt2" # Suffix, RC, mem_frag, x86memop, OpNode,
                       OpVT, KRC> {
  def : Pat<(OpVT (!cast<Intrinsic>(
                      "int_x86_avx512_mask_vpermt_" # Suffix # "_512")
                   VR512:$idx, VR512:$src1, VR512:$src2, -1)),
            (!cast<Instruction>(NAME#rr)
               VR512:$src1, VR512:$idx, VR512:$src2)>;

  def : Pat<(OpVT (!cast<Intrinsic>(
                      "int_x86_avx512_mask_vpermt_" # Suffix # "_512")
                   VR512:$idx, VR512:$src1, VR512:$src2, MRC:$mask)),
            (!cast<Instruction>(NAME#rrk)
               VR512:$src1,
               (MaskVT (COPY_TO_REGCLASS MRC:$mask, KRC)),
               VR512:$idx, VR512:$src2)>;
}
895
// VPERMT2* instantiations: all use the X86VPermv3 node on 512-bit vectors.
defm VPERMT2D
    : avx512_perm_table_3src<0x7E, "d", VR512, memopv16i32, i512mem,
                             X86VPermv3, v16i32, VK16WM, v16i1, GR16>,
      EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPERMT2Q
    : avx512_perm_table_3src<0x7E, "q", VR512, memopv8i64, i512mem,
                             X86VPermv3, v8i64, VK8WM, v8i1, GR8>,
      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMT2PS
    : avx512_perm_table_3src<0x7F, "ps", VR512, memopv16f32, i512mem,
                             X86VPermv3, v16f32, VK16WM, v16i1, GR16>,
      EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPERMT2PD
    : avx512_perm_table_3src<0x7F, "pd", VR512, memopv8f64, i512mem,
                             X86VPermv3, v8f64, VK8WM, v8i1, GR8>,
      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovsky299cf5112014-04-29 09:09:15 +0000908
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000909//===----------------------------------------------------------------------===//
910// AVX-512 - BLEND using mask
911//
// Mask-controlled blend. The rr form is selected from
// OpNode(mask, $src2, $src1); the rm form has no selection pattern.
//   KRC      - mask register class
//   RC       - vector register class
//   x86memop - memory operand of the rm form
//   mem_frag - accepted but not referenced by either definition
//   OpNode   - DAG node matched by the rr form
//   vt       - vector type of the blended operands
multiclass avx512_blendmask<bits<8> opc, string OpcodeStr,
                            RegisterClass KRC, RegisterClass RC,
                            X86MemOperand x86memop, PatFrag mem_frag,
                            SDNode OpNode, ValueType vt> {
  def rr : AVX5128I<opc, MRMSrcReg,
                    (outs RC:$dst), (ins KRC:$mask, RC:$src1, RC:$src2),
                    OpcodeStr #
                    " \t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}",
                    [(set RC:$dst,
                          (OpNode KRC:$mask, (vt RC:$src2), (vt RC:$src1)))]>,
           EVEX_4V, EVEX_K;

  let mayLoad = 1 in {
    def rm : AVX5128I<opc, MRMSrcMem,
                      (outs RC:$dst),
                      (ins KRC:$mask, RC:$src1, x86memop:$src2),
                      OpcodeStr #
                      " \t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}",
                      []>,
             EVEX_4V, EVEX_K;
  }
}
929
// Floating-point mask blends (16 x f32 and 8 x f64).
let ExeDomain = SSEPackedSingle in {
  defm VBLENDMPSZ
      : avx512_blendmask<0x65, "vblendmps", VK16WM, VR512, f512mem,
                         memopv16f32, vselect, v16f32>,
        EVEX_CD8<32, CD8VF>, EVEX_V512;
}
let ExeDomain = SSEPackedDouble in {
  defm VBLENDMPDZ
      : avx512_blendmask<0x65, "vblendmpd", VK8WM, VR512, f512mem,
                         memopv8f64, vselect, v8f64>,
        VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V512;
}

// Blend intrinsics: the GPR mask is moved into the write-mask class first.
def : Pat<(v16f32 (int_x86_avx512_mask_blend_ps_512
                     (v16f32 VR512:$src1),
                     (v16f32 VR512:$src2),
                     (i16 GR16:$mask))),
          (VBLENDMPSZrr (COPY_TO_REGCLASS GR16:$mask, VK16WM),
                        VR512:$src1, VR512:$src2)>;

def : Pat<(v8f64 (int_x86_avx512_mask_blend_pd_512
                    (v8f64 VR512:$src1),
                    (v8f64 VR512:$src2),
                    (i8 GR8:$mask))),
          (VBLENDMPDZrr (COPY_TO_REGCLASS GR8:$mask, VK8WM),
                        VR512:$src1, VR512:$src2)>;
950
// Integer mask blends (16 x i32 and 8 x i64).
// These use the integer 512-bit memory operand (i512mem), like the other
// integer instructions in this file.
defm VPBLENDMDZ : avx512_blendmask<0x64, "vpblendmd",
                                   VK16WM, VR512, i512mem,
                                   memopv16i32, vselect, v16i32>,
                  EVEX_CD8<32, CD8VF>, EVEX_V512;

defm VPBLENDMQZ : avx512_blendmask<0x64, "vpblendmq",
                                   VK8WM, VR512, i512mem,
                                   memopv8i64, vselect, v8i64>,
                  VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V512;

// Blend intrinsics. The mask operand of VPBLENDM*Zrr is declared with the
// write-mask classes (VK16WM / VK8WM), so the GPR mask is copied into exactly
// that class, as the floating-point blend patterns do.
def : Pat<(v16i32 (int_x86_avx512_mask_blend_d_512 (v16i32 VR512:$src1),
                   (v16i32 VR512:$src2), (i16 GR16:$mask))),
          (VPBLENDMDZrr (COPY_TO_REGCLASS GR16:$mask, VK16WM),
                        VR512:$src1, VR512:$src2)>;

def : Pat<(v8i64 (int_x86_avx512_mask_blend_q_512 (v8i64 VR512:$src1),
                  (v8i64 VR512:$src2), (i8 GR8:$mask))),
          (VPBLENDMQZrr (COPY_TO_REGCLASS GR8:$mask, VK8WM),
                        VR512:$src1, VR512:$src2)>;
970
// 256-bit vselect with an 8-bit mask: both operands are placed in the low
// half of a 512-bit register, blended with the 512-bit instruction, and the
// low YMM half of the result is extracted. The operands are passed in
// swapped order ($src2 first) to match the operand order of the blend's
// own selection pattern.
let Predicates = [HasAVX512] in {
  def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask),
                            (v8f32 VR256X:$src1),
                            (v8f32 VR256X:$src2))),
            (EXTRACT_SUBREG
               (v16f32 (VBLENDMPSZrr
                          (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
                          (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2,
                                                 sub_ymm)),
                          (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1,
                                                 sub_ymm)))),
               sub_ymm)>;

  def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask),
                            (v8i32 VR256X:$src1),
                            (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG
               (v16i32 (VPBLENDMDZrr
                          (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
                          (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2,
                                                 sub_ymm)),
                          (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1,
                                                 sub_ymm)))),
               sub_ymm)>;
}
Elena Demikhovsky47fc44e2013-12-16 13:52:35 +0000986//===----------------------------------------------------------------------===//
987// Compare Instructions
988//===----------------------------------------------------------------------===//
989
990// avx512_cmp_scalar - AVX512 CMPSS and CMPSD
// Scalar compare producing a VK1 result. rr/rm take the condition as a CC
// operand and are selected from OpNode; rri_alt/rmi_alt take the condition as
// an explicit i8 immediate, have no pattern, and are assembler-parser only.
//   RC       - scalar FP register class
//   x86memop - memory operand of the memory forms
//   CC       - condition-code operand type of rr/rm
//   VT       - scalar value type
//   ld_frag  - load fragment matched by the rm form
//   asm      - assembly string of rr/rm
//   asm_alt  - assembly string of the *_alt forms
multiclass avx512_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
                             Operand CC, SDNode OpNode, ValueType VT,
                             PatFrag ld_frag, string asm, string asm_alt> {
  def rr : AVX512Ii8<0xC2, MRMSrcReg,
                     (outs VK1:$dst), (ins RC:$src1, RC:$src2, CC:$cc),
                     asm,
                     [(set VK1:$dst,
                           (OpNode (VT RC:$src1), RC:$src2, imm:$cc))],
                     IIC_SSE_ALU_F32S_RR>,
           EVEX_4V;

  def rm : AVX512Ii8<0xC2, MRMSrcMem,
                     (outs VK1:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc),
                     asm,
                     [(set VK1:$dst,
                           (OpNode (VT RC:$src1), (ld_frag addr:$src2),
                                   imm:$cc))],
                     IIC_SSE_ALU_F32P_RM>,
           EVEX_4V;

  let isAsmParserOnly = 1, hasSideEffects = 0 in
  def rri_alt : AVX512Ii8<0xC2, MRMSrcReg,
                          (outs VK1:$dst),
                          (ins RC:$src1, RC:$src2, i8imm:$cc),
                          asm_alt, [], IIC_SSE_ALU_F32S_RR>,
                EVEX_4V;

  let isAsmParserOnly = 1, hasSideEffects = 0 in
  def rmi_alt : AVX512Ii8<0xC2, MRMSrcMem,
                          (outs VK1:$dst),
                          (ins RC:$src1, x86memop:$src2, i8imm:$cc),
                          asm_alt, [], IIC_SSE_ALU_F32P_RM>,
                EVEX_4V;
}
1011
// AVX-512 scalar compares.
//
// Both instantiations have memory forms, so they need an EVEX_CD8 tuple to
// describe the compressed-disp8 scale (one scalar element: CD8VT1, with the
// element size of the operand). The other scalar-memory instructions in this
// file, e.g. VEXTRACTPSzmr and VBROADCASTSSZ/SDZ, carry one too.
let Predicates = [HasAVX512] in {
defm VCMPSSZ : avx512_cmp_scalar<FR32X, f32mem, AVXCC, X86cmpms, f32, loadf32,
                 "vcmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                 "vcmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
                 XS, EVEX_CD8<32, CD8VT1>;
defm VCMPSDZ : avx512_cmp_scalar<FR64X, f64mem, AVXCC, X86cmpms, f64, loadf64,
                 "vcmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                 "vcmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
                 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001022
// Packed integer compare writing a mask register. All register classes,
// types and fragments come from the X86VectorVTInfo argument `_`.
//   rr / rm   - unmasked; result is OpNode(src1, src2)
//   rrk / rmk - masked;   result is (and mask, OpNode(src1, src2))
// The memory forms bitconvert the loaded value to _.VT.
multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86VectorVTInfo _> {
  def rr : AVX512BI<opc, MRMSrcReg,
                    (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
                    OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,
                          (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))],
                    IIC_SSE_ALU_F32P_RR>,
           EVEX_4V;

  let mayLoad = 1 in {
    def rm : AVX512BI<opc, MRMSrcMem,
                      (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
                      OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set _.KRC:$dst,
                            (OpNode (_.VT _.RC:$src1),
                                    (_.VT (bitconvert
                                             (_.LdFrag addr:$src2)))))],
                      IIC_SSE_ALU_F32P_RM>,
             EVEX_4V;
  }

  def rrk : AVX512BI<opc, MRMSrcReg,
                     (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
                     !strconcat(OpcodeStr,
                                "\t{$src2, $src1, $dst {${mask}}|",
                                "$dst {${mask}}, $src1, $src2}"),
                     [(set _.KRC:$dst,
                           (and _.KRCWM:$mask,
                                (OpNode (_.VT _.RC:$src1),
                                        (_.VT _.RC:$src2))))],
                     IIC_SSE_ALU_F32P_RR>,
            EVEX_4V, EVEX_K;

  let mayLoad = 1 in {
    def rmk : AVX512BI<opc, MRMSrcMem,
                       (outs _.KRC:$dst),
                       (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
                       !strconcat(OpcodeStr,
                                  "\t{$src2, $src1, $dst {${mask}}|",
                                  "$dst {${mask}}, $src1, $src2}"),
                       [(set _.KRC:$dst,
                             (and _.KRCWM:$mask,
                                  (OpNode (_.VT _.RC:$src1),
                                          (_.VT (bitconvert
                                                   (_.LdFrag addr:$src2))))))],
                       IIC_SSE_ALU_F32P_RM>,
              EVEX_4V, EVEX_K;
  }
}
1055
// Broadcast-memory variants of the packed integer compare: the second operand
// is a scalar load wrapped in X86VBroadcast, and _.BroadcastStr is appended
// to it in the assembly string.
//   rmb  - unmasked
//   rmbk - masked; result is and'ed with the write mask
multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  X86VectorVTInfo _> {
  let mayLoad = 1 in
  def rmb : AVX512BI<opc, MRMSrcMem,
                     (outs _.KRC:$dst),
                     (ins _.RC:$src1, _.ScalarMemOp:$src2),
                     !strconcat(OpcodeStr,
                                "\t{${src2}", _.BroadcastStr,
                                ", $src1, $dst",
                                "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
                     [(set _.KRC:$dst,
                           (OpNode (_.VT _.RC:$src1),
                                   (X86VBroadcast
                                      (_.ScalarLdFrag addr:$src2))))],
                     IIC_SSE_ALU_F32P_RM>,
            EVEX_4V, EVEX_B;

  let mayLoad = 1 in
  def rmbk : AVX512BI<opc, MRMSrcMem,
                      (outs _.KRC:$dst),
                      (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
                      !strconcat(OpcodeStr,
                                 "\t{${src2}", _.BroadcastStr,
                                 ", $src1, $dst {${mask}}|",
                                 "$dst {${mask}}, $src1, ${src2}",
                                 _.BroadcastStr, "}"),
                      [(set _.KRC:$dst,
                            (and _.KRCWM:$mask,
                                 (OpNode (_.VT _.RC:$src1),
                                         (X86VBroadcast
                                            (_.ScalarLdFrag addr:$src2)))))],
                      IIC_SSE_ALU_F32P_RM>,
             EVEX_4V, EVEX_K, EVEX_B;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001079
// Instantiates avx512_icmp_packed for the three vector lengths described by
// VTInfo: Z (512-bit) under predicate `prd`, and Z256 / Z128 under `prd`
// together with HasVLX.
multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info512>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info128>,
                EVEX_V128;
  }
}
1093
// Instantiates avx512_icmp_packed_rmb (broadcast-memory forms) for the three
// vector lengths described by VTInfo: Z (512-bit) under predicate `prd`, and
// Z256 / Z128 under `prd` together with HasVLX.
multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
                                     SDNode OpNode,
                                     AVX512VLVectorVTInfo VTInfo,
                                     Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, VTInfo.info512>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode,
                                       VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode,
                                       VTInfo.info128>,
                EVEX_V128;
  }
}
1108
// Packed integer equality compares. The byte/word forms are predicated on
// HasBWI; the dword/qword forms on HasAVX512 and additionally get the
// broadcast-memory variants.
defm VPCMPEQB
    : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm,
                            avx512vl_i8_info, HasBWI>,
      EVEX_CD8<8, CD8VF>;

defm VPCMPEQW
    : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm,
                            avx512vl_i16_info, HasBWI>,
      EVEX_CD8<16, CD8VF>;

defm VPCMPEQD
    : avx512_icmp_packed_vl<0x76, "vpcmpeqd", X86pcmpeqm,
                            avx512vl_i32_info, HasAVX512>,
      avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm,
                                avx512vl_i32_info, HasAVX512>,
      EVEX_CD8<32, CD8VF>;

defm VPCMPEQQ
    : avx512_icmp_packed_vl<0x29, "vpcmpeqq", X86pcmpeqm,
                            avx512vl_i64_info, HasAVX512>,
      avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm,
                                avx512vl_i64_info, HasAVX512>,
      T8PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Packed integer greater-than compares, with the same predicate split.
defm VPCMPGTB
    : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm,
                            avx512vl_i8_info, HasBWI>,
      EVEX_CD8<8, CD8VF>;

defm VPCMPGTW
    : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm,
                            avx512vl_i16_info, HasBWI>,
      EVEX_CD8<16, CD8VF>;

defm VPCMPGTD
    : avx512_icmp_packed_vl<0x66, "vpcmpgtd", X86pcmpgtm,
                            avx512vl_i32_info, HasAVX512>,
      avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm,
                                avx512vl_i32_info, HasAVX512>,
      EVEX_CD8<32, CD8VF>;

defm VPCMPGTQ
    : avx512_icmp_packed_vl<0x37, "vpcmpgtq", X86pcmpgtm,
                            avx512vl_i64_info, HasAVX512>,
      avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm,
                                avx512vl_i64_info, HasAVX512>,
      T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001148
// 256-bit dword compares: both operands are placed in the low half of a
// 512-bit register, compared with the 512-bit instruction, and the resulting
// mask is copied into the VK8 class.
def : Pat<(v8i1 (X86pcmpgtm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
          (COPY_TO_REGCLASS
             (VPCMPGTDZrr
                (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
                (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))),
             VK8)>;

def : Pat<(v8i1 (X86pcmpeqm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
          (COPY_TO_REGCLASS
             (VPCMPEQDZrr
                (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
                (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))),
             VK8)>;
1158
// Packed integer compare with a condition code, writing a mask register.
//   rri / rmi           - condition given as a CC operand folded into the
//                         mnemonic ("vpcmp${cc}" + Suffix); selected from
//                         OpNode(src1, src2, cc)
//   rri_alt / rmi_alt   - condition given as an explicit i8 immediate
//   rrik_alt / rmik_alt - as above, with a write mask
// The *_alt forms have no selection pattern and are assembler-parser only.
//   WMRC       - write-mask register class of the masked forms
//   KRC        - result mask register class
//   RC         - vector register class
//   x86memop   - memory operand of the memory forms
//   memop_frag - load fragment matched by rmi
//   vt         - vector type of the compared operands
//   CC         - condition-code operand type of rri/rmi
//   Suffix     - mnemonic suffix appended after "vpcmp"
multiclass avx512_icmp_cc<bits<8> opc, RegisterClass WMRC, RegisterClass KRC,
                          RegisterClass RC, X86MemOperand x86memop,
                          PatFrag memop_frag, SDNode OpNode, ValueType vt,
                          Operand CC, string Suffix> {
  def rri : AVX512AIi8<opc, MRMSrcReg,
                       (outs KRC:$dst), (ins RC:$src1, RC:$src2, CC:$cc),
                       "vpcmp${cc}" # Suffix #
                       "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set KRC:$dst,
                             (OpNode (vt RC:$src1), (vt RC:$src2), imm:$cc))],
                       IIC_SSE_ALU_F32P_RR>,
            EVEX_4V;

  def rmi : AVX512AIi8<opc, MRMSrcMem,
                       (outs KRC:$dst),
                       (ins RC:$src1, x86memop:$src2, CC:$cc),
                       "vpcmp${cc}" # Suffix #
                       "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set KRC:$dst,
                             (OpNode (vt RC:$src1), (memop_frag addr:$src2),
                                     imm:$cc))],
                       IIC_SSE_ALU_F32P_RM>,
            EVEX_4V;

  // Accept explicit immediate argument form instead of comparison code.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    def rri_alt : AVX512AIi8<opc, MRMSrcReg,
                             (outs KRC:$dst),
                             (ins RC:$src1, RC:$src2, i8imm:$cc),
                             "vpcmp" # Suffix #
                             "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
                             [], IIC_SSE_ALU_F32P_RR>,
                  EVEX_4V;

    def rrik_alt : AVX512AIi8<opc, MRMSrcReg,
                              (outs KRC:$dst),
                              (ins WMRC:$mask, RC:$src1, RC:$src2, i8imm:$cc),
                              "vpcmp" # Suffix #
                              "\t{$cc, $src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2, $cc}",
                              [], IIC_SSE_ALU_F32P_RR>,
                   EVEX_4V, EVEX_K;

    def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
                             (outs KRC:$dst),
                             (ins RC:$src1, x86memop:$src2, i8imm:$cc),
                             "vpcmp" # Suffix #
                             "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
                             [], IIC_SSE_ALU_F32P_RM>,
                  EVEX_4V;

    def rmik_alt : AVX512AIi8<opc, MRMSrcMem,
                              (outs KRC:$dst),
                              (ins WMRC:$mask, RC:$src1, x86memop:$src2,
                                   i8imm:$cc),
                              "vpcmp" # Suffix #
                              "\t{$cc, $src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2, $cc}",
                              [], IIC_SSE_ALU_F32P_RM>,
                   EVEX_4V, EVEX_K;
  }
}
1198
// 512-bit integer compares. Opcode 0x1F selects X86cmpm and 0x1E selects
// X86cmpmu; the 64-bit element variants additionally set VEX_W.

// 32-bit elements: 16 lanes, VK16 result.
defm VPCMPDZ  : avx512_icmp_cc<0x1F, VK16WM, VK16, VR512, i512mem,
                               memopv16i32, X86cmpm, v16i32, AVXCC, "d">,
                EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPCMPUDZ : avx512_icmp_cc<0x1E, VK16WM, VK16, VR512, i512mem,
                               memopv16i32, X86cmpmu, v16i32, AVXCC, "ud">,
                EVEX_V512, EVEX_CD8<32, CD8VF>;

// 64-bit elements: 8 lanes, VK8 result.
defm VPCMPQZ  : avx512_icmp_cc<0x1F, VK8WM, VK8, VR512, i512mem,
                               memopv8i64, X86cmpm, v8i64, AVXCC, "q">,
                VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
defm VPCMPUQZ : avx512_icmp_cc<0x1E, VK8WM, VK8, VR512, i512mem,
                               memopv8i64, X86cmpmu, v8i64, AVXCC, "uq">,
                VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001212
// avx512_cmp_packed - compare packed instructions
//   KRC      - mask register class of the result.
//   RC       - vector register class of the sources.
//   x86memop - memory operand used for $src2 in the memory forms.
//   vt       - vector value type of the sources.
//   suffix   - text appended to "vcmp${cc}" / "vcmp".
//   d        - execution domain passed to AVX512PIi8.
multiclass avx512_cmp_packed<RegisterClass KRC, RegisterClass RC,
                           X86MemOperand x86memop, ValueType vt,
                           string suffix, Domain d> {
  // Register/register form; the condition code is part of the mnemonic.
  def rri : AVX512PIi8<0xC2, MRMSrcReg,
             (outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc),
             !strconcat("vcmp${cc}", suffix,
                        " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set KRC:$dst, (X86cmpm (vt RC:$src1), (vt RC:$src2), imm:$cc))], d>;
  // Register/register form with EVEX_B set, printed with {sae}. It has no
  // selection pattern of its own.
  def rrib: AVX512PIi8<0xC2, MRMSrcReg,
             (outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc),
             !strconcat("vcmp${cc}", suffix,
                        " \t{{sae}, $src2, $src1, $dst|$dst, $src1, $src2, {sae}}"),
             [], d>, EVEX_B;
  // Register/memory form. The condition code is already printed as part of
  // the mnemonic, so the operand list matches rri and does not repeat $cc in
  // the Intel-syntax half.
  def rmi : AVX512PIi8<0xC2, MRMSrcMem,
             (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, AVXCC:$cc),
             !strconcat("vcmp${cc}", suffix,
                        " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set KRC:$dst,
              (X86cmpm (vt RC:$src1), (memop addr:$src2), imm:$cc))], d>;

  // Accept explicit immediate argument form instead of comparison code.
  // These are assembler-only and carry no selection pattern, so their
  // properties cannot be inferred and must be spelled out.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    def rri_alt : AVX512PIi8<0xC2, MRMSrcReg,
               (outs KRC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
              !strconcat("vcmp", suffix,
                        " \t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>;
    // The memory form reads $src2 from memory; with an empty pattern this
    // has to be stated explicitly.
    let mayLoad = 1 in
    def rmi_alt : AVX512PIi8<0xC2, MRMSrcMem,
               (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
              !strconcat("vcmp", suffix,
                        " \t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>;
  }
}
1246
// 512-bit packed floating-point compares.
// Single precision: 16 lanes, VK16 result.
defm VCMPPSZ : avx512_cmp_packed<VK16, VR512, f512mem, v16f32, "ps",
                                 SSEPackedSingle>,
               PS, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VF>;
// Double precision: 8 lanes, VK8 result, VEX_W set.
defm VCMPPDZ : avx512_cmp_packed<VK8, VR512, f512mem, v8f64, "pd",
                                 SSEPackedDouble>,
               PD, EVEX_4V, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
1253
// 8-lane compares on 256-bit operands are selected onto the 512-bit
// instructions: each VR256X source is placed in the sub_ymm part of a 512-bit
// value with SUBREG_TO_REG, and the mask result is copied to VK8.
def : Pat<(v8i1 (X86cmpm (v8f32 VR256X:$src1), (v8f32 VR256X:$src2), imm:$cc)),
          (COPY_TO_REGCLASS
            (VCMPPSZrri
              (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
              (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
              imm:$cc),
            VK8)>;
def : Pat<(v8i1 (X86cmpm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
          (COPY_TO_REGCLASS
            (VPCMPDZrri
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
              imm:$cc),
            VK8)>;
def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
          (COPY_TO_REGCLASS
            (VPCMPUDZrri
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
              imm:$cc),
            VK8)>;
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00001269
// Compare intrinsics called with an all-ones mask argument. The mask result
// is copied to a GPR class of the intrinsic's return width.

// FROUND_NO_EXC selects the EVEX_B ("rrib") encodings.
def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1),
                (v16f32 VR512:$src2), imm:$cc, (i16 -1), FROUND_NO_EXC)),
          (COPY_TO_REGCLASS
            (VCMPPSZrrib VR512:$src1, VR512:$src2, (I8Imm imm:$cc)),
            GR16)>;

def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1),
               (v8f64 VR512:$src2), imm:$cc, (i8 -1), FROUND_NO_EXC)),
          (COPY_TO_REGCLASS
            (VCMPPDZrrib VR512:$src1, VR512:$src2, (I8Imm imm:$cc)),
            GR8)>;

// FROUND_CURRENT selects the plain "rri" encodings.
def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1),
                (v16f32 VR512:$src2), imm:$cc, (i16 -1), FROUND_CURRENT)),
          (COPY_TO_REGCLASS
            (VCMPPSZrri VR512:$src1, VR512:$src2, (I8Imm imm:$cc)),
            GR16)>;

def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1),
               (v8f64 VR512:$src2), imm:$cc, (i8 -1), FROUND_CURRENT)),
          (COPY_TO_REGCLASS
            (VCMPPDZrri VR512:$src1, VR512:$src2, (I8Imm imm:$cc)),
            GR8)>;
1293
// Mask register moves. Two multiclasses cover:
//  - mask <-> mask copies and mask loads/stores (avx512_mask_mov)
//  - GPR <-> mask copies                        (avx512_mask_mov_gpr)
//
// avx512_mask_mov parameters:
//   opc_kk / opc_km / opc_mk - opcodes of the reg-reg, load and store forms.
//   OpcodeStr                - mnemonic.
//   KRC                      - mask register class.
//   vvt / ivt                - mask vector type and the integer type that is
//                              loaded and bitconverted to it.
//   x86memop                 - memory operand of the load/store forms.
multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
                           string OpcodeStr, RegisterClass KRC,
                           ValueType vvt, ValueType ivt,
                           X86MemOperand x86memop> {
  let hasSideEffects = 0 in {
    // mask <- mask
    def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
               !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"), []>;

    // mask <- memory; the only form with a selection pattern.
    let mayLoad = 1 in {
      def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
                 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
                 [(set KRC:$dst, (vvt (bitconvert (ivt (load addr:$src)))))]>;
    }

    // memory <- mask
    let mayStore = 1 in {
      def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
                 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"), []>;
    }
  }
}
1314
// avx512_mask_mov_gpr - pattern-less copies between a general purpose
// register class (GRC) and a mask register class (KRC).
//   opc_kr - opcode of the GPR -> mask form.
//   opc_rk - opcode of the mask -> GPR form.
multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
                               string OpcodeStr,
                               RegisterClass KRC, RegisterClass GRC> {
  let hasSideEffects = 0 in {
    // mask <- GPR
    def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
               !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"), []>;
    // GPR <- mask
    def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
               !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"), []>;
  }
}
1325
// 8-bit mask moves are guarded by HasDQI.
let Predicates = [HasDQI] in
  defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8,
                               i8mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
               VEX, PD;

// 16-bit mask moves are guarded by HasAVX512.
let Predicates = [HasAVX512] in
  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16,
                               i16mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
               VEX, PS;

// 32- and 64-bit mask moves are guarded by HasBWI. The mask/memory forms and
// the GPR forms use different prefixes, so each is instantiated separately.
let Predicates = [HasBWI] in {
  defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1, i32,
                               i32mem>,
               VEX, PD, VEX_W;
  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
               VEX, XD;

  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64,
                               i64mem>,
               VEX, PS, VEX_W;
  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
               VEX, XD, VEX_W;
}
1351
// Bitconverts between general purpose registers and mask registers.
// KMOVB and KMOVW are instantiated with GR32 as their GPR class, so the
// 8- and 16-bit values go through SUBREG_TO_REG / EXTRACT_SUBREG.
let Predicates = [HasDQI] in {
  def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
            (KMOVBkr (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit))>;
  def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
            (EXTRACT_SUBREG (KMOVBrk VK8:$src), sub_8bit)>;
}

let Predicates = [HasAVX512] in {
  def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
            (KMOVWkr (SUBREG_TO_REG (i32 0), GR16:$src, sub_16bit))>;
  def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
            (EXTRACT_SUBREG (KMOVWrk VK16:$src), sub_16bit)>;
}

// The 32- and 64-bit forms take the GPR directly.
let Predicates = [HasBWI] in {
  def : Pat<(v32i1 (bitconvert (i32 GR32:$src))), (KMOVDkr GR32:$src)>;
  def : Pat<(i32 (bitconvert (v32i1 VK32:$src))), (KMOVDrk VK32:$src)>;

  def : Pat<(v64i1 (bitconvert (i64 GR64:$src))), (KMOVQkr GR64:$src)>;
  def : Pat<(i64 (bitconvert (v64i1 VK64:$src))), (KMOVQrk VK64:$src)>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001373
// Mask register loads and stores.
let Predicates = [HasDQI] in
  def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst),
            (KMOVBmk addr:$dst, VK8:$src)>;

let Predicates = [HasAVX512] in {
  def : Pat<(store (i16 (bitconvert (v16i1 VK16:$src))), addr:$dst),
            (KMOVWmk addr:$dst, VK16:$src)>;
  // Under HasAVX512 alone an 8-bit mask is stored with the 16-bit KMOVW
  // after a copy to VK16.
  def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst),
            (KMOVWmk addr:$dst, (COPY_TO_REGCLASS VK8:$src, VK16))>;
  // i1 and v8i1 loads likewise use the 16-bit KMOVW load and copy the
  // result to the narrower class.
  def : Pat<(i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVWkm addr:$src), VK1)>;
  def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
            (COPY_TO_REGCLASS (KMOVWkm addr:$src), VK8)>;
}

let Predicates = [HasBWI] in {
  def : Pat<(store (i32 (bitconvert (v32i1 VK32:$src))), addr:$dst),
            (KMOVDmk addr:$dst, VK32:$src)>;
  def : Pat<(store (i64 (bitconvert (v64i1 VK64:$src))), addr:$dst),
            (KMOVQmk addr:$dst, VK64:$src)>;
}
Elena Demikhovskyc5f67262013-12-17 08:33:15 +00001397
// Conversions between scalar integers and i1 values held in VK1.
let Predicates = [HasAVX512] in {
  // Truncation to i1: mask the source down to bit 0 with AND32ri, move it
  // into a mask register with KMOVWkr, then copy to VK1.
  def : Pat<(i1 (trunc (i64 GR64:$src))),
            (COPY_TO_REGCLASS
              (KMOVWkr (AND32ri (EXTRACT_SUBREG $src, sub_32bit), (i32 1))),
              VK1)>;

  def : Pat<(i1 (trunc (i32 GR32:$src))),
            (COPY_TO_REGCLASS (KMOVWkr (AND32ri $src, (i32 1))), VK1)>;

  def : Pat<(i1 (trunc (i8 GR8:$src))),
            (COPY_TO_REGCLASS
              (KMOVWkr (AND32ri (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit),
                                (i32 1))),
              VK1)>;
  def : Pat<(i1 (trunc (i16 GR16:$src))),
            (COPY_TO_REGCLASS
              (KMOVWkr (AND32ri (SUBREG_TO_REG (i32 0), $src, sub_16bit),
                                (i32 1))),
              VK1)>;

  // Zero extension from i1: copy to VK16, move to a GPR with KMOVWrk, and
  // keep only bit 0.
  def : Pat<(i32 (zext VK1:$src)),
            (AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1))>;
  def : Pat<(i8 (zext VK1:$src)),
            (EXTRACT_SUBREG
              (AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)),
              sub_8bit)>;
  def : Pat<(i64 (zext VK1:$src)),
            (AND64ri8
              (SUBREG_TO_REG (i64 0),
                             (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)),
                             sub_32bit),
              (i64 1))>;
  def : Pat<(i16 (zext VK1:$src)),
            (EXTRACT_SUBREG
              (AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)),
              sub_16bit)>;

  // scalar_to_vector of an i1 is a register-class copy.
  def : Pat<(v16i1 (scalar_to_vector VK1:$src)),
            (COPY_TO_REGCLASS VK1:$src, VK16)>;
  def : Pat<(v8i1 (scalar_to_vector VK1:$src)),
            (COPY_TO_REGCLASS VK1:$src, VK8)>;
}
let Predicates = [HasBWI] in {
  def : Pat<(v32i1 (scalar_to_vector VK1:$src)),
            (COPY_TO_REGCLASS VK1:$src, VK32)>;
  def : Pat<(v64i1 (scalar_to_vector VK1:$src)),
            (COPY_TO_REGCLASS VK1:$src, VK64)>;
}
1439
1440
// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
let Predicates = [HasAVX512] in {
  // GPR <-> 8-bit mask through the 16-bit KMOVW forms.
  def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
            (COPY_TO_REGCLASS
              (KMOVWkr (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit)),
              VK8)>;
  def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
            (EXTRACT_SUBREG
              (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)),
              sub_8bit)>;

  // Extracting element 0 of a mask is a register-class copy to VK1.
  def : Pat<(i1 (X86Vextract VK16:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK16:$src, VK1)>;
  def : Pat<(i1 (X86Vextract VK8:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK8:$src, VK1)>;
}
let Predicates = [HasBWI] in {
  def : Pat<(i1 (X86Vextract VK32:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK32:$src, VK1)>;
  def : Pat<(i1 (X86Vextract VK64:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK64:$src, VK1)>;
}
1464
// Mask unary operation
// - KNOT
//
// Defines the single reg-reg form "rr" that applies OpNode to a KRC register.
//   opc       - opcode byte.
//   OpcodeStr - full mnemonic, including the width suffix.
//   prd       - predicate guarding the instruction.
multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
                            RegisterClass KRC, SDPatternOperator OpNode,
                            Predicate prd> {
  let Predicates = [prd] in {
    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
               !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
               [(set KRC:$dst, (OpNode KRC:$src))]>;
  }
}
1475
// avx512_mask_unop_all - instantiate a mask unary operation for all four
// mask widths. Each width appends its own suffix to OpcodeStr and has its
// own predicate and prefix combination.
multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNode> {
  // 8-bit: HasDQI
  defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                            HasDQI>,
           VEX, PD;
  // 16-bit: HasAVX512
  defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                            HasAVX512>,
           VEX, PS;
  // 32-bit: HasBWI
  defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                            HasBWI>,
           VEX, PD, VEX_W;
  // 64-bit: HasBWI
  defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                            HasBWI>,
           VEX, PS, VEX_W;
}

defm KNOT : avx512_mask_unop_all<0x44, "knot", not>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001489
// avx512_mask_unop_int - select the 16-bit mask unary intrinsic
// int_x86_avx512_<IntName>_w onto <InstName>Wrr. The GR16 argument is copied
// to VK16 for the instruction and the result is copied back to GR16.
multiclass avx512_mask_unop_int<string IntName, string InstName> {
  let Predicates = [HasAVX512] in {
    def : Pat<(!cast<Intrinsic>("int_x86_avx512_"##IntName##"_w")
                (i16 GR16:$src)),
              (COPY_TO_REGCLASS
                (!cast<Instruction>(InstName##"Wrr")
                  (v16i1 (COPY_TO_REGCLASS GR16:$src, VK16))),
                GR16)>;
  }
}
defm : avx512_mask_unop_int<"knot", "KNOT">;
1498
// xor with an all-ones mask is selected as KNOT of the matching width.
let Predicates = [HasDQI] in
  def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)), (KNOTBrr VK8:$src1)>;
let Predicates = [HasAVX512] in
  def : Pat<(xor VK16:$src1, (v16i1 immAllOnesV)), (KNOTWrr VK16:$src1)>;
let Predicates = [HasBWI] in {
  def : Pat<(xor VK32:$src1, (v32i1 immAllOnesV)), (KNOTDrr VK32:$src1)>;
  def : Pat<(xor VK64:$src1, (v64i1 immAllOnesV)), (KNOTQrr VK64:$src1)>;
}

// KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
// Under HasAVX512 alone, the 8-bit NOT is done with KNOTW on a VK16 copy and
// the result is copied back to VK8.
let Predicates = [HasAVX512] in {
  def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)),
            (COPY_TO_REGCLASS
              (KNOTWrr (COPY_TO_REGCLASS VK8:$src1, VK16)), VK8)>;

  def : Pat<(not VK8:$src),
            (COPY_TO_REGCLASS
              (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001517
// Mask binary operation
// - KAND, KANDN, KOR, KXNOR, KXOR
//
// Defines the single reg-reg form "rr" that applies OpNode to two KRC
// registers.
//   opc       - opcode byte.
//   OpcodeStr - full mnemonic, including the width suffix.
//   prd       - predicate guarding the instruction.
multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
                             RegisterClass KRC, SDPatternOperator OpNode,
                             Predicate prd> {
  let Predicates = [prd] in {
    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
               !strconcat(OpcodeStr,
                          " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
               [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>;
  }
}
1529
// avx512_mask_binop_all - instantiate a mask binary operation for all four
// mask widths. Each width appends its own suffix to OpcodeStr and has its
// own predicate and prefix combination; all are VEX_4V, VEX_L.
multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
                                 SDPatternOperator OpNode> {
  // 8-bit: HasDQI
  defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                             HasDQI>,
           VEX_4V, VEX_L, PD;
  // 16-bit: HasAVX512
  defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                             HasAVX512>,
           VEX_4V, VEX_L, PS;
  // 32-bit: HasBWI
  defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                             HasBWI>,
           VEX_4V, VEX_L, VEX_W, PD;
  // 64-bit: HasBWI
  defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                             HasBWI>,
           VEX_4V, VEX_L, VEX_W, PS;
}
1541
// Helper fragments for the two operations that have no single node:
//   andn(a, b) = (not a) and b
//   xnor(a, b) = not (a xor b)
def andn : PatFrag<(ops node:$i0, node:$i1),
                   (and (not node:$i0), node:$i1)>;
def xnor : PatFrag<(ops node:$i0, node:$i1),
                   (not (xor node:$i0, node:$i1))>;

// Commutable mask operations.
let isCommutable = 1 in {
  defm KAND  : avx512_mask_binop_all<0x41, "kand",  and>;
  defm KOR   : avx512_mask_binop_all<0x45, "kor",   or>;
  defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", xnor>;
  defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor>;
}
// KANDN negates only its first operand, so it is not commutable.
let isCommutable = 0 in
  defm KANDN : avx512_mask_binop_all<0x42, "kandn", andn>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001553
// Logic on single-bit masks: copy both VK1 operands to VK16, use the 16-bit
// mask instruction, and copy the result back to VK1.
def : Pat<(xor VK1:$src1, VK1:$src2),
          (COPY_TO_REGCLASS
            (KXORWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
                     (COPY_TO_REGCLASS VK1:$src2, VK16)),
            VK1)>;

def : Pat<(or VK1:$src1, VK1:$src2),
          (COPY_TO_REGCLASS
            (KORWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
                    (COPY_TO_REGCLASS VK1:$src2, VK16)),
            VK1)>;

def : Pat<(and VK1:$src1, VK1:$src2),
          (COPY_TO_REGCLASS
            (KANDWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
                     (COPY_TO_REGCLASS VK1:$src2, VK16)),
            VK1)>;
1565
// avx512_mask_binop_int - select the 16-bit mask binary intrinsic
// int_x86_avx512_<IntName>_w onto <InstName>Wrr. Both GR16 arguments are
// copied to VK16 for the instruction and the result is copied back to GR16.
multiclass avx512_mask_binop_int<string IntName, string InstName> {
  let Predicates = [HasAVX512] in {
    def : Pat<(!cast<Intrinsic>("int_x86_avx512_"##IntName##"_w")
                (i16 GR16:$src1), (i16 GR16:$src2)),
              (COPY_TO_REGCLASS
                (!cast<Instruction>(InstName##"Wrr")
                  (v16i1 (COPY_TO_REGCLASS GR16:$src1, VK16)),
                  (v16i1 (COPY_TO_REGCLASS GR16:$src2, VK16))),
                GR16)>;
  }
}

defm : avx512_mask_binop_int<"kand",  "KAND">;
defm : avx512_mask_binop_int<"kandn", "KANDN">;
defm : avx512_mask_binop_int<"kor",   "KOR">;
defm : avx512_mask_binop_int<"kxnor", "KXNOR">;
defm : avx512_mask_binop_int<"kxor",  "KXOR">;
Elena Demikhovskye382c3f2013-12-10 13:53:10 +00001580
// With AVX-512, 8-bit mask is promoted to 16-bit mask.
// avx512_binop_pat selects OpNode on two VK8 operands by copying them to
// VK16, applying the given 16-bit instruction, and copying the result to VK8.
multiclass avx512_binop_pat<SDPatternOperator OpNode, Instruction Inst> {
  let Predicates = [HasAVX512] in {
    def : Pat<(OpNode VK8:$src1, VK8:$src2),
              (COPY_TO_REGCLASS
                (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
                      (COPY_TO_REGCLASS VK8:$src2, VK16)),
                VK8)>;
  }
}

defm : avx512_binop_pat<and,  KANDWrr>;
defm : avx512_binop_pat<andn, KANDNWrr>;
defm : avx512_binop_pat<or,   KORWrr>;
defm : avx512_binop_pat<xnor, KXNORWrr>;
defm : avx512_binop_pat<xor,  KXORWrr>;
1595
// Mask unpacking
// avx512_mask_unpck defines a pattern-less reg-reg form "rr" taking two KRC
// sources, guarded by HasAVX512.
multiclass avx512_mask_unpck<bits<8> opc, string OpcodeStr,
                             RegisterClass KRC> {
  let Predicates = [HasAVX512] in {
    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
               !strconcat(OpcodeStr,
                          " \t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
  }
}

// The byte-to-word variant operates on VK16.
multiclass avx512_mask_unpck_bw<bits<8> opc, string OpcodeStr> {
  defm BW : avx512_mask_unpck<opc, !strconcat(OpcodeStr, "bw"), VK16>,
            VEX_4V, VEX_L, PD;
}

defm KUNPCK : avx512_mask_unpck_bw<0x4b, "kunpck">;

// Concatenating two v8i1 masks. Note the operand swap: $src2 of the
// concat_vectors is passed as the instruction's first source.
def : Pat<(v16i1 (concat_vectors (v8i1 VK8:$src1), (v8i1 VK8:$src2))),
          (KUNPCKBWrr (COPY_TO_REGCLASS VK8:$src2, VK16),
                      (COPY_TO_REGCLASS VK8:$src1, VK16))>;
1614
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001615
// avx512_mask_unpck_int - select the intrinsic int_x86_avx512_<IntName>_bw
// onto <InstName>BWrr. Both GR16 arguments are copied to VK16 for the
// instruction and the result is copied back to GR16.
multiclass avx512_mask_unpck_int<string IntName, string InstName> {
  let Predicates = [HasAVX512] in {
    def : Pat<(!cast<Intrinsic>("int_x86_avx512_"##IntName##"_bw")
                (i16 GR16:$src1), (i16 GR16:$src2)),
              (COPY_TO_REGCLASS
                (!cast<Instruction>(InstName##"BWrr")
                  (v16i1 (COPY_TO_REGCLASS GR16:$src1, VK16)),
                  (v16i1 (COPY_TO_REGCLASS GR16:$src2, VK16))),
                GR16)>;
  }
}
defm : avx512_mask_unpck_int<"kunpck", "KUNPCK">;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001625
// Mask bit testing
// avx512_mask_testop defines a reg-reg form "rr" with no register output;
// its only result is EFLAGS, set from OpNode applied to the two sources.
multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr,
                              RegisterClass KRC, SDNode OpNode> {
  let Predicates = [HasAVX512], Defs = [EFLAGS] in {
    def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
               !strconcat(OpcodeStr, " \t{$src2, $src1|$src1, $src2}"),
               [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>;
  }
}

// Only the 16-bit ("w") width is instantiated.
multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr,
                                SDNode OpNode> {
  defm W : avx512_mask_testop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode>,
           VEX, PS;
}

defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest>;

// Comparing an i1 against zero: KORTESTW of the value with itself, after
// copying it to VK16.
def : Pat<(X86cmp VK1:$src1, (i1 0)),
          (KORTESTWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
                      (COPY_TO_REGCLASS VK1:$src1, VK16))>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001645
// Mask shift
// avx512_mask_shiftop defines the form "ri": a KRC register shifted by an
// 8-bit immediate, guarded by HasAVX512.
multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr,
                               RegisterClass KRC, SDNode OpNode> {
  let Predicates = [HasAVX512] in {
    def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst),
                 (ins KRC:$src, i8imm:$imm),
                 !strconcat(OpcodeStr,
                            " \t{$imm, $src, $dst|$dst, $src, $imm}"),
                 [(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))]>;
  }
}

// Only the 16-bit ("w") width is instantiated, using opc1; opc2 is accepted
// but not referenced in this multiclass.
multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
                                 SDNode OpNode> {
  defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16,
                               OpNode>,
           VEX, TAPD, VEX_W;
}

defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86vshli>;
defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86vsrli>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001664
// Mask setting all 0s or 1s
// avx512_mask_setop defines a pseudo instruction with no inputs that
// produces the constant Val (typed as VT) in a KRC register. It is marked
// rematerializable and as cheap as a move.
multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
  let Predicates = [HasAVX512], isReMaterializable = 1,
      isAsCheapAsAMove = 1, isPseudo = 1 in
    def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
                   [(set KRC:$dst, (VT Val))]>;
}

// Instantiates the 8-bit (B) and 16-bit (W) variants.
multiclass avx512_mask_setop_w<PatFrag Val> {
  defm B : avx512_mask_setop<VK8,  v8i1,  Val>;
  defm W : avx512_mask_setop<VK16, v16i1, Val>;
}

defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
1680
// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
let Predicates = [HasAVX512] in {
  // v8i1 and i1 constants come from the 16-bit KSET pseudos.
  def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
  def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;
  def : Pat<(i1 0),  (COPY_TO_REGCLASS (KSET0W), VK1)>;
  def : Pat<(i1 1),  (COPY_TO_REGCLASS (KSET1W), VK1)>;
  def : Pat<(i1 -1), (COPY_TO_REGCLASS (KSET1W), VK1)>;
}

// The low half of a v16i1 is a plain register-class copy.
def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 0))),
          (v8i1 (COPY_TO_REGCLASS VK16:$src, VK8))>;

// Inserting a v8i1 at index 0 of an undef v16i1 is likewise a copy.
def : Pat<(v16i1 (insert_subvector undef, (v8i1 VK8:$src), (iPTR 0))),
          (v16i1 (COPY_TO_REGCLASS VK8:$src, VK16))>;

// The high half is shifted right by 8 first.
def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 8))),
          (v8i1 (COPY_TO_REGCLASS (KSHIFTRWri VK16:$src, (i8 8)), VK8))>;

// v8i1 shifts use the 16-bit shift instructions on a VK16 copy.
def : Pat<(v8i1 (X86vshli VK8:$src, (i8 imm:$imm))),
          (v8i1 (COPY_TO_REGCLASS
                  (KSHIFTLWri (COPY_TO_REGCLASS VK8:$src, VK16),
                              (I8Imm $imm)),
                  VK8))>;

def : Pat<(v8i1 (X86vsrli VK8:$src, (i8 imm:$imm))),
          (v8i1 (COPY_TO_REGCLASS
                  (KSHIFTRWri (COPY_TO_REGCLASS VK8:$src, VK16),
                              (I8Imm $imm)),
                  VK8))>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001703//===----------------------------------------------------------------------===//
1704// AVX-512 - Aligned and unaligned load and store
1705//
1706
// avx512_load - register move and load forms of a vector move, with
// merge-masked (k) and zero-masked (kz) variants.
//   opc                - opcode byte.
//   OpcodeStr          - mnemonic.
//   ld_frag            - load fragment matched by the memory forms.
//   KRC                - write-mask register class.
//   RC                 - vector register class.
//   vt                 - vector value type produced.
//   zvt                - value type used to spell the all-zeros vector in the
//                        zero-masking load pattern.
//   memop              - memory operand of the load forms.
//   d                  - execution domain.
//   IsReMaterializable - value of isReMaterializable on the plain load "rm".
multiclass avx512_load<bits<8> opc, string OpcodeStr, PatFrag ld_frag,
                       RegisterClass KRC, RegisterClass RC,
                       ValueType vt, ValueType zvt, X86MemOperand memop,
                       Domain d, bit IsReMaterializable = 1> {
  // Pattern-less register forms: plain move and zero-masked move.
  let hasSideEffects = 0 in {
    def rr : AVX512PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
                      d>, EVEX;
    def rrkz : AVX512PI<opc, MRMSrcReg, (outs RC:$dst),
                        (ins KRC:$mask, RC:$src),
                        !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                                   "${dst} {${mask}} {z}, $src}"), [], d>,
               EVEX, EVEX_KZ;
  }

  // Unmasked load.
  let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable,
      SchedRW = [WriteLoad] in {
    def rm : AVX512PI<opc, MRMSrcMem, (outs RC:$dst), (ins memop:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                      [(set RC:$dst, (vt (bitconvert (ld_frag addr:$src))))],
                      d>, EVEX;
  }

  // Masked forms, selected from vselect.
  let AddedComplexity = 20 in {
    // Merge-masking forms: $src0 supplies the unselected lanes and is tied
    // to $dst.
    let Constraints = "$src0 = $dst", hasSideEffects = 0 in {
      def rrk : AVX512PI<opc, MRMSrcReg, (outs RC:$dst),
                         (ins RC:$src0, KRC:$mask, RC:$src1),
                         !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
                                    "${dst} {${mask}}, $src1}"),
                         [(set RC:$dst, (vt (vselect KRC:$mask,
                                              (vt RC:$src1),
                                              (vt RC:$src0))))],
                         d>, EVEX, EVEX_K;
      let mayLoad = 1, SchedRW = [WriteLoad] in {
        def rmk : AVX512PI<opc, MRMSrcMem, (outs RC:$dst),
                           (ins RC:$src0, KRC:$mask, memop:$src1),
                           !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
                                      "${dst} {${mask}}, $src1}"),
                           [(set RC:$dst, (vt
                               (vselect KRC:$mask,
                                 (vt (bitconvert (ld_frag addr:$src1))),
                                 (vt RC:$src0))))],
                           d>, EVEX, EVEX_K;
      }
    }

    // Zero-masking load: unselected lanes are the all-zeros vector.
    let mayLoad = 1, SchedRW = [WriteLoad] in {
      def rmkz : AVX512PI<opc, MRMSrcMem, (outs RC:$dst),
                          (ins KRC:$mask, memop:$src),
                          !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                                     "${dst} {${mask}} {z}, $src}"),
                          [(set RC:$dst, (vt
                              (vselect KRC:$mask,
                                (vt (bitconvert (ld_frag addr:$src))),
                                (vt (bitconvert (zvt immAllZerosV))))))],
                          d>, EVEX, EVEX_KZ;
    }
  }
}
1760
// Instantiates avx512_load for the 512-, 256- and 128-bit vector widths.
// Record names are assembled from the string parameters:
//   ld_pat            - prefix of the load fragment name.
//   elty / elsz       - element kind ("f" or "i") and element size in bits.
//   vsz512/256/128    - element count at each vector width.
// The Z form is predicated on prd alone; Z256 and Z128 also require HasVLX.
// For element kinds other than "f" the 256- and 128-bit forms use the v4i64
// and v2i64 load fragments respectively.
multiclass avx512_load_vl<bits<8> opc, string OpcodeStr, string ld_pat,
                          string elty, string elsz, string vsz512,
                          string vsz256, string vsz128, Domain d,
                          Predicate prd, bit IsReMaterializable = 1> {
  let Predicates = [prd] in {
    defm Z : avx512_load<opc, OpcodeStr,
                         !cast<PatFrag>(ld_pat##"v"##vsz512##elty##elsz),
                         !cast<RegisterClass>("VK"##vsz512##"WM"),
                         VR512,
                         !cast<ValueType>("v"##vsz512##elty##elsz),
                         v16i32,
                         !cast<X86MemOperand>(elty##"512mem"),
                         d, IsReMaterializable>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_load<opc, OpcodeStr,
                            !cast<PatFrag>(ld_pat##!if(!eq(elty,"f"),
                                  "v"##vsz256##elty##elsz, "v4i64")),
                            !cast<RegisterClass>("VK"##vsz256##"WM"),
                            VR256X,
                            !cast<ValueType>("v"##vsz256##elty##elsz),
                            v8i32,
                            !cast<X86MemOperand>(elty##"256mem"),
                            d, IsReMaterializable>,
                EVEX_V256;

    defm Z128 : avx512_load<opc, OpcodeStr,
                            !cast<PatFrag>(ld_pat##!if(!eq(elty,"f"),
                                  "v"##vsz128##elty##elsz, "v2i64")),
                            !cast<RegisterClass>("VK"##vsz128##"WM"),
                            VR128X,
                            !cast<ValueType>("v"##vsz128##elty##elsz),
                            v4i32,
                            !cast<X86MemOperand>(elty##"128mem"),
                            d, IsReMaterializable>,
                EVEX_V128;
  }
}
1791
1792
// Defines the store-direction forms of one AVX-512 vector move for a single
// vector width.
//   rr_alt / rrk_alt / rrkz_alt - register forms using the MRMDestReg
//                                 encoding; assembler-parser only, no pattern.
//   mr                          - store selected through st_frag on OpVT.
//   mrk                         - masked store; no pattern is attached here.
multiclass avx512_store<bits<8> opc, string OpcodeStr, PatFrag st_frag,
                        ValueType OpVT, RegisterClass KRC, RegisterClass RC,
                        X86MemOperand memop, Domain d> {
  let hasSideEffects = 0, isAsmParserOnly = 1 in {
    def rr_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst), (ins RC:$src),
                          !strconcat(OpcodeStr,
                            "\t{$src, $dst|$dst, $src}"),
                          [], d>,
                 EVEX;

    // Merge-masking register form: $src1 is tied to the destination.
    let Constraints = "$src1 = $dst" in
    def rrk_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst),
                           (ins RC:$src1, KRC:$mask, RC:$src2),
                           !strconcat(OpcodeStr,
                             "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
                           [], d>,
                  EVEX, EVEX_K;

    def rrkz_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst),
                            (ins KRC:$mask, RC:$src),
                            !strconcat(OpcodeStr,
                              "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
                            [], d>,
                   EVEX, EVEX_KZ;
  }

  let mayStore = 1 in {
    def mr : AVX512PI<opc, MRMDestMem, (outs), (ins memop:$dst, RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                      [(st_frag (OpVT RC:$src), addr:$dst)], d>,
             EVEX;

    def mrk : AVX512PI<opc, MRMDestMem, (outs),
                       (ins memop:$dst, KRC:$mask, RC:$src),
                       !strconcat(OpcodeStr,
                         "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
                       [], d>,
              EVEX, EVEX_K;
  }
}
1823
Robert Khasanov7ca7df02014-08-04 14:35:15 +00001824
// Instantiates avx512_store for the 512-, 256- and 128-bit vector widths.
//   st_pat            - prefix of the store fragment name.
//   st_suff_512/256/128 - suffix appended to st_pat for each width.
//   elty / elsz       - element kind and element size in bits.
//   vsz512/256/128    - element count at each vector width.
// The Z form is predicated on prd alone; Z256 and Z128 also require HasVLX.
multiclass avx512_store_vl<bits<8> opc, string OpcodeStr, string st_pat,
                           string st_suff_512, string st_suff_256,
                           string st_suff_128, string elty, string elsz,
                           string vsz512, string vsz256, string vsz128,
                           Domain d, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_store<opc, OpcodeStr,
                          !cast<PatFrag>(st_pat##st_suff_512),
                          !cast<ValueType>("v"##vsz512##elty##elsz),
                          !cast<RegisterClass>("VK"##vsz512##"WM"),
                          VR512,
                          !cast<X86MemOperand>(elty##"512mem"), d>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr,
                             !cast<PatFrag>(st_pat##st_suff_256),
                             !cast<ValueType>("v"##vsz256##elty##elsz),
                             !cast<RegisterClass>("VK"##vsz256##"WM"),
                             VR256X,
                             !cast<X86MemOperand>(elty##"256mem"), d>,
                EVEX_V256;

    defm Z128 : avx512_store<opc, OpcodeStr,
                             !cast<PatFrag>(st_pat##st_suff_128),
                             !cast<ValueType>("v"##vsz128##elty##elsz),
                             !cast<RegisterClass>("VK"##vsz128##"WM"),
                             VR128X,
                             !cast<X86MemOperand>(elty##"128mem"), d>,
                EVEX_V128;
  }
}
1848
// Packed floating-point moves. Each defm combines the load-direction forms
// (first opcode) with the store-direction forms (second opcode).

// Aligned single precision.
defm VMOVAPS :
  avx512_load_vl<0x28, "vmovaps", "alignedload", "f", "32",
                 "16", "8", "4", SSEPackedSingle, HasAVX512>,
  avx512_store_vl<0x29, "vmovaps", "alignedstore",
                  "512", "256", "", "f", "32", "16", "8", "4",
                  SSEPackedSingle, HasAVX512>,
  PS, EVEX_CD8<32, CD8VF>;

// Aligned double precision.
defm VMOVAPD :
  avx512_load_vl<0x28, "vmovapd", "alignedload", "f", "64",
                 "8", "4", "2", SSEPackedDouble, HasAVX512>,
  avx512_store_vl<0x29, "vmovapd", "alignedstore",
                  "512", "256", "", "f", "64", "8", "4", "2",
                  SSEPackedDouble, HasAVX512>,
  PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Unaligned single precision.
defm VMOVUPS :
  avx512_load_vl<0x10, "vmovups", "load", "f", "32",
                 "16", "8", "4", SSEPackedSingle, HasAVX512>,
  avx512_store_vl<0x11, "vmovups", "store", "", "", "", "f", "32",
                  "16", "8", "4", SSEPackedSingle, HasAVX512>,
  PS, EVEX_CD8<32, CD8VF>;

// Unaligned double precision. The trailing 0 passes IsReMaterializable = 0
// to the load forms.
defm VMOVUPD :
  avx512_load_vl<0x10, "vmovupd", "load", "f", "64",
                 "8", "4", "2", SSEPackedDouble, HasAVX512, 0>,
  avx512_store_vl<0x11, "vmovupd", "store", "", "", "", "f", "64",
                  "8", "4", "2", SSEPackedDouble, HasAVX512>,
  PD, VEX_W, EVEX_CD8<64, CD8VF>;
1874
// Masked unaligned FP load intrinsics with an all-zeros pass-through operand
// select the zero-masking load forms. The GPR mask is re-classed to the
// write-mask register class.
def : Pat<(v8f64 (int_x86_avx512_mask_loadu_pd_512
                    addr:$ptr,
                    (bc_v8f64 (v16i32 immAllZerosV)),
                    GR8:$mask)),
          (VMOVUPDZrmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
                        addr:$ptr)>;

def : Pat<(v16f32 (int_x86_avx512_mask_loadu_ps_512
                    addr:$ptr,
                    (bc_v16f32 (v16i32 immAllZerosV)),
                    GR16:$mask)),
          (VMOVUPSZrmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
                        addr:$ptr)>;

// Masked unaligned FP store intrinsics select the masked store forms.
def : Pat<(int_x86_avx512_mask_storeu_ps_512
             addr:$ptr, (v16f32 VR512:$src), GR16:$mask),
          (VMOVUPSZmrk addr:$ptr,
                       (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
                       VR512:$src)>;
def : Pat<(int_x86_avx512_mask_storeu_pd_512
             addr:$ptr, (v8f64 VR512:$src), GR8:$mask),
          (VMOVUPDZmrk addr:$ptr,
                       (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
                       VR512:$src)>;
Elena Demikhovsky1f3ed412013-10-22 09:19:28 +00001891
// Packed integer moves. Each defm combines the load-direction forms (0x6F)
// with the store-direction forms (0x7F).

// Aligned, 32- and 64-bit elements.
defm VMOVDQA32 :
  avx512_load_vl<0x6F, "vmovdqa32", "alignedload", "i", "32",
                 "16", "8", "4", SSEPackedInt, HasAVX512>,
  avx512_store_vl<0x7F, "vmovdqa32", "alignedstore",
                  "512", "256", "", "i", "32", "16", "8", "4",
                  SSEPackedInt, HasAVX512>,
  PD, EVEX_CD8<32, CD8VF>;

defm VMOVDQA64 :
  avx512_load_vl<0x6F, "vmovdqa64", "alignedload", "i", "64",
                 "8", "4", "2", SSEPackedInt, HasAVX512>,
  avx512_store_vl<0x7F, "vmovdqa64", "alignedstore",
                  "512", "256", "", "i", "64", "8", "4", "2",
                  SSEPackedInt, HasAVX512>,
  PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Unaligned, 8- and 16-bit elements; predicated on HasBWI.
defm VMOVDQU8 :
  avx512_load_vl<0x6F, "vmovdqu8", "load", "i", "8",
                 "64", "32", "16", SSEPackedInt, HasBWI>,
  avx512_store_vl<0x7F, "vmovdqu8", "store", "", "", "",
                  "i", "8", "64", "32", "16", SSEPackedInt,
                  HasBWI>,
  XD, EVEX_CD8<8, CD8VF>;

defm VMOVDQU16 :
  avx512_load_vl<0x6F, "vmovdqu16", "load", "i", "16",
                 "32", "16", "8", SSEPackedInt, HasBWI>,
  avx512_store_vl<0x7F, "vmovdqu16", "store", "", "", "",
                  "i", "16", "32", "16", "8", SSEPackedInt,
                  HasBWI>,
  XD, VEX_W, EVEX_CD8<16, CD8VF>;

// Unaligned, 32- and 64-bit elements.
defm VMOVDQU32 :
  avx512_load_vl<0x6F, "vmovdqu32", "load", "i", "32",
                 "16", "8", "4", SSEPackedInt, HasAVX512>,
  avx512_store_vl<0x7F, "vmovdqu32", "store", "", "", "",
                  "i", "32", "16", "8", "4", SSEPackedInt,
                  HasAVX512>,
  XS, EVEX_CD8<32, CD8VF>;

defm VMOVDQU64 :
  avx512_load_vl<0x6F, "vmovdqu64", "load", "i", "64",
                 "8", "4", "2", SSEPackedInt, HasAVX512>,
  avx512_store_vl<0x7F, "vmovdqu64", "store", "", "", "",
                  "i", "64", "8", "4", "2", SSEPackedInt,
                  HasAVX512>,
  XS, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovsky1f3ed412013-10-22 09:19:28 +00001929
// Masked unaligned integer load intrinsics with an all-zeros pass-through
// operand select the zero-masking load forms.
def : Pat<(v16i32 (int_x86_avx512_mask_loadu_d_512
                    addr:$ptr,
                    (v16i32 immAllZerosV),
                    GR16:$mask)),
          (VMOVDQU32Zrmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
                          addr:$ptr)>;

def : Pat<(v8i64 (int_x86_avx512_mask_loadu_q_512
                    addr:$ptr,
                    (bc_v8i64 (v16i32 immAllZerosV)),
                    GR8:$mask)),
          (VMOVDQU64Zrmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
                          addr:$ptr)>;

// Masked unaligned integer store intrinsics select the masked store forms.
def : Pat<(int_x86_avx512_mask_storeu_d_512
             addr:$ptr, (v16i32 VR512:$src), GR16:$mask),
          (VMOVDQU32Zmrk addr:$ptr,
                         (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
                         VR512:$src)>;
def : Pat<(int_x86_avx512_mask_storeu_q_512
             addr:$ptr, (v8i64 VR512:$src), GR8:$mask),
          (VMOVDQU64Zmrk addr:$ptr,
                         (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
                         VR512:$src)>;
1946
// A 512-bit integer vselect in which one arm is the all-zeros vector is
// selected as a zero-masking register move. When the zero vector is the
// "true" arm, the mask is inverted with KNOTW first.
let AddedComplexity = 20 in {
def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 VR512:$src),
                          (bc_v8i64 (v16i32 immAllZerosV)))),
          (VMOVDQU64Zrrkz VK8WM:$mask, VR512:$src)>;

// KNOTW works on a 16-bit mask, so the 8-bit mask is re-classed to VK16 for
// the inversion and back afterwards. The result now names $mask with the
// register class the source pattern binds (VK8WM) and copies the inverted
// mask to VK8WM, the class VMOVDQU64Zrrkz takes, matching the v16i32
// pattern below (the original used VK8 in both places).
def : Pat<(v8i64 (vselect VK8WM:$mask, (bc_v8i64 (v16i32 immAllZerosV)),
                          (v8i64 VR512:$src))),
          (VMOVDQU64Zrrkz
             (COPY_TO_REGCLASS
                (KNOTWrr (COPY_TO_REGCLASS VK8WM:$mask, VK16)),
                VK8WM),
             VR512:$src)>;

def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 VR512:$src),
                           (v16i32 immAllZerosV))),
          (VMOVDQU32Zrrkz VK16WM:$mask, VR512:$src)>;

def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
                           (v16i32 VR512:$src))),
          (VMOVDQU32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
}
Robert Khasanov7ca7df02014-08-04 14:35:15 +00001965
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001966// Move Int Doubleword to Packed Double Int
1967//
// 32-bit GPR or i32 memory into element 0 of an XMM register.
def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg,
                             (outs VR128X:$dst), (ins GR32:$src),
                             "vmovd\t{$src, $dst|$dst, $src}",
                             [(set VR128X:$dst,
                                   (v4i32 (scalar_to_vector GR32:$src)))],
                             IIC_SSE_MOVDQ>,
                    EVEX, VEX_LIG;
def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem,
                             (outs VR128X:$dst), (ins i32mem:$src),
                             "vmovd\t{$src, $dst|$dst, $src}",
                             [(set VR128X:$dst,
                                   (v4i32 (scalar_to_vector
                                            (loadi32 addr:$src))))],
                             IIC_SSE_MOVDQ>,
                    EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;

// 64-bit GPR into element 0 of an XMM register.
def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg,
                              (outs VR128X:$dst), (ins GR64:$src),
                              "vmovq\t{$src, $dst|$dst, $src}",
                              [(set VR128X:$dst,
                                    (v2i64 (scalar_to_vector GR64:$src)))],
                              IIC_SSE_MOVDQ>,
                     EVEX, VEX_W, VEX_LIG;

// Bitcasts between a 64-bit GPR and FR64; code generation only.
let isCodeGenOnly = 1 in {
  def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg,
                               (outs FR64:$dst), (ins GR64:$src),
                               "vmovq\t{$src, $dst|$dst, $src}",
                               [(set FR64:$dst, (bitconvert GR64:$src))],
                               IIC_SSE_MOVDQ>,
                      EVEX, VEX_W, Sched<[WriteMove]>;
  def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg,
                               (outs GR64:$dst), (ins FR64:$src),
                               "vmovq\t{$src, $dst|$dst, $src}",
                               [(set GR64:$dst, (bitconvert FR64:$src))],
                               IIC_SSE_MOVDQ>,
                      EVEX, VEX_W, Sched<[WriteMove]>;
}

// Store an FR64 value to memory as an i64.
def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem,
                             (outs), (ins i64mem:$dst, FR64:$src),
                             "vmovq\t{$src, $dst|$dst, $src}",
                             [(store (i64 (bitconvert FR64:$src)),
                                     addr:$dst)],
                             IIC_SSE_MOVDQ>,
                    EVEX, VEX_W, Sched<[WriteStore]>,
                    EVEX_CD8<64, CD8VT1>;
1998
1999// Move Int Doubleword to Single Scalar
2000//
// Bitcast a 32-bit GPR or a loaded i32 into FR32X; code generation only.
let isCodeGenOnly = 1 in {
  def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg,
                              (outs FR32X:$dst), (ins GR32:$src),
                              "vmovd\t{$src, $dst|$dst, $src}",
                              [(set FR32X:$dst, (bitconvert GR32:$src))],
                              IIC_SSE_MOVDQ>,
                     EVEX, VEX_LIG;

  def VMOVDI2SSZrm : AVX512BI<0x6E, MRMSrcMem,
                              (outs FR32X:$dst), (ins i32mem:$src),
                              "vmovd\t{$src, $dst|$dst, $src}",
                              [(set FR32X:$dst,
                                    (bitconvert (loadi32 addr:$src)))],
                              IIC_SSE_MOVDQ>,
                     EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002012
Elena Demikhovsky767fc962014-01-14 15:10:08 +00002013// Move doubleword from xmm register to r/m32
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002014//
// Element 0 of a v4i32 XMM value to a 32-bit GPR (rr) or to memory (mr).
def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg,
                             (outs GR32:$dst), (ins VR128X:$src),
                             "vmovd\t{$src, $dst|$dst, $src}",
                             [(set GR32:$dst,
                                   (vector_extract (v4i32 VR128X:$src),
                                                   (iPTR 0)))],
                             IIC_SSE_MOVD_ToGP>,
                    EVEX, VEX_LIG;
def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem,
                             (outs), (ins i32mem:$dst, VR128X:$src),
                             "vmovd\t{$src, $dst|$dst, $src}",
                             [(store (i32 (vector_extract
                                            (v4i32 VR128X:$src), (iPTR 0))),
                                     addr:$dst)],
                             IIC_SSE_MOVDQ>,
                    EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
2026
Elena Demikhovsky767fc962014-01-14 15:10:08 +00002027// Move quadword from xmm1 register to r/m64
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002028//
// Element 0 of a v2i64 XMM value to a 64-bit GPR (rr) or to memory (mr).
// Both are built directly on class I and carry their own predicate list.
def VMOVPQIto64Zrr : I<0x7E, MRMDestReg,
                       (outs GR64:$dst), (ins VR128X:$src),
                       "vmovq\t{$src, $dst|$dst, $src}",
                       [(set GR64:$dst,
                             (extractelt (v2i64 VR128X:$src), (iPTR 0)))],
                       IIC_SSE_MOVD_ToGP>,
                     PD, EVEX, VEX_LIG, VEX_W,
                     Requires<[HasAVX512, In64BitMode]>;

def VMOVPQIto64Zmr : I<0xD6, MRMDestMem,
                       (outs), (ins i64mem:$dst, VR128X:$src),
                       "vmovq\t{$src, $dst|$dst, $src}",
                       [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
                               addr:$dst)],
                       IIC_SSE_MOVDQ>,
                     EVEX, PD, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>,
                     Sched<[WriteStore]>,
                     Requires<[HasAVX512, In64BitMode]>;
2043
2044// Move Scalar Single to Double Int
2045//
// Bitcast an FR32X value to a 32-bit GPR (rr) or store it as an i32 (mr);
// code generation only.
let isCodeGenOnly = 1 in {
  def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg,
                              (outs GR32:$dst), (ins FR32X:$src),
                              "vmovd\t{$src, $dst|$dst, $src}",
                              [(set GR32:$dst, (bitconvert FR32X:$src))],
                              IIC_SSE_MOVD_ToGP>,
                     EVEX, VEX_LIG;
  def VMOVSS2DIZmr : AVX512BI<0x7E, MRMDestMem,
                              (outs), (ins i32mem:$dst, FR32X:$src),
                              "vmovd\t{$src, $dst|$dst, $src}",
                              [(store (i32 (bitconvert FR32X:$src)),
                                      addr:$dst)],
                              IIC_SSE_MOVDQ>,
                     EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002058
2059// Move Quadword Int to Packed Quadword Int
2060//
// Load an i64 from memory into element 0 of a v2i64 XMM value.
def VMOVQI2PQIZrm : AVX512BI<0x6E, MRMSrcMem,
                             (outs VR128X:$dst), (ins i64mem:$src),
                             "vmovq\t{$src, $dst|$dst, $src}",
                             [(set VR128X:$dst,
                                   (v2i64 (scalar_to_vector
                                            (loadi64 addr:$src))))]>,
                    EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
2067
2068//===----------------------------------------------------------------------===//
2069// AVX-512 MOVSS, MOVSD
2070//===----------------------------------------------------------------------===//
2071
// Defines the scalar move forms rr, rrk, rm and mr.
//   asm       - mnemonic text placed in front of each operand string.
//   RC        - scalar register class of the moved element.
//   OpNode    - node matched by the rr pattern.
//   vt        - vector type produced by the rr pattern.
//   x86memop  - memory operand kind of rm and mr.
//   mem_pat   - load fragment matched by rm.
multiclass avx512_move_scalar <string asm, RegisterClass RC,
                              SDNode OpNode, ValueType vt,
                              X86MemOperand x86memop, PatFrag mem_pat> {
  let hasSideEffects = 0 in {
    // Register form: OpNode applied to $src1 and $src2 as a vector.
    def rr : SI<0x10, MRMSrcReg,
                (outs VR128X:$dst), (ins VR128X:$src1, RC:$src2),
                !strconcat(asm,
                  " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                [(set VR128X:$dst,
                      (vt (OpNode VR128X:$src1,
                                  (scalar_to_vector RC:$src2))))],
                IIC_SSE_MOV_S_RR>,
             EVEX_4V, VEX_LIG;

    // Masked register form; $src1 is tied to $dst and no pattern is attached.
    let Constraints = "$src1 = $dst" in
    def rrk : SI<0x10, MRMSrcReg,
                 (outs VR128X:$dst),
                 (ins VR128X:$src1, VK1WM:$mask, RC:$src2, RC:$src3),
                 !strconcat(asm,
                   " \t{$src3, $src2, $dst {${mask}}|$dst {${mask}}, $src2, $src3}"),
                 [], IIC_SSE_MOV_S_RR>,
              EVEX_4V, VEX_LIG, EVEX_K;

    // Scalar load.
    def rm : SI<0x10, MRMSrcMem,
                (outs RC:$dst), (ins x86memop:$src),
                !strconcat(asm, " \t{$src, $dst|$dst, $src}"),
                [(set RC:$dst, (mem_pat addr:$src))],
                IIC_SSE_MOV_S_RM>,
             EVEX, VEX_LIG;

    // Scalar store.
    def mr : SI<0x11, MRMDestMem,
                (outs), (ins x86memop:$dst, RC:$src),
                !strconcat(asm, " \t{$src, $dst|$dst, $src}"),
                [(store RC:$src, addr:$dst)],
                IIC_SSE_MOV_S_MR>,
             EVEX, VEX_LIG;
  } // hasSideEffects = 0
}
2097
// Single-precision scalar move.
let ExeDomain = SSEPackedSingle in {
  defm VMOVSSZ : avx512_move_scalar<"movss", FR32X, X86Movss, v4f32,
                                    f32mem, loadf32>,
                 XS, EVEX_CD8<32, CD8VT1>;
}

// Double-precision scalar move.
let ExeDomain = SSEPackedDouble in {
  defm VMOVSDZ : avx512_move_scalar<"movsd", FR64X, X86Movsd, v2f64,
                                    f64mem, loadf64>,
                 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
}
2105
// Scalar X86select through the masked scalar move: $src2 (re-classed to
// VR128X) is the tied operand, $src1 is the last source operand, and the
// remaining scalar operand is left IMPLICIT_DEF.
def : Pat<(f32 (X86select VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
          (COPY_TO_REGCLASS
             (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
                         VK1WM:$mask,
                         (f32 (IMPLICIT_DEF)),
                         FR32X:$src1),
             FR32X)>;

def : Pat<(f64 (X86select VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
          (COPY_TO_REGCLASS
             (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
                         VK1WM:$mask,
                         (f64 (IMPLICIT_DEF)),
                         FR64X:$src1),
             FR64X)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002113
2114// For the disassembler
// Register forms using the 0x11 / MRMDestReg encoding. They have no pattern
// and are forced into the disassembler tables.
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
  def VMOVSSZrr_REV : SI<0x11, MRMDestReg,
                         (outs VR128X:$dst),
                         (ins VR128X:$src1, FR32X:$src2),
                         "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [], IIC_SSE_MOV_S_RR>,
                      XS, EVEX_4V, VEX_LIG;
  def VMOVSDZrr_REV : SI<0x11, MRMDestReg,
                         (outs VR128X:$dst),
                         (ins VR128X:$src1, FR64X:$src2),
                         "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [], IIC_SSE_MOV_S_RR>,
                      XD, EVEX_4V, VEX_LIG, VEX_W;
}
2127
// Selection patterns that map zero-extending moves, scalar loads, extract-
// and-store and MOVSS/MOVSD style shuffles onto VMOVSSZ / VMOVSDZ.
let Predicates = [HasAVX512] in {
  let AddedComplexity = 15 in {
    // Zero-extending scalar move into an XMM register: start from a zeroed
    // vector (V_SET0) and move the scalar into its low element.
    def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32X:$src)))),
              (VMOVSSZrr (v4f32 (V_SET0)), FR32X:$src)>;
    def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
              (VMOVSSZrr (v4f32 (V_SET0)),
                         (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
    def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
              (VMOVSSZrr (v4i32 (V_SET0)),
                         (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
    def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64X:$src)))),
              (VMOVSDZrr (v2f64 (V_SET0)), FR64X:$src)>;

    // 256-bit sources: keep the low 32-bit element, clear everything else.
    def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
              (SUBREG_TO_REG (i32 0),
                 (VMOVSSZrr (v4f32 (V_SET0)),
                            (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)),
                 sub_xmm)>;
    def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
              (SUBREG_TO_REG (i32 0),
                 (VMOVSSZrr (v4i32 (V_SET0)),
                            (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)),
                 sub_xmm)>;
  }

  let AddedComplexity = 20 in {
    // Scalar f32 loads into a 128-bit vector: the load form is used and its
    // result is re-classed to VR128X.
    def : Pat<(v4f32 (X86vzmovl
                       (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
              (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
    def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
              (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
    def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
              (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;

    // Scalar f64 loads into a 128-bit vector, handled the same way.
    def : Pat<(v2f64 (X86vzmovl
                       (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
              (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
    def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
              (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
    def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
              (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
    def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
              (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
    def : Pat<(v2f64 (X86vzload addr:$src)),
              (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;

    // The same scalar loads as they appear for 256-bit types; the 128-bit
    // result is placed in sub_xmm with SUBREG_TO_REG.
    def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
                       (v4i32 (scalar_to_vector (loadi32 addr:$src))),
                       (iPTR 0)))),
              (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
    def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
                       (v4f32 (scalar_to_vector (loadf32 addr:$src))),
                       (iPTR 0)))),
              (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
    def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
                       (v2f64 (scalar_to_vector (loadf64 addr:$src))),
                       (iPTR 0)))),
              (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
  }

  // Register scalars (and an i64 load) zero-extended into 256-bit vectors.
  def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
                     (v4f32 (scalar_to_vector FR32X:$src)),
                     (iPTR 0)))),
            (SUBREG_TO_REG (i32 0),
               (v4f32 (VMOVSSZrr (v4f32 (V_SET0)), FR32X:$src)),
               sub_xmm)>;
  def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
                     (v2f64 (scalar_to_vector FR64X:$src)),
                     (iPTR 0)))),
            (SUBREG_TO_REG (i64 0),
               (v2f64 (VMOVSDZrr (v2f64 (V_SET0)), FR64X:$src)),
               sub_xmm)>;
  def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
                     (v2i64 (scalar_to_vector (loadi64 addr:$src))),
                     (iPTR 0)))),
            (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;

  // 256-bit sources: keep the low 64-bit element, clear everything else.
  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
               (VMOVSDZrr (v2f64 (V_SET0)),
                          (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)),
               sub_xmm)>;

  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
               (VMOVSDZrr (v2i64 (V_SET0)),
                          (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)),
               sub_xmm)>;

  // Store of element 0 extracted from a vector.
  def : Pat<(store (f32 (vector_extract (v4f32 VR128X:$src), (iPTR 0))),
                   addr:$dst),
            (VMOVSSZmr addr:$dst,
                       (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>;
  def : Pat<(store (f64 (vector_extract (v2f64 VR128X:$src), (iPTR 0))),
                   addr:$dst),
            (VMOVSDZmr addr:$dst,
                       (COPY_TO_REGCLASS (v2f64 VR128X:$src), FR64X))>;

  // X86Movss shuffles, 128-bit.
  def : Pat<(v4i32 (X86Movss VR128X:$src1, VR128X:$src2)),
            (VMOVSSZrr (v4i32 VR128X:$src1),
                       (COPY_TO_REGCLASS (v4i32 VR128X:$src2), FR32X))>;
  def : Pat<(v4f32 (X86Movss VR128X:$src1, VR128X:$src2)),
            (VMOVSSZrr (v4f32 VR128X:$src1),
                       (COPY_TO_REGCLASS (v4f32 VR128X:$src2), FR32X))>;

  // X86Movss shuffles, 256-bit: operate on the sub_xmm halves.
  def : Pat<(v8i32 (X86Movss VR256X:$src1, VR256X:$src2)),
            (SUBREG_TO_REG (i32 0),
               (VMOVSSZrr (EXTRACT_SUBREG (v8i32 VR256X:$src1), sub_xmm),
                          (EXTRACT_SUBREG (v8i32 VR256X:$src2), sub_xmm)),
               sub_xmm)>;
  def : Pat<(v8f32 (X86Movss VR256X:$src1, VR256X:$src2)),
            (SUBREG_TO_REG (i32 0),
               (VMOVSSZrr (EXTRACT_SUBREG (v8f32 VR256X:$src1), sub_xmm),
                          (EXTRACT_SUBREG (v8f32 VR256X:$src2), sub_xmm)),
               sub_xmm)>;

  // X86Movsd shuffles, 128-bit.
  def : Pat<(v2i64 (X86Movsd VR128X:$src1, VR128X:$src2)),
            (VMOVSDZrr VR128X:$src1,
                       (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
  def : Pat<(v2f64 (X86Movsd VR128X:$src1, VR128X:$src2)),
            (VMOVSDZrr VR128X:$src1,
                       (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
  def : Pat<(v4f32 (X86Movsd VR128X:$src1, VR128X:$src2)),
            (VMOVSDZrr VR128X:$src1,
                       (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
  def : Pat<(v4i32 (X86Movsd VR128X:$src1, VR128X:$src2)),
            (VMOVSDZrr VR128X:$src1,
                       (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;

  // X86Movsd shuffles, 256-bit: operate on the sub_xmm halves.
  def : Pat<(v4i64 (X86Movsd VR256X:$src1, VR256X:$src2)),
            (SUBREG_TO_REG (i32 0),
               (VMOVSDZrr (EXTRACT_SUBREG (v4i64 VR256X:$src1), sub_xmm),
                          (EXTRACT_SUBREG (v4i64 VR256X:$src2), sub_xmm)),
               sub_xmm)>;
  def : Pat<(v4f64 (X86Movsd VR256X:$src1, VR256X:$src2)),
            (SUBREG_TO_REG (i32 0),
               (VMOVSDZrr (EXTRACT_SUBREG (v4f64 VR256X:$src1), sub_xmm),
                          (EXTRACT_SUBREG (v4f64 VR256X:$src2), sub_xmm)),
               sub_xmm)>;

  // X86Movlpd / X86Movlps on registers are selected as VMOVSDZrr as well.
  def : Pat<(v2f64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
            (VMOVSDZrr VR128X:$src1,
                       (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
  def : Pat<(v2i64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
            (VMOVSDZrr VR128X:$src1,
                       (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
  def : Pat<(v4f32 (X86Movlps VR128X:$src1, VR128X:$src2)),
            (VMOVSDZrr VR128X:$src1,
                       (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
  def : Pat<(v4i32 (X86Movlps VR128X:$src1, VR128X:$src2)),
            (VMOVSDZrr VR128X:$src1,
                       (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
}
2268
// X86vzmovl of a v2i64 value, from a register (rr) or from a v2i64 load (rm).
let AddedComplexity = 15 in {
  def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg,
                                    (outs VR128X:$dst), (ins VR128X:$src),
                                    "vmovq\t{$src, $dst|$dst, $src}",
                                    [(set VR128X:$dst,
                                          (v2i64 (X86vzmovl
                                                   (v2i64 VR128X:$src))))],
                                    IIC_SSE_MOVQ_RR>,
                          EVEX, VEX_W;
}

let AddedComplexity = 20 in {
  def VMOVZPQILo2PQIZrm : AVX512XSI<0x7E, MRMSrcMem,
                                    (outs VR128X:$dst), (ins i128mem:$src),
                                    "vmovq\t{$src, $dst|$dst, $src}",
                                    [(set VR128X:$dst,
                                          (v2i64 (X86vzmovl
                                                   (loadv2i64 addr:$src))))],
                                    IIC_SSE_MOVDQ>,
                          EVEX, VEX_W,
                          EVEX_CD8<8, CD8VT8>;
}
2285
// Zero-extending integer moves selected as the EVEX vmovd / vmovq forms.
let Predicates = [HasAVX512] in {
  // X86vzmovl / X86vzload of a 128-bit integer vector is selected as a single
  // vmovd or vmovq, with no separate zeroing instruction.
  let AddedComplexity = 20 in {
    def : Pat<(v4i32 (X86vzmovl
                       (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
              (VMOVDI2PDIZrm addr:$src)>;
    def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
              (VMOV64toPQIZrr GR64:$src)>;
    def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
              (VMOVDI2PDIZrr GR32:$src)>;

    def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
              (VMOVDI2PDIZrm addr:$src)>;
    def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
              (VMOVDI2PDIZrm addr:$src)>;
    def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
              (VMOVZPQILo2PQIZrm addr:$src)>;
    def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
              (VMOVZPQILo2PQIZrr VR128X:$src)>;
    def : Pat<(v2i64 (X86vzload addr:$src)),
              (VMOVZPQILo2PQIZrm addr:$src)>;
  }

  // 256-bit scalar_to_vector + zero-extend: use the 128-bit instruction and
  // place its result in sub_xmm.
  def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
                     (v4i32 (scalar_to_vector GR32:$src)),
                     (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>;
  def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
                     (v2i64 (scalar_to_vector GR64:$src)),
                     (iPTR 0)))),
            (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
}
2316
// Inserting a GPR into element 0 of an all-zeros 512-bit vector: emit the
// 128-bit GPR-to-vector move and wrap it with SUBREG_TO_REG.
def : Pat<(v16i32 (X86Vinsert (v16i32 immAllZerosV), GR32:$src2, (iPTR 0))),
          (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src2), sub_xmm)>;

def : Pat<(v8i64 (X86Vinsert (bc_v8i64 (v16i32 immAllZerosV)), GR64:$src2, (iPTR 0))),
          (SUBREG_TO_REG (i32 0), (VMOV64toPQIZrr GR64:$src2), sub_xmm)>;

// Same selection when the vector being inserted into is undef.
def : Pat<(v16i32 (X86Vinsert undef, GR32:$src2, (iPTR 0))),
          (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src2), sub_xmm)>;

def : Pat<(v8i64 (X86Vinsert undef, GR64:$src2, (iPTR 0))),
          (SUBREG_TO_REG (i32 0), (VMOV64toPQIZrr GR64:$src2), sub_xmm)>;
2328
2329//===----------------------------------------------------------------------===//
Adam Nemet7f62b232014-06-10 16:39:53 +00002330// AVX-512 - Non-temporals
2331//===----------------------------------------------------------------------===//
// vmovntdqa loads. Only the 512-bit form carries a selection pattern (the
// int_x86_avx512_movntdqa intrinsic); the 256/128-bit forms have empty
// patterns and additionally require HasVLX.
let SchedRW = [WriteLoad] in {
  def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem,
                              (outs VR512:$dst), (ins i512mem:$src),
                              "vmovntdqa\t{$src, $dst|$dst, $src}",
                              [(set VR512:$dst,
                                 (int_x86_avx512_movntdqa addr:$src))],
                              SSEPackedInt>,
                     EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;

  let Predicates = [HasAVX512, HasVLX] in {
    def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem,
                                   (outs VR256X:$dst), (ins i256mem:$src),
                                   "vmovntdqa\t{$src, $dst|$dst, $src}", [],
                                   SSEPackedInt>,
                          EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;

    def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem,
                                   (outs VR128X:$dst), (ins i128mem:$src),
                                   "vmovntdqa\t{$src, $dst|$dst, $src}", [],
                                   SSEPackedInt>,
                          EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
  }
}
2353
// One store instruction ("mr") for a single vector width.
//   st_frag - store fragment matched by the pattern.
//   OpVT/RC - value type and register class of the stored operand.
//   memop   - memory operand for the destination.
//   d/itin  - execution domain and itinerary (defaults to IIC_SSE_MOVNT).
multiclass avx512_movnt<bits<8> opc, string OpcodeStr, PatFrag st_frag,
                        ValueType OpVT, RegisterClass RC, X86MemOperand memop,
                        Domain d, InstrItinClass itin = IIC_SSE_MOVNT> {
  let SchedRW = [WriteStore], mayStore = 1, AddedComplexity = 400 in
  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins memop:$dst, RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(st_frag (OpVT RC:$src), addr:$dst)], d, itin>,
           EVEX;
}
2363
// Instantiates avx512_movnt for the 512-, 256- and 128-bit vector lengths
// (Z, Z256, Z128). The value type name is built as
// "v" ## <element count> ## elty ## elsz and the memory operand name as
// elty ## "<bits>mem". The 256/128-bit variants also require HasVLX.
multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr, PatFrag st_frag,
                           string elty, string elsz, string vsz512,
                           string vsz256, string vsz128, Domain d,
                           Predicate prd, InstrItinClass itin = IIC_SSE_MOVNT> {
  let Predicates = [prd] in
  defm Z : avx512_movnt<opc, OpcodeStr, st_frag,
                        !cast<ValueType>("v"##vsz512##elty##elsz), VR512,
                        !cast<X86MemOperand>(elty##"512mem"), d, itin>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_movnt<opc, OpcodeStr, st_frag,
                             !cast<ValueType>("v"##vsz256##elty##elsz), VR256X,
                             !cast<X86MemOperand>(elty##"256mem"), d, itin>,
                EVEX_V256;

    defm Z128 : avx512_movnt<opc, OpcodeStr, st_frag,
                             !cast<ValueType>("v"##vsz128##elty##elsz), VR128X,
                             !cast<X86MemOperand>(elty##"128mem"), d, itin>,
                EVEX_V128;
  }
}
2386
// Non-temporal stores, all matched through alignednontemporalstore.
defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", alignednontemporalstore,
                                "i", "64", "8", "4", "2", SSEPackedInt,
                                HasAVX512>,
                PD, EVEX_CD8<64, CD8VF>;

defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", alignednontemporalstore,
                                "f", "64", "8", "4", "2", SSEPackedDouble,
                                HasAVX512>,
                PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", alignednontemporalstore,
                                "f", "32", "16", "8", "4", SSEPackedSingle,
                                HasAVX512>,
                PS, EVEX_CD8<32, CD8VF>;
2398
//===----------------------------------------------------------------------===//
// AVX-512 - Integer arithmetic
//
// Integer binary operation with full selection patterns.
//
// Defines nine instruction forms:
//   rr,  rrk,  rrkz   - register source
//   rm,  rmk,  rmkz   - full-vector memory source (memop_frag / x86memop)
//   rmb, rmbk, rmbkz  - broadcast scalar memory source
//                       (scalar_mfrag / x86scalar_mop, printed with BrdcstStr)
// The "k" forms tie $src0 to $dst and select between the operation result
// and $src0 under KRC:$mask; the "kz" forms select between the result and
// an all-zeros vector. Only the unmasked rr form honours IsCommutable.
multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           ValueType OpVT, RegisterClass KRC,
                           RegisterClass RC, PatFrag memop_frag,
                           X86MemOperand x86memop, PatFrag scalar_mfrag,
                           X86MemOperand x86scalar_mop, string BrdcstStr,
                           OpndItins itins, bit IsCommutable = 0> {
  // ---- Register source -------------------------------------------------
  let isCommutable = IsCommutable in
  def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
                    (ins RC:$src1, RC:$src2),
                    !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1), (OpVT RC:$src2))))],
                    itins.rr>, EVEX_4V;
  let AddedComplexity = 30 in {
    let Constraints = "$src0 = $dst" in
    def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
                       (ins RC:$src0, KRC:$mask, RC:$src1, RC:$src2),
                       !strconcat(OpcodeStr,
                         " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
                       [(set RC:$dst, (OpVT (vselect KRC:$mask,
                                             (OpNode (OpVT RC:$src1), (OpVT RC:$src2)),
                                             RC:$src0)))],
                       itins.rr>, EVEX_4V, EVEX_K;
    def rrkz : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
                        (ins KRC:$mask, RC:$src1, RC:$src2),
                        !strconcat(OpcodeStr, " \t{$src2, $src1, $dst {${mask}} {z}" ,
                          "|$dst {${mask}} {z}, $src1, $src2}"),
                        [(set RC:$dst, (OpVT (vselect KRC:$mask,
                                              (OpNode (OpVT RC:$src1), (OpVT RC:$src2)),
                                              (OpVT immAllZerosV))))],
                        itins.rr>, EVEX_4V, EVEX_KZ;
  }

  let mayLoad = 1 in {
    // ---- Full-vector memory source -------------------------------------
    def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                      (ins RC:$src1, x86memop:$src2),
                      !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1), (memop_frag addr:$src2))))],
                      itins.rm>, EVEX_4V;
    let AddedComplexity = 30 in {
      let Constraints = "$src0 = $dst" in
      def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                         (ins RC:$src0, KRC:$mask, RC:$src1, x86memop:$src2),
                         !strconcat(OpcodeStr,
                           " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
                         [(set RC:$dst, (OpVT (vselect KRC:$mask,
                                               (OpNode (OpVT RC:$src1), (memop_frag addr:$src2)),
                                               RC:$src0)))],
                         itins.rm>, EVEX_4V, EVEX_K;
      def rmkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                          (ins KRC:$mask, RC:$src1, x86memop:$src2),
                          !strconcat(OpcodeStr,
                            " \t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
                          [(set RC:$dst, (OpVT (vselect KRC:$mask,
                                                (OpNode (OpVT RC:$src1), (memop_frag addr:$src2)),
                                                (OpVT immAllZerosV))))],
                          itins.rm>, EVEX_4V, EVEX_KZ;
    }

    // ---- Broadcast scalar memory source --------------------------------
    def rmb : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                       (ins RC:$src1, x86scalar_mop:$src2),
                       !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
                         ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
                       [(set RC:$dst, (OpNode RC:$src1,
                                       (OpVT (X86VBroadcast (scalar_mfrag addr:$src2)))))],
                       itins.rm>, EVEX_4V, EVEX_B;
    let AddedComplexity = 30 in {
      let Constraints = "$src0 = $dst" in
      def rmbk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                          (ins RC:$src0, KRC:$mask, RC:$src1, x86scalar_mop:$src2),
                          !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
                            ", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}",
                            BrdcstStr, "}"),
                          [(set RC:$dst, (OpVT (vselect KRC:$mask,
                                                (OpNode (OpVT RC:$src1),
                                                 (OpVT (X86VBroadcast (scalar_mfrag addr:$src2)))),
                                                RC:$src0)))],
                          itins.rm>, EVEX_4V, EVEX_B, EVEX_K;
      def rmbkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                           (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
                           !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
                             ", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}",
                             BrdcstStr, "}"),
                           [(set RC:$dst, (OpVT (vselect KRC:$mask,
                                                 (OpNode (OpVT RC:$src1),
                                                  (OpVT (X86VBroadcast (scalar_mfrag addr:$src2)))),
                                                 (OpVT immAllZerosV))))],
                           itins.rm>, EVEX_4V, EVEX_B, EVEX_KZ;
    }
  }
}
Elena Demikhovskybb2f6b72014-03-27 09:45:08 +00002491
// Integer binary operation without selection patterns (all pattern lists
// are empty; users add explicit Pat<> records).
//
// Defines the same nine forms as avx512_binop_rm: rr/rrk/rrkz (register),
// rm/rmk/rmkz (full-vector memory) and rmb/rmbk/rmbkz (broadcast scalar
// memory, printed with BrdcstStr). Unlike avx512_binop_rm, the "k" forms
// here take no tied $src0 operand.
//
// DstVT, SrcVT, memop_frag and scalar_mfrag are accepted for signature
// parity but are not referenced by the body. IsCommutable applies to all
// three register forms.
//
// Every form, including the unmasked rr and rm, passes the matching
// itinerary from `itins` (rr for register forms, rm for memory forms).
multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, ValueType DstVT,
                            ValueType SrcVT, RegisterClass KRC, RegisterClass RC,
                            PatFrag memop_frag, X86MemOperand x86memop,
                            PatFrag scalar_mfrag, X86MemOperand x86scalar_mop,
                            string BrdcstStr, OpndItins itins, bit IsCommutable = 0> {
  let isCommutable = IsCommutable in
  {
    def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
                      (ins RC:$src1, RC:$src2),
                      !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      [], itins.rr>, EVEX_4V;
    def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
                       (ins KRC:$mask, RC:$src1, RC:$src2),
                       !strconcat(OpcodeStr,
                         " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
                       [], itins.rr>, EVEX_4V, EVEX_K;
    def rrkz : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
                        (ins KRC:$mask, RC:$src1, RC:$src2),
                        !strconcat(OpcodeStr, " \t{$src2, $src1, $dst {${mask}} {z}" ,
                          "|$dst {${mask}} {z}, $src1, $src2}"),
                        [], itins.rr>, EVEX_4V, EVEX_KZ;
  }
  let mayLoad = 1 in {
    def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                      (ins RC:$src1, x86memop:$src2),
                      !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      [], itins.rm>, EVEX_4V;
    def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                       (ins KRC:$mask, RC:$src1, x86memop:$src2),
                       !strconcat(OpcodeStr,
                         " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
                       [], itins.rm>, EVEX_4V, EVEX_K;
    def rmkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                        (ins KRC:$mask, RC:$src1, x86memop:$src2),
                        !strconcat(OpcodeStr,
                          " \t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
                        [], itins.rm>, EVEX_4V, EVEX_KZ;
    def rmb : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                       (ins RC:$src1, x86scalar_mop:$src2),
                       !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
                         ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
                       [], itins.rm>, EVEX_4V, EVEX_B;
    def rmbk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                        (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
                        !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
                          ", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}",
                          BrdcstStr, "}"),
                        [], itins.rm>, EVEX_4V, EVEX_B, EVEX_K;
    def rmbkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                         (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
                         !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
                           ", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}",
                           BrdcstStr, "}"),
                         [], itins.rm>, EVEX_4V, EVEX_B, EVEX_KZ;
  }
}
2548
// 512-bit integer add / subtract / multiply. The last template argument is
// IsCommutable.
defm VPADDDZ : avx512_binop_rm<0xFE, "vpaddd", add, v16i32, VK16WM, VR512,
                               memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                               SSE_INTALU_ITINS_P, 1>,
               EVEX_V512, EVEX_CD8<32, CD8VF>;

defm VPSUBDZ : avx512_binop_rm<0xFA, "vpsubd", sub, v16i32, VK16WM, VR512,
                               memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                               SSE_INTALU_ITINS_P, 0>,
               EVEX_V512, EVEX_CD8<32, CD8VF>;

defm VPMULLDZ : avx512_binop_rm<0x40, "vpmulld", mul, v16i32, VK16WM, VR512,
                                memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                                SSE_INTALU_ITINS_P, 1>,
                T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;

defm VPADDQZ : avx512_binop_rm<0xD4, "vpaddq", add, v8i64, VK8WM, VR512,
                               memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                               SSE_INTALU_ITINS_P, 1>,
               EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_W;

defm VPSUBQZ : avx512_binop_rm<0xFB, "vpsubq", sub, v8i64, VK8WM, VR512,
                               memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                               SSE_INTALU_ITINS_P, 0>,
               EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

// Multiplies producing v8i64 from v16i32 sources; defined without patterns
// via avx512_binop_rm2.
defm VPMULDQZ : avx512_binop_rm2<0x28, "vpmuldq", v8i64, v16i32, VK8WM, VR512,
                                 memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                                 SSE_INTALU_ITINS_P, 1>,
                T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;

defm VPMULUDQZ : avx512_binop_rm2<0xF4, "vpmuludq", v8i64, v16i32, VK8WM, VR512,
                                  memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                                  SSE_INTMUL_ITINS_P, 1>,
                 EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002577
// Selection for the pattern-less VPMULUDQZ / VPMULDQZ register forms: the
// X86pmuludq node, and the mask intrinsics when called with an all-zeros
// pass-through and an all-ones (i8 -1) mask.
def : Pat<(v8i64 (X86pmuludq (v16i32 VR512:$src1), (v16i32 VR512:$src2))),
          (VPMULUDQZrr VR512:$src1, VR512:$src2)>;

def : Pat<(v8i64 (int_x86_avx512_mask_pmulu_dq_512 (v16i32 VR512:$src1),
                  (v16i32 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
          (VPMULUDQZrr VR512:$src1, VR512:$src2)>;
def : Pat<(v8i64 (int_x86_avx512_mask_pmul_dq_512 (v16i32 VR512:$src1),
                  (v16i32 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
          (VPMULDQZrr VR512:$src1, VR512:$src2)>;
2587
// 512-bit integer min / max, signed and unsigned, for 32- and 64-bit
// elements. Min and max are commutative regardless of element width, so
// every variant passes IsCommutable = 1 (the 64-bit variants previously
// passed 0, unlike their 32-bit counterparts).
defm VPMAXUDZ : avx512_binop_rm<0x3F, "vpmaxud", X86umax, v16i32, VK16WM, VR512,
                                memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                                SSE_INTALU_ITINS_P, 1>,
                T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPMAXUQZ : avx512_binop_rm<0x3F, "vpmaxuq", X86umax, v8i64, VK8WM, VR512,
                                memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                                SSE_INTALU_ITINS_P, 1>,
                T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

defm VPMAXSDZ : avx512_binop_rm<0x3D, "vpmaxsd", X86smax, v16i32, VK16WM, VR512,
                                memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                                SSE_INTALU_ITINS_P, 1>,
                T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPMAXSQZ : avx512_binop_rm<0x3D, "vpmaxsq", X86smax, v8i64, VK8WM, VR512,
                                memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                                SSE_INTALU_ITINS_P, 1>,
                T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

defm VPMINUDZ : avx512_binop_rm<0x3B, "vpminud", X86umin, v16i32, VK16WM, VR512,
                                memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                                SSE_INTALU_ITINS_P, 1>,
                T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPMINUQZ : avx512_binop_rm<0x3B, "vpminuq", X86umin, v8i64, VK8WM, VR512,
                                memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                                SSE_INTALU_ITINS_P, 1>,
                T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

defm VPMINSDZ : avx512_binop_rm<0x39, "vpminsd", X86smin, v16i32, VK16WM, VR512,
                                memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                                SSE_INTALU_ITINS_P, 1>,
                T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPMINSQZ : avx512_binop_rm<0x39, "vpminsq", X86smin, v8i64, VK8WM, VR512,
                                memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                                SSE_INTALU_ITINS_P, 1>,
                T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovsky199c8232013-10-27 08:18:37 +00002623
// Mask min/max intrinsics called with an all-zeros pass-through and an
// all-ones mask select the plain (unmasked) register form.

// Maximum.
def : Pat <(v16i32 (int_x86_avx512_mask_pmaxs_d_512 (v16i32 VR512:$src1),
                    (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))),
           (VPMAXSDZrr VR512:$src1, VR512:$src2)>;
def : Pat <(v16i32 (int_x86_avx512_mask_pmaxu_d_512 (v16i32 VR512:$src1),
                    (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))),
           (VPMAXUDZrr VR512:$src1, VR512:$src2)>;
def : Pat <(v8i64 (int_x86_avx512_mask_pmaxs_q_512 (v8i64 VR512:$src1),
                   (v8i64 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
           (VPMAXSQZrr VR512:$src1, VR512:$src2)>;
def : Pat <(v8i64 (int_x86_avx512_mask_pmaxu_q_512 (v8i64 VR512:$src1),
                   (v8i64 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
           (VPMAXUQZrr VR512:$src1, VR512:$src2)>;

// Minimum.
def : Pat <(v16i32 (int_x86_avx512_mask_pmins_d_512 (v16i32 VR512:$src1),
                    (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))),
           (VPMINSDZrr VR512:$src1, VR512:$src2)>;
def : Pat <(v16i32 (int_x86_avx512_mask_pminu_d_512 (v16i32 VR512:$src1),
                    (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))),
           (VPMINUDZrr VR512:$src1, VR512:$src2)>;
def : Pat <(v8i64 (int_x86_avx512_mask_pmins_q_512 (v8i64 VR512:$src1),
                   (v8i64 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
           (VPMINSQZrr VR512:$src1, VR512:$src2)>;
def : Pat <(v8i64 (int_x86_avx512_mask_pminu_q_512 (v8i64 VR512:$src1),
                   (v8i64 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
           (VPMINUQZrr VR512:$src1, VR512:$src2)>;
//===----------------------------------------------------------------------===//
// AVX-512 - Unpack Instructions
//===----------------------------------------------------------------------===//
2651
// Floating-point unpack: a register form (rr) and a memory form (rm). The
// memory form bitconverts the mem_frag load before applying OpNode. `asm`
// is the complete assembly string, mnemonic and operands included.
multiclass avx512_unpack_fp<bits<8> opc, SDNode OpNode, ValueType vt,
                            PatFrag mem_frag, RegisterClass RC,
                            X86MemOperand x86memop, string asm,
                            Domain d> {
  def rr : AVX512PI<opc, MRMSrcReg,
                    (outs RC:$dst), (ins RC:$src1, RC:$src2),
                    asm,
                    [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))],
                    d>, EVEX_4V;
  def rm : AVX512PI<opc, MRMSrcMem,
                    (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
                    asm,
                    [(set RC:$dst,
                       (vt (OpNode RC:$src1,
                                   (bitconvert (mem_frag addr:$src2)))))],
                    d>, EVEX_4V;
}
2668
// 512-bit floating-point unpack high/low, single and double precision.
defm VUNPCKHPSZ: avx512_unpack_fp<0x15, X86Unpckh, v16f32, memopv8f64,
                   VR512, f512mem,
                   "vunpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                   SSEPackedSingle>,
                 PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VUNPCKHPDZ: avx512_unpack_fp<0x15, X86Unpckh, v8f64, memopv8f64,
                   VR512, f512mem,
                   "vunpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                   SSEPackedDouble>,
                 PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VUNPCKLPSZ: avx512_unpack_fp<0x14, X86Unpckl, v16f32, memopv8f64,
                   VR512, f512mem,
                   "vunpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                   SSEPackedSingle>,
                 PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VUNPCKLPDZ: avx512_unpack_fp<0x14, X86Unpckl, v8f64, memopv8f64,
                   VR512, f512mem,
                   "vunpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                   SSEPackedDouble>,
                 PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002681
// Integer unpack: register (rr) and memory (rm) forms, both using the
// IIC_SSE_UNPCK itinerary. The memory form bitconverts the memop_frag load.
multiclass avx512_unpack_int<bits<8> opc, string OpcodeStr, SDNode OpNode,
                             ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
                             X86MemOperand x86memop> {
  def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
                    (ins RC:$src1, RC:$src2),
                    !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1), (OpVT RC:$src2))))],
                    IIC_SSE_UNPCK>, EVEX_4V;
  def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                    (ins RC:$src1, x86memop:$src2),
                    !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1),
                                          (bitconvert (memop_frag addr:$src2)))))],
                    IIC_SSE_UNPCK>, EVEX_4V;
}
// 512-bit integer unpack low/high for 32- and 64-bit elements.
defm VPUNPCKLDQZ  : avx512_unpack_int<0x62, "vpunpckldq", X86Unpckl, v16i32,
                                      VR512, memopv16i32, i512mem>,
                    EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPUNPCKLQDQZ : avx512_unpack_int<0x6C, "vpunpcklqdq", X86Unpckl, v8i64,
                                      VR512, memopv8i64, i512mem>,
                    EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPUNPCKHDQZ  : avx512_unpack_int<0x6A, "vpunpckhdq", X86Unpckh, v16i32,
                                      VR512, memopv16i32, i512mem>,
                    EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPUNPCKHQDQZ : avx512_unpack_int<0x6D, "vpunpckhqdq", X86Unpckh, v8i64,
                                      VR512, memopv8i64, i512mem>,
                    EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
2709//===----------------------------------------------------------------------===//
2710// AVX-512 - PSHUFD
2711//
2712
// Shuffle controlled by an 8-bit immediate: "ri" takes the vector from a
// register, "mi" loads it through mem_frag. Both apply OpNode to the source
// and the (i8 imm) control.
multiclass avx512_pshuf_imm<bits<8> opc, string OpcodeStr, RegisterClass RC,
                            SDNode OpNode, PatFrag mem_frag,
                            X86MemOperand x86memop, ValueType OpVT> {
  def ri : AVX512Ii8<opc, MRMSrcReg, (outs RC:$dst),
                     (ins RC:$src1, i8imm:$src2),
                     !strconcat(OpcodeStr,
                       " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                     [(set RC:$dst,
                        (OpVT (OpNode RC:$src1, (i8 imm:$src2))))]>,
           EVEX;
  def mi : AVX512Ii8<opc, MRMSrcMem, (outs RC:$dst),
                     (ins x86memop:$src1, i8imm:$src2),
                     !strconcat(OpcodeStr,
                       " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                     [(set RC:$dst,
                        (OpVT (OpNode (mem_frag addr:$src1),
                                      (i8 imm:$src2))))]>,
           EVEX;
}
2731
// 512-bit immediate-controlled shuffles.
defm VPSHUFDZ : avx512_pshuf_imm<0x70, "vpshufd", VR512, X86PShufd, memopv16i32,
                                 i512mem, v16i32>,
                PD, EVEX_V512, EVEX_CD8<32, CD8VF>;

let ExeDomain = SSEPackedSingle in
defm VPERMILPSZ : avx512_pshuf_imm<0x04, "vpermilps", VR512, X86VPermilp,
                                   memopv16f32, i512mem, v16f32>,
                  TAPD, EVEX_V512, EVEX_CD8<32, CD8VF>;

// vpermilpd operates on 64-bit elements (VEX_W), so its compressed-disp8
// element size is 64, matching the other VEX_W packed instructions in this
// file (previously declared as 32).
let ExeDomain = SSEPackedDouble in
defm VPERMILPDZ : avx512_pshuf_imm<0x05, "vpermilpd", VR512, X86VPermilp,
                                   memopv8f64, i512mem, v8f64>,
                  TAPD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

// Integer-typed X86VPermilp nodes reuse the floating-point instructions.
def : Pat<(v16i32 (X86VPermilp VR512:$src1, (i8 imm:$imm))),
          (VPERMILPSZri VR512:$src1, imm:$imm)>;
def : Pat<(v8i64 (X86VPermilp VR512:$src1, (i8 imm:$imm))),
          (VPERMILPDZri VR512:$src1, imm:$imm)>;
2748
2749//===----------------------------------------------------------------------===//
2750// AVX-512 Logical Instructions
2751//===----------------------------------------------------------------------===//
2752
// 512-bit bitwise and / or / xor / and-not, each in a 32-bit ("d") and a
// 64-bit ("q") element flavour. and/or/xor are instantiated as commutable;
// and-not is not.
defm VPANDDZ : avx512_binop_rm<0xDB, "vpandd", and, v16i32, VK16WM, VR512,
                               memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                               SSE_BIT_ITINS_P, 1>,
               EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPANDQZ : avx512_binop_rm<0xDB, "vpandq", and, v8i64, VK8WM, VR512,
                               memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                               SSE_BIT_ITINS_P, 1>,
               EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPORDZ : avx512_binop_rm<0xEB, "vpord", or, v16i32, VK16WM, VR512,
                              memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                              SSE_BIT_ITINS_P, 1>,
              EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPORQZ : avx512_binop_rm<0xEB, "vporq", or, v8i64, VK8WM, VR512,
                              memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                              SSE_BIT_ITINS_P, 1>,
              EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPXORDZ : avx512_binop_rm<0xEF, "vpxord", xor, v16i32, VK16WM, VR512,
                               memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                               SSE_BIT_ITINS_P, 1>,
               EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPXORQZ : avx512_binop_rm<0xEF, "vpxorq", xor, v8i64, VK8WM, VR512,
                               memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                               SSE_BIT_ITINS_P, 1>,
               EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPANDNDZ : avx512_binop_rm<0xDF, "vpandnd", X86andnp, v16i32, VK16WM, VR512,
                                memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                                SSE_BIT_ITINS_P, 0>,
                EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPANDNQZ : avx512_binop_rm<0xDF, "vpandnq", X86andnp, v8i64, VK8WM, VR512,
                                memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                                SSE_BIT_ITINS_P, 0>,
                EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002777
2778//===----------------------------------------------------------------------===//
2779// AVX-512 FP arithmetic
2780//===----------------------------------------------------------------------===//
2781
// Scalar FP binary operation. Instantiates sse12_fp_scalar twice: SSZ
// (mnemonic suffix "ss", FR32X / f32mem) and SDZ (suffix "sd",
// FR64X / f64mem).
multiclass avx512_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          SizeItins itins> {
  defm SSZ : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"), OpNode, FR32X,
                             f32mem, itins.s, 0>,
             XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm SDZ : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"), OpNode, FR64X,
                             f64mem, itins.d, 0>,
             XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
2791
// Scalar FP arithmetic. add/mul/min/max are marked commutable.
let isCommutable = 1 in {
  defm VADD : avx512_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>;
  defm VMUL : avx512_binop_s<0x59, "mul", fmul, SSE_ALU_ITINS_S>;
  defm VMIN : avx512_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>;
  defm VMAX : avx512_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>;
}
// sub/div are not commutable.
let isCommutable = 0 in {
  defm VSUB : avx512_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>;
  defm VDIV : avx512_binop_s<0x5E, "div", fdiv, SSE_ALU_ITINS_S>;
}
2802
// Packed FP binary operation.
//
// Forms with selection patterns:
//   rr  - register source
//   rm  - full-vector memory source (mem_frag / x86memop)
//   rmb - broadcast scalar memory source (scalar_mfrag / x86scalar_mop,
//         printed with BrdcstStr)
// Masked forms (rrk/rrkz, rmk/rmkz, rmbk/rmbkz) are defined with empty
// patterns. `commutable` is applied to the three register forms.
//
// The rrk assembly string has no space before the AT&T/Intel '|' separator,
// so the AT&T form does not print with trailing whitespace.
multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            RegisterClass KRC,
                            RegisterClass RC, ValueType vt,
                            X86MemOperand x86memop, PatFrag mem_frag,
                            X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
                            string BrdcstStr,
                            Domain d, OpndItins itins, bit commutable> {
  let isCommutable = commutable in {
    def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
                !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>,
             EVEX_4V;

    def rrk: PI<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src1, RC:$src2),
                !strconcat(OpcodeStr,
                  " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
                [], itins.rr, d>, EVEX_4V, EVEX_K;

    def rrkz: PI<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src1, RC:$src2),
                 !strconcat(OpcodeStr,
                   " \t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
                 [], itins.rr, d>, EVEX_4V, EVEX_KZ;
  }

  let mayLoad = 1 in {
    def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
                !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
                itins.rm, d>, EVEX_4V;

    def rmb : PI<opc, MRMSrcMem, (outs RC:$dst),
                 (ins RC:$src1, x86scalar_mop:$src2),
                 !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
                   ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
                 [(set RC:$dst, (OpNode RC:$src1,
                                 (vt (X86VBroadcast (scalar_mfrag addr:$src2)))))],
                 itins.rm, d>, EVEX_4V, EVEX_B;

    def rmk : PI<opc, MRMSrcMem, (outs RC:$dst),
                 (ins KRC:$mask, RC:$src1, x86memop:$src2), !strconcat(OpcodeStr,
                   "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
                 [], itins.rm, d>, EVEX_4V, EVEX_K;

    def rmkz : PI<opc, MRMSrcMem, (outs RC:$dst),
                  (ins KRC:$mask, RC:$src1, x86memop:$src2), !strconcat(OpcodeStr,
                    "\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
                  [], itins.rm, d>, EVEX_4V, EVEX_KZ;

    def rmbk : PI<opc, MRMSrcMem, (outs RC:$dst),
                  (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2), !strconcat(OpcodeStr,
                    " \t{${src2}", BrdcstStr,
                    ", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}", BrdcstStr, "}"),
                  [], itins.rm, d>, EVEX_4V, EVEX_B, EVEX_K;

    def rmbkz : PI<opc, MRMSrcMem, (outs RC:$dst),
                   (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2), !strconcat(OpcodeStr,
                     " \t{${src2}", BrdcstStr,
                     ", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}",
                     BrdcstStr, "}"),
                   [], itins.rm, d>, EVEX_4V, EVEX_B, EVEX_KZ;
  }
}
2865
// 512-bit packed floating-point arithmetic, all instantiated from
// avx512_fp_packed on VR512.  Every operation comes in a single-precision
// flavour (v16f32, VK16WM, "{1to16}" broadcast suffix, PS prefix) and a
// double-precision flavour (v8f64, VK8WM, "{1to8}", PD prefix plus VEX_W).
// The final template argument is 1 for add/mul/min/max and 0 for sub/div
// (presumably the commutable bit -- confirm against the multiclass header).

// Addition.
defm VADDPSZ : avx512_fp_packed<0x58, "addps", fadd, VK16WM, VR512, v16f32,
                                f512mem, memopv16f32, f32mem, loadf32,
                                "{1to16}", SSEPackedSingle,
                                SSE_ALU_ITINS_P.s, 1>,
               EVEX_V512, PS, EVEX_CD8<32, CD8VF>;

defm VADDPDZ : avx512_fp_packed<0x58, "addpd", fadd, VK8WM, VR512, v8f64,
                                f512mem, memopv8f64, f64mem, loadf64,
                                "{1to8}", SSEPackedDouble,
                                SSE_ALU_ITINS_P.d, 1>,
               EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Multiplication.
defm VMULPSZ : avx512_fp_packed<0x59, "mulps", fmul, VK16WM, VR512, v16f32,
                                f512mem, memopv16f32, f32mem, loadf32,
                                "{1to16}", SSEPackedSingle,
                                SSE_ALU_ITINS_P.s, 1>,
               EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
defm VMULPDZ : avx512_fp_packed<0x59, "mulpd", fmul, VK8WM, VR512, v8f64,
                                f512mem, memopv8f64, f64mem, loadf64,
                                "{1to8}", SSEPackedDouble,
                                SSE_ALU_ITINS_P.d, 1>,
               EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Minimum / maximum, single precision.
defm VMINPSZ : avx512_fp_packed<0x5D, "minps", X86fmin, VK16WM, VR512, v16f32,
                                f512mem, memopv16f32, f32mem, loadf32,
                                "{1to16}", SSEPackedSingle,
                                SSE_ALU_ITINS_P.s, 1>,
               EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
defm VMAXPSZ : avx512_fp_packed<0x5F, "maxps", X86fmax, VK16WM, VR512, v16f32,
                                f512mem, memopv16f32, f32mem, loadf32,
                                "{1to16}", SSEPackedSingle,
                                SSE_ALU_ITINS_P.s, 1>,
               EVEX_V512, PS, EVEX_CD8<32, CD8VF>;

// Minimum / maximum, double precision.
defm VMINPDZ : avx512_fp_packed<0x5D, "minpd", X86fmin, VK8WM, VR512, v8f64,
                                f512mem, memopv8f64, f64mem, loadf64,
                                "{1to8}", SSEPackedDouble,
                                SSE_ALU_ITINS_P.d, 1>,
               EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
defm VMAXPDZ : avx512_fp_packed<0x5F, "maxpd", X86fmax, VK8WM, VR512, v8f64,
                                f512mem, memopv8f64, f64mem, loadf64,
                                "{1to8}", SSEPackedDouble,
                                SSE_ALU_ITINS_P.d, 1>,
               EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Subtraction / division, single precision.
defm VSUBPSZ : avx512_fp_packed<0x5C, "subps", fsub, VK16WM, VR512, v16f32,
                                f512mem, memopv16f32, f32mem, loadf32,
                                "{1to16}", SSEPackedSingle,
                                SSE_ALU_ITINS_P.s, 0>,
               EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
defm VDIVPSZ : avx512_fp_packed<0x5E, "divps", fdiv, VK16WM, VR512, v16f32,
                                f512mem, memopv16f32, f32mem, loadf32,
                                "{1to16}", SSEPackedSingle,
                                SSE_ALU_ITINS_P.s, 0>,
               EVEX_V512, PS, EVEX_CD8<32, CD8VF>;

// Subtraction / division, double precision.
defm VSUBPDZ : avx512_fp_packed<0x5C, "subpd", fsub, VK8WM, VR512, v8f64,
                                f512mem, memopv8f64, f64mem, loadf64,
                                "{1to8}", SSEPackedDouble,
                                SSE_ALU_ITINS_P.d, 0>,
               EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
defm VDIVPDZ : avx512_fp_packed<0x5E, "divpd", fdiv, VK8WM, VR512, v8f64,
                                f512mem, memopv8f64, f64mem, loadf64,
                                "{1to8}", SSEPackedDouble,
                                SSE_ALU_ITINS_P.d, 0>,
               EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002916
// Select the masked max/min intrinsics onto the plain register-register
// instructions when the pass-through operand is all zeros, the mask is
// all ones (-1) and the rounding operand is FROUND_CURRENT.

// max, v16f32
def : Pat<(v16f32 (int_x86_avx512_mask_max_ps_512
                     (v16f32 VR512:$src1), (v16f32 VR512:$src2),
                     (bc_v16f32 (v16i32 immAllZerosV)),
                     (i16 -1), FROUND_CURRENT)),
          (VMAXPSZrr VR512:$src1, VR512:$src2)>;

// max, v8f64
def : Pat<(v8f64 (int_x86_avx512_mask_max_pd_512
                    (v8f64 VR512:$src1), (v8f64 VR512:$src2),
                    (bc_v8f64 (v16i32 immAllZerosV)),
                    (i8 -1), FROUND_CURRENT)),
          (VMAXPDZrr VR512:$src1, VR512:$src2)>;

// min, v16f32
def : Pat<(v16f32 (int_x86_avx512_mask_min_ps_512
                     (v16f32 VR512:$src1), (v16f32 VR512:$src2),
                     (bc_v16f32 (v16i32 immAllZerosV)),
                     (i16 -1), FROUND_CURRENT)),
          (VMINPSZrr VR512:$src1, VR512:$src2)>;

// min, v8f64
def : Pat<(v8f64 (int_x86_avx512_mask_min_pd_512
                    (v8f64 VR512:$src1), (v8f64 VR512:$src2),
                    (bc_v8f64 (v16i32 immAllZerosV)),
                    (i8 -1), FROUND_CURRENT)),
          (VMINPDZrr VR512:$src1, VR512:$src2)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002936//===----------------------------------------------------------------------===//
2937// AVX-512 VPTESTM instructions
2938//===----------------------------------------------------------------------===//
2939
// Vector test producing a mask register.
//   opc        - opcode byte
//   OpcodeStr  - mnemonic
//   KRC        - mask register class of the result
//   RC         - vector register class of the sources
//   x86memop   - memory operand used by the rm form
//   memop_frag - load fragment for the rm pattern (bitconverted to match vt)
//   OpNode     - DAG node being selected
//   vt         - vector type applied to the register sources
// Defines:
//   rr - both sources in registers
//   rm - second source loaded from memory
multiclass avx512_vptest<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                         RegisterClass RC, X86MemOperand x86memop,
                         PatFrag memop_frag, SDNode OpNode, ValueType vt> {
  // Register-register form.
  def rr : AVX512PI<opc, MRMSrcReg,
                    (outs KRC:$dst),
                    (ins RC:$src1, RC:$src2),
                    !strconcat(OpcodeStr,
                       " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set KRC:$dst,
                          (OpNode (vt RC:$src1), (vt RC:$src2)))],
                    SSEPackedInt>,
           EVEX_4V;

  // Register-memory form.
  def rm : AVX512PI<opc, MRMSrcMem,
                    (outs KRC:$dst),
                    (ins RC:$src1, x86memop:$src2),
                    !strconcat(OpcodeStr,
                       " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set KRC:$dst,
                          (OpNode (vt RC:$src1),
                                  (bitconvert (memop_frag addr:$src2))))],
                    SSEPackedInt>,
           EVEX_4V;
}
2954
// VPTESTM: 32-bit elements yield a VK16 mask, 64-bit elements (VEX_W) a VK8
// mask.  Both use opcode 0x27 with the T8PD prefix class.
defm VPTESTMDZ : avx512_vptest<0x27, "vptestmd", VK16, VR512, f512mem,
                               memopv16i32, X86testm, v16i32>,
                 T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPTESTMQZ : avx512_vptest<0x27, "vptestmq", VK8, VR512, f512mem,
                               memopv8i64, X86testm, v8i64>,
                 T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

// VPTESTNM: same opcode but with the T8XS prefix class; guarded by HasCDI.
let Predicates = [HasCDI] in {
  defm VPTESTNMDZ : avx512_vptest<0x27, "vptestnmd", VK16, VR512, f512mem,
                                  memopv16i32, X86testnm, v16i32>,
                    T8XS, EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm VPTESTNMQZ : avx512_vptest<0x27, "vptestnmq", VK8, VR512, f512mem,
                                  memopv8i64, X86testnm, v8i64>,
                    T8XS, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
2970
// Map the ptestm intrinsics with an all-ones mask (-1) onto the rr
// instructions; the mask-register result is copied into a GPR class
// (GR16 for the i16 result, GR8 for the i8 result).
def : Pat<(i16 (int_x86_avx512_mask_ptestm_d_512
                  (v16i32 VR512:$src1), (v16i32 VR512:$src2), (i16 -1))),
          (COPY_TO_REGCLASS
             (VPTESTMDZrr VR512:$src1, VR512:$src2), GR16)>;

def : Pat<(i8 (int_x86_avx512_mask_ptestm_q_512
                 (v8i64 VR512:$src1), (v8i64 VR512:$src2), (i8 -1))),
          (COPY_TO_REGCLASS
             (VPTESTMQZrr VR512:$src1, VR512:$src2), GR8)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002978//===----------------------------------------------------------------------===//
2979// AVX-512 Shift instructions
2980//===----------------------------------------------------------------------===//
// Shift by an 8-bit immediate.
//   opc       - opcode byte
//   ImmFormR  - instruction format of the register-source forms
//   ImmFormM  - instruction format of the memory-source forms
//   OpcodeStr - mnemonic
//   OpNode    - DAG node being selected
//   RC        - vector register class
//   vt        - vector type of the result
//   x86memop  - memory operand of the memory-source forms
//   mem_frag  - load fragment used in the mi pattern
//   KRC       - mask register class of the masked forms
// Defines:
//   ri  - register source
//   rik - register source, masked (no selection pattern)
//   mi  - memory source
//   mik - memory source, masked (no selection pattern)
multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
                       string OpcodeStr, SDNode OpNode, RegisterClass RC,
                       ValueType vt, X86MemOperand x86memop, PatFrag mem_frag,
                       RegisterClass KRC> {
  def ri : AVX512BIi8<opc, ImmFormR, (outs RC:$dst),
       (ins RC:$src1, i8imm:$src2),
           !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
       [(set RC:$dst, (vt (OpNode RC:$src1, (i8 imm:$src2))))],
        SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V;
  def rik : AVX512BIi8<opc, ImmFormR, (outs RC:$dst),
       (ins KRC:$mask, RC:$src1, i8imm:$src2),
           !strconcat(OpcodeStr,
                " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
       [], SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V, EVEX_K;

  // The memory forms are marked as loads explicitly: mik has an empty
  // pattern, so mayLoad cannot be inferred for it.
  let mayLoad = 1 in {
  def mi: AVX512BIi8<opc, ImmFormM, (outs RC:$dst),
       (ins x86memop:$src1, i8imm:$src2),
           !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
       [(set RC:$dst, (OpNode (mem_frag addr:$src1),
                     (i8 imm:$src2)))], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V;
  def mik: AVX512BIi8<opc, ImmFormM, (outs RC:$dst),
       (ins KRC:$mask, x86memop:$src1, i8imm:$src2),
           !strconcat(OpcodeStr,
                " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
       [], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V, EVEX_K;
  }
}
3006
// Shift by a count held in a 128-bit register or 128-bit memory operand.
//   opc       - opcode byte
//   OpcodeStr - mnemonic
//   OpNode    - DAG node being selected
//   RC        - vector register class of the shifted value
//   vt        - vector type of the result
//   SrcVT     - type applied to the VR128X count operand
//   bc_frag   - fragment wrapped around the memopv2i64 load of the count
//   KRC       - mask register class of the masked forms
// Defines:
//   rr  - count in VR128X
//   rrk - count in VR128X, masked (no selection pattern)
//   rm  - count in i128mem
//   rmk - count in i128mem, masked (no selection pattern)
multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          RegisterClass RC, ValueType vt, ValueType SrcVT,
                          PatFrag bc_frag, RegisterClass KRC> {
  // src2 is always 128-bit
  def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
                   (ins RC:$src1, VR128X:$src2),
           !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
       [(set RC:$dst, (vt (OpNode RC:$src1, (SrcVT VR128X:$src2))))],
        SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V;
  def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
                   (ins KRC:$mask, RC:$src1, VR128X:$src2),
           !strconcat(OpcodeStr,
                " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
       [], SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V, EVEX_K;

  // The memory forms are marked as loads explicitly: rmk has an empty
  // pattern, so mayLoad cannot be inferred for it.
  let mayLoad = 1 in {
  def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                   (ins RC:$src1, i128mem:$src2),
           !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
       [(set RC:$dst, (vt (OpNode RC:$src1,
                       (bc_frag (memopv2i64 addr:$src2)))))],
                        SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V;
  def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                   (ins KRC:$mask, RC:$src1, i128mem:$src2),
           !strconcat(OpcodeStr,
                " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
       [], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V, EVEX_K;
  }
}
3033
// 512-bit shifts.  Each instruction name is instantiated twice: once from
// avx512_shift_rmi (immediate count, CD8VF) and once from avx512_shift_rrm
// (128-bit count operand, CD8VQ).  The 64-bit element variants add VEX_W.

// Logical right shift, 32-bit elements.
defm VPSRLDZ : avx512_shift_rmi<0x72, MRM2r, MRM2m, "vpsrld", X86vsrli,
                                VR512, v16i32, i512mem, memopv16i32, VK16WM>,
               EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPSRLDZ : avx512_shift_rrm<0xD2, "vpsrld", X86vsrl,
                                VR512, v16i32, v4i32, bc_v4i32, VK16WM>,
               EVEX_V512, EVEX_CD8<32, CD8VQ>;

// Logical right shift, 64-bit elements.
defm VPSRLQZ : avx512_shift_rmi<0x73, MRM2r, MRM2m, "vpsrlq", X86vsrli,
                                VR512, v8i64, i512mem, memopv8i64, VK8WM>,
               EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
defm VPSRLQZ : avx512_shift_rrm<0xD3, "vpsrlq", X86vsrl,
                                VR512, v8i64, v2i64, bc_v2i64, VK8WM>,
               EVEX_V512, EVEX_CD8<64, CD8VQ>, VEX_W;

// Left shift, 32-bit elements.
defm VPSLLDZ : avx512_shift_rmi<0x72, MRM6r, MRM6m, "vpslld", X86vshli,
                                VR512, v16i32, i512mem, memopv16i32, VK16WM>,
               EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPSLLDZ : avx512_shift_rrm<0xF2, "vpslld", X86vshl,
                                VR512, v16i32, v4i32, bc_v4i32, VK16WM>,
               EVEX_V512, EVEX_CD8<32, CD8VQ>;

// Left shift, 64-bit elements.
defm VPSLLQZ : avx512_shift_rmi<0x73, MRM6r, MRM6m, "vpsllq", X86vshli,
                                VR512, v8i64, i512mem, memopv8i64, VK8WM>,
               EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
defm VPSLLQZ : avx512_shift_rrm<0xF3, "vpsllq", X86vshl,
                                VR512, v8i64, v2i64, bc_v2i64, VK8WM>,
               EVEX_V512, EVEX_CD8<64, CD8VQ>, VEX_W;

// Arithmetic right shift, 32-bit elements.
defm VPSRADZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsrad", X86vsrai,
                                VR512, v16i32, i512mem, memopv16i32, VK16WM>,
               EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPSRADZ : avx512_shift_rrm<0xE2, "vpsrad", X86vsra,
                                VR512, v16i32, v4i32, bc_v4i32, VK16WM>,
               EVEX_V512, EVEX_CD8<32, CD8VQ>;

// Arithmetic right shift, 64-bit elements.
defm VPSRAQZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsraq", X86vsrai,
                                VR512, v8i64, i512mem, memopv8i64, VK8WM>,
               EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
defm VPSRAQZ : avx512_shift_rrm<0xE2, "vpsraq", X86vsra,
                                VR512, v8i64, v2i64, bc_v2i64, VK8WM>,
               EVEX_V512, EVEX_CD8<64, CD8VQ>, VEX_W;
3075
3076//===-------------------------------------------------------------------===//
3077// Variable Bit Shifts
3078//===-------------------------------------------------------------------===//
// Per-element variable shift: the count is a full vector of type vt.
//   opc       - opcode byte
//   OpcodeStr - mnemonic
//   OpNode    - DAG node being selected
//   RC        - vector register class
//   vt        - vector type of the result and of the register count
//   x86memop  - memory operand of the rm form
//   mem_frag  - load fragment of the rm pattern
// Defines:
//   rr - count in a register
//   rm - count loaded from memory
multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            RegisterClass RC, ValueType vt,
                            X86MemOperand x86memop, PatFrag mem_frag> {
  // Register count.
  def rr : AVX5128I<opc, MRMSrcReg,
                    (outs RC:$dst),
                    (ins RC:$src1, RC:$src2),
                    !strconcat(OpcodeStr,
                       " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set RC:$dst,
                          (vt (OpNode RC:$src1, (vt RC:$src2))))]>,
           EVEX_4V;

  // Memory count.
  def rm : AVX5128I<opc, MRMSrcMem,
                    (outs RC:$dst),
                    (ins RC:$src1, x86memop:$src2),
                    !strconcat(OpcodeStr,
                       " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set RC:$dst,
                          (vt (OpNode RC:$src1, (mem_frag addr:$src2))))]>,
           EVEX_4V;
}
3095
// 512-bit variable shifts built on the generic shl / srl / sra nodes.
// The D variants use v16i32; the Q variants use v8i64 and add VEX_W.

// Left shift.
defm VPSLLVDZ : avx512_var_shift<0x47, "vpsllvd", shl, VR512, v16i32,
                                 i512mem, memopv16i32>,
                EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPSLLVQZ : avx512_var_shift<0x47, "vpsllvq", shl, VR512, v8i64,
                                 i512mem, memopv8i64>,
                EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

// Logical right shift.
defm VPSRLVDZ : avx512_var_shift<0x45, "vpsrlvd", srl, VR512, v16i32,
                                 i512mem, memopv16i32>,
                EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPSRLVQZ : avx512_var_shift<0x45, "vpsrlvq", srl, VR512, v8i64,
                                 i512mem, memopv8i64>,
                EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

// Arithmetic right shift.
defm VPSRAVDZ : avx512_var_shift<0x46, "vpsravd", sra, VR512, v16i32,
                                 i512mem, memopv16i32>,
                EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPSRAVQZ : avx512_var_shift<0x46, "vpsravq", sra, VR512, v8i64,
                                 i512mem, memopv8i64>,
                EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
3114
3115//===----------------------------------------------------------------------===//
3116// AVX-512 - MOVDDUP
3117//===----------------------------------------------------------------------===//
3118
// MOVDDUP (opcode 0x12), selected from X86Movddup.
//   OpcodeStr  - mnemonic
//   RC         - vector register class
//   VT         - vector type of the result
//   x86memop   - memory operand of the rm form
//   memop_frag - load fragment of the rm pattern
// Defines:
//   rr - register source
//   rm - memory source
multiclass avx512_movddup<string OpcodeStr, RegisterClass RC, ValueType VT,
                          X86MemOperand x86memop, PatFrag memop_frag> {
  // Register source.
  def rr : AVX512PDI<0x12, MRMSrcReg,
                     (outs RC:$dst),
                     (ins RC:$src),
                     !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
                     [(set RC:$dst, (VT (X86Movddup RC:$src)))]>,
           EVEX;

  // Memory source.
  def rm : AVX512PDI<0x12, MRMSrcMem,
                     (outs RC:$dst),
                     (ins x86memop:$src),
                     !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
                     [(set RC:$dst,
                           (VT (X86Movddup (memop_frag addr:$src))))]>,
           EVEX;
}
3129
// 512-bit MOVDDUP on v8f64.
defm VMOVDDUPZ : avx512_movddup<"vmovddup", VR512, v8f64, f512mem,
                                memopv8f64>,
                 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;

// X86Movddup of a scalar f64 load placed into a v8f64 also selects the
// memory form.
def : Pat<(X86Movddup (v8f64 (scalar_to_vector (loadf64 addr:$src)))),
          (VMOVDDUPZrm addr:$src)>;
3134
Elena Demikhovsky0a74b7d2013-11-14 11:29:27 +00003135//===---------------------------------------------------------------------===//
3136// Replicate Single FP - MOVSHDUP and MOVSLDUP
3137//===---------------------------------------------------------------------===//
// Replicate single-precision FP elements (MOVSHDUP / MOVSLDUP style).
//   op        - opcode byte
//   OpNode    - DAG node being selected
//   OpcodeStr - mnemonic
//   vt        - vector type of the rr result
//   RC        - vector register class
//   mem_frag  - load fragment of the rm pattern
//   x86memop  - memory operand of the rm form
// Defines:
//   rr - register source
//   rm - memory source (marked mayLoad)
multiclass avx512_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
                                ValueType vt, RegisterClass RC,
                                PatFrag mem_frag, X86MemOperand x86memop> {
  // Register source.
  def rr : AVX512XSI<op, MRMSrcReg,
                     (outs RC:$dst),
                     (ins RC:$src),
                     !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
                     [(set RC:$dst, (vt (OpNode RC:$src)))]>,
           EVEX;

  // Memory source.
  let mayLoad = 1 in
  def rm : AVX512XSI<op, MRMSrcMem,
                     (outs RC:$dst),
                     (ins x86memop:$src),
                     !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
                     [(set RC:$dst, (OpNode (mem_frag addr:$src)))]>,
           EVEX;
}
3149
// 512-bit MOVSHDUP (0x16) and MOVSLDUP (0x12) on v16f32.
defm VMOVSHDUPZ : avx512_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
                                       v16f32, VR512, memopv16f32, f512mem>,
                  EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VMOVSLDUPZ : avx512_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
                                       v16f32, VR512, memopv16f32, f512mem>,
                  EVEX_V512, EVEX_CD8<32, CD8VF>;

// The same nodes on v16i32 are selected onto the FP instructions.
def : Pat<(v16i32 (X86Movshdup VR512:$src)),
          (VMOVSHDUPZrr VR512:$src)>;
def : Pat<(v16i32 (X86Movshdup (memopv16i32 addr:$src))),
          (VMOVSHDUPZrm addr:$src)>;
def : Pat<(v16i32 (X86Movsldup VR512:$src)),
          (VMOVSLDUPZrr VR512:$src)>;
def : Pat<(v16i32 (X86Movsldup (memopv16i32 addr:$src))),
          (VMOVSLDUPZrm addr:$src)>;
3163
3164//===----------------------------------------------------------------------===//
3165// Move Low to High and High to Low packed FP Instructions
3166//===----------------------------------------------------------------------===//
// VMOVLHPS: register-register only, on VR128X, selected from X86Movlhps
// with a v4f32 result.
def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg,
                    (outs VR128X:$dst),
                    (ins VR128X:$src1, VR128X:$src2),
                    "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set VR128X:$dst,
                          (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))],
                    IIC_SSE_MOV_LH>,
                  EVEX_4V;

// VMOVHLPS: register-register only, on VR128X, selected from X86Movhlps
// with a v4f32 result.
def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg,
                    (outs VR128X:$dst),
                    (ins VR128X:$src1, VR128X:$src2),
                    "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set VR128X:$dst,
                          (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))],
                    IIC_SSE_MOV_LH>,
                  EVEX_4V;
3177
// Integer-typed X86Movlhps / X86Movhlps nodes reuse the FP instructions
// when AVX-512 is available.
let Predicates = [HasAVX512] in {
  // MOVLHPS patterns: v4i32 and v2i64.
  def : Pat<(v4i32 (X86Movlhps VR128X:$src1, VR128X:$src2)),
            (VMOVLHPSZrr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v2i64 (X86Movlhps VR128X:$src1, VR128X:$src2)),
            (VMOVLHPSZrr (v2i64 VR128X:$src1), VR128X:$src2)>;

  // MOVHLPS patterns: v4i32 only.
  def : Pat<(v4i32 (X86Movhlps VR128X:$src1, VR128X:$src2)),
            (VMOVHLPSZrr VR128X:$src1, VR128X:$src2)>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003189
3190//===----------------------------------------------------------------------===//
3191// FMA - Fused Multiply Operations
3192//
// Packed FMA where $src1 is tied to $dst and the third operand may come from
// a register, a full-width memory operand or a broadcast scalar.
//   opc           - opcode byte
//   OpcodeStr     - mnemonic
//   RC            - vector register class
//   x86memop      - full-width memory operand (m form)
//   mem_frag      - load fragment of the m pattern
//   x86scalar_mop - scalar memory operand (mb form)
//   scalar_mfrag  - scalar load fragment fed to X86VBroadcast in the mb pattern
//   BrdcstStr     - broadcast decoration appended to ${src3} in the mb asm
//   OpNode        - DAG node being selected
//   OpVT          - vector type of the operation
//   KRC           - mask register class handed to AVX512_masking_3src
// Defines:
//   r  - register form(s), generated by AVX512_masking_3src
//   m  - third operand loaded from memory
//   mb - third operand broadcast from a scalar load (EVEX_B)
let Constraints = "$src1 = $dst" in {
multiclass avx512_fma3p_rm<bits<8> opc, string OpcodeStr,
            RegisterClass RC, X86MemOperand x86memop,
            PatFrag mem_frag, X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
            string BrdcstStr, SDNode OpNode, ValueType OpVT,
            RegisterClass KRC> {
  defm r: AVX512_masking_3src<opc, MRMSrcReg, (outs RC:$dst),
          (ins RC:$src2, RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)), OpVT, RC, KRC>,
         AVX512FMA3Base;

  // Both memory forms read memory; the braces make the flag cover the
  // broadcast form as well instead of only the first def.
  let mayLoad = 1 in {
  def m: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
          (ins RC:$src1, RC:$src2, x86memop:$src3),
          !strconcat(OpcodeStr, " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
          [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2,
                                               (mem_frag addr:$src3))))]>;
  def mb: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
           (ins RC:$src1, RC:$src2, x86scalar_mop:$src3),
           !strconcat(OpcodeStr, " \t{${src3}", BrdcstStr,
            ", $src2, $dst|$dst, $src2, ${src3}", BrdcstStr, "}"),
           [(set RC:$dst, (OpNode RC:$src1, RC:$src2,
           (OpVT (X86VBroadcast (scalar_mfrag addr:$src3)))))]>, EVEX_B;
  }
}
} // Constraints = "$src1 = $dst"
3219
// 512-bit packed FMA, "213" operand order, single precision (v16f32).
let ExeDomain = SSEPackedSingle in {
  defm VFMADD213PSZ    : avx512_fma3p_rm<0xA8, "vfmadd213ps", VR512, f512mem,
                                         memopv16f32, f32mem, loadf32,
                                         "{1to16}", X86Fmadd, v16f32, VK16WM>,
                         EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm VFMSUB213PSZ    : avx512_fma3p_rm<0xAA, "vfmsub213ps", VR512, f512mem,
                                         memopv16f32, f32mem, loadf32,
                                         "{1to16}", X86Fmsub, v16f32, VK16WM>,
                         EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm VFMADDSUB213PSZ : avx512_fma3p_rm<0xA6, "vfmaddsub213ps", VR512, f512mem,
                                         memopv16f32, f32mem, loadf32,
                                         "{1to16}", X86Fmaddsub, v16f32,
                                         VK16WM>,
                         EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm VFMSUBADD213PSZ : avx512_fma3p_rm<0xA7, "vfmsubadd213ps", VR512, f512mem,
                                         memopv16f32, f32mem, loadf32,
                                         "{1to16}", X86Fmsubadd, v16f32,
                                         VK16WM>,
                         EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm VFNMADD213PSZ   : avx512_fma3p_rm<0xAC, "vfnmadd213ps", VR512, f512mem,
                                         memopv16f32, f32mem, loadf32,
                                         "{1to16}", X86Fnmadd, v16f32, VK16WM>,
                         EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm VFNMSUB213PSZ   : avx512_fma3p_rm<0xAE, "vfnmsub213ps", VR512, f512mem,
                                         memopv16f32, f32mem, loadf32,
                                         "{1to16}", X86Fnmsub, v16f32, VK16WM>,
                         EVEX_V512, EVEX_CD8<32, CD8VF>;
}

// 512-bit packed FMA, "213" operand order, double precision (v8f64, VEX_W).
let ExeDomain = SSEPackedDouble in {
  defm VFMADD213PDZ    : avx512_fma3p_rm<0xA8, "vfmadd213pd", VR512, f512mem,
                                         memopv8f64, f64mem, loadf64,
                                         "{1to8}", X86Fmadd, v8f64, VK8WM>,
                         EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  defm VFMSUB213PDZ    : avx512_fma3p_rm<0xAA, "vfmsub213pd", VR512, f512mem,
                                         memopv8f64, f64mem, loadf64,
                                         "{1to8}", X86Fmsub, v8f64, VK8WM>,
                         EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  defm VFMADDSUB213PDZ : avx512_fma3p_rm<0xA6, "vfmaddsub213pd", VR512, f512mem,
                                         memopv8f64, f64mem, loadf64,
                                         "{1to8}", X86Fmaddsub, v8f64, VK8WM>,
                         EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  defm VFMSUBADD213PDZ : avx512_fma3p_rm<0xA7, "vfmsubadd213pd", VR512, f512mem,
                                         memopv8f64, f64mem, loadf64,
                                         "{1to8}", X86Fmsubadd, v8f64, VK8WM>,
                         EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  defm VFNMADD213PDZ   : avx512_fma3p_rm<0xAC, "vfnmadd213pd", VR512, f512mem,
                                         memopv8f64, f64mem, loadf64,
                                         "{1to8}", X86Fnmadd, v8f64, VK8WM>,
                         EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  defm VFNMSUB213PDZ   : avx512_fma3p_rm<0xAE, "vfnmsub213pd", VR512, f512mem,
                                         memopv8f64, f64mem, loadf64,
                                         "{1to8}", X86Fnmsub, v8f64, VK8WM>,
                         EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
3272
// Packed FMA memory forms in which the memory operand ($src2) is the second
// OpNode operand and the register $src3 the third; $src1 is tied to $dst.
//   opc           - opcode byte
//   OpcodeStr     - mnemonic
//   RC            - vector register class
//   x86memop      - full-width memory operand (m form)
//   mem_frag      - load fragment of the m pattern
//   x86scalar_mop - scalar memory operand (mb form)
//   scalar_mfrag  - scalar load fragment fed to X86VBroadcast in the mb pattern
//   BrdcstStr     - broadcast decoration appended to ${src2} in the mb asm
//   OpNode        - DAG node being selected
//   OpVT          - vector type of the operation
// Defines:
//   m  - second operand loaded from memory
//   mb - second operand broadcast from a scalar load (EVEX_B)
let Constraints = "$src1 = $dst" in {
multiclass avx512_fma3p_m132<bits<8> opc, string OpcodeStr,
            RegisterClass RC, X86MemOperand x86memop,
            PatFrag mem_frag, X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
            string BrdcstStr, SDNode OpNode, ValueType OpVT> {
  // Both forms read memory; the braces make the flag cover the broadcast
  // form as well instead of only the first def.
  let mayLoad = 1 in {
  def m: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
          (ins RC:$src1, RC:$src3, x86memop:$src2),
          !strconcat(OpcodeStr, " \t{$src2, $src3, $dst|$dst, $src3, $src2}"),
          [(set RC:$dst, (OpVT (OpNode RC:$src1, (mem_frag addr:$src2), RC:$src3)))]>;
  def mb: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
           (ins RC:$src1, RC:$src3, x86scalar_mop:$src2),
           !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
            ", $src3, $dst|$dst, $src3, ${src2}", BrdcstStr, "}"),
           [(set RC:$dst, (OpNode RC:$src1,
           (OpVT (X86VBroadcast (scalar_mfrag addr:$src2))), RC:$src3))]>, EVEX_B;
  }
}
} // Constraints = "$src1 = $dst"
3291
3292
// 512-bit packed FMA, "132" operand order, single precision (v16f32).
let ExeDomain = SSEPackedSingle in {
  defm VFMADD132PSZ    : avx512_fma3p_m132<0x98, "vfmadd132ps", VR512, f512mem,
                                           memopv16f32, f32mem, loadf32,
                                           "{1to16}", X86Fmadd, v16f32>,
                         EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm VFMSUB132PSZ    : avx512_fma3p_m132<0x9A, "vfmsub132ps", VR512, f512mem,
                                           memopv16f32, f32mem, loadf32,
                                           "{1to16}", X86Fmsub, v16f32>,
                         EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm VFMADDSUB132PSZ : avx512_fma3p_m132<0x96, "vfmaddsub132ps", VR512,
                                           f512mem, memopv16f32, f32mem,
                                           loadf32, "{1to16}", X86Fmaddsub,
                                           v16f32>,
                         EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm VFMSUBADD132PSZ : avx512_fma3p_m132<0x97, "vfmsubadd132ps", VR512,
                                           f512mem, memopv16f32, f32mem,
                                           loadf32, "{1to16}", X86Fmsubadd,
                                           v16f32>,
                         EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm VFNMADD132PSZ   : avx512_fma3p_m132<0x9C, "vfnmadd132ps", VR512, f512mem,
                                           memopv16f32, f32mem, loadf32,
                                           "{1to16}", X86Fnmadd, v16f32>,
                         EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm VFNMSUB132PSZ   : avx512_fma3p_m132<0x9E, "vfnmsub132ps", VR512, f512mem,
                                           memopv16f32, f32mem, loadf32,
                                           "{1to16}", X86Fnmsub, v16f32>,
                         EVEX_V512, EVEX_CD8<32, CD8VF>;
}

// 512-bit packed FMA, "132" operand order, double precision (v8f64, VEX_W).
let ExeDomain = SSEPackedDouble in {
  defm VFMADD132PDZ    : avx512_fma3p_m132<0x98, "vfmadd132pd", VR512, f512mem,
                                           memopv8f64, f64mem, loadf64,
                                           "{1to8}", X86Fmadd, v8f64>,
                         EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  defm VFMSUB132PDZ    : avx512_fma3p_m132<0x9A, "vfmsub132pd", VR512, f512mem,
                                           memopv8f64, f64mem, loadf64,
                                           "{1to8}", X86Fmsub, v8f64>,
                         EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  defm VFMADDSUB132PDZ : avx512_fma3p_m132<0x96, "vfmaddsub132pd", VR512,
                                           f512mem, memopv8f64, f64mem,
                                           loadf64, "{1to8}", X86Fmaddsub,
                                           v8f64>,
                         EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  defm VFMSUBADD132PDZ : avx512_fma3p_m132<0x97, "vfmsubadd132pd", VR512,
                                           f512mem, memopv8f64, f64mem,
                                           loadf64, "{1to8}", X86Fmsubadd,
                                           v8f64>,
                         EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  defm VFNMADD132PDZ   : avx512_fma3p_m132<0x9C, "vfnmadd132pd", VR512, f512mem,
                                           memopv8f64, f64mem, loadf64,
                                           "{1to8}", X86Fnmadd, v8f64>,
                         EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  defm VFNMSUB132PDZ   : avx512_fma3p_m132<0x9E, "vfnmsub132pd", VR512, f512mem,
                                           memopv8f64, f64mem, loadf64,
                                           "{1to8}", X86Fnmsub, v8f64>,
                         EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
3345
3346// Scalar FMA
// Scalar FMA with $src1 tied to $dst.
//   opc       - opcode byte
//   OpcodeStr - mnemonic
//   OpNode    - DAG node being selected
//   RC        - scalar FP register class
//   OpVT      - scalar type of the operation
//   x86memop  - memory operand of the m form
//   memop     - not referenced by the definitions in this multiclass
//   mem_frag  - load fragment of the m pattern
// Defines:
//   r - all-register form (commutable); the pattern passes $src2 as the
//       first OpNode operand and the tied $src1 as the second
//   m - third operand loaded from memory
let Constraints = "$src1 = $dst" in {
multiclass avx512_fma3s_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                 RegisterClass RC, ValueType OpVT,
                 X86MemOperand x86memop, Operand memop,
                 PatFrag mem_frag> {
  let isCommutable = 1 in
  def r     : AVX512FMA3<opc, MRMSrcReg, (outs RC:$dst),
                   (ins RC:$src1, RC:$src2, RC:$src3),
                   !strconcat(OpcodeStr,
                              " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                   [(set RC:$dst,
                     (OpVT (OpNode RC:$src2, RC:$src1, RC:$src3)))]>;
  // Use the caller-supplied memory operand rather than a fixed 128-bit one,
  // so the operand width follows the instantiation.
  let mayLoad = 1 in
  def m     : AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
                   (ins RC:$src1, RC:$src2, x86memop:$src3),
                   !strconcat(OpcodeStr,
                              " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                   [(set RC:$dst,
                     (OpVT (OpNode RC:$src2, RC:$src1,
                            (mem_frag addr:$src3))))]>;
}

} // Constraints = "$src1 = $dst"
3370
// Scalar FMA instantiations ("213" mnemonics).  The SS variants use FR32X /
// f32; the SD variants use FR64X / f64 and add VEX_W.

// Fused multiply-add.
defm VFMADDSSZ  : avx512_fma3s_rm<0xA9, "vfmadd213ss", X86Fmadd, FR32X,
                                  f32, f32mem, ssmem, loadf32>,
                  EVEX_CD8<32, CD8VT1>;
defm VFMADDSDZ  : avx512_fma3s_rm<0xA9, "vfmadd213sd", X86Fmadd, FR64X,
                                  f64, f64mem, sdmem, loadf64>,
                  VEX_W, EVEX_CD8<64, CD8VT1>;

// Fused multiply-subtract.
defm VFMSUBSSZ  : avx512_fma3s_rm<0xAB, "vfmsub213ss", X86Fmsub, FR32X,
                                  f32, f32mem, ssmem, loadf32>,
                  EVEX_CD8<32, CD8VT1>;
defm VFMSUBSDZ  : avx512_fma3s_rm<0xAB, "vfmsub213sd", X86Fmsub, FR64X,
                                  f64, f64mem, sdmem, loadf64>,
                  VEX_W, EVEX_CD8<64, CD8VT1>;

// Fused negated multiply-add.
defm VFNMADDSSZ : avx512_fma3s_rm<0xAD, "vfnmadd213ss", X86Fnmadd, FR32X,
                                  f32, f32mem, ssmem, loadf32>,
                  EVEX_CD8<32, CD8VT1>;
defm VFNMADDSDZ : avx512_fma3s_rm<0xAD, "vfnmadd213sd", X86Fnmadd, FR64X,
                                  f64, f64mem, sdmem, loadf64>,
                  VEX_W, EVEX_CD8<64, CD8VT1>;

// Fused negated multiply-subtract.
defm VFNMSUBSSZ : avx512_fma3s_rm<0xAF, "vfnmsub213ss", X86Fnmsub, FR32X,
                                  f32, f32mem, ssmem, loadf32>,
                  EVEX_CD8<32, CD8VT1>;
defm VFNMSUBSDZ : avx512_fma3s_rm<0xAF, "vfnmsub213sd", X86Fnmsub, FR64X,
                                  f64, f64mem, sdmem, loadf64>,
                  VEX_W, EVEX_CD8<64, CD8VT1>;
3387
//===----------------------------------------------------------------------===//
// AVX-512 Scalar convert from signed integer to float/double
//===----------------------------------------------------------------------===//
3391
// Pattern-less scalar integer-to-FP conversion with a pass-through operand.
// Emits `rr` (SrcRC register source) and `rm` (x86memop memory source); both
// take an extra DstRC:$src1 input and are tagged EVEX_4V.
//   opc      - opcode byte.
//   SrcRC    - register class of the integer source in the `rr` form.
//   DstRC    - register class of $dst and of $src1.
//   x86memop - memory operand class of the source in the `rm` form.
//   asm      - mnemonic; the operand string is appended to it.
multiclass avx512_vcvtsi<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                         X86MemOperand x86memop, string asm> {
// Neither def has a pattern, so the side-effect flag is stated explicitly.
let hasSideEffects = 0 in {
  def rr : SI<opc, MRMSrcReg,
              (outs DstRC:$dst),
              (ins DstRC:$src1, SrcRC:$src),
              !strconcat(asm," \t{$src, $src1, $dst|$dst, $src1, $src}"),
              []>,
           EVEX_4V;

  let mayLoad = 1 in
  def rm : SI<opc, MRMSrcMem,
              (outs DstRC:$dst),
              (ins DstRC:$src1, x86memop:$src),
              !strconcat(asm," \t{$src, $src1, $dst|$dst, $src1, $src}"),
              []>,
           EVEX_4V;
} // hasSideEffects = 0
}
// Scalar integer-to-FP conversions (signed: opcode 0x2A, unsigned: opcode
// 0x7B) together with the selection patterns that use them. Every pattern
// supplies an IMPLICIT_DEF of the result type for the $src1 operand.
let Predicates = [HasAVX512] in {

// --- signed integer -> float/double ---------------------------------------
defm VCVTSI2SSZ   : avx512_vcvtsi<0x2A, GR32, FR32X, i32mem, "cvtsi2ss{l}">,
                    XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SSZ : avx512_vcvtsi<0x2A, GR64, FR32X, i64mem, "cvtsi2ss{q}">,
                    XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
defm VCVTSI2SDZ   : avx512_vcvtsi<0x2A, GR32, FR64X, i32mem, "cvtsi2sd{l}">,
                    XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SDZ : avx512_vcvtsi<0x2A, GR64, FR64X, i64mem, "cvtsi2sd{q}">,
                    XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;

// sint_to_fp of a loaded integer selects the memory forms.
def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
          (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
          (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
          (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
          (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

// sint_to_fp of a GPR selects the register forms.
def : Pat<(f32 (sint_to_fp GR32:$src)),
          (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (sint_to_fp GR64:$src)),
          (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (sint_to_fp GR32:$src)),
          (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (sint_to_fp GR64:$src)),
          (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;

// --- unsigned integer -> float/double -------------------------------------
defm VCVTUSI2SSZ   : avx512_vcvtsi<0x7B, GR32, FR32X, i32mem, "cvtusi2ss{l}">,
                     XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SSZ : avx512_vcvtsi<0x7B, GR64, FR32X, i64mem, "cvtusi2ss{q}">,
                     XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
defm VCVTUSI2SDZ   : avx512_vcvtsi<0x7B, GR32, FR64X, i32mem, "cvtusi2sd{l}">,
                     XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SDZ : avx512_vcvtsi<0x7B, GR64, FR64X, i64mem, "cvtusi2sd{q}">,
                     XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;

// uint_to_fp of a loaded integer selects the memory forms.
def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
          (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))),
          (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))),
          (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))),
          (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

// uint_to_fp of a GPR selects the register forms.
def : Pat<(f32 (uint_to_fp GR32:$src)),
          (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (uint_to_fp GR64:$src)),
          (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (uint_to_fp GR32:$src)),
          (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (uint_to_fp GR64:$src)),
          (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003460
3461//===----------------------------------------------------------------------===//
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00003462// AVX-512 Scalar convert from float/double to integer
3463//===----------------------------------------------------------------------===//
// Scalar FP-to-integer conversion selected through an intrinsic.
//   opc      - opcode byte.
//   SrcRC    - register class of the source in the `rr` form.
//   DstRC    - register class of the result.
//   Int      - intrinsic matched by the `rr` pattern.
//   memop    - operand class of the source in the `rm` form.
//   mem_cpat - accepted but not referenced; the `rm` form has no pattern.
//   asm      - mnemonic; the operand string is appended to it.
multiclass avx512_cvt_s_int<bits<8> opc, RegisterClass SrcRC,
                            RegisterClass DstRC, Intrinsic Int,
                            Operand memop, ComplexPattern mem_cpat,
                            string asm> {
let hasSideEffects = 0 in {
  // Register source: selected for (Int SrcRC:$src).
  def rr : SI<opc, MRMSrcReg,
              (outs DstRC:$dst),
              (ins SrcRC:$src),
              !strconcat(asm," \t{$src, $dst|$dst, $src}"),
              [(set DstRC:$dst, (Int SrcRC:$src))]>,
           EVEX, VEX_LIG, Requires<[HasAVX512]>;

  // Memory source: defined without a selection pattern.
  let mayLoad = 1 in
  def rm : SI<opc, MRMSrcMem,
              (outs DstRC:$dst),
              (ins memop:$src),
              !strconcat(asm," \t{$src, $dst|$dst, $src}"),
              []>,
           EVEX, VEX_LIG, Requires<[HasAVX512]>;
} // hasSideEffects = 0
}
3478let Predicates = [HasAVX512] in {
// Convert float/double to signed/unsigned int 32/64.
// Intrinsic-based instantiations of avx512_cvt_s_int: opcode 0x2D for the
// signed results, 0x79 for the unsigned ones; the GR64 variants add VEX_W.
defm VCVTSS2SIZ    : avx512_cvt_s_int<0x2D, VR128X, GR32,
                                      int_x86_sse_cvtss2si,
                                      ssmem, sse_load_f32, "cvtss2si">,
                     XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2SI64Z  : avx512_cvt_s_int<0x2D, VR128X, GR64,
                                      int_x86_sse_cvtss2si64,
                                      ssmem, sse_load_f32, "cvtss2si">,
                     XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USIZ   : avx512_cvt_s_int<0x79, VR128X, GR32,
                                      int_x86_avx512_cvtss2usi,
                                      ssmem, sse_load_f32, "cvtss2usi">,
                     XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USI64Z : avx512_cvt_s_int<0x79, VR128X, GR64,
                                      int_x86_avx512_cvtss2usi64,
                                      ssmem, sse_load_f32, "cvtss2usi">,
                     XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSD2SIZ    : avx512_cvt_s_int<0x2D, VR128X, GR32,
                                      int_x86_sse2_cvtsd2si,
                                      sdmem, sse_load_f64, "cvtsd2si">,
                     XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2SI64Z  : avx512_cvt_s_int<0x2D, VR128X, GR64,
                                      int_x86_sse2_cvtsd2si64,
                                      sdmem, sse_load_f64, "cvtsd2si">,
                     XD, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USIZ   : avx512_cvt_s_int<0x79, VR128X, GR32,
                                      int_x86_avx512_cvtsd2usi,
                                      sdmem, sse_load_f64, "cvtsd2usi">,
                     XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USI64Z : avx512_cvt_s_int<0x79, VR128X, GR64,
                                      int_x86_avx512_cvtsd2usi64,
                                      sdmem, sse_load_f64, "cvtsd2usi">,
                     XD, VEX_W, EVEX_CD8<64, CD8VT1>;
3506
// Codegen-only, intrinsic-selected integer-to-FP conversions built from
// sse12_cvt_sint_3addr.
//
// The signed conversions use opcode 0x2A. The unsigned conversions use opcode
// 0x7B, the same opcode as the VCVTUSI2SSZ / VCVTUSI2SDZ definitions earlier
// in this file; they previously reused 0x2A, which is the signed-conversion
// opcode.
//
// NOTE(review): unlike the avx512_vcvtsi-based definitions, none of these
// carry VEX_LIG or EVEX_CD8 - confirm whether that is intentional.
let isCodeGenOnly = 1 in {
  // Signed integer sources.
  defm Int_VCVTSI2SSZ   : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
                            int_x86_sse_cvtsi2ss, i32mem, loadi32,
                            "cvtsi2ss{l}", SSE_CVT_Scalar, 0>,
                          XS, EVEX_4V;
  defm Int_VCVTSI2SS64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
                            int_x86_sse_cvtsi642ss, i64mem, loadi64,
                            "cvtsi2ss{q}", SSE_CVT_Scalar, 0>,
                          XS, EVEX_4V, VEX_W;
  defm Int_VCVTSI2SDZ   : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
                            int_x86_sse2_cvtsi2sd, i32mem, loadi32,
                            "cvtsi2sd{l}", SSE_CVT_Scalar, 0>,
                          XD, EVEX_4V;
  defm Int_VCVTSI2SD64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
                            int_x86_sse2_cvtsi642sd, i64mem, loadi64,
                            "cvtsi2sd{q}", SSE_CVT_Scalar, 0>,
                          XD, EVEX_4V, VEX_W;

  // Unsigned integer sources.
  defm Int_VCVTUSI2SSZ   : sse12_cvt_sint_3addr<0x7B, GR32, VR128X,
                             int_x86_avx512_cvtusi2ss, i32mem, loadi32,
                             "cvtusi2ss{l}", SSE_CVT_Scalar, 0>,
                           XS, EVEX_4V;
  defm Int_VCVTUSI2SS64Z : sse12_cvt_sint_3addr<0x7B, GR64, VR128X,
                             int_x86_avx512_cvtusi642ss, i64mem, loadi64,
                             "cvtusi2ss{q}", SSE_CVT_Scalar, 0>,
                           XS, EVEX_4V, VEX_W;
  defm Int_VCVTUSI2SDZ   : sse12_cvt_sint_3addr<0x7B, GR32, VR128X,
                             int_x86_avx512_cvtusi2sd, i32mem, loadi32,
                             "cvtusi2sd{l}", SSE_CVT_Scalar, 0>,
                           XD, EVEX_4V;
  defm Int_VCVTUSI2SD64Z : sse12_cvt_sint_3addr<0x7B, GR64, VR128X,
                             int_x86_avx512_cvtusi642sd, i64mem, loadi64,
                             "cvtusi2sd{q}", SSE_CVT_Scalar, 0>,
                           XD, EVEX_4V, VEX_W;
} // isCodeGenOnly = 1
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00003534
3535// Convert float/double to signed/unsigned int 32/64 with truncation
// Codegen-only, intrinsic-selected truncating conversions built from
// avx512_cvt_s_int: opcode 0x2C for the signed results, 0x78 for the unsigned
// ones; the GR64 variants add VEX_W.
let isCodeGenOnly = 1 in {
  defm Int_VCVTTSS2SIZ    : avx512_cvt_s_int<0x2C, VR128X, GR32,
                              int_x86_sse_cvttss2si,
                              ssmem, sse_load_f32, "cvttss2si">,
                            XS, EVEX_CD8<32, CD8VT1>;
  defm Int_VCVTTSS2SI64Z  : avx512_cvt_s_int<0x2C, VR128X, GR64,
                              int_x86_sse_cvttss2si64,
                              ssmem, sse_load_f32, "cvttss2si">,
                            XS, VEX_W, EVEX_CD8<32, CD8VT1>;
  defm Int_VCVTTSD2SIZ    : avx512_cvt_s_int<0x2C, VR128X, GR32,
                              int_x86_sse2_cvttsd2si,
                              sdmem, sse_load_f64, "cvttsd2si">,
                            XD, EVEX_CD8<64, CD8VT1>;
  defm Int_VCVTTSD2SI64Z  : avx512_cvt_s_int<0x2C, VR128X, GR64,
                              int_x86_sse2_cvttsd2si64,
                              sdmem, sse_load_f64, "cvttsd2si">,
                            XD, VEX_W, EVEX_CD8<64, CD8VT1>;
  defm Int_VCVTTSS2USIZ   : avx512_cvt_s_int<0x78, VR128X, GR32,
                              int_x86_avx512_cvttss2usi,
                              ssmem, sse_load_f32, "cvttss2usi">,
                            XS, EVEX_CD8<32, CD8VT1>;
  defm Int_VCVTTSS2USI64Z : avx512_cvt_s_int<0x78, VR128X, GR64,
                              int_x86_avx512_cvttss2usi64,
                              ssmem, sse_load_f32, "cvttss2usi">,
                            XS, VEX_W, EVEX_CD8<32, CD8VT1>;
  defm Int_VCVTTSD2USIZ   : avx512_cvt_s_int<0x78, VR128X, GR32,
                              int_x86_avx512_cvttsd2usi,
                              sdmem, sse_load_f64, "cvttsd2usi">,
                            XD, EVEX_CD8<64, CD8VT1>;
  defm Int_VCVTTSD2USI64Z : avx512_cvt_s_int<0x78, VR128X, GR64,
                              int_x86_avx512_cvttsd2usi64,
                              sdmem, sse_load_f64, "cvttsd2usi">,
                            XD, VEX_W, EVEX_CD8<64, CD8VT1>;
} // isCodeGenOnly = 1
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00003567
// Scalar conversion selected directly from a DAG node.
//   opc      - opcode byte.
//   SrcRC    - register class of the source in the `rr` form.
//   DstRC    - register class of the result.
//   OpNode   - DAG node matched by both forms.
//   x86memop - memory operand class of the source in the `rm` form.
//   ld_frag  - load fragment wrapped around addr:$src in the `rm` pattern.
//   asm      - mnemonic; the operand string is appended to it.
multiclass avx512_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                        SDNode OpNode, X86MemOperand x86memop,
                        PatFrag ld_frag, string asm> {
  // Register source.
  def rr : SI<opc, MRMSrcReg,
              (outs DstRC:$dst),
              (ins SrcRC:$src),
              !strconcat(asm," \t{$src, $dst|$dst, $src}"),
              [(set DstRC:$dst, (OpNode SrcRC:$src))]>,
           EVEX;

  // Memory source.
  def rm : SI<opc, MRMSrcMem,
              (outs DstRC:$dst),
              (ins x86memop:$src),
              !strconcat(asm," \t{$src, $dst|$dst, $src}"),
              [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>,
           EVEX;
}
3578
// Truncating scalar FP-to-integer conversions selected from fp_to_sint
// (opcode 0x2C) and fp_to_uint (opcode 0x78); the GR64 variants add VEX_W.
defm VCVTTSS2SIZ    : avx512_cvt_s<0x2C, FR32X, GR32, fp_to_sint,
                                   f32mem, loadf32, "cvttss2si">,
                      XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2USIZ   : avx512_cvt_s<0x78, FR32X, GR32, fp_to_uint,
                                   f32mem, loadf32, "cvttss2usi">,
                      XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2SI64Z  : avx512_cvt_s<0x2C, FR32X, GR64, fp_to_sint,
                                   f32mem, loadf32, "cvttss2si">,
                      XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2USI64Z : avx512_cvt_s<0x78, FR32X, GR64, fp_to_uint,
                                   f32mem, loadf32, "cvttss2usi">,
                      XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2SIZ    : avx512_cvt_s<0x2C, FR64X, GR32, fp_to_sint,
                                   f64mem, loadf64, "cvttsd2si">,
                      XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2USIZ   : avx512_cvt_s<0x78, FR64X, GR32, fp_to_uint,
                                   f64mem, loadf64, "cvttsd2usi">,
                      XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2SI64Z  : avx512_cvt_s<0x2C, FR64X, GR64, fp_to_sint,
                                   f64mem, loadf64, "cvttsd2si">,
                      XD, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2USI64Z : avx512_cvt_s<0x78, FR64X, GR64, fp_to_uint,
                                   f64mem, loadf64, "cvttsd2usi">,
                      XD, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovskycf088092013-12-11 14:31:04 +00003603} // HasAVX512
//===----------------------------------------------------------------------===//
// AVX-512 Convert from float to double and back
//===----------------------------------------------------------------------===//
// Pattern-less scalar float <-> double conversions (opcode 0x5A). Each has a
// register form and a memory form; both take an extra $src1 register input.
// Selection patterns are attached separately with `def : Pat`.
let hasSideEffects = 0 in {

// Convert scalar single to scalar double
def VCVTSS2SDZrr : AVX512XSI<0x5A, MRMSrcReg,
                             (outs FR64X:$dst),
                             (ins FR32X:$src1, FR32X:$src2),
                             "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                             []>,
                   EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
let mayLoad = 1 in
def VCVTSS2SDZrm : AVX512XSI<0x5A, MRMSrcMem,
                             (outs FR64X:$dst),
                             (ins FR32X:$src1, f32mem:$src2),
                             "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                             []>,
                   EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>,
                   EVEX_CD8<32, CD8VT1>;

// Convert scalar double to scalar single
def VCVTSD2SSZrr : AVX512XDI<0x5A, MRMSrcReg,
                             (outs FR32X:$dst),
                             (ins FR64X:$src1, FR64X:$src2),
                             "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                             []>,
                   EVEX_4V, VEX_LIG, VEX_W, Sched<[WriteCvtF2F]>;
let mayLoad = 1 in
def VCVTSD2SSZrm : AVX512XDI<0x5A, MRMSrcMem,
                             (outs FR32X:$dst),
                             (ins FR64X:$src1, f64mem:$src2),
                             "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                             []>,
                   EVEX_4V, VEX_LIG, VEX_W,
                   Sched<[WriteCvtF2FLd, ReadAfterLd]>, EVEX_CD8<64, CD8VT1>;
}
3631
// Selection patterns for the scalar float <-> double conversions.

// Register extend: the source register is passed for both inputs.
def : Pat<(f64 (fextend FR32X:$src)),
          (VCVTSS2SDZrr FR32X:$src, FR32X:$src)>,
      Requires<[HasAVX512]>;

// Extend of an explicit f32 load: memory form with an undefined $src1.
def : Pat<(fextend (loadf32 addr:$src)),
          (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
      Requires<[HasAVX512]>;

// Extending load under OptForSize: a single memory-form instruction.
def : Pat<(extloadf32 addr:$src),
          (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
      Requires<[HasAVX512, OptForSize]>;

// Extending load under OptForSpeed: VMOVSSZrm followed by the register form.
def : Pat<(extloadf32 addr:$src),
          (VCVTSS2SDZrr (f32 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>,
      Requires<[HasAVX512, OptForSpeed]>;

// Register round: the source register is passed for both inputs.
def : Pat<(f32 (fround FR64X:$src)),
          (VCVTSD2SSZrr FR64X:$src, FR64X:$src)>,
      Requires<[HasAVX512]>;
3647
// Packed conversion with an additional form that takes an AVX512RC operand.
// Emits `rr` and `rm` (both selected from OpNode) plus a pattern-less `rrb`.
//   opc       - opcode byte.
//   asm       - mnemonic; the operand string is appended to it.
//   SrcRC     - register class of the source.
//   DstRC     - register class of the result.
//   OpNode    - DAG node matched by `rr` and `rm`.
//   mem_frag  - load fragment; its result is bitconverted to InVT in `rm`.
//   x86memop  - memory operand class of the source in `rm`.
//   OpVT/InVT - result and source value types used in the patterns.
//   d         - Domain forwarded to AVX512PI.
multiclass avx512_vcvt_fp_with_rc<bits<8> opc, string asm,
                                  RegisterClass SrcRC, RegisterClass DstRC,
                                  SDNode OpNode, PatFrag mem_frag,
                                  X86MemOperand x86memop,
                                  ValueType OpVT, ValueType InVT,
                                  Domain d> {
let hasSideEffects = 0 in {
  // Register source.
  def rr : AVX512PI<opc, MRMSrcReg,
                    (outs DstRC:$dst),
                    (ins SrcRC:$src),
                    !strconcat(asm," \t{$src, $dst|$dst, $src}"),
                    [(set DstRC:$dst,
                          (OpVT (OpNode (InVT SrcRC:$src))))],
                    d>,
           EVEX;

  // Register source plus $rc operand; tagged EVEX_B and EVEX_RC, no pattern.
  def rrb : AVX512PI<opc, MRMSrcReg,
                     (outs DstRC:$dst),
                     (ins SrcRC:$src, AVX512RC:$rc),
                     !strconcat(asm," \t{$rc, $src, $dst|$dst, $src, $rc}"),
                     [],
                     d>,
            EVEX, EVEX_B, EVEX_RC;

  // Memory source.
  let mayLoad = 1 in
  def rm : AVX512PI<opc, MRMSrcMem,
                    (outs DstRC:$dst),
                    (ins x86memop:$src),
                    !strconcat(asm," \t{$src, $dst|$dst, $src}"),
                    [(set DstRC:$dst,
                          (OpVT (OpNode (InVT (bitconvert
                                                (mem_frag addr:$src))))))],
                    d>,
           EVEX;
} // hasSideEffects = 0
}
3667
// Packed conversion with a register form (`rr`) and a memory form (`rm`),
// both selected from OpNode. Takes the same parameters as
// avx512_vcvt_fp_with_rc but defines no `rrb` form.
//   opc       - opcode byte.
//   asm       - mnemonic; the operand string is appended to it.
//   SrcRC     - register class of the source.
//   DstRC     - register class of the result.
//   OpNode    - DAG node matched by both forms.
//   mem_frag  - load fragment; its result is bitconverted to InVT in `rm`.
//   x86memop  - memory operand class of the source in `rm`.
//   OpVT/InVT - result and source value types used in the patterns.
//   d         - Domain forwarded to AVX512PI.
multiclass avx512_vcvt_fp<bits<8> opc, string asm,
                          RegisterClass SrcRC, RegisterClass DstRC,
                          SDNode OpNode, PatFrag mem_frag,
                          X86MemOperand x86memop,
                          ValueType OpVT, ValueType InVT,
                          Domain d> {
let hasSideEffects = 0 in {
  // Register source.
  def rr : AVX512PI<opc, MRMSrcReg,
                    (outs DstRC:$dst),
                    (ins SrcRC:$src),
                    !strconcat(asm," \t{$src, $dst|$dst, $src}"),
                    [(set DstRC:$dst,
                          (OpVT (OpNode (InVT SrcRC:$src))))],
                    d>,
           EVEX;

  // Memory source.
  let mayLoad = 1 in
  def rm : AVX512PI<opc, MRMSrcMem,
                    (outs DstRC:$dst),
                    (ins x86memop:$src),
                    !strconcat(asm," \t{$src, $dst|$dst, $src}"),
                    [(set DstRC:$dst,
                          (OpVT (OpNode (InVT (bitconvert
                                                (mem_frag addr:$src))))))],
                    d>,
           EVEX;
} // hasSideEffects = 0
}
3684
// Packed double -> single (selected from fround), with an $rc form.
defm VCVTPD2PSZ : avx512_vcvt_fp_with_rc<0x5A, "vcvtpd2ps", VR512, VR256X,
                                         fround, memopv8f64, f512mem,
                                         v8f32, v8f64, SSEPackedSingle>,
                  EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;

// Packed single -> double (selected from fextend).
defm VCVTPS2PDZ : avx512_vcvt_fp<0x5A, "vcvtps2pd", VR256X, VR512,
                                 fextend, memopv4f64, f256mem,
                                 v8f64, v8f32, SSEPackedDouble>,
                  EVEX_V512, PS, EVEX_CD8<32, CD8VH>;

// Extending v8f32 load selects the memory form.
// NOTE(review): the same pattern is repeated further down in this file under
// `let Predicates = [HasAVX512]` - confirm whether both copies are needed.
def : Pat<(v8f64 (extloadv8f32 addr:$src)),
          (VCVTPS2PDZrm addr:$src)>;

// cvtpd2ps intrinsic with zero pass-through, all-ones mask and
// FROUND_CURRENT selects the plain register form.
def : Pat<(v8f32 (int_x86_avx512_mask_cvtpd2ps_512
                   (v8f64 VR512:$src),
                   (bc_v8f32 (v8i32 immAllZerosV)),
                   (i8 -1),
                   (i32 FROUND_CURRENT))),
          (VCVTPD2PSZrr VR512:$src)>;

// Same intrinsic with an immediate $rc selects the `rrb` form.
def : Pat<(v8f32 (int_x86_avx512_mask_cvtpd2ps_512
                   (v8f64 VR512:$src),
                   (bc_v8f32 (v8i32 immAllZerosV)),
                   (i8 -1),
                   imm:$rc)),
          (VCVTPD2PSZrrb VR512:$src, imm:$rc)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003704
//===----------------------------------------------------------------------===//
// AVX-512 Vector convert from signed integer to float/double
//===----------------------------------------------------------------------===//
3708
// Signed integer -> packed single (with an $rc form) and packed double.
defm VCVTDQ2PSZ   : avx512_vcvt_fp_with_rc<0x5B, "vcvtdq2ps", VR512, VR512,
                                           sint_to_fp, memopv8i64, i512mem,
                                           v16f32, v16i32, SSEPackedSingle>,
                    EVEX_V512, PS, EVEX_CD8<32, CD8VF>;

defm VCVTDQ2PDZ   : avx512_vcvt_fp<0xE6, "vcvtdq2pd", VR256X, VR512,
                                   sint_to_fp, memopv4i64, i256mem,
                                   v8f64, v8i32, SSEPackedDouble>,
                    EVEX_V512, XS, EVEX_CD8<32, CD8VH>;

// Truncating packed FP -> signed integer.
defm VCVTTPS2DQZ  : avx512_vcvt_fp<0x5B, "vcvttps2dq", VR512, VR512,
                                   fp_to_sint, memopv16f32, f512mem,
                                   v16i32, v16f32, SSEPackedSingle>,
                    EVEX_V512, XS, EVEX_CD8<32, CD8VF>;

defm VCVTTPD2DQZ  : avx512_vcvt_fp<0xE6, "vcvttpd2dq", VR512, VR256X,
                                   fp_to_sint, memopv8f64, f512mem,
                                   v8i32, v8f64, SSEPackedDouble>,
                    EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Truncating packed FP -> unsigned integer.
defm VCVTTPS2UDQZ : avx512_vcvt_fp<0x78, "vcvttps2udq", VR512, VR512,
                                   fp_to_uint, memopv16f32, f512mem,
                                   v16i32, v16f32, SSEPackedSingle>,
                    EVEX_V512, PS, EVEX_CD8<32, CD8VF>;

// cvttps2udq (src, 0, mask-all-ones, sae-current)
def : Pat<(v16i32 (int_x86_avx512_mask_cvttps2udq_512
                    (v16f32 VR512:$src),
                    (v16i32 immAllZerosV),
                    (i16 -1),
                    FROUND_CURRENT)),
          (VCVTTPS2UDQZrr VR512:$src)>;

defm VCVTTPD2UDQZ : avx512_vcvt_fp<0x78, "vcvttpd2udq", VR512, VR256X,
                                   fp_to_uint, memopv8f64, f512mem,
                                   v8i32, v8f64, SSEPackedDouble>,
                    EVEX_V512, PS, VEX_W, EVEX_CD8<64, CD8VF>;

// cvttpd2udq (src, 0, mask-all-ones, sae-current)
def : Pat<(v8i32 (int_x86_avx512_mask_cvttpd2udq_512
                   (v8f64 VR512:$src),
                   (v8i32 immAllZerosV),
                   (i8 -1),
                   FROUND_CURRENT)),
          (VCVTTPD2UDQZrr VR512:$src)>;
3748
// Unsigned integer -> packed double / packed single (opcode 0x7A).
// The source vectors are integer (v8i32 / v16i32), so the memory forms use the
// integer memory operand classes i256mem / i512mem, in line with the signed
// VCVTDQ2PDZ / VCVTDQ2PSZ definitions earlier in this file.
defm VCVTUDQ2PDZ : avx512_vcvt_fp<0x7A, "vcvtudq2pd", VR256X, VR512,
                                  uint_to_fp, memopv4i64, i256mem,
                                  v8f64, v8i32, SSEPackedDouble>,
                   EVEX_V512, XS, EVEX_CD8<32, CD8VH>;

defm VCVTUDQ2PSZ : avx512_vcvt_fp_with_rc<0x7A, "vcvtudq2ps", VR512, VR512,
                                          uint_to_fp, memopv16i32, i512mem,
                                          v16f32, v16i32, SSEPackedSingle>,
                   EVEX_V512, XD, EVEX_CD8<32, CD8VF>;
3758
// 256-/128-bit unsigned conversions are selected onto the 512-bit
// instructions: the source is placed in a wider register with SUBREG_TO_REG,
// converted, and the result sub-register is taken with EXTRACT_SUBREG.
def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
          (EXTRACT_SUBREG
            (v16i32 (VCVTTPS2UDQZrr
              (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))),
            sub_ymm)>;

def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))),
          (EXTRACT_SUBREG
            (v16i32 (VCVTTPS2UDQZrr
              (v16f32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))),
            sub_xmm)>;

def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
          (EXTRACT_SUBREG
            (v16f32 (VCVTUDQ2PSZrr
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))),
            sub_ymm)>;

def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
          (EXTRACT_SUBREG
            (v16f32 (VCVTUDQ2PSZrr
              (v16i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))),
            sub_xmm)>;

// v4i32 -> v4f64: the source goes in via sub_xmm, the result comes out via
// sub_ymm.
def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
          (EXTRACT_SUBREG
            (v8f64 (VCVTUDQ2PDZrr
              (v8i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))),
            sub_ymm)>;

// Masked intrinsics with zero pass-through and all-ones mask. The *2ps
// intrinsics carry an immediate $rc and select the `rrb` forms; the *2pd
// intrinsics select the plain `rr` forms.
def : Pat<(v16f32 (int_x86_avx512_mask_cvtdq2ps_512
                    (v16i32 VR512:$src),
                    (bc_v16f32 (v16i32 immAllZerosV)),
                    (i16 -1),
                    imm:$rc)),
          (VCVTDQ2PSZrrb VR512:$src, imm:$rc)>;
def : Pat<(v8f64 (int_x86_avx512_mask_cvtdq2pd_512
                   (v8i32 VR256X:$src),
                   (bc_v8f64 (v16i32 immAllZerosV)),
                   (i8 -1))),
          (VCVTDQ2PDZrr VR256X:$src)>;
def : Pat<(v16f32 (int_x86_avx512_mask_cvtudq2ps_512
                    (v16i32 VR512:$src),
                    (bc_v16f32 (v16i32 immAllZerosV)),
                    (i16 -1),
                    imm:$rc)),
          (VCVTUDQ2PSZrrb VR512:$src, imm:$rc)>;
def : Pat<(v8f64 (int_x86_avx512_mask_cvtudq2pd_512
                   (v8i32 VR256X:$src),
                   (bc_v8f64 (v16i32 immAllZerosV)),
                   (i8 -1))),
          (VCVTUDQ2PDZrr VR256X:$src)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003791
// Pattern-less packed FP-to-integer conversion with `rr`, `rrb` and `rm`
// forms. Selection is attached separately with `def : Pat`.
//   opc      - opcode byte.
//   asm      - mnemonic; the operand string is appended to it.
//   SrcRC    - register class of the source.
//   DstRC    - register class of the result.
//   mem_frag - accepted but not referenced (no def here has a pattern).
//   x86memop - memory operand class of the source in `rm`.
//   d        - Domain forwarded to AVX512PI.
multiclass avx512_vcvt_fp2int<bits<8> opc, string asm,
                              RegisterClass SrcRC, RegisterClass DstRC,
                              PatFrag mem_frag, X86MemOperand x86memop,
                              Domain d> {
let hasSideEffects = 0 in {
  // Register source.
  def rr : AVX512PI<opc, MRMSrcReg,
                    (outs DstRC:$dst),
                    (ins SrcRC:$src),
                    !strconcat(asm," \t{$src, $dst|$dst, $src}"),
                    [], d>,
           EVEX;

  // Register source plus $rc operand; tagged EVEX_B and EVEX_RC.
  def rrb : AVX512PI<opc, MRMSrcReg,
                     (outs DstRC:$dst),
                     (ins SrcRC:$src, AVX512RC:$rc),
                     !strconcat(asm," \t{$rc, $src, $dst|$dst, $src, $rc}"),
                     [], d>,
            EVEX, EVEX_B, EVEX_RC;

  // Memory source.
  let mayLoad = 1 in
  def rm : AVX512PI<opc, MRMSrcMem,
                    (outs DstRC:$dst),
                    (ins x86memop:$src),
                    !strconcat(asm," \t{$src, $dst|$dst, $src}"),
                    [], d>,
           EVEX;
} // hasSideEffects = 0
}
3808
// Packed FP -> signed integer; selected only through the masked intrinsics
// below.
defm VCVTPS2DQZ  : avx512_vcvt_fp2int<0x5B, "vcvtps2dq", VR512, VR512,
                                      memopv16f32, f512mem, SSEPackedSingle>,
                   PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VCVTPD2DQZ  : avx512_vcvt_fp2int<0xE6, "vcvtpd2dq", VR512, VR256X,
                                      memopv8f64, f512mem, SSEPackedDouble>,
                   XD, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;

// Zero pass-through, all-ones mask, immediate $rc -> `rrb` form.
def : Pat<(v16i32 (int_x86_avx512_mask_cvtps2dq_512
                    (v16f32 VR512:$src),
                    (v16i32 immAllZerosV),
                    (i16 -1),
                    imm:$rc)),
          (VCVTPS2DQZrrb VR512:$src, imm:$rc)>;

def : Pat<(v8i32 (int_x86_avx512_mask_cvtpd2dq_512
                   (v8f64 VR512:$src),
                   (v8i32 immAllZerosV),
                   (i8 -1),
                   imm:$rc)),
          (VCVTPD2DQZrrb VR512:$src, imm:$rc)>;

// Packed FP -> unsigned integer; same scheme as above.
defm VCVTPS2UDQZ : avx512_vcvt_fp2int<0x79, "vcvtps2udq", VR512, VR512,
                                      memopv16f32, f512mem, SSEPackedSingle>,
                   PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VCVTPD2UDQZ : avx512_vcvt_fp2int<0x79, "vcvtpd2udq", VR512, VR256X,
                                      memopv8f64, f512mem, SSEPackedDouble>,
                   VEX_W, PS, EVEX_V512, EVEX_CD8<64, CD8VF>;

def : Pat<(v16i32 (int_x86_avx512_mask_cvtps2udq_512
                    (v16f32 VR512:$src),
                    (v16i32 immAllZerosV),
                    (i16 -1),
                    imm:$rc)),
          (VCVTPS2UDQZrrb VR512:$src, imm:$rc)>;

def : Pat<(v8i32 (int_x86_avx512_mask_cvtpd2udq_512
                   (v8f64 VR512:$src),
                   (v8i32 immAllZerosV),
                   (i8 -1),
                   imm:$rc)),
          (VCVTPD2UDQZrrb VR512:$src, imm:$rc)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003838
// Memory-form selection for the 8-element float <-> double conversions:
// fround of a loaded v8f64 and an extending v8f32 load.
let Predicates = [HasAVX512] in {
  def : Pat<(v8f32 (fround (loadv8f64 addr:$src))),
            (VCVTPD2PSZrm addr:$src)>;

  def : Pat<(v8f64 (extloadv8f32 addr:$src)),
            (VCVTPS2PDZrm addr:$src)>;
}
3845
Elena Demikhovskydd0794e2013-10-24 07:16:35 +00003846//===----------------------------------------------------------------------===//
3847// Half precision conversion instructions
3848//===----------------------------------------------------------------------===//
// vcvtph2ps (opcode 0x13): pattern-less register and memory source forms.
//   destRC   - register class of the result.
//   srcRC    - register class of the source in the `rr` form.
//   x86memop - memory operand class of the source in the `rm` form.
multiclass avx512_cvtph2ps<RegisterClass destRC, RegisterClass srcRC,
                           X86MemOperand x86memop> {
  // Register source.
  def rr : AVX5128I<0x13, MRMSrcReg,
                    (outs destRC:$dst),
                    (ins srcRC:$src),
                    "vcvtph2ps\t{$src, $dst|$dst, $src}",
                    []>,
           EVEX;

  // Memory source; flags are stated explicitly because there is no pattern.
  let hasSideEffects = 0, mayLoad = 1 in
  def rm : AVX5128I<0x13, MRMSrcMem,
                    (outs destRC:$dst),
                    (ins x86memop:$src),
                    "vcvtph2ps\t{$src, $dst|$dst, $src}",
                    []>,
           EVEX;
}
3858
// vcvtps2ph (opcode 0x1D): register destination form and store form, each
// taking an 8-bit immediate ($src2). Neither has a selection pattern; only
// the store form is explicitly flagged as a side-effect-free store.
multiclass avx512_cvtps2ph<RegisterClass destRC, RegisterClass srcRC,
                           X86MemOperand x86memop> {
  def rr : AVX512AIi8<0x1D, MRMDestReg,
                      (outs destRC:$dst), (ins srcRC:$src1, i32i8imm:$src2),
                      "vcvtps2ph \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      []>,
           EVEX;

  let hasSideEffects = 0, mayStore = 1 in {
    def mr : AVX512AIi8<0x1D, MRMDestMem,
                        (outs),
                        (ins x86memop:$dst, srcRC:$src1, i32i8imm:$src2),
                        "vcvtps2ph \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                        []>,
             EVEX;
  }
}
3870
// 512-bit half <-> single precision conversions. The half-precision side
// lives in a 256-bit register / memory operand.
defm VCVTPH2PSZ : avx512_cvtph2ps<VR512, VR256X, f256mem>,
                  EVEX_V512, EVEX_CD8<32, CD8VH>;
defm VCVTPS2PHZ : avx512_cvtps2ph<VR256X, VR512, f256mem>,
                  EVEX_V512, EVEX_CD8<32, CD8VH>;

// Unmasked intrinsic forms: zero pass-through and an all-ones mask.
def : Pat<(v16i16 (int_x86_avx512_mask_vcvtps2ph_512
                     (v16f32 VR512:$src), imm:$rc,
                     (bc_v16i16(v8i32 immAllZerosV)), (i16 -1))),
          (VCVTPS2PHZrr VR512:$src, imm:$rc)>;

// Only matched when the rounding operand is FROUND_CURRENT.
def : Pat<(v16f32 (int_x86_avx512_mask_vcvtph2ps_512
                     (v16i16 VR256X:$src),
                     (bc_v16f32(v16i32 immAllZerosV)), (i16 -1),
                     (i32 FROUND_CURRENT))),
          (VCVTPH2PSZrr VR256X:$src)>;
3883
// Scalar ordered / unordered compares. All of them define EFLAGS and are
// predicated on HasAVX512.
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  // Unordered compares on scalar FP registers, selected from X86cmp.
  defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
                                 "ucomiss">,
                   PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64,
                                 "ucomisd">,
                   PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;

  // Ordered compares: the selection pattern is forced to be empty.
  let Pattern = []<dag> in {
    defm VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, undef, v4f32, f128mem, load,
                                  "comiss">,
                    PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
    defm VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, undef, v2f64, f128mem, load,
                                  "comisd">,
                    PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  }

  // Codegen-only variants on VR128X, selected from X86ucomi / X86comi.
  let isCodeGenOnly = 1 in {
    defm Int_VUCOMISSZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v4f32, f128mem,
                                       load, "ucomiss">,
                         PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
    defm Int_VUCOMISDZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v2f64, f128mem,
                                       load, "ucomisd">,
                         PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;

    defm Int_VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v4f32, f128mem,
                                      load, "comiss">,
                        PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
    defm Int_VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v2f64, f128mem,
                                      load, "comisd">,
                        PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  }
}
3915
/// avx512_fp14_s - scalar rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd.
/// Provides a two-source register form (rr) and a register/memory form (rm).
/// Neither form has a selection pattern, so both are flagged side-effect
/// free explicitly; the memory form is additionally flagged as a load.
multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
                         X86MemOperand x86memop> {
  let hasSideEffects = 0 in
  def rr : AVX5128I<opc, MRMSrcReg,
                    (outs RC:$dst), (ins RC:$src1, RC:$src2),
                    OpcodeStr # " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    []>,
           EVEX_4V;

  let hasSideEffects = 0, mayLoad = 1 in
  def rm : AVX5128I<opc, MRMSrcMem,
                    (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
                    OpcodeStr # " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    []>,
           EVEX_4V;
}
3932
// Scalar 14-bit reciprocal (0x4D) and reciprocal square root (0x4F).
defm VRCP14SS   : avx512_fp14_s<0x4D, "vrcp14ss", FR32X, f32mem>,
                  EVEX_CD8<32, CD8VT1>;
defm VRCP14SD   : avx512_fp14_s<0x4D, "vrcp14sd", FR64X, f64mem>,
                  VEX_W, EVEX_CD8<64, CD8VT1>;
defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", FR32X, f32mem>,
                  EVEX_CD8<32, CD8VT1>;
defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", FR64X, f64mem>,
                  VEX_W, EVEX_CD8<64, CD8VT1>;

// Unmasked intrinsic forms (zero pass-through, all-ones mask). The intrinsics
// operate on VR128X values while the instructions take FR32X/FR64X operands,
// so both sources and the result are moved between classes with
// COPY_TO_REGCLASS.
def : Pat<(v4f32 (int_x86_avx512_rcp14_ss
                    (v4f32 VR128X:$src1), (v4f32 VR128X:$src2),
                    (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1))),
          (COPY_TO_REGCLASS
             (VRCP14SSrr (COPY_TO_REGCLASS VR128X:$src1, FR32X),
                         (COPY_TO_REGCLASS VR128X:$src2, FR32X)),
             VR128X)>;

def : Pat<(v2f64 (int_x86_avx512_rcp14_sd
                    (v2f64 VR128X:$src1), (v2f64 VR128X:$src2),
                    (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1))),
          (COPY_TO_REGCLASS
             (VRCP14SDrr (COPY_TO_REGCLASS VR128X:$src1, FR64X),
                         (COPY_TO_REGCLASS VR128X:$src2, FR64X)),
             VR128X)>;

def : Pat<(v4f32 (int_x86_avx512_rsqrt14_ss
                    (v4f32 VR128X:$src1), (v4f32 VR128X:$src2),
                    (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1))),
          (COPY_TO_REGCLASS
             (VRSQRT14SSrr (COPY_TO_REGCLASS VR128X:$src1, FR32X),
                           (COPY_TO_REGCLASS VR128X:$src2, FR32X)),
             VR128X)>;

def : Pat<(v2f64 (int_x86_avx512_rsqrt14_sd
                    (v2f64 VR128X:$src1), (v2f64 VR128X:$src2),
                    (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1))),
          (COPY_TO_REGCLASS
             (VRSQRT14SDrr (COPY_TO_REGCLASS VR128X:$src1, FR64X),
                           (COPY_TO_REGCLASS VR128X:$src2, FR64X)),
             VR128X)>;
Elena Demikhovskya3a71402013-10-09 08:16:14 +00003961
/// avx512_fp14_p - packed rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd.
/// Register (r) and memory (m) forms, both selected from OpNode applied to a
/// value of type OpVt; the memory form loads its operand through mem_frag.
multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         RegisterClass RC, X86MemOperand x86memop,
                         PatFrag mem_frag, ValueType OpVt> {
  def r : AVX5128I<opc, MRMSrcReg,
                   (outs RC:$dst), (ins RC:$src),
                   OpcodeStr # " \t{$src, $dst|$dst, $src}",
                   [(set RC:$dst, (OpVt (OpNode RC:$src)))]>,
          EVEX;

  def m : AVX5128I<opc, MRMSrcMem,
                   (outs RC:$dst), (ins x86memop:$src),
                   OpcodeStr # " \t{$src, $dst|$dst, $src}",
                   [(set RC:$dst, (OpVt (OpNode (mem_frag addr:$src))))]>,
          EVEX;
}
// 512-bit packed 14-bit reciprocal square root (0x4E) and reciprocal (0x4C).
defm VRSQRT14PSZ : avx512_fp14_p<0x4E, "vrsqrt14ps", X86frsqrt, VR512,
                                 f512mem, memopv16f32, v16f32>,
                   EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VRSQRT14PDZ : avx512_fp14_p<0x4E, "vrsqrt14pd", X86frsqrt, VR512,
                                 f512mem, memopv8f64, v8f64>,
                   VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
defm VRCP14PSZ   : avx512_fp14_p<0x4C, "vrcp14ps", X86frcp, VR512,
                                 f512mem, memopv16f32, v16f32>,
                   EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VRCP14PDZ   : avx512_fp14_p<0x4C, "vrcp14pd", X86frcp, VR512,
                                 f512mem, memopv8f64, v8f64>,
                   VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;

// Unmasked intrinsic forms: zero pass-through and an all-ones mask.
def : Pat<(v16f32 (int_x86_avx512_rsqrt14_ps_512
                     (v16f32 VR512:$src),
                     (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1))),
          (VRSQRT14PSZr VR512:$src)>;
def : Pat<(v8f64 (int_x86_avx512_rsqrt14_pd_512
                    (v8f64 VR512:$src),
                    (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
          (VRSQRT14PDZr VR512:$src)>;

def : Pat<(v16f32 (int_x86_avx512_rcp14_ps_512
                     (v16f32 VR512:$src),
                     (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1))),
          (VRCP14PSZr VR512:$src)>;
def : Pat<(v8f64 (int_x86_avx512_rcp14_pd_512
                    (v8f64 VR512:$src),
                    (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
          (VRCP14PDZr VR512:$src)>;
3998
/// avx512_fp28_s - scalar rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd.
/// All forms are predicated on HasERI, have no selection pattern and are
/// flagged side-effect free:
///   rr  - two register sources
///   rrb - two register sources, printed with {sae} and encoded with EVEX_B
///   rm  - register + memory source (flagged as a load)
multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
                         X86MemOperand x86memop> {
  let hasSideEffects = 0, Predicates = [HasERI] in {
    def rr : AVX5128I<opc, MRMSrcReg,
                      (outs RC:$dst), (ins RC:$src1, RC:$src2),
                      OpcodeStr #
                        " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      []>,
             EVEX_4V;

    def rrb : AVX5128I<opc, MRMSrcReg,
                       (outs RC:$dst), (ins RC:$src1, RC:$src2),
                       OpcodeStr #
                         " \t{{sae}, $src2, $src1, $dst|$dst, $src1, $src2, {sae}}",
                       []>,
              EVEX_4V, EVEX_B;

    let mayLoad = 1 in
    def rm : AVX5128I<opc, MRMSrcMem,
                      (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
                      OpcodeStr #
                        " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      []>,
             EVEX_4V;
  }
}
4020
// Scalar 28-bit reciprocal (0xCB) and reciprocal square root (0xCD).
defm VRCP28SS   : avx512_fp28_s<0xCB, "vrcp28ss", FR32X, f32mem>,
                  EVEX_CD8<32, CD8VT1>;
defm VRCP28SD   : avx512_fp28_s<0xCB, "vrcp28sd", FR64X, f64mem>,
                  VEX_W, EVEX_CD8<64, CD8VT1>;
defm VRSQRT28SS : avx512_fp28_s<0xCD, "vrsqrt28ss", FR32X, f32mem>,
                  EVEX_CD8<32, CD8VT1>;
defm VRSQRT28SD : avx512_fp28_s<0xCD, "vrsqrt28sd", FR64X, f64mem>,
                  VEX_W, EVEX_CD8<64, CD8VT1>;

// Unmasked intrinsic forms with FROUND_NO_EXC select the rrb ({sae})
// instructions. Operands and result are moved between VR128X and the scalar
// FR32X/FR64X classes with COPY_TO_REGCLASS.
def : Pat<(v4f32 (int_x86_avx512_rcp28_ss
                    (v4f32 VR128X:$src1), (v4f32 VR128X:$src2),
                    (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1),
                    FROUND_NO_EXC)),
          (COPY_TO_REGCLASS
             (VRCP28SSrrb (COPY_TO_REGCLASS VR128X:$src1, FR32X),
                          (COPY_TO_REGCLASS VR128X:$src2, FR32X)),
             VR128X)>;

def : Pat<(v2f64 (int_x86_avx512_rcp28_sd
                    (v2f64 VR128X:$src1), (v2f64 VR128X:$src2),
                    (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1),
                    FROUND_NO_EXC)),
          (COPY_TO_REGCLASS
             (VRCP28SDrrb (COPY_TO_REGCLASS VR128X:$src1, FR64X),
                          (COPY_TO_REGCLASS VR128X:$src2, FR64X)),
             VR128X)>;

def : Pat<(v4f32 (int_x86_avx512_rsqrt28_ss
                    (v4f32 VR128X:$src1), (v4f32 VR128X:$src2),
                    (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1),
                    FROUND_NO_EXC)),
          (COPY_TO_REGCLASS
             (VRSQRT28SSrrb (COPY_TO_REGCLASS VR128X:$src1, FR32X),
                            (COPY_TO_REGCLASS VR128X:$src2, FR32X)),
             VR128X)>;

def : Pat<(v2f64 (int_x86_avx512_rsqrt28_sd
                    (v2f64 VR128X:$src1), (v2f64 VR128X:$src2),
                    (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1),
                    FROUND_NO_EXC)),
          (COPY_TO_REGCLASS
             (VRSQRT28SDrrb (COPY_TO_REGCLASS VR128X:$src1, FR64X),
                            (COPY_TO_REGCLASS VR128X:$src2, FR64X)),
             VR128X)>;
4053
/// avx512_fp28_p - packed rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd.
/// All forms are predicated on HasERI, have no selection pattern and are
/// flagged side-effect free:
///   r  - register source
///   rb - register source, printed with {sae} and encoded with EVEX_B
///   m  - memory source
multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr,
                         RegisterClass RC, X86MemOperand x86memop> {
  let hasSideEffects = 0, Predicates = [HasERI] in {
    def r : AVX5128I<opc, MRMSrcReg,
                     (outs RC:$dst), (ins RC:$src),
                     OpcodeStr # " \t{$src, $dst|$dst, $src}",
                     []>,
            EVEX;

    def rb : AVX5128I<opc, MRMSrcReg,
                      (outs RC:$dst), (ins RC:$src),
                      OpcodeStr # " \t{{sae}, $src, $dst|$dst, $src, {sae}}",
                      []>,
             EVEX, EVEX_B;

    def m : AVX5128I<opc, MRMSrcMem,
                     (outs RC:$dst), (ins x86memop:$src),
                     OpcodeStr # " \t{$src, $dst|$dst, $src}",
                     []>,
            EVEX;
  }
}
// 512-bit packed 28-bit reciprocal square root (0xCC) and reciprocal (0xCA).
defm VRSQRT28PSZ : avx512_fp28_p<0xCC, "vrsqrt28ps", VR512, f512mem>,
                   EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VRSQRT28PDZ : avx512_fp28_p<0xCC, "vrsqrt28pd", VR512, f512mem>,
                   VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
defm VRCP28PSZ   : avx512_fp28_p<0xCA, "vrcp28ps", VR512, f512mem>,
                   EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VRCP28PDZ   : avx512_fp28_p<0xCA, "vrcp28pd", VR512, f512mem>,
                   VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;

// Unmasked intrinsic forms with FROUND_NO_EXC select the rb ({sae}) forms.
def : Pat<(v16f32 (int_x86_avx512_rsqrt28_ps
                     (v16f32 VR512:$src),
                     (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1),
                     FROUND_NO_EXC)),
          (VRSQRT28PSZrb VR512:$src)>;
def : Pat<(v8f64 (int_x86_avx512_rsqrt28_pd
                    (v8f64 VR512:$src),
                    (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1),
                    FROUND_NO_EXC)),
          (VRSQRT28PDZrb VR512:$src)>;

def : Pat<(v16f32 (int_x86_avx512_rcp28_ps
                     (v16f32 VR512:$src),
                     (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1),
                     FROUND_NO_EXC)),
          (VRCP28PSZrb VR512:$src)>;
def : Pat<(v8f64 (int_x86_avx512_rcp28_pd
                    (v8f64 VR512:$src),
                    (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1),
                    FROUND_NO_EXC)),
          (VRCP28PDZrb VR512:$src)>;
4093
/// avx512_sqrt_packed - 512-bit packed square root, single (PSZ*) and double
/// (PDZ*) precision, each with a register form (rr) and a load-folding form
/// (rm).
///   opc       - opcode byte shared by all four instructions.
///   OpcodeStr - mnemonic without the "ps"/"pd" suffix.
///   OpNode    - DAG node selected by the patterns.
///   itins_s/d - itineraries for the single / double precision forms.
multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              OpndItins itins_s, OpndItins itins_d> {
  def PSZrr : AVX512PSI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
                        !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                        [(set VR512:$dst, (v16f32 (OpNode VR512:$src)))],
                        itins_s.rr>,
              EVEX, EVEX_V512;

  // The load is matched directly through memopv16f32. The previous pattern
  // wrapped it in a v16f32 -> v16f32 bitconvert, which presumably never
  // occurs in the selection DAG and so kept the load from being folded.
  let mayLoad = 1 in
  def PSZrm : AVX512PSI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
                        !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                        [(set VR512:$dst,
                          (v16f32 (OpNode (memopv16f32 addr:$src))))],
                        itins_s.rm>,
              EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;

  // Double precision forms operate on 64-bit elements and therefore need
  // EVEX.W = 1 (VEX_W), matching the other *PD definitions in this file.
  def PDZrr : AVX512PDI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
                        !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                        [(set VR512:$dst, (v8f64 (OpNode VR512:$src)))],
                        itins_d.rr>,
              EVEX, EVEX_V512, VEX_W;

  // Use the v8f64 load fragment instead of bitcasting a v16f32 load.
  let mayLoad = 1 in
  def PDZrm : AVX512PDI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
                        !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                        [(set VR512:$dst,
                          (v8f64 (OpNode (memopv8f64 addr:$src))))],
                        itins_d.rm>,
              EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
4121
/// avx512_sqrt_scalar - scalar square root, single (SSZ*) and double (SDZ*)
/// precision.
///   *r / *m         - forms on FR32X/FR64X without a selection pattern.
///   *r_Int / *m_Int - codegen-only forms on VR128X selected from the
///                     F32Int / F64Int intrinsics.
///   itins_s/d       - itineraries for the single / double precision forms.
/// The double precision forms take their itineraries from itins_d, which was
/// previously unused (SDZr_Int used itins_s.rr, the others had none).
multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr,
                              Intrinsic F32Int, Intrinsic F64Int,
                              OpndItins itins_s, OpndItins itins_d> {
  // Single precision.
  def SSZr : SI<opc, MRMSrcReg, (outs FR32X:$dst),
                (ins FR32X:$src1, FR32X:$src2),
                !strconcat(OpcodeStr,
                           "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                [], itins_s.rr>, XS, EVEX_4V;
  let isCodeGenOnly = 1 in
  def SSZr_Int : SIi8<opc, MRMSrcReg, (outs VR128X:$dst),
                      (ins VR128X:$src1, VR128X:$src2),
                      !strconcat(OpcodeStr,
                                 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      [(set VR128X:$dst,
                        (F32Int VR128X:$src1, VR128X:$src2))],
                      itins_s.rr>, XS, EVEX_4V;
  let mayLoad = 1 in {
  def SSZm : SI<opc, MRMSrcMem, (outs FR32X:$dst),
                (ins FR32X:$src1, f32mem:$src2),
                !strconcat(OpcodeStr,
                           "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                [], itins_s.rm>, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>;
  let isCodeGenOnly = 1 in
  def SSZm_Int : SIi8<opc, MRMSrcMem, (outs VR128X:$dst),
                      (ins VR128X:$src1, ssmem:$src2),
                      !strconcat(OpcodeStr,
                                 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      [(set VR128X:$dst,
                        (F32Int VR128X:$src1, sse_load_f32:$src2))],
                      itins_s.rm>, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>;
  }

  // Double precision.
  def SDZr : SI<opc, MRMSrcReg, (outs FR64X:$dst),
                (ins FR64X:$src1, FR64X:$src2),
                !strconcat(OpcodeStr,
                           "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                [], itins_d.rr>, XD, EVEX_4V, VEX_W;
  let isCodeGenOnly = 1 in
  def SDZr_Int : SIi8<opc, MRMSrcReg, (outs VR128X:$dst),
                      (ins VR128X:$src1, VR128X:$src2),
                      !strconcat(OpcodeStr,
                                 "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      [(set VR128X:$dst,
                        (F64Int VR128X:$src1, VR128X:$src2))],
                      itins_d.rr>, XD, EVEX_4V, VEX_W;
  let mayLoad = 1 in {
  def SDZm : SI<opc, MRMSrcMem, (outs FR64X:$dst),
                (ins FR64X:$src1, f64mem:$src2),
                !strconcat(OpcodeStr,
                           "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                [], itins_d.rm>, XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
  let isCodeGenOnly = 1 in
  def SDZm_Int : SIi8<opc, MRMSrcMem, (outs VR128X:$dst),
                      (ins VR128X:$src1, sdmem:$src2),
                      !strconcat(OpcodeStr,
                                 "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      [(set VR128X:$dst,
                        (F64Int VR128X:$src1, sse_load_f64:$src2))],
                      itins_d.rm>, XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
  }
}
4182
4183
// Instantiate the scalar and packed square-root instructions (opcode 0x51)
// under the common VSQRT prefix. The scalar multiclass is given "sqrt" and
// the packed one "vsqrt".
// NOTE(review): presumably the SI/SIi8 base classes used by the scalar
// multiclass add the leading "v" themselves - confirm in X86InstrFormats.td.
defm VSQRT : avx512_sqrt_scalar<0x51, "sqrt",
                                int_x86_avx512_sqrt_ss,
                                int_x86_avx512_sqrt_sd,
                                SSE_SQRTSS, SSE_SQRTSD>,
             avx512_sqrt_packed<0x51, "vsqrt", fsqrt,
                                SSE_SQRTPS, SSE_SQRTPD>;
4189
// Selection patterns for square root, reciprocal square root and reciprocal
// when AVX-512 is available.
let Predicates = [HasAVX512] in {
  // Unmasked packed sqrt intrinsics (zero pass-through, all-ones mask,
  // FROUND_CURRENT) map onto the plain register forms.
  def : Pat<(v16f32 (int_x86_avx512_sqrt_ps_512 (v16f32 VR512:$src1),
                     (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1),
                     FROUND_CURRENT)),
            (VSQRTPSZrr VR512:$src1)>;
  def : Pat<(v8f64 (int_x86_avx512_sqrt_pd_512 (v8f64 VR512:$src1),
                    (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1),
                    FROUND_CURRENT)),
            (VSQRTPDZrr VR512:$src1)>;

  // Scalar fsqrt. The first source operand of the instruction is not
  // meaningful for these nodes, so it is IMPLICIT_DEF.
  // NOTE(review): the load-folding patterns below carry both the enclosing
  // 'let Predicates' and a Requires<[OptForSize]>; confirm which predicate
  // list TableGen ends up applying.
  def : Pat<(f32 (fsqrt FR32X:$src)),
            (VSQRTSSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
  def : Pat<(f32 (fsqrt (load addr:$src))),
            (VSQRTSSZm (f32 (IMPLICIT_DEF)), addr:$src)>,
            Requires<[OptForSize]>;
  def : Pat<(f64 (fsqrt FR64X:$src)),
            (VSQRTSDZr (f64 (IMPLICIT_DEF)), FR64X:$src)>;
  def : Pat<(f64 (fsqrt (load addr:$src))),
            (VSQRTSDZm (f64 (IMPLICIT_DEF)), addr:$src)>,
            Requires<[OptForSize]>;

  // Scalar reciprocal square root.
  def : Pat<(f32 (X86frsqrt FR32X:$src)),
            (VRSQRT14SSrr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
  def : Pat<(f32 (X86frsqrt (load addr:$src))),
            (VRSQRT14SSrm (f32 (IMPLICIT_DEF)), addr:$src)>,
            Requires<[OptForSize]>;

  // Scalar reciprocal.
  def : Pat<(f32 (X86frcp FR32X:$src)),
            (VRCP14SSrr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
  def : Pat<(f32 (X86frcp (load addr:$src))),
            (VRCP14SSrm (f32 (IMPLICIT_DEF)), addr:$src)>,
            Requires<[OptForSize]>;

  // Legacy SSE scalar sqrt intrinsics. The source is copied into the
  // extended FR32X/FR64X classes that VSQRTSSZr/VSQRTSDZr actually take
  // (previously FR32/FR64), consistent with the rcp14/rsqrt14 intrinsic
  // patterns elsewhere in this file.
  def : Pat<(int_x86_sse_sqrt_ss VR128X:$src),
            (COPY_TO_REGCLASS (VSQRTSSZr (f32 (IMPLICIT_DEF)),
                                         (COPY_TO_REGCLASS VR128X:$src, FR32X)),
                              VR128X)>;
  def : Pat<(int_x86_sse_sqrt_ss sse_load_f32:$src),
            (VSQRTSSZm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;

  def : Pat<(int_x86_sse2_sqrt_sd VR128X:$src),
            (COPY_TO_REGCLASS (VSQRTSDZr (f64 (IMPLICIT_DEF)),
                                         (COPY_TO_REGCLASS VR128X:$src, FR64X)),
                              VR128X)>;
  def : Pat<(int_x86_sse2_sqrt_sd sse_load_f64:$src),
            (VSQRTSDZm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>;
}
4235
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004236
/// avx512_fp_unop_rm - packed unary operation with an 8-bit immediate.
/// Emits register (PSr/PDr) and memory (PSm/PDm) forms, selected from the
/// V4F32Int / V2F64Int intrinsics respectively. The memory forms load through
/// mem_frag32 / mem_frag64 and use VForm for their EVEX_CD8 setting.
multiclass avx512_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
                             X86MemOperand x86memop, RegisterClass RC,
                             PatFrag mem_frag32, PatFrag mem_frag64,
                             Intrinsic V4F32Int, Intrinsic V2F64Int,
                             CD8VForm VForm> {
  let ExeDomain = SSEPackedSingle in {
    // Single precision, register source.
    def PSr : AVX512AIi8<opcps, MRMSrcReg,
                         (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
                         OpcodeStr #
                           "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [(set RC:$dst, (V4F32Int RC:$src1, imm:$src2))]>;

    // Single precision, memory source.
    def PSm : AVX512AIi8<opcps, MRMSrcMem,
                         (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
                         OpcodeStr #
                           "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [(set RC:$dst,
                           (V4F32Int (mem_frag32 addr:$src1),imm:$src2))]>,
              EVEX_CD8<32, VForm>;
  } // ExeDomain = SSEPackedSingle

  let ExeDomain = SSEPackedDouble in {
    // Double precision, register source.
    def PDr : AVX512AIi8<opcpd, MRMSrcReg,
                         (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
                         OpcodeStr #
                           "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [(set RC:$dst, (V2F64Int RC:$src1, imm:$src2))]>;

    // Double precision, memory source.
    def PDm : AVX512AIi8<opcpd, MRMSrcMem,
                         (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
                         OpcodeStr #
                           "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [(set RC:$dst,
                           (V2F64Int (mem_frag64 addr:$src1),imm:$src2))]>,
              EVEX_CD8<64, VForm>;
  } // ExeDomain = SSEPackedDouble
}
4279
/// avx512_fp_binop_rm - scalar binary operation with an 8-bit immediate.
/// For each of ss / sd it emits:
///   *r     - FR32X/FR64X register form, no pattern, flagged side-effect free
///   *r_Int - codegen-only VR128X register form selected from the intrinsic
///   *m     - VR128X + memory form selected from the intrinsic
/// The sd variants additionally set VEX_W.
multiclass avx512_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
                              string OpcodeStr,
                              Intrinsic F32Int,
                              Intrinsic F64Int> {
  let ExeDomain = GenericDomain in {
    // --- single precision ---------------------------------------------------
    let hasSideEffects = 0 in
    def SSr : AVX512AIi8<opcss, MRMSrcReg,
                (outs FR32X:$dst),
                (ins FR32X:$src1, FR32X:$src2, i32i8imm:$src3),
                OpcodeStr #
                  "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                []>;

    let isCodeGenOnly = 1 in
    def SSr_Int : AVX512AIi8<opcss, MRMSrcReg,
                    (outs VR128X:$dst),
                    (ins VR128X:$src1, VR128X:$src2, i32i8imm:$src3),
                    OpcodeStr #
                      "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                    [(set VR128X:$dst,
                      (F32Int VR128X:$src1, VR128X:$src2, imm:$src3))]>;

    def SSm : AVX512AIi8<opcss, MRMSrcMem,
                (outs VR128X:$dst),
                (ins VR128X:$src1, ssmem:$src2, i32i8imm:$src3),
                OpcodeStr #
                  "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                [(set VR128X:$dst,
                  (F32Int VR128X:$src1, sse_load_f32:$src2, imm:$src3))]>,
              EVEX_CD8<32, CD8VT1>;

    // --- double precision ---------------------------------------------------
    let hasSideEffects = 0 in
    def SDr : AVX512AIi8<opcsd, MRMSrcReg,
                (outs FR64X:$dst),
                (ins FR64X:$src1, FR64X:$src2, i32i8imm:$src3),
                OpcodeStr #
                  "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                []>,
              VEX_W;

    let isCodeGenOnly = 1 in
    def SDr_Int : AVX512AIi8<opcsd, MRMSrcReg,
                    (outs VR128X:$dst),
                    (ins VR128X:$src1, VR128X:$src2, i32i8imm:$src3),
                    OpcodeStr #
                      "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                    [(set VR128X:$dst,
                      (F64Int VR128X:$src1, VR128X:$src2, imm:$src3))]>,
                  VEX_W;

    def SDm : AVX512AIi8<opcsd, MRMSrcMem,
                (outs VR128X:$dst),
                (ins VR128X:$src1, sdmem:$src2, i32i8imm:$src3),
                OpcodeStr #
                  "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                [(set VR128X:$dst,
                  (F64Int VR128X:$src1, sse_load_f64:$src2, imm:$src3))]>,
              VEX_W, EVEX_CD8<64, CD8VT1>;
  } // ExeDomain = GenericDomain
}
4337
/// avx512_rndscale - packed rndscale with an 8-bit immediate.
/// Register (r) and memory (m) source forms, both without a selection
/// pattern, placed in execution domain d. mem_frag is accepted but not used
/// by either definition.
multiclass avx512_rndscale<bits<8> opc, string OpcodeStr,
                           X86MemOperand x86memop, RegisterClass RC,
                           PatFrag mem_frag, Domain d> {
  let ExeDomain = d in {
    // Register source.
    def r : AVX512AIi8<opc, MRMSrcReg,
                       (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
                       OpcodeStr #
                         " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       []>,
            EVEX;

    // Memory source.
    def m : AVX512AIi8<opc, MRMSrcMem,
                       (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
                       OpcodeStr #
                         " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       []>,
            EVEX;
  } // ExeDomain
}
4358
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00004359
// 512-bit packed rndscale, single precision (0x08).
defm VRNDSCALEPSZ : avx512_rndscale<0x08, "vrndscaleps", f512mem, VR512,
                                    memopv16f32, SSEPackedSingle>,
                    EVEX_V512, EVEX_CD8<32, CD8VF>;

// Unmasked intrinsic form: all-ones mask, FROUND_CURRENT, and a pass-through
// operand that is the same register as the source.
def : Pat<(v16f32 (int_x86_avx512_mask_rndscale_ps_512
                     (v16f32 VR512:$src1), imm:$src2,
                     (v16f32 VR512:$src1), (i16 -1),
                     FROUND_CURRENT)),
          (VRNDSCALEPSZr VR512:$src1, imm:$src2)>;


// 512-bit packed rndscale, double precision (0x09).
defm VRNDSCALEPDZ : avx512_rndscale<0x09, "vrndscalepd", f512mem, VR512,
                                    memopv8f64, SSEPackedDouble>,
                    EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

def : Pat<(v8f64 (int_x86_avx512_mask_rndscale_pd_512
                    (v8f64 VR512:$src1), imm:$src2,
                    (v8f64 VR512:$src1), (i8 -1),
                    FROUND_CURRENT)),
          (VRNDSCALEPDZr VR512:$src1, imm:$src2)>;
4378
/// avx512_rndscale_scalar - scalar rndscale with two sources and an 8-bit
/// immediate.
///   r - (RC:$src1, RC:$src2, imm $src3)
///   m - (RC:$src1, x86memop:$src2, imm $src3)
/// Neither form has a selection pattern. The assembly string lists all three
/// input operands; it previously omitted the $src3 immediate, so the printed
/// instruction did not match its operand list.
multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
                                  Operand x86memop, RegisterClass RC,
                                  Domain d> {
let ExeDomain = d in {
  def r : AVX512AIi8<opc, MRMSrcReg,
                     (outs RC:$dst), (ins RC:$src1, RC:$src2, i32i8imm:$src3),
                     !strconcat(OpcodeStr,
       " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                     []>, EVEX_4V;

  def m : AVX512AIi8<opc, MRMSrcMem,
                     (outs RC:$dst),
                     (ins RC:$src1, x86memop:$src2, i32i8imm:$src3),
                     !strconcat(OpcodeStr,
       " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                     []>, EVEX_4V;
} // ExeDomain
}
4395
// Scalar rndscale, single (0x0A) and double (0x0B) precision.
defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", ssmem, FR32X,
                            SSEPackedSingle>, EVEX_CD8<32, CD8VT1>;

// The double precision form operates on a 64-bit element and needs
// EVEX.W = 1, like VRNDSCALEPDZ and the other *SD definitions in this file.
defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", sdmem, FR64X,
                            SSEPackedDouble>, VEX_W, EVEX_CD8<64, CD8VT1>;
4401
// Scalar rounding nodes lowered to vrndscaless / vrndscalesd. The first
// source operand is IMPLICIT_DEF; the immediate selects the rounding
// behaviour:
//   ffloor -> 0x1, fceil -> 0x2, ftrunc -> 0x3, frint -> 0x4,
//   fnearbyint -> 0xC
// The patterns are guarded by HasAVX512 so that these scalar f32/f64 nodes
// cannot select AVX-512 instructions on targets without the feature, and
// every pattern states its result type explicitly.
let Predicates = [HasAVX512] in {
  def : Pat<(f32 (ffloor FR32X:$src)),
            (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x1))>;
  def : Pat<(f64 (ffloor FR64X:$src)),
            (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x1))>;
  def : Pat<(f32 (fnearbyint FR32X:$src)),
            (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0xC))>;
  def : Pat<(f64 (fnearbyint FR64X:$src)),
            (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0xC))>;
  def : Pat<(f32 (fceil FR32X:$src)),
            (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x2))>;
  def : Pat<(f64 (fceil FR64X:$src)),
            (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x2))>;
  def : Pat<(f32 (frint FR32X:$src)),
            (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x4))>;
  def : Pat<(f64 (frint FR64X:$src)),
            (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x4))>;
  def : Pat<(f32 (ftrunc FR32X:$src)),
            (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x3))>;
  def : Pat<(f64 (ftrunc FR64X:$src)),
            (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x3))>;
}
4422
// 512-bit rounding nodes lowered to vrndscaleps / vrndscalepd. Immediates:
//   ffloor -> 0x1, fnearbyint -> 0xC, fceil -> 0x2, frint -> 0x4,
//   ftrunc -> 0x3

// v16f32
def : Pat<(v16f32 (ffloor VR512:$src)), (VRNDSCALEPSZr VR512:$src, (i32 0x1))>;
def : Pat<(v16f32 (fnearbyint VR512:$src)),
          (VRNDSCALEPSZr VR512:$src, (i32 0xC))>;
def : Pat<(v16f32 (fceil VR512:$src)), (VRNDSCALEPSZr VR512:$src, (i32 0x2))>;
def : Pat<(v16f32 (frint VR512:$src)), (VRNDSCALEPSZr VR512:$src, (i32 0x4))>;
def : Pat<(v16f32 (ftrunc VR512:$src)), (VRNDSCALEPSZr VR512:$src, (i32 0x3))>;

// v8f64
def : Pat<(v8f64 (ffloor VR512:$src)), (VRNDSCALEPDZr VR512:$src, (i32 0x1))>;
def : Pat<(v8f64 (fnearbyint VR512:$src)),
          (VRNDSCALEPDZr VR512:$src, (i32 0xC))>;
def : Pat<(v8f64 (fceil VR512:$src)), (VRNDSCALEPDZr VR512:$src, (i32 0x2))>;
def : Pat<(v8f64 (frint VR512:$src)), (VRNDSCALEPDZr VR512:$src, (i32 0x4))>;
def : Pat<(v8f64 (ftrunc VR512:$src)), (VRNDSCALEPDZr VR512:$src, (i32 0x3))>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004444
4445//-------------------------------------------------
4446// Integer truncate and extend operations
4447//-------------------------------------------------
4448
// Down-converting vector moves (plain, signed-saturating and
// unsigned-saturating variants are all instantiated from this multiclass).
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   dstRC     - register class of the narrowed result.
//   srcRC     - register class of the wide source.
//   KRC       - write-mask register class.
//   x86memop  - memory operand used by the store forms.
// Forms: rr (unmasked), rrk (merge-masked), rrkz (zero-masked),
//        mr (store), mrk (masked store).
// None of the forms carries a selection pattern, so the store forms state
// mayStore explicitly; TableGen has no pattern to infer it from.
multiclass avx512_trunc_sat<bits<8> opc, string OpcodeStr,
                          RegisterClass dstRC, RegisterClass srcRC,
                          RegisterClass KRC, X86MemOperand x86memop> {
  def rr : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
               (ins srcRC:$src),
               !strconcat(OpcodeStr," \t{$src, $dst|$dst, $src}"),
               []>, EVEX;

  def rrk : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
               (ins KRC:$mask, srcRC:$src),
               !strconcat(OpcodeStr,
                 " \t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
               []>, EVEX, EVEX_K;

  def rrkz : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
               (ins KRC:$mask, srcRC:$src),
               !strconcat(OpcodeStr,
                 " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
               []>, EVEX, EVEX_KZ;

  let mayStore = 1 in {
  def mr : AVX512XS8I<opc, MRMDestMem, (outs), (ins x86memop:$dst, srcRC:$src),
               !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
               []>, EVEX;

  def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
               (ins x86memop:$dst, KRC:$mask, srcRC:$src),
               !strconcat(OpcodeStr, " \t{$src, $dst {${mask}}|${dst} {${mask}}, $src}"),
               []>, EVEX, EVEX_K;
  }

}
// Instantiations of avx512_trunc_sat. For every source/destination element
// pair there is a plain (VPMOVxy), a signed-saturating (VPMOVSxy) and an
// unsigned-saturating (VPMOVUSxy) variant. The source is always VR512.

// i64 -> i8
defm VPMOVQB   : avx512_trunc_sat<0x32, "vpmovqb",   VR128X, VR512, VK8WM,
                                  i128mem>,
                 EVEX_V512, EVEX_CD8<8, CD8VO>;
defm VPMOVSQB  : avx512_trunc_sat<0x22, "vpmovsqb",  VR128X, VR512, VK8WM,
                                  i128mem>,
                 EVEX_V512, EVEX_CD8<8, CD8VO>;
defm VPMOVUSQB : avx512_trunc_sat<0x12, "vpmovusqb", VR128X, VR512, VK8WM,
                                  i128mem>,
                 EVEX_V512, EVEX_CD8<8, CD8VO>;

// i64 -> i16
defm VPMOVQW   : avx512_trunc_sat<0x34, "vpmovqw",   VR128X, VR512, VK8WM,
                                  i128mem>,
                 EVEX_V512, EVEX_CD8<16, CD8VQ>;
defm VPMOVSQW  : avx512_trunc_sat<0x24, "vpmovsqw",  VR128X, VR512, VK8WM,
                                  i128mem>,
                 EVEX_V512, EVEX_CD8<16, CD8VQ>;
defm VPMOVUSQW : avx512_trunc_sat<0x14, "vpmovusqw", VR128X, VR512, VK8WM,
                                  i128mem>,
                 EVEX_V512, EVEX_CD8<16, CD8VQ>;

// i64 -> i32
defm VPMOVQD   : avx512_trunc_sat<0x35, "vpmovqd",   VR256X, VR512, VK8WM,
                                  i256mem>,
                 EVEX_V512, EVEX_CD8<32, CD8VH>;
defm VPMOVSQD  : avx512_trunc_sat<0x25, "vpmovsqd",  VR256X, VR512, VK8WM,
                                  i256mem>,
                 EVEX_V512, EVEX_CD8<32, CD8VH>;
defm VPMOVUSQD : avx512_trunc_sat<0x15, "vpmovusqd", VR256X, VR512, VK8WM,
                                  i256mem>,
                 EVEX_V512, EVEX_CD8<32, CD8VH>;

// i32 -> i16
defm VPMOVDW   : avx512_trunc_sat<0x33, "vpmovdw",   VR256X, VR512, VK16WM,
                                  i256mem>,
                 EVEX_V512, EVEX_CD8<16, CD8VH>;
defm VPMOVSDW  : avx512_trunc_sat<0x23, "vpmovsdw",  VR256X, VR512, VK16WM,
                                  i256mem>,
                 EVEX_V512, EVEX_CD8<16, CD8VH>;
defm VPMOVUSDW : avx512_trunc_sat<0x13, "vpmovusdw", VR256X, VR512, VK16WM,
                                  i256mem>,
                 EVEX_V512, EVEX_CD8<16, CD8VH>;

// i32 -> i8
defm VPMOVDB   : avx512_trunc_sat<0x31, "vpmovdb",   VR128X, VR512, VK16WM,
                                  i128mem>,
                 EVEX_V512, EVEX_CD8<8, CD8VQ>;
defm VPMOVSDB  : avx512_trunc_sat<0x21, "vpmovsdb",  VR128X, VR512, VK16WM,
                                  i128mem>,
                 EVEX_V512, EVEX_CD8<8, CD8VQ>;
defm VPMOVUSDB : avx512_trunc_sat<0x11, "vpmovusdb", VR128X, VR512, VK16WM,
                                  i128mem>,
                 EVEX_V512, EVEX_CD8<8, CD8VQ>;
4509
// Unmasked X86vtrunc selects the plain register-to-register truncate.
def : Pat<(v16i8 (X86vtrunc (v8i64 VR512:$src))),
          (VPMOVQBrr VR512:$src)>;
def : Pat<(v8i16 (X86vtrunc (v8i64 VR512:$src))),
          (VPMOVQWrr VR512:$src)>;
def : Pat<(v16i16 (X86vtrunc (v16i32 VR512:$src))),
          (VPMOVDWrr VR512:$src)>;
def : Pat<(v16i8 (X86vtrunc (v16i32 VR512:$src))),
          (VPMOVDBrr VR512:$src)>;
def : Pat<(v8i32 (X86vtrunc (v8i64 VR512:$src))),
          (VPMOVQDrr VR512:$src)>;

// Masked X86vtruncm selects the zero-masking (rrkz) form of the same
// instructions; the mask is forwarded unchanged.
def : Pat<(v16i8 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))),
          (VPMOVDBrrkz VK16WM:$mask, VR512:$src)>;
def : Pat<(v16i16 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))),
          (VPMOVDWrrkz VK16WM:$mask, VR512:$src)>;
def : Pat<(v8i16 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))),
          (VPMOVQWrrkz VK8WM:$mask, VR512:$src)>;
def : Pat<(v8i32 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))),
          (VPMOVQDrrkz VK8WM:$mask, VR512:$src)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004524
4525
// Integer zero/sign extension of a narrow vector into a wider one.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   KRC       - write-mask register class.
//   DstRC     - register class of the extended result.
//   SrcRC     - register class of the narrow source.
//   OpNode    - DAG node matched by the unmasked forms.
//   mem_frag  - load fragment used by the rm pattern (bitconverted to InVT).
//   x86memop  - memory operand of the load forms.
//   OpVT/InVT - result and source value types of the pattern.
// Only the unmasked rr/rm forms have selection patterns; the masked forms
// (k = merge-masking, kz = zero-masking) are assembler/disassembler only.
// The AT&T asm strings of rrk/rmk must not leave a blank before the '|'
// separator, otherwise the printed instruction ends in trailing whitespace.
multiclass avx512_extend<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                      RegisterClass DstRC, RegisterClass SrcRC, SDNode OpNode,
                      PatFrag mem_frag, X86MemOperand x86memop,
                      ValueType OpVT, ValueType InVT> {

  def rr : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
              (ins SrcRC:$src),
              !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
              [(set DstRC:$dst, (OpVT (OpNode (InVT SrcRC:$src))))]>, EVEX;

  def rrk : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
              (ins KRC:$mask, SrcRC:$src),
              !strconcat(OpcodeStr, " \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
              []>, EVEX, EVEX_K;

  def rrkz : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
              (ins KRC:$mask, SrcRC:$src),
              !strconcat(OpcodeStr, " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
              []>, EVEX, EVEX_KZ;

  let mayLoad = 1 in {
    def rm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
              (ins x86memop:$src),
              !strconcat(OpcodeStr," \t{$src, $dst|$dst, $src}"),
              [(set DstRC:$dst,
                (OpVT (OpNode (InVT (bitconvert (mem_frag addr:$src))))))]>,
              EVEX;

    def rmk : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
              (ins KRC:$mask, x86memop:$src),
              !strconcat(OpcodeStr," \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
              []>,
              EVEX, EVEX_K;

    def rmkz : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
              (ins KRC:$mask, x86memop:$src),
              !strconcat(OpcodeStr," \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
              []>,
              EVEX, EVEX_KZ;
  }
}
4567
// Zero-extending moves (X86vzext). Destination is always VR512.
defm VPMOVZXBDZ : avx512_extend<0x31, "vpmovzxbd", VK16WM, VR512, VR128X,
                                X86vzext, memopv2i64, i128mem, v16i32, v16i8>,
                  EVEX_V512, EVEX_CD8<8, CD8VQ>;
defm VPMOVZXBQZ : avx512_extend<0x32, "vpmovzxbq", VK8WM, VR512, VR128X,
                                X86vzext, memopv2i64, i128mem, v8i64, v16i8>,
                  EVEX_V512, EVEX_CD8<8, CD8VO>;
defm VPMOVZXWDZ : avx512_extend<0x33, "vpmovzxwd", VK16WM, VR512, VR256X,
                                X86vzext, memopv4i64, i256mem, v16i32, v16i16>,
                  EVEX_V512, EVEX_CD8<16, CD8VH>;
defm VPMOVZXWQZ : avx512_extend<0x34, "vpmovzxwq", VK8WM, VR512, VR128X,
                                X86vzext, memopv2i64, i128mem, v8i64, v8i16>,
                  EVEX_V512, EVEX_CD8<16, CD8VQ>;
defm VPMOVZXDQZ : avx512_extend<0x35, "vpmovzxdq", VK8WM, VR512, VR256X,
                                X86vzext, memopv4i64, i256mem, v8i64, v8i32>,
                  EVEX_V512, EVEX_CD8<32, CD8VH>;

// Sign-extending moves (X86vsext); same operand shapes as the zero-extending
// set above, with opcodes 0x21-0x25.
defm VPMOVSXBDZ : avx512_extend<0x21, "vpmovsxbd", VK16WM, VR512, VR128X,
                                X86vsext, memopv2i64, i128mem, v16i32, v16i8>,
                  EVEX_V512, EVEX_CD8<8, CD8VQ>;
defm VPMOVSXBQZ : avx512_extend<0x22, "vpmovsxbq", VK8WM, VR512, VR128X,
                                X86vsext, memopv2i64, i128mem, v8i64, v16i8>,
                  EVEX_V512, EVEX_CD8<8, CD8VO>;
defm VPMOVSXWDZ : avx512_extend<0x23, "vpmovsxwd", VK16WM, VR512, VR256X,
                                X86vsext, memopv4i64, i256mem, v16i32, v16i16>,
                  EVEX_V512, EVEX_CD8<16, CD8VH>;
defm VPMOVSXWQZ : avx512_extend<0x24, "vpmovsxwq", VK8WM, VR512, VR128X,
                                X86vsext, memopv2i64, i128mem, v8i64, v8i16>,
                  EVEX_V512, EVEX_CD8<16, CD8VQ>;
defm VPMOVSXDQZ : avx512_extend<0x25, "vpmovsxdq", VK8WM, VR512, VR256X,
                                X86vsext, memopv4i64, i256mem, v8i64, v8i32>,
                  EVEX_V512, EVEX_CD8<32, CD8VH>;
4599
4600//===----------------------------------------------------------------------===//
4601// GATHER - SCATTER Operations
4602
// Masked gather load.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   KRC       - write-mask register class.
//   RC        - register class of the gathered data.
//   memop     - vector-indexed memory operand.
// The instruction has two results: the data register, tied to $src1 and
// marked early-clobber, and the written-back mask, tied to the input mask.
// It has no selection pattern.
multiclass avx512_gather<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                         RegisterClass RC, X86MemOperand memop> {
  let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
      mayLoad = 1 in
  def rm : AVX5128I<opc, MRMSrcMem,
                    (outs RC:$dst, KRC:$mask_wb),
                    (ins RC:$src1, KRC:$mask, memop:$src2),
                    OpcodeStr #
                      " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}",
                    []>,
           EVEX, EVEX_K;
}
Cameron McInally45325962014-03-26 13:50:50 +00004613
// Floating-point gathers, double precision.
let ExeDomain = SSEPackedDouble in {
  defm VGATHERDPDZ : avx512_gather<0x92, "vgatherdpd", VK8WM, VR512, vy64xmem>,
                     EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
  defm VGATHERQPDZ : avx512_gather<0x93, "vgatherqpd", VK8WM, VR512, vz64mem>,
                     EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
}

// Floating-point gathers, single precision. The qword-indexed form only
// produces eight elements, hence the VR256X data register.
let ExeDomain = SSEPackedSingle in {
  defm VGATHERDPSZ : avx512_gather<0x92, "vgatherdps", VK16WM, VR512, vz32mem>,
                     EVEX_V512, EVEX_CD8<32, CD8VT1>;
  defm VGATHERQPSZ : avx512_gather<0x93, "vgatherqps", VK8WM, VR256X, vz64mem>,
                     EVEX_V512, EVEX_CD8<32, CD8VT1>;
}

// Integer gathers, dword-indexed.
defm VPGATHERDQZ : avx512_gather<0x90, "vpgatherdq", VK8WM, VR512, vy64xmem>,
                   EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VPGATHERDDZ : avx512_gather<0x90, "vpgatherdd", VK16WM, VR512, vz32mem>,
                   EVEX_V512, EVEX_CD8<32, CD8VT1>;

// Integer gathers, qword-indexed.
defm VPGATHERQQZ : avx512_gather<0x91, "vpgatherqq", VK8WM, VR512, vz64mem>,
                   EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VPGATHERQDZ : avx512_gather<0x91, "vpgatherqd", VK8WM, VR256X, vz64mem>,
                   EVEX_V512, EVEX_CD8<32, CD8VT1>;
4637
// Masked scatter store.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   KRC       - write-mask register class.
//   RC        - register class of the data being stored.
//   memop     - vector-indexed memory operand.
// The only result is the written-back mask, which is tied to the input mask.
// The instruction has no selection pattern.
multiclass avx512_scatter<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                          RegisterClass RC, X86MemOperand memop> {
  let Constraints = "$mask = $mask_wb", mayStore = 1 in
  def mr : AVX5128I<opc, MRMDestMem,
                    (outs KRC:$mask_wb),
                    (ins memop:$dst, KRC:$mask, RC:$src2),
                    OpcodeStr #
                      " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}",
                    []>,
           EVEX, EVEX_K;
}
4647
// Floating-point scatters, double precision.
let ExeDomain = SSEPackedDouble in {
  defm VSCATTERDPDZ : avx512_scatter<0xA2, "vscatterdpd", VK8WM, VR512,
                                     vy64xmem>,
                      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
  defm VSCATTERQPDZ : avx512_scatter<0xA3, "vscatterqpd", VK8WM, VR512,
                                     vz64mem>,
                      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
}

// Floating-point scatters, single precision. The qword-indexed form stores
// eight elements taken from a VR256X register.
let ExeDomain = SSEPackedSingle in {
  defm VSCATTERDPSZ : avx512_scatter<0xA2, "vscatterdps", VK16WM, VR512,
                                     vz32mem>,
                      EVEX_V512, EVEX_CD8<32, CD8VT1>;
  defm VSCATTERQPSZ : avx512_scatter<0xA3, "vscatterqps", VK8WM, VR256X,
                                     vz64mem>,
                      EVEX_V512, EVEX_CD8<32, CD8VT1>;
}

// Integer scatters, dword-indexed.
defm VPSCATTERDQZ : avx512_scatter<0xA0, "vpscatterdq", VK8WM, VR512,
                                   vy64xmem>,
                    EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VPSCATTERDDZ : avx512_scatter<0xA0, "vpscatterdd", VK16WM, VR512,
                                   vz32mem>,
                    EVEX_V512, EVEX_CD8<32, CD8VT1>;

// Integer scatters, qword-indexed.
defm VPSCATTERQQZ : avx512_scatter<0xA1, "vpscatterqq", VK8WM, VR512,
                                   vz64mem>,
                    EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VPSCATTERQDZ : avx512_scatter<0xA1, "vpscatterqd", VK8WM, VR256X,
                                   vz64mem>,
                    EVEX_V512, EVEX_CD8<32, CD8VT1>;
4671
// Gather / scatter prefetch.
//   opc       - opcode byte.
//   F         - instruction format (the MRMNm form selects the ModRM /N
//               opcode extension).
//   OpcodeStr - mnemonic.
//   KRC       - write-mask register class.
//   memop     - vector-indexed memory operand.
// The single form takes only a mask and a memory operand and produces no
// result; it is flagged as having side effects and is gated on HasPFI.
multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F,
                                          string OpcodeStr,
                                          RegisterClass KRC,
                                          X86MemOperand memop> {
  let hasSideEffects = 1, Predicates = [HasPFI] in
  def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
                   OpcodeStr # " \t{$src {${mask}}|{${mask}}, $src}",
                   []>,
          EVEX, EVEX_K;
}
4680
// Prefetch instantiations. The format argument distinguishes the four
// families: MRM1m = gather pf0, MRM2m = gather pf1,
//           MRM5m = scatter pf0, MRM6m = scatter pf1.
// Opcode 0xC6 is used for dword-indexed forms, 0xC7 for qword-indexed forms.
// NOTE(review): in every family the *QPS form uses EVEX_CD8<64, ...> and the
// *DPD form uses EVEX_CD8<32, ...>, i.e. the element size given to EVEX_CD8
// follows the index width rather than the data element width. Confirm this
// against the Intel SDM disp8*N rules before relying on it.

// Gather prefetch, hint 0.
defm VGATHERPF0DPS : avx512_gather_scatter_prefetch<0xC6, MRM1m,
                       "vgatherpf0dps", VK16WM, vz32mem>,
                     EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPS : avx512_gather_scatter_prefetch<0xC7, MRM1m,
                       "vgatherpf0qps", VK8WM, vz64mem>,
                     EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF0DPD : avx512_gather_scatter_prefetch<0xC6, MRM1m,
                       "vgatherpf0dpd", VK8WM, vy32mem>,
                     EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPD : avx512_gather_scatter_prefetch<0xC7, MRM1m,
                       "vgatherpf0qpd", VK8WM, vz64mem>,
                     EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

// Gather prefetch, hint 1.
defm VGATHERPF1DPS : avx512_gather_scatter_prefetch<0xC6, MRM2m,
                       "vgatherpf1dps", VK16WM, vz32mem>,
                     EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPS : avx512_gather_scatter_prefetch<0xC7, MRM2m,
                       "vgatherpf1qps", VK8WM, vz64mem>,
                     EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPD : avx512_gather_scatter_prefetch<0xC6, MRM2m,
                       "vgatherpf1dpd", VK8WM, vy32mem>,
                     EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPD : avx512_gather_scatter_prefetch<0xC7, MRM2m,
                       "vgatherpf1qpd", VK8WM, vz64mem>,
                     EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

// Scatter prefetch, hint 0.
defm VSCATTERPF0DPS : avx512_gather_scatter_prefetch<0xC6, MRM5m,
                        "vscatterpf0dps", VK16WM, vz32mem>,
                      EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPS : avx512_gather_scatter_prefetch<0xC7, MRM5m,
                        "vscatterpf0qps", VK8WM, vz64mem>,
                      EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPD : avx512_gather_scatter_prefetch<0xC6, MRM5m,
                        "vscatterpf0dpd", VK8WM, vy32mem>,
                      EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPD : avx512_gather_scatter_prefetch<0xC7, MRM5m,
                        "vscatterpf0qpd", VK8WM, vz64mem>,
                      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

// Scatter prefetch, hint 1.
defm VSCATTERPF1DPS : avx512_gather_scatter_prefetch<0xC6, MRM6m,
                        "vscatterpf1dps", VK16WM, vz32mem>,
                      EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPS : avx512_gather_scatter_prefetch<0xC7, MRM6m,
                        "vscatterpf1qps", VK8WM, vz64mem>,
                      EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPD : avx512_gather_scatter_prefetch<0xC6, MRM6m,
                        "vscatterpf1dpd", VK8WM, vy32mem>,
                      EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPD : avx512_gather_scatter_prefetch<0xC7, MRM6m,
                        "vscatterpf1qpd", VK8WM, vz64mem>,
                      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004728//===----------------------------------------------------------------------===//
4729// VSHUFPS - VSHUFPD Operations
4730
// Packed FP shuffle with an 8-bit immediate selector (opcode 0xC6).
//   RC        - vector register class.
//   x86memop  - memory operand for the second source.
//   vt        - value type of the X86Shufp result.
//   OpcodeStr - mnemonic.
//   mem_frag  - load fragment for the memory form.
//   d         - execution domain.
// Both forms match X86Shufp directly; the memory form folds the load of the
// second source.
multiclass avx512_shufp<RegisterClass RC, X86MemOperand x86memop,
                        ValueType vt, string OpcodeStr, PatFrag mem_frag,
                        Domain d> {
  // Register-memory form.
  def rmi : AVX512PIi8<0xC6, MRMSrcMem,
                       (outs RC:$dst),
                       (ins RC:$src1, x86memop:$src2, i8imm:$src3),
                       OpcodeStr #
                         " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                       [(set RC:$dst,
                             (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
                                           (i8 imm:$src3))))],
                       d, IIC_SSE_SHUFP>,
            EVEX_4V, Sched<[WriteShuffleLd, ReadAfterLd]>;

  // Register-register form.
  def rri : AVX512PIi8<0xC6, MRMSrcReg,
                       (outs RC:$dst),
                       (ins RC:$src1, RC:$src2, i8imm:$src3),
                       OpcodeStr #
                         " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                       [(set RC:$dst,
                             (vt (X86Shufp RC:$src1, RC:$src2,
                                           (i8 imm:$src3))))],
                       d, IIC_SSE_SHUFP>,
            EVEX_4V, Sched<[WriteShuffle]>;
}
4749
defm VSHUFPSZ : avx512_shufp<VR512, f512mem, v16f32, "vshufps", memopv16f32,
                             SSEPackedSingle>,
                PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VSHUFPDZ : avx512_shufp<VR512, f512mem, v8f64, "vshufpd", memopv8f64,
                             SSEPackedDouble>,
                PD, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;

// X86Shufp on integer vectors of the same element width reuses the FP
// shuffle instructions: v16i32 -> VSHUFPS, v8i64 -> VSHUFPD.
def : Pat<(v16i32 (X86Shufp VR512:$src1, VR512:$src2, (i8 imm:$imm))),
          (VSHUFPSZrri VR512:$src1, VR512:$src2, imm:$imm)>;
def : Pat<(v16i32 (X86Shufp VR512:$src1,
                            (memopv16i32 addr:$src2), (i8 imm:$imm))),
          (VSHUFPSZrmi VR512:$src1, addr:$src2, imm:$imm)>;

def : Pat<(v8i64 (X86Shufp VR512:$src1, VR512:$src2, (i8 imm:$imm))),
          (VSHUFPDZrri VR512:$src1, VR512:$src2, imm:$imm)>;
def : Pat<(v8i64 (X86Shufp VR512:$src1,
                           (memopv8i64 addr:$src2), (i8 imm:$imm))),
          (VSHUFPDZrmi VR512:$src1, addr:$src2, imm:$imm)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004766
// VALIGND / VALIGNQ, parameterized by an X86VectorVTInfo record `_` that
// supplies the register class, value types, mask class, memory operand and
// mnemonic suffix.
multiclass avx512_valign<X86VectorVTInfo _> {
  // Register form, generated through the AVX512_masking helper (defined
  // elsewhere in the file). Note that the X86VAlign node is matched with its
  // two vector operands swapped relative to the instruction's $src1/$src2.
  defm rri : AVX512_masking<0x03, MRMSrcReg, (outs _.RC:$dst),
                     (ins _.RC:$src1, _.RC:$src2, i8imm:$src3),
                     "valign"##_.Suffix,
                     "$src3, $src2, $src1", "$src1, $src2, $src3",
                     (_.VT (X86VAlign _.RC:$src2, _.RC:$src1,
                                      (i8 imm:$src3))),
                     _.VT, _.RC, _.KRCWM>,
             AVX512AIi8Base, EVEX_4V;

  // Also match valign of packed floats.
  // The same operand swap as above is applied when forwarding to the
  // unmasked rri instruction.
  def : Pat<(_.FloatVT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 imm:$imm))),
            (!cast<Instruction>(NAME##rri) _.RC:$src2, _.RC:$src1, imm:$imm)>;

  // Memory form: no selection pattern, so mayLoad is stated explicitly.
  let mayLoad = 1 in
  def rmi : AVX512AIi8<0x03, MRMSrcMem, (outs _.RC:$dst),
                     (ins _.RC:$src1, _.MemOp:$src2, i8imm:$src3),
                     !strconcat("valign"##_.Suffix,
                     " \t{$src3, $src2, $src1, $dst|"
                         "$dst, $src1, $src2, $src3}"),
                     []>, EVEX_4V;
}
// 32-bit element variant.
defm VALIGND : avx512_valign<v16i32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
// 64-bit element variant (sets VEX_W).
defm VALIGNQ : avx512_valign<v8i64_info>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004791
// Leaf fragments recognising a per-element sign mask: an arithmetic right
// shift of $src by (element width - 1), i.e. 31 for v16i32 and 63 for v8i64.
// They are used to match "sext vXi1 to vXiY".
def v16i1sextv16i32 : PatLeaf<(v16i32 (X86vsrai VR512:$src, (i8 31)))>;
def v8i1sextv8i64   : PatLeaf<(v8i64 (X86vsrai VR512:$src, (i8 63)))>;
4795
// Packed integer absolute value.
//   opc           - opcode byte.
//   OpcodeStr     - mnemonic.
//   OpVT          - vector value type (not referenced by any form here).
//   KRC           - write-mask register class.
//   RC            - vector register class of source and destination.
//   x86memop      - full-width memory operand.
//   x86scalar_mop - scalar memory operand for the broadcast forms.
//   BrdcstStr     - broadcast decoration appended to the memory operand in
//                   the asm string, e.g. "{1to16}".
// Forms: rr/rm/rmb unmasked, *k merge-masked, *kz zero-masked; the rmb* forms
// set EVEX_B. No form carries a selection pattern.
// All forms, including the memory ones, define their result in RC so that the
// multiclass honours its register-class parameter consistently.
multiclass avx512_vpabs<bits<8> opc, string OpcodeStr, ValueType OpVT,
                        RegisterClass KRC, RegisterClass RC,
                        X86MemOperand x86memop, X86MemOperand x86scalar_mop,
                        string BrdcstStr> {
  def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
            !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
            []>, EVEX;
  def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src),
             !strconcat(OpcodeStr, " \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
             []>, EVEX, EVEX_K;
  def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src),
              !strconcat(OpcodeStr,
                         " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
              []>, EVEX, EVEX_KZ;
  let mayLoad = 1 in {
    def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
              (ins x86memop:$src),
              !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
              []>, EVEX;
    def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
               (ins KRC:$mask, x86memop:$src),
               !strconcat(OpcodeStr,
                          " \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
               []>, EVEX, EVEX_K;
    def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                (ins KRC:$mask, x86memop:$src),
                !strconcat(OpcodeStr,
                           " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
                []>, EVEX, EVEX_KZ;
    def rmb : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
               (ins x86scalar_mop:$src),
               !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
                          ", $dst|$dst, ${src}", BrdcstStr, "}"),
               []>, EVEX, EVEX_B;
    def rmbk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                (ins KRC:$mask, x86scalar_mop:$src),
                !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
                           ", $dst {${mask}}|$dst {${mask}}, ${src}", BrdcstStr, "}"),
                []>, EVEX, EVEX_B, EVEX_K;
    def rmbkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                 (ins KRC:$mask, x86scalar_mop:$src),
                 !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
                            ", $dst {${mask}} {z}|$dst {${mask}} {z}, ${src}",
                            BrdcstStr, "}"),
                 []>, EVEX, EVEX_B, EVEX_KZ;
  }
}
4843
defm VPABSDZ : avx512_vpabs<0x1E, "vpabsd", v16i32, VK16WM, VR512,
                            i512mem, i32mem, "{1to16}">,
               EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPABSQZ : avx512_vpabs<0x1F, "vpabsq", v8i64, VK8WM, VR512,
                            i512mem, i64mem, "{1to8}">,
               EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

// Recognise the open-coded absolute value  xor(sign, add(x, sign)),  where
// `sign` is the per-element sign mask matched by the vXi1sext leaf fragments.
def : Pat<(xor (bc_v16i32 (v16i1sextv16i32)),
               (bc_v16i32 (add (v16i32 VR512:$src), (v16i1sextv16i32)))),
          (VPABSDZrr VR512:$src)>;
def : Pat<(xor (bc_v8i64 (v8i1sextv8i64)),
               (bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))),
          (VPABSQZrr VR512:$src)>;

// The masked pabs intrinsics with an all-zeros pass-through vector and an
// all-ones mask reduce to the unmasked instruction.
def : Pat<(v16i32 (int_x86_avx512_mask_pabs_d_512 (v16i32 VR512:$src),
                                                  (v16i32 immAllZerosV),
                                                  (i16 -1))),
          (VPABSDZrr VR512:$src)>;
def : Pat<(v8i64 (int_x86_avx512_mask_pabs_q_512 (v8i64 VR512:$src),
                                                 (bc_v8i64 (v16i32 immAllZerosV)),
                                                 (i8 -1))),
          (VPABSQZrr VR512:$src)>;
Elena Demikhovsky172a27c2014-01-08 10:54:22 +00004866
// Unary CDI-style vector operations (instantiated in this file for
// vpconflict and vplzcnt).
//   opc           - opcode byte.
//   OpcodeStr     - mnemonic.
//   RC            - vector register class of source and destination.
//   KRC           - write-mask register class.
//   x86memop      - full-width memory operand.
//   x86scalar_mop - scalar memory operand for the broadcast forms.
//   BrdcstStr     - broadcast decoration, e.g. "{1to16}".
// Forms: rr/rm/rmb unmasked, *kz zero-masked, *k merge-masked. The
// merge-masked forms take the pass-through value as $src1, tied to $dst.
// No form carries a selection pattern, so the memory forms state mayLoad
// explicitly. The rr asm string must not leave a blank before the '|'
// separator, otherwise AT&T output ends in trailing whitespace.
multiclass avx512_conflict<bits<8> opc, string OpcodeStr,
                        RegisterClass RC, RegisterClass KRC,
                        X86MemOperand x86memop,
                        X86MemOperand x86scalar_mop, string BrdcstStr> {
  def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
       (ins RC:$src),
       !strconcat(OpcodeStr, " \t{$src, ${dst}|${dst}, $src}"),
       []>, EVEX;
  let mayLoad = 1 in
  def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
       (ins x86memop:$src),
       !strconcat(OpcodeStr, " \t{$src, ${dst}|${dst}, $src}"),
       []>, EVEX;
  let mayLoad = 1 in
  def rmb : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
       (ins x86scalar_mop:$src),
       !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
                  ", ${dst}|${dst}, ${src}", BrdcstStr, "}"),
       []>, EVEX, EVEX_B;
  def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
       (ins KRC:$mask, RC:$src),
       !strconcat(OpcodeStr,
                  " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
       []>, EVEX, EVEX_KZ;
  let mayLoad = 1 in
  def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
       (ins KRC:$mask, x86memop:$src),
       !strconcat(OpcodeStr,
                  " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
       []>, EVEX, EVEX_KZ;
  let mayLoad = 1 in
  def rmbkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
       (ins KRC:$mask, x86scalar_mop:$src),
       !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
                  ", ${dst} {${mask}} {z}|${dst} {${mask}} {z}, ${src}",
                  BrdcstStr, "}"),
       []>, EVEX, EVEX_KZ, EVEX_B;

  let Constraints = "$src1 = $dst" in {
  def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
       (ins RC:$src1, KRC:$mask, RC:$src2),
       !strconcat(OpcodeStr,
                  " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
       []>, EVEX, EVEX_K;
  let mayLoad = 1 in {
  def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, KRC:$mask, x86memop:$src2),
       !strconcat(OpcodeStr,
                  " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
       []>, EVEX, EVEX_K;
  def rmbk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, KRC:$mask, x86scalar_mop:$src2),
       !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
                  ", ${dst} {${mask}}|${dst} {${mask}}, ${src2}", BrdcstStr, "}"),
       []>, EVEX, EVEX_K, EVEX_B;
  }
  }
}
4919
// VPCONFLICTD / VPCONFLICTQ and the patterns that select them. Everything is
// gated on HasCDI: the intrinsic patterns produce CDI instructions, so they
// must not be selectable when the feature is absent.
let Predicates = [HasCDI] in {
defm VPCONFLICTD : avx512_conflict<0xC4, "vpconflictd", VR512, VK16WM,
                                   i512mem, i32mem, "{1to16}">,
                   EVEX_V512, EVEX_CD8<32, CD8VF>;

defm VPCONFLICTQ : avx512_conflict<0xC4, "vpconflictq", VR512, VK8WM,
                                   i512mem, i64mem, "{1to8}">,
                   EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

// Masked intrinsics -> merge-masking register form. The intrinsic's second
// operand becomes the tied pass-through ($src1 of rrk); the GPR mask is
// copied into the write-mask register class.
def : Pat<(int_x86_avx512_mask_conflict_d_512 VR512:$src2, VR512:$src1,
                                              GR16:$mask),
          (VPCONFLICTDrrk VR512:$src1,
           (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), VR512:$src2)>;

def : Pat<(int_x86_avx512_mask_conflict_q_512 VR512:$src2, VR512:$src1,
                                              GR8:$mask),
          (VPCONFLICTQrrk VR512:$src1,
           (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>;
}
Elena Demikhovskycf0b9ba2014-04-09 12:37:50 +00004941
// VPLZCNTD / VPLZCNTQ and the patterns that select them. Everything is gated
// on HasCDI: both the intrinsic patterns and the generic ctlz patterns
// produce CDI instructions, so they must not be selectable when the feature
// is absent.
let Predicates = [HasCDI] in {
defm VPLZCNTD : avx512_conflict<0x44, "vplzcntd", VR512, VK16WM,
                                i512mem, i32mem, "{1to16}">,
                EVEX_V512, EVEX_CD8<32, CD8VF>;

defm VPLZCNTQ : avx512_conflict<0x44, "vplzcntq", VR512, VK8WM,
                                i512mem, i64mem, "{1to8}">,
                EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

// Masked intrinsics -> merge-masking register form. The intrinsic's second
// operand becomes the tied pass-through ($src1 of rrk); the GPR mask is
// copied into the write-mask register class.
def : Pat<(int_x86_avx512_mask_lzcnt_d_512 VR512:$src2, VR512:$src1,
                                           GR16:$mask),
          (VPLZCNTDrrk VR512:$src1,
           (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), VR512:$src2)>;

def : Pat<(int_x86_avx512_mask_lzcnt_q_512 VR512:$src2, VR512:$src1,
                                           GR8:$mask),
          (VPLZCNTQrrk VR512:$src1,
           (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>;

// Generic count-leading-zeros, register and folded-load variants.
def : Pat<(v16i32 (ctlz (memopv16i32 addr:$src))),
          (VPLZCNTDrm addr:$src)>;
def : Pat<(v16i32 (ctlz (v16i32 VR512:$src))),
          (VPLZCNTDrr VR512:$src)>;
def : Pat<(v8i64 (ctlz (memopv8i64 addr:$src))),
          (VPLZCNTQrm addr:$src)>;
def : Pat<(v8i64 (ctlz (v8i64 VR512:$src))),
          (VPLZCNTQrr VR512:$src)>;
}
4972
// Stores of a constant i1 become a byte store of 0 or 1; both i1 encodings
// of "true" (1 and -1) are stored as the byte value 1.
def : Pat<(store (i1 0), addr:$dst),
          (MOV8mi addr:$dst, (i8 0))>;
def : Pat<(store (i1 1), addr:$dst),
          (MOV8mi addr:$dst, (i8 1))>;
def : Pat<(store (i1 -1), addr:$dst),
          (MOV8mi addr:$dst, (i8 1))>;

// A store of a VK1 mask register is emitted as KMOVWmk after copying the
// value into VK16.
// NOTE(review): KMOVW presumably writes a 16-bit word for what is logically
// an i1 store; confirm the destination always has room for it.
def : Pat<(store VK1:$src, addr:$dst),
          (KMOVWmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK16))>;
4979
// Fragment matching a truncating store whose memory type is i1.
def truncstorei1 : PatFrag<(ops node:$val, node:$ptr),
                           (truncstore node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i1;
}]>;

// A truncating i1 store from an 8-bit GPR is a plain byte store.
def : Pat<(truncstorei1 GR8:$src, addr:$dst),
          (MOV8mr addr:$dst, GR8:$src)>;
4987