blob: 90f45f97225e6203c1d8e0332dc41cc7c78cf232 [file] [log] [blame]
// Bundles every template argument that can be derived from a vector type
// (NumElts x EltVT): the write-mask register class, memory operands, load
// fragments, and so on.  Multiclasses take one of these records as a single
// template argument instead of a long list of individual arguments.
class X86VectorVTInfo<int NumElts, ValueType EltVT, RegisterClass rc,
                      string suffix = ""> {
  // Vector register class, exactly as passed by the instantiation.
  RegisterClass RC = rc;

  // Mask register class matching the element count ("VK" # NumElts).
  RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);

  // Write-mask variant of the mask register class.
  RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");

  // GPR register class that can hold the write mask.  Vectors with fewer
  // than 8 elements still use GR8.  TableGen has no !lt, so "NumElts < 8" is
  // expressed as "(NumElts >> 3) == 0".
  RegisterClass MRC =
    !cast<RegisterClass>("GR" #
                         !if (!eq (!srl(NumElts, 3), 0), 8, NumElts));

  // Suffix used in the instruction mnemonic.
  string Suffix = suffix;

  // Name of the vector VT, e.g. "v16i32".
  string VTName = "v" # NumElts # EltVT;

  // The vector VT record looked up from VTName.
  ValueType VT = !cast<ValueType>(VTName);

  // Element type spelled as a string, e.g. "i32" or "f64".
  string EltTypeName = !cast<string>(EltVT);

  // Element size as a string: EltTypeName with the "i"/"f" letter removed.
  string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));

  // Size of the element type in bits, e.g. 32 for v16i32.
  int EltSize = EltVT.Size;

  // "i" for integer types and "f" for floating-point types.
  string TypeVariantName = !subst(EltSizeName, "", EltTypeName);

  // Size of the vector in bits, e.g. 512 for VR512.
  int Size = VT.Size;

  // The corresponding memory operand, e.g. i512mem for VR512.
  X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");

  // Memory operand for a single element, e.g. i32mem.
  X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");

  // Load patterns.
  // Note: for 128/256-bit integer VTs we choose loadv2i64/loadv4i64 because
  // of load promotion during legalization.
  PatFrag LdFrag = !cast<PatFrag>("load" #
                                  !if (!eq (TypeVariantName, "i"),
                                       !if (!eq (Size, 128), "v2i64",
                                       !if (!eq (Size, 256), "v4i64",
                                            VTName)), VTName));

  // Load fragment for a single element, e.g. loadi32.
  PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);

  // The corresponding float type, e.g. v16f32 for v16i32.
  // Note: for EltSize < 32 the float VT is illegal and TableGen fails to
  // compile, so FloatVT falls back to VT in that case.  Floating-point
  // vectors map to themselves.
  ValueType FloatVT = !cast<ValueType>(
                        !if (!eq (!srl(EltSize,5),0),
                             VTName,
                             !if (!eq(TypeVariantName, "i"),
                                  "v" # NumElts # "f" # EltSize,
                                  VTName)));

  // The string to specify embedded broadcast in assembly.
  string BroadcastStr = "{1to" # NumElts # "}";

  // Sub-register index for 128-bit (sub_xmm) and 256-bit (sub_ymm) vectors;
  // left unset for any other size.
  SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
                               !if (!eq (Size, 256), sub_ymm, ?));

  // Execution domain selected from the element type.
  Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
                          !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
                               SSEPackedInt));
}
75
// 512-bit integer vector types; RC = VR512.
def v64i8_info   : X86VectorVTInfo<64, i8,  VR512, "b">;
def v32i16_info  : X86VectorVTInfo<32, i16, VR512, "w">;
def v16i32_info  : X86VectorVTInfo<16, i32, VR512, "d">;
def v8i64_info   : X86VectorVTInfo<8,  i64, VR512, "q">;

// 256-bit integer vector types.  The trailing "x" in a name such as
// v32i8x_info means RC = VR256X.
def v32i8x_info  : X86VectorVTInfo<32, i8,  VR256X, "b">;
def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
def v8i32x_info  : X86VectorVTInfo<8,  i32, VR256X, "d">;
def v4i64x_info  : X86VectorVTInfo<4,  i64, VR256X, "q">;

// 128-bit integer vector types; here the "x" means RC = VR128X.
def v16i8x_info  : X86VectorVTInfo<16, i8,  VR128X, "b">;
def v8i16x_info  : X86VectorVTInfo<8,  i16, VR128X, "w">;
def v4i32x_info  : X86VectorVTInfo<4,  i32, VR128X, "d">;
def v2i64x_info  : X86VectorVTInfo<2,  i64, VR128X, "q">;
91
// Groups the 512-, 256- and 128-bit X86VectorVTInfo records that share one
// element type, so that a single template argument can carry all three.
class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
                           X86VectorVTInfo i128> {
  X86VectorVTInfo info512 = i512;   // 512-bit variant
  X86VectorVTInfo info256 = i256;   // 256-bit variant
  X86VectorVTInfo info128 = i128;   // 128-bit variant
}

// One bundle per integer element type.
def avx512vl_i8_info  :
  AVX512VLVectorVTInfo<v64i8_info,  v32i8x_info,  v16i8x_info>;
def avx512vl_i16_info :
  AVX512VLVectorVTInfo<v32i16_info, v16i16x_info, v8i16x_info>;
def avx512vl_i32_info :
  AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,  v4i32x_info>;
def avx512vl_i64_info :
  AVX512VLVectorVTInfo<v8i64_info,  v4i64x_info,  v2i64x_info>;
107
108
// Common base class of AVX512_masking and AVX512_masking_3src.
//
// Emits three instruction records from one description:
//   NAME    - unmasked form, selecting RHS.
//   NAME#k  - merge-masking form, selecting MaskingRHS (EVEX_K).
//   NAME#kz - zero-masking form, selecting RHS under the mask and an
//             all-zeros vector elsewhere (EVEX_KZ).
multiclass AVX512_masking_common<bits<8> O, Format F, dag Outs, dag Ins,
                                 dag MaskingIns, dag ZeroMaskingIns,
                                 string OpcodeStr,
                                 string AttSrcAsm, string IntelSrcAsm,
                                 dag RHS, dag MaskingRHS, ValueType OpVT,
                                 RegisterClass RC, RegisterClass KRC,
                                 string MaskingConstraint = ""> {
  // Unmasked variant.
  def NAME: AVX512<O, F, Outs, Ins,
                   !strconcat(OpcodeStr, " \t{", AttSrcAsm,
                              ", $dst|$dst, ", IntelSrcAsm, "}"),
                   [(set RC:$dst, RHS)]>;

  // Merge-masking variant.  Prefer over VMOV*rrk Pat<>
  let AddedComplexity = 20 in
    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       !strconcat(OpcodeStr, " \t{", AttSrcAsm,
                                  ", $dst {${mask}}|$dst {${mask}}, ",
                                  IntelSrcAsm, "}"),
                       [(set RC:$dst, MaskingRHS)]>,
              EVEX_K {
      // In case of the 3src subclass this is overridden with a let.
      string Constraints = MaskingConstraint;
  }

  // Zero-masking variant.  Prefer over VMOV*rrkz Pat<>
  let AddedComplexity = 30 in
    def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
                        !strconcat(OpcodeStr, " \t{", AttSrcAsm,
                                   ", $dst {${mask}} {z}|$dst {${mask}} {z}, ",
                                   IntelSrcAsm, "}"),
                        [(set RC:$dst,
                              (vselect KRC:$mask, RHS,
                                       (OpVT (bitconvert
                                               (v16i32 immAllZerosV)))))]>,
              EVEX_KZ;
}
142
// Generates the unconditional (non-masking), the masking and the
// zero-masking variant of an instruction.  In the masking case the
// preserved vector elements come from a new dummy input operand ($src0)
// that is tied to $dst.
multiclass AVX512_masking<bits<8> O, Format F, dag Outs, dag Ins,
                          string OpcodeStr,
                          string AttSrcAsm, string IntelSrcAsm,
                          dag RHS, ValueType OpVT, RegisterClass RC,
                          RegisterClass KRC> :
  AVX512_masking_common<
      O, F, Outs,
      Ins,                                     // unmasked inputs
      !con((ins RC:$src0, KRC:$mask), Ins),    // masking inputs
      !con((ins KRC:$mask), Ins),              // zero-masking inputs
      OpcodeStr, AttSrcAsm, IntelSrcAsm,
      RHS,
      (vselect KRC:$mask, RHS, RC:$src0),      // masking RHS
      OpVT, RC, KRC,
      "$src0 = $dst">;                         // masking constraint
158
// Like AVX512_masking, except that one of the source operands ($src1) is
// already tied to $dst, so it doubles as the source of the preserved vector
// elements.  NOTE: NonTiedIns (the ins dag) must not contain $src1; it is
// prepended here.  No masking constraint is passed, so the common base keeps
// its default empty string.
multiclass AVX512_masking_3src<bits<8> O, Format F, dag Outs, dag NonTiedIns,
                               string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, ValueType OpVT,
                               RegisterClass RC, RegisterClass KRC> :
  AVX512_masking_common<
      O, F, Outs,
      !con((ins RC:$src1), NonTiedIns),              // unmasked inputs
      !con((ins RC:$src1, KRC:$mask), NonTiedIns),   // masking inputs
      !con((ins RC:$src1, KRC:$mask), NonTiedIns),   // zero-masking inputs
      OpcodeStr, AttSrcAsm, IntelSrcAsm,
      RHS,
      (vselect KRC:$mask, RHS, RC:$src1),            // masking RHS
      OpVT, RC, KRC>;
174
// Bitcasts between vector types of the same width.  Return the original
// register re-typed, since no instruction is needed for the conversion.
let Predicates = [HasAVX512] in {
  // Bitcasts between 512-bit vector types (VR512).
  def : Pat<(v8f64 (bitconvert (v8i64 VR512:$src))), (v8f64 VR512:$src)>;
  def : Pat<(v8f64 (bitconvert (v16i32 VR512:$src))), (v8f64 VR512:$src)>;
  def : Pat<(v8f64 (bitconvert (v32i16 VR512:$src))), (v8f64 VR512:$src)>;
  def : Pat<(v8f64 (bitconvert (v64i8 VR512:$src))), (v8f64 VR512:$src)>;
  def : Pat<(v8f64 (bitconvert (v16f32 VR512:$src))), (v8f64 VR512:$src)>;
  def : Pat<(v16f32 (bitconvert (v8i64 VR512:$src))), (v16f32 VR512:$src)>;
  def : Pat<(v16f32 (bitconvert (v16i32 VR512:$src))), (v16f32 VR512:$src)>;
  def : Pat<(v16f32 (bitconvert (v32i16 VR512:$src))), (v16f32 VR512:$src)>;
  def : Pat<(v16f32 (bitconvert (v64i8 VR512:$src))), (v16f32 VR512:$src)>;
  def : Pat<(v16f32 (bitconvert (v8f64 VR512:$src))), (v16f32 VR512:$src)>;
  def : Pat<(v8i64 (bitconvert (v16i32 VR512:$src))), (v8i64 VR512:$src)>;
  def : Pat<(v8i64 (bitconvert (v32i16 VR512:$src))), (v8i64 VR512:$src)>;
  def : Pat<(v8i64 (bitconvert (v64i8 VR512:$src))), (v8i64 VR512:$src)>;
  def : Pat<(v8i64 (bitconvert (v8f64 VR512:$src))), (v8i64 VR512:$src)>;
  def : Pat<(v8i64 (bitconvert (v16f32 VR512:$src))), (v8i64 VR512:$src)>;
  def : Pat<(v16i32 (bitconvert (v8i64 VR512:$src))), (v16i32 VR512:$src)>;
  def : Pat<(v16i32 (bitconvert (v16f32 VR512:$src))), (v16i32 VR512:$src)>;
  def : Pat<(v16i32 (bitconvert (v32i16 VR512:$src))), (v16i32 VR512:$src)>;
  def : Pat<(v16i32 (bitconvert (v64i8 VR512:$src))), (v16i32 VR512:$src)>;
  def : Pat<(v16i32 (bitconvert (v8f64 VR512:$src))), (v16i32 VR512:$src)>;
  def : Pat<(v32i16 (bitconvert (v8i64 VR512:$src))), (v32i16 VR512:$src)>;
  def : Pat<(v32i16 (bitconvert (v16i32 VR512:$src))), (v32i16 VR512:$src)>;
  def : Pat<(v32i16 (bitconvert (v64i8 VR512:$src))), (v32i16 VR512:$src)>;
  def : Pat<(v32i16 (bitconvert (v8f64 VR512:$src))), (v32i16 VR512:$src)>;
  // The v16f32 -> v32i16 pattern used to be listed twice; one copy suffices.
  def : Pat<(v32i16 (bitconvert (v16f32 VR512:$src))), (v32i16 VR512:$src)>;
  def : Pat<(v64i8 (bitconvert (v8i64 VR512:$src))), (v64i8 VR512:$src)>;
  def : Pat<(v64i8 (bitconvert (v16i32 VR512:$src))), (v64i8 VR512:$src)>;
  def : Pat<(v64i8 (bitconvert (v32i16 VR512:$src))), (v64i8 VR512:$src)>;
  def : Pat<(v64i8 (bitconvert (v8f64 VR512:$src))), (v64i8 VR512:$src)>;
  def : Pat<(v64i8 (bitconvert (v16f32 VR512:$src))), (v64i8 VR512:$src)>;

  // Bitcasts between 128-bit vector types (VR128X).
  def : Pat<(v2i64 (bitconvert (v4i32 VR128X:$src))), (v2i64 VR128X:$src)>;
  def : Pat<(v2i64 (bitconvert (v8i16 VR128X:$src))), (v2i64 VR128X:$src)>;
  def : Pat<(v2i64 (bitconvert (v16i8 VR128X:$src))), (v2i64 VR128X:$src)>;
  def : Pat<(v2i64 (bitconvert (v2f64 VR128X:$src))), (v2i64 VR128X:$src)>;
  def : Pat<(v2i64 (bitconvert (v4f32 VR128X:$src))), (v2i64 VR128X:$src)>;
  def : Pat<(v4i32 (bitconvert (v2i64 VR128X:$src))), (v4i32 VR128X:$src)>;
  def : Pat<(v4i32 (bitconvert (v8i16 VR128X:$src))), (v4i32 VR128X:$src)>;
  def : Pat<(v4i32 (bitconvert (v16i8 VR128X:$src))), (v4i32 VR128X:$src)>;
  def : Pat<(v4i32 (bitconvert (v2f64 VR128X:$src))), (v4i32 VR128X:$src)>;
  def : Pat<(v4i32 (bitconvert (v4f32 VR128X:$src))), (v4i32 VR128X:$src)>;
  def : Pat<(v8i16 (bitconvert (v2i64 VR128X:$src))), (v8i16 VR128X:$src)>;
  def : Pat<(v8i16 (bitconvert (v4i32 VR128X:$src))), (v8i16 VR128X:$src)>;
  def : Pat<(v8i16 (bitconvert (v16i8 VR128X:$src))), (v8i16 VR128X:$src)>;
  def : Pat<(v8i16 (bitconvert (v2f64 VR128X:$src))), (v8i16 VR128X:$src)>;
  def : Pat<(v8i16 (bitconvert (v4f32 VR128X:$src))), (v8i16 VR128X:$src)>;
  def : Pat<(v16i8 (bitconvert (v2i64 VR128X:$src))), (v16i8 VR128X:$src)>;
  def : Pat<(v16i8 (bitconvert (v4i32 VR128X:$src))), (v16i8 VR128X:$src)>;
  def : Pat<(v16i8 (bitconvert (v8i16 VR128X:$src))), (v16i8 VR128X:$src)>;
  def : Pat<(v16i8 (bitconvert (v2f64 VR128X:$src))), (v16i8 VR128X:$src)>;
  def : Pat<(v16i8 (bitconvert (v4f32 VR128X:$src))), (v16i8 VR128X:$src)>;
  def : Pat<(v4f32 (bitconvert (v2i64 VR128X:$src))), (v4f32 VR128X:$src)>;
  def : Pat<(v4f32 (bitconvert (v4i32 VR128X:$src))), (v4f32 VR128X:$src)>;
  def : Pat<(v4f32 (bitconvert (v8i16 VR128X:$src))), (v4f32 VR128X:$src)>;
  def : Pat<(v4f32 (bitconvert (v16i8 VR128X:$src))), (v4f32 VR128X:$src)>;
  def : Pat<(v4f32 (bitconvert (v2f64 VR128X:$src))), (v4f32 VR128X:$src)>;
  def : Pat<(v2f64 (bitconvert (v2i64 VR128X:$src))), (v2f64 VR128X:$src)>;
  def : Pat<(v2f64 (bitconvert (v4i32 VR128X:$src))), (v2f64 VR128X:$src)>;
  def : Pat<(v2f64 (bitconvert (v8i16 VR128X:$src))), (v2f64 VR128X:$src)>;
  def : Pat<(v2f64 (bitconvert (v16i8 VR128X:$src))), (v2f64 VR128X:$src)>;
  def : Pat<(v2f64 (bitconvert (v4f32 VR128X:$src))), (v2f64 VR128X:$src)>;

  // Bitcasts between 256-bit vector types (VR256X).
  def : Pat<(v4f64 (bitconvert (v8f32 VR256X:$src))), (v4f64 VR256X:$src)>;
  def : Pat<(v4f64 (bitconvert (v8i32 VR256X:$src))), (v4f64 VR256X:$src)>;
  def : Pat<(v4f64 (bitconvert (v4i64 VR256X:$src))), (v4f64 VR256X:$src)>;
  def : Pat<(v4f64 (bitconvert (v16i16 VR256X:$src))), (v4f64 VR256X:$src)>;
  def : Pat<(v4f64 (bitconvert (v32i8 VR256X:$src))), (v4f64 VR256X:$src)>;
  def : Pat<(v8f32 (bitconvert (v8i32 VR256X:$src))), (v8f32 VR256X:$src)>;
  def : Pat<(v8f32 (bitconvert (v4i64 VR256X:$src))), (v8f32 VR256X:$src)>;
  def : Pat<(v8f32 (bitconvert (v4f64 VR256X:$src))), (v8f32 VR256X:$src)>;
  def : Pat<(v8f32 (bitconvert (v32i8 VR256X:$src))), (v8f32 VR256X:$src)>;
  def : Pat<(v8f32 (bitconvert (v16i16 VR256X:$src))), (v8f32 VR256X:$src)>;
  def : Pat<(v4i64 (bitconvert (v8f32 VR256X:$src))), (v4i64 VR256X:$src)>;
  def : Pat<(v4i64 (bitconvert (v8i32 VR256X:$src))), (v4i64 VR256X:$src)>;
  def : Pat<(v4i64 (bitconvert (v4f64 VR256X:$src))), (v4i64 VR256X:$src)>;
  def : Pat<(v4i64 (bitconvert (v32i8 VR256X:$src))), (v4i64 VR256X:$src)>;
  def : Pat<(v4i64 (bitconvert (v16i16 VR256X:$src))), (v4i64 VR256X:$src)>;
  def : Pat<(v32i8 (bitconvert (v4f64 VR256X:$src))), (v32i8 VR256X:$src)>;
  def : Pat<(v32i8 (bitconvert (v4i64 VR256X:$src))), (v32i8 VR256X:$src)>;
  def : Pat<(v32i8 (bitconvert (v8f32 VR256X:$src))), (v32i8 VR256X:$src)>;
  def : Pat<(v32i8 (bitconvert (v8i32 VR256X:$src))), (v32i8 VR256X:$src)>;
  def : Pat<(v32i8 (bitconvert (v16i16 VR256X:$src))), (v32i8 VR256X:$src)>;
  def : Pat<(v8i32 (bitconvert (v32i8 VR256X:$src))), (v8i32 VR256X:$src)>;
  def : Pat<(v8i32 (bitconvert (v16i16 VR256X:$src))), (v8i32 VR256X:$src)>;
  def : Pat<(v8i32 (bitconvert (v8f32 VR256X:$src))), (v8i32 VR256X:$src)>;
  def : Pat<(v8i32 (bitconvert (v4i64 VR256X:$src))), (v8i32 VR256X:$src)>;
  def : Pat<(v8i32 (bitconvert (v4f64 VR256X:$src))), (v8i32 VR256X:$src)>;
  def : Pat<(v16i16 (bitconvert (v8f32 VR256X:$src))), (v16i16 VR256X:$src)>;
  def : Pat<(v16i16 (bitconvert (v8i32 VR256X:$src))), (v16i16 VR256X:$src)>;
  def : Pat<(v16i16 (bitconvert (v4i64 VR256X:$src))), (v16i16 VR256X:$src)>;
  def : Pat<(v16i16 (bitconvert (v4f64 VR256X:$src))), (v16i16 VR256X:$src)>;
  def : Pat<(v16i16 (bitconvert (v32i8 VR256X:$src))), (v16i16 VR256X:$src)>;
}
274
//
// AVX-512: the VPXOR instruction writes zeros to the upper part of the
// register, so it is safe to use it to build an all-zeros vector.
//
let Predicates = [HasAVX512] in {
  // Pseudo instruction that yields an all-zeros 512-bit vector.
  let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
      isPseudo = 1 in
  def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
                          [(set VR512:$dst, (v16f32 immAllZerosV))]>;

  // Select the same pseudo for the other 512-bit all-zeros vector types.
  def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
  def : Pat<(v16i32 immAllZerosV), (AVX512_512_SET0)>;
  def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000290
//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
//
// -- 32x4 fp form --
// Register and memory forms; both are defined without a selection pattern.
let hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
  def VINSERTF32x4rr : AVX512AIi8<0x18, MRMSrcReg, (outs VR512:$dst),
        (ins VR512:$src1, VR128X:$src2, i8imm:$src3),
        "vinsertf32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        []>, EVEX_4V, EVEX_V512;

  def VINSERTF32x4rm : AVX512AIi8<0x18, MRMSrcMem, (outs VR512:$dst),
        (ins VR512:$src1, f128mem:$src2, i8imm:$src3),
        "vinsertf32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        []>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VT4> {
    // No pattern to infer it from, so the load flag is set explicitly.
    let mayLoad = 1;
  }
}
306
// -- 64x4 fp form --
// Register and memory forms; both are defined without a selection pattern.
let hasSideEffects = 0, ExeDomain = SSEPackedDouble in {
  def VINSERTF64x4rr : AVX512AIi8<0x1a, MRMSrcReg, (outs VR512:$dst),
        (ins VR512:$src1, VR256X:$src2, i8imm:$src3),
        "vinsertf64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        []>, EVEX_4V, EVEX_V512, VEX_W;

  def VINSERTF64x4rm : AVX512AIi8<0x1a, MRMSrcMem, (outs VR512:$dst),
        (ins VR512:$src1, i256mem:$src2, i8imm:$src3),
        "vinsertf64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        []>, EVEX_4V, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4> {
    // No pattern to infer it from, so the load flag is set explicitly.
    let mayLoad = 1;
  }
}
// Integer forms.  Both groups share one "hasSideEffects = 0" scope; none of
// the records carries a selection pattern.
let hasSideEffects = 0 in {
  // -- 32x4 integer form --
  def VINSERTI32x4rr : AVX512AIi8<0x38, MRMSrcReg, (outs VR512:$dst),
        (ins VR512:$src1, VR128X:$src2, i8imm:$src3),
        "vinserti32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        []>, EVEX_4V, EVEX_V512;

  def VINSERTI32x4rm : AVX512AIi8<0x38, MRMSrcMem, (outs VR512:$dst),
        (ins VR512:$src1, i128mem:$src2, i8imm:$src3),
        "vinserti32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        []>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VT4> {
    let mayLoad = 1;
  }

  // -- 64x4 integer form --
  def VINSERTI64x4rr : AVX512AIi8<0x3a, MRMSrcReg, (outs VR512:$dst),
        (ins VR512:$src1, VR256X:$src2, i8imm:$src3),
        "vinserti64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        []>, EVEX_4V, EVEX_V512, VEX_W;

  def VINSERTI64x4rm : AVX512AIi8<0x3a, MRMSrcMem, (outs VR512:$dst),
        (ins VR512:$src1, i256mem:$src2, i8imm:$src3),
        "vinserti64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        []>, EVEX_4V, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4> {
    let mayLoad = 1;
  }
}
344
// 128-bit subvector insert into a 512-bit vector, register source.
// Floating-point types select VINSERTF32x4rr, integer types VINSERTI32x4rr.
def : Pat<(vinsert128_insert:$ins (v16f32 VR512:$src1),
                                  (v4f32 VR128X:$src2), (iPTR imm)),
          (VINSERTF32x4rr VR512:$src1, VR128X:$src2,
                          (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8f64 VR512:$src1),
                                  (v2f64 VR128X:$src2), (iPTR imm)),
          (VINSERTF32x4rr VR512:$src1, VR128X:$src2,
                          (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1),
                                  (v2i64 VR128X:$src2), (iPTR imm)),
          (VINSERTI32x4rr VR512:$src1, VR128X:$src2,
                          (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1),
                                  (v4i32 VR128X:$src2), (iPTR imm)),
          (VINSERTI32x4rr VR512:$src1, VR128X:$src2,
                          (INSERT_get_vinsert128_imm VR512:$ins))>;

// 128-bit subvector insert into a 512-bit vector, memory source.
def : Pat<(vinsert128_insert:$ins (v16f32 VR512:$src1),
                                  (loadv4f32 addr:$src2), (iPTR imm)),
          (VINSERTF32x4rm VR512:$src1, addr:$src2,
                          (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1),
                                  (bc_v4i32 (loadv2i64 addr:$src2)),
                                  (iPTR imm)),
          (VINSERTI32x4rm VR512:$src1, addr:$src2,
                          (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8f64 VR512:$src1),
                                  (loadv2f64 addr:$src2), (iPTR imm)),
          (VINSERTF32x4rm VR512:$src1, addr:$src2,
                          (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1),
                                  (loadv2i64 addr:$src2), (iPTR imm)),
          (VINSERTI32x4rm VR512:$src1, addr:$src2,
                          (INSERT_get_vinsert128_imm VR512:$ins))>;
371
// 256-bit subvector insert into a 512-bit vector, register source.
// Floating-point types select VINSERTF64x4rr, integer types VINSERTI64x4rr.
// All four patterns match through vinsert256_insert, the fragment that pairs
// with INSERT_get_vinsert256_imm.  The two integer patterns previously used
// vinsert128_insert although they insert a 256-bit value.
def : Pat<(vinsert256_insert:$ins (v16f32 VR512:$src1), (v8f32 VR256X:$src2),
           (iPTR imm)), (VINSERTF64x4rr VR512:$src1, VR256X:$src2,
                        (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v8f64 VR512:$src1), (v4f64 VR256X:$src2),
           (iPTR imm)), (VINSERTF64x4rr VR512:$src1, VR256X:$src2,
                        (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v8i64 VR512:$src1), (v4i64 VR256X:$src2),
           (iPTR imm)), (VINSERTI64x4rr VR512:$src1, VR256X:$src2,
                        (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v16i32 VR512:$src1), (v8i32 VR256X:$src2),
           (iPTR imm)), (VINSERTI64x4rr VR512:$src1, VR256X:$src2,
                        (INSERT_get_vinsert256_imm VR512:$ins))>;
384
// 256-bit subvector insert into a 512-bit vector, memory source.
// Floating-point types select VINSERTF64x4rm, integer types VINSERTI64x4rm.
def : Pat<(vinsert256_insert:$ins (v16f32 VR512:$src1),
                                  (loadv8f32 addr:$src2), (iPTR imm)),
          (VINSERTF64x4rm VR512:$src1, addr:$src2,
                          (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v8f64 VR512:$src1),
                                  (loadv4f64 addr:$src2), (iPTR imm)),
          (VINSERTF64x4rm VR512:$src1, addr:$src2,
                          (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v8i64 VR512:$src1),
                                  (loadv4i64 addr:$src2), (iPTR imm)),
          (VINSERTI64x4rm VR512:$src1, addr:$src2,
                          (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v16i32 VR512:$src1),
                                  (bc_v8i32 (loadv4i64 addr:$src2)),
                                  (iPTR imm)),
          (VINSERTI64x4rm VR512:$src1, addr:$src2,
                          (INSERT_get_vinsert256_imm VR512:$ins))>;
398
// vinsertps - insert f32 to XMM
// Register form: selected for X86insertps with both sources in VR128X.
def VINSERTPSzrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
        (ins VR128X:$src1, VR128X:$src2, i8imm:$src3),
        "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set VR128X:$dst,
              (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>,
      EVEX_4V;

// Memory form: the second source is a scalar f32 load turned into a v4f32.
def VINSERTPSzrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
        (ins VR128X:$src1, f32mem:$src2, i8imm:$src3),
        "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set VR128X:$dst,
              (X86insertps VR128X:$src1,
                           (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                           imm:$src3))]>,
      EVEX_4V, EVEX_CD8<32, CD8VT1>;
411
//===----------------------------------------------------------------------===//
// AVX-512 VECTOR EXTRACT
//---

// Defines the register ("rr") and memory ("rm") forms of a vextract*x4
// instruction that extracts a To-typed subvector from a From-typed 512-bit
// vector, plus the patterns for the alternative element types
// (AltFrom/AltTo) that are selected to the same instruction.
multiclass vextract_for_size<int Opcode,
                             X86VectorVTInfo From, X86VectorVTInfo To,
                             X86VectorVTInfo AltFrom, X86VectorVTInfo AltTo,
                             PatFrag vextract_extract,
                             SDNodeXForm EXTRACT_get_vextract_imm> {
  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    // Register destination; carries the selection pattern.
    def rr : AVX512AIi8<Opcode, MRMDestReg, (outs To.RC:$dst),
                (ins VR512:$src1, i8imm:$idx),
                "vextract" # To.EltTypeName #
                  "x4\t{$idx, $src1, $dst|$dst, $src1, $idx}",
                [(set To.RC:$dst,
                      (vextract_extract:$idx (From.VT VR512:$src1),
                                             (iPTR imm)))]>,
              EVEX, EVEX_V512;

    // Memory destination; no pattern, so mayStore is set explicitly.
    def rm : AVX512AIi8<Opcode, MRMDestMem, (outs),
                (ins To.MemOp:$dst, VR512:$src1, i8imm:$src2),
                "vextract" # To.EltTypeName #
                  "x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                []>, EVEX, EVEX_V512, EVEX_CD8<To.EltSize, CD8VT4> {
      let mayStore = 1;
    }
  }

  // Codegen pattern with the alternative types, e.g. v8i64 -> v2i64 for
  // vextracti32x4
  def : Pat<(vextract_extract:$ext (AltFrom.VT VR512:$src1), (iPTR imm)),
            (AltTo.VT (!cast<Instruction>(NAME # To.EltSize # "x4rr")
                          VR512:$src1,
                          (EXTRACT_get_vextract_imm To.RC:$ext)))>;

  // A 128/256-bit subvector extract from the first 512-bit vector position is
  // a subregister copy that needs no instruction.
  def : Pat<(To.VT (extract_subvector (From.VT VR512:$src), (iPTR 0))),
            (To.VT
               (EXTRACT_SUBREG (From.VT VR512:$src), To.SubRegIdx))>;

  // Same subregister copy for the alternative types.
  def : Pat<(AltTo.VT (extract_subvector (AltFrom.VT VR512:$src), (iPTR 0))),
            (AltTo.VT
               (EXTRACT_SUBREG (AltFrom.VT VR512:$src), AltTo.SubRegIdx))>;
}
455
// Instantiates vextract_for_size twice for one type family:
//   NAME#"32x4" - 128-bit extract; primary types use EltVT32, the
//                 alternative types use EltVT64.
//   NAME#"64x4" - 256-bit extract (VEX_W); primary types use EltVT64, the
//                 alternative types use EltVT32.
multiclass vextract_for_type<ValueType EltVT32, int Opcode32,
                             ValueType EltVT64, int Opcode64> {
  defm NAME # "32x4" : vextract_for_size<Opcode32,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 vextract128_extract,
                                 EXTRACT_get_vextract128_imm>;
  // The alternative 256-bit info uses VR256X like every other 256-bit
  // X86VectorVTInfo in this file.  vextract_for_size does not read
  // AltTo.RC, so this only keeps the records consistent.
  defm NAME # "64x4" : vextract_for_size<Opcode64,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 vextract256_extract,
                                 EXTRACT_get_vextract256_imm>, VEX_W;
}

// Floating-point and integer extract families.
defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b>;
defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000476
// A 128-bit subvector insert to the first 512-bit vector position
// is a subregister copy that needs no instruction.  The value is placed via
// sub_xmm into an undefined 256-bit register, which in turn is placed via
// sub_ymm into an undefined 512-bit register.
def : Pat<(insert_subvector undef, (v2i64 VR128X:$src), (iPTR 0)),
          (INSERT_SUBREG
             (v8i64 (IMPLICIT_DEF)),
             (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
             sub_ymm)>;
def : Pat<(insert_subvector undef, (v2f64 VR128X:$src), (iPTR 0)),
          (INSERT_SUBREG
             (v8f64 (IMPLICIT_DEF)),
             (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
             sub_ymm)>;
def : Pat<(insert_subvector undef, (v4i32 VR128X:$src), (iPTR 0)),
          (INSERT_SUBREG
             (v16i32 (IMPLICIT_DEF)),
             (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
             sub_ymm)>;
def : Pat<(insert_subvector undef, (v4f32 VR128X:$src), (iPTR 0)),
          (INSERT_SUBREG
             (v16f32 (IMPLICIT_DEF)),
             (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
             sub_ymm)>;

// The 256-bit case needs a single sub_ymm insert into an undefined 512-bit
// register.
def : Pat<(insert_subvector undef, (v4i64 VR256X:$src), (iPTR 0)),
          (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
def : Pat<(insert_subvector undef, (v4f64 VR256X:$src), (iPTR 0)),
          (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
def : Pat<(insert_subvector undef, (v8i32 VR256X:$src), (iPTR 0)),
          (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
def : Pat<(insert_subvector undef, (v8f32 VR256X:$src), (iPTR 0)),
          (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
504
// vextractps - extract 32 bits from XMM
// Register form: the v4f32 source is viewed as v4i32 and the selected
// element is written to a GR32.
def VEXTRACTPSzrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
        (ins VR128X:$src1, i32i8imm:$src2),
        "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
        [(set GR32:$dst,
              (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
      EVEX;

// Memory form: the extracted element is stored to $dst.
def VEXTRACTPSzmr : AVX512AIi8<0x17, MRMDestMem, (outs),
        (ins f32mem:$dst, VR128X:$src1, i32i8imm:$src2),
        "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
        [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
                addr:$dst)]>,
      EVEX, EVEX_CD8<32, CD8VT1>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000517
//===---------------------------------------------------------------------===//
// AVX-512 BROADCAST
//---

// Register ("rr") and memory ("rm") forms of a floating-point broadcast.
// Neither record carries a selection pattern.
multiclass avx512_fp_broadcast<bits<8> opc, string OpcodeStr,
                               RegisterClass DestRC,
                               RegisterClass SrcRC, X86MemOperand x86memop> {
  def rr : AVX5128I<opc, MRMSrcReg, (outs DestRC:$dst), (ins SrcRC:$src),
                    OpcodeStr # " \t{$src, $dst|$dst, $src}",
                    []>, EVEX;
  def rm : AVX5128I<opc, MRMSrcMem, (outs DestRC:$dst), (ins x86memop:$src),
                    OpcodeStr # " \t{$src, $dst|$dst, $src}",
                    []>, EVEX;
}
// Single-precision broadcast into a 512-bit register.
let ExeDomain = SSEPackedSingle in
  defm VBROADCASTSSZ : avx512_fp_broadcast<0x18, "vbroadcastss", VR512,
                                           VR128X, f32mem>,
                       EVEX_V512, EVEX_CD8<32, CD8VT1>;

// Double-precision broadcast into a 512-bit register.
let ExeDomain = SSEPackedDouble in
  defm VBROADCASTSDZ : avx512_fp_broadcast<0x19, "vbroadcastsd", VR512,
                                           VR128X, f64mem>,
                       EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

// Broadcast of a scalar load selects the memory form.
def : Pat<(v16f32 (X86VBroadcast (loadf32 addr:$src))),
          (VBROADCASTSSZrm addr:$src)>;
def : Pat<(v8f64 (X86VBroadcast (loadf64 addr:$src))),
          (VBROADCASTSDZrm addr:$src)>;

// The broadcast intrinsics taking an address select the memory form too.
def : Pat<(int_x86_avx512_vbroadcast_ss_512 addr:$src),
          (VBROADCASTSSZrm addr:$src)>;
def : Pat<(int_x86_avx512_vbroadcast_sd_512 addr:$src),
          (VBROADCASTSDZrm addr:$src)>;
551
// Integer broadcast from a general-purpose register into VR512.
//   Zrr  - unmasked form.
//   Zkrr - zero-masking form (EVEX_KZ) with the mask as first input.
// Neither record carries a selection pattern.
multiclass avx512_int_broadcast_reg<bits<8> opc, string OpcodeStr,
                                    RegisterClass SrcRC, RegisterClass KRC> {
  def Zrr : AVX5128I<opc, MRMSrcReg, (outs VR512:$dst), (ins SrcRC:$src),
                     OpcodeStr # " \t{$src, $dst|$dst, $src}",
                     []>, EVEX, EVEX_V512;
  def Zkrr : AVX5128I<opc, MRMSrcReg, (outs VR512:$dst),
                      (ins KRC:$mask, SrcRC:$src),
                      OpcodeStr #
                        " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                      []>, EVEX, EVEX_V512, EVEX_KZ;
}
563
// GPR-source integer broadcasts: dword from GR32 with a 16-lane write mask,
// qword from GR64 with an 8-lane write mask (VEX_W set).
defm VPBROADCASTDr : avx512_int_broadcast_reg<0x7C, "vpbroadcastd",
                                              GR32, VK16WM>;
defm VPBROADCASTQr : avx512_int_broadcast_reg<0x7C, "vpbroadcastq",
                                              GR64, VK8WM>,
                     VEX_W;
567
// Zero-extend of a mask to a vector: masked broadcast of the constant 1.
def : Pat<(v16i32 (X86vzext VK16WM:$mask)),
          (VPBROADCASTDrZkrr VK16WM:$mask, (i32 (MOV32ri 0x1)))>;

def : Pat<(v8i64 (X86vzext VK8WM:$mask)),
          (VPBROADCASTQrZkrr VK8WM:$mask, (i64 (MOV64ri 0x1)))>;

// Broadcast nodes with a GPR source, unmasked and masked.
def : Pat<(v16i32 (X86VBroadcast (i32 GR32:$src))),
          (VPBROADCASTDrZrr GR32:$src)>;
def : Pat<(v16i32 (X86VBroadcastm VK16WM:$mask, (i32 GR32:$src))),
          (VPBROADCASTDrZkrr VK16WM:$mask, GR32:$src)>;
def : Pat<(v8i64 (X86VBroadcast (i64 GR64:$src))),
          (VPBROADCASTQrZrr GR64:$src)>;
def : Pat<(v8i64 (X86VBroadcastm VK8WM:$mask, (i64 GR64:$src))),
          (VPBROADCASTQrZkrr VK8WM:$mask, GR64:$src)>;

// Unmasked GPR broadcast intrinsics.
def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_i32_512 (i32 GR32:$src))),
          (VPBROADCASTDrZrr GR32:$src)>;
def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_i64_512 (i64 GR64:$src))),
          (VPBROADCASTQrZrr GR64:$src)>;

// Masked GPR broadcast intrinsics with an all-zeros pass-through operand.
// The GPR mask is copied into the write-mask register class first.
def : Pat<(v16i32 (int_x86_avx512_mask_pbroadcast_d_gpr_512
                     (i32 GR32:$src),
                     (v16i32 immAllZerosV),
                     (i16 GR16:$mask))),
          (VPBROADCASTDrZkrr (COPY_TO_REGCLASS GR16:$mask, VK16WM),
                             GR32:$src)>;
def : Pat<(v8i64 (int_x86_avx512_mask_pbroadcast_q_gpr_512
                    (i64 GR64:$src),
                    (bc_v8i64 (v16i32 immAllZerosV)),
                    (i8 GR8:$mask))),
          (VPBROADCASTQrZkrr (COPY_TO_REGCLASS GR8:$mask, VK8WM),
                             GR64:$src)>;
594
// Integer broadcast from a VR128X register or from memory into DstRC.
//   rr  / rm  - unmasked; selected for X86VBroadcast.
//   krr / krm - masked with KRC, printed with "{z}"; selected for
//               X86VBroadcastm.
// The register forms read the source as SrcVT; the memory forms load through
// ld_frag. The result type is OpVT.
multiclass avx512_int_broadcast_rm<bits<8> opc, string OpcodeStr,
                                   X86MemOperand x86memop, PatFrag ld_frag,
                                   RegisterClass DstRC, ValueType OpVT,
                                   ValueType SrcVT, RegisterClass KRC> {
  def rr  : AVX5128I<opc, MRMSrcReg,
                     (outs DstRC:$dst),
                     (ins VR128X:$src),
                     !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
                     [(set DstRC:$dst,
                       (OpVT (X86VBroadcast (SrcVT VR128X:$src))))]>,
            EVEX;

  def krr : AVX5128I<opc, MRMSrcReg,
                     (outs DstRC:$dst),
                     (ins KRC:$mask, VR128X:$src),
                     !strconcat(OpcodeStr,
                       " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
                     [(set DstRC:$dst,
                       (OpVT (X86VBroadcastm KRC:$mask,
                                             (SrcVT VR128X:$src))))]>,
            EVEX, EVEX_KZ;

  let mayLoad = 1 in
  def rm  : AVX5128I<opc, MRMSrcMem,
                     (outs DstRC:$dst),
                     (ins x86memop:$src),
                     !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
                     [(set DstRC:$dst,
                       (OpVT (X86VBroadcast (ld_frag addr:$src))))]>,
            EVEX;

  let mayLoad = 1 in
  def krm : AVX5128I<opc, MRMSrcMem,
                     (outs DstRC:$dst),
                     (ins KRC:$mask, x86memop:$src),
                     !strconcat(OpcodeStr,
                       " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
                     [(set DstRC:$dst,
                       (OpVT (X86VBroadcastm KRC:$mask,
                                             (ld_frag addr:$src))))]>,
            EVEX, EVEX_KZ;
}
623
// 512-bit integer broadcasts from an xmm register or a scalar memory operand.
defm VPBROADCASTDZ : avx512_int_broadcast_rm<0x58, "vpbroadcastd",
                                             i32mem, loadi32, VR512,
                                             v16i32, v4i32, VK16WM>,
                     EVEX_V512, EVEX_CD8<32, CD8VT1>;
defm VPBROADCASTQZ : avx512_int_broadcast_rm<0x59, "vpbroadcastq",
                                             i64mem, loadi64, VR512,
                                             v8i64, v2i64, VK8WM>,
                     EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
630
// Memory-only broadcast into VR512.
//   rm  - unmasked form.
//   krm - masked form taking a KRC mask; printed with "{z}".
// Both records have empty patterns, so mayLoad is set explicitly. ld_frag is
// accepted but not referenced by either record.
multiclass avx512_int_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
                                          X86MemOperand x86memop,
                                          PatFrag ld_frag,
                                          RegisterClass KRC> {
  let mayLoad = 1 in
  def rm  : AVX5128I<opc, MRMSrcMem,
                     (outs VR512:$dst),
                     (ins x86memop:$src),
                     !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
                     []>,
            EVEX;

  let mayLoad = 1 in
  def krm : AVX5128I<opc, MRMSrcMem,
                     (outs VR512:$dst),
                     (ins KRC:$mask, x86memop:$src),
                     !strconcat(OpcodeStr,
                       " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
                     []>,
            EVEX, EVEX_KZ;
}
645
// Sub-vector broadcasts into a 512-bit register.
//   VBROADCASTI32X4: 128-bit memory source, dword elements, 16-lane mask.
//   VBROADCASTI64X4: 256-bit memory source, qword elements, 8-lane mask.
// A 512-bit result has only eight qword lanes, so the 64X4 form takes VK8WM
// rather than the 16-lane VK16WM.
defm VBROADCASTI32X4 : avx512_int_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                                                      i128mem, loadv2i64,
                                                      VK16WM>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTI64X4 : avx512_int_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
                                                      i256mem, loadv4i64,
                                                      VK8WM>,
                       VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
652
// Integer broadcast intrinsics taking an xmm source.
def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_512 (v4i32 VR128X:$src))),
          (VPBROADCASTDZrr VR128X:$src)>;
def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_512 (v2i64 VR128X:$src))),
          (VPBROADCASTQZrr VR128X:$src)>;

// FP broadcast nodes taking an xmm source.
def : Pat<(v16f32 (X86VBroadcast (v4f32 VR128X:$src))),
          (VBROADCASTSSZrr VR128X:$src)>;
def : Pat<(v8f64 (X86VBroadcast (v2f64 VR128X:$src))),
          (VBROADCASTSDZrr VR128X:$src)>;

// FP broadcast intrinsics taking an xmm source.
def : Pat<(v16f32 (int_x86_avx512_vbroadcast_ss_ps_512 (v4f32 VR128X:$src))),
          (VBROADCASTSSZrr VR128X:$src)>;
def : Pat<(v8f64 (int_x86_avx512_vbroadcast_sd_pd_512 (v2f64 VR128X:$src))),
          (VBROADCASTSDZrr VR128X:$src)>;

// Fallback for a scalar FP register source. The load-folding patterns cannot
// be selected when the load node has other users, so the scalar is moved into
// VR128X and the register form is used instead.
def : Pat<(v16f32 (X86VBroadcast FR32X:$src)),
          (VBROADCASTSSZrr (COPY_TO_REGCLASS FR32X:$src, VR128X))>;
def : Pat<(v8f64 (X86VBroadcast FR64X:$src)),
          (VBROADCASTSDZrr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
674
675
// Masked v8i32 broadcast of a loaded i32: done with the 512-bit masked memory
// form (the 8-lane mask is copied into VK16WM), then the ymm sub-register of
// the result is extracted.
let Predicates = [HasAVX512] in
def : Pat<(v8i32 (X86VBroadcastm (v8i1 VK8WM:$mask), (loadi32 addr:$src))),
          (EXTRACT_SUBREG
            (v16i32 (VPBROADCASTDZkrm
                      (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
                      addr:$src)),
            sub_ymm)>;
682//===----------------------------------------------------------------------===//
683// AVX-512 BROADCAST MASK TO VECTOR REGISTER
684//---
685
// Broadcast of a mask register (KRC) into a vector register (DstRC).
// The record has no selection pattern; OpVT and SrcVT are accepted but not
// referenced.
//
// VPBROADCASTMW2D/MB2Q encode the vector destination in ModRM.reg and the
// mask source in ModRM.r/m, so the form is MRMSrcReg. MRMDestReg would swap
// the two register fields in the encoding.
multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
                                 RegisterClass DstRC, RegisterClass KRC,
                                 ValueType OpVT, ValueType SrcVT> {
  def rr : AVX512XS8I<opc, MRMSrcReg,
                      (outs DstRC:$dst),
                      (ins KRC:$src),
                      !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
                      []>,
           EVEX;
}
693
// Mask-to-vector broadcasts, available only with the HasCDI predicate.
let Predicates = [HasCDI] in {
  defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
                                               VR512, VK16, v16i32, v16i1>,
                         EVEX_V512;
  defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
                                               VR512, VK8, v8i64, v8i1>,
                         EVEX_V512, VEX_W;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000700
701//===----------------------------------------------------------------------===//
702// AVX-512 - VPERM
703//
// -- immediate form --
// Permute with an 8-bit immediate control.
//   ri - register source.
//   mi - memory source loaded through mem_frag.
// Both select (OpVT (OpNode src, (i8 imm))).
multiclass avx512_perm_imm<bits<8> opc, string OpcodeStr, RegisterClass RC,
                           SDNode OpNode, PatFrag mem_frag,
                           X86MemOperand x86memop, ValueType OpVT> {
  def ri : AVX512AIi8<opc, MRMSrcReg,
                      (outs RC:$dst),
                      (ins RC:$src1, i8imm:$src2),
                      !strconcat(OpcodeStr,
                        " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      [(set RC:$dst,
                        (OpVT (OpNode RC:$src1, (i8 imm:$src2))))]>,
           EVEX;

  def mi : AVX512AIi8<opc, MRMSrcMem,
                      (outs RC:$dst),
                      (ins x86memop:$src1, i8imm:$src2),
                      !strconcat(OpcodeStr,
                        " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      [(set RC:$dst,
                        (OpVT (OpNode (mem_frag addr:$src1),
                                      (i8 imm:$src2))))]>,
           EVEX;
}
723
// Immediate-controlled 64-bit element permutes on 512-bit vectors.
defm VPERMQZ  : avx512_perm_imm<0x00, "vpermq", VR512, X86VPermi,
                                memopv8i64, i512mem, v8i64>,
                EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

let ExeDomain = SSEPackedDouble in
defm VPERMPDZ : avx512_perm_imm<0x01, "vpermpd", VR512, X86VPermi,
                                memopv8f64, f512mem, v8f64>,
                EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
729
// -- VPERM - register form --
// Two-source permute selected for X86VPermv.
//   rr - both sources in registers.
//   rm - second source loaded from memory through mem_frag.
multiclass avx512_perm<bits<8> opc, string OpcodeStr, RegisterClass RC,
                       PatFrag mem_frag, X86MemOperand x86memop,
                       ValueType OpVT> {
  def rr : AVX5128I<opc, MRMSrcReg,
                    (outs RC:$dst),
                    (ins RC:$src1, RC:$src2),
                    !strconcat(OpcodeStr,
                      " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set RC:$dst,
                      (OpVT (X86VPermv RC:$src1, RC:$src2)))]>,
           EVEX_4V;

  def rm : AVX5128I<opc, MRMSrcMem,
                    (outs RC:$dst),
                    (ins RC:$src1, x86memop:$src2),
                    !strconcat(OpcodeStr,
                      " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set RC:$dst,
                      (OpVT (X86VPermv RC:$src1, (mem_frag addr:$src2))))]>,
           EVEX_4V;
}
749
// Variable permutes on 512-bit vectors. The integer forms use opcode 0x36 and
// the floating-point forms 0x16; the 64-bit element forms set VEX_W.
defm VPERMDZ  : avx512_perm<0x36, "vpermd", VR512,
                            memopv16i32, i512mem, v16i32>,
                EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPERMQZ  : avx512_perm<0x36, "vpermq", VR512,
                            memopv8i64, i512mem, v8i64>,
                EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

let ExeDomain = SSEPackedSingle in
defm VPERMPSZ : avx512_perm<0x16, "vpermps", VR512,
                            memopv16f32, f512mem, v16f32>,
                EVEX_V512, EVEX_CD8<32, CD8VF>;

let ExeDomain = SSEPackedDouble in
defm VPERMPDZ : avx512_perm<0x16, "vpermpd", VR512,
                            memopv8f64, f512mem, v8f64>,
                EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
760
// -- VPERM2I - 3 source operands form --
// Three-source permute in which $src1 is tied to $dst, so $src1 is not
// printed in the assembly string.
//   rr   / rm   - unmasked.
//   rrk  / rmk  - masked with KRC; lanes not selected by the mask keep
//                 $src1 (vselect mask, result, $src1).
//   rrkz / rmkz - masked with KRC; lanes not selected by the mask are zero
//                 (vselect mask, result, all-zeros).
// The memory forms load $src3 through mem_frag.
//
// The rrkz AT&T string no longer has a space before the '|' separator; it
// leaked into the printed AT&T syntax and did not match rmkz.
multiclass avx512_perm_3src<bits<8> opc, string OpcodeStr, RegisterClass RC,
                          PatFrag mem_frag, X86MemOperand x86memop,
                          SDNode OpNode, ValueType OpVT, RegisterClass KRC> {
let Constraints = "$src1 = $dst" in {
  def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
                   (ins RC:$src1, RC:$src2, RC:$src3),
                   !strconcat(OpcodeStr,
                       " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                   [(set RC:$dst,
                     (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>,
                    EVEX_4V;

  def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
                   (ins RC:$src1, KRC:$mask, RC:$src2, RC:$src3),
                   !strconcat(OpcodeStr,
                       " \t{$src3, $src2, $dst {${mask}}|"
                       "$dst {${mask}}, $src2, $src3}"),
                   [(set RC:$dst, (OpVT (vselect KRC:$mask,
                                           (OpNode RC:$src1, RC:$src2,
                                              RC:$src3),
                                           RC:$src1)))]>,
                    EVEX_4V, EVEX_K;

  let AddedComplexity = 30 in // Prefer over VMOV*rrkz Pat<>
    def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
                   (ins RC:$src1, KRC:$mask, RC:$src2, RC:$src3),
                   !strconcat(OpcodeStr,
                       " \t{$src3, $src2, $dst {${mask}} {z}|"
                       "$dst {${mask}} {z}, $src2, $src3}"),
                   [(set RC:$dst, (OpVT (vselect KRC:$mask,
                                           (OpNode RC:$src1, RC:$src2,
                                              RC:$src3),
                                           (OpVT (bitconvert
                                              (v16i32 immAllZerosV))))))]>,
                    EVEX_4V, EVEX_KZ;

  def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                   (ins RC:$src1, RC:$src2, x86memop:$src3),
                   !strconcat(OpcodeStr,
                    " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                   [(set RC:$dst,
                     (OpVT (OpNode RC:$src1, RC:$src2,
                      (mem_frag addr:$src3))))]>, EVEX_4V;

  def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                   (ins RC:$src1, KRC:$mask, RC:$src2, x86memop:$src3),
                   !strconcat(OpcodeStr,
                    " \t{$src3, $src2, $dst {${mask}}|"
                    "$dst {${mask}}, $src2, $src3}"),
                   [(set RC:$dst,
                     (OpVT (vselect KRC:$mask,
                                    (OpNode RC:$src1, RC:$src2,
                                       (mem_frag addr:$src3)),
                                    RC:$src1)))]>,
                    EVEX_4V, EVEX_K;

  let AddedComplexity = 10 in // Prefer over the rrkz variant
    def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                   (ins RC:$src1, KRC:$mask, RC:$src2, x86memop:$src3),
                   !strconcat(OpcodeStr,
                    " \t{$src3, $src2, $dst {${mask}} {z}|"
                    "$dst {${mask}} {z}, $src2, $src3}"),
                   [(set RC:$dst,
                     (OpVT (vselect KRC:$mask,
                                    (OpNode RC:$src1, RC:$src2,
                                            (mem_frag addr:$src3)),
                                    (OpVT (bitconvert
                                       (v16i32 immAllZerosV))))))]>,
                    EVEX_4V, EVEX_KZ;
  }
}
// VPERMI2*: three-source permutes selected for X86VPermiv3.
// The integer forms use i512mem. The floating-point forms use f512mem, the
// same memory operand the other 512-bit FP definitions in this file use
// (e.g. VPERMPSZ/VPERMPDZ).
defm VPERMI2D  : avx512_perm_3src<0x76, "vpermi2d",  VR512, memopv16i32,
                                  i512mem, X86VPermiv3, v16i32, VK16WM>,
                 EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPERMI2Q  : avx512_perm_3src<0x76, "vpermi2q",  VR512, memopv8i64,
                                  i512mem, X86VPermiv3, v8i64, VK8WM>,
                 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMI2PS : avx512_perm_3src<0x77, "vpermi2ps",  VR512, memopv16f32,
                                  f512mem, X86VPermiv3, v16f32, VK16WM>,
                 EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPERMI2PD : avx512_perm_3src<0x77, "vpermi2pd",  VR512, memopv8f64,
                                  f512mem, X86VPermiv3, v8f64, VK8WM>,
                 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000845
// VPERMT2* = avx512_perm_3src with the mnemonic "vpermt2" + Suffix, plus two
// patterns for the int_x86_avx512_mask_vpermt_<Suffix>_512 intrinsic:
//   * mask operand -1  -> the unmasked rr form;
//   * mask in MRC      -> the rrk form, with the mask copied into KRC and
//                         typed as MaskVT.
// In both, the intrinsic's ($idx, $src1, $src2) operands are passed to the
// instruction as ($src1, $idx, $src2).
multiclass avx512_perm_table_3src<bits<8> opc, string Suffix, RegisterClass RC,
                                  PatFrag mem_frag, X86MemOperand x86memop,
                                  SDNode OpNode, ValueType OpVT,
                                  RegisterClass KRC, ValueType MaskVT,
                                  RegisterClass MRC>
    : avx512_perm_3src<opc, "vpermt2"##Suffix, RC, mem_frag, x86memop, OpNode,
                       OpVT, KRC> {
  // Unmasked use of the intrinsic.
  def : Pat<(OpVT (!cast<Intrinsic>("int_x86_avx512_mask_vpermt_"##Suffix##"_512")
                     VR512:$idx, VR512:$src1, VR512:$src2, -1)),
            (!cast<Instruction>(NAME#rr)
               VR512:$src1, VR512:$idx, VR512:$src2)>;

  // Masked use of the intrinsic.
  def : Pat<(OpVT (!cast<Intrinsic>("int_x86_avx512_mask_vpermt_"##Suffix##"_512")
                     VR512:$idx, VR512:$src1, VR512:$src2, MRC:$mask)),
            (!cast<Instruction>(NAME#rrk)
               VR512:$src1,
               (MaskVT (COPY_TO_REGCLASS MRC:$mask, KRC)),
               VR512:$idx, VR512:$src2)>;
}
861
// VPERMT2*: three-source permutes selected for X86VPermv3, with the intrinsic
// patterns supplied by avx512_perm_table_3src.
// The integer forms use i512mem. The floating-point forms use f512mem, the
// same memory operand the other 512-bit FP definitions in this file use.
defm VPERMT2D  : avx512_perm_table_3src<0x7E, "d", VR512, memopv16i32, i512mem,
                               X86VPermv3, v16i32, VK16WM, v16i1, GR16>,
                 EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPERMT2Q  : avx512_perm_table_3src<0x7E, "q", VR512, memopv8i64, i512mem,
                               X86VPermv3, v8i64, VK8WM, v8i1, GR8>,
                 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMT2PS : avx512_perm_table_3src<0x7F, "ps",  VR512, memopv16f32, f512mem,
                               X86VPermv3, v16f32, VK16WM, v16i1, GR16>,
                 EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPERMT2PD : avx512_perm_table_3src<0x7F, "pd",  VR512, memopv8f64, f512mem,
                               X86VPermv3, v8f64, VK8WM, v8i1, GR8>,
                 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovsky299cf5112014-04-29 09:09:15 +0000874
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000875//===----------------------------------------------------------------------===//
876// AVX-512 - BLEND using mask
877//
// Mask-controlled blend of two vectors.
//   rr - selected for (OpNode mask, $src2, $src1); note that the pattern
//        passes $src2 first.
//   rm - memory form with an empty pattern, so mayLoad is set explicitly.
// mem_frag is accepted but not referenced by either record.
multiclass avx512_blendmask<bits<8> opc, string OpcodeStr,
                            RegisterClass KRC, RegisterClass RC,
                            X86MemOperand x86memop, PatFrag mem_frag,
                            SDNode OpNode, ValueType vt> {
  def rr : AVX5128I<opc, MRMSrcReg,
                    (outs RC:$dst),
                    (ins KRC:$mask, RC:$src1, RC:$src2),
                    !strconcat(OpcodeStr,
                      " \t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
                    [(set RC:$dst,
                      (OpNode KRC:$mask, (vt RC:$src2), (vt RC:$src1)))]>,
           EVEX_4V, EVEX_K;

  let mayLoad = 1 in
  def rm : AVX5128I<opc, MRMSrcMem,
                    (outs RC:$dst),
                    (ins KRC:$mask, RC:$src1, x86memop:$src2),
                    !strconcat(OpcodeStr,
                      " \t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
                    []>,
           EVEX_4V, EVEX_K;
}
895
// Floating-point masked blends on 512-bit vectors, selected for vselect.
let ExeDomain = SSEPackedSingle in
defm VBLENDMPSZ : avx512_blendmask<0x65, "vblendmps",
                                   VK16WM, VR512, f512mem,
                                   memopv16f32, vselect, v16f32>,
                  EVEX_CD8<32, CD8VF>, EVEX_V512;

let ExeDomain = SSEPackedDouble in
defm VBLENDMPDZ : avx512_blendmask<0x65, "vblendmpd",
                                   VK8WM, VR512, f512mem,
                                   memopv8f64, vselect, v8f64>,
                  VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V512;
906
// FP blend intrinsics: the GPR mask is copied into the write-mask register
// class and the register form of the blend is used.
def : Pat<(v16f32 (int_x86_avx512_mask_blend_ps_512
                     (v16f32 VR512:$src1),
                     (v16f32 VR512:$src2),
                     (i16 GR16:$mask))),
          (VBLENDMPSZrr (COPY_TO_REGCLASS GR16:$mask, VK16WM),
                        VR512:$src1, VR512:$src2)>;

def : Pat<(v8f64 (int_x86_avx512_mask_blend_pd_512
                    (v8f64 VR512:$src1),
                    (v8f64 VR512:$src2),
                    (i8 GR8:$mask))),
          (VBLENDMPDZrr (COPY_TO_REGCLASS GR8:$mask, VK8WM),
                        VR512:$src1, VR512:$src2)>;
916
// Integer masked blends on 512-bit vectors, selected for vselect.
// These use the integer memory operand i512mem, as the other 512-bit integer
// definitions in this file do (e.g. VPERMDZ/VPERMQZ), rather than the
// floating-point f512mem.
defm VPBLENDMDZ : avx512_blendmask<0x64, "vpblendmd",
                                   VK16WM, VR512, i512mem,
                                   memopv16i32, vselect, v16i32>,
                  EVEX_CD8<32, CD8VF>, EVEX_V512;

defm VPBLENDMQZ : avx512_blendmask<0x64, "vpblendmq",
                                   VK8WM, VR512, i512mem,
                                   memopv8i64, vselect, v8i64>,
                  VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V512;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000926
// Integer blend intrinsics: the GPR mask is copied into the write-mask
// register class and the register form of the blend is used.
// VPBLENDMDZ/VPBLENDMQZ are instantiated with VK16WM/VK8WM as their mask
// class, so the copy targets those classes, as the FP blend patterns do,
// rather than the plain VK16/VK8 classes.
def : Pat<(v16i32 (int_x86_avx512_mask_blend_d_512
                     (v16i32 VR512:$src1),
                     (v16i32 VR512:$src2),
                     (i16 GR16:$mask))),
          (VPBLENDMDZrr (COPY_TO_REGCLASS GR16:$mask, VK16WM),
                        VR512:$src1, VR512:$src2)>;

def : Pat<(v8i64 (int_x86_avx512_mask_blend_q_512
                    (v8i64 VR512:$src1),
                    (v8i64 VR512:$src2),
                    (i8 GR8:$mask))),
          (VPBLENDMQZrr (COPY_TO_REGCLASS GR8:$mask, VK8WM),
                        VR512:$src1, VR512:$src2)>;
936
// 256-bit vselect with a v8i1 mask, done with the 512-bit blend: both inputs
// are placed in the ymm sub-register of a 512-bit value, the mask is copied
// into VK16WM, and the ymm sub-register of the result is extracted. The
// instruction receives $src2 before $src1.
let Predicates = [HasAVX512] in {
  def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask),
                            (v8f32 VR256X:$src1),
                            (v8f32 VR256X:$src2))),
            (EXTRACT_SUBREG
              (v16f32 (VBLENDMPSZrr
                        (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
                        (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
                        (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))),
              sub_ymm)>;

  def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask),
                            (v8i32 VR256X:$src1),
                            (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG
              (v16i32 (VPBLENDMDZrr
                        (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
                        (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
                        (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))),
              sub_ymm)>;
}
Elena Demikhovsky47fc44e2013-12-16 13:52:35 +0000952//===----------------------------------------------------------------------===//
953// Compare Instructions
954//===----------------------------------------------------------------------===//
955
// avx512_cmp_scalar - AVX512 CMPSS and CMPSD
// Scalar FP compare (opcode 0xC2) that writes its result to a VK1 mask.
//   rr / rm           - condition code given as a CC operand; these carry
//                       the selection patterns. rm loads $src2 via ld_frag.
//   rri_alt / rmi_alt - assembler-only forms that take the condition as a
//                       plain i8 immediate (asm_alt string).
// rmi_alt has an empty pattern and hasSideEffects = 0, so TableGen cannot
// infer that it reads memory; mayLoad is therefore set explicitly.
multiclass avx512_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
                            Operand CC, SDNode OpNode, ValueType VT,
                            PatFrag ld_frag, string asm, string asm_alt> {
  def rr : AVX512Ii8<0xC2, MRMSrcReg,
                (outs VK1:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
                [(set VK1:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))],
                IIC_SSE_ALU_F32S_RR>, EVEX_4V;
  def rm : AVX512Ii8<0xC2, MRMSrcMem,
                (outs VK1:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
                [(set VK1:$dst, (OpNode (VT RC:$src1),
                (ld_frag addr:$src2), imm:$cc))], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    def rri_alt : AVX512Ii8<0xC2, MRMSrcReg,
               (outs VK1:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
               asm_alt, [], IIC_SSE_ALU_F32S_RR>, EVEX_4V;
    let mayLoad = 1 in
    def rmi_alt : AVX512Ii8<0xC2, MRMSrcMem,
               (outs VK1:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
               asm_alt, [], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
  }
}
977
// Scalar FP compares. The first string prints the condition code as part of
// the mnemonic; the second is the explicit-immediate assembler form.
let Predicates = [HasAVX512] in {
  defm VCMPSSZ : avx512_cmp_scalar<FR32X, f32mem, AVXCC, X86cmpms,
                                   f32, loadf32,
                   "vcmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                   "vcmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
                 XS;

  defm VCMPSDZ : avx512_cmp_scalar<FR64X, f64mem, AVXCC, X86cmpms,
                                   f64, loadf64,
                   "vcmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                   "vcmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
                 XD, VEX_W;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000988
// Packed integer compare that writes a mask register. All register classes,
// value types and memory fragments come from the X86VectorVTInfo argument `_`.
//   rr  / rm  - unmasked compare; result is (OpNode $src1, $src2).
//   rrk / rmk - compare under write mask $mask; result is
//               (and $mask, (OpNode $src1, $src2)).
// The memory forms load $src2 through _.LdFrag and bitconvert it to _.VT.
multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86VectorVTInfo _> {
  def rr  : AVX512BI<opc, MRMSrcReg,
              (outs _.KRC:$dst),
              (ins _.RC:$src1, _.RC:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
              [(set _.KRC:$dst,
                 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))],
              IIC_SSE_ALU_F32P_RR>,
            EVEX_4V;

  let mayLoad = 1 in
  def rm  : AVX512BI<opc, MRMSrcMem,
              (outs _.KRC:$dst),
              (ins _.RC:$src1, _.MemOp:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
              [(set _.KRC:$dst,
                 (OpNode (_.VT _.RC:$src1),
                         (_.VT (bitconvert (_.LdFrag addr:$src2)))))],
              IIC_SSE_ALU_F32P_RM>,
            EVEX_4V;

  def rrk : AVX512BI<opc, MRMSrcReg,
              (outs _.KRC:$dst),
              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, $src2}"),
              [(set _.KRC:$dst,
                 (and _.KRCWM:$mask,
                      (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))))],
              IIC_SSE_ALU_F32P_RR>,
            EVEX_4V, EVEX_K;

  let mayLoad = 1 in
  def rmk : AVX512BI<opc, MRMSrcMem,
              (outs _.KRC:$dst),
              (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, $src2}"),
              [(set _.KRC:$dst,
                 (and _.KRCWM:$mask,
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT (bitconvert (_.LdFrag addr:$src2))))))],
              IIC_SSE_ALU_F32P_RM>,
            EVEX_4V, EVEX_K;
}
1021
// avx512_icmp_packed plus broadcast-from-memory forms.
//   rmb  - $src2 is a scalar memory operand broadcast to the vector width.
//   rmbk - same, under write mask $mask (result ANDed with the mask).
// The broadcast operand is printed with the _.BroadcastStr suffix.
multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  X86VectorVTInfo _>
    : avx512_icmp_packed<opc, OpcodeStr, OpNode, _> {
  let mayLoad = 1 in
  def rmb  : AVX512BI<opc, MRMSrcMem,
               (outs _.KRC:$dst),
               (ins _.RC:$src1, _.ScalarMemOp:$src2),
               !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
                                     "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
               [(set _.KRC:$dst,
                  (OpNode (_.VT _.RC:$src1),
                          (X86VBroadcast (_.ScalarLdFrag addr:$src2))))],
               IIC_SSE_ALU_F32P_RM>,
             EVEX_4V, EVEX_B;

  let mayLoad = 1 in
  def rmbk : AVX512BI<opc, MRMSrcMem,
               (outs _.KRC:$dst),
               (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
               !strconcat(OpcodeStr,
                          "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
               [(set _.KRC:$dst,
                  (and _.KRCWM:$mask,
                       (OpNode (_.VT _.RC:$src1),
                               (X86VBroadcast
                                 (_.ScalarLdFrag addr:$src2)))))],
               IIC_SSE_ALU_F32P_RM>,
             EVEX_4V, EVEX_K, EVEX_B;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001046
// Instantiates avx512_icmp_packed at all three vector lengths.
//   Z          - 512-bit, guarded by `prd`.
//   Z256, Z128 - 256/128-bit, guarded by `prd` and HasVLX.
multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in {
    defm Z    : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info512>,
                EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info128>,
                EVEX_V128;
  }
}
1060
// Instantiates avx512_icmp_packed_rmb (compare plus broadcast-memory forms)
// at all three vector lengths.
//   Z          - 512-bit, guarded by `prd`.
//   Z256, Z128 - 256/128-bit, guarded by `prd` and HasVLX.
multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
                                     SDNode OpNode,
                                     AVX512VLVectorVTInfo VTInfo,
                                     Predicate prd> {
  let Predicates = [prd] in {
    defm Z    : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, VTInfo.info512>,
                EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, VTInfo.info128>,
                EVEX_V128;
  }
}
1075
// Packed integer equality compares. The byte/word forms are guarded by HasBWI
// and have no broadcast-memory forms; the dword/qword forms are guarded by
// HasAVX512 and include them.
defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm,
                                      avx512vl_i8_info, HasBWI>,
                EVEX_CD8<8, CD8VF>;

defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm,
                                      avx512vl_i16_info, HasBWI>,
                EVEX_CD8<16, CD8VF>;

defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm,
                                          avx512vl_i32_info, HasAVX512>,
                EVEX_CD8<32, CD8VF>;

defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm,
                                          avx512vl_i64_info, HasAVX512>,
                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Packed integer greater-than compares, with the same predicate split.
defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm,
                                      avx512vl_i8_info, HasBWI>,
                EVEX_CD8<8, CD8VF>;

defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm,
                                      avx512vl_i16_info, HasBWI>,
                EVEX_CD8<16, CD8VF>;

defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm,
                                          avx512vl_i32_info, HasAVX512>,
                EVEX_CD8<32, CD8VF>;

defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm,
                                          avx512vl_i64_info, HasAVX512>,
                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001107
// v8i32 compares done with the 512-bit dword compare: each input is placed in
// the ymm sub-register of a 512-bit value and the resulting mask is copied
// into VK8.
def : Pat<(v8i1 (X86pcmpgtm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
          (COPY_TO_REGCLASS
            (VPCMPGTDZrr
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))),
            VK8)>;

def : Pat<(v8i1 (X86pcmpeqm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
          (COPY_TO_REGCLASS
            (VPCMPEQDZrr
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))),
            VK8)>;
1117
// Packed integer compare with a condition-code operand; the result goes to a
// mask register. Register classes, value types and memory fragments come from
// the X86VectorVTInfo argument `_`.
//   rri  / rmi  - unmasked; condition printed as part of the mnemonic.
//   rrik / rmik - under write mask $mask (result ANDed with the mask).
//   *_alt       - assembler-only forms that take the condition as an explicit
//                 i8 immediate instead of a comparison code.
// The memory *_alt forms have empty patterns and hasSideEffects = 0, so
// TableGen cannot infer that they read memory; mayLoad is set explicitly on
// them, as it is on rmi/rmik.
multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode,
                          X86VectorVTInfo _> {
  def rri : AVX512AIi8<opc, MRMSrcReg,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
             !strconcat("vpcmp${cc}", Suffix,
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                       imm:$cc))],
             IIC_SSE_ALU_F32P_RR>, EVEX_4V;
  let mayLoad = 1 in
  def rmi : AVX512AIi8<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc),
             !strconcat("vpcmp${cc}", Suffix,
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
                              (_.VT (bitconvert (_.LdFrag addr:$src2))),
                              imm:$cc))],
             IIC_SSE_ALU_F32P_RM>, EVEX_4V;
  def rrik : AVX512AIi8<opc, MRMSrcReg,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
                                      AVXCC:$cc),
              !strconcat("vpcmp${cc}", Suffix,
                         "\t{$src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                  (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                          imm:$cc)))],
              IIC_SSE_ALU_F32P_RR>, EVEX_4V, EVEX_K;
  let mayLoad = 1 in
  def rmik : AVX512AIi8<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
                                    AVXCC:$cc),
              !strconcat("vpcmp${cc}", Suffix,
                         "\t{$src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                   (OpNode (_.VT _.RC:$src1),
                                      (_.VT (bitconvert (_.LdFrag addr:$src2))),
                                      imm:$cc)))],
              IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K;

  // Accept explicit immediate argument form instead of comparison code.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    def rri_alt : AVX512AIi8<opc, MRMSrcReg,
               (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, i8imm:$cc),
               !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
                          "$dst, $src1, $src2, $cc}"),
               [], IIC_SSE_ALU_F32P_RR>, EVEX_4V;
    let mayLoad = 1 in
    def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, i8imm:$cc),
               !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
                          "$dst, $src1, $src2, $cc}"),
               [], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
    def rrik_alt : AVX512AIi8<opc, MRMSrcReg,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
                                       i8imm:$cc),
               !strconcat("vpcmp", Suffix,
                          "\t{$cc, $src2, $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, $src2, $cc}"),
               [], IIC_SSE_ALU_F32P_RR>, EVEX_4V, EVEX_K;
    let mayLoad = 1 in
    def rmik_alt : AVX512AIi8<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
                                       i8imm:$cc),
               !strconcat("vpcmp", Suffix,
                          "\t{$cc, $src2, $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, $src2, $cc}"),
               [], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K;
  }
}
1187
// Integer compare-with-condition-code instructions whose second source is a
// scalar memory operand wrapped in X86VBroadcast.  Inherits every form that
// avx512_icmp_cc defines and adds the "rmib*" forms on top.
multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, SDNode OpNode,
                              X86VectorVTInfo _> :
           avx512_icmp_cc<opc, Suffix, OpNode, _> {
  let mayLoad = 1 in {
    // Unmasked form: compare $src1 against the broadcast scalar load.
    def rmib : AVX512AIi8<opc, MRMSrcMem,
                 (outs _.KRC:$dst),
                 (ins _.RC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
                 !strconcat("vpcmp${cc}", Suffix,
                            "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
                            "$dst, $src1, ${src2}", _.BroadcastStr, "}"),
                 [(set _.KRC:$dst,
                       (OpNode (_.VT _.RC:$src1),
                               (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
                               imm:$cc))],
                 IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_B;

    // Masked form: the compare result is ANDed with the write-mask $mask.
    def rmibk : AVX512AIi8<opc, MRMSrcMem,
                  (outs _.KRC:$dst),
                  (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2,
                       AVXCC:$cc),
                  !strconcat("vpcmp${cc}", Suffix,
                             "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                             "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
                  [(set _.KRC:$dst,
                        (and _.KRCWM:$mask,
                             (OpNode (_.VT _.RC:$src1),
                                     (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
                                     imm:$cc)))],
                  IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K, EVEX_B;
  }

  // Assembler-only aliases that take the comparison predicate as an explicit
  // i8 immediate instead of a condition code embedded in the mnemonic.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    def rmib_alt : AVX512AIi8<opc, MRMSrcMem,
                     (outs _.KRC:$dst),
                     (ins _.RC:$src1, _.ScalarMemOp:$src2, i8imm:$cc),
                     !strconcat("vpcmp", Suffix,
                                "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
                                "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
                     [], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_B;

    def rmibk_alt : AVX512AIi8<opc, MRMSrcMem,
                      (outs _.KRC:$dst),
                      (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2,
                           i8imm:$cc),
                      !strconcat("vpcmp", Suffix,
                                 "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                                 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
                      [], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K, EVEX_B;
  }
}
1233
// Instantiates avx512_icmp_cc for the 512-, 256- and 128-bit entries of
// VTInfo.  The 512-bit form requires only `prd`; the narrower forms
// additionally require HasVLX.
multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, SDNode OpNode,
                             AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_icmp_cc<opc, Suffix, OpNode, VTInfo.info512>, EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc<opc, Suffix, OpNode, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_icmp_cc<opc, Suffix, OpNode, VTInfo.info128>,
                EVEX_V128;
  }
}
1244
// Same vector-length fan-out as avx512_icmp_cc_vl, but built from
// avx512_icmp_cc_rmb so each width also gets the broadcast-memory forms.
multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, SDNode OpNode,
                                 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_icmp_cc_rmb<opc, Suffix, OpNode, VTInfo.info512>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, VTInfo.info128>,
                EVEX_V128;
  }
}
1258
// Byte and word compares: predicated on HasBWI, no broadcast-memory forms.
defm VPCMPB  : avx512_icmp_cc_vl<0x3F, "b", X86cmpm, avx512vl_i8_info,
                                 HasBWI>,
               EVEX_CD8<8, CD8VF>;
defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86cmpmu, avx512vl_i8_info,
                                 HasBWI>,
               EVEX_CD8<8, CD8VF>;

defm VPCMPW  : avx512_icmp_cc_vl<0x3F, "w", X86cmpm, avx512vl_i16_info,
                                 HasBWI>,
               VEX_W, EVEX_CD8<16, CD8VF>;
defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86cmpmu, avx512vl_i16_info,
                                 HasBWI>,
               VEX_W, EVEX_CD8<16, CD8VF>;

// Dword and qword compares: predicated on HasAVX512, with broadcast-memory
// forms.
defm VPCMPD  : avx512_icmp_cc_rmb_vl<0x1F, "d", X86cmpm, avx512vl_i32_info,
                                     HasAVX512>,
               EVEX_CD8<32, CD8VF>;
defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86cmpmu, avx512vl_i32_info,
                                     HasAVX512>,
               EVEX_CD8<32, CD8VF>;

defm VPCMPQ  : avx512_icmp_cc_rmb_vl<0x1F, "q", X86cmpm, avx512vl_i64_info,
                                     HasAVX512>,
               VEX_W, EVEX_CD8<64, CD8VF>;
defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86cmpmu, avx512vl_i64_info,
                                     HasAVX512>,
               VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001278
// avx512_cmp_packed - compare packed instructions
//
// Parameters:
//   KRC      - mask register class receiving the compare result.
//   RC       - vector register class of the sources.
//   x86memop - memory operand type used for the memory form.
//   vt       - vector value type used in the selection patterns.
//   suffix   - mnemonic suffix appended after "vcmp${cc}" / "vcmp".
//   d        - execution domain passed through to AVX512PIi8.
multiclass avx512_cmp_packed<RegisterClass KRC, RegisterClass RC,
                             X86MemOperand x86memop, ValueType vt,
                             string suffix, Domain d> {
  // Register-register form; the condition code is part of the mnemonic.
  def rri : AVX512PIi8<0xC2, MRMSrcReg,
              (outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc),
              !strconcat("vcmp${cc}", suffix,
                         " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
              [(set KRC:$dst, (X86cmpm (vt RC:$src1), (vt RC:$src2), imm:$cc))],
              d>;

  // Register-register form printed with the {sae} operand.  It carries no
  // selection pattern of its own.
  def rrib: AVX512PIi8<0xC2, MRMSrcReg,
              (outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc),
              !strconcat("vcmp${cc}", suffix,
                         " \t{{sae}, $src2, $src1, $dst|$dst, $src1, $src2, {sae}}"),
              [], d>, EVEX_B;

  // Register-memory form.  The condition code is already encoded in the
  // mnemonic through ${cc}, so the Intel-syntax operand list must not repeat
  // it as a trailing ", $cc" operand (this keeps it consistent with rri).
  def rmi : AVX512PIi8<0xC2, MRMSrcMem,
              (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, AVXCC:$cc),
              !strconcat("vcmp${cc}", suffix,
                         " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
              [(set KRC:$dst,
                    (X86cmpm (vt RC:$src1), (memop addr:$src2), imm:$cc))], d>;

  // Accept explicit immediate argument form instead of comparison code.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    def rri_alt : AVX512PIi8<0xC2, MRMSrcReg,
                    (outs KRC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
                    !strconcat("vcmp", suffix,
                               " \t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
                    [], d>;
    def rmi_alt : AVX512PIi8<0xC2, MRMSrcMem,
                    (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
                    !strconcat("vcmp", suffix,
                               " \t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
                    [], d>;
  }
}
1312
// 512-bit packed floating-point compares into a mask register.
defm VCMPPSZ : avx512_cmp_packed<VK16, VR512, f512mem, v16f32,
                                 "ps", SSEPackedSingle>,
               PS, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VCMPPDZ : avx512_cmp_packed<VK8, VR512, f512mem, v8f64,
                                 "pd", SSEPackedDouble>,
               PD, EVEX_4V, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
1319
// 8-element compares on VR256X operands: each source is placed into a 512-bit
// value with SUBREG_TO_REG, the 512-bit instruction is used, and the result is
// copied into VK8.
def : Pat<(v8i1 (X86cmpm (v8f32 VR256X:$src1), (v8f32 VR256X:$src2), imm:$cc)),
          (COPY_TO_REGCLASS
            (VCMPPSZrri
              (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
              (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
              imm:$cc),
            VK8)>;
def : Pat<(v8i1 (X86cmpm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
          (COPY_TO_REGCLASS
            (VPCMPDZrri
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
              imm:$cc),
            VK8)>;
def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
          (COPY_TO_REGCLASS
            (VPCMPUDZrri
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
              imm:$cc),
            VK8)>;
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00001335
// mask_cmp intrinsics with an all-ones mask argument.
// FROUND_NO_EXC selects the "rrib" ({sae}) instruction forms.
def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1),
                                               (v16f32 VR512:$src2),
                                               imm:$cc, (i16 -1),
                                               FROUND_NO_EXC)),
          (COPY_TO_REGCLASS
            (VCMPPSZrrib VR512:$src1, VR512:$src2, (I8Imm imm:$cc)),
            GR16)>;

def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1),
                                              (v8f64 VR512:$src2),
                                              imm:$cc, (i8 -1),
                                              FROUND_NO_EXC)),
          (COPY_TO_REGCLASS
            (VCMPPDZrrib VR512:$src1, VR512:$src2, (I8Imm imm:$cc)),
            GR8)>;

// FROUND_CURRENT selects the plain "rri" instruction forms.
def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1),
                                               (v16f32 VR512:$src2),
                                               imm:$cc, (i16 -1),
                                               FROUND_CURRENT)),
          (COPY_TO_REGCLASS
            (VCMPPSZrri VR512:$src1, VR512:$src2, (I8Imm imm:$cc)),
            GR16)>;

def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1),
                                              (v8f64 VR512:$src2),
                                              imm:$cc, (i8 -1),
                                              FROUND_CURRENT)),
          (COPY_TO_REGCLASS
            (VCMPPDZrri VR512:$src1, VR512:$src2, (I8Imm imm:$cc)),
            GR8)>;
1359
// Mask register copy, including
// - copy between mask registers
// - load/store mask registers
// - copy from GPR to mask register and vice versa
//
// avx512_mask_mov covers the first two groups:
//   kk - mask register to mask register (no pattern)
//   km - load from x86memop; matches a load of `ivt` bitconverted to `vvt`
//   mk - store to x86memop (no pattern)
multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
                           string OpcodeStr, RegisterClass KRC,
                           ValueType vvt, ValueType ivt,
                           X86MemOperand x86memop> {
  let hasSideEffects = 0 in {
    def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
               OpcodeStr # " \t{$src, $dst|$dst, $src}", []>;

    let mayLoad = 1 in {
      def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
                 OpcodeStr # " \t{$src, $dst|$dst, $src}",
                 [(set KRC:$dst,
                       (vvt (bitconvert (ivt (load addr:$src)))))]>;
    }

    let mayStore = 1 in {
      def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
                 OpcodeStr # " \t{$src, $dst|$dst, $src}", []>;
    }
  }
}
1380
// Moves between a general-purpose register class (GRC) and a mask register
// class (KRC).  Neither form carries a selection pattern.
//   kr - GRC source, KRC destination
//   rk - KRC source, GRC destination
multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
                               string OpcodeStr,
                               RegisterClass KRC, RegisterClass GRC> {
  let hasSideEffects = 0 in {
    def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
               OpcodeStr # " \t{$src, $dst|$dst, $src}", []>;
    def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
               OpcodeStr # " \t{$src, $dst|$dst, $src}", []>;
  }
}
1391
// KMOVB: 8-bit masks, predicated on HasDQI.
let Predicates = [HasDQI] in {
  defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8,
                               i8mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
               VEX, PD;
}

// KMOVW: 16-bit masks, predicated on HasAVX512.
let Predicates = [HasAVX512] in {
  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16,
                               i16mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
               VEX, PS;
}

// KMOVD / KMOVQ: 32- and 64-bit masks, predicated on HasBWI.  The
// mask/memory forms and the GPR forms are instantiated separately because
// they take different prefix classes.
let Predicates = [HasBWI] in {
  defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1, i32,
                               i32mem>,
               VEX, PD, VEX_W;
  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
               VEX, XD;

  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64,
                               i64mem>,
               VEX, PS, VEX_W;
  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
               VEX, XD, VEX_W;
}
1417
// GR from/to mask register
//
// The 8- and 16-bit cases go through a 32-bit GPR: the narrow register is
// widened with SUBREG_TO_REG on the way in and narrowed with EXTRACT_SUBREG on
// the way out.
let Predicates = [HasDQI] in {
  def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
            (KMOVBkr (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit))>;
  def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
            (EXTRACT_SUBREG (KMOVBrk VK8:$src), sub_8bit)>;
}

let Predicates = [HasAVX512] in {
  def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
            (KMOVWkr (SUBREG_TO_REG (i32 0), GR16:$src, sub_16bit))>;
  def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
            (EXTRACT_SUBREG (KMOVWrk VK16:$src), sub_16bit)>;
}

// The 32- and 64-bit cases map directly onto KMOVD / KMOVQ.
let Predicates = [HasBWI] in {
  def : Pat<(v32i1 (bitconvert (i32 GR32:$src))), (KMOVDkr GR32:$src)>;
  def : Pat<(i32 (bitconvert (v32i1 VK32:$src))), (KMOVDrk VK32:$src)>;

  def : Pat<(v64i1 (bitconvert (i64 GR64:$src))), (KMOVQkr GR64:$src)>;
  def : Pat<(i64 (bitconvert (v64i1 VK64:$src))), (KMOVQrk VK64:$src)>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001439
// Load/store kreg
let Predicates = [HasDQI] in {
  def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst),
            (KMOVBmk addr:$dst, VK8:$src)>;
}

let Predicates = [HasAVX512] in {
  def : Pat<(store (i16 (bitconvert (v16i1 VK16:$src))), addr:$dst),
            (KMOVWmk addr:$dst, VK16:$src)>;

  // NOTE(review): the next three patterns match i8 / i1 memory accesses but
  // select KMOVWmk / KMOVWkm, which are instantiated with i16mem -- confirm
  // that the wider access is intended.
  def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst),
            (KMOVWmk addr:$dst, (COPY_TO_REGCLASS VK8:$src, VK16))>;

  def : Pat<(i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVWkm addr:$src), VK1)>;

  def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
            (COPY_TO_REGCLASS (KMOVWkm addr:$src), VK8)>;
}

let Predicates = [HasBWI] in {
  def : Pat<(store (i32 (bitconvert (v32i1 VK32:$src))), addr:$dst),
            (KMOVDmk addr:$dst, VK32:$src)>;

  def : Pat<(store (i64 (bitconvert (v64i1 VK64:$src))), addr:$dst),
            (KMOVQmk addr:$dst, VK64:$src)>;
}
Elena Demikhovskyc5f67262013-12-17 08:33:15 +00001463
let Predicates = [HasAVX512] in {
  // Truncation to i1: AND the (32-bit view of the) source with 1, move the
  // result into a mask register with KMOVWkr, then copy it to VK1.
  def : Pat<(i1 (trunc (i64 GR64:$src))),
            (COPY_TO_REGCLASS
              (KMOVWkr (AND32ri (EXTRACT_SUBREG $src, sub_32bit), (i32 1))),
              VK1)>;

  def : Pat<(i1 (trunc (i32 GR32:$src))),
            (COPY_TO_REGCLASS (KMOVWkr (AND32ri $src, (i32 1))), VK1)>;

  def : Pat<(i1 (trunc (i8 GR8:$src))),
            (COPY_TO_REGCLASS
              (KMOVWkr (AND32ri (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit),
                                (i32 1))),
              VK1)>;

  def : Pat<(i1 (trunc (i16 GR16:$src))),
            (COPY_TO_REGCLASS
              (KMOVWkr (AND32ri (SUBREG_TO_REG (i32 0), $src, sub_16bit),
                                (i32 1))),
              VK1)>;

  // Zero extension from i1: copy VK1 to VK16, move it to a GPR with KMOVWrk,
  // AND with 1, then take or widen to the requested integer width.
  def : Pat<(i32 (zext VK1:$src)),
            (AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1))>;

  def : Pat<(i8 (zext VK1:$src)),
            (EXTRACT_SUBREG
              (AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)),
              sub_8bit)>;

  def : Pat<(i64 (zext VK1:$src)),
            (AND64ri8
              (SUBREG_TO_REG (i64 0),
                             (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)),
                             sub_32bit),
              (i64 1))>;

  def : Pat<(i16 (zext VK1:$src)),
            (EXTRACT_SUBREG
              (AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)),
              sub_16bit)>;

  // scalar_to_vector from i1 is a register-class copy.
  def : Pat<(v16i1 (scalar_to_vector VK1:$src)),
            (COPY_TO_REGCLASS VK1:$src, VK16)>;
  def : Pat<(v8i1 (scalar_to_vector VK1:$src)),
            (COPY_TO_REGCLASS VK1:$src, VK8)>;
}

let Predicates = [HasBWI] in {
  def : Pat<(v32i1 (scalar_to_vector VK1:$src)),
            (COPY_TO_REGCLASS VK1:$src, VK32)>;
  def : Pat<(v64i1 (scalar_to_vector VK1:$src)),
            (COPY_TO_REGCLASS VK1:$src, VK64)>;
}
1505
1506
// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
let Predicates = [HasAVX512] in {
  // GR from/to 8-bit mask without native support
  def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
            (COPY_TO_REGCLASS
              (KMOVWkr (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit)),
              VK8)>;
  def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
            (EXTRACT_SUBREG
              (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)),
              sub_8bit)>;

  // Extracting element 0 of a mask is a register-class copy to VK1.
  def : Pat<(i1 (X86Vextract VK16:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK16:$src, VK1)>;
  def : Pat<(i1 (X86Vextract VK8:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK8:$src, VK1)>;
}

let Predicates = [HasBWI] in {
  def : Pat<(i1 (X86Vextract VK32:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK32:$src, VK1)>;
  def : Pat<(i1 (X86Vextract VK64:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK64:$src, VK1)>;
}
1530
// Mask unary operation
// - KNOT
//
// Defines a single register-register instruction "rr" that applies OpNode to
// a KRC source, guarded by predicate `prd`.
multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
                            RegisterClass KRC, SDPatternOperator OpNode,
                            Predicate prd> {
  let Predicates = [prd] in {
    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
               OpcodeStr # " \t{$src, $dst|$dst, $src}",
               [(set KRC:$dst, (OpNode KRC:$src))]>;
  }
}
1541
// Instantiates avx512_mask_unop for the four mask widths, appending the
// b/w/d/q suffix to the mnemonic and choosing the predicate per width.
multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNode> {
  defm B : avx512_mask_unop<opc, OpcodeStr # "b", VK8, OpNode, HasDQI>,
           VEX, PD;
  defm W : avx512_mask_unop<opc, OpcodeStr # "w", VK16, OpNode, HasAVX512>,
           VEX, PS;
  defm D : avx512_mask_unop<opc, OpcodeStr # "d", VK32, OpNode, HasBWI>,
           VEX, PD, VEX_W;
  defm Q : avx512_mask_unop<opc, OpcodeStr # "q", VK64, OpNode, HasBWI>,
           VEX, PS, VEX_W;
}

defm KNOT : avx512_mask_unop_all<0x44, "knot", not>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001555
// Maps the 16-bit mask intrinsic int_x86_avx512_<IntName>_w onto the
// <InstName>Wrr instruction.  The GR16 operand is copied into VK16 and the
// result is copied back to GR16.
multiclass avx512_mask_unop_int<string IntName, string InstName> {
  let Predicates = [HasAVX512] in {
    def : Pat<(!cast<Intrinsic>("int_x86_avx512_"##IntName##"_w")
                (i16 GR16:$src)),
              (COPY_TO_REGCLASS
                (!cast<Instruction>(InstName##"Wrr")
                  (v16i1 (COPY_TO_REGCLASS GR16:$src, VK16))),
                GR16)>;
  }
}

defm : avx512_mask_unop_int<"knot", "KNOT">;
1564
// XOR of a mask with an all-ones vector is selected as KNOT of the matching
// width.
let Predicates = [HasDQI] in {
  def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)), (KNOTBrr VK8:$src1)>;
}
let Predicates = [HasAVX512] in {
  def : Pat<(xor VK16:$src1, (v16i1 immAllOnesV)), (KNOTWrr VK16:$src1)>;
}
let Predicates = [HasBWI] in {
  def : Pat<(xor VK32:$src1, (v32i1 immAllOnesV)), (KNOTDrr VK32:$src1)>;
  def : Pat<(xor VK64:$src1, (v64i1 immAllOnesV)), (KNOTQrr VK64:$src1)>;
}

// KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
let Predicates = [HasAVX512] in {
  def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)),
            (COPY_TO_REGCLASS
              (KNOTWrr (COPY_TO_REGCLASS VK8:$src1, VK16)),
              VK8)>;

  def : Pat<(not VK8:$src),
            (COPY_TO_REGCLASS
              (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)),
              VK8)>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001583
// Mask binary operation
// - KAND, KANDN, KOR, KXNOR, KXOR
//
// Defines a single register-register instruction "rr" that applies OpNode to
// two KRC sources, guarded by predicate `prd`.
multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
                             RegisterClass KRC, SDPatternOperator OpNode,
                             Predicate prd> {
  let Predicates = [prd] in {
    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
               OpcodeStr # " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
               [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>;
  }
}
1595
// Instantiates avx512_mask_binop for the four mask widths, appending the
// b/w/d/q suffix to the mnemonic and choosing the predicate per width.
multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
                                 SDPatternOperator OpNode> {
  defm B : avx512_mask_binop<opc, OpcodeStr # "b", VK8, OpNode, HasDQI>,
           VEX_4V, VEX_L, PD;
  defm W : avx512_mask_binop<opc, OpcodeStr # "w", VK16, OpNode, HasAVX512>,
           VEX_4V, VEX_L, PS;
  defm D : avx512_mask_binop<opc, OpcodeStr # "d", VK32, OpNode, HasBWI>,
           VEX_4V, VEX_L, VEX_W, PD;
  defm Q : avx512_mask_binop<opc, OpcodeStr # "q", VK64, OpNode, HasBWI>,
           VEX_4V, VEX_L, VEX_W, PS;
}
1607
// Helper fragments: andn(a, b) = (not a) and b; xnor(a, b) = not (a xor b).
def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;

// Mask logic instructions marked commutable.
let isCommutable = 1 in {
  defm KAND  : avx512_mask_binop_all<0x41, "kand",  and>;
  defm KOR   : avx512_mask_binop_all<0x45, "kor",   or>;
  defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", xnor>;
  defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor>;
}

// KANDN negates only its first operand (see andn), so it is not commutable.
let isCommutable = 0 in {
  defm KANDN : avx512_mask_binop_all<0x42, "kandn", andn>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001619
// Logic on single-bit masks: copy both VK1 operands to VK16, use the 16-bit
// mask instruction, and copy the result back to VK1.
def : Pat<(xor VK1:$src1, VK1:$src2),
          (COPY_TO_REGCLASS
            (KXORWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
                     (COPY_TO_REGCLASS VK1:$src2, VK16)),
            VK1)>;

def : Pat<(or VK1:$src1, VK1:$src2),
          (COPY_TO_REGCLASS
            (KORWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
                    (COPY_TO_REGCLASS VK1:$src2, VK16)),
            VK1)>;

def : Pat<(and VK1:$src1, VK1:$src2),
          (COPY_TO_REGCLASS
            (KANDWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
                     (COPY_TO_REGCLASS VK1:$src2, VK16)),
            VK1)>;
1631
// Maps the 16-bit mask intrinsic int_x86_avx512_<IntName>_w onto the
// <InstName>Wrr instruction.  Both GR16 operands are copied into VK16 and the
// result is copied back to GR16.
multiclass avx512_mask_binop_int<string IntName, string InstName> {
  let Predicates = [HasAVX512] in {
    def : Pat<(!cast<Intrinsic>("int_x86_avx512_"##IntName##"_w")
                (i16 GR16:$src1), (i16 GR16:$src2)),
              (COPY_TO_REGCLASS
                (!cast<Instruction>(InstName##"Wrr")
                  (v16i1 (COPY_TO_REGCLASS GR16:$src1, VK16)),
                  (v16i1 (COPY_TO_REGCLASS GR16:$src2, VK16))),
                GR16)>;
  }
}

defm : avx512_mask_binop_int<"kand",  "KAND">;
defm : avx512_mask_binop_int<"kandn", "KANDN">;
defm : avx512_mask_binop_int<"kor",   "KOR">;
defm : avx512_mask_binop_int<"kxnor", "KXNOR">;
defm : avx512_mask_binop_int<"kxor",  "KXOR">;
Elena Demikhovskye382c3f2013-12-10 13:53:10 +00001646
// With AVX-512, 8-bit mask is promoted to 16-bit mask.
//
// Selects OpNode on two VK8 operands as `Inst` applied to VK16 copies of
// them, with the result copied back to VK8.
multiclass avx512_binop_pat<SDPatternOperator OpNode, Instruction Inst> {
  let Predicates = [HasAVX512] in {
    def : Pat<(OpNode VK8:$src1, VK8:$src2),
              (COPY_TO_REGCLASS
                (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
                      (COPY_TO_REGCLASS VK8:$src2, VK16)),
                VK8)>;
  }
}

defm : avx512_binop_pat<and,  KANDWrr>;
defm : avx512_binop_pat<andn, KANDNWrr>;
defm : avx512_binop_pat<or,   KORWrr>;
defm : avx512_binop_pat<xnor, KXNORWrr>;
defm : avx512_binop_pat<xor,  KXORWrr>;
1661
// Mask unpacking
//
// Register-register unpack instruction with no selection pattern of its own.
multiclass avx512_mask_unpck<bits<8> opc, string OpcodeStr,
                             RegisterClass KRC> {
  let Predicates = [HasAVX512] in {
    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
               OpcodeStr # " \t{$src2, $src1, $dst|$dst, $src1, $src2}", []>;
  }
}

// Byte-to-word variant ("bw" suffix) operating on VK16.
multiclass avx512_mask_unpck_bw<bits<8> opc, string OpcodeStr> {
  defm BW : avx512_mask_unpck<opc, OpcodeStr # "bw", VK16>,
            VEX_4V, VEX_L, PD;
}

defm KUNPCK : avx512_mask_unpck_bw<0x4b, "kunpck">;

// Concatenating two v8i1 masks: note that the operands are passed to
// KUNPCKBWrr in swapped order ($src2 first, $src1 second).
def : Pat<(v16i1 (concat_vectors (v8i1 VK8:$src1), (v8i1 VK8:$src2))),
          (KUNPCKBWrr (COPY_TO_REGCLASS VK8:$src2, VK16),
                      (COPY_TO_REGCLASS VK8:$src1, VK16))>;
1680
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001681
// Maps the intrinsic int_x86_avx512_<IntName>_bw onto the <InstName>BWrr
// instruction.  Both GR16 operands are copied into VK16 and the result is
// copied back to GR16.
multiclass avx512_mask_unpck_int<string IntName, string InstName> {
  let Predicates = [HasAVX512] in {
    def : Pat<(!cast<Intrinsic>("int_x86_avx512_"##IntName##"_bw")
                (i16 GR16:$src1), (i16 GR16:$src2)),
              (COPY_TO_REGCLASS
                (!cast<Instruction>(InstName##"BWrr")
                  (v16i1 (COPY_TO_REGCLASS GR16:$src1, VK16)),
                  (v16i1 (COPY_TO_REGCLASS GR16:$src2, VK16))),
                GR16)>;
  }
}

defm : avx512_mask_unpck_int<"kunpck", "KUNPCK">;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001691
// Mask bit testing
//
// Register-register test instruction: no register outputs, its only result
// is EFLAGS.
multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                              SDNode OpNode> {
  let Predicates = [HasAVX512], Defs = [EFLAGS] in {
    def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
               OpcodeStr # " \t{$src2, $src1|$src1, $src2}",
               [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>;
  }
}

// 16-bit ("w") variant operating on VK16.
multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode> {
  defm W : avx512_mask_testop<opc, OpcodeStr # "w", VK16, OpNode>,
           VEX, PS;
}

defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest>;

// Comparing an i1 against zero: KORTESTW of the value with itself.
def : Pat<(X86cmp VK1:$src1, (i1 0)),
          (KORTESTWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
                      (COPY_TO_REGCLASS VK1:$src1, VK16))>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001711
// Mask shift
//
// Shift of a KRC source by an 8-bit immediate.
multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                               SDNode OpNode> {
  let Predicates = [HasAVX512] in {
    def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, i8imm:$imm),
                 OpcodeStr # " \t{$imm, $src, $dst|$dst, $src, $imm}",
                 [(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))]>;
  }
}

// 16-bit ("w") variant operating on VK16.  Only opc1 is used; opc2 is accepted
// but not referenced in this multiclass.
multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
                                 SDNode OpNode> {
  defm W : avx512_mask_shiftop<opc1, OpcodeStr # "w", VK16, OpNode>,
           VEX, TAPD, VEX_W;
}

defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86vshli>;
defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86vsrli>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001730
// Mask setting all 0s or 1s
//
// Defines a pseudo instruction (no operands, empty asm string) that
// materializes the constant `Val` of type VT into a KRC mask register. The
// pseudo is flagged rematerializable and as cheap as a move.
multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
  let Predicates = [HasAVX512],
      isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1 in {
    def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
                   [(set KRC:$dst, (VT Val))]>;
  }
}
1738
// Instantiates the mask-constant pseudo for both mask widths handled here:
// "B" on VK8 (v8i1) and "W" on VK16 (v16i1).
multiclass avx512_mask_setop_w<PatFrag Val> {
  defm B : avx512_mask_setop<VK8,  v8i1,  Val>;
  defm W : avx512_mask_setop<VK16, v16i1, Val>;
}
1743
// KSET0{B,W}: all-zeros mask pseudos.  KSET1{B,W}: all-ones mask pseudos.
defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
1746
// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
// Constant v8i1 and i1 values are therefore produced with the 16-bit
// KSET0W / KSET1W pseudos and then copied into the narrower register class.
let Predicates = [HasAVX512] in {
  // v8i1 constants.
  def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
  def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;

  // i1 constants; both 1 and -1 map to the all-ones pseudo.
  def : Pat<(i1 0),  (COPY_TO_REGCLASS (KSET0W), VK1)>;
  def : Pat<(i1 1),  (COPY_TO_REGCLASS (KSET1W), VK1)>;
  def : Pat<(i1 -1), (COPY_TO_REGCLASS (KSET1W), VK1)>;
}
// Extracting elements 0..7 of a v16i1 is a register-class copy to VK8.
def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 0))),
          (v8i1 (COPY_TO_REGCLASS VK16:$src, VK8))>;

// Inserting a v8i1 at element 0 of an undef v16i1 is a register-class copy
// to VK16.
def : Pat<(v16i1 (insert_subvector undef, (v8i1 VK8:$src), (iPTR 0))),
          (v16i1 (COPY_TO_REGCLASS VK8:$src, VK16))>;

// Extracting elements 8..15: shift the 16-bit mask right by 8, then copy the
// result to VK8.
def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 8))),
          (v8i1 (COPY_TO_REGCLASS (KSHIFTRWri VK16:$src, (i8 8)), VK8))>;

// v8i1 shifts are performed with the 16-bit KSHIFT instructions: copy the
// operand to VK16, shift, and copy the result back to VK8.
def : Pat<(v8i1 (X86vshli VK8:$src, (i8 imm:$imm))),
          (v8i1 (COPY_TO_REGCLASS
                  (KSHIFTLWri (COPY_TO_REGCLASS VK8:$src, VK16), (I8Imm $imm)),
                  VK8))>;

def : Pat<(v8i1 (X86vsrli VK8:$src, (i8 imm:$imm))),
          (v8i1 (COPY_TO_REGCLASS
                  (KSHIFTRWri (COPY_TO_REGCLASS VK8:$src, VK16), (I8Imm $imm)),
                  VK8))>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001769//===----------------------------------------------------------------------===//
1770// AVX-512 - Aligned and unaligned load and store
1771//
1772
// Register-source and memory-source forms of a packed move, with their
// write-masked variants. All forms are EVEX encoded.
//
//   rr   - register-to-register move; no selection pattern.
//   rrkz - register move under $mask, printed with "{z}"; no pattern.
//   rm   - load, selected for (vt (bitconvert (ld_frag addr:$src))).
//   rrk  - vselect on $mask between $src1 and the pass-through $src0.
//   rmk  - vselect on $mask between the loaded value and the pass-through
//          $src0.
//   rmkz - vselect on $mask between the loaded value and all-zeros.
//
// Parameters:
//   ld_frag            - load fragment used by the memory forms.
//   KRC / RC           - mask and vector register classes.
//   vt                 - vector type of the result.
//   zvt                - vector type used to spell the all-zeros constant,
//                        which is then bitconverted to vt.
//   memop              - memory operand of the memory forms.
//   d                  - execution domain passed to AVX512PI.
//   IsReMaterializable - forwarded to isReMaterializable of the rm form only.
multiclass avx512_load<bits<8> opc, string OpcodeStr, PatFrag ld_frag,
                       RegisterClass KRC, RegisterClass RC,
                       ValueType vt, ValueType zvt, X86MemOperand memop,
                       Domain d, bit IsReMaterializable = 1> {
  // Pattern-less register forms.
  let hasSideEffects = 0 in {
    def rr : AVX512PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                      [], d>,
             EVEX;

    def rrkz : AVX512PI<opc, MRMSrcReg, (outs RC:$dst),
                        (ins KRC:$mask, RC:$src),
                        !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                                   "${dst} {${mask}} {z}, $src}"),
                        [], d>,
               EVEX, EVEX_KZ;
  }

  // Unmasked load.
  let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable,
      SchedRW = [WriteLoad] in {
    def rm : AVX512PI<opc, MRMSrcMem, (outs RC:$dst), (ins memop:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                      [(set RC:$dst, (vt (bitconvert (ld_frag addr:$src))))],
                      d>,
             EVEX;
  }

  // Masked forms carrying vselect patterns.
  let AddedComplexity = 20 in {
    // Merge-masking: the pass-through operand $src0 is tied to $dst.
    let Constraints = "$src0 = $dst", hasSideEffects = 0 in {
      def rrk : AVX512PI<opc, MRMSrcReg, (outs RC:$dst),
                         (ins RC:$src0, KRC:$mask, RC:$src1),
                         !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
                                    "${dst} {${mask}}, $src1}"),
                         [(set RC:$dst, (vt (vselect KRC:$mask,
                                                     (vt RC:$src1),
                                                     (vt RC:$src0))))],
                         d>,
                EVEX, EVEX_K;

      let mayLoad = 1, SchedRW = [WriteLoad] in {
        def rmk : AVX512PI<opc, MRMSrcMem, (outs RC:$dst),
                           (ins RC:$src0, KRC:$mask, memop:$src1),
                           !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
                                      "${dst} {${mask}}, $src1}"),
                           [(set RC:$dst,
                                 (vt (vselect KRC:$mask,
                                        (vt (bitconvert (ld_frag addr:$src1))),
                                        (vt RC:$src0))))],
                           d>,
                  EVEX, EVEX_K;
      }
    }

    // Zero-masking load: the "false" arm of the vselect is all-zeros.
    let mayLoad = 1, SchedRW = [WriteLoad] in {
      def rmkz : AVX512PI<opc, MRMSrcMem, (outs RC:$dst),
                          (ins KRC:$mask, memop:$src),
                          !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                                     "${dst} {${mask}} {z}, $src}"),
                          [(set RC:$dst,
                                (vt (vselect KRC:$mask,
                                       (vt (bitconvert (ld_frag addr:$src))),
                                       (vt (bitconvert (zvt immAllZerosV))))))],
                          d>,
                 EVEX, EVEX_KZ;
    }
  }
}
1826
// Instantiates avx512_load for the three vector lengths:
//   Z    - 512-bit (VR512),  guarded by `prd`.
//   Z256 - 256-bit (VR256X), guarded by `prd` and HasVLX.
//   Z128 - 128-bit (VR128X), guarded by `prd` and HasVLX.
//
// Record names are assembled from strings:
//   ld_pat              - prefix of the load PatFrag name.
//   elty / elsz         - element kind ("f" or "i") and element size in bits.
//   vsz512/vsz256/vsz128 - element count for each vector length.
// For the 256- and 128-bit variants, the load fragment is the one named after
// the full vector type only when elty is "f"; otherwise the v4i64 / v2i64
// fragment is used. The all-zeros type passed down is v16i32 / v8i32 / v4i32.
multiclass avx512_load_vl<bits<8> opc, string OpcodeStr, string ld_pat,
                          string elty, string elsz, string vsz512,
                          string vsz256, string vsz128, Domain d,
                          Predicate prd, bit IsReMaterializable = 1> {
  let Predicates = [prd] in {
    defm Z : avx512_load<opc, OpcodeStr,
                         !cast<PatFrag>(ld_pat##"v"##vsz512##elty##elsz),
                         !cast<RegisterClass>("VK"##vsz512##"WM"), VR512,
                         !cast<ValueType>("v"##vsz512##elty##elsz), v16i32,
                         !cast<X86MemOperand>(elty##"512mem"), d,
                         IsReMaterializable>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_load<opc, OpcodeStr,
                            !cast<PatFrag>(ld_pat##!if(!eq(elty,"f"),
                                           "v"##vsz256##elty##elsz, "v4i64")),
                            !cast<RegisterClass>("VK"##vsz256##"WM"), VR256X,
                            !cast<ValueType>("v"##vsz256##elty##elsz), v8i32,
                            !cast<X86MemOperand>(elty##"256mem"), d,
                            IsReMaterializable>,
                EVEX_V256;

    defm Z128 : avx512_load<opc, OpcodeStr,
                            !cast<PatFrag>(ld_pat##!if(!eq(elty,"f"),
                                           "v"##vsz128##elty##elsz, "v2i64")),
                            !cast<RegisterClass>("VK"##vsz128##"WM"), VR128X,
                            !cast<ValueType>("v"##vsz128##elty##elsz), v4i32,
                            !cast<X86MemOperand>(elty##"128mem"), d,
                            IsReMaterializable>,
                EVEX_V128;
  }
}
1857
1858
// Store-direction forms of a packed move. All forms are EVEX encoded.
//
//   rr_alt / rrk_alt / rrkz_alt - register-destination (MRMDestReg) encodings
//       of the register move; assembler-parser only, no selection patterns.
//       rrk_alt ties $src1 to $dst.
//   mr  - store, selected for (st_frag (OpVT RC:$src), addr:$dst).
//   mrk - store under $mask; no selection pattern.
multiclass avx512_store<bits<8> opc, string OpcodeStr, PatFrag st_frag,
                        ValueType OpVT, RegisterClass KRC, RegisterClass RC,
                        X86MemOperand memop, Domain d> {
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    def rr_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst), (ins RC:$src),
                          !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                          [], d>,
                 EVEX;

    let Constraints = "$src1 = $dst" in {
      def rrk_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst),
                             (ins RC:$src1, KRC:$mask, RC:$src2),
                             !strconcat(OpcodeStr,
                               "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
                             [], d>,
                    EVEX, EVEX_K;
    }

    def rrkz_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst),
                            (ins KRC:$mask, RC:$src),
                            !strconcat(OpcodeStr,
                              "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
                            [], d>,
                   EVEX, EVEX_KZ;
  }

  let mayStore = 1 in {
    def mr : AVX512PI<opc, MRMDestMem, (outs), (ins memop:$dst, RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                      [(st_frag (OpVT RC:$src), addr:$dst)], d>,
             EVEX;

    def mrk : AVX512PI<opc, MRMDestMem, (outs),
                       (ins memop:$dst, KRC:$mask, RC:$src),
                       !strconcat(OpcodeStr,
                         "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
                       [], d>,
              EVEX, EVEX_K;
  }
}
1889
Robert Khasanov7ca7df02014-08-04 14:35:15 +00001890
// Instantiates avx512_store for the three vector lengths:
//   Z    - 512-bit (VR512),  guarded by `prd`.
//   Z256 - 256-bit (VR256X), guarded by `prd` and HasVLX.
//   Z128 - 128-bit (VR128X), guarded by `prd` and HasVLX.
//
// The store PatFrag for each length is looked up by name as
// st_pat ## st_suff_{512,256,128}. The value type, write-mask register class
// and memory operand are assembled from elty / elsz / vsz* in the same way as
// in avx512_load_vl.
multiclass avx512_store_vl<bits<8> opc, string OpcodeStr, string st_pat,
                           string st_suff_512, string st_suff_256,
                           string st_suff_128, string elty, string elsz,
                           string vsz512, string vsz256, string vsz128,
                           Domain d, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_store<opc, OpcodeStr,
                          !cast<PatFrag>(st_pat##st_suff_512),
                          !cast<ValueType>("v"##vsz512##elty##elsz),
                          !cast<RegisterClass>("VK"##vsz512##"WM"), VR512,
                          !cast<X86MemOperand>(elty##"512mem"), d>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr,
                             !cast<PatFrag>(st_pat##st_suff_256),
                             !cast<ValueType>("v"##vsz256##elty##elsz),
                             !cast<RegisterClass>("VK"##vsz256##"WM"), VR256X,
                             !cast<X86MemOperand>(elty##"256mem"), d>,
                EVEX_V256;

    defm Z128 : avx512_store<opc, OpcodeStr,
                             !cast<PatFrag>(st_pat##st_suff_128),
                             !cast<ValueType>("v"##vsz128##elty##elsz),
                             !cast<RegisterClass>("VK"##vsz128##"WM"), VR128X,
                             !cast<X86MemOperand>(elty##"128mem"), d>,
                EVEX_V128;
  }
}
1914
// Packed floating-point moves. Each defm combines the load-direction records
// (avx512_load_vl) with the store-direction records (avx512_store_vl) under
// one instruction name.

// Aligned single precision: loads 0x28, stores 0x29.
defm VMOVAPS : avx512_load_vl<0x28, "vmovaps", "alignedload", "f", "32",
                              "16", "8", "4", SSEPackedSingle, HasAVX512>,
               avx512_store_vl<0x29, "vmovaps", "alignedstore",
                               "512", "256", "", "f", "32", "16", "8", "4",
                               SSEPackedSingle, HasAVX512>,
               PS, EVEX_CD8<32, CD8VF>;

// Aligned double precision: loads 0x28, stores 0x29.
defm VMOVAPD : avx512_load_vl<0x28, "vmovapd", "alignedload", "f", "64",
                              "8", "4", "2", SSEPackedDouble, HasAVX512>,
               avx512_store_vl<0x29, "vmovapd", "alignedstore",
                               "512", "256", "", "f", "64", "8", "4", "2",
                               SSEPackedDouble, HasAVX512>,
               PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Unaligned single precision: loads 0x10, stores 0x11.
defm VMOVUPS : avx512_load_vl<0x10, "vmovups", "load", "f", "32",
                              "16", "8", "4", SSEPackedSingle, HasAVX512>,
               avx512_store_vl<0x11, "vmovups", "store", "", "", "", "f", "32",
                               "16", "8", "4", SSEPackedSingle, HasAVX512>,
               PS, EVEX_CD8<32, CD8VF>;

// Unaligned double precision: loads 0x10, stores 0x11. Unlike the three
// instructions above, the load side passes IsReMaterializable = 0.
defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", "load", "f", "64",
                              "8", "4", "2", SSEPackedDouble, HasAVX512, 0>,
               avx512_store_vl<0x11, "vmovupd", "store", "", "", "", "f", "64",
                               "8", "4", "2", SSEPackedDouble, HasAVX512>,
               PD, VEX_W, EVEX_CD8<64, CD8VF>;
1940
// Masked unaligned FP load intrinsics with an all-zeros pass-through select
// the zero-masking load. The GPR mask is copied into the write-mask register
// class first.
def : Pat<(v8f64 (int_x86_avx512_mask_loadu_pd_512 addr:$ptr,
                   (bc_v8f64 (v16i32 immAllZerosV)), GR8:$mask)),
          (VMOVUPDZrmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
                        addr:$ptr)>;

def : Pat<(v16f32 (int_x86_avx512_mask_loadu_ps_512 addr:$ptr,
                    (bc_v16f32 (v16i32 immAllZerosV)), GR16:$mask)),
          (VMOVUPSZrmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
                        addr:$ptr)>;

// Masked unaligned FP store intrinsics select the masked store form.
def : Pat<(int_x86_avx512_mask_storeu_ps_512 addr:$ptr, (v16f32 VR512:$src),
                                             GR16:$mask),
          (VMOVUPSZmrk addr:$ptr,
                       (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
                       VR512:$src)>;
def : Pat<(int_x86_avx512_mask_storeu_pd_512 addr:$ptr, (v8f64 VR512:$src),
                                             GR8:$mask),
          (VMOVUPDZmrk addr:$ptr,
                       (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
                       VR512:$src)>;
Elena Demikhovsky1f3ed412013-10-22 09:19:28 +00001957
// Packed integer moves. Loads use opcode 0x6F and stores 0x7F throughout;
// the variants differ in element size, alignment, prefix class and predicate.

// Aligned, 32- and 64-bit elements (HasAVX512).
defm VMOVDQA32 : avx512_load_vl<0x6F, "vmovdqa32", "alignedload", "i", "32",
                                "16", "8", "4", SSEPackedInt, HasAVX512>,
                 avx512_store_vl<0x7F, "vmovdqa32", "alignedstore",
                                 "512", "256", "", "i", "32", "16", "8", "4",
                                 SSEPackedInt, HasAVX512>,
                 PD, EVEX_CD8<32, CD8VF>;

defm VMOVDQA64 : avx512_load_vl<0x6F, "vmovdqa64", "alignedload", "i", "64",
                                "8", "4", "2", SSEPackedInt, HasAVX512>,
                 avx512_store_vl<0x7F, "vmovdqa64", "alignedstore",
                                 "512", "256", "", "i", "64", "8", "4", "2",
                                 SSEPackedInt, HasAVX512>,
                 PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Unaligned, 8- and 16-bit elements (HasBWI).
defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", "load", "i", "8",
                               "64", "32", "16", SSEPackedInt, HasBWI>,
                avx512_store_vl<0x7F, "vmovdqu8", "store", "", "", "",
                                "i", "8", "64", "32", "16", SSEPackedInt,
                                HasBWI>,
                XD, EVEX_CD8<8, CD8VF>;

defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", "load", "i", "16",
                                "32", "16", "8", SSEPackedInt, HasBWI>,
                 avx512_store_vl<0x7F, "vmovdqu16", "store", "", "", "",
                                 "i", "16", "32", "16", "8", SSEPackedInt,
                                 HasBWI>,
                 XD, VEX_W, EVEX_CD8<16, CD8VF>;

// Unaligned, 32- and 64-bit elements (HasAVX512).
defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", "load", "i", "32",
                                "16", "8", "4", SSEPackedInt, HasAVX512>,
                 avx512_store_vl<0x7F, "vmovdqu32", "store", "", "", "",
                                 "i", "32", "16", "8", "4", SSEPackedInt,
                                 HasAVX512>,
                 XS, EVEX_CD8<32, CD8VF>;

defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", "load", "i", "64",
                                "8", "4", "2", SSEPackedInt, HasAVX512>,
                 avx512_store_vl<0x7F, "vmovdqu64", "store", "", "", "",
                                 "i", "64", "8", "4", "2", SSEPackedInt,
                                 HasAVX512>,
                 XS, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovsky1f3ed412013-10-22 09:19:28 +00001995
// Masked unaligned integer load intrinsics with an all-zeros pass-through
// select the zero-masking load. The GPR mask is copied into the write-mask
// register class first.
def : Pat<(v16i32 (int_x86_avx512_mask_loadu_d_512 addr:$ptr,
                    (v16i32 immAllZerosV), GR16:$mask)),
          (VMOVDQU32Zrmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
                          addr:$ptr)>;

def : Pat<(v8i64 (int_x86_avx512_mask_loadu_q_512 addr:$ptr,
                   (bc_v8i64 (v16i32 immAllZerosV)), GR8:$mask)),
          (VMOVDQU64Zrmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
                          addr:$ptr)>;

// Masked unaligned integer store intrinsics select the masked store form.
def : Pat<(int_x86_avx512_mask_storeu_d_512 addr:$ptr, (v16i32 VR512:$src),
                                            GR16:$mask),
          (VMOVDQU32Zmrk addr:$ptr,
                         (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
                         VR512:$src)>;
def : Pat<(int_x86_avx512_mask_storeu_q_512 addr:$ptr, (v8i64 VR512:$src),
                                            GR8:$mask),
          (VMOVDQU64Zmrk addr:$ptr,
                         (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
                         VR512:$src)>;
2012
// Selecting between a 512-bit integer vector and all-zeros maps onto the
// zero-masking register move. When the zero vector is the "true" arm, the
// mask is inverted with KNOTWrr first.
let AddedComplexity = 20 in {
  // mask ? src : 0  (v8i64)
  def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 VR512:$src),
                            (bc_v8i64 (v16i32 immAllZerosV)))),
            (VMOVDQU64Zrrkz VK8WM:$mask, VR512:$src)>;

  // mask ? 0 : src  (v8i64). The 8-bit mask is widened to VK16 for KNOTWrr
  // and the inverted result is copied back to VK8.
  // NOTE(review): the result side names the operand VK8:$mask and copies to
  // VK8, while the source side and the sibling patterns use the *WM classes -
  // confirm whether VK8WM was intended.
  def : Pat<(v8i64 (vselect VK8WM:$mask, (bc_v8i64 (v16i32 immAllZerosV)),
                            (v8i64 VR512:$src))),
            (VMOVDQU64Zrrkz
               (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
                                 VK8),
               VR512:$src)>;

  // mask ? src : 0  (v16i32)
  def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 VR512:$src),
                             (v16i32 immAllZerosV))),
            (VMOVDQU32Zrrkz VK16WM:$mask, VR512:$src)>;

  // mask ? 0 : src  (v16i32)
  def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
                             (v16i32 VR512:$src))),
            (VMOVDQU32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
}
Robert Khasanov7ca7df02014-08-04 14:35:15 +00002031
// Move Int Doubleword to Packed Double Int
//
// GPR / memory -> XMM moves selected for scalar_to_vector. The 32-bit forms
// print as "vmovd"; the 64-bit GPR form prints as "vmovq" and carries VEX_W.
def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg,
                             (outs VR128X:$dst), (ins GR32:$src),
                             "vmovd\t{$src, $dst|$dst, $src}",
                             [(set VR128X:$dst,
                                   (v4i32 (scalar_to_vector GR32:$src)))],
                             IIC_SSE_MOVDQ>,
                    EVEX, VEX_LIG;

def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem,
                             (outs VR128X:$dst), (ins i32mem:$src),
                             "vmovd\t{$src, $dst|$dst, $src}",
                             [(set VR128X:$dst,
                                   (v4i32 (scalar_to_vector
                                             (loadi32 addr:$src))))],
                             IIC_SSE_MOVDQ>,
                    EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;

def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg,
                              (outs VR128X:$dst), (ins GR64:$src),
                              "vmovq\t{$src, $dst|$dst, $src}",
                              [(set VR128X:$dst,
                                    (v2i64 (scalar_to_vector GR64:$src)))],
                              IIC_SSE_MOVDQ>,
                     EVEX, VEX_W, VEX_LIG;
// Bit-preserving moves between a 64-bit GPR (or memory) and a scalar f64
// register, all printed as "vmovq".
//
// The FP operand uses FR64X, the same extended scalar class the other EVEX
// scalar moves in this file use (FR32X in VMOVDI2SSZrr / VMOVSS2DIZrr, FR64X
// in VMOVSDZ), rather than the legacy FR64 class.
//
// The two register forms are code-gen only: they are selected for bitconvert
// and are not exposed to the assembler.
let isCodeGenOnly = 1 in {
  // GR64 -> f64
  def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg,
                               (outs FR64X:$dst), (ins GR64:$src),
                               "vmovq\t{$src, $dst|$dst, $src}",
                               [(set FR64X:$dst, (bitconvert GR64:$src))],
                               IIC_SSE_MOVDQ>,
                      EVEX, VEX_W, Sched<[WriteMove]>;

  // f64 -> GR64
  def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg,
                               (outs GR64:$dst), (ins FR64X:$src),
                               "vmovq\t{$src, $dst|$dst, $src}",
                               [(set GR64:$dst, (bitconvert FR64X:$src))],
                               IIC_SSE_MOVDQ>,
                      EVEX, VEX_W, Sched<[WriteMove]>;
}

// f64 -> memory, stored as an i64.
def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem,
                             (outs), (ins i64mem:$dst, FR64X:$src),
                             "vmovq\t{$src, $dst|$dst, $src}",
                             [(store (i64 (bitconvert FR64X:$src)), addr:$dst)],
                             IIC_SSE_MOVDQ>,
                    EVEX, VEX_W, Sched<[WriteStore]>,
                    EVEX_CD8<64, CD8VT1>;
2064
// Move Int Doubleword to Single Scalar
//
// Code-gen only forms selected for a bitconvert from a 32-bit GPR (rr) or a
// 32-bit integer load (rm) into an FR32X register.
let isCodeGenOnly = 1 in {
  def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg,
                              (outs FR32X:$dst), (ins GR32:$src),
                              "vmovd\t{$src, $dst|$dst, $src}",
                              [(set FR32X:$dst, (bitconvert GR32:$src))],
                              IIC_SSE_MOVDQ>,
                     EVEX, VEX_LIG;

  def VMOVDI2SSZrm : AVX512BI<0x6E, MRMSrcMem,
                              (outs FR32X:$dst), (ins i32mem:$src),
                              "vmovd\t{$src, $dst|$dst, $src}",
                              [(set FR32X:$dst,
                                    (bitconvert (loadi32 addr:$src)))],
                              IIC_SSE_MOVDQ>,
                     EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002078
// Move doubleword from xmm register to r/m32
//
// Both forms are selected for extracting element 0 of a v4i32: the rr form
// writes it to a 32-bit GPR, the mr form stores it to memory.
def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg,
                             (outs GR32:$dst), (ins VR128X:$src),
                             "vmovd\t{$src, $dst|$dst, $src}",
                             [(set GR32:$dst,
                                   (vector_extract (v4i32 VR128X:$src),
                                                   (iPTR 0)))],
                             IIC_SSE_MOVD_ToGP>,
                    EVEX, VEX_LIG;

def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem,
                             (outs), (ins i32mem:$dst, VR128X:$src),
                             "vmovd\t{$src, $dst|$dst, $src}",
                             [(store (i32 (vector_extract (v4i32 VR128X:$src),
                                                          (iPTR 0))),
                                     addr:$dst)],
                             IIC_SSE_MOVDQ>,
                    EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
2092
// Move quadword from xmm1 register to r/m64
//
// Both forms are selected for extracting element 0 of a v2i64 and require
// HasAVX512 together with In64BitMode. The register form uses opcode 0x7E,
// the store form opcode 0xD6.
def VMOVPQIto64Zrr : I<0x7E, MRMDestReg,
                       (outs GR64:$dst), (ins VR128X:$src),
                       "vmovq\t{$src, $dst|$dst, $src}",
                       [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
                                                    (iPTR 0)))],
                       IIC_SSE_MOVD_ToGP>,
                     PD, EVEX, VEX_LIG, VEX_W,
                     Requires<[HasAVX512, In64BitMode]>;

def VMOVPQIto64Zmr : I<0xD6, MRMDestMem,
                       (outs), (ins i64mem:$dst, VR128X:$src),
                       "vmovq\t{$src, $dst|$dst, $src}",
                       [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
                               addr:$dst)],
                       IIC_SSE_MOVDQ>,
                     EVEX, PD, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>,
                     Sched<[WriteStore]>, Requires<[HasAVX512, In64BitMode]>;
2109
// Move Scalar Single to Double Int
//
// Code-gen only forms selected for a bitconvert of an FR32X value to i32:
// the rr form writes a 32-bit GPR, the mr form stores to memory.
let isCodeGenOnly = 1 in {
  def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg,
                              (outs GR32:$dst), (ins FR32X:$src),
                              "vmovd\t{$src, $dst|$dst, $src}",
                              [(set GR32:$dst, (bitconvert FR32X:$src))],
                              IIC_SSE_MOVD_ToGP>,
                     EVEX, VEX_LIG;

  def VMOVSS2DIZmr : AVX512BI<0x7E, MRMDestMem,
                              (outs), (ins i32mem:$dst, FR32X:$src),
                              "vmovd\t{$src, $dst|$dst, $src}",
                              [(store (i32 (bitconvert FR32X:$src)),
                                      addr:$dst)],
                              IIC_SSE_MOVDQ>,
                     EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002124
// Move Quadword Int to Packed Quadword Int
//
// Selected for scalar_to_vector of a 64-bit integer load into a v2i64.
def VMOVQI2PQIZrm : AVX512BI<0x6E, MRMSrcMem,
                             (outs VR128X:$dst), (ins i64mem:$src),
                             "vmovq\t{$src, $dst|$dst, $src}",
                             [(set VR128X:$dst,
                                   (v2i64 (scalar_to_vector
                                             (loadi64 addr:$src))))]>,
                    EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
2133
2134//===----------------------------------------------------------------------===//
2135// AVX-512 MOVSS, MOVSD
2136//===----------------------------------------------------------------------===//
2137
// Scalar move forms. Every record is defined with hasSideEffects = 0.
//
//   rr  - selected for OpNode on $src1 and (scalar_to_vector $src2).
//   rrk - masked register form with VK1WM:$mask; $src1 is tied to $dst.
//         No selection pattern.
//   rm  - scalar load, selected for (mem_pat addr:$src) into RC.
//   mr  - scalar store, selected for (store RC:$src, addr:$dst).
//   mrk - masked scalar store; no selection pattern.
//
// Load-direction forms use opcode 0x10 and store-direction forms 0x11.
multiclass avx512_move_scalar <string asm, RegisterClass RC,
                               SDNode OpNode, ValueType vt,
                               X86MemOperand x86memop, PatFrag mem_pat> {
  let hasSideEffects = 0 in {
    def rr : SI<0x10, MRMSrcReg,
                (outs VR128X:$dst), (ins VR128X:$src1, RC:$src2),
                !strconcat(asm, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                [(set VR128X:$dst,
                      (vt (OpNode VR128X:$src1, (scalar_to_vector RC:$src2))))],
                IIC_SSE_MOV_S_RR>,
             EVEX_4V, VEX_LIG;

    let Constraints = "$src1 = $dst" in {
      def rrk : SI<0x10, MRMSrcReg,
                   (outs VR128X:$dst),
                   (ins VR128X:$src1, VK1WM:$mask, RC:$src2, RC:$src3),
                   !strconcat(asm,
                     " \t{$src3, $src2, $dst {${mask}}|$dst {${mask}}, $src2, $src3}"),
                   [], IIC_SSE_MOV_S_RR>,
                EVEX_4V, VEX_LIG, EVEX_K;
    }

    def rm : SI<0x10, MRMSrcMem,
                (outs RC:$dst), (ins x86memop:$src),
                !strconcat(asm, " \t{$src, $dst|$dst, $src}"),
                [(set RC:$dst, (mem_pat addr:$src))], IIC_SSE_MOV_S_RM>,
             EVEX, VEX_LIG;

    let mayStore = 1 in {
      def mr : SI<0x11, MRMDestMem,
                  (outs), (ins x86memop:$dst, RC:$src),
                  !strconcat(asm, " \t{$src, $dst|$dst, $src}"),
                  [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
               EVEX, VEX_LIG;

      def mrk : SI<0x11, MRMDestMem,
                   (outs), (ins x86memop:$dst, VK1WM:$mask, RC:$src),
                   !strconcat(asm,
                     " \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
                   [], IIC_SSE_MOV_S_MR>,
                EVEX, VEX_LIG, EVEX_K;
    } // mayStore = 1
  } // hasSideEffects = 0
}
2169
// Single-precision scalar move: FR32X / f32mem, selected via X86Movss and
// loadf32.
let ExeDomain = SSEPackedSingle in {
  defm VMOVSSZ : avx512_move_scalar<"movss", FR32X, X86Movss, v4f32, f32mem,
                                    loadf32>,
                 XS, EVEX_CD8<32, CD8VT1>;
}

// Double-precision scalar move: FR64X / f64mem, selected via X86Movsd and
// loadf64.
let ExeDomain = SSEPackedDouble in {
  defm VMOVSDZ : avx512_move_scalar<"movsd", FR64X, X86Movsd, v2f64, f64mem,
                                    loadf64>,
                 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
}
2177
// Scalar select on a 1-bit mask is lowered to the masked scalar move. $src2
// is copied to VR128X and supplied as the tied first operand, $src1 is the
// last operand, and the remaining scalar operand is IMPLICIT_DEF. The result
// is copied back to the scalar register class.
def : Pat<(f32 (X86select VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
          (COPY_TO_REGCLASS
             (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
                         VK1WM:$mask, (f32 (IMPLICIT_DEF)), FR32X:$src1),
             FR32X)>;

def : Pat<(f64 (X86select VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
          (COPY_TO_REGCLASS
             (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
                         VK1WM:$mask, (f64 (IMPLICIT_DEF)), FR64X:$src1),
             FR64X)>;

// Masked scalar store intrinsic: the GR8 mask is copied to VK1WM and the
// vector source to FR32X before selecting the masked store.
def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask),
          (VMOVSSZmrk addr:$dst,
                      (i1 (COPY_TO_REGCLASS GR8:$mask, VK1WM)),
                      (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
2189
// For the disassembler
//
// Register-to-register scalar moves using the store opcode (0x11) with
// MRMDestReg. They carry no selection pattern, are code-gen only, and are
// forced into the disassembler tables.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
  def VMOVSSZrr_REV : SI<0x11, MRMDestReg,
                         (outs VR128X:$dst), (ins VR128X:$src1, FR32X:$src2),
                         "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [], IIC_SSE_MOV_S_RR>,
                      XS, EVEX_4V, VEX_LIG;

  def VMOVSDZrr_REV : SI<0x11, MRMDestReg,
                         (outs VR128X:$dst), (ins VR128X:$src1, FR64X:$src2),
                         "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [], IIC_SSE_MOV_S_RR>,
                      XD, EVEX_4V, VEX_LIG, VEX_W;
}
2203
2204let Predicates = [HasAVX512] in {
2205 let AddedComplexity = 15 in {
2206 // Move scalar to XMM zero-extended, zeroing a VR128X then do a
2207 // MOVS{S,D} to the lower bits.
2208 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32X:$src)))),
2209 (VMOVSSZrr (v4f32 (V_SET0)), FR32X:$src)>;
2210 def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
2211 (VMOVSSZrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
2212 def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
2213 (VMOVSSZrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
2214 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64X:$src)))),
2215 (VMOVSDZrr (v2f64 (V_SET0)), FR64X:$src)>;
2216
2217 // Move low f32 and clear high bits.
2218 def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
2219 (SUBREG_TO_REG (i32 0),
2220 (VMOVSSZrr (v4f32 (V_SET0)),
2221 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)), sub_xmm)>;
2222 def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
2223 (SUBREG_TO_REG (i32 0),
2224 (VMOVSSZrr (v4i32 (V_SET0)),
2225 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)), sub_xmm)>;
2226 }
2227
2228 let AddedComplexity = 20 in {
2229 // MOVSSrm zeros the high parts of the register; represent this
2230 // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
2231 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
2232 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
2233 def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
2234 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
2235 def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
2236 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
2237
2238 // MOVSDrm zeros the high parts of the register; represent this
2239 // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
2240 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
2241 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
2242 def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
2243 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
2244 def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
2245 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
2246 def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
2247 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
2248 def : Pat<(v2f64 (X86vzload addr:$src)),
2249 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
2250
2251 // Represent the same patterns above but in the form they appear for
2252 // 256-bit types
2253 def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
2254 (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
Elena Demikhovsky34586e72013-10-02 12:20:42 +00002255 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002256 def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
2257 (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
2258 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
2259 def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
2260 (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
2261 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
2262 }
2263 def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
2264 (v4f32 (scalar_to_vector FR32X:$src)), (iPTR 0)))),
2265 (SUBREG_TO_REG (i32 0), (v4f32 (VMOVSSZrr (v4f32 (V_SET0)),
2266 FR32X:$src)), sub_xmm)>;
2267 def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
2268 (v2f64 (scalar_to_vector FR64X:$src)), (iPTR 0)))),
2269 (SUBREG_TO_REG (i64 0), (v2f64 (VMOVSDZrr (v2f64 (V_SET0)),
2270 FR64X:$src)), sub_xmm)>;
2271 def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
2272 (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
Elena Demikhovsky34586e72013-10-02 12:20:42 +00002273 (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002274
2275 // Move low f64 and clear high bits.
2276 def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
2277 (SUBREG_TO_REG (i32 0),
2278 (VMOVSDZrr (v2f64 (V_SET0)),
2279 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)), sub_xmm)>;
2280
2281 def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
2282 (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (V_SET0)),
2283 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)), sub_xmm)>;
2284
2285 // Extract and store.
2286 def : Pat<(store (f32 (vector_extract (v4f32 VR128X:$src), (iPTR 0))),
2287 addr:$dst),
2288 (VMOVSSZmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>;
2289 def : Pat<(store (f64 (vector_extract (v2f64 VR128X:$src), (iPTR 0))),
2290 addr:$dst),
2291 (VMOVSDZmr addr:$dst, (COPY_TO_REGCLASS (v2f64 VR128X:$src), FR64X))>;
2292
2293 // Shuffle with VMOVSS
2294 def : Pat<(v4i32 (X86Movss VR128X:$src1, VR128X:$src2)),
2295 (VMOVSSZrr (v4i32 VR128X:$src1),
2296 (COPY_TO_REGCLASS (v4i32 VR128X:$src2), FR32X))>;
2297 def : Pat<(v4f32 (X86Movss VR128X:$src1, VR128X:$src2)),
2298 (VMOVSSZrr (v4f32 VR128X:$src1),
2299 (COPY_TO_REGCLASS (v4f32 VR128X:$src2), FR32X))>;
2300
2301 // 256-bit variants
2302 def : Pat<(v8i32 (X86Movss VR256X:$src1, VR256X:$src2)),
2303 (SUBREG_TO_REG (i32 0),
2304 (VMOVSSZrr (EXTRACT_SUBREG (v8i32 VR256X:$src1), sub_xmm),
2305 (EXTRACT_SUBREG (v8i32 VR256X:$src2), sub_xmm)),
2306 sub_xmm)>;
2307 def : Pat<(v8f32 (X86Movss VR256X:$src1, VR256X:$src2)),
2308 (SUBREG_TO_REG (i32 0),
2309 (VMOVSSZrr (EXTRACT_SUBREG (v8f32 VR256X:$src1), sub_xmm),
2310 (EXTRACT_SUBREG (v8f32 VR256X:$src2), sub_xmm)),
2311 sub_xmm)>;
2312
2313 // Shuffle with VMOVSD
2314 def : Pat<(v2i64 (X86Movsd VR128X:$src1, VR128X:$src2)),
2315 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2316 def : Pat<(v2f64 (X86Movsd VR128X:$src1, VR128X:$src2)),
2317 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2318 def : Pat<(v4f32 (X86Movsd VR128X:$src1, VR128X:$src2)),
2319 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2320 def : Pat<(v4i32 (X86Movsd VR128X:$src1, VR128X:$src2)),
2321 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2322
2323 // 256-bit variants
2324 def : Pat<(v4i64 (X86Movsd VR256X:$src1, VR256X:$src2)),
2325 (SUBREG_TO_REG (i32 0),
2326 (VMOVSDZrr (EXTRACT_SUBREG (v4i64 VR256X:$src1), sub_xmm),
2327 (EXTRACT_SUBREG (v4i64 VR256X:$src2), sub_xmm)),
2328 sub_xmm)>;
2329 def : Pat<(v4f64 (X86Movsd VR256X:$src1, VR256X:$src2)),
2330 (SUBREG_TO_REG (i32 0),
2331 (VMOVSDZrr (EXTRACT_SUBREG (v4f64 VR256X:$src1), sub_xmm),
2332 (EXTRACT_SUBREG (v4f64 VR256X:$src2), sub_xmm)),
2333 sub_xmm)>;
2334
2335 def : Pat<(v2f64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
2336 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2337 def : Pat<(v2i64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
2338 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2339 def : Pat<(v4f32 (X86Movlps VR128X:$src1, VR128X:$src2)),
2340 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2341 def : Pat<(v4i32 (X86Movlps VR128X:$src1, VR128X:$src2)),
2342 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2343}
2344
// vmovq (opcode 0x7E, EVEX, VEX_W), selected for X86vzmovl of a v2i64 value.
// Register-source form.
let AddedComplexity = 15 in {
  def VMOVZPQILo2PQIZrr :
      AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src),
                "vmovq\t{$src, $dst|$dst, $src}",
                [(set VR128X:$dst,
                      (v2i64 (X86vzmovl (v2i64 VR128X:$src))))],
                IIC_SSE_MOVQ_RR>,
      EVEX, VEX_W;
}

// Memory-source form: matches X86vzmovl of a v2i64 load. It carries a higher
// AddedComplexity (20) than the register form (15).
let AddedComplexity = 20 in {
  def VMOVZPQILo2PQIZrm :
      AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst), (ins i128mem:$src),
                "vmovq\t{$src, $dst|$dst, $src}",
                [(set VR128X:$dst,
                      (v2i64 (X86vzmovl (loadv2i64 addr:$src))))],
                IIC_SSE_MOVDQ>,
      EVEX, VEX_W, EVEX_CD8<8, CD8VT8>;
}
2361
let Predicates = [HasAVX512] in {
  // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
  let AddedComplexity = 20 in {
    // Scalar sources: an i32 load, a GR64 register and a GR32 register.
    def : Pat<(v4i32 (X86vzmovl
                       (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
              (VMOVDI2PDIZrm addr:$src)>;
    def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
              (VMOV64toPQIZrr GR64:$src)>;
    def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
              (VMOVDI2PDIZrr GR32:$src)>;

    // Vector sources: full-vector loads (optionally bitcast), a register,
    // and the X86vzload node.
    def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
              (VMOVDI2PDIZrm addr:$src)>;
    def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
              (VMOVDI2PDIZrm addr:$src)>;
    def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
              (VMOVZPQILo2PQIZrm addr:$src)>;
    def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
              (VMOVZPQILo2PQIZrr VR128X:$src)>;
    def : Pat<(v2i64 (X86vzload addr:$src)),
              (VMOVZPQILo2PQIZrm addr:$src)>;
  }

  // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
  def : Pat<(v8i32 (X86vzmovl
                     (insert_subvector undef,
                                       (v4i32 (scalar_to_vector GR32:$src)),
                                       (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>;
  def : Pat<(v4i64 (X86vzmovl
                     (insert_subvector undef,
                                       (v2i64 (scalar_to_vector GR64:$src)),
                                       (iPTR 0)))),
            (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
}
2392
// Inserting a GPR into element 0 of an all-zeros 512-bit vector: move the
// scalar into an XMM register and widen it with SUBREG_TO_REG.
def : Pat<(v16i32 (X86Vinsert (v16i32 immAllZerosV), GR32:$src2, (iPTR 0))),
          (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src2), sub_xmm)>;

def : Pat<(v8i64 (X86Vinsert (bc_v8i64 (v16i32 immAllZerosV)),
                             GR64:$src2, (iPTR 0))),
          (SUBREG_TO_REG (i32 0), (VMOV64toPQIZrr GR64:$src2), sub_xmm)>;

// Same selection when the vector being inserted into is undef.
def : Pat<(v16i32 (X86Vinsert undef, GR32:$src2, (iPTR 0))),
          (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src2), sub_xmm)>;

def : Pat<(v8i64 (X86Vinsert undef, GR64:$src2, (iPTR 0))),
          (SUBREG_TO_REG (i32 0), (VMOV64toPQIZrr GR64:$src2), sub_xmm)>;
2404
//===----------------------------------------------------------------------===//
// AVX-512 - Non-temporals
//===----------------------------------------------------------------------===//
// Non-temporal loads (vmovntdqa, opcode 0x2A). Only the 512-bit form has a
// selection pattern (the int_x86_avx512_movntdqa intrinsic); the 256- and
// 128-bit forms have empty patterns and additionally require HasVLX.
let SchedRW = [WriteLoad] in {
  def VMOVNTDQAZrm :
      AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst), (ins i512mem:$src),
               "vmovntdqa\t{$src, $dst|$dst, $src}",
               [(set VR512:$dst, (int_x86_avx512_movntdqa addr:$src))],
               SSEPackedInt>,
      EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;

  let Predicates = [HasAVX512, HasVLX] in {
    def VMOVNTDQAZ256rm :
        AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst), (ins i256mem:$src),
                 "vmovntdqa\t{$src, $dst|$dst, $src}", [],
                 SSEPackedInt>,
        EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;

    def VMOVNTDQAZ128rm :
        AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst), (ins i128mem:$src),
                 "vmovntdqa\t{$src, $dst|$dst, $src}", [],
                 SSEPackedInt>,
        EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
  }
}
2429
// One non-temporal store instruction ("mr": memory destination, register
// source).
//   opc       - opcode byte
//   OpcodeStr - mnemonic
//   st_frag   - store fragment matched by the selection pattern
//   OpVT / RC - value type and register class of the stored vector
//   memop     - memory operand for the destination
//   d / itin  - execution domain and itinerary
multiclass avx512_movnt<bits<8> opc, string OpcodeStr, PatFrag st_frag,
                        ValueType OpVT, RegisterClass RC, X86MemOperand memop,
                        Domain d, InstrItinClass itin = IIC_SSE_MOVNT> {
  // NOTE(review): the large AddedComplexity presumably makes this pattern win
  // over ordinary store patterns - confirm.
  let SchedRW = [WriteStore], mayStore = 1, AddedComplexity = 400 in {
    def mr : AVX512PI<opc, MRMDestMem, (outs), (ins memop:$dst, RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                      [(st_frag (OpVT RC:$src), addr:$dst)], d, itin>,
             EVEX;
  }
}
2439
// Instantiates avx512_movnt for all three vector lengths (Z, Z256, Z128).
// The value type is looked up by name as "v" # <count> # elty # elsz and the
// memory operand as elty # "<bits>mem". The 256/128-bit forms are
// additionally predicated on HasVLX.
//   elty                   - element kind letter used in the names ("i" or "f")
//   elsz                   - element size in bits, as a string
//   vsz512, vsz256, vsz128 - element count for each vector length
//   prd                    - base predicate for all three forms
multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr, PatFrag st_frag,
                           string elty, string elsz, string vsz512,
                           string vsz256, string vsz128, Domain d,
                           Predicate prd, InstrItinClass itin = IIC_SSE_MOVNT> {
  let Predicates = [prd] in {
    defm Z : avx512_movnt<opc, OpcodeStr, st_frag,
                          !cast<ValueType>("v"##vsz512##elty##elsz), VR512,
                          !cast<X86MemOperand>(elty##"512mem"), d, itin>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_movnt<opc, OpcodeStr, st_frag,
                             !cast<ValueType>("v"##vsz256##elty##elsz), VR256X,
                             !cast<X86MemOperand>(elty##"256mem"), d, itin>,
                EVEX_V256;

    defm Z128 : avx512_movnt<opc, OpcodeStr, st_frag,
                             !cast<ValueType>("v"##vsz128##elty##elsz), VR128X,
                             !cast<X86MemOperand>(elty##"128mem"), d, itin>,
                EVEX_V128;
  }
}
2462
// Non-temporal stores, all matched through alignednontemporalstore.
// Integer form: v8i64 / v4i64 / v2i64.
defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", alignednontemporalstore,
                                "i", "64", "8", "4", "2",
                                SSEPackedInt, HasAVX512>,
                PD, EVEX_CD8<64, CD8VF>;

// Double-precision form: v8f64 / v4f64 / v2f64.
defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", alignednontemporalstore,
                                "f", "64", "8", "4", "2",
                                SSEPackedDouble, HasAVX512>,
                PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Single-precision form: v16f32 / v8f32 / v4f32.
defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", alignednontemporalstore,
                                "f", "32", "16", "8", "4",
                                SSEPackedSingle, HasAVX512>,
                PS, EVEX_CD8<32, CD8VF>;
2474
//===----------------------------------------------------------------------===//
// AVX-512 - Integer arithmetic
//===----------------------------------------------------------------------===//
// Packed integer binary operation with selection patterns. Defines nine
// instructions:
//   rr  / rrk  / rrkz   - register source
//   rm  / rmk  / rmkz   - full-vector memory source
//   rmb / rmbk / rmbkz  - scalar memory source wrapped in X86VBroadcast
// The "k" forms select between the result and $src0 (tied to $dst) via
// vselect on $mask; the "kz" forms select between the result and zero.
//   OpVT / RC                    - vector value type and its register class
//   KRC                          - register class of the mask operand
//   memop_frag / x86memop        - load fragment and operand, vector form
//   scalar_mfrag / x86scalar_mop - load fragment and operand, broadcast form
//   BrdcstStr                    - text appended to ${src2} in broadcast asm
//   IsCommutable                 - applied to the unmasked rr form only
multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           ValueType OpVT, RegisterClass KRC,
                           RegisterClass RC, PatFrag memop_frag,
                           X86MemOperand x86memop, PatFrag scalar_mfrag,
                           X86MemOperand x86scalar_mop, string BrdcstStr,
                           OpndItins itins, bit IsCommutable = 0> {
  // ---- Register forms ----
  let isCommutable = IsCommutable in {
    def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
               (ins RC:$src1, RC:$src2),
               !strconcat(OpcodeStr,
                          " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
               [(set RC:$dst,
                     (OpVT (OpNode (OpVT RC:$src1), (OpVT RC:$src2))))],
               itins.rr>,
             EVEX_4V;
  }

  let AddedComplexity = 30 in {
    let Constraints = "$src0 = $dst" in {
      def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
                  (ins RC:$src0, KRC:$mask, RC:$src1, RC:$src2),
                  !strconcat(OpcodeStr,
                    " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
                  [(set RC:$dst,
                        (OpVT (vselect KRC:$mask,
                                (OpNode (OpVT RC:$src1), (OpVT RC:$src2)),
                                RC:$src0)))],
                  itins.rr>,
                EVEX_4V, EVEX_K;
    }

    def rrkz : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
                 (ins KRC:$mask, RC:$src1, RC:$src2),
                 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst {${mask}} {z}" ,
                            "|$dst {${mask}} {z}, $src1, $src2}"),
                 [(set RC:$dst,
                       (OpVT (vselect KRC:$mask,
                               (OpNode (OpVT RC:$src1), (OpVT RC:$src2)),
                               (OpVT immAllZerosV))))],
                 itins.rr>,
               EVEX_4V, EVEX_KZ;
  }

  let mayLoad = 1 in {
    // ---- Full-vector memory forms ----
    def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
               (ins RC:$src1, x86memop:$src2),
               !strconcat(OpcodeStr,
                          " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
               [(set RC:$dst,
                     (OpVT (OpNode (OpVT RC:$src1),
                                   (memop_frag addr:$src2))))],
               itins.rm>,
             EVEX_4V;

    let AddedComplexity = 30 in {
      let Constraints = "$src0 = $dst" in {
        def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                    (ins RC:$src0, KRC:$mask, RC:$src1, x86memop:$src2),
                    !strconcat(OpcodeStr,
                      " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
                    [(set RC:$dst,
                          (OpVT (vselect KRC:$mask,
                                  (OpNode (OpVT RC:$src1),
                                          (memop_frag addr:$src2)),
                                  RC:$src0)))],
                    itins.rm>,
                  EVEX_4V, EVEX_K;
      }

      def rmkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                   (ins KRC:$mask, RC:$src1, x86memop:$src2),
                   !strconcat(OpcodeStr,
                     " \t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
                   [(set RC:$dst,
                         (OpVT (vselect KRC:$mask,
                                 (OpNode (OpVT RC:$src1),
                                         (memop_frag addr:$src2)),
                                 (OpVT immAllZerosV))))],
                   itins.rm>,
                 EVEX_4V, EVEX_KZ;
    }

    // ---- Broadcast memory forms ----
    def rmb : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                (ins RC:$src1, x86scalar_mop:$src2),
                !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
                           ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
                [(set RC:$dst,
                      (OpNode RC:$src1,
                              (OpVT (X86VBroadcast
                                      (scalar_mfrag addr:$src2)))))],
                itins.rm>,
              EVEX_4V, EVEX_B;

    let AddedComplexity = 30 in {
      let Constraints = "$src0 = $dst" in {
        def rmbk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                     (ins RC:$src0, KRC:$mask, RC:$src1, x86scalar_mop:$src2),
                     !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
                       ", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}",
                       BrdcstStr, "}"),
                     [(set RC:$dst,
                           (OpVT (vselect KRC:$mask,
                                   (OpNode (OpVT RC:$src1),
                                           (OpVT (X86VBroadcast
                                                   (scalar_mfrag addr:$src2)))),
                                   RC:$src0)))],
                     itins.rm>,
                   EVEX_4V, EVEX_B, EVEX_K;
      }

      def rmbkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                    (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
                    !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
                      ", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}",
                      BrdcstStr, "}"),
                    [(set RC:$dst,
                          (OpVT (vselect KRC:$mask,
                                  (OpNode (OpVT RC:$src1),
                                          (OpVT (X86VBroadcast
                                                  (scalar_mfrag addr:$src2)))),
                                  (OpVT immAllZerosV))))],
                    itins.rm>,
                  EVEX_4V, EVEX_B, EVEX_KZ;
    }
  }
}
Elena Demikhovskybb2f6b72014-03-27 09:45:08 +00002567
// Pattern-less variant of avx512_binop_rm for operations whose source and
// destination value types differ. Defines the same nine instruction forms
// (rr/rrk/rrkz, rm/rmk/rmkz, rmb/rmbk/rmbkz), all with empty pattern lists;
// selection is done by separate Pat<> records.
//   DstVT / SrcVT  - result and source value types (not referenced in the
//                    body; part of the interface only)
//   KRC            - register class of the mask operand
//   itins          - rr itinerary for register forms, rm for memory forms
//   IsCommutable   - applied to all three register forms
// NOTE(review): the merge-masking forms (rrk, rmk, rmbk) take no tied $src0
// pass-through operand, unlike avx512_binop_rm - confirm this is intended.
multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, ValueType DstVT,
                            ValueType SrcVT, RegisterClass KRC, RegisterClass RC,
                            PatFrag memop_frag, X86MemOperand x86memop,
                            PatFrag scalar_mfrag, X86MemOperand x86scalar_mop,
                            string BrdcstStr, OpndItins itins, bit IsCommutable = 0> {
  let isCommutable = IsCommutable in
  {
    // The unmasked form now carries itins.rr like its masked siblings;
    // previously it silently fell back to the default itinerary.
    def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
       (ins RC:$src1, RC:$src2),
       !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
       [], itins.rr>, EVEX_4V;
    def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
               (ins KRC:$mask, RC:$src1, RC:$src2),
               !strconcat(OpcodeStr,
                  " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
               [], itins.rr>, EVEX_4V, EVEX_K;
    def rrkz : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
                (ins KRC:$mask, RC:$src1, RC:$src2),
                !strconcat(OpcodeStr, " \t{$src2, $src1, $dst {${mask}} {z}" ,
                    "|$dst {${mask}} {z}, $src1, $src2}"),
                [], itins.rr>, EVEX_4V, EVEX_KZ;
  }
  let mayLoad = 1 in {
    // Likewise, the unmasked memory form now carries itins.rm.
    def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
              (ins RC:$src1, x86memop:$src2),
              !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
              [], itins.rm>, EVEX_4V;
    def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
               (ins KRC:$mask, RC:$src1, x86memop:$src2),
               !strconcat(OpcodeStr,
                   " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
               [], itins.rm>, EVEX_4V, EVEX_K;
    def rmkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                (ins KRC:$mask, RC:$src1, x86memop:$src2),
                !strconcat(OpcodeStr,
                    " \t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
                [], itins.rm>, EVEX_4V, EVEX_KZ;
    def rmb : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
               (ins RC:$src1, x86scalar_mop:$src2),
               !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
                          ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
               [], itins.rm>, EVEX_4V, EVEX_B;
    def rmbk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
                !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
                           ", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}",
                           BrdcstStr, "}"),
                [], itins.rm>, EVEX_4V, EVEX_B, EVEX_K;
    def rmbkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                 (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
                 !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
                            ", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}",
                            BrdcstStr, "}"),
                 [], itins.rm>, EVEX_4V, EVEX_B, EVEX_KZ;
  }
}
2624
// 512-bit packed integer add / subtract / multiply. add and mul are
// instantiated as commutable (last template argument 1), sub is not.
defm VPADDDZ : avx512_binop_rm<0xFE, "vpaddd", add, v16i32, VK16WM, VR512,
                   memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                   SSE_INTALU_ITINS_P, 1>, EVEX_V512, EVEX_CD8<32, CD8VF>;

defm VPSUBDZ : avx512_binop_rm<0xFA, "vpsubd", sub, v16i32, VK16WM, VR512,
                   memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                   SSE_INTALU_ITINS_P, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>;

// NOTE(review): vpmulld is tagged with the ALU itinerary; confirm whether a
// multiply itinerary is more appropriate.
defm VPMULLDZ : avx512_binop_rm<0x40, "vpmulld", mul, v16i32, VK16WM, VR512,
                   memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                   SSE_INTALU_ITINS_P, 1>, T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;

defm VPADDQZ : avx512_binop_rm<0xD4, "vpaddq", add, v8i64, VK8WM, VR512,
                   memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                   SSE_INTALU_ITINS_P, 1>, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_W;

defm VPSUBQZ : avx512_binop_rm<0xFB, "vpsubq", sub, v8i64, VK8WM, VR512,
                   memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                   SSE_INTALU_ITINS_P, 0>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

// Widening multiplies (v16i32 sources, v8i64 result). These have no inline
// patterns and are selected through Pat<> records. Both use the
// integer-multiply itinerary; vpmuldq previously used the ALU itinerary,
// inconsistent with its unsigned counterpart.
defm VPMULDQZ : avx512_binop_rm2<0x28, "vpmuldq", v8i64, v16i32, VK8WM, VR512,
                   memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                   SSE_INTMUL_ITINS_P, 1>, T8PD, EVEX_V512,
                   EVEX_CD8<64, CD8VF>, VEX_W;

defm VPMULUDQZ : avx512_binop_rm2<0xF4, "vpmuludq", v8i64, v16i32, VK8WM, VR512,
                   memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                   SSE_INTMUL_ITINS_P, 1>, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002653
// Selection for the pattern-less widening multiplies.
// X86pmuludq on v16i32 operands selects the register form of vpmuludq.
def : Pat<(v8i64 (X86pmuludq (v16i32 VR512:$src1), (v16i32 VR512:$src2))),
          (VPMULUDQZrr VR512:$src1, VR512:$src2)>;

// Masked intrinsics called with an all-zeros pass-through and an all-ones
// (i8 -1) mask select the unmasked register forms.
def : Pat<(v8i64 (int_x86_avx512_mask_pmulu_dq_512
                    (v16i32 VR512:$src1), (v16i32 VR512:$src2),
                    (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
          (VPMULUDQZrr VR512:$src1, VR512:$src2)>;
def : Pat<(v8i64 (int_x86_avx512_mask_pmul_dq_512
                    (v16i32 VR512:$src1), (v16i32 VR512:$src2),
                    (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
          (VPMULDQZrr VR512:$src1, VR512:$src2)>;
2663
// 512-bit packed integer min/max. Every form is instantiated as commutable.
// The 64-bit-element variants previously passed IsCommutable = 0 although
// they use the same X86umax/X86smax/X86umin/X86smin nodes as the commutable
// 32-bit-element variants.
defm VPMAXUDZ : avx512_binop_rm<0x3F, "vpmaxud", X86umax, v16i32, VK16WM, VR512,
                   memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                   SSE_INTALU_ITINS_P, 1>,
                   T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPMAXUQZ : avx512_binop_rm<0x3F, "vpmaxuq", X86umax, v8i64, VK8WM, VR512,
                   memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                   SSE_INTALU_ITINS_P, 1>,
                   T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

defm VPMAXSDZ : avx512_binop_rm<0x3D, "vpmaxsd", X86smax, v16i32, VK16WM, VR512,
                   memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                   SSE_INTALU_ITINS_P, 1>,
                   T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPMAXSQZ : avx512_binop_rm<0x3D, "vpmaxsq", X86smax, v8i64, VK8WM, VR512,
                   memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                   SSE_INTALU_ITINS_P, 1>,
                   T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

defm VPMINUDZ : avx512_binop_rm<0x3B, "vpminud", X86umin, v16i32, VK16WM, VR512,
                   memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                   SSE_INTALU_ITINS_P, 1>,
                   T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPMINUQZ : avx512_binop_rm<0x3B, "vpminuq", X86umin, v8i64, VK8WM, VR512,
                   memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                   SSE_INTALU_ITINS_P, 1>,
                   T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

defm VPMINSDZ : avx512_binop_rm<0x39, "vpminsd", X86smin, v16i32, VK16WM, VR512,
                   memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                   SSE_INTALU_ITINS_P, 1>,
                   T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPMINSQZ : avx512_binop_rm<0x39, "vpminsq", X86smin, v8i64, VK8WM, VR512,
                   memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                   SSE_INTALU_ITINS_P, 1>,
                   T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovsky199c8232013-10-27 08:18:37 +00002699
// Masked min/max intrinsics called with an all-zeros pass-through and an
// all-ones mask ((i16 -1) for 16 elements, (i8 -1) for 8 elements) select
// the unmasked register forms.

// Signed / unsigned max.
def : Pat<(v16i32 (int_x86_avx512_mask_pmaxs_d_512
                     (v16i32 VR512:$src1), (v16i32 VR512:$src2),
                     (v16i32 immAllZerosV), (i16 -1))),
          (VPMAXSDZrr VR512:$src1, VR512:$src2)>;
def : Pat<(v16i32 (int_x86_avx512_mask_pmaxu_d_512
                     (v16i32 VR512:$src1), (v16i32 VR512:$src2),
                     (v16i32 immAllZerosV), (i16 -1))),
          (VPMAXUDZrr VR512:$src1, VR512:$src2)>;
def : Pat<(v8i64 (int_x86_avx512_mask_pmaxs_q_512
                    (v8i64 VR512:$src1), (v8i64 VR512:$src2),
                    (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
          (VPMAXSQZrr VR512:$src1, VR512:$src2)>;
def : Pat<(v8i64 (int_x86_avx512_mask_pmaxu_q_512
                    (v8i64 VR512:$src1), (v8i64 VR512:$src2),
                    (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
          (VPMAXUQZrr VR512:$src1, VR512:$src2)>;

// Signed / unsigned min.
def : Pat<(v16i32 (int_x86_avx512_mask_pmins_d_512
                     (v16i32 VR512:$src1), (v16i32 VR512:$src2),
                     (v16i32 immAllZerosV), (i16 -1))),
          (VPMINSDZrr VR512:$src1, VR512:$src2)>;
def : Pat<(v16i32 (int_x86_avx512_mask_pminu_d_512
                     (v16i32 VR512:$src1), (v16i32 VR512:$src2),
                     (v16i32 immAllZerosV), (i16 -1))),
          (VPMINUDZrr VR512:$src1, VR512:$src2)>;
def : Pat<(v8i64 (int_x86_avx512_mask_pmins_q_512
                    (v8i64 VR512:$src1), (v8i64 VR512:$src2),
                    (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
          (VPMINSQZrr VR512:$src1, VR512:$src2)>;
def : Pat<(v8i64 (int_x86_avx512_mask_pminu_q_512
                    (v8i64 VR512:$src1), (v8i64 VR512:$src2),
                    (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
          (VPMINUQZrr VR512:$src1, VR512:$src2)>;
//===----------------------------------------------------------------------===//
// AVX-512 - Unpack Instructions
//===----------------------------------------------------------------------===//
2727
// Floating-point unpack: a register-register form (rr) and a
// register-memory form (rm).
//   OpNode         - unpack node applied to the two sources
//   vt             - result value type
//   mem_frag       - load fragment for the memory source; its result is
//                    passed through bitconvert before reaching OpNode
//   RC / x86memop  - register class and memory operand
//   asm            - complete assembly string (mnemonic plus operands)
//   d              - execution domain
multiclass avx512_unpack_fp<bits<8> opc, SDNode OpNode, ValueType vt,
                            PatFrag mem_frag, RegisterClass RC,
                            X86MemOperand x86memop, string asm,
                            Domain d> {
  def rr : AVX512PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
                    asm,
                    [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))],
                    d>,
           EVEX_4V;

  def rm : AVX512PI<opc, MRMSrcMem, (outs RC:$dst),
                    (ins RC:$src1, x86memop:$src2),
                    asm,
                    [(set RC:$dst,
                          (vt (OpNode RC:$src1,
                                      (bitconvert (mem_frag addr:$src2)))))],
                    d>,
           EVEX_4V;
}
2744
// 512-bit floating-point unpack high/low.
// The single-precision variants use memopv16f32 so the folded-load pattern
// matches a load of the instruction's own v16f32 type. They previously
// passed memopv8f64, which makes the rm pattern match only a v8f64 load
// bitcast to v16f32.
defm VUNPCKHPSZ: avx512_unpack_fp<0x15, X86Unpckh, v16f32, memopv16f32,
      VR512, f512mem, "vunpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      SSEPackedSingle>, PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VUNPCKHPDZ: avx512_unpack_fp<0x15, X86Unpckh, v8f64, memopv8f64,
      VR512, f512mem, "vunpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      SSEPackedDouble>, PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VUNPCKLPSZ: avx512_unpack_fp<0x14, X86Unpckl, v16f32, memopv16f32,
      VR512, f512mem, "vunpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      SSEPackedSingle>, PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VUNPCKLPDZ: avx512_unpack_fp<0x14, X86Unpckl, v8f64, memopv8f64,
      VR512, f512mem, "vunpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      SSEPackedDouble>, PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002757
// Integer unpack: a register-register form (rr) and a register-memory form
// (rm), both using the IIC_SSE_UNPCK itinerary.
//   OpNode / OpVT          - unpack node and the vector value type
//   RC                     - register class of sources and destination
//   memop_frag / x86memop  - load fragment (passed through bitconvert) and
//                            memory operand
multiclass avx512_unpack_int<bits<8> opc, string OpcodeStr, SDNode OpNode,
                             ValueType OpVT, RegisterClass RC,
                             PatFrag memop_frag, X86MemOperand x86memop> {
  def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
             !strconcat(OpcodeStr,
                        " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set RC:$dst,
                   (OpVT (OpNode (OpVT RC:$src1), (OpVT RC:$src2))))],
             IIC_SSE_UNPCK>,
           EVEX_4V;

  def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
             (ins RC:$src1, x86memop:$src2),
             !strconcat(OpcodeStr,
                        " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set RC:$dst,
                   (OpVT (OpNode (OpVT RC:$src1),
                                 (bitconvert (memop_frag addr:$src2)))))],
             IIC_SSE_UNPCK>,
           EVEX_4V;
}
// 512-bit integer unpack low (X86Unpckl), 32- and 64-bit elements.
defm VPUNPCKLDQZ  : avx512_unpack_int<0x62, "vpunpckldq", X86Unpckl, v16i32,
                                      VR512, memopv16i32, i512mem>,
                    EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPUNPCKLQDQZ : avx512_unpack_int<0x6C, "vpunpcklqdq", X86Unpckl, v8i64,
                                      VR512, memopv8i64, i512mem>,
                    EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

// 512-bit integer unpack high (X86Unpckh), 32- and 64-bit elements.
defm VPUNPCKHDQZ  : avx512_unpack_int<0x6A, "vpunpckhdq", X86Unpckh, v16i32,
                                      VR512, memopv16i32, i512mem>,
                    EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPUNPCKHQDQZ : avx512_unpack_int<0x6D, "vpunpckhqdq", X86Unpckh, v8i64,
                                      VR512, memopv8i64, i512mem>,
                    EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
//===----------------------------------------------------------------------===//
// AVX-512 - PSHUFD
//===----------------------------------------------------------------------===//
2788
// Shuffle controlled by an 8-bit immediate: a register-source form (ri) and
// a memory-source form (mi). No itinerary is passed to either.
//   OpNode    - shuffle node taking (source, i8 immediate)
//   mem_frag  - load fragment for the memory source
//   x86memop  - memory operand
//   OpVT      - result value type
multiclass avx512_pshuf_imm<bits<8> opc, string OpcodeStr, RegisterClass RC,
                            SDNode OpNode, PatFrag mem_frag,
                            X86MemOperand x86memop, ValueType OpVT> {
  def ri : AVX512Ii8<opc, MRMSrcReg, (outs RC:$dst),
                     (ins RC:$src1, i8imm:$src2),
                     !strconcat(OpcodeStr,
                                " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                     [(set RC:$dst,
                           (OpVT (OpNode RC:$src1, (i8 imm:$src2))))]>,
           EVEX;

  def mi : AVX512Ii8<opc, MRMSrcMem, (outs RC:$dst),
                     (ins x86memop:$src1, i8imm:$src2),
                     !strconcat(OpcodeStr,
                                " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                     [(set RC:$dst,
                           (OpVT (OpNode (mem_frag addr:$src1),
                                         (i8 imm:$src2))))]>,
           EVEX;
}
2807
// 512-bit immediate-controlled shuffles.
defm VPSHUFDZ : avx512_pshuf_imm<0x70, "vpshufd", VR512, X86PShufd, memopv16i32,
                      i512mem, v16i32>, PD, EVEX_V512, EVEX_CD8<32, CD8VF>;

let ExeDomain = SSEPackedSingle in
defm VPERMILPSZ : avx512_pshuf_imm<0x04, "vpermilps", VR512, X86VPermilpi,
                      memopv16f32, i512mem, v16f32>, TAPD, EVEX_V512,
                      EVEX_CD8<32, CD8VF>;
// vpermilpd works on 64-bit elements (v8f64, VEX_W), so the
// compressed-displacement element size is declared as 64, matching the other
// 64-bit-element instructions in this file. It was previously declared as 32.
let ExeDomain = SSEPackedDouble in
defm VPERMILPDZ : avx512_pshuf_imm<0x05, "vpermilpd", VR512, X86VPermilpi,
                      memopv8f64, i512mem, v8f64>, TAPD, EVEX_V512,
                      VEX_W, EVEX_CD8<64, CD8VF>;

// Integer-typed X86VPermilpi reuses the floating-point instructions with the
// same element width.
def : Pat<(v16i32 (X86VPermilpi VR512:$src1, (i8 imm:$imm))),
          (VPERMILPSZri VR512:$src1, imm:$imm)>;
def : Pat<(v8i64 (X86VPermilpi VR512:$src1, (i8 imm:$imm))),
          (VPERMILPDZri VR512:$src1, imm:$imm)>;
2824
//===----------------------------------------------------------------------===//
// AVX-512 Logical Instructions
//===----------------------------------------------------------------------===//
2828
// 512-bit bitwise logic, in 32-bit ("d") and 64-bit ("q") element flavours.
// and / or / xor are instantiated as commutable.
defm VPANDDZ : avx512_binop_rm<0xDB, "vpandd", and, v16i32, VK16WM, VR512,
                   memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                   SSE_BIT_ITINS_P, 1>,
               EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPANDQZ : avx512_binop_rm<0xDB, "vpandq", and, v8i64, VK8WM, VR512,
                   memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                   SSE_BIT_ITINS_P, 1>,
               EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

defm VPORDZ : avx512_binop_rm<0xEB, "vpord", or, v16i32, VK16WM, VR512,
                  memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                  SSE_BIT_ITINS_P, 1>,
              EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPORQZ : avx512_binop_rm<0xEB, "vporq", or, v8i64, VK8WM, VR512,
                  memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                  SSE_BIT_ITINS_P, 1>,
              EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

defm VPXORDZ : avx512_binop_rm<0xEF, "vpxord", xor, v16i32, VK16WM, VR512,
                   memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                   SSE_BIT_ITINS_P, 1>,
               EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPXORQZ : avx512_binop_rm<0xEF, "vpxorq", xor, v8i64, VK8WM, VR512,
                   memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                   SSE_BIT_ITINS_P, 1>,
               EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

// and-not (X86andnp) is instantiated as non-commutable.
defm VPANDNDZ : avx512_binop_rm<0xDF, "vpandnd", X86andnp, v16i32, VK16WM,
                    VR512, memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                    SSE_BIT_ITINS_P, 0>,
                EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPANDNQZ : avx512_binop_rm<0xDF, "vpandnq", X86andnp, v8i64, VK8WM,
                    VR512, memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                    SSE_BIT_ITINS_P, 0>,
                EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002853
//===----------------------------------------------------------------------===//
// AVX-512 FP arithmetic
//===----------------------------------------------------------------------===//
2857
// Scalar FP binary operation, instantiated once per precision via
// sse12_fp_scalar:
//   SSZ - single precision: OpcodeStr + "ss", FR32X / f32mem, itins.s, XS.
//   SDZ - double precision: OpcodeStr + "sd", FR64X / f64mem, itins.d, XD,
//         VEX_W.
// Both use the EVEX_4V encoding with VEX_LIG and a CD8VT1 displacement
// scale matching the element width.
multiclass avx512_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          SizeItins itins> {
  defm SSZ : sse12_fp_scalar<opc, OpcodeStr # "ss", OpNode, FR32X, f32mem,
                             itins.s, 0>,
             XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;

  defm SDZ : sse12_fp_scalar<opc, OpcodeStr # "sd", OpNode, FR64X, f64mem,
                             itins.d, 0>,
             XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
2867
// Scalar FP arithmetic (ss/sd forms come from avx512_binop_s).
//
// Only add and mul may have their source operands swapped.
let isCommutable = 1 in {
defm VADD : avx512_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>;
defm VMUL : avx512_binop_s<0x59, "mul", fmul, SSE_ALU_ITINS_S>;
}
// sub and div are not commutative. X86fmin/X86fmax are not either: the x86
// min/max instructions return the second source operand when an operand is
// NaN or when both are zero, so swapping operands can change the result.
// They must not be marked commutable.
let isCommutable = 0 in {
defm VMIN : avx512_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>;
defm VMAX : avx512_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>;
defm VSUB : avx512_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>;
defm VDIV : avx512_binop_s<0x5E, "div", fdiv, SSE_ALU_ITINS_S>;
}
2878
// Packed FP binary operation on a full vector register.
//
// Template arguments:
//   opc, OpcodeStr, OpNode   - opcode byte, mnemonic and the DAG node matched.
//   KRC                      - mask register class used by the masked forms.
//   RC, vt                   - vector register class and its value type.
//   x86memop, mem_frag       - full-width memory operand and its load fragment.
//   x86scalar_mop,
//   scalar_mfrag, BrdcstStr  - scalar memory operand, its load fragment and
//                              the "{1toN}" decoration of the broadcast forms.
//   d, itins                 - execution domain and itineraries.
//   commutable               - value assigned to isCommutable for the
//                              register-register forms.
//
// Records produced:
//   rr, rm, rmb              - unmasked forms; these carry selection patterns.
//   rrk, rmk, rmbk           - merge-masked forms (EVEX_K), no pattern.
//   rrkz, rmkz, rmbkz        - zero-masked forms (EVEX_KZ), no pattern.
multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            RegisterClass KRC,
                            RegisterClass RC, ValueType vt,
                            X86MemOperand x86memop, PatFrag mem_frag,
                            X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
                            string BrdcstStr,
                            Domain d, OpndItins itins, bit commutable> {
  let isCommutable = commutable in {
    def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
       [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>,
       EVEX_4V;

    // The AT&T half of this string used to end in "{${mask}} |", leaving a
    // stray space that the Intel half and the other masked forms do not have.
    def rrk: PI<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src1, RC:$src2),
       !strconcat(OpcodeStr,
          " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
       [], itins.rr, d>, EVEX_4V, EVEX_K;

    def rrkz: PI<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src1, RC:$src2),
       !strconcat(OpcodeStr,
          " \t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
       [], itins.rr, d>, EVEX_4V, EVEX_KZ;
  }

  let mayLoad = 1 in {
    def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
       !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
       [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
       itins.rm, d>, EVEX_4V;

    // Broadcast form: the scalar loaded from memory is splat to all elements.
    def rmb : PI<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, x86scalar_mop:$src2),
       !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
          ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
       [(set RC:$dst, (OpNode RC:$src1,
                       (vt (X86VBroadcast (scalar_mfrag addr:$src2)))))],
       itins.rm, d>, EVEX_4V, EVEX_B;

    def rmk : PI<opc, MRMSrcMem, (outs RC:$dst),
       (ins KRC:$mask, RC:$src1, x86memop:$src2), !strconcat(OpcodeStr,
          "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
       [], itins.rm, d>, EVEX_4V, EVEX_K;

    def rmkz : PI<opc, MRMSrcMem, (outs RC:$dst),
       (ins KRC:$mask, RC:$src1, x86memop:$src2), !strconcat(OpcodeStr,
          "\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
       [], itins.rm, d>, EVEX_4V, EVEX_KZ;

    def rmbk : PI<opc, MRMSrcMem, (outs RC:$dst),
       (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2), !strconcat(OpcodeStr,
          " \t{${src2}", BrdcstStr,
          ", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}", BrdcstStr, "}"),
       [], itins.rm, d>, EVEX_4V, EVEX_B, EVEX_K;

    def rmbkz : PI<opc, MRMSrcMem, (outs RC:$dst),
       (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2), !strconcat(OpcodeStr,
          " \t{${src2}", BrdcstStr,
          ", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}",
          BrdcstStr, "}"),
       [], itins.rm, d>, EVEX_4V, EVEX_B, EVEX_KZ;
  }
}
2941
// 512-bit packed FP arithmetic. The last template argument is the
// `commutable` bit of avx512_fp_packed.
//
// add and mul are commutable.
defm VADDPSZ : avx512_fp_packed<0x58, "addps", fadd, VK16WM, VR512, v16f32, f512mem,
                   memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
                   SSE_ALU_ITINS_P.s, 1>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;

defm VADDPDZ : avx512_fp_packed<0x58, "addpd", fadd, VK8WM, VR512, v8f64, f512mem,
                   memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
                   SSE_ALU_ITINS_P.d, 1>,
                   EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VMULPSZ : avx512_fp_packed<0x59, "mulps", fmul, VK16WM, VR512, v16f32, f512mem,
                   memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
                   SSE_ALU_ITINS_P.s, 1>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
defm VMULPDZ : avx512_fp_packed<0x59, "mulpd", fmul, VK8WM, VR512, v8f64, f512mem,
                   memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
                   SSE_ALU_ITINS_P.d, 1>,
                   EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;

// min and max are NOT commutable: the x86 min/max instructions return the
// second source operand when an operand is NaN or when both are zero, so
// swapping the sources can change the result.
defm VMINPSZ : avx512_fp_packed<0x5D, "minps", X86fmin, VK16WM, VR512, v16f32, f512mem,
                   memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
                   SSE_ALU_ITINS_P.s, 0>,
                   EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
defm VMAXPSZ : avx512_fp_packed<0x5F, "maxps", X86fmax, VK16WM, VR512, v16f32, f512mem,
                   memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
                   SSE_ALU_ITINS_P.s, 0>,
                   EVEX_V512, PS, EVEX_CD8<32, CD8VF>;

defm VMINPDZ : avx512_fp_packed<0x5D, "minpd", X86fmin, VK8WM, VR512, v8f64, f512mem,
                   memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
                   SSE_ALU_ITINS_P.d, 0>,
                   EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
defm VMAXPDZ : avx512_fp_packed<0x5F, "maxpd", X86fmax, VK8WM, VR512, v8f64, f512mem,
                   memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
                   SSE_ALU_ITINS_P.d, 0>,
                   EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;

// sub and div are not commutable.
defm VSUBPSZ : avx512_fp_packed<0x5C, "subps", fsub, VK16WM, VR512, v16f32, f512mem,
                   memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
                   SSE_ALU_ITINS_P.s, 0>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
defm VDIVPSZ : avx512_fp_packed<0x5E, "divps", fdiv, VK16WM, VR512, v16f32, f512mem,
                   memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
                   SSE_ALU_ITINS_P.s, 0>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;

defm VSUBPDZ : avx512_fp_packed<0x5C, "subpd", fsub, VK8WM, VR512, v8f64, f512mem,
                   memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
                   SSE_ALU_ITINS_P.d, 0>,
                   EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
defm VDIVPDZ : avx512_fp_packed<0x5E, "divpd", fdiv, VK8WM, VR512, v8f64, f512mem,
                   memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
                   SSE_ALU_ITINS_P.d, 0>,
                   EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002992
// Select the plain register-register min/max instruction for the masked
// 512-bit intrinsics when they are called with an all-zeros third vector
// operand, an all-ones mask (-1) and FROUND_CURRENT.
def : Pat<(v16f32 (int_x86_avx512_mask_max_ps_512
                      (v16f32 VR512:$src1), (v16f32 VR512:$src2),
                      (bc_v16f32 (v16i32 immAllZerosV)),
                      (i16 -1), FROUND_CURRENT)),
          (VMAXPSZrr VR512:$src1, VR512:$src2)>;

def : Pat<(v8f64 (int_x86_avx512_mask_max_pd_512
                      (v8f64 VR512:$src1), (v8f64 VR512:$src2),
                      (bc_v8f64 (v16i32 immAllZerosV)),
                      (i8 -1), FROUND_CURRENT)),
          (VMAXPDZrr VR512:$src1, VR512:$src2)>;

def : Pat<(v16f32 (int_x86_avx512_mask_min_ps_512
                      (v16f32 VR512:$src1), (v16f32 VR512:$src2),
                      (bc_v16f32 (v16i32 immAllZerosV)),
                      (i16 -1), FROUND_CURRENT)),
          (VMINPSZrr VR512:$src1, VR512:$src2)>;

def : Pat<(v8f64 (int_x86_avx512_mask_min_pd_512
                      (v8f64 VR512:$src1), (v8f64 VR512:$src2),
                      (bc_v8f64 (v16i32 immAllZerosV)),
                      (i8 -1), FROUND_CURRENT)),
          (VMINPDZrr VR512:$src1, VR512:$src2)>;
//===----------------------------------------------------------------------===//
// AVX-512 VPTESTM instructions
//===----------------------------------------------------------------------===//
3015
// Vector test producing a mask register: KRC:$dst = OpNode(src1, src2).
//   rr - both sources in RC registers.
//   rm - second source loaded through memop_frag and bitconverted to the
//        type the pattern requires.
// Both forms are EVEX_4V encoded in the SSEPackedInt domain.
multiclass avx512_vptest<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                         RegisterClass RC, X86MemOperand x86memop,
                         PatFrag memop_frag, SDNode OpNode, ValueType vt> {
  def rr : AVX512PI<opc, MRMSrcReg, (outs KRC:$dst),
                    (ins RC:$src1, RC:$src2),
                    OpcodeStr # " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set KRC:$dst,
                          (OpNode (vt RC:$src1), (vt RC:$src2)))],
                    SSEPackedInt>,
           EVEX_4V;

  def rm : AVX512PI<opc, MRMSrcMem, (outs KRC:$dst),
                    (ins RC:$src1, x86memop:$src2),
                    OpcodeStr # " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set KRC:$dst,
                          (OpNode (vt RC:$src1),
                                  (bitconvert (memop_frag addr:$src2))))],
                    SSEPackedInt>,
           EVEX_4V;
}
3030
// VPTESTM on 512-bit vectors: dword elements write a VK16 mask, qword
// elements (VEX_W) write a VK8 mask.
defm VPTESTMDZ : avx512_vptest<0x27, "vptestmd", VK16, VR512, f512mem,
                               memopv16i32, X86testm, v16i32>,
                 T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;

defm VPTESTMQZ : avx512_vptest<0x27, "vptestmq", VK8, VR512, f512mem,
                               memopv8i64, X86testm, v8i64>,
                 T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
3037
// VPTESTNM on 512-bit vectors (T8XS prefix, same opcode byte as VPTESTM).
// The dword/qword forms of VPTESTNM are listed under the AVX512F CPUID
// feature in the Intel SDM, not AVX512CD, so they are gated on HasAVX512
// rather than HasCDI.
let Predicates = [HasAVX512] in {
defm VPTESTNMDZ  : avx512_vptest<0x27, "vptestnmd", VK16, VR512, f512mem,
                              memopv16i32, X86testnm, v16i32>, T8XS, EVEX_V512,
                              EVEX_CD8<32, CD8VF>;
defm VPTESTNMQZ  : avx512_vptest<0x27, "vptestnmq", VK8, VR512, f512mem,
                              memopv8i64, X86testnm, v8i64>, T8XS, EVEX_V512, VEX_W,
                              EVEX_CD8<64, CD8VF>;
}
3046
// ptestm intrinsics called with an all-ones mask (-1): emit the register
// form of VPTESTM and copy its mask result into a GPR class of the
// intrinsic's integer width (GR16 for the dword form, GR8 for the qword
// form).
def : Pat <(i16 (int_x86_avx512_mask_ptestm_d_512
                    (v16i32 VR512:$src1), (v16i32 VR512:$src2), (i16 -1))),
           (COPY_TO_REGCLASS (VPTESTMDZrr VR512:$src1, VR512:$src2), GR16)>;

def : Pat <(i8 (int_x86_avx512_mask_ptestm_q_512
                    (v8i64 VR512:$src1), (v8i64 VR512:$src2), (i8 -1))),
           (COPY_TO_REGCLASS (VPTESTMQZrr VR512:$src1, VR512:$src2), GR8)>;
//===----------------------------------------------------------------------===//
// AVX-512 Shift instructions
//===----------------------------------------------------------------------===//
// Vector shift by an 8-bit immediate count.
//   ri  / mi  - unmasked register / memory source; carry selection patterns.
//   rik / mik - merge-masked (EVEX_K) variants; no pattern.
// ImmFormR and ImmFormM are the instruction formats used by the register
// and memory forms respectively.
multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
                            string OpcodeStr, SDNode OpNode, RegisterClass RC,
                            ValueType vt, X86MemOperand x86memop,
                            PatFrag mem_frag, RegisterClass KRC> {
  def ri : AVX512BIi8<opc, ImmFormR, (outs RC:$dst),
                      (ins RC:$src1, i8imm:$src2),
                      OpcodeStr #
                        " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set RC:$dst,
                            (vt (OpNode RC:$src1, (i8 imm:$src2))))],
                      SSE_INTSHIFT_ITINS_P.rr>,
           EVEX_4V;

  def rik : AVX512BIi8<opc, ImmFormR, (outs RC:$dst),
                       (ins KRC:$mask, RC:$src1, i8imm:$src2),
                       OpcodeStr #
                         " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                       [], SSE_INTSHIFT_ITINS_P.rr>,
            EVEX_4V, EVEX_K;

  def mi: AVX512BIi8<opc, ImmFormM, (outs RC:$dst),
                     (ins x86memop:$src1, i8imm:$src2),
                     OpcodeStr #
                       " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(set RC:$dst,
                           (OpNode (mem_frag addr:$src1), (i8 imm:$src2)))],
                     SSE_INTSHIFT_ITINS_P.rm>,
          EVEX_4V;

  def mik: AVX512BIi8<opc, ImmFormM, (outs RC:$dst),
                      (ins KRC:$mask, x86memop:$src1, i8imm:$src2),
                      OpcodeStr #
                        " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                      [], SSE_INTSHIFT_ITINS_P.rm>,
           EVEX_4V, EVEX_K;
}
3082
// Vector shift whose count comes from a 128-bit source: $src2 is always
// VR128X or i128mem, whatever the width of RC.
//   rr  / rm  - unmasked forms; carry selection patterns. The memory form
//               loads a v2i64 and casts it through bc_frag.
//   rrk / rmk - merge-masked (EVEX_K) variants; no pattern.
multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            RegisterClass RC, ValueType vt, ValueType SrcVT,
                            PatFrag bc_frag, RegisterClass KRC> {
  def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
                    (ins RC:$src1, VR128X:$src2),
                    OpcodeStr # " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set RC:$dst,
                          (vt (OpNode RC:$src1, (SrcVT VR128X:$src2))))],
                    SSE_INTSHIFT_ITINS_P.rr>,
           EVEX_4V;

  def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
                     (ins KRC:$mask, RC:$src1, VR128X:$src2),
                     OpcodeStr #
                       " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [], SSE_INTSHIFT_ITINS_P.rr>,
            EVEX_4V, EVEX_K;

  def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                    (ins RC:$src1, i128mem:$src2),
                    OpcodeStr # " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set RC:$dst,
                          (vt (OpNode RC:$src1,
                                      (bc_frag (memopv2i64 addr:$src2)))))],
                    SSE_INTSHIFT_ITINS_P.rm>,
           EVEX_4V;

  def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                     (ins KRC:$mask, RC:$src1, i128mem:$src2),
                     OpcodeStr #
                       " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [], SSE_INTSHIFT_ITINS_P.rm>,
            EVEX_4V, EVEX_K;
}
3109
// 512-bit shifts. Each mnemonic is instantiated twice under the same defm
// name: once through avx512_shift_rmi (immediate count, CD8VF) and once
// through avx512_shift_rrm (128-bit count operand, CD8VQ). The qword forms
// add VEX_W.

// Logical right shift.
defm VPSRLDZ : avx512_shift_rmi<0x72, MRM2r, MRM2m, "vpsrld", X86vsrli,
                                VR512, v16i32, i512mem, memopv16i32, VK16WM>,
               EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPSRLDZ : avx512_shift_rrm<0xD2, "vpsrld", X86vsrl,
                                VR512, v16i32, v4i32, bc_v4i32, VK16WM>,
               EVEX_V512, EVEX_CD8<32, CD8VQ>;

defm VPSRLQZ : avx512_shift_rmi<0x73, MRM2r, MRM2m, "vpsrlq", X86vsrli,
                                VR512, v8i64, i512mem, memopv8i64, VK8WM>,
               EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
defm VPSRLQZ : avx512_shift_rrm<0xD3, "vpsrlq", X86vsrl,
                                VR512, v8i64, v2i64, bc_v2i64, VK8WM>,
               EVEX_V512, EVEX_CD8<64, CD8VQ>, VEX_W;

// Left shift.
defm VPSLLDZ : avx512_shift_rmi<0x72, MRM6r, MRM6m, "vpslld", X86vshli,
                                VR512, v16i32, i512mem, memopv16i32, VK16WM>,
               EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPSLLDZ : avx512_shift_rrm<0xF2, "vpslld", X86vshl,
                                VR512, v16i32, v4i32, bc_v4i32, VK16WM>,
               EVEX_V512, EVEX_CD8<32, CD8VQ>;

defm VPSLLQZ : avx512_shift_rmi<0x73, MRM6r, MRM6m, "vpsllq", X86vshli,
                                VR512, v8i64, i512mem, memopv8i64, VK8WM>,
               EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
defm VPSLLQZ : avx512_shift_rrm<0xF3, "vpsllq", X86vshl,
                                VR512, v8i64, v2i64, bc_v2i64, VK8WM>,
               EVEX_V512, EVEX_CD8<64, CD8VQ>, VEX_W;

// Arithmetic right shift.
defm VPSRADZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsrad", X86vsrai,
                                VR512, v16i32, i512mem, memopv16i32, VK16WM>,
               EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPSRADZ : avx512_shift_rrm<0xE2, "vpsrad", X86vsra,
                                VR512, v16i32, v4i32, bc_v4i32, VK16WM>,
               EVEX_V512, EVEX_CD8<32, CD8VQ>;

defm VPSRAQZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsraq", X86vsrai,
                                VR512, v8i64, i512mem, memopv8i64, VK8WM>,
               EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
defm VPSRAQZ : avx512_shift_rrm<0xE2, "vpsraq", X86vsra,
                                VR512, v8i64, v2i64, bc_v2i64, VK8WM>,
               EVEX_V512, EVEX_CD8<64, CD8VQ>, VEX_W;
3151
//===-------------------------------------------------------------------===//
// Variable Bit Shifts
//===-------------------------------------------------------------------===//
// Per-element variable shift: both the value and the count are full-width
// vectors of type vt.
//   rr - count in a register.
//   rm - count loaded through mem_frag.
multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            RegisterClass RC, ValueType vt,
                            X86MemOperand x86memop, PatFrag mem_frag> {
  def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
                    (ins RC:$src1, RC:$src2),
                    OpcodeStr # " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set RC:$dst,
                          (vt (OpNode RC:$src1, (vt RC:$src2))))]>,
           EVEX_4V;

  def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                    (ins RC:$src1, x86memop:$src2),
                    OpcodeStr # " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set RC:$dst,
                          (vt (OpNode RC:$src1, (mem_frag addr:$src2))))]>,
           EVEX_4V;
}
3171
// 512-bit variable shifts, matched against the generic shl/srl/sra nodes.
// Dword forms use v16i32; qword forms use v8i64 and add VEX_W.
defm VPSLLVDZ : avx512_var_shift<0x47, "vpsllvd", shl, VR512, v16i32,
                                 i512mem, memopv16i32>,
                EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPSLLVQZ : avx512_var_shift<0x47, "vpsllvq", shl, VR512, v8i64,
                                 i512mem, memopv8i64>,
                EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

defm VPSRLVDZ : avx512_var_shift<0x45, "vpsrlvd", srl, VR512, v16i32,
                                 i512mem, memopv16i32>,
                EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPSRLVQZ : avx512_var_shift<0x45, "vpsrlvq", srl, VR512, v8i64,
                                 i512mem, memopv8i64>,
                EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

defm VPSRAVDZ : avx512_var_shift<0x46, "vpsravd", sra, VR512, v16i32,
                                 i512mem, memopv16i32>,
                EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPSRAVQZ : avx512_var_shift<0x46, "vpsravq", sra, VR512, v8i64,
                                 i512mem, memopv8i64>,
                EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
3190
//===----------------------------------------------------------------------===//
// AVX-512 - MOVDDUP
//===----------------------------------------------------------------------===//
3194
// MOVDDUP (opcode 0x12): matches X86Movddup on a register source (rr) or on
// a value loaded through memop_frag (rm).
multiclass avx512_movddup<string OpcodeStr, RegisterClass RC, ValueType VT,
                          X86MemOperand x86memop, PatFrag memop_frag> {
  def rr : AVX512PDI<0x12, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
                     OpcodeStr # " \t{$src, $dst|$dst, $src}",
                     [(set RC:$dst, (VT (X86Movddup RC:$src)))]>,
           EVEX;

  def rm : AVX512PDI<0x12, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
                     OpcodeStr # " \t{$src, $dst|$dst, $src}",
                     [(set RC:$dst,
                           (VT (X86Movddup (memop_frag addr:$src))))]>,
           EVEX;
}
3205
// 512-bit MOVDDUP on v8f64.
defm VMOVDDUPZ : avx512_movddup<"vmovddup", VR512, v8f64, f512mem,
                                memopv8f64>,
                 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;

// A movddup of a scalar f64 load placed in a vector also selects the
// memory form.
def : Pat<(X86Movddup (v8f64 (scalar_to_vector (loadf64 addr:$src)))),
          (VMOVDDUPZrm addr:$src)>;
3210
//===---------------------------------------------------------------------===//
// Replicate Single FP - MOVSHDUP and MOVSLDUP
//===---------------------------------------------------------------------===//
// Single-source replicate instruction (used for MOVSHDUP / MOVSLDUP):
//   rr - register source.
//   rm - memory source loaded through mem_frag; marked mayLoad.
multiclass avx512_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
                                ValueType vt, RegisterClass RC,
                                PatFrag mem_frag, X86MemOperand x86memop> {
  def rr : AVX512XSI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
                     OpcodeStr # " \t{$src, $dst|$dst, $src}",
                     [(set RC:$dst, (vt (OpNode RC:$src)))]>,
           EVEX;

  let mayLoad = 1 in
  def rm : AVX512XSI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
                     OpcodeStr # " \t{$src, $dst|$dst, $src}",
                     [(set RC:$dst, (OpNode (mem_frag addr:$src)))]>,
           EVEX;
}
3225
// 512-bit MOVSHDUP / MOVSLDUP on v16f32.
defm VMOVSHDUPZ : avx512_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
                                       v16f32, VR512, memopv16f32, f512mem>,
                  EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VMOVSLDUPZ : avx512_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
                                       v16f32, VR512, memopv16f32, f512mem>,
                  EVEX_V512, EVEX_CD8<32, CD8VF>;

// The same instructions also serve the v16i32 flavour of the nodes.
def : Pat<(v16i32 (X86Movshdup VR512:$src)),
          (VMOVSHDUPZrr VR512:$src)>;
def : Pat<(v16i32 (X86Movshdup (memopv16i32 addr:$src))),
          (VMOVSHDUPZrm addr:$src)>;
def : Pat<(v16i32 (X86Movsldup VR512:$src)),
          (VMOVSLDUPZrr VR512:$src)>;
def : Pat<(v16i32 (X86Movsldup (memopv16i32 addr:$src))),
          (VMOVSLDUPZrm addr:$src)>;
3239
//===----------------------------------------------------------------------===//
// Move Low to High and High to Low packed FP Instructions
//===----------------------------------------------------------------------===//
// EVEX-encoded MOVLHPS / MOVHLPS on 128-bit registers (VR128X), matched
// for v4f32.
def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            [(set VR128X:$dst,
                                  (v4f32 (X86Movlhps VR128X:$src1,
                                                     VR128X:$src2)))],
                            IIC_SSE_MOV_LH>,
                  EVEX_4V;

def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            [(set VR128X:$dst,
                                  (v4f32 (X86Movhlps VR128X:$src1,
                                                     VR128X:$src2)))],
                            IIC_SSE_MOV_LH>,
                  EVEX_4V;
3253
// Integer-typed uses of X86Movlhps / X86Movhlps reuse the FP instructions
// when AVX-512 is available.
let Predicates = [HasAVX512] in {
  // MOVLHPS: v4i32 and v2i64.
  def : Pat<(v4i32 (X86Movlhps VR128X:$src1, VR128X:$src2)),
            (VMOVLHPSZrr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v2i64 (X86Movlhps VR128X:$src1, VR128X:$src2)),
            (VMOVLHPSZrr (v2i64 VR128X:$src1), VR128X:$src2)>;

  // MOVHLPS: v4i32.
  def : Pat<(v4i32 (X86Movhlps VR128X:$src1, VR128X:$src2)),
            (VMOVHLPSZrr VR128X:$src1, VR128X:$src2)>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003265
//===----------------------------------------------------------------------===//
// FMA - Fused Multiply Operations
//===----------------------------------------------------------------------===//
// Packed FMA, "213" operand order. $src1 is tied to $dst by the enclosing
// Constraints.
//   r  - register form, generated through AVX512_masking_3src (KRC is the
//        mask register class it is given).
//   m  - third operand loaded from a full-width memory operand.
//   mb - third operand is a scalar load broadcast to all elements (EVEX_B);
//        BrdcstStr is the "{1toN}" decoration printed after the operand.
let Constraints = "$src1 = $dst" in {
multiclass avx512_fma3p_rm<bits<8> opc, string OpcodeStr,
            RegisterClass RC, X86MemOperand x86memop,
            PatFrag mem_frag, X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
            string BrdcstStr, SDNode OpNode, ValueType OpVT,
            RegisterClass KRC> {
  defm r: AVX512_masking_3src<opc, MRMSrcReg, (outs RC:$dst),
          (ins RC:$src2, RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)), OpVT, RC, KRC>,
         AVX512FMA3Base;

  // Both memory forms load. The previous brace-less `let mayLoad = 1 in`
  // only applied to `m`, although `mb` was laid out as if it were in scope.
  let mayLoad = 1 in {
  def m: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
          (ins RC:$src1, RC:$src2, x86memop:$src3),
          !strconcat(OpcodeStr, " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
          [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2,
                                               (mem_frag addr:$src3))))]>;
  def mb: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
           (ins RC:$src1, RC:$src2, x86scalar_mop:$src3),
           !strconcat(OpcodeStr, " \t{${src3}", BrdcstStr,
            ", $src2, $dst|$dst, $src2, ${src3}", BrdcstStr, "}"),
           [(set RC:$dst, (OpNode RC:$src1, RC:$src2,
           (OpVT (X86VBroadcast (scalar_mfrag addr:$src3)))))]>, EVEX_B;
  }
}
} // Constraints = "$src1 = $dst"
3295
// 512-bit single-precision FMA, 213 forms (v16f32, {1to16} broadcast,
// VK16WM write mask).
let ExeDomain = SSEPackedSingle in {
  defm VFMADD213PSZ    : avx512_fma3p_rm<0xA8, "vfmadd213ps", VR512, f512mem,
                                         memopv16f32, f32mem, loadf32,
                                         "{1to16}", X86Fmadd, v16f32, VK16WM>,
                         EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm VFMSUB213PSZ    : avx512_fma3p_rm<0xAA, "vfmsub213ps", VR512, f512mem,
                                         memopv16f32, f32mem, loadf32,
                                         "{1to16}", X86Fmsub, v16f32, VK16WM>,
                         EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm VFMADDSUB213PSZ : avx512_fma3p_rm<0xA6, "vfmaddsub213ps", VR512,
                                         f512mem, memopv16f32, f32mem, loadf32,
                                         "{1to16}", X86Fmaddsub, v16f32,
                                         VK16WM>,
                         EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm VFMSUBADD213PSZ : avx512_fma3p_rm<0xA7, "vfmsubadd213ps", VR512,
                                         f512mem, memopv16f32, f32mem, loadf32,
                                         "{1to16}", X86Fmsubadd, v16f32,
                                         VK16WM>,
                         EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm VFNMADD213PSZ   : avx512_fma3p_rm<0xAC, "vfnmadd213ps", VR512, f512mem,
                                         memopv16f32, f32mem, loadf32,
                                         "{1to16}", X86Fnmadd, v16f32, VK16WM>,
                         EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm VFNMSUB213PSZ   : avx512_fma3p_rm<0xAE, "vfnmsub213ps", VR512, f512mem,
                                         memopv16f32, f32mem, loadf32,
                                         "{1to16}", X86Fnmsub, v16f32, VK16WM>,
                         EVEX_V512, EVEX_CD8<32, CD8VF>;
}
// 512-bit double-precision FMA, 213 forms (v8f64, {1to8} broadcast, VK8WM
// write mask, VEX_W).
let ExeDomain = SSEPackedDouble in {
  defm VFMADD213PDZ    : avx512_fma3p_rm<0xA8, "vfmadd213pd", VR512, f512mem,
                                         memopv8f64, f64mem, loadf64,
                                         "{1to8}", X86Fmadd, v8f64, VK8WM>,
                         EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  defm VFMSUB213PDZ    : avx512_fma3p_rm<0xAA, "vfmsub213pd", VR512, f512mem,
                                         memopv8f64, f64mem, loadf64,
                                         "{1to8}", X86Fmsub, v8f64, VK8WM>,
                         EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  defm VFMADDSUB213PDZ : avx512_fma3p_rm<0xA6, "vfmaddsub213pd", VR512,
                                         f512mem, memopv8f64, f64mem, loadf64,
                                         "{1to8}", X86Fmaddsub, v8f64, VK8WM>,
                         EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  defm VFMSUBADD213PDZ : avx512_fma3p_rm<0xA7, "vfmsubadd213pd", VR512,
                                         f512mem, memopv8f64, f64mem, loadf64,
                                         "{1to8}", X86Fmsubadd, v8f64, VK8WM>,
                         EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  defm VFNMADD213PDZ   : avx512_fma3p_rm<0xAC, "vfnmadd213pd", VR512, f512mem,
                                         memopv8f64, f64mem, loadf64,
                                         "{1to8}", X86Fnmadd, v8f64, VK8WM>,
                         EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  defm VFNMSUB213PDZ   : avx512_fma3p_rm<0xAE, "vfnmsub213pd", VR512, f512mem,
                                         memopv8f64, f64mem, loadf64,
                                         "{1to8}", X86Fnmsub, v8f64, VK8WM>,
                         EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
3348
// Packed FMA, "132" operand order, memory forms only. $src1 is tied to $dst
// by the enclosing Constraints; the memory operand is $src2 and appears as
// the middle operand of OpNode.
//   m  - full-width memory operand.
//   mb - scalar load broadcast to all elements (EVEX_B); BrdcstStr is the
//        "{1toN}" decoration printed after the operand.
let Constraints = "$src1 = $dst" in {
multiclass avx512_fma3p_m132<bits<8> opc, string OpcodeStr,
            RegisterClass RC, X86MemOperand x86memop,
            PatFrag mem_frag, X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
            string BrdcstStr, SDNode OpNode, ValueType OpVT> {
  // Both forms load. The previous brace-less `let mayLoad = 1 in` only
  // applied to `m`, although `mb` was laid out as if it were in scope.
  let mayLoad = 1 in {
  def m: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
          (ins RC:$src1, RC:$src3, x86memop:$src2),
          !strconcat(OpcodeStr, " \t{$src2, $src3, $dst|$dst, $src3, $src2}"),
          [(set RC:$dst, (OpVT (OpNode RC:$src1, (mem_frag addr:$src2), RC:$src3)))]>;
  def mb: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
           (ins RC:$src1, RC:$src3, x86scalar_mop:$src2),
           !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
            ", $src3, $dst|$dst, $src3, ${src2}", BrdcstStr, "}"),
           [(set RC:$dst, (OpNode RC:$src1,
           (OpVT (X86VBroadcast (scalar_mfrag addr:$src2))), RC:$src3))]>, EVEX_B;
  }
}
} // Constraints = "$src1 = $dst"
3367
3368
// Packed single-precision "132" FMA memory forms on 512-bit vectors
// (v16f32, broadcast suffix {1to16}).
let ExeDomain = SSEPackedSingle in {
  defm VFMADD132PSZ :
      avx512_fma3p_m132<0x98, "vfmadd132ps", VR512, f512mem,
                        memopv16f32, f32mem, loadf32, "{1to16}",
                        X86Fmadd, v16f32>,
      EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm VFMSUB132PSZ :
      avx512_fma3p_m132<0x9A, "vfmsub132ps", VR512, f512mem,
                        memopv16f32, f32mem, loadf32, "{1to16}",
                        X86Fmsub, v16f32>,
      EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm VFMADDSUB132PSZ :
      avx512_fma3p_m132<0x96, "vfmaddsub132ps", VR512, f512mem,
                        memopv16f32, f32mem, loadf32, "{1to16}",
                        X86Fmaddsub, v16f32>,
      EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm VFMSUBADD132PSZ :
      avx512_fma3p_m132<0x97, "vfmsubadd132ps", VR512, f512mem,
                        memopv16f32, f32mem, loadf32, "{1to16}",
                        X86Fmsubadd, v16f32>,
      EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm VFNMADD132PSZ :
      avx512_fma3p_m132<0x9C, "vfnmadd132ps", VR512, f512mem,
                        memopv16f32, f32mem, loadf32, "{1to16}",
                        X86Fnmadd, v16f32>,
      EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm VFNMSUB132PSZ :
      avx512_fma3p_m132<0x9E, "vfnmsub132ps", VR512, f512mem,
                        memopv16f32, f32mem, loadf32, "{1to16}",
                        X86Fnmsub, v16f32>,
      EVEX_V512, EVEX_CD8<32, CD8VF>;
}
// Packed double-precision "132" FMA memory forms on 512-bit vectors
// (v8f64, broadcast suffix {1to8}); all carry VEX_W.
let ExeDomain = SSEPackedDouble in {
  defm VFMADD132PDZ :
      avx512_fma3p_m132<0x98, "vfmadd132pd", VR512, f512mem,
                        memopv8f64, f64mem, loadf64, "{1to8}",
                        X86Fmadd, v8f64>,
      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  defm VFMSUB132PDZ :
      avx512_fma3p_m132<0x9A, "vfmsub132pd", VR512, f512mem,
                        memopv8f64, f64mem, loadf64, "{1to8}",
                        X86Fmsub, v8f64>,
      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  defm VFMADDSUB132PDZ :
      avx512_fma3p_m132<0x96, "vfmaddsub132pd", VR512, f512mem,
                        memopv8f64, f64mem, loadf64, "{1to8}",
                        X86Fmaddsub, v8f64>,
      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  defm VFMSUBADD132PDZ :
      avx512_fma3p_m132<0x97, "vfmsubadd132pd", VR512, f512mem,
                        memopv8f64, f64mem, loadf64, "{1to8}",
                        X86Fmsubadd, v8f64>,
      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  defm VFNMADD132PDZ :
      avx512_fma3p_m132<0x9C, "vfnmadd132pd", VR512, f512mem,
                        memopv8f64, f64mem, loadf64, "{1to8}",
                        X86Fnmadd, v8f64>,
      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  defm VFNMSUB132PDZ :
      avx512_fma3p_m132<0x9E, "vfnmsub132pd", VR512, f512mem,
                        memopv8f64, f64mem, loadf64, "{1to8}",
                        X86Fnmsub, v8f64>,
      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
3421
3422// Scalar FMA
let Constraints = "$src1 = $dst" in {
// Scalar FMA: register (r) and memory (m) forms.  $src1 is tied to $dst.
//
//   opc, OpcodeStr - opcode byte and mnemonic.
//   OpNode         - DAG node matched by both forms; it is applied as
//                    (OpNode $src2, $src1, $src3).
//   RC, OpVT       - scalar register class and its value type.
//   x86memop       - memory operand class of $src3 in the 'm' form.
//   memop          - not referenced by either def; kept so that existing
//                    instantiations keep compiling.
//   mem_frag       - load fragment used in the 'm' selection pattern.
multiclass avx512_fma3s_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           RegisterClass RC, ValueType OpVT,
                           X86MemOperand x86memop, Operand memop,
                           PatFrag mem_frag> {
  let isCommutable = 1 in
  def r : AVX512FMA3<opc, MRMSrcReg, (outs RC:$dst),
                     (ins RC:$src1, RC:$src2, RC:$src3),
                     !strconcat(OpcodeStr,
                                " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                     [(set RC:$dst,
                       (OpVT (OpNode RC:$src2, RC:$src1, RC:$src3)))]>;
  // The memory operand class comes from the instantiation (f32mem / f64mem)
  // instead of a hard-coded f128mem, so the operand matches the scalar that
  // mem_frag actually loads.
  let mayLoad = 1 in
  def m : AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
                     (ins RC:$src1, RC:$src2, x86memop:$src3),
                     !strconcat(OpcodeStr,
                                " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                     [(set RC:$dst,
                       (OpVT (OpNode RC:$src2, RC:$src1,
                              (mem_frag addr:$src3))))]>;
}

} // Constraints = "$src1 = $dst"
3446
// Scalar "213" FMA instantiations.  Each opcode is shared by an SS variant
// (FR32X / f32) and an SD variant (FR64X / f64, VEX_W).
defm VFMADDSSZ :
    avx512_fma3s_rm<0xA9, "vfmadd213ss", X86Fmadd, FR32X,
                    f32, f32mem, ssmem, loadf32>,
    EVEX_CD8<32, CD8VT1>;
defm VFMADDSDZ :
    avx512_fma3s_rm<0xA9, "vfmadd213sd", X86Fmadd, FR64X,
                    f64, f64mem, sdmem, loadf64>,
    VEX_W, EVEX_CD8<64, CD8VT1>;
defm VFMSUBSSZ :
    avx512_fma3s_rm<0xAB, "vfmsub213ss", X86Fmsub, FR32X,
                    f32, f32mem, ssmem, loadf32>,
    EVEX_CD8<32, CD8VT1>;
defm VFMSUBSDZ :
    avx512_fma3s_rm<0xAB, "vfmsub213sd", X86Fmsub, FR64X,
                    f64, f64mem, sdmem, loadf64>,
    VEX_W, EVEX_CD8<64, CD8VT1>;
defm VFNMADDSSZ :
    avx512_fma3s_rm<0xAD, "vfnmadd213ss", X86Fnmadd, FR32X,
                    f32, f32mem, ssmem, loadf32>,
    EVEX_CD8<32, CD8VT1>;
defm VFNMADDSDZ :
    avx512_fma3s_rm<0xAD, "vfnmadd213sd", X86Fnmadd, FR64X,
                    f64, f64mem, sdmem, loadf64>,
    VEX_W, EVEX_CD8<64, CD8VT1>;
defm VFNMSUBSSZ :
    avx512_fma3s_rm<0xAF, "vfnmsub213ss", X86Fnmsub, FR32X,
                    f32, f32mem, ssmem, loadf32>,
    EVEX_CD8<32, CD8VT1>;
defm VFNMSUBSDZ :
    avx512_fma3s_rm<0xAF, "vfnmsub213sd", X86Fnmsub, FR64X,
                    f64, f64mem, sdmem, loadf64>,
    VEX_W, EVEX_CD8<64, CD8VT1>;
3463
3464//===----------------------------------------------------------------------===//
// AVX-512 Scalar convert from signed integer to float/double
3466//===----------------------------------------------------------------------===//
3467
// Three-operand scalar integer -> floating-point conversion, register (rr)
// and memory (rm) source forms.  Neither def carries a selection pattern;
// both are marked hasSideEffects = 0 and tagged EVEX_4V.
//   SrcRC    - integer source register class.
//   DstRC    - register class of $dst and of the extra $src1 input.
//   x86memop - memory operand class for the rm form.
multiclass avx512_vcvtsi<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                         X86MemOperand x86memop, string asm> {
  let hasSideEffects = 0 in
  def rr : SI<opc, MRMSrcReg,
              (outs DstRC:$dst),
              (ins DstRC:$src1, SrcRC:$src),
              asm # " \t{$src, $src1, $dst|$dst, $src1, $src}",
              []>,
           EVEX_4V;

  let hasSideEffects = 0, mayLoad = 1 in
  def rm : SI<opc, MRMSrcMem,
              (outs DstRC:$dst),
              (ins DstRC:$src1, x86memop:$src),
              asm # " \t{$src, $src1, $dst|$dst, $src1, $src}",
              []>,
           EVEX_4V;
}
Andrew Trick15a47742013-10-09 05:11:10 +00003481let Predicates = [HasAVX512] in {
// Signed integer -> scalar FP instructions (opcode 0x2A); the 64-bit source
// variants add VEX_W.
defm VCVTSI2SSZ :
    avx512_vcvtsi<0x2A, GR32, FR32X, i32mem, "cvtsi2ss{l}">,
    XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SSZ :
    avx512_vcvtsi<0x2A, GR64, FR32X, i64mem, "cvtsi2ss{q}">,
    XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
defm VCVTSI2SDZ :
    avx512_vcvtsi<0x2A, GR32, FR64X, i32mem, "cvtsi2sd{l}">,
    XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SDZ :
    avx512_vcvtsi<0x2A, GR64, FR64X, i64mem, "cvtsi2sd{q}">,
    XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;

// sint_to_fp of a loaded integer: select the rm form, with IMPLICIT_DEF as
// the first register operand.
def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
          (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
          (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
          (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
          (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

// sint_to_fp of a GPR: select the rr form, with IMPLICIT_DEF as the first
// register operand.
def : Pat<(f32 (sint_to_fp GR32:$src)),
          (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (sint_to_fp GR64:$src)),
          (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (sint_to_fp GR32:$src)),
          (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (sint_to_fp GR64:$src)),
          (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
3508
// Unsigned integer -> scalar FP instructions (opcode 0x7B); the 64-bit
// source variants add VEX_W.
defm VCVTUSI2SSZ :
    avx512_vcvtsi<0x7B, GR32, FR32X, i32mem, "cvtusi2ss{l}">,
    XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SSZ :
    avx512_vcvtsi<0x7B, GR64, FR32X, i64mem, "cvtusi2ss{q}">,
    XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
defm VCVTUSI2SDZ :
    avx512_vcvtsi<0x7B, GR32, FR64X, i32mem, "cvtusi2sd{l}">,
    XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SDZ :
    avx512_vcvtsi<0x7B, GR64, FR64X, i64mem, "cvtusi2sd{q}">,
    XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;

// uint_to_fp of a loaded integer: select the rm form, with IMPLICIT_DEF as
// the first register operand.
def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
          (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))),
          (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))),
          (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))),
          (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

// uint_to_fp of a GPR: select the rr form, with IMPLICIT_DEF as the first
// register operand.
def : Pat<(f32 (uint_to_fp GR32:$src)),
          (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (uint_to_fp GR64:$src)),
          (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (uint_to_fp GR32:$src)),
          (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (uint_to_fp GR64:$src)),
          (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
Andrew Trick15a47742013-10-09 05:11:10 +00003535}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003536
3537//===----------------------------------------------------------------------===//
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00003538// AVX-512 Scalar convert from float/double to integer
3539//===----------------------------------------------------------------------===//
// Scalar FP -> integer conversion selected through an intrinsic.
//   rr - matches (Int SrcRC:$src).
//   rm - memory-source form with an empty pattern list; mem_cpat is not
//        referenced by either def.
// Both defs are hasSideEffects = 0, EVEX, VEX_LIG and require HasAVX512.
multiclass avx512_cvt_s_int<bits<8> opc, RegisterClass SrcRC,
                            RegisterClass DstRC, Intrinsic Int,
                            Operand memop, ComplexPattern mem_cpat,
                            string asm> {
  let hasSideEffects = 0 in
  def rr : SI<opc, MRMSrcReg,
              (outs DstRC:$dst),
              (ins SrcRC:$src),
              asm # " \t{$src, $dst|$dst, $src}",
              [(set DstRC:$dst, (Int SrcRC:$src))]>,
           EVEX, VEX_LIG, Requires<[HasAVX512]>;

  let hasSideEffects = 0, mayLoad = 1 in
  def rm : SI<opc, MRMSrcMem,
              (outs DstRC:$dst),
              (ins memop:$src),
              asm # " \t{$src, $dst|$dst, $src}",
              []>,
           EVEX, VEX_LIG, Requires<[HasAVX512]>;
}
3554let Predicates = [HasAVX512] in {
3555// Convert float/double to signed/unsigned int 32/64
// Single-precision source (XS prefix, ssmem / sse_load_f32).
defm VCVTSS2SIZ :
    avx512_cvt_s_int<0x2D, VR128X, GR32, int_x86_sse_cvtss2si,
                     ssmem, sse_load_f32, "cvtss2si">,
    XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2SI64Z :
    avx512_cvt_s_int<0x2D, VR128X, GR64, int_x86_sse_cvtss2si64,
                     ssmem, sse_load_f32, "cvtss2si">,
    XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USIZ :
    avx512_cvt_s_int<0x79, VR128X, GR32, int_x86_avx512_cvtss2usi,
                     ssmem, sse_load_f32, "cvtss2usi">,
    XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USI64Z :
    avx512_cvt_s_int<0x79, VR128X, GR64, int_x86_avx512_cvtss2usi64,
                     ssmem, sse_load_f32, "cvtss2usi">,
    XS, VEX_W, EVEX_CD8<32, CD8VT1>;

// Double-precision source (XD prefix, sdmem / sse_load_f64).
defm VCVTSD2SIZ :
    avx512_cvt_s_int<0x2D, VR128X, GR32, int_x86_sse2_cvtsd2si,
                     sdmem, sse_load_f64, "cvtsd2si">,
    XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2SI64Z :
    avx512_cvt_s_int<0x2D, VR128X, GR64, int_x86_sse2_cvtsd2si64,
                     sdmem, sse_load_f64, "cvtsd2si">,
    XD, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USIZ :
    avx512_cvt_s_int<0x79, VR128X, GR32, int_x86_avx512_cvtsd2usi,
                     sdmem, sse_load_f64, "cvtsd2usi">,
    XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USI64Z :
    avx512_cvt_s_int<0x79, VR128X, GR64, int_x86_avx512_cvtsd2usi64,
                     sdmem, sse_load_f64, "cvtsd2usi">,
    XD, VEX_W, EVEX_CD8<64, CD8VT1>;
3582
// Intrinsic (VR128X) forms of the scalar integer -> FP conversions.  They are
// isCodeGenOnly, so they are only reachable through instruction selection.
let isCodeGenOnly = 1 in {
  // Signed conversions: opcode 0x2A.
  defm Int_VCVTSI2SSZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
            int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}",
            SSE_CVT_Scalar, 0>, XS, EVEX_4V;
  defm Int_VCVTSI2SS64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
            int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss{q}",
            SSE_CVT_Scalar, 0>, XS, EVEX_4V, VEX_W;
  defm Int_VCVTSI2SDZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
            int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd{l}",
            SSE_CVT_Scalar, 0>, XD, EVEX_4V;
  defm Int_VCVTSI2SD64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
            int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}",
            SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W;

  // Unsigned conversions: opcode 0x7B, the same opcode the non-intrinsic
  // VCVTUSI2SSZ / VCVTUSI2SDZ definitions use.  0x2A is the signed
  // cvtsi2ss/sd opcode and would encode the wrong instruction here.
  defm Int_VCVTUSI2SSZ : sse12_cvt_sint_3addr<0x7B, GR32, VR128X,
            int_x86_avx512_cvtusi2ss, i32mem, loadi32, "cvtusi2ss{l}",
            SSE_CVT_Scalar, 0>, XS, EVEX_4V;
  defm Int_VCVTUSI2SS64Z : sse12_cvt_sint_3addr<0x7B, GR64, VR128X,
            int_x86_avx512_cvtusi642ss, i64mem, loadi64, "cvtusi2ss{q}",
            SSE_CVT_Scalar, 0>, XS, EVEX_4V, VEX_W;
  defm Int_VCVTUSI2SDZ : sse12_cvt_sint_3addr<0x7B, GR32, VR128X,
            int_x86_avx512_cvtusi2sd, i32mem, loadi32, "cvtusi2sd{l}",
            SSE_CVT_Scalar, 0>, XD, EVEX_4V;
  defm Int_VCVTUSI2SD64Z : sse12_cvt_sint_3addr<0x7B, GR64, VR128X,
            int_x86_avx512_cvtusi642sd, i64mem, loadi64, "cvtusi2sd{q}",
            SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W;
} // isCodeGenOnly = 1
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00003610
3611// Convert float/double to signed/unsigned int 32/64 with truncation
// Intrinsic (VR128X source) forms of the truncating conversions; all are
// isCodeGenOnly.
let isCodeGenOnly = 1 in {
  // Signed results: opcode 0x2C.
  defm Int_VCVTTSS2SIZ :
      avx512_cvt_s_int<0x2C, VR128X, GR32, int_x86_sse_cvttss2si,
                       ssmem, sse_load_f32, "cvttss2si">,
      XS, EVEX_CD8<32, CD8VT1>;
  defm Int_VCVTTSS2SI64Z :
      avx512_cvt_s_int<0x2C, VR128X, GR64, int_x86_sse_cvttss2si64,
                       ssmem, sse_load_f32, "cvttss2si">,
      XS, VEX_W, EVEX_CD8<32, CD8VT1>;
  defm Int_VCVTTSD2SIZ :
      avx512_cvt_s_int<0x2C, VR128X, GR32, int_x86_sse2_cvttsd2si,
                       sdmem, sse_load_f64, "cvttsd2si">,
      XD, EVEX_CD8<64, CD8VT1>;
  defm Int_VCVTTSD2SI64Z :
      avx512_cvt_s_int<0x2C, VR128X, GR64, int_x86_sse2_cvttsd2si64,
                       sdmem, sse_load_f64, "cvttsd2si">,
      XD, VEX_W, EVEX_CD8<64, CD8VT1>;

  // Unsigned results: opcode 0x78.
  defm Int_VCVTTSS2USIZ :
      avx512_cvt_s_int<0x78, VR128X, GR32, int_x86_avx512_cvttss2usi,
                       ssmem, sse_load_f32, "cvttss2usi">,
      XS, EVEX_CD8<32, CD8VT1>;
  defm Int_VCVTTSS2USI64Z :
      avx512_cvt_s_int<0x78, VR128X, GR64, int_x86_avx512_cvttss2usi64,
                       ssmem, sse_load_f32, "cvttss2usi">,
      XS, VEX_W, EVEX_CD8<32, CD8VT1>;
  defm Int_VCVTTSD2USIZ :
      avx512_cvt_s_int<0x78, VR128X, GR32, int_x86_avx512_cvttsd2usi,
                       sdmem, sse_load_f64, "cvttsd2usi">,
      XD, EVEX_CD8<64, CD8VT1>;
  defm Int_VCVTTSD2USI64Z :
      avx512_cvt_s_int<0x78, VR128X, GR64, int_x86_avx512_cvttsd2usi64,
                       sdmem, sse_load_f64, "cvttsd2usi">,
      XD, VEX_W, EVEX_CD8<64, CD8VT1>;
} // isCodeGenOnly = 1
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00003643
// Scalar conversion selected through a generic SDNode.
//   rr - matches (OpNode SrcRC:$src).
//   rm - matches (OpNode (ld_frag addr:$src)).
multiclass avx512_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                        SDNode OpNode, X86MemOperand x86memop,
                        PatFrag ld_frag, string asm> {
  def rr : SI<opc, MRMSrcReg,
              (outs DstRC:$dst),
              (ins SrcRC:$src),
              asm # " \t{$src, $dst|$dst, $src}",
              [(set DstRC:$dst, (OpNode SrcRC:$src))]>,
           EVEX;

  def rm : SI<opc, MRMSrcMem,
              (outs DstRC:$dst),
              (ins x86memop:$src),
              asm # " \t{$src, $dst|$dst, $src}",
              [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>,
           EVEX;
}
3654
// Truncating scalar FP -> integer conversions matched from fp_to_sint
// (opcode 0x2C) and fp_to_uint (opcode 0x78).  GR64 results add VEX_W.
defm VCVTTSS2SIZ :
    avx512_cvt_s<0x2C, FR32X, GR32, fp_to_sint, f32mem,
                 loadf32, "cvttss2si">,
    XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2USIZ :
    avx512_cvt_s<0x78, FR32X, GR32, fp_to_uint, f32mem,
                 loadf32, "cvttss2usi">,
    XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2SI64Z :
    avx512_cvt_s<0x2C, FR32X, GR64, fp_to_sint, f32mem,
                 loadf32, "cvttss2si">,
    XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2USI64Z :
    avx512_cvt_s<0x78, FR32X, GR64, fp_to_uint, f32mem,
                 loadf32, "cvttss2usi">,
    XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2SIZ :
    avx512_cvt_s<0x2C, FR64X, GR32, fp_to_sint, f64mem,
                 loadf64, "cvttsd2si">,
    XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2USIZ :
    avx512_cvt_s<0x78, FR64X, GR32, fp_to_uint, f64mem,
                 loadf64, "cvttsd2usi">,
    XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2SI64Z :
    avx512_cvt_s<0x2C, FR64X, GR64, fp_to_sint, f64mem,
                 loadf64, "cvttsd2si">,
    XD, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2USI64Z :
    avx512_cvt_s<0x78, FR64X, GR64, fp_to_uint, f64mem,
                 loadf64, "cvttsd2usi">,
    XD, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovskycf088092013-12-11 14:31:04 +00003679} // HasAVX512
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00003680//===----------------------------------------------------------------------===//
// AVX-512 Convert from float to double and back
3682//===----------------------------------------------------------------------===//
// Convert scalar single to scalar double.  The defs carry no selection
// patterns; all are marked hasSideEffects = 0.
let hasSideEffects = 0 in
def VCVTSS2SDZrr : AVX512XSI<0x5A, MRMSrcReg,
                             (outs FR64X:$dst),
                             (ins FR32X:$src1, FR32X:$src2),
                             "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                             []>,
                   EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;

let hasSideEffects = 0, mayLoad = 1 in
def VCVTSS2SDZrm : AVX512XSI<0x5A, MRMSrcMem,
                             (outs FR64X:$dst),
                             (ins FR32X:$src1, f32mem:$src2),
                             "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                             []>,
                   EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>,
                   EVEX_CD8<32, CD8VT1>;

// Convert scalar double to scalar single
let hasSideEffects = 0 in
def VCVTSD2SSZrr : AVX512XDI<0x5A, MRMSrcReg,
                             (outs FR32X:$dst),
                             (ins FR64X:$src1, FR64X:$src2),
                             "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                             []>,
                   EVEX_4V, VEX_LIG, VEX_W, Sched<[WriteCvtF2F]>;

let hasSideEffects = 0, mayLoad = 1 in
def VCVTSD2SSZrm : AVX512XDI<0x5A, MRMSrcMem,
                             (outs FR32X:$dst),
                             (ins FR64X:$src1, f64mem:$src2),
                             "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                             []>,
                   EVEX_4V, VEX_LIG, VEX_W,
                   Sched<[WriteCvtF2FLd, ReadAfterLd]>, EVEX_CD8<64, CD8VT1>;
3707
// Selection patterns for scalar fextend / fround using the pattern-less
// VCVTSS2SDZ / VCVTSD2SSZ instructions.
let Predicates = [HasAVX512] in {
  // Register fextend: the source is passed as both register operands.
  def : Pat<(f64 (fextend FR32X:$src)),
            (VCVTSS2SDZrr FR32X:$src, FR32X:$src)>;
  // fextend of a load: select the rm form with IMPLICIT_DEF as $src1.
  def : Pat<(fextend (loadf32 addr:$src)),
            (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
}

// extloadf32 under OptForSize: select the rm form directly.
let Predicates = [HasAVX512, OptForSize] in
def : Pat<(extloadf32 addr:$src),
          (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>;

// extloadf32 under OptForSpeed: load with VMOVSSZrm, then select the rr form.
let Predicates = [HasAVX512, OptForSpeed] in
def : Pat<(extloadf32 addr:$src),
          (VCVTSS2SDZrr (f32 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>;

// Register fround: the source is passed as both register operands.
let Predicates = [HasAVX512] in
def : Pat<(f32 (fround FR64X:$src)),
          (VCVTSD2SSZrr FR64X:$src, FR64X:$src)>;
3723
// Packed conversion with three forms, all marked hasSideEffects = 0:
//   rr  - matches (OpVT (OpNode (InVT SrcRC:$src))).
//   rrb - takes an additional AVX512RC:$rc operand, is tagged EVEX_B and
//         EVEX_RC, and has an empty pattern list.
//   rm  - matches the same node on (bitconvert (mem_frag addr:$src)).
multiclass avx512_vcvt_fp_with_rc<bits<8> opc, string asm,
                                  RegisterClass SrcRC, RegisterClass DstRC,
                                  SDNode OpNode, PatFrag mem_frag,
                                  X86MemOperand x86memop,
                                  ValueType OpVT, ValueType InVT,
                                  Domain d> {
  let hasSideEffects = 0 in
  def rr : AVX512PI<opc, MRMSrcReg,
                    (outs DstRC:$dst),
                    (ins SrcRC:$src),
                    asm # " \t{$src, $dst|$dst, $src}",
                    [(set DstRC:$dst,
                          (OpVT (OpNode (InVT SrcRC:$src))))],
                    d>,
           EVEX;

  let hasSideEffects = 0 in
  def rrb : AVX512PI<opc, MRMSrcReg,
                     (outs DstRC:$dst),
                     (ins SrcRC:$src, AVX512RC:$rc),
                     asm # " \t{$rc, $src, $dst|$dst, $src, $rc}",
                     [], d>,
            EVEX, EVEX_B, EVEX_RC;

  let hasSideEffects = 0, mayLoad = 1 in
  def rm : AVX512PI<opc, MRMSrcMem,
                    (outs DstRC:$dst),
                    (ins x86memop:$src),
                    asm # " \t{$src, $dst|$dst, $src}",
                    [(set DstRC:$dst,
                          (OpVT (OpNode (InVT (bitconvert
                                                 (mem_frag addr:$src))))))],
                    d>,
           EVEX;
}
3743
// Packed conversion with register (rr) and memory (rm) forms only, both
// marked hasSideEffects = 0:
//   rr - matches (OpVT (OpNode (InVT SrcRC:$src))).
//   rm - matches the same node on (bitconvert (mem_frag addr:$src)).
multiclass avx512_vcvt_fp<bits<8> opc, string asm,
                          RegisterClass SrcRC, RegisterClass DstRC,
                          SDNode OpNode, PatFrag mem_frag,
                          X86MemOperand x86memop,
                          ValueType OpVT, ValueType InVT,
                          Domain d> {
  let hasSideEffects = 0 in
  def rr : AVX512PI<opc, MRMSrcReg,
                    (outs DstRC:$dst),
                    (ins SrcRC:$src),
                    asm # " \t{$src, $dst|$dst, $src}",
                    [(set DstRC:$dst,
                          (OpVT (OpNode (InVT SrcRC:$src))))],
                    d>,
           EVEX;

  let hasSideEffects = 0, mayLoad = 1 in
  def rm : AVX512PI<opc, MRMSrcMem,
                    (outs DstRC:$dst),
                    (ins x86memop:$src),
                    asm # " \t{$src, $dst|$dst, $src}",
                    [(set DstRC:$dst,
                          (OpVT (OpNode (InVT (bitconvert
                                                 (mem_frag addr:$src))))))],
                    d>,
           EVEX;
}
3760
// v8f64 -> v8f32 (fround), including the rrb form from the _with_rc
// multiclass.
defm VCVTPD2PSZ :
    avx512_vcvt_fp_with_rc<0x5A, "vcvtpd2ps", VR512, VR256X, fround,
                           memopv8f64, f512mem, v8f32, v8f64,
                           SSEPackedSingle>,
    EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;

// v8f32 -> v8f64 (fextend).
defm VCVTPS2PDZ :
    avx512_vcvt_fp<0x5A, "vcvtps2pd", VR256X, VR512, fextend,
                   memopv4f64, f256mem, v8f64, v8f32,
                   SSEPackedDouble>,
    EVEX_V512, PS, EVEX_CD8<32, CD8VH>;

// An extending v8f32 load selects the memory form directly.
def : Pat<(v8f64 (extloadv8f32 addr:$src)),
          (VCVTPS2PDZrm addr:$src)>;

// cvtpd2ps intrinsic with zero pass-through and all-ones mask:
// FROUND_CURRENT selects the rr form ...
def : Pat<(v8f32 (int_x86_avx512_mask_cvtpd2ps_512
                     (v8f64 VR512:$src),
                     (bc_v8f32(v8i32 immAllZerosV)),
                     (i8 -1),
                     (i32 FROUND_CURRENT))),
          (VCVTPD2PSZrr VR512:$src)>;

// ... and any other immediate is forwarded as $rc to the rrb form.
def : Pat<(v8f32 (int_x86_avx512_mask_cvtpd2ps_512
                     (v8f64 VR512:$src),
                     (bc_v8f32(v8i32 immAllZerosV)),
                     (i8 -1),
                     imm:$rc)),
          (VCVTPD2PSZrrb VR512:$src, imm:$rc)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003780
3781//===----------------------------------------------------------------------===//
// AVX-512 Vector convert from signed integer to float/double
3783//===----------------------------------------------------------------------===//
3784
// v16i32 -> v16f32 (sint_to_fp), including the rrb form.
defm VCVTDQ2PSZ :
    avx512_vcvt_fp_with_rc<0x5B, "vcvtdq2ps", VR512, VR512, sint_to_fp,
                           memopv8i64, i512mem, v16f32, v16i32,
                           SSEPackedSingle>,
    EVEX_V512, PS, EVEX_CD8<32, CD8VF>;

// v8i32 -> v8f64 (sint_to_fp).
defm VCVTDQ2PDZ :
    avx512_vcvt_fp<0xE6, "vcvtdq2pd", VR256X, VR512, sint_to_fp,
                   memopv4i64, i256mem, v8f64, v8i32,
                   SSEPackedDouble>,
    EVEX_V512, XS, EVEX_CD8<32, CD8VH>;

// v16f32 -> v16i32 (fp_to_sint).
defm VCVTTPS2DQZ :
    avx512_vcvt_fp<0x5B, "vcvttps2dq", VR512, VR512, fp_to_sint,
                   memopv16f32, f512mem, v16i32, v16f32,
                   SSEPackedSingle>,
    EVEX_V512, XS, EVEX_CD8<32, CD8VF>;

// v8f64 -> v8i32 (fp_to_sint).
defm VCVTTPD2DQZ :
    avx512_vcvt_fp<0xE6, "vcvttpd2dq", VR512, VR256X, fp_to_sint,
                   memopv8f64, f512mem, v8i32, v8f64,
                   SSEPackedDouble>,
    EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
3804
// v16f32 -> v16i32 (fp_to_uint).
defm VCVTTPS2UDQZ :
    avx512_vcvt_fp<0x78, "vcvttps2udq", VR512, VR512, fp_to_uint,
                   memopv16f32, f512mem, v16i32, v16f32,
                   SSEPackedSingle>,
    EVEX_V512, PS, EVEX_CD8<32, CD8VF>;

// cvttps2udq (src, 0, mask-all-ones, sae-current)
def : Pat<(v16i32 (int_x86_avx512_mask_cvttps2udq_512
                      (v16f32 VR512:$src),
                      (v16i32 immAllZerosV),
                      (i16 -1),
                      FROUND_CURRENT)),
          (VCVTTPS2UDQZrr VR512:$src)>;

// v8f64 -> v8i32 (fp_to_uint).
defm VCVTTPD2UDQZ :
    avx512_vcvt_fp<0x78, "vcvttpd2udq", VR512, VR256X, fp_to_uint,
                   memopv8f64, f512mem, v8i32, v8f64,
                   SSEPackedDouble>,
    EVEX_V512, PS, VEX_W, EVEX_CD8<64, CD8VF>;

// cvttpd2udq (src, 0, mask-all-ones, sae-current)
def : Pat<(v8i32 (int_x86_avx512_mask_cvttpd2udq_512
                     (v8f64 VR512:$src),
                     (v8i32 immAllZerosV),
                     (i8 -1),
                     FROUND_CURRENT)),
          (VCVTTPD2UDQZrr VR512:$src)>;

// v8i32 -> v8f64 (uint_to_fp).
defm VCVTUDQ2PDZ :
    avx512_vcvt_fp<0x7A, "vcvtudq2pd", VR256X, VR512, uint_to_fp,
                   memopv4i64, f256mem, v8f64, v8i32,
                   SSEPackedDouble>,
    EVEX_V512, XS, EVEX_CD8<32, CD8VH>;

// v16i32 -> v16f32 (uint_to_fp), including the rrb form.
defm VCVTUDQ2PSZ :
    avx512_vcvt_fp_with_rc<0x7A, "vcvtudq2ps", VR512, VR512, uint_to_fp,
                           memopv16i32, f512mem, v16f32, v16i32,
                           SSEPackedSingle>,
    EVEX_V512, XD, EVEX_CD8<32, CD8VF>;
3834
// Unsigned conversions on 128/256-bit vectors.  Each pattern places the
// narrow source into a wider register with SUBREG_TO_REG, runs the Z-suffixed
// instruction, and takes the result back out with EXTRACT_SUBREG.

// v8f32 -> v8i32
def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
          (EXTRACT_SUBREG
             (v16i32 (VCVTTPS2UDQZrr
                        (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1,
                                               sub_ymm)))),
             sub_ymm)>;

// v4f32 -> v4i32
def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))),
          (EXTRACT_SUBREG
             (v16i32 (VCVTTPS2UDQZrr
                        (v16f32 (SUBREG_TO_REG (i32 0), VR128X:$src1,
                                               sub_xmm)))),
             sub_xmm)>;

// v8i32 -> v8f32
def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
          (EXTRACT_SUBREG
             (v16f32 (VCVTUDQ2PSZrr
                        (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1,
                                               sub_ymm)))),
             sub_ymm)>;

// v4i32 -> v4f32
def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
          (EXTRACT_SUBREG
             (v16f32 (VCVTUDQ2PSZrr
                        (v16i32 (SUBREG_TO_REG (i32 0), VR128X:$src1,
                                               sub_xmm)))),
             sub_xmm)>;

// v4i32 -> v4f64: the source goes in via sub_xmm, the result comes out via
// sub_ymm.
def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
          (EXTRACT_SUBREG
             (v8f64 (VCVTUDQ2PDZrr
                       (v8i32 (SUBREG_TO_REG (i32 0), VR128X:$src1,
                                             sub_xmm)))),
             sub_ymm)>;
3854
// Masked int -> fp intrinsics with a zero pass-through vector and an all-ones
// mask.  The *2ps intrinsics forward their immediate as $rc to the rrb form;
// the *2pd intrinsics select the plain rr form.
def : Pat<(v16f32 (int_x86_avx512_mask_cvtdq2ps_512
                      (v16i32 VR512:$src),
                      (bc_v16f32 (v16i32 immAllZerosV)),
                      (i16 -1),
                      imm:$rc)),
          (VCVTDQ2PSZrrb VR512:$src, imm:$rc)>;
def : Pat<(v8f64 (int_x86_avx512_mask_cvtdq2pd_512
                     (v8i32 VR256X:$src),
                     (bc_v8f64 (v16i32 immAllZerosV)),
                     (i8 -1))),
          (VCVTDQ2PDZrr VR256X:$src)>;
def : Pat<(v16f32 (int_x86_avx512_mask_cvtudq2ps_512
                      (v16i32 VR512:$src),
                      (bc_v16f32 (v16i32 immAllZerosV)),
                      (i16 -1),
                      imm:$rc)),
          (VCVTUDQ2PSZrrb VR512:$src, imm:$rc)>;
def : Pat<(v8f64 (int_x86_avx512_mask_cvtudq2pd_512
                     (v8i32 VR256X:$src),
                     (bc_v8f64 (v16i32 immAllZerosV)),
                     (i8 -1))),
          (VCVTUDQ2PDZrr VR256X:$src)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003867
// avx512_vcvt_fp2int - packed floating-point to integer conversion forms.
//   opc      - opcode byte shared by all three forms.
//   asm      - mnemonic; the operand string is appended with !strconcat.
//   SrcRC    - register class of the source operand.
//   DstRC    - register class of the result.
//   mem_frag - accepted but not referenced by any def in this multiclass.
//   x86memop - memory operand type used by the rm form.
//   d        - execution domain handed to AVX512PI.
// Every def has an empty pattern list; selection is done by separate Pat
// records that name the generated instructions.
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00003868multiclass avx512_vcvt_fp2int<bits<8> opc, string asm, RegisterClass SrcRC,
3869 RegisterClass DstRC, PatFrag mem_frag,
3870 X86MemOperand x86memop, Domain d> {
Elena Demikhovskyf404e052014-01-05 14:21:07 +00003871let hasSideEffects = 0 in {
// Register source.
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00003872 def rr : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00003873 !strconcat(asm," \t{$src, $dst|$dst, $src}"),
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00003874 [], d>, EVEX;
// Register source plus an explicit AVX512RC:$rc operand; tagged EVEX_B and
// EVEX_RC in addition to EVEX.
3875 def rrb : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src, AVX512RC:$rc),
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00003876 !strconcat(asm," \t{$rc, $src, $dst|$dst, $src, $rc}"),
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00003877 [], d>, EVEX, EVEX_B, EVEX_RC;
// Memory source; mayLoad is set explicitly because there is no pattern to
// infer it from.
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00003878 let mayLoad = 1 in
3879 def rm : AVX512PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00003880 !strconcat(asm," \t{$src, $dst|$dst, $src}"),
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00003881 [], d>, EVEX;
Elena Demikhovskyf404e052014-01-05 14:21:07 +00003882} // hasSideEffects = 0
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00003883}
3884
// Signed fp -> int32 conversions, 512-bit source. The pd form produces its
// result in VR256X.
3885defm VCVTPS2DQZ : avx512_vcvt_fp2int<0x5B, "vcvtps2dq", VR512, VR512,
Craig Topperae11aed2014-01-14 07:41:20 +00003886 memopv16f32, f512mem, SSEPackedSingle>, PD,
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00003887 EVEX_V512, EVEX_CD8<32, CD8VF>;
3888defm VCVTPD2DQZ : avx512_vcvt_fp2int<0xE6, "vcvtpd2dq", VR512, VR256X,
3889 memopv8f64, f512mem, SSEPackedDouble>, XD, VEX_W,
3890 EVEX_V512, EVEX_CD8<64, CD8VF>;
3891
// Masked intrinsics with zero pass-through and all-ones mask select the rrb
// form, forwarding the rounding-control immediate.
3892def : Pat <(v16i32 (int_x86_avx512_mask_cvtps2dq_512 (v16f32 VR512:$src),
3893 (v16i32 immAllZerosV), (i16 -1), imm:$rc)),
3894 (VCVTPS2DQZrrb VR512:$src, imm:$rc)>;
3895
3896def : Pat <(v8i32 (int_x86_avx512_mask_cvtpd2dq_512 (v8f64 VR512:$src),
3897 (v8i32 immAllZerosV), (i8 -1), imm:$rc)),
3898 (VCVTPD2DQZrrb VR512:$src, imm:$rc)>;
3899
// Unsigned counterparts; both share opcode 0x79 and differ by VEX_W and the
// CD8 element size.
3900defm VCVTPS2UDQZ : avx512_vcvt_fp2int<0x79, "vcvtps2udq", VR512, VR512,
3901 memopv16f32, f512mem, SSEPackedSingle>,
Craig Topper5ccb6172014-02-18 00:21:49 +00003902 PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00003903defm VCVTPD2UDQZ : avx512_vcvt_fp2int<0x79, "vcvtpd2udq", VR512, VR256X,
3904 memopv8f64, f512mem, SSEPackedDouble>, VEX_W,
Craig Topper5ccb6172014-02-18 00:21:49 +00003905 PS, EVEX_V512, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00003906
3907def : Pat <(v16i32 (int_x86_avx512_mask_cvtps2udq_512 (v16f32 VR512:$src),
3908 (v16i32 immAllZerosV), (i16 -1), imm:$rc)),
3909 (VCVTPS2UDQZrrb VR512:$src, imm:$rc)>;
3910
3911def : Pat <(v8i32 (int_x86_avx512_mask_cvtpd2udq_512 (v8f64 VR512:$src),
3912 (v8i32 immAllZerosV), (i8 -1), imm:$rc)),
3913 (VCVTPD2UDQZrrb VR512:$src, imm:$rc)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003914
// Fold a load into the 512-bit double<->single conversions: fround of a
// loaded v8f64 and an extending load of v8f32 use the rm instruction forms.
3915let Predicates = [HasAVX512] in {
3916 def : Pat<(v8f32 (fround (loadv8f64 addr:$src))),
3917 (VCVTPD2PSZrm addr:$src)>;
3918 def : Pat<(v8f64 (extloadv8f32 addr:$src)),
3919 (VCVTPS2PDZrm addr:$src)>;
3920}
3921
Elena Demikhovskydd0794e2013-10-24 07:16:35 +00003922//===----------------------------------------------------------------------===//
3923// Half precision conversion instructions
3924//===----------------------------------------------------------------------===//
// avx512_cvtph2ps - vcvtph2ps register and memory forms (opcode 0x13).
//   destRC   - register class of the result.
//   srcRC    - register class of the register-form source.
//   x86memop - memory operand type of the memory-form source.
// Both defs have empty patterns; the register form is selected through a
// separate intrinsic Pat.
Elena Demikhovskya30e4372014-02-05 07:05:03 +00003925multiclass avx512_cvtph2ps<RegisterClass destRC, RegisterClass srcRC,
3926 X86MemOperand x86memop> {
Elena Demikhovskydd0794e2013-10-24 07:16:35 +00003927 def rr : AVX5128I<0x13, MRMSrcReg, (outs destRC:$dst), (ins srcRC:$src),
3928 "vcvtph2ps\t{$src, $dst|$dst, $src}",
Elena Demikhovskya30e4372014-02-05 07:05:03 +00003929 []>, EVEX;
// No pattern to infer flags from, so the load is declared explicitly.
Elena Demikhovskyf404e052014-01-05 14:21:07 +00003930 let hasSideEffects = 0, mayLoad = 1 in
Elena Demikhovskydd0794e2013-10-24 07:16:35 +00003931 def rm : AVX5128I<0x13, MRMSrcMem, (outs destRC:$dst), (ins x86memop:$src),
3932 "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, EVEX;
3933}
3934
// avx512_cvtps2ph - vcvtps2ph register and store forms (opcode 0x1D).
//   destRC   - register class of the register-form result.
//   srcRC    - register class of the source.
//   x86memop - memory operand type written by the mr form.
// $src2 is an 8-bit immediate operand (i32i8imm). Both defs have empty
// patterns; the register form is selected through a separate intrinsic Pat.
Elena Demikhovskya30e4372014-02-05 07:05:03 +00003935multiclass avx512_cvtps2ph<RegisterClass destRC, RegisterClass srcRC,
3936 X86MemOperand x86memop> {
Elena Demikhovskydd0794e2013-10-24 07:16:35 +00003937 def rr : AVX512AIi8<0x1D, MRMDestReg, (outs destRC:$dst),
3938 (ins srcRC:$src1, i32i8imm:$src2),
Elena Demikhovskya30e4372014-02-05 07:05:03 +00003939 "vcvtps2ph \t{$src2, $src1, $dst|$dst, $src1, $src2}",
3940 []>, EVEX;
// Store form: no outputs, destination is the memory operand.
Elena Demikhovskyf404e052014-01-05 14:21:07 +00003941 let hasSideEffects = 0, mayStore = 1 in
Elena Demikhovskydd0794e2013-10-24 07:16:35 +00003942 def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
3943 (ins x86memop:$dst, srcRC:$src1, i32i8imm:$src2),
Elena Demikhovskya30e4372014-02-05 07:05:03 +00003944 "vcvtps2ph \t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, EVEX;
Elena Demikhovskydd0794e2013-10-24 07:16:35 +00003945}
3946
// 512-bit half-precision conversions: the half-precision side lives in
// VR256X / f256mem, the single-precision side in VR512.
Elena Demikhovskya30e4372014-02-05 07:05:03 +00003947defm VCVTPH2PSZ : avx512_cvtph2ps<VR512, VR256X, f256mem>, EVEX_V512,
Elena Demikhovskydd0794e2013-10-24 07:16:35 +00003948 EVEX_CD8<32, CD8VH>;
Elena Demikhovskya30e4372014-02-05 07:05:03 +00003949defm VCVTPS2PHZ : avx512_cvtps2ph<VR256X, VR512, f256mem>, EVEX_V512,
Elena Demikhovskydd0794e2013-10-24 07:16:35 +00003950 EVEX_CD8<32, CD8VH>;
3951
// Masked intrinsics with zero pass-through and all-ones mask select the
// register forms. For vcvtps2ph the intrinsic's immediate is forwarded as the
// instruction's immediate operand.
Elena Demikhovskya30e4372014-02-05 07:05:03 +00003952def : Pat<(v16i16 (int_x86_avx512_mask_vcvtps2ph_512 (v16f32 VR512:$src),
3953 imm:$rc, (bc_v16i16(v8i32 immAllZerosV)), (i16 -1))),
3954 (VCVTPS2PHZrr VR512:$src, imm:$rc)>;
3955
// Only the FROUND_CURRENT variant of the vcvtph2ps intrinsic is matched here.
3956def : Pat<(v16f32 (int_x86_avx512_mask_vcvtph2ps_512 (v16i16 VR256X:$src),
3957 (bc_v16f32(v16i32 immAllZerosV)), (i16 -1), (i32 FROUND_CURRENT))),
3958 (VCVTPH2PSZrr VR256X:$src)>;
3959
// Scalar floating-point compares that write EFLAGS, EVEX-encoded and gated on
// HasAVX512. sse12_ord_cmp is declared elsewhere (not visible in this
// section). All forms are VEX_LIG; the double-precision ones add VEX_W.
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003960let Defs = [EFLAGS], Predicates = [HasAVX512] in {
// Unordered compares on scalar FP registers, selected from X86cmp.
3961 defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
Craig Topper5ccb6172014-02-18 00:21:49 +00003962 "ucomiss">, PS, EVEX, VEX_LIG,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003963 EVEX_CD8<32, CD8VT1>;
3964 defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64,
Craig Topperae11aed2014-01-14 07:41:20 +00003965 "ucomisd">, PD, EVEX,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003966 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
// Ordered compares are defined with an empty Pattern and an undef node, so
// these records carry no selection pattern of their own.
3967 let Pattern = []<dag> in {
3968 defm VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, undef, v4f32, f128mem, load,
Craig Topper5ccb6172014-02-18 00:21:49 +00003969 "comiss">, PS, EVEX, VEX_LIG,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003970 EVEX_CD8<32, CD8VT1>;
3971 defm VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, undef, v2f64, f128mem, load,
Craig Topperae11aed2014-01-14 07:41:20 +00003972 "comisd">, PD, EVEX,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003973 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
3974 }
// Codegen-only variants on VR128X that match the X86ucomi / X86comi nodes on
// vector types; they reuse the opcodes and mnemonics of the forms above.
Craig Topper9dd48c82014-01-02 17:28:14 +00003975 let isCodeGenOnly = 1 in {
3976 defm Int_VUCOMISSZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v4f32, f128mem,
Craig Topper5ccb6172014-02-18 00:21:49 +00003977 load, "ucomiss">, PS, EVEX, VEX_LIG,
Craig Topper9dd48c82014-01-02 17:28:14 +00003978 EVEX_CD8<32, CD8VT1>;
3979 defm Int_VUCOMISDZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v2f64, f128mem,
Craig Topperae11aed2014-01-14 07:41:20 +00003980 load, "ucomisd">, PD, EVEX,
Craig Topper9dd48c82014-01-02 17:28:14 +00003981 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003982
Craig Topper9dd48c82014-01-02 17:28:14 +00003983 defm Int_VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v4f32, f128mem,
Craig Topper5ccb6172014-02-18 00:21:49 +00003984 load, "comiss">, PS, EVEX, VEX_LIG,
Craig Topper9dd48c82014-01-02 17:28:14 +00003985 EVEX_CD8<32, CD8VT1>;
3986 defm Int_VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v2f64, f128mem,
Craig Topperae11aed2014-01-14 07:41:20 +00003987 load, "comisd">, PD, EVEX,
Craig Topper9dd48c82014-01-02 17:28:14 +00003988 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
3989 }
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003990}
3991
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00003992/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
//   opc       - opcode byte.
//   OpcodeStr - full mnemonic; the operand string is appended.
//   RC        - scalar FP register class used for $dst, $src1 and $src2.
//   x86memop  - memory operand type for the rm form's $src2.
// Two-source scalar forms (EVEX_4V) with empty patterns; selection is done by
// separate Pat records.
3993multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
3994 X86MemOperand x86memop> {
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003995 let hasSideEffects = 0 in {
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00003996 def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
3997 (ins RC:$src1, RC:$src2),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003998 !strconcat(OpcodeStr,
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00003999 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V;
// Memory form: mayLoad is explicit because the pattern list is empty.
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004000 let mayLoad = 1 in {
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00004001 def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
4002 (ins RC:$src1, x86memop:$src2),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004003 !strconcat(OpcodeStr,
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00004004 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004005 }
4006}
4007}
4008
// Scalar 14-bit reciprocal / reciprocal-square-root instances. The sd forms
// add VEX_W and use 64-bit CD8 scaling.
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00004009defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", FR32X, f32mem>,
4010 EVEX_CD8<32, CD8VT1>;
4011defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", FR64X, f64mem>,
4012 VEX_W, EVEX_CD8<64, CD8VT1>;
4013defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", FR32X, f32mem>,
4014 EVEX_CD8<32, CD8VT1>;
4015defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", FR64X, f64mem>,
4016 VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004017
// The intrinsics operate on VR128X vectors while the instructions are defined
// on scalar FR32X/FR64X, so operands and result are moved between register
// classes with COPY_TO_REGCLASS. Only the zero-pass-through, all-ones-mask
// form of each intrinsic is matched.
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00004018def : Pat <(v4f32 (int_x86_avx512_rcp14_ss (v4f32 VR128X:$src1),
4019 (v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1))),
4020 (COPY_TO_REGCLASS (VRCP14SSrr (COPY_TO_REGCLASS VR128X:$src1, FR32X),
4021 (COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004022
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00004023def : Pat <(v2f64 (int_x86_avx512_rcp14_sd (v2f64 VR128X:$src1),
4024 (v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1))),
4025 (COPY_TO_REGCLASS (VRCP14SDrr (COPY_TO_REGCLASS VR128X:$src1, FR64X),
4026 (COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>;
Elena Demikhovskya3a71402013-10-09 08:16:14 +00004027
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00004028def : Pat <(v4f32 (int_x86_avx512_rsqrt14_ss (v4f32 VR128X:$src1),
4029 (v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1))),
4030 (COPY_TO_REGCLASS (VRSQRT14SSrr (COPY_TO_REGCLASS VR128X:$src1, FR32X),
4031 (COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>;
Elena Demikhovskya3a71402013-10-09 08:16:14 +00004032
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00004033def : Pat <(v2f64 (int_x86_avx512_rsqrt14_sd (v2f64 VR128X:$src1),
4034 (v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1))),
4035 (COPY_TO_REGCLASS (VRSQRT14SDrr (COPY_TO_REGCLASS VR128X:$src1, FR64X),
4036 (COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>;
Elena Demikhovskya3a71402013-10-09 08:16:14 +00004037
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00004038/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
//   opc       - opcode byte.
//   OpcodeStr - full mnemonic; the operand string is appended.
//   OpNode    - DAG node matched by both forms.
//   RC        - vector register class for source and result.
//   x86memop  - memory operand type of the m form.
//   mem_frag  - load fragment wrapped by OpNode in the m form's pattern.
//   OpVt      - value type of the result.
// Unlike the scalar multiclass, both defs carry selection patterns.
4039multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
4040 RegisterClass RC, X86MemOperand x86memop,
4041 PatFrag mem_frag, ValueType OpVt> {
4042 def r : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
4043 !strconcat(OpcodeStr,
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00004044 " \t{$src, $dst|$dst, $src}"),
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00004045 [(set RC:$dst, (OpVt (OpNode RC:$src)))]>,
4046 EVEX;
4047 def m : AVX5128I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00004048 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00004049 [(set RC:$dst, (OpVt (OpNode (mem_frag addr:$src))))]>,
4050 EVEX;
4051}
// 512-bit packed 14-bit approximations, matched from X86frsqrt / X86frcp.
4052defm VRSQRT14PSZ : avx512_fp14_p<0x4E, "vrsqrt14ps", X86frsqrt, VR512, f512mem,
4053 memopv16f32, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
4054defm VRSQRT14PDZ : avx512_fp14_p<0x4E, "vrsqrt14pd", X86frsqrt, VR512, f512mem,
4055 memopv8f64, v8f64>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
4056defm VRCP14PSZ : avx512_fp14_p<0x4C, "vrcp14ps", X86frcp, VR512, f512mem,
4057 memopv16f32, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
4058defm VRCP14PDZ : avx512_fp14_p<0x4C, "vrcp14pd", X86frcp, VR512, f512mem,
4059 memopv8f64, v8f64>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
4060
// Intrinsic forms with zero pass-through and all-ones mask select the
// register instructions.
4061def : Pat <(v16f32 (int_x86_avx512_rsqrt14_ps_512 (v16f32 VR512:$src),
4062 (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1))),
4063 (VRSQRT14PSZr VR512:$src)>;
4064def : Pat <(v8f64 (int_x86_avx512_rsqrt14_pd_512 (v8f64 VR512:$src),
4065 (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
4066 (VRSQRT14PDZr VR512:$src)>;
4067
4068def : Pat <(v16f32 (int_x86_avx512_rcp14_ps_512 (v16f32 VR512:$src),
4069 (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1))),
4070 (VRCP14PSZr VR512:$src)>;
4071def : Pat <(v8f64 (int_x86_avx512_rcp14_pd_512 (v8f64 VR512:$src),
4072 (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
4073 (VRCP14PDZr VR512:$src)>;
4074
4075/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
//   opc       - opcode byte.
//   OpcodeStr - full mnemonic; the operand string is appended.
//   RC        - scalar FP register class used for $dst, $src1 and $src2.
//   x86memop  - memory operand type for the rm form's $src2.
// All defs require HasERI and have empty patterns.
4076multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
4077 X86MemOperand x86memop> {
4078 let hasSideEffects = 0, Predicates = [HasERI] in {
4079 def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
4080 (ins RC:$src1, RC:$src2),
4081 !strconcat(OpcodeStr,
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00004082 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V;
// Same operands as rr, but tagged EVEX_B and printed with a literal {sae}.
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00004083 def rrb : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
4084 (ins RC:$src1, RC:$src2),
4085 !strconcat(OpcodeStr,
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00004086 " \t{{sae}, $src2, $src1, $dst|$dst, $src1, $src2, {sae}}"),
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00004087 []>, EVEX_4V, EVEX_B;
// Memory form: mayLoad is explicit because the pattern list is empty.
4088 let mayLoad = 1 in {
4089 def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
4090 (ins RC:$src1, x86memop:$src2),
4091 !strconcat(OpcodeStr,
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00004092 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V;
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00004093 }
4094}
Elena Demikhovskya3a71402013-10-09 08:16:14 +00004095}
4096
// Scalar 28-bit reciprocal / reciprocal-square-root instances (HasERI comes
// from the multiclass). The sd forms add VEX_W and 64-bit CD8 scaling.
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00004097defm VRCP28SS : avx512_fp28_s<0xCB, "vrcp28ss", FR32X, f32mem>,
4098 EVEX_CD8<32, CD8VT1>;
4099defm VRCP28SD : avx512_fp28_s<0xCB, "vrcp28sd", FR64X, f64mem>,
4100 VEX_W, EVEX_CD8<64, CD8VT1>;
4101defm VRSQRT28SS : avx512_fp28_s<0xCD, "vrsqrt28ss", FR32X, f32mem>,
4102 EVEX_CD8<32, CD8VT1>;
4103defm VRSQRT28SD : avx512_fp28_s<0xCD, "vrsqrt28sd", FR64X, f64mem>,
4104 VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovskya3a71402013-10-09 08:16:14 +00004105
// Only the FROUND_NO_EXC variants of the intrinsics (zero pass-through,
// all-ones mask) are matched, and they select the {sae} rrb forms. Operands
// and result are moved between VR128X and the scalar classes with
// COPY_TO_REGCLASS.
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00004106def : Pat <(v4f32 (int_x86_avx512_rcp28_ss (v4f32 VR128X:$src1),
4107 (v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1),
4108 FROUND_NO_EXC)),
4109 (COPY_TO_REGCLASS (VRCP28SSrrb (COPY_TO_REGCLASS VR128X:$src1, FR32X),
4110 (COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>;
4111
4112def : Pat <(v2f64 (int_x86_avx512_rcp28_sd (v2f64 VR128X:$src1),
4113 (v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1),
4114 FROUND_NO_EXC)),
4115 (COPY_TO_REGCLASS (VRCP28SDrrb (COPY_TO_REGCLASS VR128X:$src1, FR64X),
4116 (COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>;
4117
4118def : Pat <(v4f32 (int_x86_avx512_rsqrt28_ss (v4f32 VR128X:$src1),
4119 (v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1),
4120 FROUND_NO_EXC)),
4121 (COPY_TO_REGCLASS (VRSQRT28SSrrb (COPY_TO_REGCLASS VR128X:$src1, FR32X),
4122 (COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>;
4123
4124def : Pat <(v2f64 (int_x86_avx512_rsqrt28_sd (v2f64 VR128X:$src1),
4125 (v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1),
4126 FROUND_NO_EXC)),
4127 (COPY_TO_REGCLASS (VRSQRT28SDrrb (COPY_TO_REGCLASS VR128X:$src1, FR64X),
4128 (COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>;
4129
4130/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
//   opc       - opcode byte.
//   OpcodeStr - full mnemonic; the operand string is appended.
//   RC        - vector register class for source and result.
//   x86memop  - memory operand type of the m form.
// All defs require HasERI and have empty patterns, so instruction flags
// cannot be inferred and must be stated explicitly.
4131multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr,
4132 RegisterClass RC, X86MemOperand x86memop> {
4133 let hasSideEffects = 0, Predicates = [HasERI] in {
4134 def r : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
4135 !strconcat(OpcodeStr,
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00004136 " \t{$src, $dst|$dst, $src}"),
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00004137 []>, EVEX;
// Same operand as r, but tagged EVEX_B and printed with a literal {sae}.
4138 def rb : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
4139 !strconcat(OpcodeStr,
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00004140 " \t{{sae}, $src, $dst|$dst, $src, {sae}}"),
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00004141 []>, EVEX, EVEX_B;
// The memory form reads $src from memory. With hasSideEffects = 0 and an
// empty pattern, leaving mayLoad unset would describe it as touching no
// memory at all, so mark the load explicitly (as avx512_fp28_s does for rm).
 let mayLoad = 1 in
4142 def m : AVX5128I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00004143 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00004144 []>, EVEX;
4145 }
Elena Demikhovskya3a71402013-10-09 08:16:14 +00004146}
// 512-bit packed 28-bit approximations (HasERI comes from the multiclass).
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00004147defm VRSQRT28PSZ : avx512_fp28_p<0xCC, "vrsqrt28ps", VR512, f512mem>,
4148 EVEX_V512, EVEX_CD8<32, CD8VF>;
4149defm VRSQRT28PDZ : avx512_fp28_p<0xCC, "vrsqrt28pd", VR512, f512mem>,
4150 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
4151defm VRCP28PSZ : avx512_fp28_p<0xCA, "vrcp28ps", VR512, f512mem>,
4152 EVEX_V512, EVEX_CD8<32, CD8VF>;
4153defm VRCP28PDZ : avx512_fp28_p<0xCA, "vrcp28pd", VR512, f512mem>,
4154 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
4155
// Only the FROUND_NO_EXC variants of the intrinsics (zero pass-through,
// all-ones mask) are matched; they select the {sae} rb forms.
4156def : Pat <(v16f32 (int_x86_avx512_rsqrt28_ps (v16f32 VR512:$src),
4157 (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_NO_EXC)),
4158 (VRSQRT28PSZrb VR512:$src)>;
4159def : Pat <(v8f64 (int_x86_avx512_rsqrt28_pd (v8f64 VR512:$src),
4160 (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_NO_EXC)),
4161 (VRSQRT28PDZrb VR512:$src)>;
4162
4163def : Pat <(v16f32 (int_x86_avx512_rcp28_ps (v16f32 VR512:$src),
4164 (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_NO_EXC)),
4165 (VRCP28PSZrb VR512:$src)>;
4166def : Pat <(v8f64 (int_x86_avx512_rcp28_pd (v8f64 VR512:$src),
4167 (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_NO_EXC)),
4168 (VRCP28PDZrb VR512:$src)>;
4169
// avx512_sqrt_packed - 512-bit packed square root, single and double.
//   opc       - opcode byte shared by all four forms.
//   OpcodeStr - mnemonic stem; "ps" / "pd" and the operand string are appended.
//   OpNode    - DAG node matched by every form.
//   itins_s   - itineraries for the single-precision forms (.rr / .rm).
//   itins_d   - itineraries for the double-precision forms (.rr / .rm).
// Produces PSZrr, PSZrm, PDZrr and PDZrm, all on VR512 / f512mem.
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004170multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004171 OpndItins itins_s, OpndItins itins_d> {
4172 def PSZrr :AVX512PSI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
Cameron McInally7b544f02014-02-19 15:16:09 +00004173 !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004174 [(set VR512:$dst, (v16f32 (OpNode VR512:$src)))], itins_s.rr>,
4175 EVEX, EVEX_V512;
4176
4177 let mayLoad = 1 in
4178 def PSZrm : AVX512PSI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
Cameron McInally7b544f02014-02-19 15:16:09 +00004179 !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004180 [(set VR512:$dst,
4181 (OpNode (v16f32 (bitconvert (memopv16f32 addr:$src)))))],
4182 itins_s.rm>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
4183
4184 def PDZrr : AVX512PDI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
Cameron McInally7b544f02014-02-19 15:16:09 +00004185 !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004186 [(set VR512:$dst, (v8f64 (OpNode VR512:$src)))], itins_d.rr>,
4187 EVEX, EVEX_V512;
4188
// The double-precision memory form loads with the v8f64 fragment, mirroring
// PSZrm which uses the fragment of its own element type. Matching through a
// bitconvert of a v16f32 load would require a bitcast node between the load
// and the sqrt, which is not what a plain v8f64 load looks like.
4189 let mayLoad = 1 in
4190 def PDZrm : AVX512PDI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
Cameron McInally7b544f02014-02-19 15:16:09 +00004191 !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004192 [(set VR512:$dst, (OpNode
4193 (v8f64 (bitconvert (memopv8f64 addr:$src)))))],
4194 itins_d.rm>, EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>;
4195
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004196}
4197
// avx512_sqrt_scalar - scalar square root, single and double precision.
//   opc       - opcode byte shared by all forms.
//   OpcodeStr - mnemonic stem; "ss" / "sd" and the operand string are appended.
//   F32Int    - intrinsic matched by the single-precision _Int forms.
//   F64Int    - intrinsic matched by the double-precision _Int forms.
//   itins_s   - itineraries for the single-precision forms (.rr / .rm).
//   itins_d   - itineraries for the double-precision forms (.rr / .rm).
// For each precision there is a pattern-less FR32X/FR64X register form and
// memory form, plus isCodeGenOnly _Int forms on VR128X that match the
// intrinsic. The double-precision forms use itins_d throughout, parallel to
// the single-precision forms using itins_s.
4198multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr,
4199 Intrinsic F32Int, Intrinsic F64Int,
4200 OpndItins itins_s, OpndItins itins_d> {
4201 def SSZr : SI<opc, MRMSrcReg, (outs FR32X:$dst),
4202 (ins FR32X:$src1, FR32X:$src2),
4203 !strconcat(OpcodeStr,
Elena Demikhovskycf088092013-12-11 14:31:04 +00004204 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004205 [], itins_s.rr>, XS, EVEX_4V;
Craig Topper9dd48c82014-01-02 17:28:14 +00004206 let isCodeGenOnly = 1 in
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004207 def SSZr_Int : SIi8<opc, MRMSrcReg, (outs VR128X:$dst),
4208 (ins VR128X:$src1, VR128X:$src2),
4209 !strconcat(OpcodeStr,
Elena Demikhovskycf088092013-12-11 14:31:04 +00004210 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004211 [(set VR128X:$dst,
4212 (F32Int VR128X:$src1, VR128X:$src2))],
4213 itins_s.rr>, XS, EVEX_4V;
4214 let mayLoad = 1 in {
4215 def SSZm : SI<opc, MRMSrcMem, (outs FR32X:$dst),
4216 (ins FR32X:$src1, f32mem:$src2),
4217 !strconcat(OpcodeStr,
Elena Demikhovskycf088092013-12-11 14:31:04 +00004218 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004219 [], itins_s.rm>, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>;
Craig Topper9dd48c82014-01-02 17:28:14 +00004220 let isCodeGenOnly = 1 in
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004221 def SSZm_Int : SIi8<opc, MRMSrcMem, (outs VR128X:$dst),
4222 (ins VR128X:$src1, ssmem:$src2),
4223 !strconcat(OpcodeStr,
Elena Demikhovskycf088092013-12-11 14:31:04 +00004224 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004225 [(set VR128X:$dst,
4226 (F32Int VR128X:$src1, sse_load_f32:$src2))],
4227 itins_s.rm>, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>;
4228 }
4229 def SDZr : SI<opc, MRMSrcReg, (outs FR64X:$dst),
4230 (ins FR64X:$src1, FR64X:$src2),
4231 !strconcat(OpcodeStr,
Elena Demikhovskycf088092013-12-11 14:31:04 +00004232 "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [], itins_d.rr>,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004233 XD, EVEX_4V, VEX_W;
Craig Topper9dd48c82014-01-02 17:28:14 +00004234 let isCodeGenOnly = 1 in
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004235 def SDZr_Int : SIi8<opc, MRMSrcReg, (outs VR128X:$dst),
4236 (ins VR128X:$src1, VR128X:$src2),
4237 !strconcat(OpcodeStr,
Elena Demikhovskycf088092013-12-11 14:31:04 +00004238 "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004239 [(set VR128X:$dst,
4240 (F64Int VR128X:$src1, VR128X:$src2))],
4241 itins_d.rr>, XD, EVEX_4V, VEX_W;
4242 let mayLoad = 1 in {
4243 def SDZm : SI<opc, MRMSrcMem, (outs FR64X:$dst),
4244 (ins FR64X:$src1, f64mem:$src2),
4245 !strconcat(OpcodeStr,
Elena Demikhovskycf088092013-12-11 14:31:04 +00004246 "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [], itins_d.rm>,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004247 XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
Craig Topper9dd48c82014-01-02 17:28:14 +00004248 let isCodeGenOnly = 1 in
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004249 def SDZm_Int : SIi8<opc, MRMSrcMem, (outs VR128X:$dst),
4250 (ins VR128X:$src1, sdmem:$src2),
4251 !strconcat(OpcodeStr,
Elena Demikhovskycf088092013-12-11 14:31:04 +00004252 "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004253 [(set VR128X:$dst,
4254 (F64Int VR128X:$src1, sse_load_f64:$src2))], itins_d.rm>,
4255 XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
4256 }
4257}
4258
4259
// Instantiates both sqrt multiclasses under the VSQRT prefix, yielding
// VSQRTSSZr/..., VSQRTSDZr/..., VSQRTPSZrr/... and VSQRTPDZrr/....
// NOTE(review): the scalar multiclass is given the stem "sqrt" while the
// packed one is given "vsqrt", so the scalar forms print as "sqrtss"/"sqrtsd"
// - confirm whether the missing "v" is intended.
4260defm VSQRT : avx512_sqrt_scalar<0x51, "sqrt",
4261 int_x86_avx512_sqrt_ss, int_x86_avx512_sqrt_sd,
4262 SSE_SQRTSS, SSE_SQRTSD>,
4263 avx512_sqrt_packed<0x51, "vsqrt", fsqrt,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004264 SSE_SQRTPS, SSE_SQRTPD>;
4265
// Selection patterns for square root, reciprocal square root and reciprocal
// when AVX-512 is available. The scalar instructions take two sources; the
// first is supplied as IMPLICIT_DEF. Load-folding scalar patterns are
// restricted to OptForSize.
Elena Demikhovskya3a71402013-10-09 08:16:14 +00004266let Predicates = [HasAVX512] in {
// 512-bit sqrt intrinsics: zero pass-through, all-ones mask, FROUND_CURRENT.
Elena Demikhovskyf1648592014-07-22 11:07:31 +00004267 def : Pat<(v16f32 (int_x86_avx512_sqrt_ps_512 (v16f32 VR512:$src1),
4268 (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_CURRENT)),
4269 (VSQRTPSZrr VR512:$src1)>;
4270 def : Pat<(v8f64 (int_x86_avx512_sqrt_pd_512 (v8f64 VR512:$src1),
4271 (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_CURRENT)),
4272 (VSQRTPDZrr VR512:$src1)>;
4273
// Scalar fsqrt.
Elena Demikhovskya3a71402013-10-09 08:16:14 +00004274 def : Pat<(f32 (fsqrt FR32X:$src)),
4275 (VSQRTSSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
4276 def : Pat<(f32 (fsqrt (load addr:$src))),
4277 (VSQRTSSZm (f32 (IMPLICIT_DEF)), addr:$src)>,
4278 Requires<[OptForSize]>;
4279 def : Pat<(f64 (fsqrt FR64X:$src)),
4280 (VSQRTSDZr (f64 (IMPLICIT_DEF)), FR64X:$src)>;
4281 def : Pat<(f64 (fsqrt (load addr:$src))),
4282 (VSQRTSDZm (f64 (IMPLICIT_DEF)), addr:$src)>,
4283 Requires<[OptForSize]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004284
// Scalar f32 reciprocal square root and reciprocal use the 14-bit forms.
Elena Demikhovskya3a71402013-10-09 08:16:14 +00004285 def : Pat<(f32 (X86frsqrt FR32X:$src)),
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00004286 (VRSQRT14SSrr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
Elena Demikhovskya3a71402013-10-09 08:16:14 +00004287 def : Pat<(f32 (X86frsqrt (load addr:$src))),
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00004288 (VRSQRT14SSrm (f32 (IMPLICIT_DEF)), addr:$src)>,
Elena Demikhovskya3a71402013-10-09 08:16:14 +00004289 Requires<[OptForSize]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004290
Elena Demikhovskya3a71402013-10-09 08:16:14 +00004291 def : Pat<(f32 (X86frcp FR32X:$src)),
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00004292 (VRCP14SSrr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
Elena Demikhovskya3a71402013-10-09 08:16:14 +00004293 def : Pat<(f32 (X86frcp (load addr:$src))),
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00004294 (VRCP14SSrm (f32 (IMPLICIT_DEF)), addr:$src)>,
Elena Demikhovskya3a71402013-10-09 08:16:14 +00004295 Requires<[OptForSize]>;
4296
// Legacy SSE sqrt intrinsics. The source is copied into FR32X / FR64X, the
// classes VSQRTSSZr / VSQRTSDZr are defined on; copying into FR32 / FR64
// would needlessly constrain the value to the narrower non-EVEX classes.
4297 def : Pat<(int_x86_sse_sqrt_ss VR128X:$src),
4298 (COPY_TO_REGCLASS (VSQRTSSZr (f32 (IMPLICIT_DEF)),
4299 (COPY_TO_REGCLASS VR128X:$src, FR32X)),
4300 VR128X)>;
4301 def : Pat<(int_x86_sse_sqrt_ss sse_load_f32:$src),
4302 (VSQRTSSZm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
4303
4304 def : Pat<(int_x86_sse2_sqrt_sd VR128X:$src),
4305 (COPY_TO_REGCLASS (VSQRTSDZr (f64 (IMPLICIT_DEF)),
4306 (COPY_TO_REGCLASS VR128X:$src, FR64X)),
4307 VR128X)>;
4308 def : Pat<(int_x86_sse2_sqrt_sd sse_load_f64:$src),
4309 (VSQRTSDZm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>;
4310}
4311
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004312
// avx512_fp_unop_rm - packed unary operation with an 8-bit immediate, in
// single- and double-precision variants.
//   opcps / opcpd            - opcode bytes of the ps / pd forms.
//   OpcodeStr                - mnemonic stem; "ps" / "pd" is appended.
//   x86memop                 - memory operand type of the memory forms.
//   RC                       - register class of source and result.
//   mem_frag32 / mem_frag64  - load fragments used by the PSm / PDm patterns.
//   V4F32Int / V2F64Int      - intrinsics matched by the ps / pd forms.
//   VForm                    - CD8 form passed to EVEX_CD8 on memory forms.
4313multiclass avx512_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
4314 X86MemOperand x86memop, RegisterClass RC,
4315 PatFrag mem_frag32, PatFrag mem_frag64,
4316 Intrinsic V4F32Int, Intrinsic V2F64Int,
4317 CD8VForm VForm> {
4318let ExeDomain = SSEPackedSingle in {
4319 // Intrinsic operation, reg.
4320 // Vector intrinsic operation, reg
4321 def PSr : AVX512AIi8<opcps, MRMSrcReg,
4322 (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
4323 !strconcat(OpcodeStr,
4324 "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4325 [(set RC:$dst, (V4F32Int RC:$src1, imm:$src2))]>;
4326
4327 // Vector intrinsic operation, mem
4328 def PSm : AVX512AIi8<opcps, MRMSrcMem,
4329 (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
4330 !strconcat(OpcodeStr,
4331 "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4332 [(set RC:$dst,
4333 (V4F32Int (mem_frag32 addr:$src1),imm:$src2))]>,
4334 EVEX_CD8<32, VForm>;
4335} // ExeDomain = SSEPackedSingle
4336
4337let ExeDomain = SSEPackedDouble in {
4338 // Vector intrinsic operation, reg
4339 def PDr : AVX512AIi8<opcpd, MRMSrcReg,
4340 (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
4341 !strconcat(OpcodeStr,
4342 "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4343 [(set RC:$dst, (V2F64Int RC:$src1, imm:$src2))]>;
4344
4345 // Vector intrinsic operation, mem
4346 def PDm : AVX512AIi8<opcpd, MRMSrcMem,
4347 (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
4348 !strconcat(OpcodeStr,
4349 "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4350 [(set RC:$dst,
4351 (V2F64Int (mem_frag64 addr:$src1),imm:$src2))]>,
4352 EVEX_CD8<64, VForm>;
4353} // ExeDomain = SSEPackedDouble
4354}
4355
// avx512_fp_binop_rm - scalar two-source operation with an 8-bit immediate,
// in single- and double-precision variants.
//   opcss / opcsd   - opcode bytes of the ss / sd forms.
//   OpcodeStr       - mnemonic stem; "ss" / "sd" is appended.
//   F32Int / F64Int - intrinsics matched by the ss / sd intrinsic forms.
// For each precision: a pattern-less FR32X/FR64X register form, an
// isCodeGenOnly _Int register form on VR128X that matches the intrinsic, and
// a memory form on VR128X that matches the intrinsic with a folded scalar
// load. The sd forms add VEX_W.
4356multiclass avx512_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
4357 string OpcodeStr,
4358 Intrinsic F32Int,
4359 Intrinsic F64Int> {
4360let ExeDomain = GenericDomain in {
4361 // Operation, reg.
4362 let hasSideEffects = 0 in
4363 def SSr : AVX512AIi8<opcss, MRMSrcReg,
4364 (outs FR32X:$dst), (ins FR32X:$src1, FR32X:$src2, i32i8imm:$src3),
4365 !strconcat(OpcodeStr,
4366 "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4367 []>;
4368
4369 // Intrinsic operation, reg.
Craig Topper9dd48c82014-01-02 17:28:14 +00004370 let isCodeGenOnly = 1 in
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004371 def SSr_Int : AVX512AIi8<opcss, MRMSrcReg,
4372 (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2, i32i8imm:$src3),
4373 !strconcat(OpcodeStr,
4374 "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4375 [(set VR128X:$dst, (F32Int VR128X:$src1, VR128X:$src2, imm:$src3))]>;
4376
4377 // Intrinsic operation, mem.
4378 def SSm : AVX512AIi8<opcss, MRMSrcMem, (outs VR128X:$dst),
4379 (ins VR128X:$src1, ssmem:$src2, i32i8imm:$src3),
4380 !strconcat(OpcodeStr,
4381 "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4382 [(set VR128X:$dst, (F32Int VR128X:$src1,
4383 sse_load_f32:$src2, imm:$src3))]>,
4384 EVEX_CD8<32, CD8VT1>;
4385
4386 // Operation, reg.
4387 let hasSideEffects = 0 in
4388 def SDr : AVX512AIi8<opcsd, MRMSrcReg,
4389 (outs FR64X:$dst), (ins FR64X:$src1, FR64X:$src2, i32i8imm:$src3),
4390 !strconcat(OpcodeStr,
4391 "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4392 []>, VEX_W;
4393
4394 // Intrinsic operation, reg.
Craig Topper9dd48c82014-01-02 17:28:14 +00004395 let isCodeGenOnly = 1 in
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004396 def SDr_Int : AVX512AIi8<opcsd, MRMSrcReg,
4397 (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2, i32i8imm:$src3),
4398 !strconcat(OpcodeStr,
4399 "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4400 [(set VR128X:$dst, (F64Int VR128X:$src1, VR128X:$src2, imm:$src3))]>,
4401 VEX_W;
4402
4403 // Intrinsic operation, mem.
4404 def SDm : AVX512AIi8<opcsd, MRMSrcMem,
4405 (outs VR128X:$dst), (ins VR128X:$src1, sdmem:$src2, i32i8imm:$src3),
4406 !strconcat(OpcodeStr,
4407 "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4408 [(set VR128X:$dst,
4409 (F64Int VR128X:$src1, sse_load_f64:$src2, imm:$src3))]>,
4410 VEX_W, EVEX_CD8<64, CD8VT1>;
4411} // ExeDomain = GenericDomain
4412}
4413
// avx512_rndscale - packed round-to-scale with an 8-bit immediate.
//   opc       - opcode byte.
//   OpcodeStr - full mnemonic; the operand string is appended.
//   x86memop  - memory operand type of the m form.
//   RC        - register class of source and result.
//   mem_frag  - accepted but not referenced by any def in this multiclass.
//   d         - execution domain applied to both defs.
// Both defs have empty patterns; the register form is selected by a separate
// intrinsic Pat.
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00004414multiclass avx512_rndscale<bits<8> opc, string OpcodeStr,
4415 X86MemOperand x86memop, RegisterClass RC,
4416 PatFrag mem_frag, Domain d> {
4417let ExeDomain = d in {
4419 // Vector intrinsic operation, reg
4420 def r : AVX512AIi8<opc, MRMSrcReg,
4421 (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
4422 !strconcat(OpcodeStr,
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00004423 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00004424 []>, EVEX;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004425
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00004426 // Vector intrinsic operation, mem
// The pattern list is empty, so the load cannot be inferred; declare it.
 let mayLoad = 1 in
4427 def m : AVX512AIi8<opc, MRMSrcMem,
4428 (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
4429 !strconcat(OpcodeStr,
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00004430 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00004431 []>, EVEX;
4432} // ExeDomain
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004433}
4434
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00004435
// 512-bit packed single-precision instantiation.
defm VRNDSCALEPSZ : avx512_rndscale<0x08, "vrndscaleps", f512mem, VR512,
                                    memopv16f32, SSEPackedSingle>,
                    EVEX_V512, EVEX_CD8<32, CD8VF>;

// Map the masked intrinsic onto the plain register form when the mask operand
// is all-ones and the rounding argument is FROUND_CURRENT.
def : Pat<(v16f32 (int_x86_avx512_mask_rndscale_ps_512
                      (v16f32 VR512:$src1), imm:$src2,
                      (v16f32 VR512:$src1), (i16 -1), FROUND_CURRENT)),
          (VRNDSCALEPSZr VR512:$src1, imm:$src2)>;


// 512-bit packed double-precision instantiation.
defm VRNDSCALEPDZ : avx512_rndscale<0x09, "vrndscalepd", f512mem, VR512,
                                    memopv8f64, SSEPackedDouble>,
                    EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

// Same mapping for the double-precision intrinsic (8-bit all-ones mask).
def : Pat<(v8f64 (int_x86_avx512_mask_rndscale_pd_512
                     (v8f64 VR512:$src1), imm:$src2,
                     (v8f64 VR512:$src1), (i8 -1), FROUND_CURRENT)),
          (VRNDSCALEPDZr VR512:$src1, imm:$src2)>;
4454
// Register and memory forms of a scalar instruction that takes two sources
// and an 8-bit immediate.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic placed in front of the operand list.
//   x86memop  - operand class of the memory source in the 'm' form.
//   RC        - register class of the destination and register sources.
//   d         - execution domain applied to both forms.
// The assembly string lists all three inputs; the immediate ($src3) comes
// first in AT&T syntax and last in Intel syntax, matching the other
// three-source scalar definitions in this file.
multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
                     Operand x86memop, RegisterClass RC, Domain d> {
let ExeDomain = d in {
  // Second source in a register.
  def r : AVX512AIi8<opc, MRMSrcReg,
                    (outs RC:$dst), (ins RC:$src1, RC:$src2, i32i8imm:$src3),
                    !strconcat(OpcodeStr,
                    " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                    []>, EVEX_4V;

  // Second source in memory.
  def m : AVX512AIi8<opc, MRMSrcMem,
                    (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i32i8imm:$src3),
                    !strconcat(OpcodeStr,
                    " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                    []>, EVEX_4V;
} // ExeDomain
}
4471
// Scalar single- and double-precision instantiations.
defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", ssmem, FR32X,
                            SSEPackedSingle>, EVEX_CD8<32, CD8VT1>;

defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", sdmem, FR64X,
                            SSEPackedDouble>, EVEX_CD8<64, CD8VT1>;

// Lower the generic scalar rounding nodes to the register form. The first
// source is left undefined (IMPLICIT_DEF); the value to round is the second
// source. Immediate values, per the Intel SDM description of VRNDSCALE
// (TODO confirm against the current SDM revision):
//   0x1 round down, 0x2 round up, 0x3 truncate,
//   0x4 use the MXCSR rounding mode,
//   0xC use the MXCSR rounding mode with the precision exception suppressed.
// Every pattern spells out its result type so the f32 and f64 variants read
// the same way.
def : Pat<(f32 (ffloor FR32X:$src)),
          (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x1))>;
def : Pat<(f64 (ffloor FR64X:$src)),
          (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x1))>;
def : Pat<(f32 (fnearbyint FR32X:$src)),
          (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0xC))>;
def : Pat<(f64 (fnearbyint FR64X:$src)),
          (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0xC))>;
def : Pat<(f32 (fceil FR32X:$src)),
          (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x2))>;
def : Pat<(f64 (fceil FR64X:$src)),
          (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x2))>;
def : Pat<(f32 (frint FR32X:$src)),
          (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x4))>;
def : Pat<(f64 (frint FR64X:$src)),
          (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x4))>;
def : Pat<(f32 (ftrunc FR32X:$src)),
          (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x3))>;
def : Pat<(f64 (ftrunc FR64X:$src)),
          (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x3))>;
4498
// Lower the generic rounding nodes on 512-bit vectors to the packed register
// forms. The immediates are the same ones the scalar patterns use for the
// corresponding node.

// v16f32
def : Pat<(v16f32 (ffloor VR512:$src)),
          (VRNDSCALEPSZr VR512:$src, (i32 0x1))>;
def : Pat<(v16f32 (fnearbyint VR512:$src)),
          (VRNDSCALEPSZr VR512:$src, (i32 0xC))>;
def : Pat<(v16f32 (fceil VR512:$src)),
          (VRNDSCALEPSZr VR512:$src, (i32 0x2))>;
def : Pat<(v16f32 (frint VR512:$src)),
          (VRNDSCALEPSZr VR512:$src, (i32 0x4))>;
def : Pat<(v16f32 (ftrunc VR512:$src)),
          (VRNDSCALEPSZr VR512:$src, (i32 0x3))>;

// v8f64
def : Pat<(v8f64 (ffloor VR512:$src)),
          (VRNDSCALEPDZr VR512:$src, (i32 0x1))>;
def : Pat<(v8f64 (fnearbyint VR512:$src)),
          (VRNDSCALEPDZr VR512:$src, (i32 0xC))>;
def : Pat<(v8f64 (fceil VR512:$src)),
          (VRNDSCALEPDZr VR512:$src, (i32 0x2))>;
def : Pat<(v8f64 (frint VR512:$src)),
          (VRNDSCALEPDZr VR512:$src, (i32 0x4))>;
def : Pat<(v8f64 (ftrunc VR512:$src)),
          (VRNDSCALEPDZr VR512:$src, (i32 0x3))>;

Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004520
4521//-------------------------------------------------
4522// Integer truncate and extend operations
4523//-------------------------------------------------
4524
// Down-converting moves whose destination is the r/m operand (MRMDestReg /
// MRMDestMem). None of the forms carries a selection pattern.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic placed in front of the operand list.
//   dstRC     - register class of the narrow result.
//   srcRC     - register class of the wide source.
//   KRC       - mask register class used by the masked forms.
//   x86memop  - operand class of the memory destination.
multiclass avx512_trunc_sat<bits<8> opc, string OpcodeStr,
                            RegisterClass dstRC, RegisterClass srcRC,
                            RegisterClass KRC, X86MemOperand x86memop> {
  // Register destination, no mask.
  def rr : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst), (ins srcRC:$src),
                      OpcodeStr # " \t{$src, $dst|$dst, $src}",
                      []>,
           EVEX;

  // Register destination, masked.
  def rrk : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
                       (ins KRC:$mask, srcRC:$src),
                       OpcodeStr #
                       " \t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
                       []>,
            EVEX, EVEX_K;

  // Register destination, masked with the {z} qualifier.
  def rrkz : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
                        (ins KRC:$mask, srcRC:$src),
                        OpcodeStr #
               " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
                        []>,
             EVEX, EVEX_KZ;

  // Memory destination, no mask.
  def mr : AVX512XS8I<opc, MRMDestMem, (outs),
                      (ins x86memop:$dst, srcRC:$src),
                      OpcodeStr # " \t{$src, $dst|$dst, $src}",
                      []>,
           EVEX;

  // Memory destination, masked.
  def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
                       (ins x86memop:$dst, KRC:$mask, srcRC:$src),
                       OpcodeStr #
                       " \t{$src, $dst {${mask}}|${dst} {${mask}}, $src}",
                       []>,
            EVEX, EVEX_K;

}
// Instantiations of avx512_trunc_sat, all with a VR512 source. Each group of
// three shares its operand classes and differs only in opcode and mnemonic.

// Destination VR128X, mask VK8WM, i128mem, EVEX_CD8<8, CD8VO>.
defm VPMOVQB   : avx512_trunc_sat<0x32, "vpmovqb", VR128X, VR512, VK8WM,
                                  i128mem>,
                 EVEX_V512, EVEX_CD8<8, CD8VO>;
defm VPMOVSQB  : avx512_trunc_sat<0x22, "vpmovsqb", VR128X, VR512, VK8WM,
                                  i128mem>,
                 EVEX_V512, EVEX_CD8<8, CD8VO>;
defm VPMOVUSQB : avx512_trunc_sat<0x12, "vpmovusqb", VR128X, VR512, VK8WM,
                                  i128mem>,
                 EVEX_V512, EVEX_CD8<8, CD8VO>;

// Destination VR128X, mask VK8WM, i128mem, EVEX_CD8<16, CD8VQ>.
defm VPMOVQW   : avx512_trunc_sat<0x34, "vpmovqw", VR128X, VR512, VK8WM,
                                  i128mem>,
                 EVEX_V512, EVEX_CD8<16, CD8VQ>;
defm VPMOVSQW  : avx512_trunc_sat<0x24, "vpmovsqw", VR128X, VR512, VK8WM,
                                  i128mem>,
                 EVEX_V512, EVEX_CD8<16, CD8VQ>;
defm VPMOVUSQW : avx512_trunc_sat<0x14, "vpmovusqw", VR128X, VR512, VK8WM,
                                  i128mem>,
                 EVEX_V512, EVEX_CD8<16, CD8VQ>;

// Destination VR256X, mask VK8WM, i256mem, EVEX_CD8<32, CD8VH>.
defm VPMOVQD   : avx512_trunc_sat<0x35, "vpmovqd", VR256X, VR512, VK8WM,
                                  i256mem>,
                 EVEX_V512, EVEX_CD8<32, CD8VH>;
defm VPMOVSQD  : avx512_trunc_sat<0x25, "vpmovsqd", VR256X, VR512, VK8WM,
                                  i256mem>,
                 EVEX_V512, EVEX_CD8<32, CD8VH>;
defm VPMOVUSQD : avx512_trunc_sat<0x15, "vpmovusqd", VR256X, VR512, VK8WM,
                                  i256mem>,
                 EVEX_V512, EVEX_CD8<32, CD8VH>;

// Destination VR256X, mask VK16WM, i256mem, EVEX_CD8<16, CD8VH>.
defm VPMOVDW   : avx512_trunc_sat<0x33, "vpmovdw", VR256X, VR512, VK16WM,
                                  i256mem>,
                 EVEX_V512, EVEX_CD8<16, CD8VH>;
defm VPMOVSDW  : avx512_trunc_sat<0x23, "vpmovsdw", VR256X, VR512, VK16WM,
                                  i256mem>,
                 EVEX_V512, EVEX_CD8<16, CD8VH>;
defm VPMOVUSDW : avx512_trunc_sat<0x13, "vpmovusdw", VR256X, VR512, VK16WM,
                                  i256mem>,
                 EVEX_V512, EVEX_CD8<16, CD8VH>;

// Destination VR128X, mask VK16WM, i128mem, EVEX_CD8<8, CD8VQ>.
defm VPMOVDB   : avx512_trunc_sat<0x31, "vpmovdb", VR128X, VR512, VK16WM,
                                  i128mem>,
                 EVEX_V512, EVEX_CD8<8, CD8VQ>;
defm VPMOVSDB  : avx512_trunc_sat<0x21, "vpmovsdb", VR128X, VR512, VK16WM,
                                  i128mem>,
                 EVEX_V512, EVEX_CD8<8, CD8VQ>;
defm VPMOVUSDB : avx512_trunc_sat<0x11, "vpmovusdb", VR128X, VR512, VK16WM,
                                  i128mem>,
                 EVEX_V512, EVEX_CD8<8, CD8VQ>;
4585
// Unmasked X86vtrunc selects the plain register form of the matching VPMOV*.
def : Pat<(v16i8 (X86vtrunc (v8i64 VR512:$src))),
          (VPMOVQBrr VR512:$src)>;
def : Pat<(v8i16 (X86vtrunc (v8i64 VR512:$src))),
          (VPMOVQWrr VR512:$src)>;
def : Pat<(v16i16 (X86vtrunc (v16i32 VR512:$src))),
          (VPMOVDWrr VR512:$src)>;
def : Pat<(v16i8 (X86vtrunc (v16i32 VR512:$src))),
          (VPMOVDBrr VR512:$src)>;
def : Pat<(v8i32 (X86vtrunc (v8i64 VR512:$src))),
          (VPMOVQDrr VR512:$src)>;

// Masked X86vtruncm selects the rrkz register form.
def : Pat<(v16i8 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))),
          (VPMOVDBrrkz VK16WM:$mask, VR512:$src)>;
def : Pat<(v16i16 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))),
          (VPMOVDWrrkz VK16WM:$mask, VR512:$src)>;
def : Pat<(v8i16 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))),
          (VPMOVQWrrkz VK8WM:$mask, VR512:$src)>;
def : Pat<(v8i32 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))),
          (VPMOVQDrrkz VK8WM:$mask, VR512:$src)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004600
4601
// Widening moves (register or memory source) in plain, masked (k) and
// masked-with-{z} (kz) variants.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic placed in front of the operand list.
//   KRC       - mask register class used by the masked forms.
//   DstRC     - register class of the wide result.
//   SrcRC     - register class of the narrow register source.
//   OpNode    - DAG node matched by the unmasked forms.
//   mem_frag  - load fragment matched by the unmasked memory form.
//   x86memop  - operand class of the memory source.
//   OpVT/InVT - result and source value types used in the patterns.
// Only rr and rm carry selection patterns. No asm string has whitespace
// before the '|' variant separator, so the AT&T output of the k forms has no
// trailing space, the same as the kz forms.
multiclass avx512_extend<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                      RegisterClass DstRC, RegisterClass SrcRC, SDNode OpNode,
                      PatFrag mem_frag, X86MemOperand x86memop,
                      ValueType OpVT, ValueType InVT> {

  def rr : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
              (ins SrcRC:$src),
              !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
              [(set DstRC:$dst, (OpVT (OpNode (InVT SrcRC:$src))))]>, EVEX;

  def rrk : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
              (ins KRC:$mask, SrcRC:$src),
              !strconcat(OpcodeStr, " \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
              []>, EVEX, EVEX_K;

  def rrkz : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
              (ins KRC:$mask, SrcRC:$src),
              !strconcat(OpcodeStr, " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
              []>, EVEX, EVEX_KZ;

  let mayLoad = 1 in {
    def rm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
              (ins x86memop:$src),
              !strconcat(OpcodeStr," \t{$src, $dst|$dst, $src}"),
              [(set DstRC:$dst,
                (OpVT (OpNode (InVT (bitconvert (mem_frag addr:$src))))))]>,
              EVEX;

    def rmk : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
              (ins KRC:$mask, x86memop:$src),
              !strconcat(OpcodeStr," \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
              []>,
              EVEX, EVEX_K;

    def rmkz : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
              (ins KRC:$mask, x86memop:$src),
              !strconcat(OpcodeStr," \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
              []>,
              EVEX, EVEX_KZ;
  }
}
4643
// Instantiations of avx512_extend matching X86vzext, all with a VR512 result.
defm VPMOVZXBDZ: avx512_extend<0x31, "vpmovzxbd", VK16WM, VR512, VR128X,
                               X86vzext, memopv2i64, i128mem, v16i32, v16i8>,
                 EVEX_V512, EVEX_CD8<8, CD8VQ>;
defm VPMOVZXBQZ: avx512_extend<0x32, "vpmovzxbq", VK8WM, VR512, VR128X,
                               X86vzext, memopv2i64, i128mem, v8i64, v16i8>,
                 EVEX_V512, EVEX_CD8<8, CD8VO>;
defm VPMOVZXWDZ: avx512_extend<0x33, "vpmovzxwd", VK16WM, VR512, VR256X,
                               X86vzext, memopv4i64, i256mem, v16i32, v16i16>,
                 EVEX_V512, EVEX_CD8<16, CD8VH>;
defm VPMOVZXWQZ: avx512_extend<0x34, "vpmovzxwq", VK8WM, VR512, VR128X,
                               X86vzext, memopv2i64, i128mem, v8i64, v8i16>,
                 EVEX_V512, EVEX_CD8<16, CD8VQ>;
defm VPMOVZXDQZ: avx512_extend<0x35, "vpmovzxdq", VK8WM, VR512, VR256X,
                               X86vzext, memopv4i64, i256mem, v8i64, v8i32>,
                 EVEX_V512, EVEX_CD8<32, CD8VH>;

// Instantiations of avx512_extend matching X86vsext, all with a VR512 result.
defm VPMOVSXBDZ: avx512_extend<0x21, "vpmovsxbd", VK16WM, VR512, VR128X,
                               X86vsext, memopv2i64, i128mem, v16i32, v16i8>,
                 EVEX_V512, EVEX_CD8<8, CD8VQ>;
defm VPMOVSXBQZ: avx512_extend<0x22, "vpmovsxbq", VK8WM, VR512, VR128X,
                               X86vsext, memopv2i64, i128mem, v8i64, v16i8>,
                 EVEX_V512, EVEX_CD8<8, CD8VO>;
defm VPMOVSXWDZ: avx512_extend<0x23, "vpmovsxwd", VK16WM, VR512, VR256X,
                               X86vsext, memopv4i64, i256mem, v16i32, v16i16>,
                 EVEX_V512, EVEX_CD8<16, CD8VH>;
defm VPMOVSXWQZ: avx512_extend<0x24, "vpmovsxwq", VK8WM, VR512, VR128X,
                               X86vsext, memopv2i64, i128mem, v8i64, v8i16>,
                 EVEX_V512, EVEX_CD8<16, CD8VQ>;
defm VPMOVSXDQZ: avx512_extend<0x25, "vpmovsxdq", VK8WM, VR512, VR256X,
                               X86vsext, memopv4i64, i256mem, v8i64, v8i32>,
                 EVEX_V512, EVEX_CD8<32, CD8VH>;
4675
4676//===----------------------------------------------------------------------===//
4677// GATHER - SCATTER Operations
4678
// Masked gather load.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic placed in front of the operand list.
//   KRC       - mask register class.
//   RC        - register class of the gathered result.
//   memop     - operand class of the memory source.
// The result is tied to $src1 and marked early-clobber, and the mask is both
// an input ($mask) and an output ($mask_wb) tied together.
multiclass avx512_gather<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                         RegisterClass RC, X86MemOperand memop> {
  let mayLoad = 1,
      Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb" in {
    def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst, KRC:$mask_wb),
                      (ins RC:$src1, KRC:$mask, memop:$src2),
                      OpcodeStr #
                      " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}",
                      []>,
             EVEX, EVEX_K;
  }
}
Cameron McInally45325962014-03-26 13:50:50 +00004689
// Floating-point gathers, double-precision domain.
let ExeDomain = SSEPackedDouble in {
  defm VGATHERDPDZ : avx512_gather<0x92, "vgatherdpd", VK8WM, VR512,
                                   vy64xmem>,
                     EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
  defm VGATHERQPDZ : avx512_gather<0x93, "vgatherqpd", VK8WM, VR512,
                                   vz64mem>,
                     EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
}

// Floating-point gathers, single-precision domain.
let ExeDomain = SSEPackedSingle in {
  defm VGATHERDPSZ : avx512_gather<0x92, "vgatherdps", VK16WM, VR512,
                                   vz32mem>,
                     EVEX_V512, EVEX_CD8<32, CD8VT1>;
  defm VGATHERQPSZ : avx512_gather<0x93, "vgatherqps", VK8WM, VR256X,
                                   vz64mem>,
                     EVEX_V512, EVEX_CD8<32, CD8VT1>;
}

// Integer gathers (no explicit execution domain).
defm VPGATHERDQZ : avx512_gather<0x90, "vpgatherdq", VK8WM, VR512,
                                 vy64xmem>,
                   EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VPGATHERDDZ : avx512_gather<0x90, "vpgatherdd", VK16WM, VR512,
                                 vz32mem>,
                   EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VPGATHERQQZ : avx512_gather<0x91, "vpgatherqq", VK8WM, VR512,
                                 vz64mem>,
                   EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VPGATHERQDZ : avx512_gather<0x91, "vpgatherqd", VK8WM, VR256X,
                                 vz64mem>,
                   EVEX_V512, EVEX_CD8<32, CD8VT1>;
4713
// Masked scatter store.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic placed in front of the operand list.
//   KRC       - mask register class.
//   RC        - register class of the stored vector.
//   memop     - operand class of the memory destination.
// The mask is both an input ($mask) and an output ($mask_wb) tied together.
multiclass avx512_scatter<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                          RegisterClass RC, X86MemOperand memop> {
  let mayStore = 1, Constraints = "$mask = $mask_wb" in {
    def mr : AVX5128I<opc, MRMDestMem, (outs KRC:$mask_wb),
                      (ins memop:$dst, KRC:$mask, RC:$src2),
                      OpcodeStr #
                      " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}",
                      []>,
             EVEX, EVEX_K;
  }
}
4723
// Floating-point scatters, double-precision domain.
let ExeDomain = SSEPackedDouble in {
  defm VSCATTERDPDZ : avx512_scatter<0xA2, "vscatterdpd", VK8WM, VR512,
                                     vy64xmem>,
                      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
  defm VSCATTERQPDZ : avx512_scatter<0xA3, "vscatterqpd", VK8WM, VR512,
                                     vz64mem>,
                      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
}

// Floating-point scatters, single-precision domain.
let ExeDomain = SSEPackedSingle in {
  defm VSCATTERDPSZ : avx512_scatter<0xA2, "vscatterdps", VK16WM, VR512,
                                     vz32mem>,
                      EVEX_V512, EVEX_CD8<32, CD8VT1>;
  defm VSCATTERQPSZ : avx512_scatter<0xA3, "vscatterqps", VK8WM, VR256X,
                                     vz64mem>,
                      EVEX_V512, EVEX_CD8<32, CD8VT1>;
}

// Integer scatters (no explicit execution domain).
defm VPSCATTERDQZ : avx512_scatter<0xA0, "vpscatterdq", VK8WM, VR512,
                                   vy64xmem>,
                    EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VPSCATTERDDZ : avx512_scatter<0xA0, "vpscatterdd", VK16WM, VR512,
                                   vz32mem>,
                    EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VPSCATTERQQZ : avx512_scatter<0xA1, "vpscatterqq", VK8WM, VR512,
                                   vz64mem>,
                    EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VPSCATTERQDZ : avx512_scatter<0xA1, "vpscatterqd", VK8WM, VR256X,
                                   vz64mem>,
                    EVEX_V512, EVEX_CD8<32, CD8VT1>;
4747
// Gather/scatter prefetch: one masked memory form with no register result.
//   opc       - opcode byte.
//   F         - instruction format, supplied per instantiation.
//   OpcodeStr - mnemonic placed in front of the operand list.
//   KRC       - mask register class.
//   memop     - operand class of the memory operand.
// Requires HasPFI and is flagged as having side effects.
multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F,
                                          string OpcodeStr, RegisterClass KRC,
                                          X86MemOperand memop> {
  let Predicates = [HasPFI], hasSideEffects = 1 in {
    def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
                     OpcodeStr # " \t{$src {${mask}}|{${mask}}, $src}",
                     []>,
            EVEX, EVEX_K;
  }
}
4756
// Gather prefetches using format MRM1m.
defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m,
                        "vgatherpf0dps", VK16WM, vz32mem>,
                    EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m,
                        "vgatherpf0qps", VK8WM, vz64mem>,
                    EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m,
                        "vgatherpf0dpd", VK8WM, vy32mem>,
                    EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m,
                        "vgatherpf0qpd", VK8WM, vz64mem>,
                    EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

// Gather prefetches using format MRM2m.
defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m,
                        "vgatherpf1dps", VK16WM, vz32mem>,
                    EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m,
                        "vgatherpf1qps", VK8WM, vz64mem>,
                    EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m,
                        "vgatherpf1dpd", VK8WM, vy32mem>,
                    EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m,
                        "vgatherpf1qpd", VK8WM, vz64mem>,
                    EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

// Scatter prefetches using format MRM5m.
defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m,
                         "vscatterpf0dps", VK16WM, vz32mem>,
                     EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m,
                         "vscatterpf0qps", VK8WM, vz64mem>,
                     EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m,
                         "vscatterpf0dpd", VK8WM, vy32mem>,
                     EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m,
                         "vscatterpf0qpd", VK8WM, vz64mem>,
                     EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

// Scatter prefetches using format MRM6m.
defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m,
                         "vscatterpf1dps", VK16WM, vz32mem>,
                     EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m,
                         "vscatterpf1qps", VK8WM, vz64mem>,
                     EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m,
                         "vscatterpf1dpd", VK8WM, vy32mem>,
                     EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m,
                         "vscatterpf1qpd", VK8WM, vz64mem>,
                     EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004804//===----------------------------------------------------------------------===//
4805// VSHUFPS - VSHUFPD Operations
4806
// Two-source shuffle with an 8-bit immediate, opcode 0xC6, matching X86Shufp.
//   RC        - register class of the destination and register sources.
//   x86memop  - operand class of the memory source in the rmi form.
//   vt        - result value type used in the patterns.
//   OpcodeStr - mnemonic placed in front of the operand list.
//   mem_frag  - load fragment matched by the rmi form.
//   d         - execution domain passed to the instruction class.
multiclass avx512_shufp<RegisterClass RC, X86MemOperand x86memop,
                        ValueType vt, string OpcodeStr, PatFrag mem_frag,
                        Domain d> {
  // Second source loaded from memory.
  def rmi : AVX512PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
                       (ins RC:$src1, x86memop:$src2, i8imm:$src3),
                       OpcodeStr #
                 " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                       [(set RC:$dst,
                             (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
                                           (i8 imm:$src3))))],
                       d, IIC_SSE_SHUFP>,
            EVEX_4V, Sched<[WriteShuffleLd, ReadAfterLd]>;

  // Both sources in registers.
  def rri : AVX512PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
                       (ins RC:$src1, RC:$src2, i8imm:$src3),
                       OpcodeStr #
                 " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                       [(set RC:$dst,
                             (vt (X86Shufp RC:$src1, RC:$src2,
                                           (i8 imm:$src3))))],
                       d, IIC_SSE_SHUFP>,
            EVEX_4V, Sched<[WriteShuffle]>;
}
4825
// 512-bit single- and double-precision instantiations.
defm VSHUFPSZ : avx512_shufp<VR512, f512mem, v16f32, "vshufps", memopv16f32,
                             SSEPackedSingle>,
                PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VSHUFPDZ : avx512_shufp<VR512, f512mem, v8f64, "vshufpd", memopv8f64,
                             SSEPackedDouble>,
                PD, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;

// X86Shufp on v16i32 is selected to the single-precision instruction.
def : Pat<(v16i32 (X86Shufp VR512:$src1, VR512:$src2, (i8 imm:$imm))),
          (VSHUFPSZrri VR512:$src1, VR512:$src2, imm:$imm)>;
def : Pat<(v16i32 (X86Shufp VR512:$src1, (memopv16i32 addr:$src2),
                            (i8 imm:$imm))),
          (VSHUFPSZrmi VR512:$src1, addr:$src2, imm:$imm)>;

// X86Shufp on v8i64 is selected to the double-precision instruction.
def : Pat<(v8i64 (X86Shufp VR512:$src1, VR512:$src2, (i8 imm:$imm))),
          (VSHUFPDZrri VR512:$src1, VR512:$src2, imm:$imm)>;
def : Pat<(v8i64 (X86Shufp VR512:$src1, (memopv8i64 addr:$src2),
                           (i8 imm:$imm))),
          (VSHUFPDZrmi VR512:$src1, addr:$src2, imm:$imm)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004842
// VALIGN for the vector type described by the X86VectorVTInfo argument; the
// mnemonic is "valign" followed by the type's Suffix.
multiclass avx512_valign<X86VectorVTInfo _> {
  // Register form, generated through AVX512_masking. The X86VAlign node
  // lists its two vector operands in the opposite order from the
  // instruction's inputs.
  defm rri : AVX512_masking<0x03, MRMSrcReg, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2, i8imm:$src3),
                            "valign"##_.Suffix,
                            "$src3, $src2, $src1", "$src1, $src2, $src3",
                            (_.VT (X86VAlign _.RC:$src2, _.RC:$src1,
                                             (i8 imm:$src3))),
                            _.VT, _.RC, _.KRCWM>,
             AVX512AIi8Base, EVEX_4V;

  // The same node on the floating-point vector type selects the rri
  // instruction too, with the operands swapped the same way.
  def : Pat<(_.FloatVT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 imm:$imm))),
            (!cast<Instruction>(NAME##rri) _.RC:$src2, _.RC:$src1, imm:$imm)>;

  // Memory form; no selection pattern.
  let mayLoad = 1 in {
    def rmi : AVX512AIi8<0x03, MRMSrcMem, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.MemOp:$src2, i8imm:$src3),
                         !strconcat("valign"##_.Suffix,
                 " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                         []>,
              EVEX_4V;
  }
}
// VALIGN instantiations for v16i32 and v8i64.
defm VALIGND : avx512_valign<v16i32_info>,
               EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VALIGNQ : avx512_valign<v8i64_info>,
               VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;

// Helper fragments to match sext vXi1 to vXiY: an arithmetic right shift of
// $src by 31 (v16i32) or by 63 (v8i64).
def v16i1sextv16i32 : PatLeaf<(v16i32 (X86vsrai VR512:$src, (i8 31)))>;
def v8i1sextv8i64   : PatLeaf<(v8i64 (X86vsrai VR512:$src, (i8 63)))>;
4871
// One-source instruction in register, memory and broadcast-memory forms, each
// in plain, masked (k) and masked-with-{z} (kz) variants. No form carries a
// selection pattern.
//   opc           - opcode byte.
//   OpcodeStr     - mnemonic placed in front of the operand list.
//   OpVT          - value type; not referenced by any definition here.
//   KRC           - mask register class used by the masked forms.
//   RC            - register class of the destination and register source.
//   x86memop      - operand class of the full-width memory source.
//   x86scalar_mop - operand class of the broadcast memory source.
//   BrdcstStr     - broadcast decoration appended to the memory operand in
//                   the rmb* asm strings.
// Every form writes RC:$dst, so the memory forms follow the register class
// of the instantiation in the same way as the register forms.
multiclass avx512_vpabs<bits<8> opc, string OpcodeStr, ValueType OpVT,
                        RegisterClass KRC, RegisterClass RC,
                        X86MemOperand x86memop, X86MemOperand x86scalar_mop,
                        string BrdcstStr> {
  // Register source.
  def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
            !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
            []>, EVEX;
  def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src),
             !strconcat(OpcodeStr, " \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
             []>, EVEX, EVEX_K;
  def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src),
              !strconcat(OpcodeStr,
                         " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
              []>, EVEX, EVEX_KZ;
  let mayLoad = 1 in {
    // Full-width memory source.
    def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
              (ins x86memop:$src),
              !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
              []>, EVEX;
    def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
               (ins KRC:$mask, x86memop:$src),
               !strconcat(OpcodeStr,
                          " \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
               []>, EVEX, EVEX_K;
    def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                (ins KRC:$mask, x86memop:$src),
                !strconcat(OpcodeStr,
                           " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
                []>, EVEX, EVEX_KZ;
    // Broadcast memory source.
    def rmb : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
               (ins x86scalar_mop:$src),
               !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
                          ", $dst|$dst, ${src}", BrdcstStr, "}"),
               []>, EVEX, EVEX_B;
    def rmbk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                (ins KRC:$mask, x86scalar_mop:$src),
                !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
                           ", $dst {${mask}}|$dst {${mask}}, ${src}", BrdcstStr, "}"),
                []>, EVEX, EVEX_B, EVEX_K;
    def rmbkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                 (ins KRC:$mask, x86scalar_mop:$src),
                 !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
                            ", $dst {${mask}} {z}|$dst {${mask}} {z}, ${src}",
                            BrdcstStr, "}"),
                 []>, EVEX, EVEX_B, EVEX_KZ;
  }
}
4919
// 512-bit instantiations for 32-bit and 64-bit elements.
defm VPABSDZ : avx512_vpabs<0x1E, "vpabsd", v16i32, VK16WM, VR512,
                            i512mem, i32mem, "{1to16}">,
               EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPABSQZ : avx512_vpabs<0x1F, "vpabsq", v8i64, VK8WM, VR512,
                            i512mem, i64mem, "{1to8}">,
               EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

// Match xor(s, add(x, s)), where s is the sext helper fragment (arithmetic
// right shift by the element width minus one), and select the register form.
def : Pat<(xor (bc_v16i32 (v16i1sextv16i32)),
               (bc_v16i32 (add (v16i32 VR512:$src), (v16i1sextv16i32)))),
          (VPABSDZrr VR512:$src)>;
def : Pat<(xor (bc_v8i64 (v8i1sextv8i64)),
               (bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))),
          (VPABSQZrr VR512:$src)>;

// Masked intrinsics with an all-zeros second operand and an all-ones mask
// select the plain register form.
def : Pat<(v16i32 (int_x86_avx512_mask_pabs_d_512
                      (v16i32 VR512:$src), (v16i32 immAllZerosV), (i16 -1))),
          (VPABSDZrr VR512:$src)>;
def : Pat<(v8i64 (int_x86_avx512_mask_pabs_q_512
                     (v8i64 VR512:$src), (bc_v8i64 (v16i32 immAllZerosV)),
                     (i8 -1))),
          (VPABSQZrr VR512:$src)>;
Elena Demikhovsky172a27c2014-01-08 10:54:22 +00004942
// One-source instruction in register (rr), memory (rm) and broadcast-memory
// (rmb) forms, each also in masked-with-{z} (kz) and masked (k) variants.
// No form carries a selection pattern.
//   opc           - opcode byte.
//   OpcodeStr     - mnemonic placed in front of the operand list.
//   RC            - register class of the destination and register source.
//   KRC           - mask register class used by the masked forms.
//   x86memop      - operand class of the full-width memory source.
//   x86scalar_mop - operand class of the broadcast memory source.
//   BrdcstStr     - broadcast decoration appended to the memory operand in
//                   the rmb* asm strings.
// The k forms take an extra $src1 input tied to $dst. The rr asm string has
// no whitespace before the '|' variant separator, matching rm.
multiclass avx512_conflict<bits<8> opc, string OpcodeStr,
                        RegisterClass RC, RegisterClass KRC,
                        X86MemOperand x86memop,
                        X86MemOperand x86scalar_mop, string BrdcstStr> {
  def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
       (ins RC:$src),
       !strconcat(OpcodeStr, " \t{$src, ${dst}|${dst}, $src}"),
       []>, EVEX;
  def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
       (ins x86memop:$src),
       !strconcat(OpcodeStr, " \t{$src, ${dst}|${dst}, $src}"),
       []>, EVEX;
  def rmb : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
       (ins x86scalar_mop:$src),
       !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
                  ", ${dst}|${dst}, ${src}", BrdcstStr, "}"),
       []>, EVEX, EVEX_B;
  def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
       (ins KRC:$mask, RC:$src),
       !strconcat(OpcodeStr,
                  " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
       []>, EVEX, EVEX_KZ;
  def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
       (ins KRC:$mask, x86memop:$src),
       !strconcat(OpcodeStr,
                  " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
       []>, EVEX, EVEX_KZ;
  def rmbkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
       (ins KRC:$mask, x86scalar_mop:$src),
       !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
                  ", ${dst} {${mask}} {z}|${dst} {${mask}} {z}, ${src}",
                  BrdcstStr, "}"),
       []>, EVEX, EVEX_KZ, EVEX_B;

  let Constraints = "$src1 = $dst" in {
  def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
       (ins RC:$src1, KRC:$mask, RC:$src2),
       !strconcat(OpcodeStr,
                  " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
       []>, EVEX, EVEX_K;
  def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, KRC:$mask, x86memop:$src2),
       !strconcat(OpcodeStr,
                  " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
       []>, EVEX, EVEX_K;
  def rmbk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, KRC:$mask, x86scalar_mop:$src2),
       !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
                  ", ${dst} {${mask}}|${dst} {${mask}}, ${src2}", BrdcstStr, "}"),
       []>, EVEX, EVEX_K, EVEX_B;
  }
}
4995
// 512-bit VPCONFLICT instructions, instantiated from avx512_conflict and only
// available when the subtarget has CDI. Both forms use opcode 0xC4 on VR512;
// the dword form is masked with VK16WM and broadcasts from i32mem ("{1to16}"),
// the qword form is masked with VK8WM, broadcasts from i64mem ("{1to8}") and
// additionally sets VEX_W.
let Predicates = [HasCDI] in {
  defm VPCONFLICTD : avx512_conflict<0xC4, "vpconflictd", VR512, VK16WM,
                                     i512mem, i32mem, "{1to16}">,
                     EVEX_V512, EVEX_CD8<32, CD8VF>;

  defm VPCONFLICTQ : avx512_conflict<0xC4, "vpconflictq", VR512, VK8WM,
                                     i512mem, i64mem, "{1to8}">,
                     EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
} // Predicates = [HasCDI]
Elena Demikhovsky6270b382013-12-10 11:58:35 +00005007
// Select the masked conflict intrinsics to the merge-masked register forms
// (rrk). The intrinsic's second vector operand becomes $src1, which the rrk
// form ties to $dst; the GPR mask is copied into the write-mask register
// class before use.
//
// These patterns are guarded by HasCDI, the same predicate that guards the
// VPCONFLICTD/VPCONFLICTQ instruction definitions, so the intrinsic is never
// selected to a CDI instruction on a subtarget that lacks CDI.
let Predicates = [HasCDI] in {
  def : Pat<(int_x86_avx512_mask_conflict_d_512 VR512:$src2, VR512:$src1,
                                                GR16:$mask),
            (VPCONFLICTDrrk VR512:$src1,
             (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), VR512:$src2)>;

  def : Pat<(int_x86_avx512_mask_conflict_q_512 VR512:$src2, VR512:$src1,
                                                GR8:$mask),
            (VPCONFLICTQrrk VR512:$src1,
             (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>;
} // Predicates = [HasCDI]
Elena Demikhovskycf0b9ba2014-04-09 12:37:50 +00005017
// 512-bit VPLZCNT instructions. They reuse the avx512_conflict multiclass
// (same operand shapes and masking variants) with opcode 0x44, and are only
// available when the subtarget has CDI. The dword form is masked with VK16WM
// and broadcasts from i32mem ("{1to16}"); the qword form is masked with
// VK8WM, broadcasts from i64mem ("{1to8}") and additionally sets VEX_W.
let Predicates = [HasCDI] in {
  defm VPLZCNTD : avx512_conflict<0x44, "vplzcntd", VR512, VK16WM,
                                  i512mem, i32mem, "{1to16}">,
                  EVEX_V512, EVEX_CD8<32, CD8VF>;

  defm VPLZCNTQ : avx512_conflict<0x44, "vplzcntq", VR512, VK8WM,
                                  i512mem, i64mem, "{1to8}">,
                  EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
} // Predicates = [HasCDI]
5029
// Select the masked lzcnt intrinsics to the merge-masked register forms
// (rrk). The intrinsic's second vector operand becomes $src1, which the rrk
// form ties to $dst; the GPR mask is copied into the write-mask register
// class before use.
//
// These patterns are guarded by HasCDI, the same predicate that guards the
// VPLZCNTD/VPLZCNTQ instruction definitions, so the intrinsic is never
// selected to a CDI instruction on a subtarget that lacks CDI.
let Predicates = [HasCDI] in {
  def : Pat<(int_x86_avx512_mask_lzcnt_d_512 VR512:$src2, VR512:$src1,
                                             GR16:$mask),
            (VPLZCNTDrrk VR512:$src1,
             (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), VR512:$src2)>;

  def : Pat<(int_x86_avx512_mask_lzcnt_q_512 VR512:$src2, VR512:$src1,
                                             GR8:$mask),
            (VPLZCNTQrrk VR512:$src1,
             (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>;
} // Predicates = [HasCDI]
5039
// Map the generic ctlz node on 512-bit integer vectors to the unmasked
// VPLZCNT forms: a folded memory operand selects the rm form, a register
// operand selects the rr form.
//
// These patterns are guarded by HasCDI, the same predicate that guards the
// VPLZCNTD/VPLZCNTQ instruction definitions, so ctlz is never selected to a
// CDI instruction on a subtarget that lacks CDI.
let Predicates = [HasCDI] in {
  // 16 x i32
  def : Pat<(v16i32 (ctlz (memopv16i32 addr:$src))),
            (VPLZCNTDrm addr:$src)>;
  def : Pat<(v16i32 (ctlz (v16i32 VR512:$src))),
            (VPLZCNTDrr VR512:$src)>;

  // 8 x i64
  def : Pat<(v8i64 (ctlz (memopv8i64 addr:$src))),
            (VPLZCNTQrm addr:$src)>;
  def : Pat<(v8i64 (ctlz (v8i64 VR512:$src))),
            (VPLZCNTQrr VR512:$src)>;
} // Predicates = [HasCDI]
5048
// Stores of constant i1 values are emitted as byte immediates via MOV8mi.
// Both spellings of "true" (-1 and 1) store the byte value 1; false stores 0.
def : Pat<(store (i1 -1), addr:$dst),
          (MOV8mi addr:$dst, (i8 1))>;
def : Pat<(store (i1 1), addr:$dst),
          (MOV8mi addr:$dst, (i8 1))>;
def : Pat<(store (i1 0), addr:$dst),
          (MOV8mi addr:$dst, (i8 0))>;
Elena Demikhovskyacc5c9e2014-04-22 14:13:10 +00005052
// Store a single-bit mask register: the VK1 value is re-classed to VK16 and
// written to memory with KMOVWmk.
// NOTE(review): KMOVW presumably writes a 16-bit word, which would be wider
// than the i1 being stored -- confirm this cannot clobber adjacent memory.
def : Pat<(store VK1:$src, addr:$dst),
          (KMOVWmk addr:$dst,
                   (COPY_TO_REGCLASS VK1:$src, VK16))>;
5055
// Pattern fragment matching a truncating store whose memory value type is
// i1. $val is the value being stored and $ptr the destination address.
def truncstorei1 : PatFrag<(ops node:$val, node:$ptr),
                           (truncstore node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i1;
}]>;
5060
// A truncating i1 store of a GR8 value is emitted as a plain byte store
// (MOV8mr) of that register.
def : Pat<(truncstorei1 GR8:$src, addr:$dst),
          (MOV8mr addr:$dst, GR8:$src)>;
5063