blob: d932637214740f287542e25857a03746c8279db3 [file] [log] [blame]
Eric Christopher06b32cd2015-02-20 00:36:53 +00001//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file describes the X86 AVX512 instruction set, defining the
11// instructions, and properties of the instructions which are needed for code
12// generation, machine code emission, and analysis.
13//
14//===----------------------------------------------------------------------===//
15
Adam Nemet5ed17da2014-08-21 19:50:07 +000016// Group template arguments that can be derived from the vector type (EltNum x
17// EltVT). These are things like the register class for the writemask, etc.
18// The idea is to pass one of these as the template argument rather than the
19// individual arguments.
Elena Demikhovskyfa4a6c12014-12-09 07:06:32 +000020// The template is also used for scalar types, in this case numelts is 1.
//
// Template arguments:
//   numelts - number of elements (1 for the scalar records).
//   eltvt   - element value type.
//   rc      - register class that holds the value.
//   suffix  - mnemonic suffix; defaults to the empty string.
Robert Khasanov4204c1a2014-12-12 14:21:30 +000021class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
Adam Nemet5ed17da2014-08-21 19:50:07 +000022 string suffix = ""> {
23 RegisterClass RC = rc;
Robert Khasanov4204c1a2014-12-12 14:21:30 +000024 ValueType EltVT = eltvt;
Adam Nemet449b3f02014-10-15 23:42:09 +000025 int NumElts = numelts;
Adam Nemet5ed17da2014-08-21 19:50:07 +000026
27 // Corresponding mask register class.
28 RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);
29
30 // Corresponding write-mask register class.
31 RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");
32
Igor Bregerfca0a342016-01-28 13:19:25 +000033 // The mask VT.
Guy Blank548e22a2017-05-19 12:35:15 +000034 ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");
Simon Pilgrimb13961d2016-06-11 14:34:10 +000035
Adam Nemet5ed17da2014-08-21 19:50:07 +000036 // Suffix used in the instruction mnemonic.
37 string Suffix = suffix;
38
Elena Demikhovskyfa4a6c12014-12-09 07:06:32 +000039 // VTName is a string name for vector VT. For vector types it will be
40 // v # NumElts # EltVT, so for vector of 8 elements of i32 it will be v8i32
41 // It is a little bit complex for scalar types, where NumElts = 1.
42 // In this case we build v4f32 or v2f64
  // (more precisely: 4 elements for a 32-bit EltVT, 2 elements for a 64-bit
  // EltVT; any other scalar element size keeps NumElts).
43 string VTName = "v" # !if (!eq (NumElts, 1),
44 !if (!eq (EltVT.Size, 32), 4,
45 !if (!eq (EltVT.Size, 64), 2, NumElts)), NumElts) # EltVT;
Robert Khasanov2ea081d2014-08-25 14:49:34 +000046
Adam Nemet5ed17da2014-08-21 19:50:07 +000047 // The vector VT.
Robert Khasanov2ea081d2014-08-25 14:49:34 +000048 ValueType VT = !cast<ValueType>(VTName);
Adam Nemet5ed17da2014-08-21 19:50:07 +000049
  // The element type spelled as a string (the name of EltVT).
50 string EltTypeName = !cast<string>(EltVT);
51 // Size of the element type in bits, e.g. 32 for v16i32.
  // EltSizeName is the string form (EltTypeName with "i"/"f" removed);
  // EltSize is the integer form taken from EltVT.Size.
Robert Khasanov2ea081d2014-08-25 14:49:34 +000052 string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
53 int EltSize = EltVT.Size;
Adam Nemet5ed17da2014-08-21 19:50:07 +000054
55 // "i" for integer types and "f" for floating-point types
Robert Khasanov2ea081d2014-08-25 14:49:34 +000056 string TypeVariantName = !subst(EltSizeName, "", EltTypeName);
Adam Nemet5ed17da2014-08-21 19:50:07 +000057
58 // Size of RC in bits, e.g. 512 for VR512.
  // Note that the value is taken from VT.Size, not from RC itself.
59 int Size = VT.Size;
60
61 // The corresponding memory operand, e.g. i512mem for VR512.
62 X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
  // Memory operand for a single element, looked up as EltVT # "mem".
Robert Khasanov2ea081d2014-08-25 14:49:34 +000063 X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
Ayman Musaf77219e2017-02-13 09:55:48 +000064 // FP scalar memory operand for intrinsics - ssmem/sdmem.
  // Left unset (?) when the element type is neither f32 nor f64.
65 Operand IntScalarMemOp = !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
66 !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?));
Robert Khasanov2ea081d2014-08-25 14:49:34 +000067
68 // Load patterns
69 // Note: For 128/256-bit integer VT we choose loadv2i64/loadv4i64
70 // due to load promotion during legalization
  // (the expressions below make the same choice for 512-bit integer VTs,
  // which use loadv8i64 / alignedloadv8i64).
71 PatFrag LdFrag = !cast<PatFrag>("load" #
72 !if (!eq (TypeVariantName, "i"),
73 !if (!eq (Size, 128), "v2i64",
74 !if (!eq (Size, 256), "v4i64",
Craig Toppera78b7682016-08-11 06:04:07 +000075 !if (!eq (Size, 512), "v8i64",
76 VTName))), VTName));
Elena Demikhovsky2689d782015-03-02 12:46:21 +000077
78 PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" #
Craig Toppera78b7682016-08-11 06:04:07 +000079 !if (!eq (TypeVariantName, "i"),
80 !if (!eq (Size, 128), "v2i64",
81 !if (!eq (Size, 256), "v4i64",
82 !if (!eq (Size, 512), "v8i64",
83 VTName))), VTName));
Elena Demikhovsky2689d782015-03-02 12:46:21 +000084
  // Load fragment for a single element, looked up as "load" # EltVT.
Robert Khasanov2ea081d2014-08-25 14:49:34 +000085 PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
Adam Nemet5ed17da2014-08-21 19:50:07 +000086
  // Complex pattern for scalar memory operands: sse_load_f32 for f32,
  // sse_load_f64 for f64, unset (?) for every other element type.
Craig Topperd9fe6642017-02-21 04:26:10 +000087 ComplexPattern ScalarIntMemCPat = !if (!eq (EltTypeName, "f32"),
88 !cast<ComplexPattern>("sse_load_f32"),
89 !if (!eq (EltTypeName, "f64"),
90 !cast<ComplexPattern>("sse_load_f64"),
91 ?));
92
Adam Nemet5ed17da2014-08-21 19:50:07 +000093 // The corresponding float type, e.g. v16f32 for v16i32
Robert Khasanov2ea081d2014-08-25 14:49:34 +000094 // Note: For EltSize < 32, FloatVT is illegal and TableGen
95 // fails to compile, so we choose FloatVT = VT
  // (!srl(EltSize,5) == 0 is the "EltSize < 32" test).
96 ValueType FloatVT = !cast<ValueType>(
97 !if (!eq (!srl(EltSize,5),0),
98 VTName,
99 !if (!eq(TypeVariantName, "i"),
100 "v" # NumElts # "f" # EltSize,
101 VTName)));
Adam Nemet5ed17da2014-08-21 19:50:07 +0000102
  // The corresponding integer type, built the same way as FloatVT: for
  // EltSize < 32 it is VT itself, for "f" types it is
  // "v" # NumElts # "i" # EltSize, and integer types keep VT.
Asaf Badouhd4a0d9a2016-01-19 14:21:39 +0000103 ValueType IntVT = !cast<ValueType>(
104 !if (!eq (!srl(EltSize,5),0),
105 VTName,
106 !if (!eq(TypeVariantName, "f"),
107 "v" # NumElts # "i" # EltSize,
108 VTName)));
Adam Nemet5ed17da2014-08-21 19:50:07 +0000109 // The string to specify embedded broadcast in assembly.
110 string BroadcastStr = "{1to" # NumElts # "}";
Adam Nemet55536c62014-09-25 23:48:45 +0000111
Adam Nemet449b3f02014-10-15 23:42:09 +0000112 // 8-bit compressed displacement tuple/subvector format. This is only
113 // defined for NumElts <= 8.
  // The guard below is !srl(NumElts, 4) == 0, i.e. NumElts < 16; for larger
  // NumElts the field is left unset (?).
114 CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
115 !cast<CD8VForm>("CD8VT" # NumElts), ?);
116
  // Subregister index selected by Size: sub_xmm for 128, sub_ymm for 256,
  // unset (?) for any other size.
Adam Nemet55536c62014-09-25 23:48:45 +0000117 SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
118 !if (!eq (Size, 256), sub_ymm, ?));
119
  // Execution domain: SSEPackedSingle for f32, SSEPackedDouble for f64 and
  // SSEPackedInt for every other element type.
120 Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
121 !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
122 SSEPackedInt));
Adam Nemet09377232014-10-08 23:25:31 +0000123
  // FR32X for f32; every other element type (including integers) gets FR64X.
Elena Demikhovsky52e81bc2015-02-23 15:12:31 +0000124 RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);
125
Craig Topperabe80cc2016-08-28 06:06:28 +0000126 // A vector type of the same width with element type i64. This is used to
127 // create patterns for logic ops.
128 ValueType i64VT = !cast<ValueType>("v" # !srl(Size, 6) # "i64");
129
Adam Nemet09377232014-10-08 23:25:31 +0000130 // A vector type of the same width with element type i32. This is used to
131 // create the canonical constant zero node ImmAllZerosV.
132 ValueType i32VT = !cast<ValueType>("v" # !srl(Size, 5) # "i32");
133 dag ImmAllZerosV = (VT (bitconvert (i32VT immAllZerosV)));
Elena Demikhovskyd207f172015-03-03 15:03:35 +0000134
  // Name suffix selected by Size: "Z128" for 128, "Z256" for 256 and "Z" for
  // any other size.
135 string ZSuffix = !if (!eq (Size, 128), "Z128",
136 !if (!eq (Size, 256), "Z256", "Z"));
Adam Nemet5ed17da2014-08-21 19:50:07 +0000137}
138
// X86VectorVTInfo records, one per (element count, element type, register
// class) combination. The last argument is the mnemonic suffix.
//
// Records that use the VR512 register class.
Robert Khasanov2ea081d2014-08-25 14:49:34 +0000139def v64i8_info : X86VectorVTInfo<64, i8, VR512, "b">;
140def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
Adam Nemet5ed17da2014-08-21 19:50:07 +0000141def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
142def v8i64_info : X86VectorVTInfo<8, i64, VR512, "q">;
Adam Nemet6bddb8c2014-09-29 22:54:41 +0000143def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
144def v8f64_info : X86VectorVTInfo<8, f64, VR512, "pd">;
Adam Nemet5ed17da2014-08-21 19:50:07 +0000145
Robert Khasanov2ea081d2014-08-25 14:49:34 +0000146// "x" in v32i8x_info means RC = VR256X (or VR128X for the second group below)
147def v32i8x_info : X86VectorVTInfo<32, i8, VR256X, "b">;
148def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
149def v8i32x_info : X86VectorVTInfo<8, i32, VR256X, "d">;
150def v4i64x_info : X86VectorVTInfo<4, i64, VR256X, "q">;
Robert Khasanov3e534c92014-10-28 16:37:13 +0000151def v8f32x_info : X86VectorVTInfo<8, f32, VR256X, "ps">;
152def v4f64x_info : X86VectorVTInfo<4, f64, VR256X, "pd">;
Robert Khasanov2ea081d2014-08-25 14:49:34 +0000153
// Records that use the VR128X register class.
154def v16i8x_info : X86VectorVTInfo<16, i8, VR128X, "b">;
155def v8i16x_info : X86VectorVTInfo<8, i16, VR128X, "w">;
156def v4i32x_info : X86VectorVTInfo<4, i32, VR128X, "d">;
157def v2i64x_info : X86VectorVTInfo<2, i64, VR128X, "q">;
Robert Khasanov3e534c92014-10-28 16:37:13 +0000158def v4f32x_info : X86VectorVTInfo<4, f32, VR128X, "ps">;
159def v2f64x_info : X86VectorVTInfo<2, f64, VR128X, "pd">;
Robert Khasanov2ea081d2014-08-25 14:49:34 +0000160
Elena Demikhovskyfa4a6c12014-12-09 07:06:32 +0000161// We map scalar types to the smallest (128-bit) vector type
162// with the appropriate element type. This allows us to use the same masking
// logic. These records pass numelts = 1; the integer ones use the GR32/GR64
// register classes, the floating-point ones use VR128X.
Asaf Badouh2744d212015-09-20 14:31:19 +0000163def i32x_info : X86VectorVTInfo<1, i32, GR32, "si">;
164def i64x_info : X86VectorVTInfo<1, i64, GR64, "sq">;
Elena Demikhovsky905a5a62014-11-26 10:46:49 +0000165def f32x_info : X86VectorVTInfo<1, f32, VR128X, "ss">;
166def f64x_info : X86VectorVTInfo<1, f64, VR128X, "sd">;
167
// Bundles three X86VectorVTInfo records so that they can be passed around as
// a single template argument. The arguments i512/i256/i128 are stored
// unchanged in the fields info512/info256/info128.
Robert Khasanov2ea081d2014-08-25 14:49:34 +0000168class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
169 X86VectorVTInfo i128> {
170 X86VectorVTInfo info512 = i512;
171 X86VectorVTInfo info256 = i256;
172 X86VectorVTInfo info128 = i128;
173}
174
// One AVX512VLVectorVTInfo bundle per element type, listing the VR512,
// VR256X and VR128X records for that type, in that order.
175def avx512vl_i8_info : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
176 v16i8x_info>;
177def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
178 v8i16x_info>;
179def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
180 v4i32x_info>;
181def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
182 v2i64x_info>;
Robert Khasanovaf318f72014-10-30 14:21:47 +0000183def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
184 v4f32x_info>;
185def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
186 v2f64x_info>;
Robert Khasanov2ea081d2014-08-25 14:49:34 +0000187
// Groups the properties of a mask-vector type: the mask register class
// (_krc -> KRC), the write-mask register class (_krcwm -> KRCWM) and the
// value type (_vt -> KVT). The field names match the mask-related fields of
// X86VectorVTInfo.
Ayman Musa721d97f2017-06-27 12:08:37 +0000188class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
189 ValueType _vt> {
190 RegisterClass KRC = _krc;
191 RegisterClass KRCWM = _krcwm;
192 ValueType KVT = _vt;
193}
194
// X86KVectorVTInfo records for the vNi1 mask types, N = 1, 2, 4, ..., 64.
// Each pairs VK<N> / VK<N>WM with the value type v<N>i1.
Michael Zuckerman9e588312017-10-31 10:00:19 +0000195def v1i1_info : X86KVectorVTInfo<VK1, VK1WM, v1i1>;
Ayman Musa721d97f2017-06-27 12:08:37 +0000196def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>;
197def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>;
198def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>;
199def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
200def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
201def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;
202
Adam Nemet52bb6cf2014-10-08 23:25:23 +0000203// This multiclass generates the masking variants from the non-masking
204// variant. It only provides the assembly pieces for the masking variants.
205// It assumes custom ISel patterns for masking which can be provided as
206// template arguments.
//
// Three records are defined:
//   NAME    - unmasked; uses Ins and Pattern.
//   NAME#k  - merge-masked (EVEX_K); uses MaskingIns and MaskingPattern and
//             gets Constraints = MaskingConstraint.
//   NAME#kz - zero-masked (EVEX_KZ); uses ZeroMaskingIns and
//             ZeroMaskingPattern.
// OpcodeStr, AttSrcAsm and IntelSrcAsm are combined into the
// "{AT&T|Intel}" asm string; the masked forms append {${mask}} (and {z})
// after $dst. IsCommutable drives isCommutable on NAME and NAME#kz,
// IsKCommutable drives it on NAME#k.
Adam Nemet34801422014-10-08 23:25:39 +0000207multiclass AVX512_maskable_custom<bits<8> O, Format F,
208 dag Outs,
209 dag Ins, dag MaskingIns, dag ZeroMaskingIns,
210 string OpcodeStr,
211 string AttSrcAsm, string IntelSrcAsm,
212 list<dag> Pattern,
213 list<dag> MaskingPattern,
214 list<dag> ZeroMaskingPattern,
Simon Pilgrimd3e21c62017-12-09 16:20:54 +0000215 InstrItinClass itin,
Adam Nemet34801422014-10-08 23:25:39 +0000216 string MaskingConstraint = "",
Vyacheslav Klochkov6daefcf2016-08-11 22:07:33 +0000217 bit IsCommutable = 0,
218 bit IsKCommutable = 0> {
Adam Nemet52bb6cf2014-10-08 23:25:23 +0000219 let isCommutable = IsCommutable in
220 def NAME: AVX512<O, F, Outs, Ins,
Elena Demikhovsky0d7e9362015-05-11 06:05:05 +0000221 OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
Craig Topper9d2cab72016-01-11 01:03:40 +0000222 "$dst, "#IntelSrcAsm#"}",
Adam Nemet52bb6cf2014-10-08 23:25:23 +0000223 Pattern, itin>;
224
225 // Prefer over VMOV*rrk Pat<>
Craig Topper63801df2017-02-19 21:44:35 +0000226 let isCommutable = IsKCommutable in
Adam Nemet52bb6cf2014-10-08 23:25:23 +0000227 def NAME#k: AVX512<O, F, Outs, MaskingIns,
Elena Demikhovsky0d7e9362015-05-11 06:05:05 +0000228 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
229 "$dst {${mask}}, "#IntelSrcAsm#"}",
Adam Nemet52bb6cf2014-10-08 23:25:23 +0000230 MaskingPattern, itin>,
231 EVEX_K {
232 // In case of the 3src subclass this is overridden with a let.
233 string Constraints = MaskingConstraint;
Vyacheslav Klochkov6daefcf2016-08-11 22:07:33 +0000234 }
235
236 // Zero mask does not add any restrictions to commute operands transformation.
237 // So, it is Ok to use IsCommutable instead of IsKCommutable.
Craig Topper63801df2017-02-19 21:44:35 +0000238 let isCommutable = IsCommutable in // Prefer over VMOV*rrkz Pat<>
Adam Nemet52bb6cf2014-10-08 23:25:23 +0000239 def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
Elena Demikhovsky0d7e9362015-05-11 06:05:05 +0000240 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
241 "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
Adam Nemet52bb6cf2014-10-08 23:25:23 +0000242 ZeroMaskingPattern,
243 itin>,
244 EVEX_KZ;
245}
246
Robert Khasanov2ea081d2014-08-25 14:49:34 +0000247
Adam Nemet34801422014-10-08 23:25:39 +0000248// Common base class of AVX512_maskable and AVX512_maskable_3src.
//
// Turns the result dags into the three pattern lists expected by
// AVX512_maskable_custom, each one a (set _.RC:$dst, ...):
//   unmasked     - RHS
//   merge-masked - MaskingRHS (built by the caller)
//   zero-masked  - (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV)
// Select defaults to vselect. The remaining arguments are forwarded
// unchanged.
249multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
250 dag Outs,
251 dag Ins, dag MaskingIns, dag ZeroMaskingIns,
252 string OpcodeStr,
253 string AttSrcAsm, string IntelSrcAsm,
254 dag RHS, dag MaskingRHS,
Simon Pilgrim07dc6d62017-12-06 13:14:44 +0000255 InstrItinClass itin,
Elena Demikhovsky0d7e9362015-05-11 06:05:05 +0000256 SDNode Select = vselect,
Adam Nemet34801422014-10-08 23:25:39 +0000257 string MaskingConstraint = "",
Vyacheslav Klochkov6daefcf2016-08-11 22:07:33 +0000258 bit IsCommutable = 0,
259 bit IsKCommutable = 0> :
Adam Nemet34801422014-10-08 23:25:39 +0000260 AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
261 AttSrcAsm, IntelSrcAsm,
262 [(set _.RC:$dst, RHS)],
263 [(set _.RC:$dst, MaskingRHS)],
264 [(set _.RC:$dst,
Elena Demikhovsky905a5a62014-11-26 10:46:49 +0000265 (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
Simon Pilgrimd3e21c62017-12-09 16:20:54 +0000266 itin, MaskingConstraint, IsCommutable,
Vyacheslav Klochkov6daefcf2016-08-11 22:07:33 +0000267 IsKCommutable>;
Adam Nemet2e2537f2014-08-07 17:53:55 +0000268
Adam Nemet2e91ee52014-08-14 17:13:19 +0000269// This multiclass generates the unconditional/non-masking, the masking and
Elena Demikhovsky905a5a62014-11-26 10:46:49 +0000270// the zero-masking variant of the vector instruction. In the masking case, the
Adam Nemet2e91ee52014-08-14 17:13:19 +0000271// preserved vector elements come from a new dummy input operand tied to $dst.
Craig Topper3a622a12017-08-17 15:40:25 +0000272// This version uses a separate dag for non-masking and masking.
//
// RHS is used only for the unmasked pattern; MaskRHS is used for both the
// merge-masked and the zero-masked pattern. The merge-masked inputs are
// (_.RC:$src0, _.KRCWM:$mask) followed by Ins, the zero-masked inputs are
// (_.KRCWM:$mask) followed by Ins, and the masking constraint is
// "$src0 = $dst".
273multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
274 dag Outs, dag Ins, string OpcodeStr,
275 string AttSrcAsm, string IntelSrcAsm,
276 dag RHS, dag MaskRHS,
Simon Pilgrimaa902be2017-12-06 15:48:40 +0000277 InstrItinClass itin,
Craig Topper3a622a12017-08-17 15:40:25 +0000278 bit IsCommutable = 0, bit IsKCommutable = 0,
279 SDNode Select = vselect> :
280 AVX512_maskable_custom<O, F, Outs, Ins,
281 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
282 !con((ins _.KRCWM:$mask), Ins),
283 OpcodeStr, AttSrcAsm, IntelSrcAsm,
284 [(set _.RC:$dst, RHS)],
285 [(set _.RC:$dst,
286 (Select _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
287 [(set _.RC:$dst,
288 (Select _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
Simon Pilgrimd3e21c62017-12-09 16:20:54 +0000289 itin, "$src0 = $dst", IsCommutable, IsKCommutable>;
Craig Topper3a622a12017-08-17 15:40:25 +0000290
291// This multiclass generates the unconditional/non-masking, the masking and
292// the zero-masking variant of the vector instruction. In the masking case, the
293// preserved vector elements come from a new dummy input operand tied to $dst.
//
// A single RHS dag serves all three variants. The merge-masked result is
// (Select _.KRCWM:$mask, RHS, _.RC:$src0), with $src0 tied to $dst through
// the "$src0 = $dst" constraint. The zero-masked pattern is built by
// AVX512_maskable_common.
Adam Nemet34801422014-10-08 23:25:39 +0000294multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
295 dag Outs, dag Ins, string OpcodeStr,
296 string AttSrcAsm, string IntelSrcAsm,
Elena Demikhovsky0d7e9362015-05-11 06:05:05 +0000297 dag RHS,
Simon Pilgrimaa902be2017-12-06 15:48:40 +0000298 InstrItinClass itin,
Vyacheslav Klochkov6daefcf2016-08-11 22:07:33 +0000299 bit IsCommutable = 0, bit IsKCommutable = 0,
300 SDNode Select = vselect> :
Adam Nemet34801422014-10-08 23:25:39 +0000301 AVX512_maskable_common<O, F, _, Outs, Ins,
302 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
303 !con((ins _.KRCWM:$mask), Ins),
304 OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
Simon Pilgrim07dc6d62017-12-06 13:14:44 +0000305 (Select _.KRCWM:$mask, RHS, _.RC:$src0), itin,
306 Select, "$src0 = $dst", IsCommutable, IsKCommutable>;
Elena Demikhovsky905a5a62014-11-26 10:46:49 +0000307
308// This multiclass generates the unconditional/non-masking, the masking and
309// the zero-masking variant of the scalar instruction.
//
// Forwards to AVX512_maskable with X86selects as the select node and with
// IsKCommutable fixed to 0.
310multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
311 dag Outs, dag Ins, string OpcodeStr,
312 string AttSrcAsm, string IntelSrcAsm,
Elena Demikhovsky0d7e9362015-05-11 06:05:05 +0000313 dag RHS,
Simon Pilgrim4a9b1e12017-12-05 16:10:57 +0000314 InstrItinClass itin,
Elena Demikhovsky905a5a62014-11-26 10:46:49 +0000315 bit IsCommutable = 0> :
Craig Topper1aa49ca2017-09-01 07:58:14 +0000316 AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
317 RHS, itin, IsCommutable, 0, X86selects>;
Adam Nemet2e91ee52014-08-14 17:13:19 +0000318
Adam Nemet34801422014-10-08 23:25:39 +0000319// Similar to AVX512_maskable but in this case one of the source operands
Adam Nemet2e91ee52014-08-14 17:13:19 +0000320// ($src1) is already tied to $dst so we just use that for the preserved
321// vector elements. NOTE that the NonTiedIns (the ins dag) should exclude
322// $src1.
//
// _.RC:$src1 is prepended to NonTiedIns for the unmasked form, and
// (_.RC:$src1, _.KRCWM:$mask) for both masked forms. The masking constraint
// is passed as "", so the $src1/$dst tie is not set up here.
// When MaskOnly is set, the RHS handed to AVX512_maskable_common becomes
// (null_frag), while the merge-masked dag still uses the real RHS.
Adam Nemet34801422014-10-08 23:25:39 +0000323multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
324 dag Outs, dag NonTiedIns, string OpcodeStr,
325 string AttSrcAsm, string IntelSrcAsm,
Simon Pilgrim07dc6d62017-12-06 13:14:44 +0000326 dag RHS, InstrItinClass itin,
Simon Pilgrim6a009702017-11-29 17:21:15 +0000327 bit IsCommutable = 0,
Craig Topper1aa49ca2017-09-01 07:58:14 +0000328 bit IsKCommutable = 0,
Craig Topperb16598d2017-09-01 07:58:16 +0000329 SDNode Select = vselect,
330 bit MaskOnly = 0> :
Adam Nemet34801422014-10-08 23:25:39 +0000331 AVX512_maskable_common<O, F, _, Outs,
332 !con((ins _.RC:$src1), NonTiedIns),
333 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
334 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
Craig Topperb16598d2017-09-01 07:58:16 +0000335 OpcodeStr, AttSrcAsm, IntelSrcAsm,
336 !if(MaskOnly, (null_frag), RHS),
Simon Pilgrim07dc6d62017-12-06 13:14:44 +0000337 (Select _.KRCWM:$mask, RHS, _.RC:$src1), itin,
338 Select, "", IsCommutable, IsKCommutable>;
Adam Nemet2e91ee52014-08-14 17:13:19 +0000339
// Scalar counterpart of AVX512_maskable_3src: forwards every argument
// unchanged and selects X86selects as the select node.
Igor Breger15820b02015-07-01 13:24:28 +0000340multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
341 dag Outs, dag NonTiedIns, string OpcodeStr,
342 string AttSrcAsm, string IntelSrcAsm,
Simon Pilgrim4a9b1e12017-12-05 16:10:57 +0000343 dag RHS, InstrItinClass itin,
Simon Pilgrim6a009702017-11-29 17:21:15 +0000344 bit IsCommutable = 0,
Craig Topperb16598d2017-09-01 07:58:16 +0000345 bit IsKCommutable = 0,
346 bit MaskOnly = 0> :
Craig Topper1aa49ca2017-09-01 07:58:14 +0000347 AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
Simon Pilgrim6a009702017-11-29 17:21:15 +0000348 IntelSrcAsm, RHS, itin, IsCommutable, IsKCommutable,
Craig Topperb16598d2017-09-01 07:58:16 +0000349 X86selects, MaskOnly>;
Adam Nemet2b5cdbb2014-10-08 23:25:33 +0000350
// Generates the unmasked, merge-masked and zero-masked records, but only the
// unmasked one receives Pattern; both masked records get empty pattern
// lists. The masked operand lists and the "$src0 = $dst" constraint are
// built the same way as in AVX512_maskable.
Adam Nemet34801422014-10-08 23:25:39 +0000351multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
352 dag Outs, dag Ins,
353 string OpcodeStr,
354 string AttSrcAsm, string IntelSrcAsm,
Simon Pilgrim569e53b2017-12-03 21:43:54 +0000355 list<dag> Pattern,
Simon Pilgrim4a9b1e12017-12-05 16:10:57 +0000356 InstrItinClass itin> :
Adam Nemet34801422014-10-08 23:25:39 +0000357 AVX512_maskable_custom<O, F, Outs, Ins,
358 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
359 !con((ins _.KRCWM:$mask), Ins),
Elena Demikhovsky0d7e9362015-05-11 06:05:05 +0000360 OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
Simon Pilgrimd3e21c62017-12-09 16:20:54 +0000361 itin, "$src0 = $dst">;
Adam Nemet2b5cdbb2014-10-08 23:25:33 +0000362
Elena Demikhovsky431b81e2015-04-21 13:13:46 +0000363
364// Instruction with mask that puts result in mask register,
365// like "compare" and "vptest"
//
// Defines only two records: NAME (unmasked) and NAME#k (masked, EVEX_K).
// There is no zero-masking variant. isCommutable is applied to NAME only;
// NAME#k is defined outside the let.
366multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
367 dag Outs,
368 dag Ins, dag MaskingIns,
369 string OpcodeStr,
370 string AttSrcAsm, string IntelSrcAsm,
371 list<dag> Pattern,
Craig Topper225da2c2016-08-27 05:22:15 +0000372 list<dag> MaskingPattern,
Simon Pilgrim4a9b1e12017-12-05 16:10:57 +0000373 InstrItinClass itin,
Craig Topper225da2c2016-08-27 05:22:15 +0000374 bit IsCommutable = 0> {
375 let isCommutable = IsCommutable in
Elena Demikhovsky431b81e2015-04-21 13:13:46 +0000376 def NAME: AVX512<O, F, Outs, Ins,
Craig Topper156622a2016-01-11 00:44:56 +0000377 OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
378 "$dst, "#IntelSrcAsm#"}",
Simon Pilgrimbb791b32017-11-30 13:18:06 +0000379 Pattern, itin>;
Elena Demikhovsky431b81e2015-04-21 13:13:46 +0000380
381 def NAME#k: AVX512<O, F, Outs, MaskingIns,
Craig Topper156622a2016-01-11 00:44:56 +0000382 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
383 "$dst {${mask}}, "#IntelSrcAsm#"}",
Simon Pilgrimbb791b32017-11-30 13:18:06 +0000384 MaskingPattern, itin>, EVEX_K;
Elena Demikhovsky431b81e2015-04-21 13:13:46 +0000385}
386
// Wraps RHS and MaskingRHS into (set _.KRC:$dst, ...) patterns, so the
// result lands in the mask register class of the given X86VectorVTInfo, and
// forwards everything else to AVX512_maskable_custom_cmp.
387multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
388 dag Outs,
389 dag Ins, dag MaskingIns,
390 string OpcodeStr,
391 string AttSrcAsm, string IntelSrcAsm,
Craig Topper225da2c2016-08-27 05:22:15 +0000392 dag RHS, dag MaskingRHS,
Simon Pilgrim4a9b1e12017-12-05 16:10:57 +0000393 InstrItinClass itin,
Craig Topper225da2c2016-08-27 05:22:15 +0000394 bit IsCommutable = 0> :
Elena Demikhovsky431b81e2015-04-21 13:13:46 +0000395 AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
396 AttSrcAsm, IntelSrcAsm,
397 [(set _.KRC:$dst, RHS)],
Simon Pilgrimbb791b32017-11-30 13:18:06 +0000398 [(set _.KRC:$dst, MaskingRHS)], itin, IsCommutable>;
Elena Demikhovsky431b81e2015-04-21 13:13:46 +0000399
// Mask-producing instruction built from a single RHS dag. The masked form
// takes (_.KRCWM:$mask) followed by Ins as inputs, and its result is
// (and _.KRCWM:$mask, RHS).
400multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
401 dag Outs, dag Ins, string OpcodeStr,
402 string AttSrcAsm, string IntelSrcAsm,
Simon Pilgrim4a9b1e12017-12-05 16:10:57 +0000403 dag RHS, InstrItinClass itin,
Simon Pilgrimbb791b32017-11-30 13:18:06 +0000404 bit IsCommutable = 0> :
Elena Demikhovsky431b81e2015-04-21 13:13:46 +0000405 AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
406 !con((ins _.KRCWM:$mask), Ins),
407 OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
Simon Pilgrimbb791b32017-11-30 13:18:06 +0000408 (and _.KRCWM:$mask, RHS), itin, IsCommutable>;
Elena Demikhovsky431b81e2015-04-21 13:13:46 +0000409
// Pattern-less variant of AVX512_maskable_cmp: defines the unmasked and the
// masked record with empty pattern lists. The masked inputs are
// (_.KRCWM:$mask) followed by Ins.
Elena Demikhovsky29792e92015-05-07 11:24:42 +0000410multiclass AVX512_maskable_cmp_alt<bits<8> O, Format F, X86VectorVTInfo _,
411 dag Outs, dag Ins, string OpcodeStr,
Simon Pilgrimbb791b32017-11-30 13:18:06 +0000412 string AttSrcAsm, string IntelSrcAsm,
Simon Pilgrim4a9b1e12017-12-05 16:10:57 +0000413 InstrItinClass itin> :
Elena Demikhovsky29792e92015-05-07 11:24:42 +0000414 AVX512_maskable_custom_cmp<O, F, Outs,
415 Ins, !con((ins _.KRCWM:$mask),Ins), OpcodeStr,
Simon Pilgrimbb791b32017-11-30 13:18:06 +0000416 AttSrcAsm, IntelSrcAsm, [],[], itin>;
Elena Demikhovsky29792e92015-05-07 11:24:42 +0000417
Craig Topperabe80cc2016-08-28 06:06:28 +0000418// This multiclass generates the unconditional/non-masking, the masking and
419// the zero-masking variant of the vector instruction. In the masking case, the
420// preserved vector elements come from a new dummy input operand tied to $dst.
//
// RHS is used for the unmasked pattern and MaskedRHS for both masked
// patterns. IsKCommutable is not passed on, so AVX512_maskable_custom uses
// its default of 0 for the merge-masked record.
421multiclass AVX512_maskable_logic<bits<8> O, Format F, X86VectorVTInfo _,
422 dag Outs, dag Ins, string OpcodeStr,
423 string AttSrcAsm, string IntelSrcAsm,
424 dag RHS, dag MaskedRHS,
Simon Pilgrim4a9b1e12017-12-05 16:10:57 +0000425 InstrItinClass itin,
Craig Topperabe80cc2016-08-28 06:06:28 +0000426 bit IsCommutable = 0, SDNode Select = vselect> :
427 AVX512_maskable_custom<O, F, Outs, Ins,
428 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
429 !con((ins _.KRCWM:$mask), Ins),
430 OpcodeStr, AttSrcAsm, IntelSrcAsm,
431 [(set _.RC:$dst, RHS)],
432 [(set _.RC:$dst,
433 (Select _.KRCWM:$mask, MaskedRHS, _.RC:$src0))],
434 [(set _.RC:$dst,
435 (Select _.KRCWM:$mask, MaskedRHS,
436 _.ImmAllZerosV))],
Simon Pilgrimd3e21c62017-12-09 16:20:54 +0000437 itin, "$src0 = $dst", IsCommutable>;
Craig Topperabe80cc2016-08-28 06:06:28 +0000438
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000439
Craig Topper9d9251b2016-05-08 20:10:20 +0000440// Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
441// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
Marina Yatsina6fc2aaa2018-01-22 10:05:23 +0000442// swizzled by ExecutionDomainFix to pxor.
Craig Topper9d9251b2016-05-08 20:10:20 +0000443// We set canFoldAsLoad because this can be converted to a constant-pool
444// load of an all-zeros value if folding it would be beneficial.
// AVX512_512_SETALLONES is defined under the same flags and matches an
// all-ones v16i32 in VR512.
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000445let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
Craig Topper86748492016-07-11 05:36:41 +0000446 isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000447def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
Craig Topper9d9251b2016-05-08 20:10:20 +0000448 [(set VR512:$dst, (v16i32 immAllZerosV))]>;
Craig Topper516e14c2016-07-11 05:36:48 +0000449def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
450 [(set VR512:$dst, (v16i32 immAllOnesV))]>;
Craig Topperfb1746b2014-01-30 06:03:19 +0000451}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000452
Craig Topper6393afc2017-01-09 02:44:34 +0000453// Alias instructions that allow VPTERNLOG to be used with a mask to create
454// a mix of all ones and all zeros elements. This is done this way to force
455// the same register to be used as input for all three sources.
// Each pseudo matches a vselect on the mask that picks between all-ones and
// all-zeros: _32 takes a VK16WM mask and works on v16i32, _64 takes a VK8WM
// mask and works on v8i64 (spelled as a bitcast of the v16i32 constants).
Simon Pilgrim26f106f2017-12-08 15:17:32 +0000456let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
Craig Topper6393afc2017-01-09 02:44:34 +0000457def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
458 (ins VK16WM:$mask), "",
459 [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
460 (v16i32 immAllOnesV),
461 (v16i32 immAllZerosV)))]>;
462def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
463 (ins VK8WM:$mask), "",
464 [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
465 (bc_v8i64 (v16i32 immAllOnesV)),
466 (bc_v8i64 (v16i32 immAllZerosV))))]>;
467}
468
// Zero-vector pseudos for the VR128X and VR256X register classes, defined
// under the same flags as AVX512_512_SET0. They match an all-zeros v4i32 and
// v8i32 respectively.
Craig Toppere5ce84a2016-05-08 21:33:53 +0000469let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
Craig Topper09b7e0f2017-01-14 07:29:24 +0000470 isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
Craig Toppere5ce84a2016-05-08 21:33:53 +0000471def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
472 [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
473def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
474 [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
475}
476
Craig Topperadd9cc62016-12-18 06:23:14 +0000477// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
478// This is expanded by ExpandPostRAPseudos.
// These are the HasAVX512 versions: they produce a floating-point zero
// (fp32imm0 / fpimm0) in the FR32X / FR64X register classes.
479let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
Craig Topper09b7e0f2017-01-14 07:29:24 +0000480 isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
Craig Topperadd9cc62016-12-18 06:23:14 +0000481 def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
482 [(set FR32X:$dst, fp32imm0)]>;
483 def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
484 [(set FR64X:$dst, fpimm0)]>;
485}
486
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000487//===----------------------------------------------------------------------===//
488// AVX-512 - VECTOR INSERT
489//
Craig Topper3a622a12017-08-17 15:40:25 +0000490
491// Supports two different pattern operators for mask and unmasked ops. Allows
492// null_frag to be passed for one.
//
// Defines the register form (rr) and the memory form (rm) of a subvector
// insert of a From-typed value into a To-typed vector. The mnemonic is
// "vinsert" # From.EltTypeName # "x" # From.NumElts. vinsert_insert is used
// for the unmasked pattern and vinsert_for_mask for the masked patterns.
493multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
494 X86VectorVTInfo To,
495 SDPatternOperator vinsert_insert,
Simon Pilgrim031d8b72017-12-01 18:40:32 +0000496 SDPatternOperator vinsert_for_mask,
497 OpndItins itins> {
Craig Topperc228d792017-09-05 05:49:44 +0000498 let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
  // Register source: $src2 is a From.RC register.
Craig Topper3a622a12017-08-17 15:40:25 +0000499 defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
Ayman Musaf77219e2017-02-13 09:55:48 +0000500 (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
Igor Breger0ede3cb2015-09-20 06:52:42 +0000501 "vinsert" # From.EltTypeName # "x" # From.NumElts,
502 "$src3, $src2, $src1", "$src1, $src2, $src3",
503 (vinsert_insert:$src3 (To.VT To.RC:$src1),
504 (From.VT From.RC:$src2),
Craig Topper3a622a12017-08-17 15:40:25 +0000505 (iPTR imm)),
506 (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
507 (From.VT From.RC:$src2),
Simon Pilgrim031d8b72017-12-01 18:40:32 +0000508 (iPTR imm)), itins.rr>,
509 AVX512AIi8Base, EVEX_4V, Sched<[itins.Sched]>;
  // Memory source: $src2 is loaded through From.LdFrag and bitconverted to
  // From.VT; this form also carries the EVEX_CD8 compressed-displacement
  // information taken from From.
Craig Topperc228d792017-09-05 05:49:44 +0000510 let mayLoad = 1 in
Craig Topper3a622a12017-08-17 15:40:25 +0000511 defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
Ayman Musaf77219e2017-02-13 09:55:48 +0000512 (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
Igor Breger0ede3cb2015-09-20 06:52:42 +0000513 "vinsert" # From.EltTypeName # "x" # From.NumElts,
514 "$src3, $src2, $src1", "$src1, $src2, $src3",
515 (vinsert_insert:$src3 (To.VT To.RC:$src1),
516 (From.VT (bitconvert (From.LdFrag addr:$src2))),
Craig Topper3a622a12017-08-17 15:40:25 +0000517 (iPTR imm)),
518 (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
519 (From.VT (bitconvert (From.LdFrag addr:$src2))),
Simon Pilgrim031d8b72017-12-01 18:40:32 +0000520 (iPTR imm)), itins.rm>, AVX512AIi8Base, EVEX_4V,
521 EVEX_CD8<From.EltSize, From.CD8TupleForm>,
522 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Adam Nemet4e2ef472014-10-02 23:18:28 +0000523 }
Adam Nemet4285c1f2014-10-15 23:42:17 +0000524}
Adam Nemet4e2ef472014-10-02 23:18:28 +0000525
Craig Topper3a622a12017-08-17 15:40:25 +0000526// Passes the same pattern operator for masked and unmasked ops.
// Thin wrapper over vinsert_for_size_split, with vinsert_insert supplied for
// both operator arguments.
527multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
528 X86VectorVTInfo To,
Simon Pilgrim031d8b72017-12-01 18:40:32 +0000529 SDPatternOperator vinsert_insert,
530 OpndItins itins> :
531 vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, itins>;
Craig Topper3a622a12017-08-17 15:40:25 +0000532
// Emits two selection patterns, guarded by the predicate list p, that map a
// vinsert_insert of a From-typed value into a To-typed vector onto an
// existing instruction. The instruction is looked up by name as
// InstrStr#"rr" for a register source and InstrStr#"rm" for a loaded source.
// The immediate operand is produced by applying INSERT_get_vinsert_imm to
// the matched $ins node.
Igor Breger0ede3cb2015-09-20 06:52:42 +0000533multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
534 X86VectorVTInfo To, PatFrag vinsert_insert,
535 SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
536 let Predicates = p in {
  // Register source.
Adam Nemet4285c1f2014-10-15 23:42:17 +0000537 def : Pat<(vinsert_insert:$ins
Igor Breger0ede3cb2015-09-20 06:52:42 +0000538 (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
539 (To.VT (!cast<Instruction>(InstrStr#"rr")
540 To.RC:$src1, From.RC:$src2,
541 (INSERT_get_vinsert_imm To.RC:$ins)))>;
542
  // Memory source, loaded through From.LdFrag and bitconverted to From.VT.
543 def : Pat<(vinsert_insert:$ins
544 (To.VT To.RC:$src1),
545 (From.VT (bitconvert (From.LdFrag addr:$src2))),
546 (iPTR imm)),
547 (To.VT (!cast<Instruction>(InstrStr#"rm")
548 To.RC:$src1, addr:$src2,
549 (INSERT_get_vinsert_imm To.RC:$ins)))>;
550 }
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000551}
552
// Instantiates all subvector-insert instruction variants for one element
// domain. EltVT32/EltVT64 are the 32-bit and 64-bit element types, Opcode128
// is used by the variants inserting a VR128X subvector and Opcode256 by the
// variants inserting a VR256X subvector.
multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
                            ValueType EltVT64, int Opcode256,
                            OpndItins itins> {

  // 4 x 32-bit elements (VR128X) into a VR256X vector.
  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
                               X86VectorVTInfo< 4, EltVT32, VR128X>,
                               X86VectorVTInfo< 8, EltVT32, VR256X>,
                               vinsert128_insert, itins>, EVEX_V256;

  // 4 x 32-bit elements (VR128X) into a VR512 vector.
  defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
                          X86VectorVTInfo< 4, EltVT32, VR128X>,
                          X86VectorVTInfo<16, EltVT32, VR512>,
                          vinsert128_insert, itins>, EVEX_V512;

  // 4 x 64-bit elements (VR256X) into a VR512 vector.
  defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
                          X86VectorVTInfo< 4, EltVT64, VR256X>,
                          X86VectorVTInfo< 8, EltVT64, VR512>,
                          vinsert256_insert, itins>, VEX_W, EVEX_V512;

  // Even with DQI we'd like to only use these instructions for masking.
  // (null_frag is passed as the unmasked pattern operator.)
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
                               X86VectorVTInfo< 2, EltVT64, VR128X>,
                               X86VectorVTInfo< 4, EltVT64, VR256X>,
                               null_frag, vinsert128_insert, itins>,
                               VEX_W, EVEX_V256;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
                            X86VectorVTInfo< 2, EltVT64, VR128X>,
                            X86VectorVTInfo< 8, EltVT64, VR512>,
                            null_frag, vinsert128_insert, itins>,
                            VEX_W, EVEX_V512;

    defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
                            X86VectorVTInfo< 8, EltVT32, VR256X>,
                            X86VectorVTInfo<16, EltVT32, VR512>,
                            null_frag, vinsert256_insert, itins>,
                            EVEX_V512;
  }
}
596
// Itinerary/scheduling bundles for the subvector inserts: one for the
// floating-point flavour and one for the integer flavour.
// FIXME: Is there a better scheduler itinerary for VINSERTF/VINSERTI?
let Sched = WriteFShuffle256 in
def AVX512_VINSERTF : OpndItins<IIC_SSE_SHUFP, IIC_SSE_SHUFP>;

let Sched = WriteShuffle256 in
def AVX512_VINSERTI : OpndItins<IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI>;

// Template arguments: 32-bit element type, opcode for VR128X inserts,
// 64-bit element type, opcode for VR256X inserts, itinerary bundle.
defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, AVX512_VINSERTF>;
defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, AVX512_VINSERTI>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000609
// Codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasVLX]>;

defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasAVX512]>;

defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
                                 vinsert256_insert, INSERT_get_vinsert256_imm,
                                 [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
                                 vinsert256_insert, INSERT_get_vinsert256_imm,
                                 [HasAVX512]>;

// Codegen patterns with the alternative types: insert VEC128 into VEC256.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasVLX]>;
// Codegen patterns with the alternative types: insert VEC128 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasAVX512]>;
// Codegen patterns with the alternative types: insert VEC256 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
                                 vinsert256_insert, INSERT_get_vinsert256_imm,
                                 [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
                                 vinsert256_insert, INSERT_get_vinsert256_imm,
                                 [HasAVX512]>;
642
Craig Topperf7a19db2017-10-08 01:33:40 +0000643
// Masked isel patterns for a subvector insert whose result is bitcast to
// Cast.VT before being fed to a vselect. The insert itself is matched on
// From.VT/To.VT, while the mask register class and the pass-through operand
// come from Cast. Four patterns are emitted: register and folded-load source,
// each with merge masking ("rrk"/"rmk") and zero masking ("rrkz"/"rmkz").
multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                 X86VectorVTInfo To, X86VectorVTInfo Cast,
                                 PatFrag vinsert_insert,
                                 SDNodeXForm INSERT_get_vinsert_imm,
                                 list<Predicate> p> {
  let Predicates = p in {
    // Merge masking, register source.
    def : Pat<(Cast.VT
               (vselect Cast.KRCWM:$mask,
                        (bitconvert
                         (vinsert_insert:$ins (To.VT To.RC:$src1),
                                              (From.VT From.RC:$src2),
                                              (iPTR imm))),
                        Cast.RC:$src0)),
              (!cast<Instruction>(InstrStr#"rrk")
               Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
               (INSERT_get_vinsert_imm To.RC:$ins))>;
    // Merge masking, memory source.
    def : Pat<(Cast.VT
               (vselect Cast.KRCWM:$mask,
                        (bitconvert
                         (vinsert_insert:$ins (To.VT To.RC:$src1),
                                              (From.VT
                                               (bitconvert
                                                (From.LdFrag addr:$src2))),
                                              (iPTR imm))),
                        Cast.RC:$src0)),
              (!cast<Instruction>(InstrStr#"rmk")
               Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
               (INSERT_get_vinsert_imm To.RC:$ins))>;

    // Zero masking, register source.
    def : Pat<(Cast.VT
               (vselect Cast.KRCWM:$mask,
                        (bitconvert
                         (vinsert_insert:$ins (To.VT To.RC:$src1),
                                              (From.VT From.RC:$src2),
                                              (iPTR imm))),
                        Cast.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr#"rrkz")
               Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
               (INSERT_get_vinsert_imm To.RC:$ins))>;
    // Zero masking, memory source.
    def : Pat<(Cast.VT
               (vselect Cast.KRCWM:$mask,
                        (bitconvert
                         (vinsert_insert:$ins (To.VT To.RC:$src1),
                                              (From.VT
                                               (bitconvert
                                                (From.LdFrag addr:$src2))),
                                              (iPTR imm))),
                        Cast.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr#"rmkz")
               Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
               (INSERT_get_vinsert_imm To.RC:$ins))>;
  }
}
697
// Masked inserts of a VEC128 into a VEC256 where the vselect operates on a
// different element width than the insert (a bitcast sits in between).
// Template arguments: instruction base name, inserted type, destination type,
// type seen by the vselect, insert fragment, immediate transform, predicates.

// Floating point.
defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
                             v8f32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
                             v4f64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

// Integer, masked at 32-bit granularity.
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
// Integer, masked at 64-bit granularity. These must select the integer
// instruction (VINSERTI64x2Z256), matching the 512-bit VINSERTI64x2Z patterns
// and the VEXTRACTI64x2Z256 extract patterns; the floating-point
// VINSERTF64x2Z256 was previously named here by mistake.
defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v4i32x_info, v8i32x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v8i16x_info, v16i16x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v16i8x_info, v32i8x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
723
// Masked inserts of a VEC128 into a VEC512 with a bitcast between the insert
// and the vselect.

// Floating point.
defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
                             v16f32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
                             v8f64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

// Integer, masked at 32-bit granularity.
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
// Integer, masked at 64-bit granularity.
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
749
// Masked inserts of a VEC256 into a VEC512 with a bitcast between the insert
// and the vselect.

// Floating point.
defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
                             v16f32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
                             v8f64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

// Integer, masked at 32-bit granularity.
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
// Integer, masked at 64-bit granularity.
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
775
// vinsertps - insert f32 to XMM
// Both forms are selected from X86insertps; the memory form matches a scalar
// f32 load that has been widened with scalar_to_vector.
let ExeDomain = SSEPackedSingle in {
def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
      (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst,
            (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))],
      IIC_SSE_INSERTPS_RR>, EVEX_4V, Sched<[WriteFShuffle]>;
def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
      (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst,
            (X86insertps VR128X:$src1,
                         (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                         imm:$src3))],
      IIC_SSE_INSERTPS_RM>, EVEX_4V,
      EVEX_CD8<32, CD8VT1>, Sched<[WriteFShuffleLd, ReadAfterLd]>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000791
792//===----------------------------------------------------------------------===//
793// AVX-512 VECTOR EXTRACT
794//---
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000795
// Defines the register ("rr", plus its masked variants via
// AVX512_maskable_split), store ("mr") and masked-store ("mrk") forms of one
// subvector extract. Separate pattern operators are taken for the unmasked
// (vextract_extract) and masked (vextract_for_mask) register forms so that
// null_frag can be passed for either one.
multiclass vextract_for_size_split<int Opcode,
                                   X86VectorVTInfo From, X86VectorVTInfo To,
                                   SDPatternOperator vextract_extract,
                                   SDPatternOperator vextract_for_mask,
                                   OpndItins itins> {

  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    // Register destination.
    defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
                (ins From.RC:$src1, u8imm:$idx),
                "vextract" # To.EltTypeName # "x" # To.NumElts,
                "$idx, $src1", "$src1, $idx",
                (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
                (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm)),
                itins.rr>, AVX512AIi8Base, EVEX, Sched<[itins.Sched]>;

    // Memory destination; always selected from the unmasked operator.
    // NOTE(review): this is a store, yet it is scheduled with the load-folded
    // class plus ReadAfterLd - confirm this is the intended model.
    def mr  : AVX512AIi8<Opcode, MRMDestMem, (outs),
                (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
                "vextract" # To.EltTypeName # "x" # To.NumElts #
                    "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
                [(store (To.VT (vextract_extract:$idx
                                (From.VT From.RC:$src1), (iPTR imm))),
                        addr:$dst)], itins.rm>, EVEX,
                Sched<[itins.Sched.Folded, ReadAfterLd]>;

    // Masked memory destination; has no isel pattern, so mayStore is given
    // explicitly.
    let mayStore = 1, hasSideEffects = 0 in
    def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
                (ins To.MemOp:$dst, To.KRCWM:$mask,
                     From.RC:$src1, u8imm:$idx),
                "vextract" # To.EltTypeName # "x" # To.NumElts #
                    "\t{$idx, $src1, $dst {${mask}}|"
                    "$dst {${mask}}, $src1, $idx}",
                [], itins.rm>, EVEX_K, EVEX,
                Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
833
// Thin wrapper over vextract_for_size_split for the common case where the
// unmasked and the masked register forms are selected from one and the same
// pattern operator (vextract_extract is forwarded for both).
multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
                             X86VectorVTInfo To,
                             SDPatternOperator vextract_extract,
                             OpndItins itins> :
  vextract_for_size_split<Opcode, From, To,
                          vextract_extract, vextract_extract, itins>;
Craig Topper3a622a12017-08-17 15:40:25 +0000840
// Codegen patterns for the alternative types.
// Maps an unmasked subvector extract (To.VT out of From.VT) onto the "rr"
// form, and an extract whose result is stored onto the "mr" form, of the
// instruction whose base name is InstrStr. EXTRACT_get_vextract_imm turns the
// matched extract node ($ext) into the immediate operand; p is the predicate
// list guarding both patterns.
multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
           X86VectorVTInfo To, PatFrag vextract_extract,
           SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
  let Predicates = p in {
    // Result stays in a register.
    def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                       From.RC:$src1,
                       (EXTRACT_get_vextract_imm To.RC:$ext)))>;
    // Result is stored straight to memory.
    def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
                                                   (iPTR imm))),
                     addr:$dst),
              (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
               (EXTRACT_get_vextract_imm To.RC:$ext))>;
  }
}
856
// Instantiates all subvector-extract instruction variants for one element
// domain. EltVT32/EltVT64 are the 32-bit and 64-bit element types, Opcode128
// is used by the variants producing a VR128X result and Opcode256 by the
// variants producing a VR256X result.
multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
                             ValueType EltVT64, int Opcode256,
                             OpndItins itins> {
  let Predicates = [HasAVX512] in {
    // 4 x 32-bit elements (VR128X) out of a VR512 vector.
    defm NAME # "32x4Z" : vextract_for_size<Opcode128,
                            X86VectorVTInfo<16, EltVT32, VR512>,
                            X86VectorVTInfo< 4, EltVT32, VR128X>,
                            vextract128_extract, itins>,
                            EVEX_V512, EVEX_CD8<32, CD8VT4>;
    // 4 x 64-bit elements (VR256X) out of a VR512 vector.
    defm NAME # "64x4Z" : vextract_for_size<Opcode256,
                            X86VectorVTInfo< 8, EltVT64, VR512>,
                            X86VectorVTInfo< 4, EltVT64, VR256X>,
                            vextract256_extract, itins>,
                            VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
  }

  // 4 x 32-bit elements (VR128X) out of a VR256X vector.
  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
                               X86VectorVTInfo< 8, EltVT32, VR256X>,
                               X86VectorVTInfo< 4, EltVT32, VR128X>,
                               vextract128_extract, itins>,
                               EVEX_V256, EVEX_CD8<32, CD8VT4>;

  // Even with DQI we'd like to only use these instructions for masking.
  // (null_frag is passed as the unmasked pattern operator.)
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
                               X86VectorVTInfo< 4, EltVT64, VR256X>,
                               X86VectorVTInfo< 2, EltVT64, VR128X>,
                               null_frag, vextract128_extract, itins>,
                               VEX_W, EVEX_V256, EVEX_CD8<64, CD8VT2>;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
                            X86VectorVTInfo< 8, EltVT64, VR512>,
                            X86VectorVTInfo< 2, EltVT64, VR128X>,
                            null_frag, vextract128_extract, itins>,
                            VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
    defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
                            X86VectorVTInfo<16, EltVT32, VR512>,
                            X86VectorVTInfo< 8, EltVT32, VR256X>,
                            null_frag, vextract256_extract, itins>,
                            EVEX_V512, EVEX_CD8<32, CD8VT8>;
  }
}
901
// Itinerary/scheduling bundles for the subvector extracts: one for the
// floating-point flavour and one for the integer flavour.
// FIXME: Is there a better scheduler itinerary for VEXTRACTF/VEXTRACTI?
let Sched = WriteFShuffle256 in
def AVX512_VEXTRACTF : OpndItins<IIC_SSE_SHUFP, IIC_SSE_SHUFP>;

let Sched = WriteShuffle256 in
def AVX512_VEXTRACTI : OpndItins<IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI>;

// Template arguments: 32-bit element type, opcode for VR128X results,
// 64-bit element type, opcode for VR256X results, itinerary bundle.
defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, AVX512_VEXTRACTF>;
defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, AVX512_VEXTRACTI>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000914
// extract_subvector codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
                                  vextract256_extract,
                                  EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
                                  vextract256_extract,
                                  EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen patterns with the alternative types: extract VEC128 from VEC256.
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen patterns with the alternative types: extract VEC128 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasAVX512]>;
// Codegen patterns with the alternative types: extract VEC256 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
                                  vextract256_extract,
                                  EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
                                  vextract256_extract,
                                  EXTRACT_get_vextract256_imm, [HasAVX512]>;
948
Craig Topper5f3fef82016-05-22 07:40:58 +0000949
// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
// Without VLX: take the sub_ymm subregister of the 512-bit source and extract
// its subvector 1 with VEXTRACTI128rr / VEXTRACTF128rr.
let Predicates = [NoVLX] in {
  def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
            (v2i64 (VEXTRACTI128rr
                    (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                    (iPTR 1)))>;
  def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
            (v2f64 (VEXTRACTF128rr
                    (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                    (iPTR 1)))>;
  def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
            (v4i32 (VEXTRACTI128rr
                    (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                    (iPTR 1)))>;
  def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
            (v4f32 (VEXTRACTF128rr
                    (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                    (iPTR 1)))>;
  def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
            (v8i16 (VEXTRACTI128rr
                    (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                    (iPTR 1)))>;
  def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
            (v16i8 (VEXTRACTI128rr
                    (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                    (iPTR 1)))>;
}
978
// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
// With VLX: take the sub_ymm subregister of the 512-bit source and extract
// its subvector 1 with the 256-bit VEXTRACTI32x4Z256rr / VEXTRACTF32x4Z256rr.
let Predicates = [HasVLX] in {
  def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
            (v2i64 (VEXTRACTI32x4Z256rr
                    (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                    (iPTR 1)))>;
  def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
            (v2f64 (VEXTRACTF32x4Z256rr
                    (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                    (iPTR 1)))>;
  def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
            (v4i32 (VEXTRACTI32x4Z256rr
                    (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                    (iPTR 1)))>;
  def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
            (v4f32 (VEXTRACTF32x4Z256rr
                    (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                    (iPTR 1)))>;
  def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
            (v8i16 (VEXTRACTI32x4Z256rr
                    (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                    (iPTR 1)))>;
  def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
            (v16i8 (VEXTRACTI32x4Z256rr
                    (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                    (iPTR 1)))>;
}
1007
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001008
// Additional patterns for handling a bitcast between the vselect and the
// extract_subvector.
// The extract is matched on From.VT/To.VT, while the vselect (mask register
// class, pass-through operand and result) operates on Cast.VT. Two patterns
// are emitted: merge masking ("rrk") and zero masking ("rrkz").
// EXTRACT_get_vextract_imm turns the matched extract node ($ext) into the
// immediate operand; p is the predicate list guarding both patterns.
multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
                                  PatFrag vextract_extract,
                                  SDNodeXForm EXTRACT_get_vextract_imm,
                                  list<Predicate> p> {
let Predicates = p in {
  // Merge masking. The pass-through value has type Cast.VT, so it is named
  // with Cast.RC on both sides of the pattern (as vinsert_for_mask_cast does).
  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (vextract_extract:$ext
                                       (From.VT From.RC:$src), (iPTR imm)))),
                              Cast.RC:$src0)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
                      Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;

  // Zero masking.
  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (vextract_extract:$ext
                                       (From.VT From.RC:$src), (iPTR imm)))),
                              Cast.ImmAllZerosV)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
                      Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
}
}
1036
// Masked extracts of a VEC128 from a VEC256 with a bitcast between the
// extract and the vselect.
// Template arguments: instruction base name, source type, extracted type,
// type seen by the vselect, extract fragment, immediate transform, predicates.

// Floating point.
defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

// Integer, masked at 32-bit granularity.
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
// Integer, masked at 64-bit granularity.
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1062
// Masked extracts of a VEC128 from a VEC512 with a bitcast between the
// extract and the vselect.

// Floating point.
defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

// Integer, masked at 32-bit granularity.
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
// Integer, masked at 64-bit granularity.
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
1088
// Masked extracts of a VEC256 from a VEC512 with a bitcast between the
// extract and the vselect.

// Floating point.
defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
                              v8f32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
                              v4f64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;

// Integer, masked at 32-bit granularity.
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
// Integer, masked at 64-bit granularity.
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
1114
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001115// vextractps - extract 32 bits from XMM
Craig Topper03b849e2016-05-21 22:50:11 +00001116def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
Craig Topperfc946a02015-01-25 02:21:13 +00001117 (ins VR128X:$src1, u8imm:$src2),
Elena Demikhovskycf088092013-12-11 14:31:04 +00001118 "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
Simon Pilgrimd255a622017-12-06 18:46:06 +00001119 [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))],
1120 IIC_SSE_EXTRACTPS_RR>, EVEX, VEX_WIG, Sched<[WriteFShuffle]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001121
// Memory form of vextractps: the element selected by $src2 (after viewing the
// v4f32 source as v4i32) is stored to the f32mem operand $dst. There is no
// register output.
// NOTE(review): this is a store form but it is tagged with WriteFShuffleLd,
// whose name suggests a load-folding write class - confirm this is intended.
def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem,
                               (outs),
                               (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
    "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
    [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
            addr:$dst)],
    IIC_SSE_EXTRACTPS_RM>,
  EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteFShuffleLd]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001128
1129//===---------------------------------------------------------------------===//
1130// AVX-512 BROADCAST
1131//---
// Broadcast with a scalar argument.
//
// Emits selection patterns only (no instructions): an X86VBroadcast whose
// operand lives in the scalar FP register class SrcInfo.FRC is copied into
// SrcInfo.RC and fed to the register-form broadcast named
// NAME#DestInfo.ZSuffix#{r,rk,rkz}. Three patterns are produced: unmasked,
// merge-masked (vselect against $src0) and zero-masked (vselect against
// ImmAllZerosV). The opc and OpcodeStr parameters are accepted but not
// referenced in this body.
multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
                                   X86VectorVTInfo DestInfo,
                                   X86VectorVTInfo SrcInfo> {
  // Unmasked.
  def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
            (!cast<Instruction>(NAME#DestInfo.ZSuffix#r)
               (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;

  // Merge-masking: inactive lanes keep $src0.
  def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
                                  (X86VBroadcast SrcInfo.FRC:$src),
                                  DestInfo.RC:$src0)),
            (!cast<Instruction>(NAME#DestInfo.ZSuffix#rk)
               DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
               (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;

  // Zero-masking: inactive lanes are zero.
  def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
                                  (X86VBroadcast SrcInfo.FRC:$src),
                                  DestInfo.ImmAllZerosV)),
            (!cast<Instruction>(NAME#DestInfo.ZSuffix#rkz)
               DestInfo.KRCWM:$mask,
               (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;
}
Robert Khasanovaf318f72014-10-30 14:21:47 +00001150
// Split version to allow mask and broadcast node to be different types. This
// helps support the 32x2 broadcasts.
//
// Parameters:
//   opc, OpcodeStr   - opcode byte and mnemonic for both forms.
//   SchedRR, SchedRM - scheduling classes for the register and memory forms.
//   MaskInfo         - type the instruction is defined on (result register
//                      class, write-mask class and result VT).
//   DestInfo         - type produced by the broadcast node; it is bitconverted
//                      to MaskInfo.VT.
//   SrcInfo          - type of the broadcast source (register class, scalar
//                      memory operand and scalar load fragment).
//   UnmaskedOp       - node used for the unmasked patterns; masked patterns
//                      always use X86VBroadcast.
//
// Defines the `r` and `m` instruction families through AVX512_maskable_split,
// plus three extra patterns that fold a scalar_to_vector of a scalar load into
// the memory form (unmasked, merge-masked, zero-masked).
multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
                                     SchedWrite SchedRR, SchedWrite SchedRM,
                                     X86VectorVTInfo MaskInfo,
                                     X86VectorVTInfo DestInfo,
                                     X86VectorVTInfo SrcInfo,
                                     SDPatternOperator UnmaskedOp =
                                         X86VBroadcast> {
  let ExeDomain = DestInfo.ExeDomain, hasSideEffects = 0 in {
  // Register source.
  defm r : AVX512_maskable_split<opc, MRMSrcReg, MaskInfo,
                   (outs MaskInfo.RC:$dst),
                   (ins SrcInfo.RC:$src), OpcodeStr, "$src", "$src",
                   (MaskInfo.VT
                    (bitconvert
                     (DestInfo.VT
                      (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))),
                   (MaskInfo.VT
                    (bitconvert
                     (DestInfo.VT
                      (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
                   NoItinerary>, T8PD, EVEX, Sched<[SchedRR]>;
  // Scalar memory source.
  let mayLoad = 1 in
  defm m : AVX512_maskable_split<opc, MRMSrcMem, MaskInfo,
                   (outs MaskInfo.RC:$dst),
                   (ins SrcInfo.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
                   (MaskInfo.VT
                    (bitconvert
                     (DestInfo.VT (UnmaskedOp
                                   (SrcInfo.ScalarLdFrag addr:$src))))),
                   (MaskInfo.VT
                    (bitconvert
                     (DestInfo.VT (X86VBroadcast
                                   (SrcInfo.ScalarLdFrag addr:$src))))),
                   NoItinerary>, T8PD, EVEX, EVEX_CD8<SrcInfo.EltSize, CD8VT1>,
                   Sched<[SchedRM]>;
  }

  // Fold scalar_to_vector(load) into the memory form: unmasked.
  def : Pat<(MaskInfo.VT
             (bitconvert
              (DestInfo.VT (UnmaskedOp
                            (SrcInfo.VT (scalar_to_vector
                                         (SrcInfo.ScalarLdFrag addr:$src))))))),
            (!cast<Instruction>(NAME#MaskInfo.ZSuffix#m) addr:$src)>;

  // Merge-masked. The instruction is defined on MaskInfo, so its name is
  // looked up with MaskInfo.ZSuffix like the sibling `m` and `mkz` patterns
  // (this previously used DestInfo.ZSuffix; the two VTs are bitconverted into
  // one another, so presumably the suffixes always agreed).
  def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask,
                          (bitconvert
                           (DestInfo.VT
                            (X86VBroadcast
                             (SrcInfo.VT (scalar_to_vector
                                          (SrcInfo.ScalarLdFrag addr:$src)))))),
                          MaskInfo.RC:$src0)),
            (!cast<Instruction>(NAME#MaskInfo.ZSuffix#mk)
             MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask, addr:$src)>;

  // Zero-masked.
  def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask,
                          (bitconvert
                           (DestInfo.VT
                            (X86VBroadcast
                             (SrcInfo.VT (scalar_to_vector
                                          (SrcInfo.ScalarLdFrag addr:$src)))))),
                          MaskInfo.ImmAllZerosV)),
            (!cast<Instruction>(NAME#MaskInfo.ZSuffix#mkz)
             MaskInfo.KRCWM:$mask, addr:$src)>;
}
Robert Khasanovaf318f72014-10-30 14:21:47 +00001213
// Helper class to force mask and broadcast result to same type.
// Forwards to avx512_broadcast_rm_split with DestInfo used for both the mask
// type and the broadcast result type; UnmaskedOp is left at its default.
multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
                               SchedWrite SchedRR, SchedWrite SchedRM,
                               X86VectorVTInfo DestInfo,
                               X86VectorVTInfo SrcInfo>
    : avx512_broadcast_rm_split<opc, OpcodeStr, SchedRR, SchedRM,
                                DestInfo, DestInfo, SrcInfo>;
Craig Topper17854ec2017-08-30 07:48:39 +00001221
// Scalar-double style FP broadcast: instantiates the 512-bit form under
// HasAVX512 and the 256-bit form under HasVLX. Each width combines the
// instruction definitions (avx512_broadcast_rm) with the scalar-register
// lowering patterns (avx512_broadcast_scalar). The source is always the
// 128-bit info of the family; no 128-bit destination form is defined here.
multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                 WriteFShuffle256Ld, _.info512, _.info128>,
             avx512_broadcast_scalar<opc, OpcodeStr, _.info512, _.info128>,
             EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info256, _.info128>,
                avx512_broadcast_scalar<opc, OpcodeStr, _.info256, _.info128>,
                EVEX_V256;
  }
}
1238
// Scalar-single style FP broadcast: like avx512_fp_broadcast_sd but with an
// additional 128-bit destination form. The 512-bit form is guarded by
// HasAVX512; the 256-bit and 128-bit forms by HasVLX. All three widths use
// the WriteFShuffle256 / WriteFShuffle256Ld scheduling classes.
multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                 WriteFShuffle256Ld, _.info512, _.info128>,
             avx512_broadcast_scalar<opc, OpcodeStr, _.info512, _.info128>,
             EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info256, _.info128>,
                avx512_broadcast_scalar<opc, OpcodeStr, _.info256, _.info128>,
                EVEX_V256;
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info128, _.info128>,
                avx512_broadcast_scalar<opc, OpcodeStr, _.info128, _.info128>,
                EVEX_V128;
  }
}
// Scalar FP broadcasts: vbroadcastss (opcode 0x18, f32 family) and
// vbroadcastsd (opcode 0x19, f64 family, VEX_W).
defm VBROADCASTSS : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
                                           avx512vl_f32_info>;
defm VBROADCASTSD : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
                                           avx512vl_f64_info>,
                    VEX_W;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001263
// Map the 512-bit scalar-broadcast intrinsics onto the memory forms of
// VBROADCASTSS / VBROADCASTSD. The selected instructions are defined under
// HasAVX512, so the patterns carry the same predicate.
let Predicates = [HasAVX512] in {
  def : Pat<(int_x86_avx512_vbroadcast_ss_512 addr:$src),
            (VBROADCASTSSZm addr:$src)>;
  def : Pat<(int_x86_avx512_vbroadcast_sd_512 addr:$src),
            (VBROADCASTSDZm addr:$src)>;
}
Quentin Colombet4bf1c282013-10-25 17:47:18 +00001268
// Integer broadcast from a general-purpose register of class SrcRC.
// Defines the maskable `r` instruction family whose mnemonic is
// "vpbroadcast" followed by the element suffix of the vector type, and whose
// pattern is (OpNode SrcRC:$src) producing _.VT.
multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
                                    X86VectorVTInfo _,
                                    SDPatternOperator OpNode,
                                    RegisterClass SrcRC> {
  let ExeDomain = _.ExeDomain in {
    defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                             (ins SrcRC:$src),
                             "vpbroadcast"##_.Suffix, "$src", "$src",
                             (_.VT (OpNode SrcRC:$src)), NoItinerary>,
             T8PD, EVEX, Sched<[SchedRR]>;
  }
}
1279
// Byte/word integer broadcast from a general-purpose register.
//
// The instruction itself always takes a GR32 operand and is defined without
// built-in patterns (the three pattern lists are empty). Selection is done by
// the explicit patterns below, which widen the SrcRC value to 32 bits by
// inserting it at sub-register index Subreg of an IMPLICIT_DEF i32.
// `Name` is the full record-name prefix of the instruction family, to which
// r / rk / rkz are appended.
multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name,
                                      SchedWrite SchedRR, X86VectorVTInfo _,
                                      SDPatternOperator OpNode,
                                      RegisterClass SrcRC,
                                      SubRegIndex Subreg> {
  let hasSideEffects = 0, ExeDomain = _.ExeDomain in {
    defm r : AVX512_maskable_custom<opc, MRMSrcReg,
                 (outs _.RC:$dst), (ins GR32:$src),
                 !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
                 !con((ins _.KRCWM:$mask), (ins GR32:$src)),
                 "vpbroadcast"##_.Suffix, "$src", "$src", [], [], [],
                 NoItinerary, "$src0 = $dst">,
             T8PD, EVEX, Sched<[SchedRR]>;
  }

  // Unmasked.
  def : Pat <(_.VT (OpNode SrcRC:$src)),
             (!cast<Instruction>(Name#r)
                (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
                                    SrcRC:$src, Subreg)))>;

  // Merge-masked: inactive lanes come from $src0.
  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
             (!cast<Instruction>(Name#rk) _.RC:$src0, _.KRCWM:$mask,
                (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
                                    SrcRC:$src, Subreg)))>;

  // Zero-masked.
  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)),
                      _.ImmAllZerosV),
             (!cast<Instruction>(Name#rkz) _.KRCWM:$mask,
                (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
                                    SrcRC:$src, Subreg)))>;
}
1303
// Instantiates avx512_int_broadcastbw_reg at 512, 256 and 128 bits.
// The 512-bit form requires `prd`; the 256/128-bit forms additionally require
// HasVLX. The width suffix (Z / Z256 / Z128) is appended to Name so the inner
// multiclass can look the instructions up by their full record names. The
// 128-bit form uses WriteShuffle; the wider forms use WriteShuffle256.
multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
                                         AVX512VLVectorVTInfo _,
                                         SDPatternOperator OpNode,
                                         RegisterClass SrcRC,
                                         SubRegIndex Subreg, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256,
                                        _.info512, OpNode, SrcRC, Subreg>,
             EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
                                           _.info256, OpNode, SrcRC, Subreg>,
                EVEX_V256;
    defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
                                           _.info128, OpNode, SrcRC, Subreg>,
                EVEX_V128;
  }
}
1317
// Instantiates avx512_int_broadcast_reg at 512, 256 and 128 bits.
// The 512-bit form requires `prd`; the 256/128-bit forms additionally require
// HasVLX. The 128-bit form uses WriteShuffle; the wider forms use
// WriteShuffle256.
multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
                                       SDPatternOperator OpNode,
                                       RegisterClass SrcRC, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512,
                                      OpNode, SrcRC>,
             EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256,
                                         OpNode, SrcRC>,
                EVEX_V256;
    defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128,
                                         OpNode, SrcRC>,
                EVEX_V128;
  }
}
1331
// Integer broadcasts from general-purpose registers. The byte and word forms
// go through the GR32-widening multiclass and require HasBWI; the dword and
// qword forms take GR32 / GR64 directly and require HasAVX512 (the qword form
// adds VEX_W and shares opcode 0x7C with the dword form).
defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
                                                   avx512vl_i8_info,
                                                   X86VBroadcast, GR8,
                                                   sub_8bit, HasBWI>;
defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
                                                   avx512vl_i16_info,
                                                   X86VBroadcast, GR16,
                                                   sub_16bit, HasBWI>;
defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
                                                 X86VBroadcast, GR32,
                                                 HasAVX512>;
defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
                                                 X86VBroadcast, GR64,
                                                 HasAVX512>,
                     VEX_W;
Michael Liao5bf95782014-12-04 05:20:33 +00001341
// Provide aliases for broadcast from the same register class that
// automatically does the extract.
//
// When the broadcast source is a full SrcInfo vector register, take its
// sub_xmm sub-register and feed it to the register form of the broadcast
// named NAME#DestInfo.ZSuffix#"r".
multiclass avx512_int_broadcast_rm_lowering<X86VectorVTInfo DestInfo,
                                            X86VectorVTInfo SrcInfo> {
  def : Pat<(DestInfo.VT (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))),
            (!cast<Instruction>(NAME#DestInfo.ZSuffix#"r")
               (EXTRACT_SUBREG (SrcInfo.VT SrcInfo.RC:$src), sub_xmm))>;
}
1350
// Integer element broadcast from an XMM register or scalar memory, at 512,
// 256 and 128 bits. The 512-bit form requires `prd`; the narrower forms also
// require HasVLX. The 512- and 256-bit forms add lowering patterns that accept
// a wider source register and extract its low XMM part; the 128-bit form has
// no such pattern.
multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
                                      AVX512VLVectorVTInfo _, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
                                 WriteShuffle256Ld, _.info512, _.info128>,
             avx512_int_broadcast_rm_lowering<_.info512, _.info256>,
             EVEX_V512;
    // Defined separately to avoid redefinition.
    defm Z_Alt : avx512_int_broadcast_rm_lowering<_.info512, _.info512>;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
                                    WriteShuffle256Ld, _.info256, _.info128>,
                avx512_int_broadcast_rm_lowering<_.info256, _.info256>,
                EVEX_V256;
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle,
                                    WriteShuffleLd, _.info128, _.info128>,
                EVEX_V128;
  }
}
1371
// Integer element broadcasts from XMM / memory. Byte and word forms require
// HasBWI; dword and qword forms require HasAVX512 (qword adds VEX_W).
defm VPBROADCASTB : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
                                               avx512vl_i8_info, HasBWI>;
defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
                                               avx512vl_i16_info, HasBWI>;
defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
                                               avx512vl_i32_info, HasAVX512>;
defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
                                               avx512vl_i64_info, HasAVX512>,
                    VEX_W;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001380
// Subvector broadcast from memory. Defines the maskable `rm` family that
// loads a _Src-typed vector (via _Src.LdFrag, bitconverted to _Src.VT) and
// replicates it into a _Dst-typed result with X86SubVBroadcast.
multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
                                      X86VectorVTInfo _Dst,
                                      X86VectorVTInfo _Src> {
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                            (_Dst.VT
                             (X86SubVBroadcast
                              (_Src.VT (bitconvert
                                        (_Src.LdFrag addr:$src))))),
                            NoItinerary>,
            AVX5128IBase, EVEX, Sched<[WriteShuffleLd]>;
}
1390
// This should be used for the AVX512DQ broadcast instructions. It disables
// the unmasked patterns so that we only use the DQ instructions when masking
// is requested.
//
// The unmasked pattern slot is null_frag; the masked slot carries the same
// X86SubVBroadcast-of-load pattern as avx512_subvec_broadcast_rm. Because the
// unmasked form has no pattern, hasSideEffects and mayLoad are set explicitly.
multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
                                         X86VectorVTInfo _Dst,
                                         X86VectorVTInfo _Src> {
  let hasSideEffects = 0, mayLoad = 1 in {
    defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst,
                                    (outs _Dst.RC:$dst),
                                    (ins _Src.MemOp:$src), OpcodeStr,
                                    "$src", "$src",
                                    (null_frag),
                                    (_Dst.VT
                                     (X86SubVBroadcast
                                      (_Src.VT (bitconvert
                                                (_Src.LdFrag addr:$src))))),
                                    NoItinerary>,
              AVX5128IBase, EVEX, Sched<[WriteShuffleLd]>;
  }
}
1405
let Predicates = [HasAVX512] in {
  // 32-bit targets will fail to load an i64 directly but can use ZEXT_LOAD.
  // Fold the zero-extending vector load into the memory-form qword broadcast.
  def : Pat<(v8i64 (X86VBroadcast (v8i64 (X86vzload addr:$src)))),
            (VPBROADCASTQZm addr:$src)>;
}
1411
let Predicates = [HasVLX] in {
  // 32-bit targets will fail to load an i64 directly but can use ZEXT_LOAD.
  // Same folding as the 512-bit pattern, for the 128- and 256-bit forms.
  def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))),
            (VPBROADCASTQZ128m addr:$src)>;
  def : Pat<(v4i64 (X86VBroadcast (v4i64 (X86vzload addr:$src)))),
            (VPBROADCASTQZ256m addr:$src)>;
}
let Predicates = [HasVLX, HasBWI] in {
  // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
  // This means we'll encounter truncated i32 loads; match that here.

  // Truncated plain i32 load.
  def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
            (VPBROADCASTWZ128m addr:$src)>;
  def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
            (VPBROADCASTWZ256m addr:$src)>;

  // Truncated zero-extending i16 load.
  def : Pat<(v8i16 (X86VBroadcast
                    (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
            (VPBROADCASTWZ128m addr:$src)>;
  def : Pat<(v16i16 (X86VBroadcast
                     (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
            (VPBROADCASTWZ256m addr:$src)>;
}
1433
Simon Pilgrimea0d4f92016-07-22 13:58:44 +00001434//===----------------------------------------------------------------------===//
1435// AVX-512 BROADCAST SUBVECTORS
1436//
1437
// 512-bit subvector broadcasts from memory: 32x4 forms replicate a 128-bit
// vector, 64x4 forms (VEX_W) replicate a 256-bit vector.
defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                                                  v16i32_info, v4i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                                                  v16f32_info, v4f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
                                                  v8i64_info, v4i64x_info>,
                       VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
                                                  v8f64_info, v4f64x_info>,
                       VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
1450
let Predicates = [HasAVX512] in {
  // 256-bit loads of other element types reuse the 64x4 memory broadcasts.
  def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))),
            (VBROADCASTF64X4rm addr:$src)>;
  def : Pat<(v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src)))),
            (VBROADCASTI64X4rm addr:$src)>;
  def : Pat<(v32i16 (X86SubVBroadcast (bc_v16i16 (loadv4i64 addr:$src)))),
            (VBROADCASTI64X4rm addr:$src)>;
  def : Pat<(v64i8 (X86SubVBroadcast (bc_v32i8 (loadv4i64 addr:$src)))),
            (VBROADCASTI64X4rm addr:$src)>;

  // Provide fallback in case the load node that is used in the patterns above
  // is used by additional users, which prevents the pattern selection.
  // The 256-bit register is placed in the low half via INSERT_SUBREG and then
  // inserted again at index 1 with a 64x4 insert.
  def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))),
            (VINSERTF64x4Zrr
               (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
               (v4f64 VR256X:$src), 1)>;
  def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
            (VINSERTF64x4Zrr
               (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
               (v8f32 VR256X:$src), 1)>;
  def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))),
            (VINSERTI64x4Zrr
               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
               (v4i64 VR256X:$src), 1)>;
  def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
            (VINSERTI64x4Zrr
               (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
               (v8i32 VR256X:$src), 1)>;
  def : Pat<(v32i16 (X86SubVBroadcast (v16i16 VR256X:$src))),
            (VINSERTI64x4Zrr
               (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
               (v16i16 VR256X:$src), 1)>;
  def : Pat<(v64i8 (X86SubVBroadcast (v32i8 VR256X:$src))),
            (VINSERTI64x4Zrr
               (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
               (v32i8 VR256X:$src), 1)>;

  // 128-bit loads of other element types reuse the 32x4 memory broadcasts.
  def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
            (VBROADCASTF32X4rm addr:$src)>;
  def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
            (VBROADCASTI32X4rm addr:$src)>;
  def : Pat<(v32i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
            (VBROADCASTI32X4rm addr:$src)>;
  def : Pat<(v64i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
            (VBROADCASTI32X4rm addr:$src)>;
}
1491
let Predicates = [HasVLX] in {
  // 256-bit destination forms of the 32x4 subvector broadcasts.
  defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a,
                                 "vbroadcasti32x4", v8i32x_info, v4i32x_info>,
                             EVEX_V256, EVEX_CD8<32, CD8VT4>;
  defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a,
                                 "vbroadcastf32x4", v8f32x_info, v4f32x_info>,
                             EVEX_V256, EVEX_CD8<32, CD8VT4>;

  // 128-bit loads of other element types reuse the 32x4 memory broadcasts.
  def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
            (VBROADCASTF32X4Z256rm addr:$src)>;
  def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
            (VBROADCASTI32X4Z256rm addr:$src)>;
  def : Pat<(v16i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
            (VBROADCASTI32X4Z256rm addr:$src)>;
  def : Pat<(v32i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
            (VBROADCASTI32X4Z256rm addr:$src)>;

  // Provide fallback in case the load node that is used in the patterns above
  // is used by additional users, which prevents the pattern selection.
  // The 128-bit register is placed in the low half via INSERT_SUBREG and then
  // inserted again at index 1 with a 32x4 insert.
  def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
            (VINSERTF32x4Z256rr
               (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
               (v2f64 VR128X:$src), 1)>;
  def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
            (VINSERTF32x4Z256rr
               (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
               (v4f32 VR128X:$src), 1)>;
  def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
            (VINSERTI32x4Z256rr
               (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
               (v2i64 VR128X:$src), 1)>;
  def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
            (VINSERTI32x4Z256rr
               (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
               (v4i32 VR128X:$src), 1)>;
  def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
            (VINSERTI32x4Z256rr
               (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
               (v8i16 VR128X:$src), 1)>;
  def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
            (VINSERTI32x4Z256rr
               (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
               (v16i8 VR128X:$src), 1)>;
}
Simon Pilgrimea0d4f92016-07-22 13:58:44 +00001530
// 64x2 subvector broadcasts with a 256-bit destination (masked-only patterns,
// see avx512_subvec_broadcast_rm_dq).
// NOTE(review): the record names end in Z128 although the destination info is
// 256-bit and the encoding is EVEX_V256; the names are kept as-is because
// other code may refer to them.
let Predicates = [HasVLX, HasDQI] in {
  defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a,
                                 "vbroadcasti64x2", v4i64x_info, v2i64x_info>,
                             VEX_W, EVEX_V256, EVEX_CD8<64, CD8VT2>;
  defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a,
                                 "vbroadcastf64x2", v4f64x_info, v2f64x_info>,
                             VEX_W, EVEX_V256, EVEX_CD8<64, CD8VT2>;
}
1539
// 512-bit AVX512DQ subvector broadcasts (masked-only patterns): 64x2 forms
// replicate a 128-bit vector and set VEX_W, 32x8 forms replicate a 256-bit
// vector.
let Predicates = [HasDQI] in {
  defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a,
                             "vbroadcasti64x2", v8i64_info, v2i64x_info>,
                         VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
  defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b,
                             "vbroadcasti32x8", v16i32_info, v8i32x_info>,
                         EVEX_V512, EVEX_CD8<32, CD8VT8>;
  defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a,
                             "vbroadcastf64x2", v8f64_info, v2f64x_info>,
                         VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
  defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b,
                             "vbroadcastf32x8", v16f32_info, v8f32x_info>,
                         EVEX_V512, EVEX_CD8<32, CD8VT8>;
}
Adam Nemet73f72e12014-06-27 00:43:38 +00001554
// 32x2 broadcasts at 512 and 256 bits, built on the split broadcast
// multiclass: the instruction/mask type comes from _Dst, while the broadcast
// node type and the 128-bit source come from _Src. UnmaskedOp is null_frag,
// so no unmasked selection pattern is attached to the instruction definitions.
// The 512-bit form requires HasDQI; the 256-bit form also requires HasVLX.
multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
                                        AVX512VLVectorVTInfo _Dst,
                                        AVX512VLVectorVTInfo _Src> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
                                       WriteShuffle256Ld, _Dst.info512,
                                       _Src.info512, _Src.info128, null_frag>,
             EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
                                          WriteShuffle256Ld, _Dst.info256,
                                          _Src.info256, _Src.info128,
                                          null_frag>,
                EVEX_V256;
  }
}
1568
// Integer variant of the 32x2 broadcast: inherits the 512- and 256-bit forms
// from avx512_common_broadcast_32x2 and adds a 128-bit form (HasDQI + HasVLX,
// WriteShuffle / WriteShuffleLd scheduling).
multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
                                         AVX512VLVectorVTInfo _Dst,
                                         AVX512VLVectorVTInfo _Src>
    : avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle,
                                          WriteShuffleLd, _Dst.info128,
                                          _Src.info128, _Src.info128,
                                          null_frag>,
                EVEX_V128;
  }
}
1579
// 32x2 broadcasts. The integer form uses the i32x2 multiclass and therefore
// also gets a 128-bit variant; the FP form only has 512- and 256-bit variants.
defm VBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
                                                     avx512vl_i32_info,
                                                     avx512vl_i64_info>;
defm VBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
                                                    avx512vl_f32_info,
                                                    avx512vl_f64_info>;
Igor Bregerfa798a92015-11-02 07:39:36 +00001584
// Broadcast whose source is a full 256-bit register: use the low XMM
// sub-register as the operand of the register-form 256-bit broadcast.
let Predicates = [HasVLX] in {
  def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256X:$src))),
            (VBROADCASTSSZ256r
               (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm))>;
  def : Pat<(v4f64 (X86VBroadcast (v4f64 VR256X:$src))),
            (VBROADCASTSDZ256r
               (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm))>;
}
1591
// Broadcast to a 512-bit result whose source is a full 512-bit or 256-bit
// register: use the low XMM sub-register as the operand of the register-form
// 512-bit broadcast. The selected instructions are defined under HasAVX512,
// so the patterns carry the same predicate (matching the HasVLX-guarded
// 256-bit patterns that precede them).
let Predicates = [HasAVX512] in {
  def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))),
            (VBROADCASTSSZr (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
  def : Pat<(v16f32 (X86VBroadcast (v8f32 VR256X:$src))),
            (VBROADCASTSSZr (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm))>;

  def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))),
            (VBROADCASTSDZr (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>;
  def : Pat<(v8f64 (X86VBroadcast (v4f64 VR256X:$src))),
            (VBROADCASTSDZr (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm))>;
}
Robert Khasanovdd09a8f2014-10-28 12:28:51 +00001601
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001602//===----------------------------------------------------------------------===//
1603// AVX-512 BROADCAST MASK TO VECTOR REGISTER
1604//---
// Broadcast of a mask register into a vector register. Defines the single
// `rr` instruction, which takes a KRC mask register and produces _.VT via
// X86VBroadcastm.
multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
                                  X86VectorVTInfo _, RegisterClass KRC> {
  def rr : AVX512XS8I<opc, MRMSrcReg,
                      (outs _.RC:$dst), (ins KRC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                      [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))],
                      IIC_SSE_PSHUF_RI>,
           EVEX, Sched<[WriteShuffle]>;
}
1612
// Instantiates avx512_mask_broadcastm at 512, 256 and 128 bits. The 512-bit
// form requires HasCDI; the narrower forms additionally require HasVLX. The
// same mask register class KRC is used at every width.
multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo VTInfo,
                                 RegisterClass KRC> {
  let Predicates = [HasCDI] in {
    defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>,
             EVEX_V512;
  }
  let Predicates = [HasCDI, HasVLX] in {
    defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>,
                EVEX_V256;
    defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>,
                EVEX_V128;
  }
}
1622
// Mask-to-vector broadcasts: a VK16 mask into i32 elements, and a VK8 mask
// into i64 elements (VEX_W).
defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
                                             avx512vl_i32_info, VK16>;
defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
                                             avx512vl_i64_info, VK8>,
                       VEX_W;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001627
//===----------------------------------------------------------------------===//
// -- VPERMI2 - 3 source operands form --
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001630
// Itinerary/scheduling bundle used by the floating-point two-source permutes
// (VPERMI2PS/PD, VPERMT2PS/PD). Both the rr and rm slots use IIC_SSE_SHUFP.
let Sched = WriteFShuffle256 in
def AVX512_PERM2_F : OpndItins<
  IIC_SSE_SHUFP, IIC_SSE_SHUFP
>;
1635
// Itinerary/scheduling bundle used by the integer two-source permutes
// (VPERMI2B/W/D/Q, VPERMT2B/W/D/Q).
let Sched = WriteShuffle256 in
def AVX512_PERM2_I : OpndItins<
  IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI
>;
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001640
// Register (rr) and full-vector memory (rm) forms of a VPERMI2-style permute.
// $src1 is tied to $dst; the selection node is X86VPermi2X and all three
// operands use the same vector type `_`.
multiclass avx512_perm_i<bits<8> opc, string OpcodeStr, OpndItins itins,
                         X86VectorVTInfo _> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  // The index operand in the pattern should really be an integer type. However,
  // if we do that and it happens to come from a bitcast, then it becomes
  // difficult to find the bitcast needed to convert the index to the
  // destination type for the passthru since it will be folded with the bitcast
  // of the index operand.
  defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (X86VPermi2X _.RC:$src1, _.RC:$src2, _.RC:$src3)),
          itins.rr, 1>, EVEX_4V, AVX5128IBase, Sched<[itins.Sched]>;

  defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
            (ins _.RC:$src2, _.MemOp:$src3),
            OpcodeStr, "$src3, $src2", "$src2, $src3",
            (_.VT (X86VPermi2X _.RC:$src1, _.RC:$src2,
                   (_.VT (bitconvert (_.LdFrag addr:$src3))))), itins.rm, 1>,
            EVEX_4V, AVX5128IBase, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001663
// Broadcast-memory (rmb) form of a VPERMI2-style permute: $src3 is a scalar
// memory operand that is broadcast (X86VBroadcast of _.ScalarLdFrag), and the
// assembly operand is suffixed with _.BroadcastStr. $src1 is tied to $dst.
multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr, OpndItins itins,
                            X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
  defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
              (ins _.RC:$src2, _.ScalarMemOp:$src3),
              OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
              !strconcat("$src2, ${src3}", _.BroadcastStr ),
              (_.VT (X86VPermi2X _.RC:$src1,
               _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))),
              itins.rm, 1>, AVX5128IBase, EVEX_4V, EVEX_B,
              Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
1676
// Instantiate the rr/rm and rmb forms of a VPERMI2-style permute for all three
// vector lengths. The 512-bit records use the bare NAME and add no predicate
// of their own; the 128/256-bit records (NAME#128, NAME#256) require HasVLX.
multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr, OpndItins itins,
                               AVX512VLVectorVTInfo VTInfo> {
  defm NAME: avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info512>,
            avx512_perm_i_mb<opc, OpcodeStr, itins, VTInfo.info512>, EVEX_V512;
  let Predicates = [HasVLX] in {
  defm NAME#128: avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info128>,
                 avx512_perm_i_mb<opc, OpcodeStr, itins, VTInfo.info128>, EVEX_V128;
  defm NAME#256: avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info256>,
                 avx512_perm_i_mb<opc, OpcodeStr, itins, VTInfo.info256>, EVEX_V256;
  }
}
Elena Demikhovskyf07df9f2015-11-25 08:17:56 +00001688
// Like avx512_perm_i_sizes, but without the broadcast-memory (rmb) form and
// with a caller-supplied feature predicate `Prd`. The 512-bit records require
// Prd; the 128/256-bit records require Prd and HasVLX.
multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
                                  OpndItins itins,
                                  AVX512VLVectorVTInfo VTInfo,
                                  Predicate Prd> {
  let Predicates = [Prd] in
  defm NAME: avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info512>, EVEX_V512;
  let Predicates = [Prd, HasVLX] in {
  defm NAME#128: avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info128>, EVEX_V128;
  defm NAME#256: avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info256>, EVEX_V256;
  }
}
Elena Demikhovskyd3057e52015-06-18 08:56:19 +00001700
// VPERMI2 instantiations. D/Q/PS/PD get the broadcast-memory form via
// avx512_perm_i_sizes; W and B use avx512_perm_i_sizes_bw gated on HasBWI and
// HasVBMI respectively.
defm VPERMI2D  : avx512_perm_i_sizes<0x76, "vpermi2d", AVX512_PERM2_I,
                    avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2Q  : avx512_perm_i_sizes<0x76, "vpermi2q", AVX512_PERM2_I,
                    avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMI2W  : avx512_perm_i_sizes_bw<0x75, "vpermi2w", AVX512_PERM2_I,
                    avx512vl_i16_info, HasBWI>,
                    VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMI2B  : avx512_perm_i_sizes_bw<0x75, "vpermi2b", AVX512_PERM2_I,
                    avx512vl_i8_info, HasVBMI>,
                    EVEX_CD8<8, CD8VF>;
defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", AVX512_PERM2_F,
                    avx512vl_f32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", AVX512_PERM2_F,
                    avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyd3057e52015-06-18 08:56:19 +00001715
// VPERMT2
// Register (rr) and full-vector memory (rm) forms of a VPERMT2-style permute.
// $src1 is tied to $dst. Unlike avx512_perm_i, the $src2 operand takes its
// register class from a separate type info, IdxVT, while $src1/$src3 and the
// result use `_`. The selection node is X86VPermt2.
multiclass avx512_perm_t<bits<8> opc, string OpcodeStr, OpndItins itins,
                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins IdxVT.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)),
          itins.rr, 1>, EVEX_4V, AVX5128IBase, Sched<[itins.Sched]>;

  defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
            (ins IdxVT.RC:$src2, _.MemOp:$src3),
            OpcodeStr, "$src3, $src2", "$src2, $src3",
            (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
                   (bitconvert (_.LdFrag addr:$src3)))), itins.rm, 1>,
            EVEX_4V, AVX5128IBase, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
// Broadcast-memory (rmb) form of a VPERMT2-style permute: $src3 is a scalar
// memory operand that is broadcast (X86VBroadcast of _.ScalarLdFrag), and the
// assembly operand is suffixed with _.BroadcastStr. $src1 is tied to $dst and
// $src2 uses IdxVT's register class.
multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr, OpndItins itins,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
  defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
              (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
              OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
              !strconcat("$src2, ${src3}", _.BroadcastStr ),
              (_.VT (X86VPermt2 _.RC:$src1,
               IdxVT.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))),
              itins.rm, 1>, AVX5128IBase, EVEX_4V, EVEX_B,
              Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
1746
// Instantiate the rr/rm and rmb forms of a VPERMT2-style permute for all three
// vector lengths. `VTInfo` supplies the data type and `ShuffleMask` the type
// info passed as IdxVT. The 512-bit records use the bare NAME and add no
// predicate of their own; NAME#128 and NAME#256 require HasVLX.
multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr, OpndItins itins,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  defm NAME: avx512_perm_t<opc, OpcodeStr, itins, VTInfo.info512,
                              ShuffleMask.info512>,
            avx512_perm_t_mb<opc, OpcodeStr, itins, VTInfo.info512,
                              ShuffleMask.info512>, EVEX_V512;
  let Predicates = [HasVLX] in {
  defm NAME#128: avx512_perm_t<opc, OpcodeStr, itins, VTInfo.info128,
                              ShuffleMask.info128>,
                 avx512_perm_t_mb<opc, OpcodeStr, itins, VTInfo.info128,
                              ShuffleMask.info128>, EVEX_V128;
  defm NAME#256: avx512_perm_t<opc, OpcodeStr, itins, VTInfo.info256,
                              ShuffleMask.info256>,
                 avx512_perm_t_mb<opc, OpcodeStr, itins, VTInfo.info256,
                              ShuffleMask.info256>, EVEX_V256;
  }
}
1765
// Like avx512_perm_t_sizes, but without the broadcast-memory (rmb) form and
// with a caller-supplied feature predicate `Prd`. The 512-bit records require
// Prd; the 128/256-bit records require Prd and HasVLX.
multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr, OpndItins itins,
                                  AVX512VLVectorVTInfo VTInfo,
                                  AVX512VLVectorVTInfo Idx,
                                  Predicate Prd> {
  let Predicates = [Prd] in
  defm NAME: avx512_perm_t<opc, OpcodeStr, itins, VTInfo.info512,
                           Idx.info512>, EVEX_V512;
  let Predicates = [Prd, HasVLX] in {
  defm NAME#128: avx512_perm_t<opc, OpcodeStr, itins, VTInfo.info128,
                               Idx.info128>, EVEX_V128;
  defm NAME#256: avx512_perm_t<opc, OpcodeStr, itins, VTInfo.info256,
                               Idx.info256>, EVEX_V256;
  }
}
Simon Pilgrim8d5e4692017-12-01 17:24:15 +00001780
// VPERMT2 instantiations. The floating-point forms (PS/PD) pass the integer
// type info of matching element width as the second (index) type info.
defm VPERMT2D  : avx512_perm_t_sizes<0x7E, "vpermt2d", AVX512_PERM2_I,
                    avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2Q  : avx512_perm_t_sizes<0x7E, "vpermt2q", AVX512_PERM2_I,
                    avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMT2W  : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", AVX512_PERM2_I,
                    avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
                    VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMT2B  : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", AVX512_PERM2_I,
                    avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
                    EVEX_CD8<8, CD8VF>;
defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", AVX512_PERM2_F,
                    avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", AVX512_PERM2_F,
                    avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovsky299cf5112014-04-29 09:09:15 +00001795
//===----------------------------------------------------------------------===//
// AVX-512 - BLEND using mask
//
Simon Pilgrimd4953012017-12-05 21:05:25 +00001799
// Itinerary/scheduling bundle used by the floating-point mask blends
// (VBLENDMPS/PD).
let Sched = WriteFVarBlend in
def AVX512_BLENDM : OpndItins<
  IIC_SSE_ALU_F32P_RR, IIC_SSE_ALU_F32P_RM
>;
1804
// Itinerary/scheduling bundle used by the integer mask blends
// (VPBLENDMB/W/D/Q).
let Sched = WriteVarBlend in
def AVX512_PBLENDM : OpndItins<
  IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
>;
1809
// Register and full-vector memory forms of a mask blend, each in three
// flavours: unmasked (rr/rm), write-masked (rrk/rmk, EVEX_K) and the {z} form
// (rrkz/rmkz, EVEX_KZ). Every def has an empty pattern list, so no selection
// pattern is attached here; hasSideEffects is cleared explicitly and the
// memory forms set mayLoad because nothing can be inferred from a pattern.
multiclass avx512_blendmask<bits<8> opc, string OpcodeStr, OpndItins itins,
                            X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
             (ins _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
             [], itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
  def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
             [], itins.rr>, EVEX_4V, EVEX_K, Sched<[itins.Sched]>;
  def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
             [], itins.rr>, EVEX_4V, EVEX_KZ, Sched<[itins.Sched]>;
  let mayLoad = 1 in {
  def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
             (ins _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
             [], itins.rm>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[itins.Sched.Folded, ReadAfterLd]>;
  def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
             [], itins.rm>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[itins.Sched.Folded, ReadAfterLd]>;
  def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
             [], itins.rm>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
  }
}
// Broadcast-memory forms of a mask blend: write-masked (rmbk) and unmasked
// (rmb). $src2 is a scalar memory operand and the assembly operand is suffixed
// with _.BroadcastStr. Both defs have an empty pattern list, so mayLoad and
// hasSideEffects are set explicitly.
//
// ExeDomain is set from the type info here as well, so the broadcast forms
// carry the same execution domain as the rr/rm forms that avx512_blendmask
// defines for the same instruction.
multiclass avx512_blendmask_rmb<bits<8> opc, string OpcodeStr, OpndItins itins,
                                X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in {
  def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
       !strconcat(OpcodeStr,
            "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
            "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
      [], itins.rm>, EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
      Sched<[itins.Sched.Folded, ReadAfterLd]>;

  def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.RC:$src1, _.ScalarMemOp:$src2),
       !strconcat(OpcodeStr,
            "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
            "$dst, $src1, ${src2}", _.BroadcastStr, "}"),
      [], itins.rm>, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
      Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
1870
// Instantiate the mask blend, including its broadcast-memory forms, for all
// three vector lengths. The 512-bit records (Z) add no predicate of their own;
// Z256 and Z128 require HasVLX.
multiclass blendmask_dq <bits<8> opc, string OpcodeStr, OpndItins itins,
                         AVX512VLVectorVTInfo VTInfo> {
  defm Z : avx512_blendmask      <opc, OpcodeStr, itins, VTInfo.info512>,
           avx512_blendmask_rmb  <opc, OpcodeStr, itins, VTInfo.info512>, EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z256 : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info256>,
                avx512_blendmask_rmb<opc, OpcodeStr, itins, VTInfo.info256>, EVEX_V256;
    defm Z128 : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info128>,
                avx512_blendmask_rmb<opc, OpcodeStr, itins, VTInfo.info128>, EVEX_V128;
  }
}
Elena Demikhovsky172a27c2014-01-08 10:54:22 +00001883
// Instantiate the mask blend without broadcast-memory forms for all three
// vector lengths. Z requires HasBWI; Z256 and Z128 require HasBWI and HasVLX.
multiclass blendmask_bw <bits<8> opc, string OpcodeStr, OpndItins itins,
                         AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasBWI] in
    defm Z : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info512>, EVEX_V512;

  let Predicates = [HasBWI, HasVLX] in {
    defm Z256 : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info256>, EVEX_V256;
    defm Z128 : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info128>, EVEX_V128;
  }
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001894
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001895
// Mask-blend instantiations. PS/PD/D/Q include broadcast-memory forms
// (blendmask_dq); B/W do not and are gated on HasBWI (blendmask_bw).
defm VBLENDMPS : blendmask_dq <0x65, "vblendmps", AVX512_BLENDM, avx512vl_f32_info>;
defm VBLENDMPD : blendmask_dq <0x65, "vblendmpd", AVX512_BLENDM, avx512vl_f64_info>, VEX_W;
defm VPBLENDMD : blendmask_dq <0x64, "vpblendmd", AVX512_PBLENDM, avx512vl_i32_info>;
defm VPBLENDMQ : blendmask_dq <0x64, "vpblendmq", AVX512_PBLENDM, avx512vl_i64_info>, VEX_W;
defm VPBLENDMB : blendmask_bw <0x66, "vpblendmb", AVX512_PBLENDM, avx512vl_i8_info>;
defm VPBLENDMW : blendmask_bw <0x66, "vpblendmw", AVX512_PBLENDM, avx512vl_i16_info>, VEX_W;
Elena Demikhovsky172a27c2014-01-08 10:54:22 +00001902
Elena Demikhovsky172a27c2014-01-08 10:54:22 +00001903
//===----------------------------------------------------------------------===//
// Compare Instructions
//===----------------------------------------------------------------------===//

// avx512_cmp_scalar - AVX512 CMPSS and CMPSD
Igor Bregerb7e1f9d2015-09-20 15:15:10 +00001909
// Scalar floating-point compare into a mask register (opcode 0xC2).
//   _         - scalar type info (register classes, suffix, memory operands).
//   OpNode    - selection node for the ordinary forms.
//   OpNodeRnd - selection node for the {sae} form, which takes an extra
//               (i32 FROUND_NO_EXC) operand.
//   itins     - itinerary/scheduling bundle (rr / rm / Sched).
// Three groups of records are produced:
//   * rr_Int / rm_Int / rrb_Int - maskable forms on the vector register class
//     _.RC, using the symbolic condition-code operand AVXCC.
//   * rri_alt / rmi_alt / rrb_alt - assembler-only forms that take the
//     condition as a plain u8 immediate.
//   * rr / rm - codegen-only forms on the scalar register class _.FRC.
multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd,
                             OpndItins itins> {
  defm  rr_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                      (outs _.KRC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
                      "vcmp${cc}"#_.Suffix,
                      "$src2, $src1", "$src1, $src2",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              imm:$cc), itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
  let mayLoad = 1 in
  defm  rm_Int  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                    (outs _.KRC:$dst),
                    (ins _.RC:$src1, _.IntScalarMemOp:$src2, AVXCC:$cc),
                    "vcmp${cc}"#_.Suffix,
                    "$src2, $src1", "$src1, $src2",
                    (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
                        imm:$cc), itins.rm>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
                    Sched<[itins.Sched.Folded, ReadAfterLd]>;

  defm  rrb_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                     (outs _.KRC:$dst),
                     (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
                     "vcmp${cc}"#_.Suffix,
                     "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                     (OpNodeRnd (_.VT _.RC:$src1),
                                (_.VT _.RC:$src2),
                                imm:$cc,
                                (i32 FROUND_NO_EXC)), itins.rr>,
                     EVEX_4V, EVEX_B, Sched<[itins.Sched]>;
  // Accept explicit immediate argument form instead of comparison code.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    // The destination uses _.KRC like every other form in this multiclass
    // rather than a hard-coded mask register class.
    defm  rri_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
                        (outs _.KRC:$dst),
                        (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                        "vcmp"#_.Suffix,
                        "$cc, $src2, $src1", "$src1, $src2, $cc", itins.rr>, EVEX_4V,
                        Sched<[itins.Sched]>;
  let mayLoad = 1 in
    defm  rmi_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
                        (outs _.KRC:$dst),
                        (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
                        "vcmp"#_.Suffix,
                        "$cc, $src2, $src1", "$src1, $src2, $cc", itins.rm>,
                        EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
                        Sched<[itins.Sched.Folded, ReadAfterLd]>;

    defm  rrb_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
                       (outs _.KRC:$dst),
                       (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                       "vcmp"#_.Suffix,
                       "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc", itins.rr>,
                       EVEX_4V, EVEX_B, Sched<[itins.Sched]>;
  }// let isAsmParserOnly = 1, hasSideEffects = 0

  let isCodeGenOnly = 1 in {
    let isCommutable = 1 in
    def rr : AVX512Ii8<0xC2, MRMSrcReg,
                (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, AVXCC:$cc),
                !strconcat("vcmp${cc}", _.Suffix,
                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                          _.FRC:$src2,
                                          imm:$cc))],
                itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
    def rm : AVX512Ii8<0xC2, MRMSrcMem,
              (outs _.KRC:$dst),
              (ins _.FRC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
              !strconcat("vcmp${cc}", _.Suffix,
                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
              [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                        (_.ScalarLdFrag addr:$src2),
                                        imm:$cc))],
              itins.rm>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
1987
// Scalar compare instantiations, both gated on HasAVX512. The execution
// domain is set at the instantiation site: SSEPackedSingle for the f32 form
// and SSEPackedDouble for the f64 form.
let Predicates = [HasAVX512] in {
  let ExeDomain = SSEPackedSingle in
  defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsRnd,
                                   SSE_ALU_F32S>, AVX512XSIi8Base;
  let ExeDomain = SSEPackedDouble in
  defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsRnd,
                                   SSE_ALU_F64S>, AVX512XDIi8Base, VEX_W;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001996
// Packed integer compare into a mask register: register (rr) and full-vector
// memory (rm) forms, plus their write-masked variants (rrk / rmk). In the
// masked variants the pattern ANDs the compare result with $mask.
// `IsCommutable` is applied to the register forms only.
multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              OpndItins itins, X86VectorVTInfo _, bit IsCommutable> {
  let isCommutable = IsCommutable in
  def rr : AVX512BI<opc, MRMSrcReg,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))],
             itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
  def rm : AVX512BI<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
                                       (_.VT (bitconvert (_.LdFrag addr:$src2)))))],
             itins.rm>, EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  let isCommutable = IsCommutable in
  def rrk : AVX512BI<opc, MRMSrcReg,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, $src2}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                   (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))))],
              itins.rr>, EVEX_4V, EVEX_K, Sched<[itins.Sched]>;
  def rmk : AVX512BI<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, $src2}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                   (OpNode (_.VT _.RC:$src1),
                                       (_.VT (bitconvert
                                              (_.LdFrag addr:$src2))))))],
              itins.rm>, EVEX_4V, EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
2029
// Extends avx512_icmp_packed (inherited, so rr/rm/rrk/rmk are also defined)
// with broadcast-memory forms: unmasked (rmb) and write-masked (rmbk). The
// second operand is a scalar load wrapped in X86VBroadcast, and the assembly
// operand is suffixed with _.BroadcastStr.
multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  OpndItins itins, X86VectorVTInfo _, bit IsCommutable> :
           avx512_icmp_packed<opc, OpcodeStr, OpNode, itins, _, IsCommutable> {
  def rmb : AVX512BI<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
              !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
                                    "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
              [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
                              (X86VBroadcast (_.ScalarLdFrag addr:$src2))))],
              itins.rm>, EVEX_4V, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  def rmbk : AVX512BI<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                       _.ScalarMemOp:$src2),
               !strconcat(OpcodeStr,
                          "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
               [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                      (OpNode (_.VT _.RC:$src1),
                                        (X86VBroadcast
                                          (_.ScalarLdFrag addr:$src2)))))],
               itins.rm>, EVEX_4V, EVEX_K, EVEX_B,
               Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002053
// Instantiate avx512_icmp_packed for all three vector lengths. Z requires
// `prd`; Z256 and Z128 require `prd` and HasVLX. IsCommutable defaults to 0.
multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 OpndItins itins, AVX512VLVectorVTInfo VTInfo,
                                 Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, itins, VTInfo.info512,
                              IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, itins, VTInfo.info256,
                                   IsCommutable>, EVEX_V256;
    defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, itins, VTInfo.info128,
                                   IsCommutable>, EVEX_V128;
  }
}
2068
// Instantiate avx512_icmp_packed_rmb (which includes the broadcast-memory
// forms) for all three vector lengths. Z requires `prd`; Z256 and Z128 require
// `prd` and HasVLX. IsCommutable defaults to 0.
multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
                                     SDNode OpNode, OpndItins itins,
                                     AVX512VLVectorVTInfo VTInfo,
                                     Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info512,
                                  IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info256,
                                       IsCommutable>, EVEX_V256;
    defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info128,
                                       IsCommutable>, EVEX_V128;
  }
}
2084
// Packed integer equality / signed greater-than compares.
// Byte and word element forms are guarded by HasBWI and use the non-broadcast
// multiclass; dword and qword forms are guarded by HasAVX512 and use the
// broadcast-capable (rmb) multiclass. The trailing `1` on the EQ forms is the
// IsCommutable argument; the GT forms leave it at its default of 0.
// FIXME: Is there a better scheduler itinerary for VPCMP?
defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm,
                      SSE_ALU_F32P, avx512vl_i8_info, HasBWI, 1>,
                EVEX_CD8<8, CD8VF>, VEX_WIG;

defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm,
                      SSE_ALU_F32P, avx512vl_i16_info, HasBWI, 1>,
                EVEX_CD8<16, CD8VF>, VEX_WIG;

defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm,
                      SSE_ALU_F32P, avx512vl_i32_info, HasAVX512, 1>,
                EVEX_CD8<32, CD8VF>;

defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm,
                      SSE_ALU_F32P, avx512vl_i64_info, HasAVX512, 1>,
                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm,
                      SSE_ALU_F32P, avx512vl_i8_info, HasBWI>,
                EVEX_CD8<8, CD8VF>, VEX_WIG;

defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm,
                      SSE_ALU_F32P, avx512vl_i16_info, HasBWI>,
                EVEX_CD8<16, CD8VF>, VEX_WIG;

defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm,
                      SSE_ALU_F32P, avx512vl_i32_info, HasAVX512>,
                EVEX_CD8<32, CD8VF>;

defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm,
                      SSE_ALU_F32P, avx512vl_i64_info, HasAVX512>,
                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002117
// Immediate transform used when the two sources of a VPCMP/VPCMPU node are
// swapped so that a memory operand appearing first in the DAG can be folded
// as the instruction's second source. Only the low three bits of the
// immediate are considered. The ordered predicates are mirrored
// (LT <-> NLE, LE <-> NLT); EQ, NE, FALSE and TRUE are returned unchanged
// because swapping the operands does not alter their result.
def CommutePCMPCC : SDNodeXForm<imm, [{
  uint8_t Imm = N->getZExtValue() & 0x7;
  switch (Imm) {
  default: llvm_unreachable("Unreachable!");
  case 0x01: Imm = 0x06; break; // LT  -> NLE
  case 0x02: Imm = 0x05; break; // LE  -> NLT
  case 0x05: Imm = 0x02; break; // NLT -> LE
  case 0x06: Imm = 0x01; break; // NLE -> LT
  case 0x00: // EQ
  case 0x03: // FALSE
  case 0x04: // NE
  case 0x07: // TRUE
    break;
  }
  return getI8Imm(Imm, SDLoc(N));
}]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002136
// Integer compare-with-condition-code instructions producing a mask register.
//
// Defines, for the vector type described by `_`:
//   rri / rmi           - register and full-vector-memory second source.
//   rrik / rmik         - the same, with a write-mask operand; the selection
//                         pattern is (and $mask, (OpNode ...)).
//   *_alt               - assembler-only forms that take the predicate as an
//                         explicit u8 immediate instead of a ${cc} mnemonic
//                         suffix; they carry no selection pattern.
// and two anonymous patterns that match a load in the FIRST operand of OpNode
// by selecting the rmi/rmik form with the operands swapped and the immediate
// rewritten through CommutePCMPCC.
//
// Suffix is appended to the "vpcmp${cc}" / "vpcmp" mnemonic.
multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode,
                          OpndItins itins, X86VectorVTInfo _> {
  let isCommutable = 1 in
  def rri : AVX512AIi8<opc, MRMSrcReg,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, AVX512ICC:$cc),
             !strconcat("vpcmp${cc}", Suffix,
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                       imm:$cc))],
             itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
  def rmi : AVX512AIi8<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, AVX512ICC:$cc),
             !strconcat("vpcmp${cc}", Suffix,
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
                              (_.VT (bitconvert (_.LdFrag addr:$src2))),
                              imm:$cc))],
             itins.rm>, EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  let isCommutable = 1 in
  def rrik : AVX512AIi8<opc, MRMSrcReg,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
                                      AVX512ICC:$cc),
              !strconcat("vpcmp${cc}", Suffix,
                         "\t{$src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                  (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                          imm:$cc)))],
              itins.rr>, EVEX_4V, EVEX_K, Sched<[itins.Sched]>;
  def rmik : AVX512AIi8<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
                                      AVX512ICC:$cc),
              !strconcat("vpcmp${cc}", Suffix,
                         "\t{$src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                   (OpNode (_.VT _.RC:$src1),
                                      (_.VT (bitconvert (_.LdFrag addr:$src2))),
                                      imm:$cc)))],
              itins.rm>, EVEX_4V, EVEX_K,
              Sched<[itins.Sched.Folded, ReadAfterLd]>;

  // Accept explicit immediate argument form instead of comparison code.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    def rri_alt : AVX512AIi8<opc, MRMSrcReg,
               (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
               !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
                          "$dst, $src1, $src2, $cc}"),
               [], itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
    // No pattern to infer the load from, so mayLoad is set explicitly.
    let mayLoad = 1 in
    def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
               !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
                          "$dst, $src1, $src2, $cc}"),
               [], itins.rm>, EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
    def rrik_alt : AVX512AIi8<opc, MRMSrcReg,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
                                       u8imm:$cc),
               !strconcat("vpcmp", Suffix,
                          "\t{$cc, $src2, $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, $src2, $cc}"),
               [], itins.rr>, EVEX_4V, EVEX_K, Sched<[itins.Sched]>;
    let mayLoad = 1 in
    def rmik_alt : AVX512AIi8<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
                                       u8imm:$cc),
               !strconcat("vpcmp", Suffix,
                          "\t{$cc, $src2, $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, $src2, $cc}"),
               [], itins.rm>, EVEX_4V, EVEX_K,
               Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }

  // Load in the first operand: swap the sources and commute the predicate.
  def : Pat<(OpNode (bitconvert (_.LdFrag addr:$src2)),
                    (_.VT _.RC:$src1), imm:$cc),
            (!cast<Instruction>(NAME#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
                                                      (CommutePCMPCC imm:$cc))>;

  def : Pat<(and _.KRCWM:$mask, (OpNode (bitconvert (_.LdFrag addr:$src2)),
                                        (_.VT _.RC:$src1), imm:$cc)),
            (!cast<Instruction>(NAME#_.ZSuffix#"rmik") _.KRCWM:$mask,
                                                       _.RC:$src1, addr:$src2,
                                                       (CommutePCMPCC imm:$cc))>;
}
2221
// Extends avx512_icmp_cc (inherited via the parent-multiclass list) with the
// embedded-broadcast memory forms:
//   rmib / rmibk         - second source is X86VBroadcast of a scalar load;
//                          rmibk additionally ANDs the result with $mask.
//   rmib_alt / rmibk_alt - assembler-only explicit-immediate forms.
// plus two anonymous patterns that match the broadcast in the FIRST operand
// of OpNode by swapping the sources and rewriting the immediate through
// CommutePCMPCC. All broadcast forms carry EVEX_B.
multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, SDNode OpNode,
                              OpndItins itins, X86VectorVTInfo _> :
           avx512_icmp_cc<opc, Suffix, OpNode, itins, _> {
  def rmib : AVX512AIi8<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
                                     AVX512ICC:$cc),
             !strconcat("vpcmp${cc}", Suffix,
                        "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
                        "$dst, $src1, ${src2}", _.BroadcastStr, "}"),
             [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
                               (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
                               imm:$cc))],
             itins.rm>, EVEX_4V, EVEX_B,
             Sched<[itins.Sched.Folded, ReadAfterLd]>;
  def rmibk : AVX512AIi8<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                       _.ScalarMemOp:$src2, AVX512ICC:$cc),
              !strconcat("vpcmp${cc}", Suffix,
                       "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                       "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                  (OpNode (_.VT _.RC:$src1),
                                    (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
                                    imm:$cc)))],
              itins.rm>, EVEX_4V, EVEX_K, EVEX_B,
              Sched<[itins.Sched.Folded, ReadAfterLd]>;

  // Accept explicit immediate argument form instead of comparison code.
  let isAsmParserOnly = 1, hasSideEffects = 0, mayLoad = 1 in {
    def rmib_alt : AVX512AIi8<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
                                       u8imm:$cc),
               !strconcat("vpcmp", Suffix,
                   "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
                   "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
               [], itins.rm>, EVEX_4V, EVEX_B,
               Sched<[itins.Sched.Folded, ReadAfterLd]>;
    def rmibk_alt : AVX512AIi8<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                       _.ScalarMemOp:$src2, u8imm:$cc),
               !strconcat("vpcmp", Suffix,
                  "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                  "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
               [], itins.rm>, EVEX_4V, EVEX_K, EVEX_B,
               Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }

  // Broadcast in the first operand: swap the sources and commute the
  // predicate.
  def : Pat<(OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
                    (_.VT _.RC:$src1), imm:$cc),
            (!cast<Instruction>(NAME#_.ZSuffix#"rmib") _.RC:$src1, addr:$src2,
                                                       (CommutePCMPCC imm:$cc))>;

  def : Pat<(and _.KRCWM:$mask, (OpNode (X86VBroadcast
                                          (_.ScalarLdFrag addr:$src2)),
                                        (_.VT _.RC:$src1), imm:$cc)),
            (!cast<Instruction>(NAME#_.ZSuffix#"rmibk") _.KRCWM:$mask,
                                                       _.RC:$src1, addr:$src2,
                                                       (CommutePCMPCC imm:$cc))>;
}
2281
// Instantiates avx512_icmp_cc once per vector length described by VTInfo:
//   Z    - VTInfo.info512, EVEX_V512, guarded by `prd` only.
//   Z256 - VTInfo.info256, EVEX_V256, guarded by `prd` and HasVLX.
//   Z128 - VTInfo.info128, EVEX_V128, guarded by `prd` and HasVLX.
multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, SDNode OpNode,
                             OpndItins itins, AVX512VLVectorVTInfo VTInfo,
                             Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_cc<opc, Suffix, OpNode, itins, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc<opc, Suffix, OpNode, itins, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_icmp_cc<opc, Suffix, OpNode, itins, VTInfo.info128>,
                EVEX_V128;
  }
}
2296
// Instantiates avx512_icmp_cc_rmb (the broadcast-capable variant) once per
// vector length described by VTInfo:
//   Z    - VTInfo.info512, EVEX_V512, guarded by `prd` only.
//   Z256 - VTInfo.info256, EVEX_V256, guarded by `prd` and HasVLX.
//   Z128 - VTInfo.info128, EVEX_V128, guarded by `prd` and HasVLX.
multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, SDNode OpNode,
                                 OpndItins itins, AVX512VLVectorVTInfo VTInfo,
                                 Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_cc_rmb<opc, Suffix, OpNode, itins, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, itins, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, itins, VTInfo.info128>,
                EVEX_V128;
  }
}
2311
// Integer compares with an explicit predicate immediate. The plain forms use
// X86cmpm and the "U" forms use X86cmpmu. Byte/word element forms are guarded
// by HasBWI and have no broadcast variants; dword/qword forms are guarded by
// HasAVX512 and use the broadcast-capable (rmb) multiclass. The word and
// qword forms additionally set VEX_W.
// FIXME: Is there a better scheduler itinerary for VPCMP/VPCMPU?
defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86cmpm, SSE_ALU_F32P,
                                avx512vl_i8_info, HasBWI>, EVEX_CD8<8, CD8VF>;
defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86cmpmu, SSE_ALU_F32P,
                                 avx512vl_i8_info, HasBWI>, EVEX_CD8<8, CD8VF>;

defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86cmpm, SSE_ALU_F32P,
                                avx512vl_i16_info, HasBWI>,
                                VEX_W, EVEX_CD8<16, CD8VF>;
defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86cmpmu, SSE_ALU_F32P,
                                 avx512vl_i16_info, HasBWI>,
                                 VEX_W, EVEX_CD8<16, CD8VF>;

defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86cmpm, SSE_ALU_F32P,
                                    avx512vl_i32_info, HasAVX512>,
                                    EVEX_CD8<32, CD8VF>;
defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86cmpmu, SSE_ALU_F32P,
                                     avx512vl_i32_info, HasAVX512>,
                                     EVEX_CD8<32, CD8VF>;

defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86cmpm, SSE_ALU_F32P,
                                    avx512vl_i64_info, HasAVX512>,
                                    VEX_W, EVEX_CD8<64, CD8VF>;
defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86cmpmu, SSE_ALU_F32P,
                                     avx512vl_i64_info, HasAVX512>,
                                     VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002338
Ayman Musa721d97f2017-06-27 12:08:37 +00002339
// Packed floating-point compare (opcode 0xC2) producing a mask register, for
// the vector type described by `_`.
//
// Defines through AVX512_maskable_cmp / AVX512_maskable_cmp_alt:
//   rri   - register second source (the trailing `1` after itins.rr marks it
//           commutable).
//   rmi   - full-vector memory second source.
//   rmbi  - broadcast scalar memory second source (EVEX_B).
//   *_alt - assembler-only forms taking the predicate as an explicit u8
//           immediate instead of a ${cc} mnemonic suffix.
// and four anonymous patterns that match a load / broadcast in the FIRST
// operand of X86cmpm and select the memory form with the sources swapped.
multiclass avx512_vcmp_common<OpndItins itins, X86VectorVTInfo _> {
  defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                   (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,AVXCC:$cc),
                   "vcmp${cc}"#_.Suffix,
                   "$src2, $src1", "$src1, $src2",
                   (X86cmpm (_.VT _.RC:$src1),
                         (_.VT _.RC:$src2),
                           imm:$cc), itins.rr, 1>,
                   Sched<[itins.Sched]>;

  defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc),
                "vcmp${cc}"#_.Suffix,
                "$src2, $src1", "$src1, $src2",
                (X86cmpm (_.VT _.RC:$src1),
                        (_.VT (bitconvert (_.LdFrag addr:$src2))),
                        imm:$cc), itins.rm>,
                Sched<[itins.Sched.Folded, ReadAfterLd]>;

  defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                (outs _.KRC:$dst),
                (ins _.RC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
                "vcmp${cc}"#_.Suffix,
                "${src2}"##_.BroadcastStr##", $src1",
                "$src1, ${src2}"##_.BroadcastStr,
                (X86cmpm (_.VT _.RC:$src1),
                        (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
                        imm:$cc), itins.rm>,
                EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  // Accept explicit immediate argument form instead of comparison code.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
                         (outs _.KRC:$dst),
                         (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                         "vcmp"#_.Suffix,
                         "$cc, $src2, $src1", "$src1, $src2, $cc", itins.rr>,
                         Sched<[itins.Sched]>;

    // These forms carry no pattern, so mayLoad is set explicitly.
    let mayLoad = 1 in {
      defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
                             (outs _.KRC:$dst),
                             (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
                             "vcmp"#_.Suffix,
                             "$cc, $src2, $src1", "$src1, $src2, $cc", itins.rm>,
                             Sched<[itins.Sched.Folded, ReadAfterLd]>;

      defm rmbi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
                         (outs _.KRC:$dst),
                         (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
                         "vcmp"#_.Suffix,
                         "$cc, ${src2}"##_.BroadcastStr##", $src1",
                         "$src1, ${src2}"##_.BroadcastStr##", $cc", itins.rm>,
                         EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
    }
  }

  // Patterns for selecting with loads in other operand.
  // The immediate is passed through unchanged, so these only fire for
  // predicates accepted by CommutableCMPCC (defined elsewhere; presumably
  // those whose result does not depend on operand order - confirm there).
  def : Pat<(X86cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
                     CommutableCMPCC:$cc),
            (!cast<Instruction>(NAME#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
                                                      imm:$cc)>;

  def : Pat<(and _.KRCWM:$mask, (X86cmpm (_.LdFrag addr:$src2),
                                         (_.VT _.RC:$src1),
                                         CommutableCMPCC:$cc)),
            (!cast<Instruction>(NAME#_.ZSuffix#"rmik") _.KRCWM:$mask,
                                                       _.RC:$src1, addr:$src2,
                                                       imm:$cc)>;

  def : Pat<(X86cmpm (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
                     (_.VT _.RC:$src1), CommutableCMPCC:$cc),
            (!cast<Instruction>(NAME#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
                                                       imm:$cc)>;

  def : Pat<(and _.KRCWM:$mask, (X86cmpm (X86VBroadcast
                                          (_.ScalarLdFrag addr:$src2)),
                                         (_.VT _.RC:$src1),
                                         CommutableCMPCC:$cc)),
            (!cast<Instruction>(NAME#_.ZSuffix#"rmbik") _.KRCWM:$mask,
                                                        _.RC:$src1, addr:$src2,
                                                        imm:$cc)>;
}
2422
// Register-register packed fp compare with the {sae} modifier (EVEX_B set on
// a register form).
//   rrib     - selects X86cmpmRnd with a rounding operand of FROUND_NO_EXC.
//   rrib_alt - assembler-only form taking the predicate as an explicit u8
//              immediate; no selection pattern.
multiclass avx512_vcmp_sae<OpndItins itins, X86VectorVTInfo _> {
  // comparison code form (VCMP[EQ/LT/LE/...]
  defm rrib : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                     (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
                     "vcmp${cc}"#_.Suffix,
                     "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                     (X86cmpmRnd (_.VT _.RC:$src1),
                                    (_.VT _.RC:$src2),
                                    imm:$cc,
                                (i32 FROUND_NO_EXC)), itins.rr>,
                     EVEX_B, Sched<[itins.Sched]>;

  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    defm rrib_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
                         (outs _.KRC:$dst),
                         (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                         "vcmp"#_.Suffix,
                         "$cc, {sae}, $src2, $src1",
                         "$src1, $src2, {sae}, $cc", itins.rr>,
                         EVEX_B, Sched<[itins.Sched]>;
  }
}
2445
// Top-level packed fp compare fan-out over the vector lengths in `_`:
//   Z    - info512: common forms plus the {sae} forms, guarded by HasAVX512.
//   Z128 - info128: common forms only, guarded by HasAVX512 and HasVLX.
//   Z256 - info256: common forms only, guarded by HasAVX512 and HasVLX.
// Only the 512-bit instantiation pulls in avx512_vcmp_sae.
multiclass avx512_vcmp<OpndItins itins, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z    : avx512_vcmp_common<itins, _.info512>,
                avx512_vcmp_sae<itins, _.info512>, EVEX_V512;

  }
  let Predicates = [HasAVX512,HasVLX] in {
   defm Z128 : avx512_vcmp_common<itins, _.info128>, EVEX_V128;
   defm Z256 : avx512_vcmp_common<itins, _.info256>, EVEX_V256;
  }
}
2457
// Packed double / single precision compares. The PD form sets VEX_W and uses
// 64-bit compressed-displacement scaling; the PS form uses 32-bit scaling.
defm VCMPPD : avx512_vcmp<SSE_ALU_F64P, avx512vl_f64_info>,
                          AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VCMPPS : avx512_vcmp<SSE_ALU_F32P, avx512vl_f32_info>,
                          AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002462
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00002463
// Patterns to select fp compares with load as first operand.
// The scalar load appears as the first operand of X86cmpms; the memory form
// is selected with the sources swapped and the immediate passed through
// unchanged, so only predicates accepted by CommutableCMPCC match.
let Predicates = [HasAVX512] in {
  def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1,
                            CommutableCMPCC:$cc)),
            (VCMPSDZrm FR64X:$src1, addr:$src2, imm:$cc)>;

  def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1,
                            CommutableCMPCC:$cc)),
            (VCMPSSZrm FR32X:$src1, addr:$src2, imm:$cc)>;
}
2474
// ----------------------------------------------------------------
// FPClass
// Handle the scalar fpclass instruction:
//   mask = op(reg_scalar, imm)
//          op(mem_scalar, imm)
//
// Defines rr / rm (unmasked) and rrk / rmk (write-masked) forms for the
// scalar type described by `_`, all guarded by `prd`.
//
// The masked forms combine the classification result with the write-mask
// using `and`: the masked instruction clears destination bits whose mask bit
// is clear (see the VFPCLASS operation in the Intel SDM), which is the same
// convention every other masked compare in this file uses. Matching `or`
// here would select the masked instruction for a DAG it does not implement.
// NOTE(review): any code that builds the masked node for the fpclass
// intrinsics (e.g. in X86ISelLowering) must combine with `and` as well -
// confirm before landing.
multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 OpndItins itins, X86VectorVTInfo _,
                                 Predicate prd> {
  let Predicates = [prd], ExeDomain = _.ExeDomain in {
      def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
                              (i32 imm:$src2)))], itins.rr>,
                      Sched<[itins.Sched]>;
      def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                      (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix#
                      "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                      [(set _.KRC:$dst,(and _.KRCWM:$mask,
                                      (OpNode (_.VT _.RC:$src1),
                                      (i32 imm:$src2))))], itins.rr>,
                      EVEX_K, Sched<[itins.Sched]>;
    def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix##
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,
                          (OpNode _.ScalarIntMemCPat:$src1,
                                  (i32 imm:$src2)))], itins.rm>,
                    Sched<[itins.Sched.Folded, ReadAfterLd]>;
    def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix##
                    "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                    [(set _.KRC:$dst,(and _.KRCWM:$mask,
                        (OpNode _.ScalarIntMemCPat:$src1,
                            (i32 imm:$src2))))], itins.rm>,
                    EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
2515
// Handle the vector fpclass instruction:
//   mask = fpclass(reg_vec, imm)
//          fpclass(mem_vec, imm)
//          fpclass(broadcast(eltVt), imm)
//
// Defines rr / rm / rmb (unmasked) and rrk / rmk / rmbk (write-masked) forms
// for the vector type described by `_`.
//   mem       - string appended to the mnemonic of the full-vector memory
//               forms (rm, rmk).
//   broadcast - string appended to the mnemonic of the broadcast memory
//               forms (rmb, rmbk).
//
// The masked forms combine the classification result with the write-mask
// using `and`: the masked instruction clears destination bits whose mask bit
// is clear (see the VFPCLASS operation in the Intel SDM), which is the same
// convention every other masked compare in this file uses. Matching `or`
// here would select the masked instruction for a DAG it does not implement.
// NOTE(review): any code that builds the masked node for the fpclass
// intrinsics (e.g. in X86ISelLowering) must combine with `and` as well -
// confirm before landing.
multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 OpndItins itins, X86VectorVTInfo _,
                                 string mem, string broadcast>{
  let ExeDomain = _.ExeDomain in {
  def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
                                       (i32 imm:$src2)))], itins.rr>,
                      Sched<[itins.Sched]>;
  def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                      (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix#
                      "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                      [(set _.KRC:$dst,(and _.KRCWM:$mask,
                                       (OpNode (_.VT _.RC:$src1),
                                       (i32 imm:$src2))))], itins.rr>,
                      EVEX_K, Sched<[itins.Sched]>;
  def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix##mem#
                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,(OpNode
                                     (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                     (i32 imm:$src2)))], itins.rm>,
                    Sched<[itins.Sched.Folded, ReadAfterLd]>;
  def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix##mem#
                    "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                    [(set _.KRC:$dst, (and _.KRCWM:$mask, (OpNode
                                  (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                  (i32 imm:$src2))))], itins.rm>,
                    EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
                                      _.BroadcastStr##", $dst|$dst, ${src1}"
                                                  ##_.BroadcastStr##", $src2}",
                    [(set _.KRC:$dst,(OpNode
                                     (_.VT (X86VBroadcast
                                           (_.ScalarLdFrag addr:$src1))),
                                     (i32 imm:$src2)))], itins.rm>,
                    EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
                          _.BroadcastStr##", $dst {${mask}}|$dst {${mask}}, ${src1}"##
                                                   _.BroadcastStr##", $src2}",
                    [(set _.KRC:$dst,(and _.KRCWM:$mask, (OpNode
                                     (_.VT (X86VBroadcast
                                           (_.ScalarLdFrag addr:$src1))),
                                     (i32 imm:$src2))))], itins.rm>,
                    EVEX_B, EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
2575
// Instantiates avx512_vector_fpclass once per vector length described by `_`.
// Each length passes its own `mem` mnemonic suffix so the memory forms are
// distinguishable in assembly:
//   Z    - info512, "{z}", EVEX_V512, guarded by `prd` only.
//   Z128 - info128, "{x}", EVEX_V128, guarded by `prd` and HasVLX.
//   Z256 - info256, "{y}", EVEX_V256, guarded by `prd` and HasVLX.
// `broadcast` is forwarded unchanged to every instantiation.
multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
                                     bits<8> opc, SDNode OpNode,
                                     OpndItins itins, Predicate prd,
                                     string broadcast>{
  let Predicates = [prd] in {
    defm Z    : avx512_vector_fpclass<opc, OpcodeStr, OpNode, itins,
                                      _.info512, "{z}", broadcast>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, itins,
                                      _.info128, "{x}", broadcast>, EVEX_V128;
    defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, itins,
                                      _.info256, "{y}", broadcast>, EVEX_V256;
  }
}
2591
// Builds the full fpclass family under one name prefix:
//   PS / PD - vector forms (opcode opcVec, node VecOpNode) for f32 / f64,
//             with broadcast mnemonic suffixes "{l}" / "{q}".
//   SS / SD - scalar forms (opcode opcScalar, node ScalarOpNode) for
//             f32 / f64.
// The f64 variants additionally set VEX_W. `prd` guards every instantiation.
// FIXME: Is there a better scheduler itinerary for VFPCLASS?
multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
             bits<8> opcScalar, SDNode VecOpNode, SDNode ScalarOpNode, Predicate prd>{
  defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec,
                                      VecOpNode, SSE_ALU_F32P, prd, "{l}">,
                                      EVEX_CD8<32, CD8VF>;
  defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec,
                                      VecOpNode, SSE_ALU_F64P, prd, "{q}">,
                                      EVEX_CD8<64, CD8VF> , VEX_W;
  defm SS : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
                                  SSE_ALU_F32S, f32x_info, prd>,
                                  EVEX_CD8<32, CD8VT1>;
  defm SD : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
                                  SSE_ALU_F64S, f64x_info, prd>,
                                  EVEX_CD8<64, CD8VT1>, VEX_W;
}
2608
// VFPCLASS{PS,PD,SS,SD}: vector opcode 0x66, scalar opcode 0x67, all gated on
// HasDQI.
defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67,
                                      X86Vfpclass, X86Vfpclasss, HasDQI>,
                AVX512AIi8Base, EVEX;
Asaf Badouh572bbce2015-09-20 08:46:07 +00002611
//-----------------------------------------------------------------
// Mask register copy, including
// - copy between mask registers
// - load/store mask registers
// - copy from GPR to mask register and vice versa
//

// Defines the three mask-register move forms for one mask width:
//   kk - mask register to mask register (opc_kk); no selection pattern.
//   km - load a mask register from memory (opc_km); selects `vvt (load ...)`.
//   mk - store a mask register to memory (opc_mk); selects `store KRC, ...`.
//
// A brace-less `let ... in` applies only to the single definition that
// follows it, so the `let` below covers `kk` only. `km` and `mk` therefore
// carry their own scheduling classes explicitly; without them the load and
// store forms would have no scheduling information at all.
multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
                           string OpcodeStr, RegisterClass KRC,
                           ValueType vvt, X86MemOperand x86memop> {
  let hasSideEffects = 0, SchedRW = [WriteMove] in
  def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
             IIC_SSE_MOVDQ>;
  def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
             [(set KRC:$dst, (vvt (load addr:$src)))], IIC_SSE_MOVDQ>,
           Sched<[WriteLoad]>;
  def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
             [(store KRC:$src, addr:$dst)], IIC_SSE_MOVDQ>,
           Sched<[WriteStore]>;
}
2632
// Defines the two register-only moves between a general-purpose register
// class (GRC) and a mask register class (KRC):
//   kr - GRC source, KRC destination (opc_kr).
//   rk - KRC source, GRC destination (opc_rk).
// Neither form has a selection pattern; both are scheduled as WriteMove.
multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
                               string OpcodeStr,
                               RegisterClass KRC, RegisterClass GRC> {
  let hasSideEffects = 0, SchedRW = [WriteMove] in {
    def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
               OpcodeStr # "\t{$src, $dst|$dst, $src}", [],
               IIC_SSE_MOVD_ToGP>;
    def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
               OpcodeStr # "\t{$src, $dst|$dst, $src}", [],
               IIC_SSE_MOVD_ToGP>;
  }
}
2645
// KMOVB: 8-bit masks (VK8); GPR forms use GR32. Requires DQI.
let Predicates = [HasDQI] in {
  defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
               VEX, PD;
}

// KMOVW: 16-bit masks (VK16); GPR forms use GR32. Requires AVX512.
let Predicates = [HasAVX512] in {
  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
               VEX, PS;
}

// KMOVD / KMOVQ: 32- and 64-bit masks. Requires BWI. The mask/memory forms
// and the GPR forms are instantiated separately because they use different
// prefix/VEX_W combinations.
let Predicates = [HasBWI] in {
  defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
               VEX, PD, VEX_W;
  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
               VEX, XD;
  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
               VEX, PS, VEX_W;
  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
               VEX, XD, VEX_W;
}
2666
// GR from/to mask register

// i16 <-> v16i1: go through a 32-bit GPR, inserting/extracting the 16-bit
// subregister.
def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
          (COPY_TO_REGCLASS
            (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)),
            VK16)>;
def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)),
                          sub_16bit)>;

// i8 <-> v8i1: same scheme using the 8-bit subregister.
def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
          (COPY_TO_REGCLASS
            (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)),
            VK8)>;
def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)),
                          sub_8bit)>;

// Extending a 16-bit mask to i32: zext selects KMOVWrk, anyext is a plain
// register-class copy.
def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
          (KMOVWrk VK16:$src)>;
def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
          (COPY_TO_REGCLASS VK16:$src, GR32)>;

// Extending an 8-bit mask to i32: the zext form selects KMOVBrk and is
// therefore restricted to HasDQI; anyext is a plain register-class copy.
def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
          (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
          (COPY_TO_REGCLASS VK8:$src, GR32)>;

// i32 <-> v32i1 and i64 <-> v64i1 are direct register-class copies.
def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
          (COPY_TO_REGCLASS GR32:$src, VK32)>;
def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
          (COPY_TO_REGCLASS VK32:$src, GR32)>;
def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
          (COPY_TO_REGCLASS GR64:$src, VK64)>;
def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
          (COPY_TO_REGCLASS VK64:$src, GR64)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002696
// Load/store kreg

// With DQI: masks narrower than 8 bits are stored by copying them to VK8 and
// using KMOVBmk; v2i1/v4i1 loads use KMOVBkm and copy to the narrow class.
let Predicates = [HasDQI] in {
  def : Pat<(store VK4:$src, addr:$dst),
            (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK4:$src, VK8))>;
  def : Pat<(store VK2:$src, addr:$dst),
            (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK2:$src, VK8))>;
  def : Pat<(store VK1:$src, addr:$dst),
            (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;

  def : Pat<(v2i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
  def : Pat<(v4i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
}

// Without DQI: stores of masks up to 8 bits copy the mask to GR32, take the
// low 8-bit subregister and use MOV8mr; loads use MOVZX32rm8 followed by a
// copy to the mask register class.
let Predicates = [HasAVX512, NoDQI] in {
  def : Pat<(store VK1:$src, addr:$dst),
            (MOV8mr addr:$dst,
              (i8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK1:$src, GR32)),
                                  sub_8bit)))>;
  def : Pat<(store VK2:$src, addr:$dst),
            (MOV8mr addr:$dst,
              (i8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK2:$src, GR32)),
                                  sub_8bit)))>;
  def : Pat<(store VK4:$src, addr:$dst),
            (MOV8mr addr:$dst,
              (i8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK4:$src, GR32)),
                                  sub_8bit)))>;
  def : Pat<(store VK8:$src, addr:$dst),
            (MOV8mr addr:$dst,
              (i8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)),
                                  sub_8bit)))>;

  def : Pat<(v8i1 (load addr:$src)),
            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
  def : Pat<(v2i1 (load addr:$src)),
            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK2)>;
  def : Pat<(v4i1 (load addr:$src)),
            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK4)>;
}
Elena Demikhovsky5e426f72016-04-03 08:41:12 +00002736
// Patterns that apply with or without DQI: a v1i1 load, and a v8i1 that is a
// bitconvert of an i8 load, both select MOVZX32rm8 followed by a copy into
// the mask register class.
let Predicates = [HasAVX512] in {
  def : Pat<(v1i1 (load addr:$src)),
            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK1)>;
  def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
}
Elena Demikhovskyc5f67262013-12-17 08:33:15 +00002743
let Predicates = [HasAVX512] in {
  // Per-mask-type copy patterns between GPRs and mask registers:
  //   - scalar_to_vector of a GR32 is a copy into maskRC;
  //   - X86kextract at index 0 is a copy of maskRC into GR32;
  //   - scalar_to_vector of a GR8 first widens the GR8 into a 32-bit register
  //     via INSERT_SUBREG on an IMPLICIT_DEF, then copies into maskRC.
  multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC,
                                              ValueType maskVT> {
    def : Pat<(maskVT (scalar_to_vector GR32:$src)),
              (COPY_TO_REGCLASS GR32:$src, maskRC)>;

    def : Pat<(i32 (X86kextract maskRC:$src, (iPTR 0))),
              (COPY_TO_REGCLASS maskRC:$src, GR32)>;

    def : Pat<(maskVT (scalar_to_vector GR8:$src)),
              (COPY_TO_REGCLASS
                (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
                maskRC)>;
  }

  // One instantiation per mask width, 1 through 64 bits.
  defm : operation_gpr_mask_copy_lowering<VK1,  v1i1>;
  defm : operation_gpr_mask_copy_lowering<VK2,  v2i1>;
  defm : operation_gpr_mask_copy_lowering<VK4,  v4i1>;
  defm : operation_gpr_mask_copy_lowering<VK8,  v8i1>;
  defm : operation_gpr_mask_copy_lowering<VK16, v16i1>;
  defm : operation_gpr_mask_copy_lowering<VK32, v32i1>;
  defm : operation_gpr_mask_copy_lowering<VK64, v64i1>;

  // Inserting a v1i1 built from a GR8 into an all-zeros v16i1 at index 0:
  // widen the GR8 to 32 bits, AND it with 1 (AND32ri8), move the result into
  // a mask register with KMOVWkr, and copy to VK16.
  def : Pat<(insert_subvector (v16i1 immAllZerosV),
                              (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
            (COPY_TO_REGCLASS
              (KMOVWkr
                (AND32ri8
                  (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
                  (i32 1))),
              VK16)>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002771
// Mask unary operation
// - KNOT

// Defines a single register-to-register unary mask instruction `rr` on KRC,
// selected for `OpNode KRC:$src`, guarded by `prd`, using the itinerary and
// scheduling class supplied by `itins`.
multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
                            RegisterClass KRC, SDPatternOperator OpNode,
                            OpndItins itins, Predicate prd> {
  let Predicates = [prd], SchedRW = [itins.Sched] in {
    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
               OpcodeStr # "\t{$src, $dst|$dst, $src}",
               [(set KRC:$dst, (OpNode KRC:$src))], itins.rr>;
  }
}
2783
// Instantiates avx512_mask_unop for all four mask widths, appending the width
// suffix to the mnemonic:
//   B - VK8,  HasDQI
//   W - VK16, HasAVX512
//   D - VK32, HasBWI
//   Q - VK64, HasBWI
multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNode, OpndItins itins> {
  defm B : avx512_mask_unop<opc, OpcodeStr # "b", VK8, OpNode, itins, HasDQI>,
           VEX, PD;
  defm W : avx512_mask_unop<opc, OpcodeStr # "w", VK16, OpNode, itins,
                            HasAVX512>,
           VEX, PS;
  defm D : avx512_mask_unop<opc, OpcodeStr # "d", VK32, OpNode, itins, HasBWI>,
           VEX, PD, VEX_W;
  defm Q : avx512_mask_unop<opc, OpcodeStr # "q", VK64, OpNode, itins, HasBWI>,
           VEX, PS, VEX_W;
}
2795
// KNOT{B,W,D,Q}: opcode 0x44, selected for `vnot`.
defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SSE_BIT_ITINS_P>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002797
// Without DQI the byte-wide KNOT is unavailable (KNOTB is gated on HasDQI),
// so a vnot of an 8-bit mask is promoted to 16 bits: copy to VK16, apply
// KNOTWrr, copy back to VK8.
let Predicates = [HasAVX512, NoDQI] in {
  def : Pat<(vnot VK8:$src),
            (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)),
                              VK8)>;
}

// 4-bit and 2-bit masks use the same promotion. These two patterns sit
// outside the predicate scope above and so carry no explicit predicate.
def : Pat<(vnot VK4:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)),
                            VK4)>;
def : Pat<(vnot VK2:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)),
                            VK2)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002807
// Mask binary operation
// - KAND, KANDN, KOR, KXNOR, KXOR

// Defines a single two-source mask instruction `rr` on KRC, selected for
// `OpNode KRC:$src1, KRC:$src2`. `prd` gates the instruction and
// `IsCommutable` is forwarded to the instruction's isCommutable flag.
multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
                             RegisterClass KRC, SDPatternOperator OpNode,
                             OpndItins itins, Predicate prd, bit IsCommutable> {
  let Predicates = [prd], isCommutable = IsCommutable,
      SchedRW = [itins.Sched] in {
    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
               OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))], itins.rr>;
  }
}
2820
// Instantiates avx512_mask_binop for all four mask widths, appending the
// width suffix to the mnemonic:
//   B - VK8,  HasDQI
//   W - VK16, prdW (defaults to HasAVX512)
//   D - VK32, HasBWI
//   Q - VK64, HasBWI
multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
                                 SDPatternOperator OpNode, OpndItins itins,
                                 bit IsCommutable, Predicate prdW = HasAVX512> {
  defm B : avx512_mask_binop<opc, OpcodeStr # "b", VK8, OpNode, itins,
                             HasDQI, IsCommutable>,
           VEX_4V, VEX_L, PD;
  defm W : avx512_mask_binop<opc, OpcodeStr # "w", VK16, OpNode, itins,
                             prdW, IsCommutable>,
           VEX_4V, VEX_L, PS;
  defm D : avx512_mask_binop<opc, OpcodeStr # "d", VK32, OpNode, itins,
                             HasBWI, IsCommutable>,
           VEX_4V, VEX_L, VEX_W, PD;
  defm Q : avx512_mask_binop<opc, OpcodeStr # "q", VK64, OpNode, itins,
                             HasBWI, IsCommutable>,
           VEX_4V, VEX_L, VEX_W, PS;
}
2833
// Pattern fragments built from `not`:
//   andn(a, b) = and(not(a), b)
//   xnor(a, b) = not(xor(a, b))
def andn : PatFrag<(ops node:$i0, node:$i1),
                   (and (not node:$i0), node:$i1)>;
def xnor : PatFrag<(ops node:$i0, node:$i1),
                   (not (xor node:$i0, node:$i1))>;

// These nodes use 'vnot' instead of 'not' to support vectors.
def vandn : PatFrag<(ops node:$i0, node:$i1),
                    (and (vnot node:$i0), node:$i1)>;
def vxnor : PatFrag<(ops node:$i0, node:$i1),
                    (vnot (xor node:$i0, node:$i1))>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002839
// Binary mask instructions. The trailing bit marks the operation as
// commutable (only KANDN is not). KADD overrides the W-form predicate with
// HasDQI; the others use the default.
defm KAND  : avx512_mask_binop_all<0x41, "kand",  and,   SSE_BIT_ITINS_P, 1>;
defm KOR   : avx512_mask_binop_all<0x45, "kor",   or,    SSE_BIT_ITINS_P, 1>;
defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, SSE_BIT_ITINS_P, 1>;
defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor,   SSE_BIT_ITINS_P, 1>;
defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, SSE_BIT_ITINS_P, 0>;
defm KADD  : avx512_mask_binop_all<0x4A, "kadd",  add,   SSE_BIT_ITINS_P, 1,
                                   HasDQI>;
Elena Demikhovskyb64d7e82013-12-25 10:06:40 +00002846
// Selection patterns that implement a binary mask operation on narrow mask
// types by promoting both operands to VK16, applying the 16-bit instruction
// `Inst`, and copying the result back to the original mask register class.
//
//   VOpNode - operator used for the multi-element mask types (VK2/VK4/VK8).
//   OpNode  - operator used for the single-element VK1 type.
//   Inst    - the 16-bit (W) mask instruction to use.
//
// The result of each pattern is copied back to the same register class as
// its operands (VK8 -> VK8, VK1 -> VK1, VK2 -> VK2, VK4 -> VK4) so that the
// output register class matches the type of the matched node.
multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode,
                            Instruction Inst> {
  // With AVX512F, 8-bit mask is promoted to 16-bit mask,
  // for the DQI set, this type is legal and KxxxB instruction is used
  let Predicates = [NoDQI] in
  def : Pat<(VOpNode VK8:$src1, VK8:$src2),
            (COPY_TO_REGCLASS
              (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
                    (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;

  // All types smaller than 8 bits require conversion anyway
  def : Pat<(OpNode VK1:$src1, VK1:$src2),
            (COPY_TO_REGCLASS (Inst
                               (COPY_TO_REGCLASS VK1:$src1, VK16),
                               (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
  def : Pat<(VOpNode VK2:$src1, VK2:$src2),
            (COPY_TO_REGCLASS (Inst
                               (COPY_TO_REGCLASS VK2:$src1, VK16),
                               (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
  def : Pat<(VOpNode VK4:$src1, VK4:$src2),
            (COPY_TO_REGCLASS (Inst
                               (COPY_TO_REGCLASS VK4:$src1, VK16),
                               (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
}
2871
// Narrow-mask promotion patterns for each binary mask operation. The first
// argument is the operator for multi-element masks, the second the operator
// for VK1, the third the 16-bit instruction that implements it.
defm : avx512_binop_pat<and,   and,  KANDWrr>;
defm : avx512_binop_pat<vandn, andn, KANDNWrr>;
defm : avx512_binop_pat<or,    or,   KORWrr>;
defm : avx512_binop_pat<vxnor, xnor, KXNORWrr>;
defm : avx512_binop_pat<xor,   xor,  KXORWrr>;
Elena Demikhovskyd1084c52015-04-27 12:57:59 +00002877
// Mask unpacking

// Defines the KUNPCK instruction `rr` (opcode 0x4b) on KRC plus a pattern
// that selects it for `concat_vectors` of two KRCSrc values producing VT.
// Note that the pattern passes the concat operands to the instruction in
// swapped order ($src2 first, then $src1), each copied into KRC.
multiclass avx512_mask_unpck<string Suffix,RegisterClass KRC, ValueType VT,
                             RegisterClass KRCSrc, OpndItins itins,
                             Predicate prd> {
  let Predicates = [prd] in {
    // The instruction itself has no pattern; selection is done by the
    // separate Pat below.
    let hasSideEffects = 0 in {
      def rr : I<0x4b, MRMSrcReg, (outs KRC:$dst),
                 (ins KRC:$src1, KRC:$src2),
                 !strconcat("kunpck", Suffix,
                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [], itins.rr>,
               VEX_4V, VEX_L, Sched<[itins.Sched]>;
    }

    def : Pat<(VT (concat_vectors KRCSrc:$src1, KRCSrc:$src2)),
              (!cast<Instruction>(NAME##rr)
                 (COPY_TO_REGCLASS KRCSrc:$src2, KRC),
                 (COPY_TO_REGCLASS KRCSrc:$src1, KRC))>;
  }
}
2894
// KUNPCKBW: two VK8  -> v16i1 (HasAVX512)
// KUNPCKWD: two VK16 -> v32i1 (HasBWI)
// KUNPCKDQ: two VK32 -> v64i1 (HasBWI)
defm KUNPCKBW : avx512_mask_unpck<"bw", VK16, v16i1, VK8, SSE_UNPCK,
                                  HasAVX512>, PD;
defm KUNPCKWD : avx512_mask_unpck<"wd", VK32, v32i1, VK16, SSE_UNPCK,
                                  HasBWI>, PS;
defm KUNPCKDQ : avx512_mask_unpck<"dq", VK64, v64i1, VK32, SSE_UNPCK,
                                  HasBWI>, PS, VEX_W;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002898
// Mask bit testing

// Defines a two-source mask test instruction `rr` on KRC. It has no register
// output; it defines EFLAGS, and is selected for
// `set EFLAGS, (OpNode KRC:$src1, KRC:$src2)`.
multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                              SDNode OpNode, OpndItins itins, Predicate prd> {
  let Predicates = [prd], Defs = [EFLAGS], SchedRW = [itins.Sched] in {
    def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
               OpcodeStr # "\t{$src2, $src1|$src1, $src2}",
               [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))], itins.rr>;
  }
}
2908
// Instantiates avx512_mask_testop for all four mask widths:
//   B - VK8,  HasDQI
//   W - VK16, prdW (defaults to HasAVX512)
//   Q - VK64, HasBWI
//   D - VK32, HasBWI
multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                OpndItins itins, Predicate prdW = HasAVX512> {
  defm B : avx512_mask_testop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                              itins, HasDQI>,
           VEX, PD;
  defm W : avx512_mask_testop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                              itins, prdW>,
           VEX, PS;
  defm Q : avx512_mask_testop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                              itins, HasBWI>,
           VEX, PS, VEX_W;
  defm D : avx512_mask_testop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                              itins, HasBWI>,
           VEX, PD, VEX_W;
}
2920
// KORTEST uses the default W-form predicate; KTEST overrides it with HasDQI.
defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SSE_PTEST>;
defm KTEST   : avx512_mask_testop_w<0x99, "ktest", X86ktest, SSE_PTEST,
                                    HasDQI>;
Elena Demikhovsky47fc44e2013-12-16 13:52:35 +00002923
// Mask shift

// Defines a mask shift-by-immediate instruction `ri` on KRC, selected for
// `OpNode KRC:$src, (i8 imm:$imm)`. The definition is placed under a
// HasAVX512 predicate inside this multiclass.
multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                               SDNode OpNode, OpndItins itins> {
  let Predicates = [HasAVX512], SchedRW = [itins.Sched] in {
    def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
                 OpcodeStr # "\t{$imm, $src, $dst|$dst, $src, $imm}",
                 [(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))],
                 itins.rr>;
  }
}
2934
// Instantiates avx512_mask_shiftop for all four mask widths. The W and B
// forms share opc1 and the Q and D forms share opc2; within each pair the
// forms differ by VEX_W.
//   W - VK16, no extra predicate scope here
//   B - VK8,  inside a HasDQI scope
//   Q - VK64, inside a HasBWI scope
//   D - VK32, inside a HasBWI scope
multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
                                 SDNode OpNode, OpndItins itins> {
  defm W : avx512_mask_shiftop<opc1, OpcodeStr # "w", VK16, OpNode, itins>,
           VEX, TAPD, VEX_W;

  let Predicates = [HasDQI] in {
    defm B : avx512_mask_shiftop<opc1, OpcodeStr # "b", VK8, OpNode, itins>,
             VEX, TAPD;
  }

  let Predicates = [HasBWI] in {
    defm Q : avx512_mask_shiftop<opc2, OpcodeStr # "q", VK64, OpNode, itins>,
             VEX, TAPD, VEX_W;
    defm D : avx512_mask_shiftop<opc2, OpcodeStr # "d", VK32, OpNode, itins>,
             VEX, TAPD;
  }
}
2949
// KSHIFTL / KSHIFTR: first opcode is used for the W/B forms, second for Q/D.
defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl,
                                     SSE_PSHUF>;
defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr,
                                     SSE_PSHUF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002952
// Lowers a two-operand compare on a Narrow vector type by widening to the
// Wide type and using the Wide instruction named InstStr:
//   - each Narrow operand is placed into a Wide register with INSERT_SUBREG
//     on an IMPLICIT_DEF at Narrow.SubRegIdx;
//   - the instruction result is copied to Narrow.KRC.
// Two patterns are provided: an unmasked one (InstStr + "Zrr") and one where
// the compare is ANDed with a Narrow.KRC mask (InstStr + "Zrrk"), for which
// the mask is first copied to Wide.KRC.
multiclass axv512_icmp_packed_no_vlx_lowering<SDNode OpNode, string InstStr,
                                              X86VectorVTInfo Narrow,
                                              X86VectorVTInfo Wide> {
  // Unmasked compare.
  def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1),
                                (Narrow.VT Narrow.RC:$src2))),
            (COPY_TO_REGCLASS
              (!cast<Instruction>(InstStr##Zrr)
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1,
                                        Narrow.SubRegIdx)),
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2,
                                        Narrow.SubRegIdx))),
              Narrow.KRC)>;

  // Compare ANDed with a mask.
  def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                             (OpNode (Narrow.VT Narrow.RC:$src1),
                                     (Narrow.VT Narrow.RC:$src2)))),
            (COPY_TO_REGCLASS
              (!cast<Instruction>(InstStr##Zrrk)
                (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1,
                                        Narrow.SubRegIdx)),
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2,
                                        Narrow.SubRegIdx))),
              Narrow.KRC)>;
}
2974
// Same widening scheme as the two-operand lowering, for compares that take
// an immediate condition code `imm:$cc`, which is forwarded to the Wide
// instruction as its last operand:
//   - unmasked form selects InstStr + "Zrri";
//   - form ANDed with a Narrow.KRC mask selects InstStr + "Zrrik", with the
//     mask first copied to Wide.KRC.
// In both cases the result is copied to Narrow.KRC.
multiclass axv512_icmp_packed_cc_no_vlx_lowering<SDNode OpNode, string InstStr,
                                                 X86VectorVTInfo Narrow,
                                                 X86VectorVTInfo Wide> {
  // Unmasked compare with condition code.
  def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1),
                                (Narrow.VT Narrow.RC:$src2), imm:$cc)),
            (COPY_TO_REGCLASS
              (!cast<Instruction>(InstStr##Zrri)
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1,
                                        Narrow.SubRegIdx)),
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2,
                                        Narrow.SubRegIdx)),
                imm:$cc),
              Narrow.KRC)>;

  // Compare with condition code, ANDed with a mask.
  def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                             (OpNode (Narrow.VT Narrow.RC:$src1),
                                     (Narrow.VT Narrow.RC:$src2), imm:$cc))),
            (COPY_TO_REGCLASS
              (!cast<Instruction>(InstStr##Zrrik)
                (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1,
                                        Narrow.SubRegIdx)),
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2,
                                        Narrow.SubRegIdx)),
                imm:$cc),
              Narrow.KRC)>;
}
2995
// Without VLX, 128/256-bit dword/qword (and float/double) compares are
// widened to the corresponding 512-bit instruction.
let Predicates = [HasAVX512, NoVLX] in {
  // Signed greater-than / equality, 32-bit elements.
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTD",
                                            v8i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm, "VPCMPEQD",
                                            v8i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTD",
                                            v4i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm, "VPCMPEQD",
                                            v4i32x_info, v16i32_info>;

  // Signed greater-than / equality, 64-bit elements.
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTQ",
                                            v4i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm, "VPCMPEQQ",
                                            v4i64x_info, v8i64_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTQ",
                                            v2i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm, "VPCMPEQQ",
                                            v2i64x_info, v8i64_info>;

  // Condition-code compares, 32-bit elements.
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS",
                                               v8f32x_info, v16f32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPD",
                                               v8i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUD",
                                               v8i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS",
                                               v4f32x_info, v16f32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPD",
                                               v4i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUD",
                                               v4i32x_info, v16i32_info>;

  // Condition-code compares, 64-bit elements.
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPD",
                                               v4f64x_info, v8f64_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPQ",
                                               v4i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUQ",
                                               v4i64x_info, v8i64_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPD",
                                               v2f64x_info, v8f64_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPQ",
                                               v2i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUQ",
                                               v2i64x_info, v8i64_info>;
}
3025
// With BWI but without VLX, 128/256-bit byte/word compares are widened to
// the corresponding 512-bit instruction.
let Predicates = [HasBWI, NoVLX] in {
  // Signed greater-than / equality, 8-bit elements.
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTB",
                                            v32i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm, "VPCMPEQB",
                                            v32i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTB",
                                            v16i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm, "VPCMPEQB",
                                            v16i8x_info, v64i8_info>;

  // Signed greater-than / equality, 16-bit elements.
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTW",
                                            v16i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm, "VPCMPEQW",
                                            v16i16x_info, v32i16_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTW",
                                            v8i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm, "VPCMPEQW",
                                            v8i16x_info, v32i16_info>;

  // Condition-code compares, 8-bit elements.
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPB",
                                               v32i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUB",
                                               v32i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPB",
                                               v16i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUB",
                                               v16i8x_info, v64i8_info>;

  // Condition-code compares, 16-bit elements.
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPW",
                                               v16i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUW",
                                               v16i16x_info, v32i16_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPW",
                                               v8i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUW",
                                               v8i16x_info, v32i16_info>;
}
3051
// Mask setting all 0s or 1s

// Defines a pseudo instruction (no operands, empty asm string) that
// produces the constant `Val` of type VT in a KRC register. It is marked
// rematerializable and as cheap as a move, and is scheduled as WriteZero.
multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
  let Predicates = [HasAVX512],
      isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
      SchedRW = [WriteZero] in {
    def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
                   [(set KRC:$dst, (VT Val))]>;
  }
}
3060
// Instantiates avx512_mask_setop for the 16-, 32- and 64-bit mask types.
// There is no 8-bit (or narrower) variant here.
multiclass avx512_mask_setop_w<PatFrag Val> {
  defm W : avx512_mask_setop<VK16, v16i1, Val>;
  defm D : avx512_mask_setop<VK32, v32i1, Val>;
  defm Q : avx512_mask_setop<VK64, v64i1, Val>;
}
3066
// KSET0{W,D,Q} produce an all-zeros mask; KSET1{W,D,Q} an all-ones mask.
defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
3069
// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
// All-zeros / all-ones constants of the 8-, 4-, 2- and 1-bit mask types are
// materialized with the 16-bit KSET0W / KSET1W pseudo and then copied into
// the narrower mask register class.
let Predicates = [HasAVX512] in {
  // All zeros.
  def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
  def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
  def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
  def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;

  // All ones.
  def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;
  def : Pat<(v4i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK4)>;
  def : Pat<(v2i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK2)>;
  def : Pat<(v1i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK1)>;
}
Igor Bregerf1bd7612016-03-06 07:46:03 +00003081
// Patterns for kmask insert_subvector/extract_subvector to/from index=0.
//
// Both directions are lowered to a plain register-class copy between the
// narrow mask class (subRC / subVT) and the wide mask class (RC / VT):
//   - inserting a narrow mask into an undef wide mask at element 0
//   - extracting the narrow mask that starts at element 0 of a wide mask
multiclass operation_subvector_mask_lowering<RegisterClass subRC,
                                             ValueType subVT,
                                             RegisterClass RC,
                                             ValueType VT> {
  // Narrow -> wide (upper elements are undef).
  def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
            (VT (COPY_TO_REGCLASS subRC:$src, RC))>;

  // Wide -> narrow (keep the low elements).
  def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
            (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;
}
// Instantiate the index-0 insert/extract lowering for every (narrow, wide)
// pair of mask types where the narrow type has fewer elements than the wide.

// v1i1 inside every wider mask.
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK2,  v2i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK4,  v4i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK64, v64i1>;

// v2i1
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK4,  v4i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK64, v64i1>;

// v4i1
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK64, v64i1>;

// v8i1
defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK64, v64i1>;

// v16i1
defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;

// v32i1
defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003117
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003118//===----------------------------------------------------------------------===//
3119// AVX-512 - Aligned and unaligned load and store
3120//
3121
Elena Demikhovsky2689d782015-03-02 12:46:21 +00003122
// Load-direction forms of an EVEX full-vector move for one vector type `_`.
//
// Instructions defined:
//   rr   / rm    unmasked register / memory source
//   rrk  / rmk   merge-masked; $src0 supplies the passthru and is tied to $dst
//   rrkz / rmkz  zero-masked
//
// Parameters:
//   ld_frag      load fragment matched by the memory forms.
//   mload        masked-load fragment; mapped onto rmk / rmkz by the trailing
//                patterns.
//   NoRMPattern  when set, the unmasked `rm` form gets an empty pattern list.
//   SelectOprr   select operator used by the register-register masked forms
//                (rrk, rrkz).  The memory forms always match vselect.
multiclass avx512_load<bits<8> opc, string OpcodeStr, MoveLoadStoreItins itins,
                       X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
                       bit NoRMPattern = 0,
                       SDPatternOperator SelectOprr = vselect> {
  let hasSideEffects = 0 in {
    // Unmasked register move; carries no isel pattern.
    def rr : AVX512PI<opc, MRMSrcReg,
                      (outs _.RC:$dst), (ins _.RC:$src),
                      OpcodeStr # "\t{$src, $dst|$dst, $src}",
                      [], _.ExeDomain, itins.rr>,
             EVEX, Sched<[WriteMove]>;

    // Zero-masked register move.
    def rrkz : AVX512PI<opc, MRMSrcReg,
                        (outs _.RC:$dst), (ins _.KRCWM:$mask, _.RC:$src),
                        OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
                                    "${dst} {${mask}} {z}, $src}",
                        [(set _.RC:$dst,
                              (_.VT (SelectOprr _.KRCWM:$mask,
                                                (_.VT _.RC:$src),
                                                _.ImmAllZerosV)))],
                        _.ExeDomain, itins.rr>,
               EVEX, EVEX_KZ, Sched<[WriteMove]>;

    // Unmasked load.  The pattern is dropped when NoRMPattern is set.
    let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
    def rm : AVX512PI<opc, MRMSrcMem,
                      (outs _.RC:$dst), (ins _.MemOp:$src),
                      OpcodeStr # "\t{$src, $dst|$dst, $src}",
                      !if(NoRMPattern, [],
                          [(set _.RC:$dst,
                                (_.VT (bitconvert (ld_frag addr:$src))))]),
                      _.ExeDomain, itins.rm>,
             EVEX, Sched<[WriteLoad]>;

    // Merge-masked forms: the passthru operand $src0 is tied to the result.
    let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
      def rrk : AVX512PI<opc, MRMSrcReg,
                         (outs _.RC:$dst),
                         (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
                         OpcodeStr # "\t{$src1, ${dst} {${mask}}|" #
                                     "${dst} {${mask}}, $src1}",
                         [(set _.RC:$dst,
                               (_.VT (SelectOprr _.KRCWM:$mask,
                                                 (_.VT _.RC:$src1),
                                                 (_.VT _.RC:$src0))))],
                         _.ExeDomain, itins.rr>,
                EVEX, EVEX_K, Sched<[WriteMove]>;

      def rmk : AVX512PI<opc, MRMSrcMem,
                         (outs _.RC:$dst),
                         (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
                         OpcodeStr # "\t{$src1, ${dst} {${mask}}|" #
                                     "${dst} {${mask}}, $src1}",
                         [(set _.RC:$dst,
                               (_.VT (vselect
                                         _.KRCWM:$mask,
                                         (_.VT (bitconvert
                                                   (ld_frag addr:$src1))),
                                         (_.VT _.RC:$src0))))],
                         _.ExeDomain, itins.rm>,
                EVEX, EVEX_K, Sched<[WriteLoad]>;
    }

    // Zero-masked load.
    def rmkz : AVX512PI<opc, MRMSrcMem,
                        (outs _.RC:$dst), (ins _.KRCWM:$mask, _.MemOp:$src),
                        OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
                                    "${dst} {${mask}} {z}, $src}",
                        [(set _.RC:$dst,
                              (_.VT (vselect
                                        _.KRCWM:$mask,
                                        (_.VT (bitconvert
                                                  (ld_frag addr:$src))),
                                        _.ImmAllZerosV)))],
                        _.ExeDomain, itins.rm>,
               EVEX, EVEX_KZ, Sched<[WriteLoad]>;
  }

  // Masked-load lowering.  An undef or all-zeros passthru uses the
  // zero-masking form; any other passthru uses the merge-masking form.
  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(NAME#_.ZSuffix##rmkz)
                 _.KRCWM:$mask, addr:$ptr)>;

  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
            (!cast<Instruction>(NAME#_.ZSuffix##rmkz)
                 _.KRCWM:$mask, addr:$ptr)>;

  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
            (!cast<Instruction>(NAME#_.ZSuffix##rmk)
                 _.RC:$src0, _.KRCWM:$mask, addr:$ptr)>;
}
3185
// Aligned-load move family across the three vector lengths.
//
// The 512-bit form (Z) is guarded by `prd` alone; the 256-bit (Z256) and
// 128-bit (Z128) forms additionally require HasVLX.  Each width uses its own
// AlignedLdFrag and the width-specific masked_load_aligned* fragment.
// NoRMPattern is forwarded unchanged to avx512_load.
multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo _,
                                 Predicate prd,
                                 bit NoRMPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_load<opc, OpcodeStr, SSE_MOVA,
                       _.info512, _.info512.AlignedLdFrag,
                       masked_load_aligned512, NoRMPattern>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_load<opc, OpcodeStr, SSE_MOVA,
                            _.info256, _.info256.AlignedLdFrag,
                            masked_load_aligned256, NoRMPattern>,
                EVEX_V256;

    defm Z128 : avx512_load<opc, OpcodeStr, SSE_MOVA,
                            _.info128, _.info128.AlignedLdFrag,
                            masked_load_aligned128, NoRMPattern>,
                EVEX_V128;
  }
}
3204
// Unaligned-load move family across the three vector lengths.
//
// The 512-bit form (Z) is guarded by `prd` alone; the 256-bit (Z256) and
// 128-bit (Z128) forms additionally require HasVLX.  Every width uses its own
// LdFrag together with masked_load_unaligned.  NoRMPattern and SelectOprr are
// forwarded unchanged to avx512_load.
multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
                          AVX512VLVectorVTInfo _,
                          Predicate prd,
                          bit NoRMPattern = 0,
                          SDPatternOperator SelectOprr = vselect> {
  let Predicates = [prd] in
  defm Z : avx512_load<opc, OpcodeStr, SSE_MOVU,
                       _.info512, _.info512.LdFrag,
                       masked_load_unaligned, NoRMPattern, SelectOprr>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_load<opc, OpcodeStr, SSE_MOVU,
                            _.info256, _.info256.LdFrag,
                            masked_load_unaligned, NoRMPattern, SelectOprr>,
                EVEX_V256;

    defm Z128 : avx512_load<opc, OpcodeStr, SSE_MOVU,
                            _.info128, _.info128.LdFrag,
                            masked_load_unaligned, NoRMPattern, SelectOprr>,
                EVEX_V128;
  }
}
3224
// Store-direction forms of an EVEX full-vector move for one vector type `_`.
//
// Instructions defined:
//   rr_REV / rrk_REV / rrkz_REV
//                register-to-register forms using the store opcode
//                (MRMDestReg).  They have no isel patterns, print with a
//                ".s" mnemonic suffix, and are linked through FoldGenData to
//                the instruction named Name # {rr, rrk, rrkz}.
//   mr           unmasked store; its pattern is dropped when NoMRPattern is
//                set.
//   mrk          masked store; selected through the `mstore` pattern below.
//
// Parameters:
//   st_frag      store fragment matched by `mr`.
//   mstore       masked-store fragment mapped onto `mrk`.
//   Name         instruction name prefix used for the FoldGenData links.
multiclass avx512_store<bits<8> opc, string OpcodeStr, MoveLoadStoreItins itins,
                        X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
                        string Name, bit NoMRPattern = 0> {
  let hasSideEffects = 0 in {
    def rr_REV : AVX512PI<opc, MRMDestReg,
                          (outs _.RC:$dst), (ins _.RC:$src),
                          !strconcat(OpcodeStr,
                                     ".s\t{$src, $dst|$dst, $src}"),
                          [], _.ExeDomain, itins.rr>,
                 EVEX, FoldGenData<Name#rr>, Sched<[WriteMove]>;

    def rrk_REV : AVX512PI<opc, MRMDestReg,
                           (outs _.RC:$dst), (ins _.KRCWM:$mask, _.RC:$src),
                           !strconcat(OpcodeStr,
                                      ".s\t{$src, ${dst} {${mask}}|",
                                      "${dst} {${mask}}, $src}"),
                           [], _.ExeDomain, itins.rr>,
                  EVEX, EVEX_K, FoldGenData<Name#rrk>, Sched<[WriteMove]>;

    def rrkz_REV : AVX512PI<opc, MRMDestReg,
                            (outs _.RC:$dst), (ins _.KRCWM:$mask, _.RC:$src),
                            !strconcat(OpcodeStr,
                                       ".s\t{$src, ${dst} {${mask}} {z}|",
                                       "${dst} {${mask}} {z}, $src}"),
                            [], _.ExeDomain, itins.rr>,
                   EVEX, EVEX_KZ, FoldGenData<Name#rrkz>, Sched<[WriteMove]>;
  }

  // Unmasked store.  The `let` covers only this definition.
  let hasSideEffects = 0, mayStore = 1 in
  def mr : AVX512PI<opc, MRMDestMem,
                    (outs), (ins _.MemOp:$dst, _.RC:$src),
                    OpcodeStr # "\t{$src, $dst|$dst, $src}",
                    !if(NoMRPattern, [],
                        [(st_frag (_.VT _.RC:$src), addr:$dst)]),
                    _.ExeDomain, itins.mr>,
           EVEX, Sched<[WriteStore]>;

  // Masked store; no inline pattern, see the Pat below.
  def mrk : AVX512PI<opc, MRMDestMem,
                     (outs), (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
                     !strconcat(OpcodeStr,
                       "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
                     [], _.ExeDomain, itins.mr>,
            EVEX, EVEX_K, Sched<[WriteStore]>;

  def : Pat<(mstore addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src)),
            (!cast<Instruction>(NAME#_.ZSuffix##mrk)
                 addr:$ptr, _.KRCWM:$mask, _.RC:$src)>;
}
3262
Robert Khasanov7ca7df02014-08-04 14:35:15 +00003263
// Unaligned-store move family across the three vector lengths.
//
// The 512-bit form (Z) is guarded by `prd` alone; the 256-bit (Z256) and
// 128-bit (Z128) forms additionally require HasVLX.  All widths match the
// generic `store` fragment and masked_store_unaligned.  The FoldGenData name
// passed down is Name with the width suffix appended, and NoMRPattern is
// forwarded unchanged to avx512_store.
multiclass avx512_store_vl<bits<8> opc, string OpcodeStr,
                           AVX512VLVectorVTInfo _, Predicate prd,
                           string Name, bit NoMRPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_store<opc, OpcodeStr, SSE_MOVU,
                        _.info512, store, masked_store_unaligned,
                        Name#Z, NoMRPattern>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, SSE_MOVU,
                             _.info256, store, masked_store_unaligned,
                             Name#Z256, NoMRPattern>,
                EVEX_V256;

    defm Z128 : avx512_store<opc, OpcodeStr, SSE_MOVU,
                             _.info128, store, masked_store_unaligned,
                             Name#Z128, NoMRPattern>,
                EVEX_V128;
  }
}
3280
// Aligned-store move family across the three vector lengths.
//
// The 512-bit form (Z) is guarded by `prd` alone; the 256-bit (Z256) and
// 128-bit (Z128) forms additionally require HasVLX.  All widths match the
// `alignedstore` fragment plus the width-specific masked_store_aligned*
// fragment.  The FoldGenData name passed down is Name with the width suffix
// appended.
//
// NOTE(review): NoMRPattern is accepted here but is NOT forwarded to
// avx512_store, so it currently has no effect (avx512_store_vl does forward
// it).  VMOVDQA32 passes 1 for it.  Forwarding it would remove the aligned
// `mr` patterns for that instantiation, so confirm that the stand-alone store
// patterns cover the affected alignedstore types before wiring it through.
multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _, Predicate prd,
                                  string Name, bit NoMRPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_store<opc, OpcodeStr, SSE_MOVA,
                        _.info512, alignedstore, masked_store_aligned512,
                        Name#Z>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, SSE_MOVA,
                             _.info256, alignedstore, masked_store_aligned256,
                             Name#Z256>,
                EVEX_V256;

    defm Z128 : avx512_store<opc, OpcodeStr, SSE_MOVA,
                             _.info128, alignedstore, masked_store_aligned128,
                             Name#Z128>,
                EVEX_V128;
  }
}
3295
// Packed floating-point moves.  Each defm combines the load-direction opcode
// (0x28 aligned / 0x10 unaligned) with the store-direction opcode
// (0x29 aligned / 0x11 unaligned).

// Aligned, single precision.
defm VMOVAPS :
    avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info, HasAVX512>,
    avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info, HasAVX512,
                           "VMOVAPS">,
    PS, EVEX_CD8<32, CD8VF>;

// Aligned, double precision.
defm VMOVAPD :
    avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info, HasAVX512>,
    avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info, HasAVX512,
                           "VMOVAPD">,
    PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Unaligned, single precision.  The load side keeps its `rm` pattern
// (NoRMPattern = 0) and passes null_frag as the register-register select
// operator.
defm VMOVUPS :
    avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
                   0, null_frag>,
    avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
                    "VMOVUPS">,
    PS, EVEX_CD8<32, CD8VF>;

// Unaligned, double precision.  Same load-side arguments as VMOVUPS.
defm VMOVUPD :
    avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
                   0, null_frag>,
    avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
                    "VMOVUPD">,
    PD, VEX_W, EVEX_CD8<64, CD8VF>;
Robert Khasanov7ca7df02014-08-04 14:35:15 +00003319
// Packed integer moves.  All use opcode 0x6F for the load direction and 0x7F
// for the store direction; the variants differ in prefix, VEX_W and element
// size.

// Aligned, 32-bit elements.  The unmasked load pattern is disabled
// (NoRMPattern = 1); 1 is also passed as NoMRPattern on the store side.
defm VMOVDQA32 :
    avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info, HasAVX512, 1>,
    avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info, HasAVX512,
                           "VMOVDQA32", 1>,
    PD, EVEX_CD8<32, CD8VF>;

// Aligned, 64-bit elements.
defm VMOVDQA64 :
    avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info, HasAVX512>,
    avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info, HasAVX512,
                           "VMOVDQA64">,
    PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Unaligned, 8-bit elements (requires BWI).  Unmasked load and store patterns
// are both disabled.
defm VMOVDQU8 :
    avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI, 1>,
    avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
                    "VMOVDQU8", 1>,
    XD, EVEX_CD8<8, CD8VF>;

// Unaligned, 16-bit elements (requires BWI).  Unmasked load and store
// patterns are both disabled.
defm VMOVDQU16 :
    avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI, 1>,
    avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
                    "VMOVDQU16", 1>,
    XD, VEX_W, EVEX_CD8<16, CD8VF>;

// Unaligned, 32-bit elements.  Unmasked load and store patterns are both
// disabled; null_frag is the register-register select operator.
defm VMOVDQU32 :
    avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
                   1, null_frag>,
    avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
                    "VMOVDQU32", 1>,
    XS, EVEX_CD8<32, CD8VF>;

// Unaligned, 64-bit elements.  Unmasked patterns are kept; null_frag is the
// register-register select operator.
defm VMOVDQU64 :
    avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
                   0, null_frag>,
    avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
                    "VMOVDQU64">,
    XS, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovsky1f3ed412013-10-22 09:19:28 +00003353
// Special instructions to help with spilling when we don't have VLX. We need
// to load or store from a ZMM register instead. These are converted in
// expandPostRAPseudos.
//
// Load pseudos for 128-bit and 256-bit registers, in aligned (VMOVAPS*) and
// unaligned (VMOVUPS*) flavours.  They have no encoding, asm string or
// pattern.
// NOTE(review): the VMOVUPS* pseudos use the same IIC_SSE_MOVA_P_RM itinerary
// as the VMOVAPS* ones -- confirm this is intended.
let isPseudo = 1, mayLoad = 1, hasSideEffects = 0,
    isReMaterializable = 1, canFoldAsLoad = 1,
    SchedRW = [WriteLoad] in {
  def VMOVAPSZ128rm_NOVLX : I<0, Pseudo,
                              (outs VR128X:$dst), (ins f128mem:$src),
                              "", [], IIC_SSE_MOVA_P_RM>;
  def VMOVAPSZ256rm_NOVLX : I<0, Pseudo,
                              (outs VR256X:$dst), (ins f256mem:$src),
                              "", [], IIC_SSE_MOVA_P_RM>;
  def VMOVUPSZ128rm_NOVLX : I<0, Pseudo,
                              (outs VR128X:$dst), (ins f128mem:$src),
                              "", [], IIC_SSE_MOVA_P_RM>;
  def VMOVUPSZ256rm_NOVLX : I<0, Pseudo,
                              (outs VR256X:$dst), (ins f256mem:$src),
                              "", [], IIC_SSE_MOVA_P_RM>;
}
3368
// Store pseudos for 128-bit and 256-bit registers, in aligned (VMOVAPS*) and
// unaligned (VMOVUPS*) flavours.  They have no encoding, asm string or
// pattern.
// NOTE(review): the VMOVUPS* pseudos use the same IIC_SSE_MOVA_P_MR itinerary
// as the VMOVAPS* ones -- confirm this is intended.
let isPseudo = 1, mayStore = 1, hasSideEffects = 0,
    SchedRW = [WriteStore] in {
  def VMOVAPSZ128mr_NOVLX : I<0, Pseudo,
                              (outs), (ins f128mem:$dst, VR128X:$src),
                              "", [], IIC_SSE_MOVA_P_MR>;
  def VMOVAPSZ256mr_NOVLX : I<0, Pseudo,
                              (outs), (ins f256mem:$dst, VR256X:$src),
                              "", [], IIC_SSE_MOVA_P_MR>;
  def VMOVUPSZ128mr_NOVLX : I<0, Pseudo,
                              (outs), (ins f128mem:$dst, VR128X:$src),
                              "", [], IIC_SSE_MOVA_P_MR>;
  def VMOVUPSZ256mr_NOVLX : I<0, Pseudo,
                              (outs), (ins f256mem:$dst, VR256X:$src),
                              "", [], IIC_SSE_MOVA_P_MR>;
}
3379
// A select whose *true* operand is all zeros keeps $src only where the mask
// bit is clear.  Lower it to a zero-masked move under the inverted mask.

// v8i64: the v8i1 mask is copied to VK16, inverted with KNOTW, and the
// result copied back to VK8.
def : Pat<(v8i64 (vselect VK8WM:$mask,
                          (bc_v8i64 (v16i32 immAllZerosV)),
                          (v8i64 VR512:$src))),
          (VMOVDQA64Zrrkz
              (COPY_TO_REGCLASS
                  (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
                  VK8),
              VR512:$src)>;

// v16i32: the mask is already 16 bits wide.
def : Pat<(v16i32 (vselect VK16WM:$mask,
                           (v16i32 immAllZerosV),
                           (v16i32 VR512:$src))),
          (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;

// These patterns exist to prevent the above patterns from introducing a second
// mask inversion when one already exists.
def : Pat<(v8i64 (vselect (xor VK8:$mask, (v8i1 immAllOnesV)),
                          (bc_v8i64 (v16i32 immAllZerosV)),
                          (v8i64 VR512:$src))),
          (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;

def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
                           (v16i32 immAllZerosV),
                           (v16i32 VR512:$src))),
          (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
3399
// Lowers a masked select on a Narrow vector type by performing the masked
// move on the Wide type and extracting the Narrow subregister.
//
//   InstrStr  name prefix of the wide move; "rrk" / "rrkz" is appended.
//   Narrow    type of the select being matched.
//   Wide      type the operation is actually performed in.
//
// Narrow operands are placed into an otherwise undefined Wide register with
// INSERT_SUBREG, and the mask is copied into the Wide write-mask class.
multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
                              X86VectorVTInfo Wide> {
  // Merge-masking: select between $src1 and $src0.
  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                                Narrow.RC:$src1,
                                Narrow.RC:$src0)),
            (EXTRACT_SUBREG
                (Wide.VT
                    (!cast<Instruction>(InstrStr#"rrk")
                        (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF),
                                                Narrow.RC:$src0,
                                                Narrow.SubRegIdx)),
                        (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
                        (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF),
                                                Narrow.RC:$src1,
                                                Narrow.SubRegIdx)))),
                Narrow.SubRegIdx)>;

  // Zero-masking: select between $src1 and all zeros.
  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                                Narrow.RC:$src1,
                                Narrow.ImmAllZerosV)),
            (EXTRACT_SUBREG
                (Wide.VT
                    (!cast<Instruction>(InstrStr#"rrkz")
                        (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
                        (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF),
                                                Narrow.RC:$src1,
                                                Narrow.SubRegIdx)))),
                Narrow.SubRegIdx)>;
}
3421
// Patterns for handling masked selects of 128-bit and 256-bit vectors with
// 32-bit or 64-bit elements when VLX isn't available. Use a 512-bit operation
// and extract.
let Predicates = [HasAVX512, NoVLX] in {
  // 32-bit elements, widened to v16f32 / v16i32.
  defm : mask_move_lowering<"VMOVAPSZ",   v4f32x_info, v16f32_info>;
  defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
  defm : mask_move_lowering<"VMOVAPSZ",   v8f32x_info, v16f32_info>;
  defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;

  // 64-bit elements, widened to v8f64 / v8i64.
  defm : mask_move_lowering<"VMOVAPDZ",   v2f64x_info, v8f64_info>;
  defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
  defm : mask_move_lowering<"VMOVAPDZ",   v4f64x_info, v8f64_info>;
  defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
}
3435
// Same widening for 8-bit and 16-bit element vectors, which need BWI for the
// 512-bit masked move.
let Predicates = [HasBWI, NoVLX] in {
  // 8-bit elements, widened to v64i8.
  defm : mask_move_lowering<"VMOVDQU8Z",  v16i8x_info,  v64i8_info>;
  defm : mask_move_lowering<"VMOVDQU8Z",  v32i8x_info,  v64i8_info>;

  // 16-bit elements, widened to v32i16.
  defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info,  v32i16_info>;
  defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
}
3443
// Unmasked 512-bit integer stores are selected to the 64-bit element moves
// regardless of the vector's element type.  The alignedstore patterns are
// kept ahead of the generic store patterns for the same type.
let Predicates = [HasAVX512] in {
  // 512-bit store.

  // Aligned.
  def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;

  // Generic `store` fragment.
  def : Pat<(store (v16i32 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v32i16 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v64i8 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
}
3457
// With VLX, unmasked 128-bit and 256-bit integer stores are likewise selected
// to the 64-bit element moves.  The alignedstore patterns are kept ahead of
// the generic store patterns for the same type.
let Predicates = [HasVLX] in {
  // 128-bit store.

  // Aligned.
  def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;

  // Generic `store` fragment.
  def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;

  // 256-bit store.

  // Aligned.
  def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;

  // Generic `store` fragment.
  def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
}
3483
// Folds a masked select of an index-0 subvector extract into a masked move
// that reads the low subregister of the source directly.
//
//   InstrStr  name prefix of the masked move; "rrk" / "rrkz" is appended.
//   From      type of the vector being extracted from.
//   To        type of the extracted subvector (before the bitconvert).
//   Cast      type the extract is bitconverted to; it provides the mask
//             class, the result type and the zero vector.
multiclass masked_move_for_extract<string InstrStr, X86VectorVTInfo From,
                                   X86VectorVTInfo To, X86VectorVTInfo Cast> {
  // Merge-masking: inactive elements come from $src0.
  def : Pat<(Cast.VT (vselect
                         Cast.KRCWM:$mask,
                         (bitconvert
                             (To.VT (extract_subvector (From.VT From.RC:$src),
                                                       (iPTR 0)))),
                         To.RC:$src0)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
                         Cast.RC:$src0,
                         Cast.KRCWM:$mask,
                         (EXTRACT_SUBREG From.RC:$src, To.SubRegIdx)))>;

  // Zero-masking: inactive elements are zero.
  def : Pat<(Cast.VT (vselect
                         Cast.KRCWM:$mask,
                         (bitconvert
                             (To.VT (extract_subvector (From.VT From.RC:$src),
                                                       (iPTR 0)))),
                         Cast.ImmAllZerosV)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
                         Cast.KRCWM:$mask,
                         (EXTRACT_SUBREG From.RC:$src, To.SubRegIdx)))>;
}
3504
3505
// Instantiations of masked_move_for_extract.  For each (source width, result
// width) pair, every source element type is combined with each Cast type of
// the same domain: i64 / i32 casts for the integer sources, f64 / f32 casts
// for the floating-point sources.
let Predicates = [HasVLX] in {
  // A masked extract from the first 128-bits of a 256-bit vector can be
  // implemented with masked move.
  defm : masked_move_for_extract<"VMOVDQA64Z128", v4i64x_info,  v2i64x_info, v2i64x_info>;
  defm : masked_move_for_extract<"VMOVDQA64Z128", v8i32x_info,  v4i32x_info, v2i64x_info>;
  defm : masked_move_for_extract<"VMOVDQA64Z128", v16i16x_info, v8i16x_info, v2i64x_info>;
  defm : masked_move_for_extract<"VMOVDQA64Z128", v32i8x_info,  v16i8x_info, v2i64x_info>;
  defm : masked_move_for_extract<"VMOVDQA32Z128", v4i64x_info,  v2i64x_info, v4i32x_info>;
  defm : masked_move_for_extract<"VMOVDQA32Z128", v8i32x_info,  v4i32x_info, v4i32x_info>;
  defm : masked_move_for_extract<"VMOVDQA32Z128", v16i16x_info, v8i16x_info, v4i32x_info>;
  defm : masked_move_for_extract<"VMOVDQA32Z128", v32i8x_info,  v16i8x_info, v4i32x_info>;
  defm : masked_move_for_extract<"VMOVAPDZ128",   v4f64x_info,  v2f64x_info, v2f64x_info>;
  defm : masked_move_for_extract<"VMOVAPDZ128",   v8f32x_info,  v4f32x_info, v2f64x_info>;
  defm : masked_move_for_extract<"VMOVAPSZ128",   v4f64x_info,  v2f64x_info, v4f32x_info>;
  defm : masked_move_for_extract<"VMOVAPSZ128",   v8f32x_info,  v4f32x_info, v4f32x_info>;

  // A masked extract from the first 128-bits of a 512-bit vector can be
  // implemented with masked move.
  defm : masked_move_for_extract<"VMOVDQA64Z128", v8i64_info,  v2i64x_info, v2i64x_info>;
  defm : masked_move_for_extract<"VMOVDQA64Z128", v16i32_info, v4i32x_info, v2i64x_info>;
  defm : masked_move_for_extract<"VMOVDQA64Z128", v32i16_info, v8i16x_info, v2i64x_info>;
  defm : masked_move_for_extract<"VMOVDQA64Z128", v64i8_info,  v16i8x_info, v2i64x_info>;
  defm : masked_move_for_extract<"VMOVDQA32Z128", v8i64_info,  v2i64x_info, v4i32x_info>;
  defm : masked_move_for_extract<"VMOVDQA32Z128", v16i32_info, v4i32x_info, v4i32x_info>;
  defm : masked_move_for_extract<"VMOVDQA32Z128", v32i16_info, v8i16x_info, v4i32x_info>;
  defm : masked_move_for_extract<"VMOVDQA32Z128", v64i8_info,  v16i8x_info, v4i32x_info>;
  defm : masked_move_for_extract<"VMOVAPDZ128",   v8f64_info,  v2f64x_info, v2f64x_info>;
  defm : masked_move_for_extract<"VMOVAPDZ128",   v16f32_info, v4f32x_info, v2f64x_info>;
  defm : masked_move_for_extract<"VMOVAPSZ128",   v8f64_info,  v2f64x_info, v4f32x_info>;
  defm : masked_move_for_extract<"VMOVAPSZ128",   v16f32_info, v4f32x_info, v4f32x_info>;

  // A masked extract from the first 256-bits of a 512-bit vector can be
  // implemented with masked move.
  defm : masked_move_for_extract<"VMOVDQA64Z256", v8i64_info,  v4i64x_info,  v4i64x_info>;
  defm : masked_move_for_extract<"VMOVDQA64Z256", v16i32_info, v8i32x_info,  v4i64x_info>;
  defm : masked_move_for_extract<"VMOVDQA64Z256", v32i16_info, v16i16x_info, v4i64x_info>;
  defm : masked_move_for_extract<"VMOVDQA64Z256", v64i8_info,  v32i8x_info,  v4i64x_info>;
  defm : masked_move_for_extract<"VMOVDQA32Z256", v8i64_info,  v4i64x_info,  v8i32x_info>;
  defm : masked_move_for_extract<"VMOVDQA32Z256", v16i32_info, v8i32x_info,  v8i32x_info>;
  defm : masked_move_for_extract<"VMOVDQA32Z256", v32i16_info, v16i16x_info, v8i32x_info>;
  defm : masked_move_for_extract<"VMOVDQA32Z256", v64i8_info,  v32i8x_info,  v8i32x_info>;
  defm : masked_move_for_extract<"VMOVAPDZ256",   v8f64_info,  v4f64x_info,  v4f64x_info>;
  defm : masked_move_for_extract<"VMOVAPDZ256",   v16f32_info, v8f32x_info,  v4f64x_info>;
  defm : masked_move_for_extract<"VMOVAPSZ256",   v8f64_info,  v4f64x_info,  v8f32x_info>;
  defm : masked_move_for_extract<"VMOVAPSZ256",   v16f32_info, v8f32x_info,  v8f32x_info>;
}
Simon Pilgrimb2a80952017-01-08 16:45:39 +00003552
// Move Int Doubleword to Packed Double Int
//
// EVEX-encoded vmovd/vmovq forms that move a GPR or memory integer into an
// XMM register (VR128X) or a scalar FP register (FR64X), plus the reverse
// FR64X -> GR64/memory bitcasts.
let ExeDomain = SSEPackedInt in {
// GR32 -> low element of a v4i32 (scalar_to_vector).
def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set VR128X:$dst,
                        (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
                      EVEX, Sched<[WriteMove]>;
// 32-bit load -> low element of a v4i32.
def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set VR128X:$dst,
                        (v4i32 (scalar_to_vector (loadi32 addr:$src))))],
                      IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteLoad]>;
// GR64 -> low element of a v2i64.
def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(set VR128X:$dst,
                        (v2i64 (scalar_to_vector GR64:$src)))],
                      IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
// Memory form of the 0x6E vmovq encoding. It has no selection pattern; it is
// marked isCodeGenOnly but ForceDisassemble keeps it visible to the
// disassembler.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
                      (ins i64mem:$src),
                      "vmovq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVDQ>,
                      EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteLoad]>;
// Codegen-only bitcasts between i64 (GR64 / memory) and f64 (FR64X).
let isCodeGenOnly = 1 in {
def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(set FR64X:$dst, (bitconvert GR64:$src))],
                      IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
// NOTE(review): this form uses EVEX_CD8<8, CD8VT8> while the other 64-bit
// memory forms in this block use EVEX_CD8<64, CD8VT1>; presumably both give
// the same disp8 scale -- confirm before normalizing.
def VMOV64toSDZrm : AVX512XSI<0x7E, MRMSrcMem, (outs FR64X:$dst), (ins i64mem:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(set FR64X:$dst, (bitconvert (loadi64 addr:$src)))]>,
                      EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteLoad]>;
def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(set GR64:$dst, (bitconvert FR64X:$src))],
                      IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(store (i64 (bitconvert FR64X:$src)), addr:$dst)],
                      IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteStore]>,
                      EVEX_CD8<64, CD8VT1>;
}
} // ExeDomain = SSEPackedInt
3596
// Move Int Doubleword to Single Scalar
//
// Codegen-only vmovd forms that bitcast a 32-bit integer (GR32 or a 32-bit
// load) into a scalar single-precision register (FR32X).
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set FR32X:$dst, (bitconvert GR32:$src))],
                      IIC_SSE_MOVDQ>, EVEX, Sched<[WriteMove]>;

def VMOVDI2SSZrm : AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))],
                      IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteLoad]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3610
// Move doubleword from xmm register to r/m32
//
// Both forms select on extracting element 0 of a v4i32: the register form
// writes it to a GR32, the memory form stores it as an i32.
let ExeDomain = SSEPackedInt in {
def VMOVPDI2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
                       "vmovd\t{$src, $dst|$dst, $src}",
                       [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
                                        (iPTR 0)))], IIC_SSE_MOVD_ToGP>,
                       EVEX, Sched<[WriteMove]>;
def VMOVPDI2DIZmr  : AVX512BI<0x7E, MRMDestMem, (outs),
                       (ins i32mem:$dst, VR128X:$src),
                       "vmovd\t{$src, $dst|$dst, $src}",
                       [(store (i32 (extractelt (v4i32 VR128X:$src),
                                     (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>,
                       EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteStore]>;
} // ExeDomain = SSEPackedInt
3626
// Move quadword from xmm1 register to r/m64
//
let ExeDomain = SSEPackedInt in {
// Element 0 of a v2i64 -> GR64 (opcode 0x7E).
def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
                                                   (iPTR 0)))],
                      IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_W, Sched<[WriteMove]>,
                      Requires<[HasAVX512, In64BitMode]>;

// Memory form of the 0x7E encoding. It has no selection pattern; it is
// marked isCodeGenOnly but ForceDisassemble keeps it visible to the
// disassembler. EVEX_CD8<64, CD8VT1> matches the other 64-bit memory forms
// in this section (VMOV64toPQIZrm, VMOVSDto64Zmr, VMOVPQI2QIZmr); without it
// this would be the only i64mem form with no disp8 scaling information.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [], IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_W,
                      EVEX_CD8<64, CD8VT1>, Sched<[WriteStore]>,
                      Requires<[HasAVX512, In64BitMode]>;

// Store element 0 of a v2i64 to memory (opcode 0xD6).
// NOTE(review): this store is gated on In64BitMode although it does not use a
// 64-bit GPR -- confirm whether the predicate is intentional.
def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
                      (ins i64mem:$dst, VR128X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
                              addr:$dst)], IIC_SSE_MOVDQ>,
                      EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
                      Sched<[WriteStore]>, Requires<[HasAVX512, In64BitMode]>;

// Register-to-register form of the 0xD6 encoding, printed as "vmovq.s"; it
// has no selection pattern.
let hasSideEffects = 0 in
def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
                      (ins VR128X:$src),
                      "vmovq.s\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVDQ>,
                      EVEX, VEX_W, Sched<[WriteMove]>;
} // ExeDomain = SSEPackedInt
3657
// Move Scalar Single to Double Int
//
// Codegen-only vmovd forms that bitcast a scalar single (FR32X) to a 32-bit
// integer, either into a GR32 or stored to memory.
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
def VMOVSS2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
                      (ins FR32X:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set GR32:$dst, (bitconvert FR32X:$src))],
                      IIC_SSE_MOVD_ToGP>, EVEX, Sched<[WriteMove]>;
def VMOVSS2DIZmr  : AVX512BI<0x7E, MRMDestMem, (outs),
                      (ins i32mem:$dst, FR32X:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(store (i32 (bitconvert FR32X:$src)), addr:$dst)],
                      IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteStore]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3672
// Move Quadword Int to Packed Quadword Int
//
// 64-bit load placed in the low element of a v2i64 (scalar_to_vector).
let ExeDomain = SSEPackedInt in {
def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
                      (ins i64mem:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(set VR128X:$dst,
                        (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
                      EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteLoad]>;
} // ExeDomain = SSEPackedInt
3683
// Allow "vmovd" but print "vmovq".
// Both aliases map the "vmovd" spelling onto the 64-bit GPR <-> XMM register
// forms; the trailing 0 keeps "vmovq" as the printed mnemonic.
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;
3689
//===----------------------------------------------------------------------===//
// AVX-512  MOVSS, MOVSD
//===----------------------------------------------------------------------===//
3693
// Defines the EVEX scalar move family for one scalar type.
//   asm    - mnemonic ("vmovss" / "vmovsd" in the instantiations of this
//            multiclass).
//   OpNode - the node matched by the register-register forms.
//   _      - X86VectorVTInfo supplying RC, FRC, VT, KRCWM, ScalarMemOp, etc.
// Variants produced:
//   rr / rrk / rrkz - register forms: unmasked, merge-masked ($src0 tied to
//                     $dst) and zero-masked; masked results are expressed
//                     with X86selects.
//   rm              - scalar load into FRC.
//   rmk / rmkz      - masked loads, no selection pattern here.
//   mr              - scalar store from FRC.
//   mrk             - masked store, no selection pattern here.
multiclass avx512_move_scalar<string asm, SDNode OpNode,
                              X86VectorVTInfo _> {
  def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
             (ins _.RC:$src1, _.RC:$src2),
             !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
             _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V, Sched<[WriteMove]>;
  // Zero-masking: the unselected value is all zeros.
  def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
              !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
              "$dst {${mask}} {z}, $src1, $src2}"),
              [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
                                      (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                                      _.ImmAllZerosV)))],
              _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V, EVEX_KZ, Sched<[WriteMove]>;
  // Merge-masking: the unselected value is the tied pass-through $src0.
  let Constraints = "$src0 = $dst" in
  def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
             (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
             !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
             "$dst {${mask}}, $src1, $src2}"),
             [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
                                     (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                                     (_.VT _.RC:$src0))))],
             _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V, EVEX_K, Sched<[WriteMove]>;
  let canFoldAsLoad = 1, isReMaterializable = 1 in
  def rm : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
             [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
             _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX, Sched<[WriteLoad]>;
  // The masked loads carry no pattern, so mayLoad is set explicitly.
  let mayLoad = 1, hasSideEffects = 0 in {
    let Constraints = "$src0 = $dst" in
    def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
               (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
               !strconcat(asm, "\t{$src, $dst {${mask}}|",
               "$dst {${mask}}, $src}"),
               [], _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX, EVEX_K, Sched<[WriteLoad]>;
    def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
               (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
               !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
               "$dst {${mask}} {z}, $src}"),
               [], _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX, EVEX_KZ, Sched<[WriteLoad]>;
  }
  def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
             [(store _.FRC:$src, addr:$dst)], _.ExeDomain, IIC_SSE_MOV_S_MR>,
             EVEX, Sched<[WriteStore]>;
  // The masked store carries no pattern, so mayStore is set explicitly.
  // NOTE(review): the mask operand is spelled VK1WM here while the other
  // masked variants use _.KRCWM -- presumably the same class for the scalar
  // infos this multiclass is instantiated with; confirm.
  let mayStore = 1, hasSideEffects = 0 in
  def mrk: AVX512PI<0x11, MRMDestMem, (outs),
              (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.FRC:$src),
              !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
              [], _.ExeDomain, IIC_SSE_MOV_S_MR>, EVEX, EVEX_K, Sched<[WriteStore]>;
}
3746
// Instantiate the scalar move family for f32 (vmovss) and f64 (vmovsd).
defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, f32x_info>,
                                  VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;

defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, f64x_info>,
                                  VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003752
Ayman Musa46af8f92016-11-13 14:29:32 +00003753
// Selects the masked register forms (InstrStr#rrk / InstrStr#rrkz) when the
// low element passed to OpNode is a scalar X86selects whose condition is bit
// 0 of a GR32 (truncated to i8 and ANDed with 1).
//   InstrStr - instruction name prefix, e.g. "VMOVSSZ".
//   OpNode   - the scalar move node being matched.
//   ZeroFP   - PatLeaf matching the FP zero used by the zero-masking pattern.
//   _        - 128-bit X86VectorVTInfo for the element type.
multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
                                       PatLeaf ZeroFP, X86VectorVTInfo _> {

// Merge-masking: select between $src1 and $src2. $src2 becomes the tied
// pass-through operand, $src0 supplies the first source vector and $src1 the
// second.
def : Pat<(_.VT (OpNode _.RC:$src0,
                        (_.VT (scalar_to_vector
                                  (_.EltVT (X86selects (scalar_to_vector (and (i8 (trunc GR32:$mask)), (i8 1))),
                                                       (_.EltVT _.FRC:$src1),
                                                       (_.EltVT _.FRC:$src2))))))),
          (!cast<Instruction>(InstrStr#rrk)
                        (COPY_TO_REGCLASS _.FRC:$src2, _.RC),
                        (COPY_TO_REGCLASS GR32:$mask, VK1WM),
                        (_.VT _.RC:$src0),
                        (COPY_TO_REGCLASS _.FRC:$src1, _.RC))>;

// Zero-masking: the false value of the select is ZeroFP.
def : Pat<(_.VT (OpNode _.RC:$src0,
                        (_.VT (scalar_to_vector
                                  (_.EltVT (X86selects (scalar_to_vector (and (i8 (trunc GR32:$mask)), (i8 1))),
                                                       (_.EltVT _.FRC:$src1),
                                                       (_.EltVT ZeroFP))))))),
          (!cast<Instruction>(InstrStr#rrkz)
                        (COPY_TO_REGCLASS GR32:$mask, VK1WM),
                        (_.VT _.RC:$src0),
                        (COPY_TO_REGCLASS _.FRC:$src1, _.RC))>;
}
3778
// Selects InstrStr#mrk for a 512-bit masked_store whose stored value is a
// 128-bit vector inserted at index 0 of undef (through the 256-bit type).
//   InstrStr - instruction name prefix, e.g. "VMOVSSZ".
//   _        - AVX512VLVectorVTInfo giving the 128/256/512-bit infos.
//   Mask     - dag matched as the store mask; it must bind $mask.
//   MaskRC   - register class of $mask, copied directly to VK1WM.
multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
                                        dag Mask, RegisterClass MaskRC> {

def : Pat<(masked_store addr:$dst, Mask,
             (_.info512.VT (insert_subvector undef,
                               (_.info256.VT (insert_subvector undef,
                                                 (_.info128.VT _.info128.RC:$src),
                                                 (iPTR 0))),
                               (iPTR 0)))),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;

}
3793
// Same match as avx512_store_scalar_lowering, for masks held in a register
// class narrower than 32 bits: $mask is first placed into an i32 via
// INSERT_SUBREG at `subreg` of an IMPLICIT_DEF, then copied to VK1WM.
//   InstrStr - instruction name prefix, e.g. "VMOVSSZ".
//   _        - AVX512VLVectorVTInfo giving the 128/256/512-bit infos.
//   Mask     - dag matched as the store mask; it must bind $mask.
//   MaskRC   - register class of $mask.
//   subreg   - sub-register index at which $mask is inserted.
multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
                                               AVX512VLVectorVTInfo _,
                                               dag Mask, RegisterClass MaskRC,
                                               SubRegIndex subreg> {

def : Pat<(masked_store addr:$dst, Mask,
             (_.info512.VT (insert_subvector undef,
                               (_.info256.VT (insert_subvector undef,
                                                 (_.info128.VT _.info128.RC:$src),
                                                 (iPTR 0))),
                               (iPTR 0)))),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;

}
3810
// Selects the masked scalar loads for the low 128 bits extracted from a
// 512-bit masked_load.
//   InstrStr - instruction name prefix, e.g. "VMOVSSZ".
//   _        - AVX512VLVectorVTInfo giving the 128/256/512-bit infos.
//   Mask     - dag matched as the load mask; it must bind $mask.
//   MaskRC   - register class of $mask, copied directly to VK1WM.
multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
                                       dag Mask, RegisterClass MaskRC> {

// Pass-through is all zeros: use the zero-masking load (rmkz).
def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
                                        (_.info512.VT (bitconvert
                                                       (v16i32 immAllZerosV))))),
                           (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      addr:$srcAddr)>;

// Pass-through is X86vzmovl of $src widened to 512 bits via insert_subvector
// into undef: use the merge-masking load (rmk) with $src as the tied operand.
def : Pat<(_.info128.VT (extract_subvector
                (_.info512.VT (masked_load addr:$srcAddr, Mask,
                      (_.info512.VT (insert_subvector undef,
                            (_.info256.VT (insert_subvector undef,
                                  (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                                  (iPTR 0))),
                            (iPTR 0))))),
                (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      addr:$srcAddr)>;

}
3836
// Same matches as avx512_load_scalar_lowering, for masks held in a register
// class narrower than 32 bits: $mask is first placed into an i32 via
// INSERT_SUBREG at `subreg` of an IMPLICIT_DEF, then copied to VK1WM.
//   InstrStr - instruction name prefix, e.g. "VMOVSSZ".
//   _        - AVX512VLVectorVTInfo giving the 128/256/512-bit infos.
//   Mask     - dag matched as the load mask; it must bind $mask.
//   MaskRC   - register class of $mask.
//   subreg   - sub-register index at which $mask is inserted.
multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
                                              AVX512VLVectorVTInfo _,
                                              dag Mask, RegisterClass MaskRC,
                                              SubRegIndex subreg> {

// Pass-through is all zeros: use the zero-masking load (rmkz).
def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
                                        (_.info512.VT (bitconvert
                                                       (v16i32 immAllZerosV))))),
                           (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

// Pass-through is X86vzmovl of $src widened to 512 bits: use the
// merge-masking load (rmk) with $src as the tied operand.
def : Pat<(_.info128.VT (extract_subvector
                (_.info512.VT (masked_load addr:$srcAddr, Mask,
                      (_.info512.VT (insert_subvector undef,
                            (_.info256.VT (insert_subvector undef,
                                  (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                                  (iPTR 0))),
                            (iPTR 0))))),
                (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

}
3864
// Masked register-move lowering for vmovss / vmovsd.
defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;

// Masked scalar stores. Each instantiation matches one way of forming the
// vXi1 mask from bit 0 of a GPR: GR32 (truncated to i16), GR16, or GR8.
defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;

// Masked scalar loads, using the same three mask shapes as the stores above.
defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
Ayman Musa46af8f92016-11-13 14:29:32 +00003881
// Scalar FP selects are implemented with the merge-masked register move:
// $src2 (the false value) is the tied pass-through, $src1 (the true value) is
// the second source, and the first source vector is IMPLICIT_DEF. The result
// is copied back to the scalar register class.

// f32, mask taken from bit 0 of a GR8.
def : Pat<(f32 (X86selects (scalar_to_vector (and GR8:$mask, (i8 1))),
                           (f32 FR32X:$src1), (f32 FR32X:$src2))),
          (COPY_TO_REGCLASS
            (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
                        (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF),
                          GR8:$mask, sub_8bit)), VK1WM),
            (v4f32 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
            FR32X)>;

// f32, mask already in VK1WM.
def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
          (COPY_TO_REGCLASS (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
           VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
           (COPY_TO_REGCLASS FR32X:$src1, VR128X)), FR32X)>;

// f64, mask taken from bit 0 of a GR8.
def : Pat<(f64 (X86selects (scalar_to_vector (and GR8:$mask, (i8 1))),
                           (f64 FR64X:$src1), (f64 FR64X:$src2))),
          (COPY_TO_REGCLASS
            (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
                        (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF),
                          GR8:$mask, sub_8bit)), VK1WM),
            (v2f64 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
            FR64X)>;

// f64, mask already in VK1WM.
def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
          (COPY_TO_REGCLASS (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
           VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
           (COPY_TO_REGCLASS FR64X:$src1, VR128X)), FR64X)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003909
// Lower the mask_store_ss intrinsic to the masked scalar store. The GR8 mask
// is placed into an i32 via INSERT_SUBREG and copied to VK1WM; the vector
// source is copied to FR32X.
def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask),
          (VMOVSSZmrk addr:$dst, (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$mask, sub_8bit)), VK1WM),
           (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
3913
// Register-register vmovss/vmovsd using the 0x11 (MRMDestReg) opcode, printed
// with a ".s" suffix. They have no selection patterns; FoldGenData names the
// corresponding 0x10 form for each.
let hasSideEffects = 0 in {
  def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                           (ins VR128X:$src1, VR128X:$src2),
                           "vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                           [], IIC_SSE_MOV_S_RR>, XS, EVEX_4V, VEX_LIG,
                           FoldGenData<"VMOVSSZrr">, Sched<[WriteMove]>;

  // Merge-masked form: $src0 is the tied pass-through.
  let Constraints = "$src0 = $dst" in
  def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                             (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
                                                   VR128X:$src1, VR128X:$src2),
                             "vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
                                        "$dst {${mask}}, $src1, $src2}",
                             [], IIC_SSE_MOV_S_RR>, EVEX_K, XS, EVEX_4V, VEX_LIG,
                             FoldGenData<"VMOVSSZrrk">, Sched<[WriteMove]>;

  def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                         (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
                         "vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                                    "$dst {${mask}} {z}, $src1, $src2}",
                         [], IIC_SSE_MOV_S_RR>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
                         FoldGenData<"VMOVSSZrrkz">, Sched<[WriteMove]>;

  def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                           (ins VR128X:$src1, VR128X:$src2),
                           "vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                           [], IIC_SSE_MOV_S_RR>, XD, EVEX_4V, VEX_LIG, VEX_W,
                           FoldGenData<"VMOVSDZrr">, Sched<[WriteMove]>;

  // Merge-masked form: $src0 is the tied pass-through.
  let Constraints = "$src0 = $dst" in
  def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                             (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
                                                   VR128X:$src1, VR128X:$src2),
                             "vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
                                        "$dst {${mask}}, $src1, $src2}",
                             [], IIC_SSE_MOV_S_RR>, EVEX_K, XD, EVEX_4V, VEX_LIG,
                             VEX_W, FoldGenData<"VMOVSDZrrk">, Sched<[WriteMove]>;

  def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                              (ins f64x_info.KRCWM:$mask, VR128X:$src1,
                                                          VR128X:$src2),
                              "vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                                         "$dst {${mask}} {z}, $src1, $src2}",
                              [], IIC_SSE_MOV_S_RR>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
                              VEX_W, FoldGenData<"VMOVSDZrrkz">, Sched<[WriteMove]>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003960
3961let Predicates = [HasAVX512] in {
3962 let AddedComplexity = 15 in {
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003963 def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
Craig Topper6fb55712017-10-04 17:20:12 +00003964 (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003965 def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
Craig Topper6fb55712017-10-04 17:20:12 +00003966 (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003967 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64X:$src)))),
Craig Topper6fb55712017-10-04 17:20:12 +00003968 (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
3969 (COPY_TO_REGCLASS FR64X:$src, VR128))>;
Craig Topper3f8126e2016-08-13 05:43:20 +00003970 }
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003971
3972 // Move low f32 and clear high bits.
3973 def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
3974 (SUBREG_TO_REG (i32 0),
Craig Topper09b7e0f2017-01-14 07:29:24 +00003975 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003976 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)), sub_xmm)>;
3977 def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
3978 (SUBREG_TO_REG (i32 0),
Craig Topper09b7e0f2017-01-14 07:29:24 +00003979 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
Craig Topper600685d2016-08-13 05:33:12 +00003980 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)), sub_xmm)>;
Craig Topper600685d2016-08-13 05:33:12 +00003981 def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
3982 (SUBREG_TO_REG (i32 0),
Craig Topper09b7e0f2017-01-14 07:29:24 +00003983 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
Craig Topper600685d2016-08-13 05:33:12 +00003984 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)), sub_xmm)>;
3985 def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
3986 (SUBREG_TO_REG (i32 0),
Craig Topper09b7e0f2017-01-14 07:29:24 +00003987 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
Craig Topper600685d2016-08-13 05:33:12 +00003988 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)), sub_xmm)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003989
3990 let AddedComplexity = 20 in {
3991 // MOVSSrm zeros the high parts of the register; represent this
3992 // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
3993 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
3994 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
3995 def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
3996 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
3997 def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
3998 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
Simon Pilgrim6392b8d2016-08-24 10:46:40 +00003999 def : Pat<(v4f32 (X86vzload addr:$src)),
4000 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004001
4002 // MOVSDrm zeros the high parts of the register; represent this
4003 // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
4004 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
4005 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
4006 def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
4007 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
4008 def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
4009 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
4010 def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
4011 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
4012 def : Pat<(v2f64 (X86vzload addr:$src)),
4013 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
4014
4015 // Represent the same patterns above but in the form they appear for
4016 // 256-bit types
4017 def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
4018 (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
Elena Demikhovsky34586e72013-10-02 12:20:42 +00004019 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004020 def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
4021 (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
4022 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
Simon Pilgrim6392b8d2016-08-24 10:46:40 +00004023 def : Pat<(v8f32 (X86vzload addr:$src)),
4024 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004025 def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
4026 (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
4027 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
Simon Pilgrim7823fd22016-02-04 19:27:51 +00004028 def : Pat<(v4f64 (X86vzload addr:$src)),
4029 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
Simon Pilgrim6788f332016-02-04 16:12:56 +00004030
4031 // Represent the same patterns above but in the form they appear for
4032 // 512-bit types
4033 def : Pat<(v16i32 (X86vzmovl (insert_subvector undef,
4034 (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
4035 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
4036 def : Pat<(v16f32 (X86vzmovl (insert_subvector undef,
4037 (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
4038 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
Simon Pilgrim6392b8d2016-08-24 10:46:40 +00004039 def : Pat<(v16f32 (X86vzload addr:$src)),
4040 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
Simon Pilgrim6788f332016-02-04 16:12:56 +00004041 def : Pat<(v8f64 (X86vzmovl (insert_subvector undef,
4042 (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
4043 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
Simon Pilgrim7823fd22016-02-04 19:27:51 +00004044 def : Pat<(v8f64 (X86vzload addr:$src)),
4045 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004046 }
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004047 def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
4048 (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
Elena Demikhovsky34586e72013-10-02 12:20:42 +00004049 (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004050
4051 // Move low f64 and clear high bits.
4052 def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
4053 (SUBREG_TO_REG (i32 0),
Craig Topper09b7e0f2017-01-14 07:29:24 +00004054 (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004055 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)), sub_xmm)>;
Craig Topper600685d2016-08-13 05:33:12 +00004056 def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
4057 (SUBREG_TO_REG (i32 0),
Craig Topper09b7e0f2017-01-14 07:29:24 +00004058 (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
Craig Topper600685d2016-08-13 05:33:12 +00004059 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)), sub_xmm)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004060
4061 def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
Craig Topper09b7e0f2017-01-14 07:29:24 +00004062 (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004063 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)), sub_xmm)>;
Craig Topper600685d2016-08-13 05:33:12 +00004064 def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
Craig Topper09b7e0f2017-01-14 07:29:24 +00004065 (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
Craig Topper600685d2016-08-13 05:33:12 +00004066 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)), sub_xmm)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004067
4068 // Extract and store.
Matt Arsenaultfbd9bbf2015-12-11 19:20:16 +00004069 def : Pat<(store (f32 (extractelt (v4f32 VR128X:$src), (iPTR 0))),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004070 addr:$dst),
4071 (VMOVSSZmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004072
4073 // Shuffle with VMOVSS
4074 def : Pat<(v4i32 (X86Movss VR128X:$src1, VR128X:$src2)),
Craig Topper6fb55712017-10-04 17:20:12 +00004075 (VMOVSSZrr (v4i32 VR128X:$src1), VR128X:$src2)>;
4076
4077 def : Pat<(v4f32 (X86Movss VR128X:$src1, (scalar_to_vector FR32X:$src2))),
4078 (VMOVSSZrr VR128X:$src1,
4079 (COPY_TO_REGCLASS FR32X:$src2, VR128X))>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004080
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004081 // Shuffle with VMOVSD
4082 def : Pat<(v2i64 (X86Movsd VR128X:$src1, VR128X:$src2)),
Craig Topper6fb55712017-10-04 17:20:12 +00004083 (VMOVSDZrr VR128X:$src1, VR128X:$src2)>;
4084
4085 def : Pat<(v2f64 (X86Movsd VR128X:$src1, (scalar_to_vector FR64X:$src2))),
4086 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS FR64X:$src2, VR128X))>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004087
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004088 def : Pat<(v2f64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
Craig Topper6fb55712017-10-04 17:20:12 +00004089 (VMOVSDZrr VR128X:$src1, VR128X:$src2)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004090 def : Pat<(v4f32 (X86Movlps VR128X:$src1, VR128X:$src2)),
Craig Topper6fb55712017-10-04 17:20:12 +00004091 (VMOVSDZrr VR128X:$src1, VR128X:$src2)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004092}
4093
// Register-to-register vmovq (opcode 0x7E, EVEX encoded, VEX_W set).
// The attached pattern selects this instruction for X86vzmovl applied to a
// v2i64 value held in VR128X; AddedComplexity = 15 is applied to that pattern.
4094let AddedComplexity = 15 in
4095def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
4096 (ins VR128X:$src),
Elena Demikhovskycf088092013-12-11 14:31:04 +00004097 "vmovq\t{$src, $dst|$dst, $src}",
Michael Liao5bf95782014-12-04 05:20:33 +00004098 [(set VR128X:$dst, (v2i64 (X86vzmovl
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004099 (v2i64 VR128X:$src))))],
4100 IIC_SSE_MOVQ_RR>, EVEX, VEX_W;
4101
// Selection patterns (all guarded by HasAVX512) that map X86vzmovl and
// X86vzload nodes onto the EVEX movd/movq instructions. Results wider than
// 128 bits are formed by wrapping the 128-bit instruction in SUBREG_TO_REG
// with the sub_xmm index.
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004102let Predicates = [HasAVX512] in {
  // X86vzmovl of a scalar taken from a general-purpose register.
Craig Topperde549852016-05-22 06:09:34 +00004103 let AddedComplexity = 15 in {
4104 def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
4105 (VMOVDI2PDIZrr GR32:$src)>;
4106
4107 def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
4108 (VMOV64toPQIZrr GR64:$src)>;
4109
4110 def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
4111 (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
4112 (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
Craig Topperf4442312016-08-07 21:52:59 +00004113
4114 def : Pat<(v8i64 (X86vzmovl (insert_subvector undef,
4115 (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
4116 (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
Craig Topperde549852016-05-22 06:09:34 +00004117 }
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004118 // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
  // The patterns below fold a load into the move (memory-operand forms).
4119 let AddedComplexity = 20 in {
Simon Pilgrima4c350f2017-02-17 20:43:32 +00004120 def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
4121 (VMOVDI2PDIZrm addr:$src)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004122 def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
4123 (VMOVDI2PDIZrm addr:$src)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004124 def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
4125 (VMOVDI2PDIZrm addr:$src)>;
4126 def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
4127 (VMOVDI2PDIZrm addr:$src)>;
Simon Pilgrim6392b8d2016-08-24 10:46:40 +00004128 def : Pat<(v4i32 (X86vzload addr:$src)),
4129 (VMOVDI2PDIZrm addr:$src)>;
4130 def : Pat<(v8i32 (X86vzload addr:$src)),
4131 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004132 def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
Craig Topper3dcf45f2016-11-22 05:31:43 +00004133 (VMOVQI2PQIZrm addr:$src)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004134 def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
Simon Pilgrim6392b8d2016-08-24 10:46:40 +00004135 (VMOVZPQILo2PQIZrr VR128X:$src)>;
Cameron McInally30bbb212013-12-05 00:11:25 +00004136 def : Pat<(v2i64 (X86vzload addr:$src)),
Craig Topper3dcf45f2016-11-22 05:31:43 +00004137 (VMOVQI2PQIZrm addr:$src)>;
Craig Topperde549852016-05-22 06:09:34 +00004138 def : Pat<(v4i64 (X86vzload addr:$src)),
Craig Topper3dcf45f2016-11-22 05:31:43 +00004139 (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004140 }
Elena Demikhovsky3b75f5d2013-10-01 08:38:02 +00004141
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004142 // Use regular 128-bit instructions to match 256-bit and 512-bit scalar_to_vec+zext.
4143 def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
4144 (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
4145 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>;
Craig Topperf4442312016-08-07 21:52:59 +00004146 def : Pat<(v16i32 (X86vzmovl (insert_subvector undef,
4147 (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
4148 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>;
4149
Simon Pilgrim18bcf932016-02-03 09:41:59 +00004150 // Use regular 128-bit load instructions to match 512-bit X86vzload.
Simon Pilgrim6392b8d2016-08-24 10:46:40 +00004151 def : Pat<(v16i32 (X86vzload addr:$src)),
4152 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
Simon Pilgrim18bcf932016-02-03 09:41:59 +00004153 def : Pat<(v8i64 (X86vzload addr:$src)),
Craig Topper3dcf45f2016-11-22 05:31:43 +00004154 (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004155}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004156//===----------------------------------------------------------------------===//
Adam Nemet7f62b232014-06-10 16:39:53 +00004157// AVX-512 - Non-temporals
4158//===----------------------------------------------------------------------===//
// Non-temporal loads: vmovntdqa (opcode 0x2A) into 512-, 256- and 128-bit
// registers, all scheduled as WriteLoad. Each def has an empty pattern list
// ([]); the 256-bit and 128-bit forms are additionally guarded by HasVLX.
Robert Khasanoved882972014-08-13 10:46:00 +00004159let SchedRW = [WriteLoad] in {
4160 def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
4161 (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
Simon Pilgrim5a22eaa2017-04-14 15:05:35 +00004162 [], SSEPackedInt>, EVEX, T8PD, EVEX_V512,
Robert Khasanoved882972014-08-13 10:46:00 +00004163 EVEX_CD8<64, CD8VF>;
Adam Nemet7f62b232014-06-10 16:39:53 +00004164
Craig Topper2f90c1f2016-06-07 07:27:57 +00004165 let Predicates = [HasVLX] in {
Robert Khasanoved882972014-08-13 10:46:00 +00004166 def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
Craig Topper2f90c1f2016-06-07 07:27:57 +00004167 (ins i256mem:$src),
4168 "vmovntdqa\t{$src, $dst|$dst, $src}",
Simon Pilgrim5a22eaa2017-04-14 15:05:35 +00004169 [], SSEPackedInt>, EVEX, T8PD, EVEX_V256,
Craig Topper2f90c1f2016-06-07 07:27:57 +00004170 EVEX_CD8<64, CD8VF>;
Adam Nemet7f62b232014-06-10 16:39:53 +00004171
Robert Khasanoved882972014-08-13 10:46:00 +00004172 def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
Craig Topper2f90c1f2016-06-07 07:27:57 +00004173 (ins i128mem:$src),
4174 "vmovntdqa\t{$src, $dst|$dst, $src}",
Simon Pilgrim5a22eaa2017-04-14 15:05:35 +00004175 [], SSEPackedInt>, EVEX, T8PD, EVEX_V128,
Craig Topper2f90c1f2016-06-07 07:27:57 +00004176 EVEX_CD8<64, CD8VF>;
Robert Khasanoved882972014-08-13 10:46:00 +00004177 }
Adam Nemetefd07852014-06-18 16:51:10 +00004178}
4179
// Defines the "mr" (register-to-memory) form of a non-temporal store.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic; the operand string is appended to it.
//   _         - vector type info supplying MemOp, RC, VT, ExeDomain, EltSize.
//   st_frag   - store fragment matched by the pattern
//               (defaults to alignednontemporalstore).
//   itin      - itinerary class (defaults to IIC_SSE_MOVNT).
// The def is scheduled as WriteStore and given AddedComplexity = 400.
Igor Bregerd3341f52016-01-20 13:11:47 +00004180multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
4181 PatFrag st_frag = alignednontemporalstore,
4182 InstrItinClass itin = IIC_SSE_MOVNT> {
Craig Toppere1cac152016-06-07 07:27:54 +00004183 let SchedRW = [WriteStore], AddedComplexity = 400 in
Igor Bregerd3341f52016-01-20 13:11:47 +00004184 def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
Robert Khasanoved882972014-08-13 10:46:00 +00004185 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
Igor Bregerd3341f52016-01-20 13:11:47 +00004186 [(st_frag (_.VT _.RC:$src), addr:$dst)],
4187 _.ExeDomain, itin>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
Robert Khasanoved882972014-08-13 10:46:00 +00004188}
4189
// Instantiates avx512_movnt for all three vector lengths of VTInfo:
// Z (512-bit) under HasAVX512, Z256 and Z128 under HasAVX512 + HasVLX.
// Each instance is tagged with the matching EVEX_V512/V256/V128 class.
Igor Bregerd3341f52016-01-20 13:11:47 +00004190multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
4191 AVX512VLVectorVTInfo VTInfo> {
4192 let Predicates = [HasAVX512] in
4193 defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512>, EVEX_V512;
Robert Khasanoved882972014-08-13 10:46:00 +00004194
Igor Bregerd3341f52016-01-20 13:11:47 +00004195 let Predicates = [HasAVX512, HasVLX] in {
4196 defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256>, EVEX_V256;
4197 defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128>, EVEX_V128;
Robert Khasanoved882972014-08-13 10:46:00 +00004198 }
4199}
4200
// Non-temporal store instructions: vmovntdq (i64 element info, opcode 0xE7),
// vmovntpd (f64, opcode 0x2B, VEX_W) and vmovntps (f32, opcode 0x2B).
Igor Bregerd3341f52016-01-20 13:11:47 +00004201defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info>, PD;
4202defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info>, PD, VEX_W;
4203defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info>, PS;
Robert Khasanoved882972014-08-13 10:46:00 +00004204
// 512-bit non-temporal patterns (HasAVX512, AddedComplexity = 400).
// Aligned non-temporal stores of v16i32/v32i16/v64i8 select VMOVNTDQZmr, and
// aligned non-temporal loads of v8f64/v16f32/v8i64 select VMOVNTDQAZrm.
Craig Topper707c89c2016-05-08 23:43:17 +00004205let Predicates = [HasAVX512], AddedComplexity = 400 in {
4206 def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
4207 (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4208 def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
4209 (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4210 def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
4211 (VMOVNTDQZmr addr:$dst, VR512:$src)>;
Simon Pilgrim9a896232016-06-07 13:34:24 +00004212
4213 def : Pat<(v8f64 (alignednontemporalload addr:$src)),
4214 (VMOVNTDQAZrm addr:$src)>;
4215 def : Pat<(v16f32 (alignednontemporalload addr:$src)),
4216 (VMOVNTDQAZrm addr:$src)>;
4217 def : Pat<(v8i64 (alignednontemporalload addr:$src)),
4218 (VMOVNTDQAZrm addr:$src)>;
Craig Topper707c89c2016-05-08 23:43:17 +00004219}
4220
// 256-bit and 128-bit non-temporal patterns (HasVLX, AddedComplexity = 400).
// Same layout as the 512-bit patterns: integer-vector stores select
// VMOVNTDQZ256mr / VMOVNTDQZ128mr, and aligned non-temporal loads select
// VMOVNTDQAZ256rm / VMOVNTDQAZ128rm.
Craig Topperc41320d2016-05-08 23:08:45 +00004221let Predicates = [HasVLX], AddedComplexity = 400 in {
4222 def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
4223 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4224 def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
4225 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4226 def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
4227 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4228
Simon Pilgrim9a896232016-06-07 13:34:24 +00004229 def : Pat<(v4f64 (alignednontemporalload addr:$src)),
4230 (VMOVNTDQAZ256rm addr:$src)>;
4231 def : Pat<(v8f32 (alignednontemporalload addr:$src)),
4232 (VMOVNTDQAZ256rm addr:$src)>;
4233 def : Pat<(v4i64 (alignednontemporalload addr:$src)),
4234 (VMOVNTDQAZ256rm addr:$src)>;
Simon Pilgrim9a896232016-06-07 13:34:24 +00004235
Craig Topperc41320d2016-05-08 23:08:45 +00004236 def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
4237 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4238 def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
4239 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4240 def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
4241 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
Simon Pilgrim9a896232016-06-07 13:34:24 +00004242
4243 def : Pat<(v2f64 (alignednontemporalload addr:$src)),
4244 (VMOVNTDQAZ128rm addr:$src)>;
4245 def : Pat<(v4f32 (alignednontemporalload addr:$src)),
4246 (VMOVNTDQAZ128rm addr:$src)>;
4247 def : Pat<(v2i64 (alignednontemporalload addr:$src)),
4248 (VMOVNTDQAZ128rm addr:$src)>;
Craig Topperc41320d2016-05-08 23:08:45 +00004249}
4250
Adam Nemet7f62b232014-06-10 16:39:53 +00004251//===----------------------------------------------------------------------===//
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004252// AVX-512 - Integer arithmetic
4253//
// Maskable two-operand integer operation for one vector type `_`.
//   rr - register/register form.
//   rm - register/memory form; the second operand is a full-vector load
//        (_.LdFrag) passed through bitconvert.
// IsCommutable is forwarded only to the rr form. Scheduling comes from
// itins.Sched (rr) and itins.Sched.Folded + ReadAfterLd (rm).
4254multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
Robert Khasanov44241442014-10-08 14:37:45 +00004255 X86VectorVTInfo _, OpndItins itins,
4256 bit IsCommutable = 0> {
Adam Nemet34801422014-10-08 23:25:39 +00004257 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
Igor Bregerf2460112015-07-26 14:41:44 +00004258 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
Robert Khasanov44241442014-10-08 14:37:45 +00004259 "$src2, $src1", "$src1, $src2",
4260 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00004261 itins.rr, IsCommutable>, AVX512BIBase, EVEX_4V,
4262 Sched<[itins.Sched]>;
Elena Demikhovskybb2f6b72014-03-27 09:45:08 +00004263
Craig Toppere1cac152016-06-07 07:27:54 +00004264 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4265 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
4266 "$src2, $src1", "$src1, $src2",
4267 (_.VT (OpNode _.RC:$src1,
4268 (bitconvert (_.LdFrag addr:$src2)))),
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00004269 itins.rm>, AVX512BIBase, EVEX_4V,
4270 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Robert Khasanov545d1b72014-10-14 14:36:19 +00004271}
4272
// Extends avx512_binop_rm (rr + rm) with an "rmb" form whose second operand
// is a scalar load (_.ScalarLdFrag) expanded with X86VBroadcast. The assembly
// operand gets _.BroadcastStr appended and the encoding is tagged EVEX_B.
4273multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4274 X86VectorVTInfo _, OpndItins itins,
4275 bit IsCommutable = 0> :
4276 avx512_binop_rm<opc, OpcodeStr, OpNode, _, itins, IsCommutable> {
Craig Toppere1cac152016-06-07 07:27:54 +00004277 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4278 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
4279 "${src2}"##_.BroadcastStr##", $src1",
4280 "$src1, ${src2}"##_.BroadcastStr,
4281 (_.VT (OpNode _.RC:$src1,
4282 (X86VBroadcast
4283 (_.ScalarLdFrag addr:$src2)))),
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00004284 itins.rm>, AVX512BIBase, EVEX_4V, EVEX_B,
4285 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004286}
Elena Demikhovskybb2f6b72014-03-27 09:45:08 +00004287
// Instantiates avx512_binop_rm (no broadcast form) at all three vector
// lengths of VTInfo: Z under [prd], Z256 and Z128 under [prd, HasVLX].
Robert Khasanovd5b14f72014-10-09 08:38:48 +00004288multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4289 AVX512VLVectorVTInfo VTInfo, OpndItins itins,
4290 Predicate prd, bit IsCommutable = 0> {
4291 let Predicates = [prd] in
4292 defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, itins,
4293 IsCommutable>, EVEX_V512;
4294
4295 let Predicates = [prd, HasVLX] in {
4296 defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256, itins,
4297 IsCommutable>, EVEX_V256;
4298 defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128, itins,
4299 IsCommutable>, EVEX_V128;
4300 }
4301}
4302
// Instantiates avx512_binop_rmb (rr, rm and broadcast rmb forms) at all three
// vector lengths of VTInfo: Z under [prd], Z256 and Z128 under [prd, HasVLX].
Robert Khasanov545d1b72014-10-14 14:36:19 +00004303multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4304 AVX512VLVectorVTInfo VTInfo, OpndItins itins,
4305 Predicate prd, bit IsCommutable = 0> {
4306 let Predicates = [prd] in
4307 defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, itins,
4308 IsCommutable>, EVEX_V512;
4309
4310 let Predicates = [prd, HasVLX] in {
4311 defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256, itins,
4312 IsCommutable>, EVEX_V256;
4313 defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128, itins,
4314 IsCommutable>, EVEX_V128;
4315 }
4316}
4317
// 64-bit-element variant: uses avx512vl_i64_info with the broadcast-capable
// avx512_binop_rmb_vl, and adds VEX_W and EVEX_CD8<64, CD8VF>.
4318multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
4319 OpndItins itins, Predicate prd,
4320 bit IsCommutable = 0> {
4321 defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
4322 itins, prd, IsCommutable>,
4323 VEX_W, EVEX_CD8<64, CD8VF>;
4324}
4325
// 32-bit-element variant: uses avx512vl_i32_info with the broadcast-capable
// avx512_binop_rmb_vl, and adds EVEX_CD8<32, CD8VF>.
4326multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
4327 OpndItins itins, Predicate prd,
4328 bit IsCommutable = 0> {
4329 defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
4330 itins, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
4331}
4332
// 16-bit-element variant: uses avx512vl_i16_info with avx512_binop_rm_vl
// (rr and rm only, no broadcast form), plus EVEX_CD8<16, CD8VF> and VEX_WIG.
4333multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
4334 OpndItins itins, Predicate prd,
4335 bit IsCommutable = 0> {
4336 defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
Craig Toppera33846a2017-10-22 06:18:23 +00004337 itins, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
4338 VEX_WIG;
Robert Khasanov545d1b72014-10-14 14:36:19 +00004339}
4340
// 8-bit-element variant: uses avx512vl_i8_info with avx512_binop_rm_vl
// (rr and rm only, no broadcast form), plus EVEX_CD8<8, CD8VF> and VEX_WIG.
4341multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
4342 OpndItins itins, Predicate prd,
4343 bit IsCommutable = 0> {
4344 defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
Craig Toppera33846a2017-10-22 06:18:23 +00004345 itins, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
4346 VEX_WIG;
Robert Khasanov545d1b72014-10-14 14:36:19 +00004347}
4348
// Emits both the Q (64-bit element, opc_q) and D (32-bit element, opc_d)
// variants of an operation; the mnemonic is OpcodeStr with "q" / "d" appended.
4349multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
4350 SDNode OpNode, OpndItins itins, Predicate prd,
4351 bit IsCommutable = 0> {
Igor Bregerf2460112015-07-26 14:41:44 +00004352 defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, itins, prd,
Robert Khasanov545d1b72014-10-14 14:36:19 +00004353 IsCommutable>;
4354
Igor Bregerf2460112015-07-26 14:41:44 +00004355 defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, itins, prd,
Robert Khasanov545d1b72014-10-14 14:36:19 +00004356 IsCommutable>;
4357}
4358
// Emits both the W (16-bit element, opc_w) and B (8-bit element, opc_b)
// variants of an operation; the mnemonic is OpcodeStr with "w" / "b" appended.
4359multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
4360 SDNode OpNode, OpndItins itins, Predicate prd,
4361 bit IsCommutable = 0> {
Igor Bregerf2460112015-07-26 14:41:44 +00004362 defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, itins, prd,
Robert Khasanov545d1b72014-10-14 14:36:19 +00004363 IsCommutable>;
4364
Igor Bregerf2460112015-07-26 14:41:44 +00004365 defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, itins, prd,
Robert Khasanov545d1b72014-10-14 14:36:19 +00004366 IsCommutable>;
4367}
4368
// Emits all four element widths of an operation: the D/Q variants are
// guarded by HasAVX512 and the B/W variants by HasBWI.
4369multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
4370 bits<8> opc_d, bits<8> opc_q,
4371 string OpcodeStr, SDNode OpNode,
4372 OpndItins itins, bit IsCommutable = 0> {
4373 defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
4374 itins, HasAVX512, IsCommutable>,
4375 avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
4376 itins, HasBWI, IsCommutable>;
4377}
4378
// Maskable two-operand operation whose source and destination vector types
// differ.
//   _Src   - type info for both source operands.
//   _Dst   - type info for the result (and for masking).
//   _Brdct - type info used for the broadcast form: its scalar memory operand,
//            scalar load fragment and broadcast string; the broadcast value is
//            bitconverted before being passed to OpNode.
// Defines rr, rm and rmb forms; IsCommutable is forwarded only to rr.
Elena Demikhovsky1eeece12015-04-02 10:51:40 +00004379multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, OpndItins itins,
Michael Liao66233b72015-08-06 09:06:20 +00004380 SDNode OpNode,X86VectorVTInfo _Src,
Asaf Badouh5a3a0232016-02-01 15:48:21 +00004381 X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
4382 bit IsCommutable = 0> {
Michael Liao66233b72015-08-06 09:06:20 +00004383 defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
Elena Demikhovsky1eeece12015-04-02 10:51:40 +00004384 (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
Michael Liao66233b72015-08-06 09:06:20 +00004385 "$src2, $src1","$src1, $src2",
4386 (_Dst.VT (OpNode
4387 (_Src.VT _Src.RC:$src1),
Elena Demikhovsky1eeece12015-04-02 10:51:40 +00004388 (_Src.VT _Src.RC:$src2))),
Michael Liao66233b72015-08-06 09:06:20 +00004389 itins.rr, IsCommutable>,
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00004390 AVX512BIBase, EVEX_4V, Sched<[itins.Sched]>;
Craig Toppere1cac152016-06-07 07:27:54 +00004391 defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4392 (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4393 "$src2, $src1", "$src1, $src2",
4394 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4395 (bitconvert (_Src.LdFrag addr:$src2)))),
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00004396 itins.rm>, AVX512BIBase, EVEX_4V,
4397 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Craig Toppere1cac152016-06-07 07:27:54 +00004398
4399 defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
Coby Tayree99a66392016-11-20 17:19:55 +00004400 (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
Craig Toppere1cac152016-06-07 07:27:54 +00004401 OpcodeStr,
4402 "${src2}"##_Brdct.BroadcastStr##", $src1",
Coby Tayree99a66392016-11-20 17:19:55 +00004403 "$src1, ${src2}"##_Brdct.BroadcastStr,
Craig Toppere1cac152016-06-07 07:27:54 +00004404 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4405 (_Brdct.VT (X86VBroadcast
4406 (_Brdct.ScalarLdFrag addr:$src2)))))),
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00004407 itins.rm>, AVX512BIBase, EVEX_4V, EVEX_B,
4408 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004409}
4410
// Integer add, subtract, saturating add/subtract, multiply and average
// instructions. The trailing 1/0 argument is IsCommutable: it is 1 for the
// add, multiply and average forms and 0 for the subtract forms.
Robert Khasanov545d1b72014-10-14 14:36:19 +00004411defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
4412 SSE_INTALU_ITINS_P, 1>;
4413defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
4414 SSE_INTALU_ITINS_P, 0>;
Elena Demikhovsky52266382015-05-04 12:35:55 +00004415defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", X86adds,
4416 SSE_INTALU_ITINS_P, HasBWI, 1>;
4417defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", X86subs,
4418 SSE_INTALU_ITINS_P, HasBWI, 0>;
4419defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", X86addus,
Michael Liao66233b72015-08-06 09:06:20 +00004420 SSE_INTALU_ITINS_P, HasBWI, 1>;
Elena Demikhovsky52266382015-05-04 12:35:55 +00004421defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", X86subus,
Michael Liao66233b72015-08-06 09:06:20 +00004422 SSE_INTALU_ITINS_P, HasBWI, 0>;
Igor Bregerf2460112015-07-26 14:41:44 +00004423defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
Craig Topper9b800c62017-12-26 05:43:04 +00004424 SSE_INTMUL_ITINS_P, HasAVX512, 1>, T8PD;
Igor Bregerf2460112015-07-26 14:41:44 +00004425defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
Craig Topper9b800c62017-12-26 05:43:04 +00004426 SSE_INTMUL_ITINS_P, HasBWI, 1>;
Igor Bregerf2460112015-07-26 14:41:44 +00004427defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
Craig Topper9b800c62017-12-26 05:43:04 +00004428 SSE_INTMUL_ITINS_P, HasDQI, 1>, T8PD;
4429defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SSE_INTMUL_ITINS_P,
Asaf Badouh73f26f82015-07-05 12:23:20 +00004430 HasBWI, 1>;
Igor Bregerf2460112015-07-26 14:41:44 +00004431defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SSE_INTMUL_ITINS_P,
Michael Liao66233b72015-08-06 09:06:20 +00004432 HasBWI, 1>;
Igor Bregerf2460112015-07-26 14:41:44 +00004433defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs, SSE_INTMUL_ITINS_P,
Michael Liao66233b72015-08-06 09:06:20 +00004434 HasBWI, 1>, T8PD;
Asaf Badouh81f03c32015-06-18 12:30:53 +00004435defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
Michael Liao66233b72015-08-06 09:06:20 +00004436 SSE_INTALU_ITINS_P, HasBWI, 1>;
4437
// Instantiates avx512_binop_rm2 at all three vector lengths, with separate
// source and destination type-info families. The broadcast type is always the
// 64-bit-element one (v8i64_info / v4i64x_info / v2i64x_info), and every
// instance carries EVEX_CD8<64, CD8VF> and VEX_W.
// Z is guarded by [prd]; Z256 and Z128 by [HasVLX, prd].
Simon Pilgrim18bcf932016-02-03 09:41:59 +00004438multiclass avx512_binop_all<bits<8> opc, string OpcodeStr, OpndItins itins,
Asaf Badouh5a3a0232016-02-01 15:48:21 +00004439 AVX512VLVectorVTInfo _SrcVTInfo, AVX512VLVectorVTInfo _DstVTInfo,
4440 SDNode OpNode, Predicate prd, bit IsCommutable = 0> {
4441 let Predicates = [prd] in
4442 defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode,
4443 _SrcVTInfo.info512, _DstVTInfo.info512,
4444 v8i64_info, IsCommutable>,
4445 EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
4446 let Predicates = [HasVLX, prd] in {
Elena Demikhovsky50b88dd2015-04-21 10:27:40 +00004447 defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode,
Simon Pilgrim18bcf932016-02-03 09:41:59 +00004448 _SrcVTInfo.info256, _DstVTInfo.info256,
Asaf Badouh5a3a0232016-02-01 15:48:21 +00004449 v4i64x_info, IsCommutable>,
4450 EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
Elena Demikhovsky50b88dd2015-04-21 10:27:40 +00004451 defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode,
Simon Pilgrim18bcf932016-02-03 09:41:59 +00004452 _SrcVTInfo.info128, _DstVTInfo.info128,
Asaf Badouh5a3a0232016-02-01 15:48:21 +00004453 v2i64x_info, IsCommutable>,
Elena Demikhovsky50b88dd2015-04-21 10:27:40 +00004454 EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
4455 }
Michael Liao66233b72015-08-06 09:06:20 +00004456}
Elena Demikhovsky50b88dd2015-04-21 10:27:40 +00004457
// Instructions built on avx512_binop_all. vpmuldq and vpmuludq take i32
// source info and i64 destination info and are marked commutable;
// vpmultishiftqb uses i8 info for both, is guarded by HasVBMI and is not
// marked commutable.
Craig Topper9b800c62017-12-26 05:43:04 +00004458defm VPMULDQ : avx512_binop_all<0x28, "vpmuldq", SSE_INTMUL_ITINS_P,
Asaf Badouh5a3a0232016-02-01 15:48:21 +00004459 avx512vl_i32_info, avx512vl_i64_info,
4460 X86pmuldq, HasAVX512, 1>,T8PD;
Simon Pilgrim18bcf932016-02-03 09:41:59 +00004461defm VPMULUDQ : avx512_binop_all<0xF4, "vpmuludq", SSE_INTMUL_ITINS_P,
Asaf Badouh5a3a0232016-02-01 15:48:21 +00004462 avx512vl_i32_info, avx512vl_i64_info,
4463 X86pmuludq, HasAVX512, 1>;
4464defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SSE_INTALU_ITINS_P,
4465 avx512vl_i8_info, avx512vl_i8_info,
4466 X86multishift, HasVBMI, 0>, T8PD;
Elena Demikhovsky172a27c2014-01-08 10:54:22 +00004467
// Broadcast-memory ("rmb") form of a pack-style operation: the second source
// is a scalar load of _Src's element type expanded with X86VBroadcast and
// bitconverted; the result has type _Dst.VT. Tagged EVEX_B, with the
// EVEX_CD8 element size taken from _Src.EltSize.
Elena Demikhovsky2557a222015-05-04 09:14:02 +00004468multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim4ac95c92017-11-27 18:14:18 +00004469 X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
4470 OpndItins itins> {
Craig Toppere1cac152016-06-07 07:27:54 +00004471 defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4472 (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
4473 OpcodeStr,
4474 "${src2}"##_Src.BroadcastStr##", $src1",
4475 "$src1, ${src2}"##_Src.BroadcastStr,
4476 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4477 (_Src.VT (X86VBroadcast
Simon Pilgrim4ac95c92017-11-27 18:14:18 +00004478 (_Src.ScalarLdFrag addr:$src2)))))),
4479 itins.rm>, EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
4480 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Elena Demikhovsky2557a222015-05-04 09:14:02 +00004481}
4482
// Register/register (rr) and register/memory (rm) forms of an operation that
// reads two _Src-typed operands and produces a _Dst-typed result. Masking
// uses _Dst; the EVEX_CD8 element size comes from _Src.EltSize.
// IsCommutable is forwarded only to the rr form.
Michael Liao66233b72015-08-06 09:06:20 +00004483multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
4484 SDNode OpNode,X86VectorVTInfo _Src,
Simon Pilgrim4ac95c92017-11-27 18:14:18 +00004485 X86VectorVTInfo _Dst, OpndItins itins,
4486 bit IsCommutable = 0> {
Michael Liao66233b72015-08-06 09:06:20 +00004487 defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
Elena Demikhovsky2557a222015-05-04 09:14:02 +00004488 (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
Michael Liao66233b72015-08-06 09:06:20 +00004489 "$src2, $src1","$src1, $src2",
4490 (_Dst.VT (OpNode
4491 (_Src.VT _Src.RC:$src1),
Craig Topper37e8c542016-08-14 17:57:22 +00004492 (_Src.VT _Src.RC:$src2))),
Simon Pilgrim4ac95c92017-11-27 18:14:18 +00004493 itins.rr, IsCommutable>,
4494 EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[itins.Sched]>;
Craig Toppere1cac152016-06-07 07:27:54 +00004495 defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4496 (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4497 "$src2, $src1", "$src1, $src2",
4498 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
Simon Pilgrim4ac95c92017-11-27 18:14:18 +00004499 (bitconvert (_Src.LdFrag addr:$src2)))), itins.rm>,
4500 EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
4501 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Elena Demikhovsky2557a222015-05-04 09:14:02 +00004502}
4503
// Pack from 32-bit to 16-bit elements at all three vector lengths. Each
// length gets the rr/rm forms (avx512_packs_rm) and the broadcast form
// (avx512_packs_rmb), all with SSE_PACK itineraries.
// Z is guarded by HasBWI; Z256 and Z128 by HasBWI + HasVLX.
4504multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
4505 SDNode OpNode> {
Craig Topper5acb5a12016-05-01 06:24:57 +00004506 let Predicates = [HasBWI] in
Elena Demikhovsky2557a222015-05-04 09:14:02 +00004507 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
Simon Pilgrim4ac95c92017-11-27 18:14:18 +00004508 v32i16_info, SSE_PACK>,
Elena Demikhovsky2557a222015-05-04 09:14:02 +00004509 avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
Simon Pilgrim4ac95c92017-11-27 18:14:18 +00004510 v32i16_info, SSE_PACK>, EVEX_V512;
Craig Topper5acb5a12016-05-01 06:24:57 +00004511 let Predicates = [HasBWI, HasVLX] in {
Elena Demikhovsky2557a222015-05-04 09:14:02 +00004512 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
Simon Pilgrim4ac95c92017-11-27 18:14:18 +00004513 v16i16x_info, SSE_PACK>,
Elena Demikhovsky2557a222015-05-04 09:14:02 +00004514 avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
Simon Pilgrim4ac95c92017-11-27 18:14:18 +00004515 v16i16x_info, SSE_PACK>, EVEX_V256;
Elena Demikhovsky2557a222015-05-04 09:14:02 +00004516 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
Simon Pilgrim4ac95c92017-11-27 18:14:18 +00004517 v8i16x_info, SSE_PACK>,
Elena Demikhovsky2557a222015-05-04 09:14:02 +00004518 avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
Simon Pilgrim4ac95c92017-11-27 18:14:18 +00004519 v8i16x_info, SSE_PACK>, EVEX_V128;
Elena Demikhovsky2557a222015-05-04 09:14:02 +00004520 }
4521}
// Pack from 16-bit to 8-bit elements at all three vector lengths. Only the
// rr/rm forms are defined (no broadcast form), and every instance is VEX_WIG.
// Z is guarded by HasBWI; Z256 and Z128 by HasBWI + HasVLX.
4522multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
4523 SDNode OpNode> {
Craig Topper5acb5a12016-05-01 06:24:57 +00004524 let Predicates = [HasBWI] in
Elena Demikhovsky2557a222015-05-04 09:14:02 +00004525 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info,
Simon Pilgrim4ac95c92017-11-27 18:14:18 +00004526 v64i8_info, SSE_PACK>, EVEX_V512, VEX_WIG;
Craig Topper5acb5a12016-05-01 06:24:57 +00004527 let Predicates = [HasBWI, HasVLX] in {
Elena Demikhovsky2557a222015-05-04 09:14:02 +00004528 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
Simon Pilgrim4ac95c92017-11-27 18:14:18 +00004529 v32i8x_info, SSE_PACK>, EVEX_V256, VEX_WIG;
Elena Demikhovsky2557a222015-05-04 09:14:02 +00004530 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
Simon Pilgrim4ac95c92017-11-27 18:14:18 +00004531 v16i8x_info, SSE_PACK>, EVEX_V128, VEX_WIG;
Elena Demikhovsky2557a222015-05-04 09:14:02 +00004532 }
4533}
Igor Bregerf7fd5472015-07-21 07:11:28 +00004534
// Multiply-add instructions built on avx512_packs_rm with the SSE_PMADD
// itinerary. _Src/_Dst supply the per-length source and destination type
// infos. The 512-bit form needs HasBWI; 256/128-bit forms also need HasVLX.
multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
                         SDNode OpNode, AVX512VLVectorVTInfo _Src,
                         AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
  let Predicates = [HasBWI] in {
    defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode,
                                  _Src.info512, _Dst.info512,
                                  SSE_PMADD, IsCommutable>,
                  EVEX_V512;
  }

  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode,
                                     _Src.info256, _Dst.info256,
                                     SSE_PMADD, IsCommutable>,
                     EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode,
                                     _Src.info128, _Dst.info128,
                                     SSE_PMADD, IsCommutable>,
                     EVEX_V128;
  }
}
4548
// Pack instructions. X86Packss / X86Packus are the selection nodes for the
// "ss" and "us" mnemonics respectively. VPACKUSDW is the only one of the four
// that uses AVX5128IBase; the others use AVX512BIBase.
defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>,
                 AVX512BIBase;
defm VPACKUSDW : avx512_packs_all_i32_i16<0x2B, "vpackusdw", X86Packus>,
                 AVX5128IBase;
defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>,
                 AVX512BIBase;
defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>,
                 AVX512BIBase;
Igor Bregerf7fd5472015-07-21 07:11:28 +00004553
// Multiply-add instructions. VPMADDUBSW goes from i8 to i16 type infos and is
// left non-commutable (IsCommutable defaults to 0); VPMADDWD goes from i16 to
// i32 type infos and is marked commutable.
defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
                                avx512vl_i8_info, avx512vl_i16_info>,
                  AVX512BIBase, T8PD, VEX_WIG;
defm VPMADDWD   : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
                                avx512vl_i16_info, avx512vl_i32_info, 1>,
                  AVX512BIBase, VEX_WIG;
Elena Demikhovsky2557a222015-05-04 09:14:02 +00004558
// Integer maximum, selected from the smax node. Byte/word forms are guarded
// by HasBWI, dword/qword forms by HasAVX512. All are commutable.
defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
                                    SSE_INTALU_ITINS_P, HasBWI, 1>,
               T8PD;
defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
                                    SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPMAXS  : avx512_binop_rm_vl_dq<0x3D, 0x3D, "vpmaxs", smax,
                                     SSE_INTALU_ITINS_P, HasAVX512, 1>,
               T8PD;

// Integer maximum, selected from the umax node.
defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
                                    SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
                                    SSE_INTALU_ITINS_P, HasBWI, 1>,
               T8PD;
defm VPMAXU  : avx512_binop_rm_vl_dq<0x3F, 0x3F, "vpmaxu", umax,
                                     SSE_INTALU_ITINS_P, HasAVX512, 1>,
               T8PD;
Elena Demikhovsky199c8232013-10-27 08:18:37 +00004572
// Integer minimum, selected from the smin node. Byte/word forms are guarded
// by HasBWI, dword/qword forms by HasAVX512. All are commutable.
defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
                                    SSE_INTALU_ITINS_P, HasBWI, 1>,
               T8PD;
defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
                                    SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPMINS  : avx512_binop_rm_vl_dq<0x39, 0x39, "vpmins", smin,
                                     SSE_INTALU_ITINS_P, HasAVX512, 1>,
               T8PD;

// Integer minimum, selected from the umin node.
defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
                                    SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
                                    SSE_INTALU_ITINS_P, HasBWI, 1>,
               T8PD;
defm VPMINU  : avx512_binop_rm_vl_dq<0x3B, 0x3B, "vpminu", umin,
                                     SSE_INTALU_ITINS_P, HasAVX512, 1>,
               T8PD;
Craig Topperabe80cc2016-08-28 06:06:28 +00004586
// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX.
// Each narrow operand is inserted into an otherwise-undefined (IMPLICIT_DEF)
// v8i64 register, multiplied with VPMULLQZrr, and the matching low subregister
// of the result is extracted again.
let Predicates = [HasDQI, NoVLX] in {
  // 256-bit multiply via the 512-bit instruction.
  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;

  // 128-bit multiply via the 512-bit instruction.
  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
}
4620
// Emits patterns that implement a v4i64 / v2i64 OpNode with the 512-bit
// instruction Instr: operands are inserted into an IMPLICIT_DEF v8i64
// register and the low ymm / xmm subregister of the result is extracted.
multiclass avx512_min_max_lowering<Instruction Instr, SDNode OpNode> {
  // 256-bit operands.
  def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
            (EXTRACT_SUBREG
                (Instr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;

  // 128-bit operands.
  def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
            (EXTRACT_SUBREG
                (Instr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
}
4636
// Without VLX, lower 128/256-bit 64-bit-element min/max through the 512-bit
// register-register instructions.
let Predicates = [HasAVX512, NoVLX] in {
  defm : avx512_min_max_lowering<VPMAXUQZrr, umax>;
  defm : avx512_min_max_lowering<VPMINUQZrr, umin>;
  defm : avx512_min_max_lowering<VPMAXSQZrr, smax>;
  defm : avx512_min_max_lowering<VPMINSQZrr, smin>;
}
4643
//===----------------------------------------------------------------------===//
// AVX-512 Logical Instructions
//===----------------------------------------------------------------------===//
4647
// Register-register (rr) and register-memory (rm) forms of a logic
// instruction.
// OpNodeMsk is the OpNode to use when element size is important. OpNode will
// be set to null_frag for 32-bit elements.
// The first pattern handed to AVX512_maskable_logic is written in terms of
// OpNode on _.i64VT with bitconverted operands; the second is written in terms
// of OpNodeMsk and bitconverts the _.i64VT result back to _.VT.
multiclass avx512_logic_rm<bits<8> opc, string OpcodeStr,
                           SDPatternOperator OpNode,
                           SDNode OpNodeMsk, OpndItins itins, X86VectorVTInfo _,
                           bit IsCommutable = 0> {
  let hasSideEffects = 0 in {
    defm rr : AVX512_maskable_logic<opc, MRMSrcReg, _,
                (outs _.RC:$dst),
                (ins _.RC:$src1, _.RC:$src2),
                OpcodeStr,
                "$src2, $src1", "$src1, $src2",
                (_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
                                 (bitconvert (_.VT _.RC:$src2)))),
                (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
                                                      _.RC:$src2)))),
                itins.rr, IsCommutable>,
              AVX512BIBase, EVEX_4V, Sched<[itins.Sched]>;

    let mayLoad = 1 in {
      defm rm : AVX512_maskable_logic<opc, MRMSrcMem, _,
                  (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2),
                  OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
                                   (bitconvert (_.LdFrag addr:$src2)))),
                  (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
                                     (bitconvert (_.LdFrag addr:$src2)))))),
                  itins.rm>,
                AVX512BIBase, EVEX_4V,
                Sched<[itins.Sched.Folded, ReadAfterLd]>;
    }
  }
}
4676
// Extends avx512_logic_rm with the broadcast-from-memory (rmb) form.
// OpNodeMsk is the OpNode to use where element size is important. So use
// for all of the broadcast patterns.
multiclass avx512_logic_rmb<bits<8> opc, string OpcodeStr,
                            SDPatternOperator OpNode,
                            SDNode OpNodeMsk, OpndItins itins, X86VectorVTInfo _,
                            bit IsCommutable = 0>
    : avx512_logic_rm<opc, OpcodeStr, OpNode, OpNodeMsk, itins, _,
                      IsCommutable> {
  defm rmb : AVX512_maskable_logic<opc, MRMSrcMem, _,
               (outs _.RC:$dst),
               (ins _.RC:$src1, _.ScalarMemOp:$src2),
               OpcodeStr,
               "${src2}"##_.BroadcastStr##", $src1",
               "$src1, ${src2}"##_.BroadcastStr,
               (_.i64VT (OpNodeMsk _.RC:$src1,
                          (bitconvert
                            (_.VT (X86VBroadcast
                                    (_.ScalarLdFrag addr:$src2)))))),
               (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
                          (bitconvert
                            (_.VT (X86VBroadcast
                                    (_.ScalarLdFrag addr:$src2)))))))),
               itins.rm>,
             AVX512BIBase, EVEX_4V, EVEX_B,
             Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
4700
// Instantiates avx512_logic_rmb at all three vector lengths from VTInfo.
// The 512-bit form (Z) requires HasAVX512; the 256-bit (Z256) and 128-bit
// (Z128) forms additionally require HasVLX.
multiclass avx512_logic_rmb_vl<bits<8> opc, string OpcodeStr,
                               SDPatternOperator OpNode,
                               SDNode OpNodeMsk, OpndItins itins,
                               AVX512VLVectorVTInfo VTInfo,
                               bit IsCommutable = 0> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, itins,
                              VTInfo.info512, IsCommutable>,
             EVEX_V512;
  }

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, itins,
                                 VTInfo.info256, IsCommutable>,
                EVEX_V256;
    defm Z128 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, itins,
                                 VTInfo.info128, IsCommutable>,
                EVEX_V128;
  }
}
4717
// Defines the "q" (64-bit element) and "d" (32-bit element) flavours of a
// logic instruction. The Q flavour passes OpNode as both OpNode and OpNodeMsk;
// the D flavour passes null_frag as OpNode and keeps OpNode only as OpNodeMsk.
multiclass avx512_logic_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, OpndItins itins,
                                 bit IsCommutable = 0> {
  defm Q : avx512_logic_rmb_vl<opc_q, OpcodeStr#"q", OpNode, OpNode, itins,
                               avx512vl_i64_info, IsCommutable>,
           VEX_W, EVEX_CD8<64, CD8VF>;

  defm D : avx512_logic_rmb_vl<opc_d, OpcodeStr#"d", null_frag, OpNode, itins,
                               avx512vl_i32_info, IsCommutable>,
           EVEX_CD8<32, CD8VF>;
}
4728
// Integer logic instructions. AND/OR/XOR are marked commutable; VPANDN uses
// X86andnp and leaves IsCommutable at its default of 0.
defm VPAND  : avx512_logic_rm_vl_dq<0xDB, 0xDB, "vpand", and,
                                    SSE_BIT_ITINS_P, 1>;
defm VPOR   : avx512_logic_rm_vl_dq<0xEB, 0xEB, "vpor", or,
                                    SSE_BIT_ITINS_P, 1>;
defm VPXOR  : avx512_logic_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
                                    SSE_BIT_ITINS_P, 1>;
defm VPANDN : avx512_logic_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
                                    SSE_BIT_ITINS_P>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004733
//===----------------------------------------------------------------------===//
// AVX-512 FP arithmetic
//===----------------------------------------------------------------------===//
// Scalar FP binary operation.
//   rr_Int / rm_Int : maskable forms on the vector register class _.RC,
//                     matched from VecNode with FROUND_CURRENT.
//   rr / rm         : isCodeGenOnly forms on the scalar register class _.FRC,
//                     matched from OpNode and guarded by HasAVX512.
// IsCommutable is applied to the codegen-only rr form only.
multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                            SDNode OpNode, SDNode VecNode, OpndItins itins,
                            bit IsCommutable> {
  let ExeDomain = _.ExeDomain in {
    defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _,
                    (outs _.RC:$dst),
                    (ins _.RC:$src1, _.RC:$src2),
                    OpcodeStr,
                    "$src2, $src1", "$src1, $src2",
                    (_.VT (VecNode _.RC:$src1, _.RC:$src2,
                                   (i32 FROUND_CURRENT))),
                    itins.rr>,
                  Sched<[itins.Sched]>;

    defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _,
                    (outs _.RC:$dst),
                    (ins _.RC:$src1, _.IntScalarMemOp:$src2),
                    OpcodeStr,
                    "$src2, $src1", "$src1, $src2",
                    (_.VT (VecNode _.RC:$src1,
                                   _.ScalarIntMemCPat:$src2,
                                   (i32 FROUND_CURRENT))),
                    itins.rm>,
                  Sched<[itins.Sched.Folded, ReadAfterLd]>;

    let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
      def rr : I< opc, MRMSrcReg,
                  (outs _.FRC:$dst),
                  (ins _.FRC:$src1, _.FRC:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
                  itins.rr>,
               Sched<[itins.Sched]> {
        let isCommutable = IsCommutable;
      }

      def rm : I< opc, MRMSrcMem,
                  (outs _.FRC:$dst),
                  (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                            (_.ScalarLdFrag addr:$src2)))],
                  itins.rm>,
               Sched<[itins.Sched.Folded, ReadAfterLd]>;
    }
  }
}
4772
// Scalar FP register-register form with an explicit rounding-control operand
// ($rc), matched from VecNode with the rounding mode as an i32 immediate.
multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                                  SDNode VecNode, OpndItins itins,
                                  bit IsCommutable = 0> {
  let ExeDomain = _.ExeDomain in {
    defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _,
                     (outs _.RC:$dst),
                     (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc),
                     OpcodeStr,
                     "$rc, $src2, $src1", "$src1, $src2, $rc",
                     (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                              (i32 imm:$rc)),
                     itins.rr, IsCommutable>,
                   EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
  }
}
// Scalar FP binary operation with a {sae} variant.
//   rr_Int / rm_Int : maskable forms on _.RC, matched from VecNode.
//   rr / rm         : isCodeGenOnly forms on _.FRC, matched from OpNode and
//                     guarded by HasAVX512.
//   rrb_Int         : "{sae}" register form, matched from SaeNode with
//                     FROUND_NO_EXC.
// IsCommutable is applied to the codegen-only rr form only.
multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                                SDNode OpNode, SDNode VecNode, SDNode SaeNode,
                                OpndItins itins, bit IsCommutable> {
  let ExeDomain = _.ExeDomain in {
    defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _,
                    (outs _.RC:$dst),
                    (ins _.RC:$src1, _.RC:$src2),
                    OpcodeStr,
                    "$src2, $src1", "$src1, $src2",
                    (_.VT (VecNode _.RC:$src1, _.RC:$src2)),
                    itins.rr>,
                  Sched<[itins.Sched]>;

    defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _,
                    (outs _.RC:$dst),
                    (ins _.RC:$src1, _.IntScalarMemOp:$src2),
                    OpcodeStr,
                    "$src2, $src1", "$src1, $src2",
                    (_.VT (VecNode _.RC:$src1,
                                   _.ScalarIntMemCPat:$src2)),
                    itins.rm>,
                  Sched<[itins.Sched.Folded, ReadAfterLd]>;

    let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
      def rr : I< opc, MRMSrcReg,
                  (outs _.FRC:$dst),
                  (ins _.FRC:$src1, _.FRC:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
                  itins.rr>,
               Sched<[itins.Sched]> {
        let isCommutable = IsCommutable;
      }

      def rm : I< opc, MRMSrcMem,
                  (outs _.FRC:$dst),
                  (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                            (_.ScalarLdFrag addr:$src2)))],
                  itins.rm>,
               Sched<[itins.Sched.Folded, ReadAfterLd]>;
    }

    defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _,
                     (outs _.RC:$dst),
                     (ins _.RC:$src1, _.RC:$src2),
                     OpcodeStr,
                     "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                     (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                              (i32 FROUND_NO_EXC)),
                     itins.rr>,
                   EVEX_B, Sched<[itins.Sched]>;
  }
}
4824
// Single-precision (SSZ, "ss") and double-precision (SDZ, "sd") scalar binary
// operations, each combining avx512_fp_scalar with the rounding-control form
// from avx512_fp_scalar_round.
multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                SDNode VecNode,
                                SizeItins itins, bit IsCommutable> {
  defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info,
                              OpNode, VecNode, itins.s, IsCommutable>,
             avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info,
                                    VecNode, itins.s, IsCommutable>,
             XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;

  defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info,
                              OpNode, VecNode, itins.d, IsCommutable>,
             avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info,
                                    VecNode, itins.d, IsCommutable>,
             XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
4839
// Single-precision (SSZ, "ss") and double-precision (SDZ, "sd") scalar binary
// operations built from avx512_fp_scalar_sae.
multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode VecNode, SDNode SaeNode,
                              SizeItins itins, bit IsCommutable> {
  defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info,
                                  OpNode, VecNode, SaeNode,
                                  itins.s, IsCommutable>,
             XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;

  defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info,
                                  OpNode, VecNode, SaeNode,
                                  itins.d, IsCommutable>,
             XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
// Scalar FP arithmetic. ADD and MUL are commutable; SUB and DIV are not.
defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnds,
                                 SSE_ALU_ITINS_S, 1>;
defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnds,
                                 SSE_MUL_ITINS_S, 1>;
defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnds,
                                 SSE_ALU_ITINS_S, 0>;
defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnds,
                                 SSE_DIV_ITINS_S, 0>;

// Scalar MIN/MAX use the {sae} multiclass and are not commutable here.
defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminRnds,
                               SSE_ALU_ITINS_S, 0>;
defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxRnds,
                               SSE_ALU_ITINS_S, 0>;
Elena Demikhovskyd84f3372016-07-11 06:08:06 +00004858
// MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
// X86fminc and X86fmaxc instead of X86fmin and X86fmax.
// Only codegen-only scalar (_.FRC) forms are defined; the register-register
// form is unconditionally marked commutable.
multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
                                    X86VectorVTInfo _, SDNode OpNode,
                                    OpndItins itins> {
  let isCodeGenOnly = 1, Predicates = [HasAVX512],
      ExeDomain = _.ExeDomain in {
    def rr : I< opc, MRMSrcReg,
                (outs _.FRC:$dst),
                (ins _.FRC:$src1, _.FRC:$src2),
                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
                itins.rr>,
             Sched<[itins.Sched]> {
      let isCommutable = 1;
    }

    def rm : I< opc, MRMSrcMem,
                (outs _.FRC:$dst),
                (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                          (_.ScalarLdFrag addr:$src2)))],
                itins.rm>,
             Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
// Commutable scalar MIN (X86fminc), single and double precision.
defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
                                         SSE_ALU_ITINS_S.s>,
                XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
                                         SSE_ALU_ITINS_S.d>,
                XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;

// Commutable scalar MAX (X86fmaxc), single and double precision.
defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
                                         SSE_ALU_ITINS_S.s>,
                XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
                                         SSE_ALU_ITINS_S.d>,
                XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
Elena Demikhovsky02ffd262015-03-01 07:44:04 +00004894
// Packed FP binary operation in three maskable forms:
//   rr  : register-register (IsCommutable is passed to this form only),
//   rm  : second operand loaded from memory,
//   rmb : second operand broadcast from a scalar memory location.
// The mnemonic is OpcodeStr with _.Suffix appended.
multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            X86VectorVTInfo _, OpndItins itins,
                            bit IsCommutable> {
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
    defm rr: AVX512_maskable<opc, MRMSrcReg, _,
               (outs _.RC:$dst),
               (ins _.RC:$src1, _.RC:$src2),
               OpcodeStr##_.Suffix,
               "$src2, $src1", "$src1, $src2",
               (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
               itins.rr, IsCommutable>,
             EVEX_4V, Sched<[itins.Sched]>;

    let mayLoad = 1 in {
      defm rm: AVX512_maskable<opc, MRMSrcMem, _,
                 (outs _.RC:$dst),
                 (ins _.RC:$src1, _.MemOp:$src2),
                 OpcodeStr##_.Suffix,
                 "$src2, $src1", "$src1, $src2",
                 (OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
                 itins.rm>,
               EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;

      defm rmb: AVX512_maskable<opc, MRMSrcMem, _,
                  (outs _.RC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2),
                  OpcodeStr##_.Suffix,
                  "${src2}"##_.BroadcastStr##", $src1",
                  "$src1, ${src2}"##_.BroadcastStr,
                  (OpNode _.RC:$src1,
                          (_.VT (X86VBroadcast
                                  (_.ScalarLdFrag addr:$src2)))),
                  itins.rm>,
                EVEX_4V, EVEX_B,
                Sched<[itins.Sched.Folded, ReadAfterLd]>;
    }
  }
}
Elena Demikhovskyf7c1b162014-03-06 08:45:30 +00004921
// Packed FP register-register form with an explicit rounding-control operand
// ($rc), matched from OpNodeRnd with the rounding mode as an i32 immediate.
multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNodeRnd,
                                  OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm rrb: AVX512_maskable<opc, MRMSrcReg, _,
                (outs _.RC:$dst),
                (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc),
                OpcodeStr##_.Suffix,
                "$rc, $src2, $src1", "$src1, $src2, $rc",
                (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 imm:$rc))),
                itins.rr>,
              EVEX_4V, EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
  }
}
4931
// Packed FP register-register "{sae}" form, matched from OpNodeRnd with
// FROUND_NO_EXC as the rounding operand.
multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNodeRnd,
                                OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm rrb: AVX512_maskable<opc, MRMSrcReg, _,
                (outs _.RC:$dst),
                (ins _.RC:$src1, _.RC:$src2),
                OpcodeStr##_.Suffix,
                "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2,
                                 (i32 FROUND_NO_EXC))),
                itins.rr>,
              EVEX_4V, EVEX_B, Sched<[itins.Sched]>;
  }
}
4941
// Packed FP binary operation in single (PS*) and double (PD*) precision at
// 512, 256 and 128 bits. The 512-bit forms are guarded by prd alone; the
// 128/256-bit forms additionally require HasVLX.
multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                             Predicate prd, SizeItins itins,
                             bit IsCommutable = 0> {
  let Predicates = [prd] in {
    defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info,
                                itins.s, IsCommutable>,
               EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
    defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info,
                                itins.d, IsCommutable>,
               EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
  }

  // Define only if AVX512VL feature is present.
  let Predicates = [prd, HasVLX] in {
    defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info,
                                   itins.s, IsCommutable>,
                  EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info,
                                   itins.s, IsCommutable>,
                  EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info,
                                   itins.d, IsCommutable>,
                  EVEX_V128, PD, VEX_W, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info,
                                   itins.d, IsCommutable>,
                  EVEX_V256, PD, VEX_W, EVEX_CD8<64, CD8VF>;
  }
}
4970
// 512-bit single (PSZ) and double (PDZ) precision rounding-control forms of a
// packed FP binary operation.
multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                                   SizeItins itins> {
  defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd,
                                    itins.s, v16f32_info>,
             EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd,
                                    itins.d, v8f64_info>,
             EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
}
4978
// 512-bit single (PSZ) and double (PDZ) precision "{sae}" forms of a packed
// FP binary operation.
multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                                 SizeItins itins> {
  defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd,
                                  itins.s, v16f32_info>,
             EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd,
                                  itins.d, v8f64_info>,
             EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
}
4986
// Packed FP arithmetic with rounding-control variants. ADD and MUL are
// commutable; SUB and DIV keep the default IsCommutable of 0.
defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512,
                              SSE_ALU_ITINS_P, 1>,
            avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd,
                                    SSE_ALU_ITINS_P>;
defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512,
                              SSE_MUL_ITINS_P, 1>,
            avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd,
                                    SSE_MUL_ITINS_P>;
defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512,
                              SSE_ALU_ITINS_P>,
            avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd,
                                    SSE_ALU_ITINS_P>;
defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512,
                              SSE_DIV_ITINS_P>,
            avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd,
                                    SSE_DIV_ITINS_P>;

// Packed MIN/MAX with {sae} variants; not commutable.
defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512,
                              SSE_ALU_ITINS_P, 0>,
            avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd,
                                  SSE_ALU_ITINS_P>;
defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512,
                              SSE_ALU_ITINS_P, 0>,
            avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd,
                                  SSE_ALU_ITINS_P>;
// Code-gen-only twins of VMIN/VMAX: same opcodes (0x5D / 0x5F) and mnemonics,
// but selected from X86fminc / X86fmaxc and instantiated with a trailing 1
// where VMIN/VMAX pass 0.
let isCodeGenOnly = 1 in {
  defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512,
                                 SSE_ALU_ITINS_P, 1>;
  defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512,
                                 SSE_ALU_ITINS_P, 1>;
}
// Packed FP logical instructions, predicated on HasDQI. They are instantiated
// with null_frag, i.e. without a selection pattern of their own; VANDN is the
// only one instantiated with a trailing 0.
defm VAND  : avx512_fp_binop_p<0x54, "vand",  null_frag, HasDQI,
                               SSE_ALU_ITINS_P, 1>;
defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI,
                               SSE_ALU_ITINS_P, 0>;
defm VOR   : avx512_fp_binop_p<0x56, "vor",   null_frag, HasDQI,
                               SSE_ALU_ITINS_P, 1>;
defm VXOR  : avx512_fp_binop_p<0x57, "vxor",  null_frag, HasDQI,
                               SSE_ALU_ITINS_P, 1>;
Elena Demikhovsky52e4a0e2014-01-05 10:46:09 +00005017
// Catch floating-point vselects whose selected value is an integer logic op
// that has been bitcast to the FP vector type.
//
//   InstrStr - name prefix of the instruction family; the form suffix
//              (rrk, rrkz, rmk, rmkz, rmb, rmbk, rmbkz) is pasted onto it.
//   OpNode   - the integer logic node, matched on _.i64VT.
//   _        - FP vector type info (supplies VT, i64VT, RC, KRCWM, ...).
//   prd      - predicate guarding every pattern below.
multiclass avx512_fp_logical_lowering<string InstrStr, SDNode OpNode,
                                      X86VectorVTInfo _, Predicate prd> {
  let Predicates = [prd] in {
    //===--- Masked register-register forms ---===//

    // vselect falls back to $src0 -> "rrk".
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                             (bitconvert
                               (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
                             _.RC:$src0)),
              (!cast<Instruction>(InstrStr#rrk)
                 _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2)>;

    // vselect falls back to all-zeros -> "rrkz".
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                             (bitconvert
                               (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
                             _.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr#rrkz)
                 _.KRCWM:$mask, _.RC:$src1, _.RC:$src2)>;

    //===--- Masked register-memory forms ---===//

    // Second operand is a full-width load; fallback $src0 -> "rmk".
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                             (bitconvert
                               (_.i64VT (OpNode _.RC:$src1,
                                                (load addr:$src2)))),
                             _.RC:$src0)),
              (!cast<Instruction>(InstrStr#rmk)
                 _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, addr:$src2)>;

    // Second operand is a full-width load; fallback zero -> "rmkz".
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                             (bitconvert
                               (_.i64VT (OpNode _.RC:$src1,
                                                (load addr:$src2)))),
                             _.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr#rmkz)
                 _.KRCWM:$mask, _.RC:$src1, addr:$src2)>;

    //===--- Register-broadcast forms ---===//

    // Unmasked: second operand is a bitcast broadcast of a scalar load
    // -> "rmb".
    def : Pat<(_.i64VT (OpNode _.RC:$src1,
                               (bitconvert
                                 (_.VT (X86VBroadcast
                                         (_.ScalarLdFrag addr:$src2)))))),
              (!cast<Instruction>(InstrStr#rmb) _.RC:$src1, addr:$src2)>;

    // Masked broadcast, fallback $src0 -> "rmbk".
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                             (bitconvert
                               (_.i64VT
                                 (OpNode _.RC:$src1,
                                         (bitconvert
                                           (_.VT (X86VBroadcast
                                                   (_.ScalarLdFrag
                                                      addr:$src2))))))),
                             _.RC:$src0)),
              (!cast<Instruction>(InstrStr#rmbk)
                 _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, addr:$src2)>;

    // Masked broadcast, fallback zero -> "rmbkz".
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                             (bitconvert
                               (_.i64VT
                                 (OpNode _.RC:$src1,
                                         (bitconvert
                                           (_.VT (X86VBroadcast
                                                   (_.ScalarLdFrag
                                                      addr:$src2))))))),
                             _.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr#rmbkz)
                 _.KRCWM:$mask, _.RC:$src1, addr:$src2)>;
  }
}
5070
// Instantiate avx512_fp_logical_lowering for every FP vector width. The
// instruction family is InstrStr plus an element letter (D for f32 vectors,
// Q for f64 vectors) and a width suffix (Z128 / Z256 / Z). 128- and 256-bit
// forms are guarded by HasVLX, 512-bit forms by HasAVX512.
multiclass avx512_fp_logical_lowering_sizes<string InstrStr, SDNode OpNode> {
  // 128-bit
  defm : avx512_fp_logical_lowering<InstrStr#DZ128, OpNode, v4f32x_info,
                                    HasVLX>;
  defm : avx512_fp_logical_lowering<InstrStr#QZ128, OpNode, v2f64x_info,
                                    HasVLX>;
  // 256-bit
  defm : avx512_fp_logical_lowering<InstrStr#DZ256, OpNode, v8f32x_info,
                                    HasVLX>;
  defm : avx512_fp_logical_lowering<InstrStr#QZ256, OpNode, v4f64x_info,
                                    HasVLX>;
  // 512-bit
  defm : avx512_fp_logical_lowering<InstrStr#DZ, OpNode, v16f32_info,
                                    HasAVX512>;
  defm : avx512_fp_logical_lowering<InstrStr#QZ, OpNode, v8f64_info,
                                    HasAVX512>;
}
5079
// Map each integer logic node onto its VP* instruction family for the
// FP-typed masked/broadcast patterns.
defm : avx512_fp_logical_lowering_sizes<"VPAND",  and>;
defm : avx512_fp_logical_lowering_sizes<"VPOR",   or>;
defm : avx512_fp_logical_lowering_sizes<"VPXOR",  xor>;
defm : avx512_fp_logical_lowering_sizes<"VPANDN", X86andnp>;
5084
// Scalar FP logic ops are implemented with the 128-bit packed instructions:
// both scalar operands are copied into VR128X, the packed op is applied, and
// the result is copied back to the scalar register class.
let Predicates = [HasVLX, HasDQI] in {
  // f64 (FR64X) -> *PDZ128rr
  def : Pat<(f64 (X86fand FR64X:$src1, FR64X:$src2)),
            (COPY_TO_REGCLASS
              (VANDPDZ128rr (COPY_TO_REGCLASS FR64X:$src1, VR128X),
                            (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
              FR64X)>;
  def : Pat<(f64 (X86for FR64X:$src1, FR64X:$src2)),
            (COPY_TO_REGCLASS
              (VORPDZ128rr (COPY_TO_REGCLASS FR64X:$src1, VR128X),
                           (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
              FR64X)>;
  def : Pat<(f64 (X86fxor FR64X:$src1, FR64X:$src2)),
            (COPY_TO_REGCLASS
              (VXORPDZ128rr (COPY_TO_REGCLASS FR64X:$src1, VR128X),
                            (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
              FR64X)>;
  def : Pat<(f64 (X86fandn FR64X:$src1, FR64X:$src2)),
            (COPY_TO_REGCLASS
              (VANDNPDZ128rr (COPY_TO_REGCLASS FR64X:$src1, VR128X),
                             (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
              FR64X)>;

  // f32 (FR32X) -> *PSZ128rr
  def : Pat<(f32 (X86fand FR32X:$src1, FR32X:$src2)),
            (COPY_TO_REGCLASS
              (VANDPSZ128rr (COPY_TO_REGCLASS FR32X:$src1, VR128X),
                            (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
              FR32X)>;
  def : Pat<(f32 (X86for FR32X:$src1, FR32X:$src2)),
            (COPY_TO_REGCLASS
              (VORPSZ128rr (COPY_TO_REGCLASS FR32X:$src1, VR128X),
                           (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
              FR32X)>;
  def : Pat<(f32 (X86fxor FR32X:$src1, FR32X:$src2)),
            (COPY_TO_REGCLASS
              (VXORPSZ128rr (COPY_TO_REGCLASS FR32X:$src1, VR128X),
                            (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
              FR32X)>;
  def : Pat<(f32 (X86fandn FR32X:$src1, FR32X:$src2)),
            (COPY_TO_REGCLASS
              (VANDNPSZ128rr (COPY_TO_REGCLASS FR32X:$src1, VR128X),
                             (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
              FR32X)>;
}
5121
// Packed scalef-style binop for one vector type `_`: register (rr), full
// memory (rm) and scalar-broadcast memory (rmb) forms. Every pattern passes
// (i32 FROUND_CURRENT) as the third operand of OpNode.
multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    // reg, reg
    defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                              (ins _.RC:$src1, _.RC:$src2),
                              OpcodeStr##_.Suffix,
                              "$src2, $src1", "$src1, $src2",
                              (_.VT (OpNode _.RC:$src1, _.RC:$src2,
                                            (i32 FROUND_CURRENT))),
                              itins.rr>,
              EVEX_4V, Sched<[itins.Sched]>;

    // reg, mem (full-width load)
    defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                              (ins _.RC:$src1, _.MemOp:$src2),
                              OpcodeStr##_.Suffix,
                              "$src2, $src1", "$src1, $src2",
                              (OpNode _.RC:$src1, (_.LdFrag addr:$src2),
                                      (i32 FROUND_CURRENT)),
                              itins.rm>,
              EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;

    // reg, mem (scalar load broadcast to every element; EVEX_B)
    defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                               (ins _.RC:$src1, _.ScalarMemOp:$src2),
                               OpcodeStr##_.Suffix,
                               "${src2}"##_.BroadcastStr##", $src1",
                               "$src1, ${src2}"##_.BroadcastStr,
                               (OpNode _.RC:$src1,
                                       (_.VT (X86VBroadcast
                                               (_.ScalarLdFrag addr:$src2))),
                                       (i32 FROUND_CURRENT)),
                               itins.rm>,
               EVEX_4V, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
5145
// Scalar scalef-style binop for type `_`: register (rr) and memory (rm)
// forms built on AVX512_maskable_scalar. Both patterns pass
// (i32 FROUND_CURRENT) as the third operand of OpNode.
multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr,
                                   SDNode OpNode, OpndItins itins,
                                   X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    // reg, reg
    defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                     (ins _.RC:$src1, _.RC:$src2),
                                     OpcodeStr##_.Suffix,
                                     "$src2, $src1", "$src1, $src2",
                                     (_.VT (OpNode _.RC:$src1, _.RC:$src2,
                                                   (i32 FROUND_CURRENT))),
                                     itins.rr>,
              Sched<[itins.Sched]>;

    // reg, mem: the memory operand is matched through _.ScalarIntMemCPat.
    defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                     (ins _.RC:$src1, _.IntScalarMemOp:$src2),
                                     OpcodeStr##_.Suffix,
                                     "$src2, $src1", "$src1, $src2",
                                     (OpNode _.RC:$src1,
                                             _.ScalarIntMemCPat:$src2,
                                             (i32 FROUND_CURRENT)),
                                     itins.rm>,
              Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
5162
// All scalef forms for one mnemonic.
//   opc / OpNode           - opcode and node of the packed forms.
//   opcScaler / OpNodeScal - opcode and node of the scalar forms.
// The 512-bit packed and the scalar definitions additionally pull in the
// avx512_fp_round_packed / avx512_fp_scalar_round variants; the 128/256-bit
// packed definitions do not and are guarded by HasVLX.
multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler,
                                string OpcodeStr, SDNode OpNode,
                                SDNode OpNodeScal> {
  // 512-bit packed.
  defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F32P,
                                v16f32_info>,
             avx512_fp_round_packed<opc, OpcodeStr, OpNode, SSE_ALU_F32P,
                                    v16f32_info>,
             EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F64P,
                                v8f64_info>,
             avx512_fp_round_packed<opc, OpcodeStr, OpNode, SSE_ALU_F64P,
                                    v8f64_info>,
             EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

  // Scalar.
  defm SSZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal,
                                        SSE_ALU_F32S, f32x_info>,
                avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info,
                                       OpNodeScal, SSE_ALU_ITINS_S.s>,
                EVEX_4V,EVEX_CD8<32, CD8VT1>;
  defm SDZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal,
                                        SSE_ALU_F64S, f64x_info>,
                avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info,
                                       OpNodeScal, SSE_ALU_ITINS_S.d>,
                EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;

  // 128/256-bit packed: define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F32P,
                                     v4f32x_info>,
                  EVEX_V128, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F32P,
                                     v8f32x_info>,
                  EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F64P,
                                     v2f64x_info>,
                  EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F64P,
                                     v4f64x_info>,
                  EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
  }
}

// vscalef: packed opcode 0x2C, scalar opcode 0x2D.
defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef",
                                    X86scalef, X86scalefs>, T8PD;
Asaf Badouh7ec4b7a2015-06-28 14:30:39 +00005190
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00005191//===----------------------------------------------------------------------===//
5192// AVX-512 VPTESTM instructions
5193//===----------------------------------------------------------------------===//
5194
// Vector test producing a mask register (_.KRC) for one vector type `_`:
// register-register (rr) and register-memory (rm) forms.
multiclass avx512_vptest<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    // Only the register-register form is marked commutable.
    let isCommutable = 1 in
    defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
                                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                                  "$src2, $src1", "$src1, $src2",
                                  (OpNode (_.VT _.RC:$src1),
                                          (_.VT _.RC:$src2)),
                                  itins.rr>,
              EVEX_4V, Sched<[itins.Sched]>;

    // Second operand is a full-width load bitcast to _.VT.
    defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
                                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                                  "$src2, $src1", "$src1, $src2",
                                  (OpNode (_.VT _.RC:$src1),
                                          (_.VT (bitconvert
                                                  (_.LdFrag addr:$src2)))),
                                  itins.rm>,
              EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
              Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
5213
// Broadcast-memory (rmb) form of the vector test: the second operand is a
// scalar load broadcast to every element (EVEX_B).
multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
                                 (ins _.RC:$src1, _.ScalarMemOp:$src2),
                                 OpcodeStr,
                                 "${src2}"##_.BroadcastStr##", $src1",
                                 "$src1, ${src2}"##_.BroadcastStr,
                                 (OpNode (_.VT _.RC:$src1),
                                         (_.VT (X86VBroadcast
                                                 (_.ScalarLdFrag
                                                    addr:$src2)))),
                                 itins.rm>,
             EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
Igor Bregerfca0a342016-01-28 13:19:25 +00005226
// Without VLX, implement the 128/256-bit test with the 512-bit instruction:
// each source is inserted into an undefined ExtendInfo.VT register at
// _.SubRegIdx, the NAME#Suffix#"Zrr" instruction is applied, and the result
// is copied to the narrow mask class _.KRC.
multiclass avx512_vptest_lowering<SDNode OpNode, X86VectorVTInfo ExtendInfo,
                                  X86VectorVTInfo _, string Suffix> {
  def : Pat<(_.KVT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))),
            (_.KVT
              (COPY_TO_REGCLASS
                (!cast<Instruction>(NAME # Suffix # "Zrr")
                   (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                  _.RC:$src1, _.SubRegIdx),
                   (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                  _.RC:$src2, _.SubRegIdx)),
                _.KRC))>;
}
5239
// Vector test for one element type across all widths. Includes the broadcast
// form at every width. When VLX is missing, the 128/256-bit cases are lowered
// through the 512-bit instruction instead.
multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode, OpndItins itins,
                                  AVX512VLVectorVTInfo _, string Suffix> {
  // 512-bit.
  let Predicates = [HasAVX512] in
  defm Z : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info512>,
           avx512_vptest_mb<opc, OpcodeStr, OpNode, itins, _.info512>,
           EVEX_V512;

  // Native 256/128-bit.
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info256>,
                avx512_vptest_mb<opc, OpcodeStr, OpNode,itins, _.info256>,
                EVEX_V256;
    defm Z128 : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info128>,
                avx512_vptest_mb<opc, OpcodeStr, OpNode, itins, _.info128>,
                EVEX_V128;
  }

  // 256/128-bit via the 512-bit instruction.
  let Predicates = [HasAVX512, NoVLX] in {
    defm Z256_Alt : avx512_vptest_lowering< OpNode, _.info512, _.info256,
                                            Suffix>;
    defm Z128_Alt : avx512_vptest_lowering< OpNode, _.info512, _.info128,
                                            Suffix>;
  }
}
5258
// Dword ("d") and qword ("q") vector tests; the qword variant sets VEX_W.
multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            OpndItins itins> {
  defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", OpNode, itins,
                                  avx512vl_i32_info, "D">;
  defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", OpNode, itins,
                                  avx512vl_i64_info, "Q">,
           VEX_W;
}
5266
// Word ("w") and byte ("b") vector tests at all widths. These forms have no
// broadcast variant; the word forms set VEX_W.
multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
                            SDNode OpNode, OpndItins itins> {
  // 512-bit forms.
  let Predicates = [HasBWI] in {
  defm WZ:    avx512_vptest<opc, OpcodeStr#"w", OpNode, itins, v32i16_info>,
              EVEX_V512, VEX_W;
  defm BZ:    avx512_vptest<opc, OpcodeStr#"b", OpNode, itins, v64i8_info>,
              EVEX_V512;
  }

  // Native 256/128-bit forms.
  let Predicates = [HasVLX, HasBWI] in {
  defm WZ256: avx512_vptest<opc, OpcodeStr#"w", OpNode, itins, v16i16x_info>,
              EVEX_V256, VEX_W;
  defm WZ128: avx512_vptest<opc, OpcodeStr#"w", OpNode, itins, v8i16x_info>,
              EVEX_V128, VEX_W;
  defm BZ256: avx512_vptest<opc, OpcodeStr#"b", OpNode, itins, v32i8x_info>,
              EVEX_V256;
  defm BZ128: avx512_vptest<opc, OpcodeStr#"b", OpNode, itins, v16i8x_info>,
              EVEX_V128;
  }

  // Without VLX, lower the 256/128-bit cases through the 512-bit instruction.
  // avx512_vptest_lowering selects NAME#{B,W}#"Zrr", i.e. the BZ/WZ
  // instructions defined above under HasBWI, so these patterns must require
  // HasBWI as well (not merely HasAVX512); otherwise a BWI instruction could
  // be selected for a target that lacks it.
  let Predicates = [HasBWI, NoVLX] in {
  defm BZ256_Alt : avx512_vptest_lowering< OpNode, v64i8_info, v32i8x_info, "B">;
  defm BZ128_Alt : avx512_vptest_lowering< OpNode, v64i8_info, v16i8x_info, "B">;
  defm WZ256_Alt : avx512_vptest_lowering< OpNode, v32i16_info, v16i16x_info, "W">;
  defm WZ128_Alt : avx512_vptest_lowering< OpNode, v32i16_info, v8i16x_info, "W">;
  }
}
5294
// Union of the byte/word forms (opcode opc_wb) and the dword/qword forms
// (opcode opc_dq) of a vector test.
multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq,
                                   string OpcodeStr, SDNode OpNode,
                                   OpndItins itins>
  : avx512_vptest_wb<opc_wb, OpcodeStr, OpNode, itins>,
    avx512_vptest_dq<opc_dq, OpcodeStr, OpNode, itins>;
Elena Demikhovsky431b81e2015-04-21 13:13:46 +00005299
// vptestm and vptestnm share opcodes (0x26 byte/word, 0x27 dword/qword) and
// differ only in the prefix class: T8PD vs. T8XS.
defm VPTESTM  : avx512_vptest_all_forms<0x26, 0x27, "vptestm", X86testm,
                                        SSE_BIT_ITINS_P>,
                T8PD;
defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86testnm,
                                        SSE_BIT_ITINS_P>,
                T8XS;
Elena Demikhovskya30e4372014-02-05 07:05:03 +00005304
Cameron McInally9b7c15a2014-11-25 20:41:51 +00005305
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00005306//===----------------------------------------------------------------------===//
5307// AVX-512 Shift instructions
5308//===----------------------------------------------------------------------===//
// Shift by 8-bit immediate for one vector type `_`.
//   ri - register source, encoded with ImmFormR.
//   mi - memory source (full-width load), encoded with ImmFormM.
multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
                            string OpcodeStr, SDNode OpNode, OpndItins itins,
                            X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
                              (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
                              "$src2, $src1", "$src1, $src2",
                              (_.VT (OpNode _.RC:$src1, (i8 imm:$src2))),
                              itins.rr>,
              Sched<[itins.Sched]>;

    defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
                              (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
                              "$src2, $src1", "$src1, $src2",
                              (_.VT (OpNode
                                      (_.VT (bitconvert
                                              (_.LdFrag addr:$src1))),
                                      (i8 imm:$src2))),
                              itins.rm>,
              Sched<[itins.Sched.Folded]>;
  }
}
5326
// Shift by 8-bit immediate where the shifted value is a scalar load broadcast
// to every element (mbi form, EVEX_B), encoded with ImmFormM.
//
// The only sources are the broadcast memory operand and the immediate, so the
// scheduling list carries just the folded write -- matching the `mi` form of
// avx512_shift_rmi. A ReadAfterLd entry would have no register source operand
// to describe.
multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
                             string OpcodeStr, SDNode OpNode, OpndItins itins,
                             X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
                   (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
      "$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2",
     (_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src1)), (i8 imm:$src2))),
     itins.rm>, EVEX_B, Sched<[itins.Sched.Folded]>;
}
5337
// Shift of a vector of type `_` by a count held in an XMM register or a
// 128-bit memory operand. SrcVT is the type given to the register count;
// bc_frag is applied to the v2i64 load of the memory count.
multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            OpndItins itins, ValueType SrcVT, PatFrag bc_frag,
                            X86VectorVTInfo _> {
  // src2 is always 128-bit, whatever the width of `_`.
  let ExeDomain = _.ExeDomain in {
    defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                              (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
                              "$src2, $src1", "$src1, $src2",
                              (_.VT (OpNode _.RC:$src1,
                                            (SrcVT VR128X:$src2))),
                              itins.rr>,
              AVX512BIBase, EVEX_4V, Sched<[itins.Sched]>;

    defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                              (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
                              "$src2, $src1", "$src1, $src2",
                              (_.VT (OpNode _.RC:$src1,
                                            (bc_frag
                                              (loadv2i64 addr:$src2)))),
                              itins.rm>,
              AVX512BIBase, EVEX_4V,
              Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
5356
// avx512_shift_rrm at all three vector widths. The 512-bit form requires
// `prd`; the 256/128-bit forms additionally require HasVLX. The EVEX_CD8
// tuple kind differs per width: CD8VQ (512), CD8VH (256), CD8VF (128).
multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              OpndItins itins, ValueType SrcVT,
                              PatFrag bc_frag, AVX512VLVectorVTInfo VTInfo,
                              Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, itins, SrcVT, bc_frag,
                            VTInfo.info512>,
           EVEX_V512, EVEX_CD8<VTInfo.info512.EltSize, CD8VQ>;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, itins, SrcVT,
                                 bc_frag, VTInfo.info256>,
                EVEX_V256, EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
    defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, itins, SrcVT,
                                 bc_frag, VTInfo.info128>,
                EVEX_V128, EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
  }
}
5373
// Shift-by-XMM-count for dword (opcd), qword (opcq, VEX_W) and word (opcw)
// elements. Dword/qword forms require HasAVX512; the word form requires
// HasBWI.
multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
                              string OpcodeStr, SDNode OpNode,
                              OpndItins itins> {
  defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, itins,
                              v4i32, bc_v4i32, avx512vl_i32_info, HasAVX512>;
  defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, itins,
                              v2i64, bc_v2i64, avx512vl_i64_info, HasAVX512>,
           VEX_W;
  // The word form takes a v8i16 register count but bc_v2i64 for the memory
  // count.
  defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, itins,
                              v8i16, bc_v2i64, avx512vl_i16_info, HasBWI>;
}
5384
// Shift-by-immediate at all three widths; each width combines the ri/mi
// forms (avx512_shift_rmi) with the broadcast mbi form (avx512_shift_rmbi).
multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR,
                                  Format ImmFormM, string OpcodeStr,
                                  SDNode OpNode, OpndItins itins,
                                  AVX512VLVectorVTInfo VTInfo> {
  // 512-bit.
  let Predicates = [HasAVX512] in
  defm Z : avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                            itins, VTInfo.info512>,
           avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, itins,
                             VTInfo.info512>,
           EVEX_V512;

  // 256/128-bit: also need VLX.
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                                 itins, VTInfo.info256>,
                avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, itins,
                                  VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                                 itins, VTInfo.info128>,
                avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, itins,
                                  VTInfo.info128>,
                EVEX_V128;
  }
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00005404
// Word-element shift-by-immediate at all three widths. Only the ri/mi forms
// are instantiated (no broadcast form); all are marked VEX_WIG. Requires
// HasBWI, plus HasVLX for the 256/128-bit forms.
multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
                              string OpcodeStr, SDNode OpNode,
                              OpndItins itins> {
  let Predicates = [HasBWI] in
  defm WZ : avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                             itins, v32i16_info>,
            EVEX_V512, VEX_WIG;

  let Predicates = [HasVLX, HasBWI] in {
    defm WZ256 : avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                                  itins, v16i16x_info>,
                 EVEX_V256, VEX_WIG;
    defm WZ128 : avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                                  itins, v8i16x_info>,
                 EVEX_V128, VEX_WIG;
  }
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00005419
// Dword (opcd, "d") and qword (opcq, "q", VEX_W) shift-by-immediate, each at
// all vector widths.
multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
                               Format ImmFormR, Format ImmFormM,
                               string OpcodeStr, SDNode OpNode,
                               OpndItins itins> {
  defm D : avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d",
                                  OpNode, itins, avx512vl_i32_info>,
           EVEX_CD8<32, CD8VF>;
  defm Q : avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q",
                                  OpNode, itins, avx512vl_i64_info>,
           EVEX_CD8<64, CD8VF>, VEX_W;
}
Cameron McInally9b7c15a2014-11-25 20:41:51 +00005428
// Shifts and rotates by immediate. The operation is chosen by the ModRM
// format pair (MRM2 = srl, MRM6 = sll, MRM4 = sra, MRM0 = ror, MRM1 = rol).
// Shifts have dword/qword and word forms; rotates have dword/qword only.
defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
                                 SSE_INTSHIFT_P>,
             avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
                                SSE_INTSHIFT_P>,
             AVX512BIi8Base, EVEX_4V;

defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
                                 SSE_INTSHIFT_P>,
             avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
                                SSE_INTSHIFT_P>,
             AVX512BIi8Base, EVEX_4V;

// Arithmetic right shift uses opcode 0x72 for both dword and qword.
defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
                                 SSE_INTSHIFT_P>,
             avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
                                SSE_INTSHIFT_P>,
             AVX512BIi8Base, EVEX_4V;

// Rotates likewise use opcode 0x72 for both element sizes.
defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
                                 SSE_INTSHIFT_P>,
             AVX512BIi8Base, EVEX_4V;
defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
                                 SSE_INTSHIFT_P>,
             AVX512BIi8Base, EVEX_4V;
Elena Demikhovsky0b9dbe32015-03-11 10:25:42 +00005448
// Shifts by a count in an XMM register / 128-bit memory operand.
// Opcode arguments are, in order: dword, qword, word.
defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
                                SSE_INTSHIFT_P>;
defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
                                SSE_INTSHIFT_P>;
defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
                                SSE_INTSHIFT_P>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00005452
// Without VLX, implement v2i64/v4i64 arithmetic right shifts with the
// 512-bit VPSRAQ: the source is inserted into an undefined v8i64 register,
// shifted, and the low subregister (sub_xmm / sub_ymm) is extracted again.
let Predicates = [HasAVX512, NoVLX] in {
  // Shift count in an XMM register (VPSRAQZrr).
  def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG
              (v8i64 (VPSRAQZrr
                       (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                             VR256X:$src1, sub_ymm)),
                       VR128X:$src2)),
              sub_ymm)>;

  def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG
              (v8i64 (VPSRAQZrr
                       (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                             VR128X:$src1, sub_xmm)),
                       VR128X:$src2)),
              sub_xmm)>;

  // Immediate shift count (VPSRAQZri).
  def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG
              (v8i64 (VPSRAQZri
                       (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                             VR256X:$src1, sub_ymm)),
                       imm:$src2)),
              sub_ymm)>;

  def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG
              (v8i64 (VPSRAQZri
                       (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                             VR128X:$src1, sub_xmm)),
                       imm:$src2)),
              sub_xmm)>;
}
5479
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00005480//===-------------------------------------------------------------------===//
5481// Variable Bit Shifts
5482//===-------------------------------------------------------------------===//
// Variable (per-element) shift for one vector type `_`: the shift counts are
// a second vector of the same type, taken from a register (rr) or from a
// full-width load (rm).
multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                              (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                              "$src2, $src1", "$src1, $src2",
                              (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2))),
                              itins.rr>,
              AVX5128IBase, EVEX_4V, Sched<[itins.Sched]>;

    defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                              (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                              "$src2, $src1", "$src1, $src2",
                              (_.VT (OpNode _.RC:$src1,
                                            (_.VT (bitconvert
                                                    (_.LdFrag addr:$src2))))),
                              itins.rm>,
              AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
              Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
5502
// Embedded-broadcast memory form (rmb) that complements avx512_var_shift:
// the second source is a scalar load broadcast to every element
// (X86VBroadcast) and the encoding carries EVEX_B.
multiclass avx512_var_shift_mb<bits<8> Opc, string OpcodeStr, SDNode OpNode,
                               OpndItins Itins, X86VectorVTInfo VTI> {
  let ExeDomain = VTI.ExeDomain in {
    defm rmb : AVX512_maskable<Opc, MRMSrcMem, VTI,
                               (outs VTI.RC:$dst),
                               (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2),
                               OpcodeStr,
                               "${src2}"#VTI.BroadcastStr#", $src1",
                               "$src1, ${src2}"#VTI.BroadcastStr,
                               (VTI.VT (OpNode VTI.RC:$src1,
                                  (VTI.VT (X86VBroadcast
                                            (VTI.ScalarLdFrag addr:$src2))))),
                               Itins.rm>,
               AVX5128IBase, EVEX_B, EVEX_4V,
               EVEX_CD8<VTI.EltSize, CD8VF>,
               Sched<[Itins.Sched.Folded, ReadAfterLd]>;
  }
}
Simon Pilgrim7f2a6d52017-01-13 13:16:19 +00005516
// Instantiates the rr/rm/rmb forms for all three vector lengths of VTI:
// 512-bit under HasAVX512, 256- and 128-bit additionally under HasVLX.
multiclass avx512_var_shift_sizes<bits<8> Opc, string OpcodeStr, SDNode OpNode,
                                  OpndItins Itins, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_var_shift<Opc, OpcodeStr, OpNode, Itins, VTI.info512>,
             avx512_var_shift_mb<Opc, OpcodeStr, OpNode, Itins, VTI.info512>,
             EVEX_V512;
  }

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_var_shift<Opc, OpcodeStr, OpNode, Itins, VTI.info256>,
                avx512_var_shift_mb<Opc, OpcodeStr, OpNode, Itins, VTI.info256>,
                EVEX_V256;
    defm Z128 : avx512_var_shift<Opc, OpcodeStr, OpNode, Itins, VTI.info128>,
                avx512_var_shift_mb<Opc, OpcodeStr, OpNode, Itins, VTI.info128>,
                EVEX_V128;
  }
}
5530
// Dword (D) and qword (Q) flavours of a variable shift/rotate. Both share
// the same opcode; the Q flavour is distinguished by VEX_W and gets a "q"
// mnemonic suffix where D gets "d".
multiclass avx512_var_shift_types<bits<8> Opc, string OpcodeStr,
                                  SDNode OpNode, OpndItins Itins> {
  defm D : avx512_var_shift_sizes<Opc, OpcodeStr#"d", OpNode, Itins,
                                  avx512vl_i32_info>;
  defm Q : avx512_var_shift_sizes<Opc, OpcodeStr#"q", OpNode, Itins,
                                  avx512vl_i64_info>,
           VEX_W;
}
5538
// Selects 128/256-bit OpNode to the 512-bit "<OpcodeStr>Zrr" instruction.
// Both operands are inserted into the low subregister of an undefined
// 512-bit value and the low subregister of the result is extracted. The
// patterns are guarded by the predicate list p (callers pass a NoVLX list).
multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
                                     SDNode OpNode, list<Predicate> p> {
  let Predicates = p in {
    // 256-bit operands.
    def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
                                    (_.info256.VT _.info256.RC:$src2))),
              (EXTRACT_SUBREG
                (!cast<Instruction>(!strconcat(OpcodeStr, "Zrr"))
                  (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)),
                                 VR256X:$src1, sub_ymm),
                  (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)),
                                 VR256X:$src2, sub_ymm)),
                sub_ymm)>;

    // 128-bit operands.
    def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
                                    (_.info128.VT _.info128.RC:$src2))),
              (EXTRACT_SUBREG
                (!cast<Instruction>(!strconcat(OpcodeStr, "Zrr"))
                  (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)),
                                 VR128X:$src1, sub_xmm),
                  (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)),
                                 VR128X:$src2, sub_xmm)),
                sub_xmm)>;
  }
}
// Word-element variable shift: rr/rm forms only (no broadcast form is
// instantiated here), encoded with VEX_W. The 512-bit form requires HasBWI;
// the 256- and 128-bit forms require HasVLX as well.
multiclass avx512_var_shift_w<bits<8> Opc, string OpcodeStr,
                              SDNode OpNode, OpndItins Itins> {
  let Predicates = [HasBWI] in {
    defm WZ : avx512_var_shift<Opc, OpcodeStr, OpNode, Itins, v32i16_info>,
              EVEX_V512, VEX_W;
  }

  let Predicates = [HasVLX, HasBWI] in {
    defm WZ256 : avx512_var_shift<Opc, OpcodeStr, OpNode, Itins, v16i16x_info>,
                 EVEX_V256, VEX_W;
    defm WZ128 : avx512_var_shift<Opc, OpcodeStr, OpNode, Itins, v8i16x_info>,
                 EVEX_V128, VEX_W;
  }
}
5573
// Per-element variable shifts: D/Q forms via avx512_var_shift_types, W form
// via avx512_var_shift_w.
defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl, SSE_INTSHIFT_P>,
              avx512_var_shift_w<0x12, "vpsllvw", shl, SSE_INTSHIFT_P>;

defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra, SSE_INTSHIFT_P>,
              avx512_var_shift_w<0x11, "vpsravw", sra, SSE_INTSHIFT_P>;

defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl, SSE_INTSHIFT_P>,
              avx512_var_shift_w<0x10, "vpsrlvw", srl, SSE_INTSHIFT_P>;

// Per-element variable rotates (D/Q forms only).
defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SSE_INTSHIFT_P>;
defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SSE_INTSHIFT_P>;

// Without VLX, select the 128/256-bit operations to the 512-bit instruction.
defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", sra,
                                 [HasAVX512, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", shl,
                                 [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", sra,
                                 [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", srl,
                                 [HasBWI, NoVLX]>;
5590
// Special handling for VPSRAV intrinsics.
// Maps the X86vsrav node onto an existing instruction family named
// InstrStr # _.ZSuffix # <form>. Covers the unmasked, merge-masked (k) and
// zero-masked (kz) variants of the register (rr) and full-vector load (rm)
// forms.
//   InstrStr - instruction name prefix, e.g. "VPSRAVD".
//   _        - vector type info (VT, RC, KRC/KRCWM, LdFrag, ZSuffix, ...).
//   p        - predicates guarding the patterns.
// Instruction-name suffixes are written as quoted strings and joined with a
// single '#', matching the OpcodeStr#"Zrr" style used elsewhere in this
// file, rather than relying on bare identifiers being turned into strings.
multiclass avx512_var_shift_int_lowering<string InstrStr, X86VectorVTInfo _,
                                         list<Predicate> p> {
  let Predicates = p in {
    // Unmasked.
    def : Pat<(_.VT (X86vsrav _.RC:$src1, _.RC:$src2)),
              (!cast<Instruction>(InstrStr#_.ZSuffix#"rr") _.RC:$src1,
               _.RC:$src2)>;
    def : Pat<(_.VT (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2)))),
              (!cast<Instruction>(InstrStr#_.ZSuffix#"rm")
               _.RC:$src1, addr:$src2)>;

    // Merge-masking: unselected elements come from $src0.
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                     (X86vsrav _.RC:$src1, _.RC:$src2), _.RC:$src0)),
              (!cast<Instruction>(InstrStr#_.ZSuffix#"rrk") _.RC:$src0,
               _.KRC:$mask, _.RC:$src1, _.RC:$src2)>;
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                     (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))),
                     _.RC:$src0)),
              (!cast<Instruction>(InstrStr#_.ZSuffix#"rmk") _.RC:$src0,
               _.KRC:$mask, _.RC:$src1, addr:$src2)>;

    // Zero-masking: unselected elements are zero.
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                     (X86vsrav _.RC:$src1, _.RC:$src2), _.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr#_.ZSuffix#"rrkz") _.KRC:$mask,
               _.RC:$src1, _.RC:$src2)>;
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                     (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))),
                     _.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr#_.ZSuffix#"rmkz") _.KRC:$mask,
               _.RC:$src1, addr:$src2)>;
  }
}
5621
// Extends avx512_var_shift_int_lowering with the embedded-broadcast memory
// forms (rmb, rmbk, rmbkz), where the shift counts are a scalar load
// broadcast with X86VBroadcast.
// Instruction-name suffixes are written as quoted strings and joined with a
// single '#', matching the OpcodeStr#"Zrr" style used elsewhere in this
// file, rather than relying on bare identifiers being turned into strings.
multiclass avx512_var_shift_int_lowering_mb<string InstrStr, X86VectorVTInfo _,
                                            list<Predicate> p> :
           avx512_var_shift_int_lowering<InstrStr, _, p> {
  let Predicates = p in {
    // Unmasked.
    def : Pat<(_.VT (X86vsrav _.RC:$src1,
                     (X86VBroadcast (_.ScalarLdFrag addr:$src2)))),
              (!cast<Instruction>(InstrStr#_.ZSuffix#"rmb")
               _.RC:$src1, addr:$src2)>;

    // Merge-masking: unselected elements come from $src0.
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                     (X86vsrav _.RC:$src1,
                      (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
                     _.RC:$src0)),
              (!cast<Instruction>(InstrStr#_.ZSuffix#"rmbk") _.RC:$src0,
               _.KRC:$mask, _.RC:$src1, addr:$src2)>;

    // Zero-masking: unselected elements are zero.
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                     (X86vsrav _.RC:$src1,
                      (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
                     _.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr#_.ZSuffix#"rmbkz") _.KRC:$mask,
               _.RC:$src1, addr:$src2)>;
  }
}
5644
// X86vsrav lowering. Word elements use the variant without broadcast
// patterns; dword and qword elements also get the broadcast patterns.
defm : avx512_var_shift_int_lowering<"VPSRAVW", v8i16x_info,
                                     [HasVLX, HasBWI]>;
defm : avx512_var_shift_int_lowering<"VPSRAVW", v16i16x_info,
                                     [HasVLX, HasBWI]>;
defm : avx512_var_shift_int_lowering<"VPSRAVW", v32i16_info, [HasBWI]>;

defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v4i32x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v8i32x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v16i32_info, [HasAVX512]>;

defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v2i64x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v4i64x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v8i64_info, [HasAVX512]>;
5654
Simon Pilgrim1cbe8c22017-07-17 14:11:30 +00005655
// With AVX512 but no VLX, rotate-left on v2i64/v4i64 and v4i32/v8i32 is
// selected to the 512-bit VPROLV (variable count) or VPROL (immediate count)
// instruction. Operands are inserted into the low subregister of an undefined
// zmm and the low subregister of the result is extracted.
let Predicates = [HasAVX512, NoVLX] in {
  // Variable count, qword elements.
  def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG
              (v8i64 (VPROLVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
              sub_xmm)>;
  def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG
              (v8i64 (VPROLVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
              sub_ymm)>;

  // Variable count, dword elements.
  def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
            (EXTRACT_SUBREG
              (v16i32 (VPROLVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
              sub_xmm)>;
  def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG
              (v16i32 (VPROLVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
              sub_ymm)>;

  // Immediate count, qword elements.
  def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG
              (v8i64 (VPROLQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                imm:$src2)),
              sub_xmm)>;
  def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG
              (v8i64 (VPROLQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                imm:$src2)),
              sub_ymm)>;

  // Immediate count, dword elements.
  def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG
              (v16i32 (VPROLDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                imm:$src2)),
              sub_xmm)>;
  def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG
              (v16i32 (VPROLDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                imm:$src2)),
              sub_ymm)>;
}
5706
// With AVX512 but no VLX, rotate-right on v2i64/v4i64 and v4i32/v8i32 is
// selected to the 512-bit VPRORV (variable count) or VPROR (immediate count)
// instruction. Operands are inserted into the low subregister of an undefined
// zmm and the low subregister of the result is extracted.
let Predicates = [HasAVX512, NoVLX] in {
  // Variable count, qword elements.
  def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG
              (v8i64 (VPRORVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
              sub_xmm)>;
  def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG
              (v8i64 (VPRORVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
              sub_ymm)>;

  // Variable count, dword elements.
  def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
            (EXTRACT_SUBREG
              (v16i32 (VPRORVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
              sub_xmm)>;
  def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG
              (v16i32 (VPRORVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
              sub_ymm)>;

  // Immediate count, qword elements.
  def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG
              (v8i64 (VPRORQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                imm:$src2)),
              sub_xmm)>;
  def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG
              (v8i64 (VPRORQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                imm:$src2)),
              sub_ymm)>;

  // Immediate count, dword elements.
  def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG
              (v16i32 (VPRORDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                imm:$src2)),
              sub_xmm)>;
  def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG
              (v16i32 (VPRORDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                imm:$src2)),
              sub_ymm)>;
}
5757
Elena Demikhovsky4078c752015-06-04 07:07:13 +00005758//===-------------------------------------------------------------------===//
5759// 1-src variable permutation VPERMW/D/Q
5760//===-------------------------------------------------------------------===//
// Variable permute for dword/qword-sized elements. Reuses the rr/rm/rmb
// shapes of avx512_var_shift and avx512_var_shift_mb, and instantiates only
// the 512-bit (Z) and 256-bit (Z256) lengths; no 128-bit form is defined.
multiclass avx512_vperm_dq_sizes<bits<8> Opc, string OpcodeStr, SDNode OpNode,
                                 OpndItins Itins, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_var_shift<Opc, OpcodeStr, OpNode, Itins, VTI.info512>,
             avx512_var_shift_mb<Opc, OpcodeStr, OpNode, Itins, VTI.info512>,
             EVEX_V512;
  }

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_var_shift<Opc, OpcodeStr, OpNode, Itins, VTI.info256>,
                avx512_var_shift_mb<Opc, OpcodeStr, OpNode, Itins, VTI.info256>,
                EVEX_V256;
  }
}
5771
// Immediate-controlled permute, built from avx512_shift_rmi and
// avx512_shift_rmbi. Only the 512-bit (Z) and 256-bit (Z256) lengths are
// instantiated.
multiclass avx512_vpermi_dq_sizes<bits<8> Opc, Format ImmFormR, Format ImmFormM,
                                  string OpcodeStr, SDNode OpNode,
                                  OpndItins Itins,
                                  AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_shift_rmi<Opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              Itins, VTInfo.info512>,
             avx512_shift_rmbi<Opc, ImmFormM, OpcodeStr, OpNode,
                               Itins, VTInfo.info512>,
             EVEX_V512;
  }

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_shift_rmi<Opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                                 Itins, VTInfo.info256>,
                avx512_shift_rmbi<Opc, ImmFormM, OpcodeStr, OpNode,
                                  Itins, VTInfo.info256>,
                EVEX_V256;
  }
}
5786
// Variable permute for byte/word elements: rr/rm forms only, at all three
// vector lengths. The feature predicate prd gates the 512-bit form; the
// 256- and 128-bit forms additionally require HasVLX.
multiclass avx512_vperm_bw<bits<8> Opc, string OpcodeStr,
                           Predicate prd, SDNode OpNode,
                           OpndItins Itins, AVX512VLVectorVTInfo VTI> {
  let Predicates = [prd] in {
    defm Z : avx512_var_shift<Opc, OpcodeStr, OpNode, Itins, VTI.info512>,
             EVEX_V512;
  }

  let Predicates = [HasVLX, prd] in {
    defm Z256 : avx512_var_shift<Opc, OpcodeStr, OpNode, Itins, VTI.info256>,
                EVEX_V256;
    defm Z128 : avx512_var_shift<Opc, OpcodeStr, OpNode, Itins, VTI.info128>,
                EVEX_V128;
  }
}
Elena Demikhovsky4078c752015-06-04 07:07:13 +00005800
// Byte/word variable permutes (index vector in a register or memory).
defm VPERMW : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
                              AVX2_PERMV_I, avx512vl_i16_info>,
              VEX_W;
defm VPERMB : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
                              AVX2_PERMV_I, avx512vl_i8_info>;

// Dword/qword and single/double variable permutes.
defm VPERMD  : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
                                     AVX2_PERMV_I, avx512vl_i32_info>;
defm VPERMQ  : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
                                     AVX2_PERMV_I, avx512vl_i64_info>,
               VEX_W;
defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
                                     AVX2_PERMV_F, avx512vl_f32_info>;
defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
                                     AVX2_PERMV_F, avx512vl_f64_info>,
               VEX_W;

// Immediate-controlled 64-bit element permutes.
defm VPERMQ  : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
                                      X86VPermi, AVX2_PERMV_I,
                                      avx512vl_i64_info>,
               EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
                                      X86VPermi, AVX2_PERMV_F,
                                      avx512vl_f64_info>,
               EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
Igor Breger78741a12015-10-04 07:20:41 +00005821//===----------------------------------------------------------------------===//
Simon Pilgrim18bcf932016-02-03 09:41:59 +00005822// AVX-512 - VPERMIL
Igor Breger78741a12015-10-04 07:20:41 +00005823//===----------------------------------------------------------------------===//
Elena Demikhovsky4078c752015-06-04 07:07:13 +00005824
// Variable-control VPERMIL forms. The data operand and result use the type
// info "_"; the control operand uses the separate type info "Ctrl".
// Three forms are produced: register control (rr), full-vector memory
// control (rm) and broadcast scalar memory control (rmb).
multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
                             OpndItins itins, X86VectorVTInfo _,
                             X86VectorVTInfo Ctrl> {
  // Control vector in a register.
  defm rr : AVX512_maskable<OpcVar, MRMSrcReg, _,
                            (outs _.RC:$dst),
                            (ins _.RC:$src1, Ctrl.RC:$src2),
                            OpcodeStr, "$src2, $src1", "$src1, $src2",
                            (_.VT (OpNode _.RC:$src1,
                                          (Ctrl.VT Ctrl.RC:$src2))),
                            itins.rr>,
            T8PD, EVEX_4V, Sched<[itins.Sched]>;

  // Control vector loaded from memory.
  defm rm : AVX512_maskable<OpcVar, MRMSrcMem, _,
                            (outs _.RC:$dst),
                            (ins _.RC:$src1, Ctrl.MemOp:$src2),
                            OpcodeStr, "$src2, $src1", "$src1, $src2",
                            (_.VT (OpNode _.RC:$src1,
                               (Ctrl.VT (bitconvert
                                          (Ctrl.LdFrag addr:$src2))))),
                            itins.rm>,
            T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
            Sched<[itins.Sched.Folded, ReadAfterLd]>;

  // Control element loaded from memory and broadcast.
  defm rmb : AVX512_maskable<OpcVar, MRMSrcMem, _,
                             (outs _.RC:$dst),
                             (ins _.RC:$src1, _.ScalarMemOp:$src2),
                             OpcodeStr,
                             "${src2}"#_.BroadcastStr#", $src1",
                             "$src1, ${src2}"#_.BroadcastStr,
                             (_.VT (OpNode _.RC:$src1,
                                (Ctrl.VT (X86VBroadcast
                                           (Ctrl.ScalarLdFrag addr:$src2))))),
                             itins.rm>,
             T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
5853
// Instantiates avx512_permil_vec with X86VPermilpv for all three vector
// lengths: 512-bit under HasAVX512, 128/256-bit additionally under HasVLX.
multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
                                    OpndItins itins, AVX512VLVectorVTInfo _,
                                    AVX512VLVectorVTInfo Ctrl> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, itins,
                               _.info512, Ctrl.info512>,
             EVEX_V512;
  }

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, itins,
                                  _.info128, Ctrl.info128>,
                EVEX_V128;
    defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, itins,
                                  _.info256, Ctrl.info256>,
                EVEX_V256;
  }
}
5868
// Complete VPERMIL family under a single NAME: the variable-control forms
// (opcode OpcVar, node X86VPermilpv) plus the immediate-control forms
// (opcode OpcImm, node X86VPermilpi).
multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
                         AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl> {
  // Variable control.
  defm NAME : avx512_permil_vec_common<OpcodeStr, OpcVar, AVX_VPERMILV,
                                       _, Ctrl>;
  // Immediate control.
  defm NAME : avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
                                     X86VPermilpi, AVX_VPERMILV, _>,
              EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
5876
// VPERMILPS: f32 data with i32 control, single-precision execution domain.
let ExeDomain = SSEPackedSingle in {
  defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C,
                                 avx512vl_f32_info, avx512vl_i32_info>;
}

// VPERMILPD: f64 data with i64 control, double-precision execution domain.
let ExeDomain = SSEPackedDouble in {
  defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D,
                                 avx512vl_f64_info, avx512vl_i64_info>,
                   VEX_W;
}
Simon Pilgrim1401a752017-11-29 14:58:34 +00005883
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00005884//===----------------------------------------------------------------------===//
Elena Demikhovsky75ede682015-06-01 07:17:23 +00005885// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
5886//===----------------------------------------------------------------------===//
5887
// Immediate-controlled shuffles. All three share opcode 0x70 and differ in
// the prefix class (BI / XS / XD).
defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
                                      X86PShufd, SSE_PSHUF,
                                      avx512vl_i32_info>,
               EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
                                  X86PShufhw, SSE_PSHUF>,
               EVEX, AVX512XSIi8Base;
defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
                                  X86PShuflw, SSE_PSHUF>,
               EVEX, AVX512XDIi8Base;
Michael Liao66233b72015-08-06 09:06:20 +00005895
// Byte shuffle at all three vector lengths, using the rr/rm shapes of
// avx512_var_shift. The 512-bit form requires HasBWI; the 256/128-bit forms
// require HasVLX as well.
multiclass avx512_pshufb_sizes<bits<8> Opc, string OpcodeStr, SDNode OpNode,
                               OpndItins Itins> {
  let Predicates = [HasBWI] in {
    defm Z : avx512_var_shift<Opc, OpcodeStr, OpNode, Itins, v64i8_info>,
             EVEX_V512;
  }

  let Predicates = [HasVLX, HasBWI] in {
    defm Z256 : avx512_var_shift<Opc, OpcodeStr, OpNode, Itins, v32i8x_info>,
                EVEX_V256;
    defm Z128 : avx512_var_shift<Opc, OpcodeStr, OpNode, Itins, v16i8x_info>,
                EVEX_V128;
  }
}
5906
// VPSHUFB: byte shuffle selected from X86pshufb; the W bit is ignored.
defm VPSHUFB : avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb, SSE_PSHUFB>,
               VEX_WIG;
Elena Demikhovsky55a99742015-06-22 13:00:42 +00005908
Elena Demikhovsky75ede682015-06-01 07:17:23 +00005909//===----------------------------------------------------------------------===//
Elena Demikhovsky0a74b7d2013-11-14 11:29:27 +00005910// Move Low to High and High to Low packed FP Instructions
5911//===----------------------------------------------------------------------===//
// EVEX-encoded VMOVLHPS, selected from X86Movlhps on v4f32.
def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg,
                            (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            [(set VR128X:$dst,
                                  (v4f32 (X86Movlhps VR128X:$src1,
                                                     VR128X:$src2)))],
                            IIC_SSE_MOV_LH>,
                  EVEX_4V;

// EVEX-encoded VMOVHLPS, selected from X86Movhlps on v4f32.
def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg,
                            (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            [(set VR128X:$dst,
                                  (v4f32 (X86Movhlps VR128X:$src1,
                                                     VR128X:$src2)))],
                            IIC_SSE_MOV_LH>,
                  EVEX_4V;
5922
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00005923//===----------------------------------------------------------------------===//
Igor Bregerb6b27af2015-11-10 07:09:07 +00005924// VMOVHPS/PD VMOVLPS Instructions
// All patterns were taken from the SSE implementation.
5926//===----------------------------------------------------------------------===//
// Load form (rm) of a VMOVH*/VMOVL* instruction: combines $src1 with a
// 64-bit memory operand. The pattern models the load as an f64 placed in a
// v2f64 by scalar_to_vector and bitcast to the instruction's vector type.
multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    def rm : AVX512<opc, MRMSrcMem,
                    (outs _.RC:$dst),
                    (ins _.RC:$src1, f64mem:$src2),
                    OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.RC:$dst,
                          (OpNode _.RC:$src1,
                                  (_.VT (bitconvert
                                    (v2f64 (scalar_to_vector
                                             (loadf64 addr:$src2)))))))],
                    IIC_SSE_MOV_LH>,
             EVEX_4V;
  }
}
5940
// 128-bit load forms of VMOVHPS/VMOVHPD (opcode 0x16) and VMOVLPS/VMOVLPD
// (opcode 0x12).
defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", X86Movlhps,
                                          v4f32x_info>,
                   EVEX_CD8<32, CD8VT2>, PS;
defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
                                          v2f64x_info>,
                   EVEX_CD8<64, CD8VT1>, PD, VEX_W;
defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", X86Movlps,
                                          v4f32x_info>,
                   EVEX_CD8<32, CD8VT2>, PS;
defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movlpd,
                                          v2f64x_info>,
                   EVEX_CD8<64, CD8VT1>, PD, VEX_W;
5949
// Additional DAG shapes that fold a 64-bit load into the VMOVH*/VMOVL* load
// instructions defined above.
let Predicates = [HasAVX512] in {
  // VMOVHPS: load expressed as an i64 scalar_to_vector or as X86vzload.
  def : Pat<(X86Movlhps VR128X:$src1,
              (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
            (VMOVHPSZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(X86Movlhps VR128X:$src1,
              (bc_v4f32 (v2i64 (X86vzload addr:$src2)))),
            (VMOVHPSZ128rm VR128X:$src1, addr:$src2)>;

  // VMOVHPD: load expressed as an i64 scalar_to_vector.
  def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
              (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
            (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;

  // VMOVLPS: plain load as the second operand.
  def : Pat<(v4f32 (X86Movlps VR128X:$src1, (load addr:$src2))),
            (VMOVLPSZ128rm VR128X:$src1, addr:$src2)>;

  // VMOVLPD: plain load, or an X86Movsd of a scalar f64 load.
  def : Pat<(v2f64 (X86Movlpd VR128X:$src1, (load addr:$src2))),
            (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(v2f64 (X86Movsd VR128X:$src1,
              (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
            (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
}
5972
// Store forms. Each pattern stores one f64 element extracted from the source
// register: the VMOVH* forms extract element 0 of X86Unpckh(src, src), the
// VMOVL* forms extract element 0 of the source itself.
def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
                              (ins f64mem:$dst, VR128X:$src),
                              "vmovhps\t{$src, $dst|$dst, $src}",
                              [(store
                                 (f64 (extractelt
                                   (X86Unpckh (bc_v2f64 (v4f32 VR128X:$src)),
                                              (bc_v2f64 (v4f32 VR128X:$src))),
                                   (iPTR 0))),
                                 addr:$dst)],
                              IIC_SSE_MOV_LH>,
                    EVEX, EVEX_CD8<32, CD8VT2>;

def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
                              (ins f64mem:$dst, VR128X:$src),
                              "vmovhpd\t{$src, $dst|$dst, $src}",
                              [(store
                                 (f64 (extractelt
                                   (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
                                   (iPTR 0))),
                                 addr:$dst)],
                              IIC_SSE_MOV_LH>,
                    EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;

def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
                              (ins f64mem:$dst, VR128X:$src),
                              "vmovlps\t{$src, $dst|$dst, $src}",
                              [(store
                                 (f64 (extractelt
                                   (bc_v2f64 (v4f32 VR128X:$src)),
                                   (iPTR 0))),
                                 addr:$dst)],
                              IIC_SSE_MOV_LH>,
                    EVEX, EVEX_CD8<32, CD8VT2>;

def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
                              (ins f64mem:$dst, VR128X:$src),
                              "vmovlpd\t{$src, $dst|$dst, $src}",
                              [(store
                                 (f64 (extractelt
                                   (v2f64 VR128X:$src),
                                   (iPTR 0))),
                                 addr:$dst)],
                              IIC_SSE_MOV_LH>,
                    EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
Craig Toppere1cac152016-06-07 07:27:54 +00006002
// Additional selection patterns that map onto the 128-bit EVEX store forms
// defined above.
let Predicates = [HasAVX512] in {
  // VMOVHPD: store of element 0 of (X86VPermilpi src, 1).
  def : Pat<(store
              (f64 (extractelt (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
                               (iPTR 0))),
              addr:$dst),
            (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;

  // VMOVLPS: X86Movlps of a load and a register, stored back to the address
  // that was loaded from.
  def : Pat<(store
              (v4f32 (X86Movlps (load addr:$src1), VR128X:$src2)),
              addr:$src1),
            (VMOVLPSZ128mr addr:$src1, VR128X:$src2)>;

  // VMOVLPD: X86Movlpd of a load and a register, stored back to the address
  // that was loaded from.
  def : Pat<(store
              (v2f64 (X86Movlpd (load addr:$src1), VR128X:$src2)),
              addr:$src1),
            (VMOVLPDZ128mr addr:$src1, VR128X:$src2)>;
}
6018//===----------------------------------------------------------------------===//
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006019// FMA - Fused Multiply Operations
6020//
Adam Nemet26371ce2014-10-24 00:02:55 +00006021
// Packed FMA, 213 operand order: register (r), full-vector memory (m) and
// broadcast-memory (mb) forms for one vector type.
// In every pattern OpNode is applied as (src2, src1, src3); $src1 is tied to
// $dst. The Suff parameter is not referenced inside this multiclass.
multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      hasSideEffects = 0 in {
    // Register form.
    defm r : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
               (ins _.RC:$src2, _.RC:$src3),
               OpcodeStr, "$src3, $src2", "$src2, $src3",
               (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)),
               NoItinerary, 1, 1>,
             AVX512FMA3Base, Sched<[WriteFMA]>;

    // Third operand loaded from memory.
    defm m : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
               (ins _.RC:$src2, _.MemOp:$src3),
               OpcodeStr, "$src3, $src2", "$src2, $src3",
               (_.VT (OpNode _.RC:$src2, _.RC:$src1,
                             (_.LdFrag addr:$src3))),
               NoItinerary, 1, 0>,
             AVX512FMA3Base, Sched<[WriteFMALd, ReadAfterLd]>;

    // Third operand is a scalar load broadcast to the full vector (EVEX_B).
    defm mb : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src2, _.ScalarMemOp:$src3),
                OpcodeStr,
                "${src3}" # _.BroadcastStr # ", $src2",
                "$src2, ${src3}" # _.BroadcastStr,
                (OpNode _.RC:$src2, _.RC:$src1,
                        (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))),
                NoItinerary, 1, 0>,
              AVX512FMA3Base, EVEX_B, Sched<[WriteFMALd, ReadAfterLd]>;
  }
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006047
// Packed FMA, 213 operand order, register form with an explicit rounding
// control operand ($rc). OpNode receives (src2, src1, src3, rc).
// The Suff parameter is not referenced inside this multiclass.
multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      hasSideEffects = 0 in {
    defm rb : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
                OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
                (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3,
                              (i32 imm:$rc))),
                NoItinerary, 1, 1>,
              AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[WriteFMA]>;
  }
}
Elena Demikhovsky7b0dd392015-01-28 10:21:27 +00006057
// Instantiates the 213-form packed FMA for all three vector widths of `_`:
//   Z    (512-bit): r/m/mb plus the rounding-control form, under HasAVX512.
//   Z256, Z128    : r/m/mb only, under HasVLX + HasAVX512.
multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr,
                                   SDNode OpNode, SDNode OpNodeRnd,
                                   AVX512VLVectorVTInfo _, string Suff> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info512, Suff>,
           avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, _.info512, Suff>,
           EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;

  let Predicates = [HasVLX, HasAVX512] in {
    defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info256, Suff>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info128, Suff>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}
6073
// Expands a 213-form packed FMA into its single-precision (PS) and
// double-precision (PD) families; the PD family additionally sets VEX_W.
multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode OpNodeRnd> {
  defm PS : avx512_fma3p_213_common<opc, !strconcat(OpcodeStr, "ps"),
                                    OpNode, OpNodeRnd,
                                    avx512vl_f32_info, "PS">;
  defm PD : avx512_fma3p_213_common<opc, !strconcat(OpcodeStr, "pd"),
                                    OpNode, OpNodeRnd,
                                    avx512vl_f64_info, "PD">,
            VEX_W;
}
6081
// Packed FMA instruction families, 213 operand order.
defm VFMADD213    : avx512_fma3p_213_f<0xA8, "vfmadd213",
                                       X86Fmadd, X86FmaddRnd>;
defm VFMSUB213    : avx512_fma3p_213_f<0xAA, "vfmsub213",
                                       X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213",
                                       X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213",
                                       X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD213   : avx512_fma3p_213_f<0xAC, "vfnmadd213",
                                       X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB213   : avx512_fma3p_213_f<0xAE, "vfnmsub213",
                                       X86Fnmsub, X86FnmsubRnd>;
6088
Elena Demikhovskyfcea06a2014-12-23 10:30:39 +00006089
// Packed FMA, 231 operand order: register (r), full-vector memory (m) and
// broadcast-memory (mb) forms for one vector type.
// In every pattern OpNode is applied as (src2, src3, src1); $src1 is tied to
// $dst. The Suff parameter is not referenced inside this multiclass.
multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      hasSideEffects = 0 in {
    // Register form.
    defm r : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
               (ins _.RC:$src2, _.RC:$src3),
               OpcodeStr, "$src3, $src2", "$src2, $src3",
               (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)),
               NoItinerary, 1, 1, vselect, 1>,
             AVX512FMA3Base, Sched<[WriteFMA]>;

    // Second multiplicand loaded from memory.
    defm m : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
               (ins _.RC:$src2, _.MemOp:$src3),
               OpcodeStr, "$src3, $src2", "$src2, $src3",
               (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3),
                             _.RC:$src1)),
               NoItinerary, 1, 0>,
             AVX512FMA3Base, Sched<[WriteFMALd, ReadAfterLd]>;

    // Memory operand is a scalar load broadcast to the full vector (EVEX_B).
    defm mb : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src2, _.ScalarMemOp:$src3),
                OpcodeStr,
                !strconcat("${src3}", _.BroadcastStr, ", $src2"),
                !strconcat("$src2, ${src3}", _.BroadcastStr),
                (_.VT (OpNode _.RC:$src2,
                        (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
                        _.RC:$src1)),
                NoItinerary, 1, 0>,
              AVX512FMA3Base, EVEX_B, Sched<[WriteFMALd, ReadAfterLd]>;
  }
}
6115
// Packed FMA, 231 operand order, register form with an explicit rounding
// control operand ($rc). OpNode receives (src2, src3, src1, rc).
// The Suff parameter is not referenced inside this multiclass.
multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      hasSideEffects = 0 in {
    defm rb : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
                OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
                (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1,
                              (i32 imm:$rc))),
                NoItinerary, 1, 1, vselect, 1>,
              AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[WriteFMA]>;
  }
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006126
// Instantiates the 231-form packed FMA for all three vector widths of `_`:
//   Z    (512-bit): r/m/mb plus the rounding-control form, under HasAVX512.
//   Z256, Z128    : r/m/mb only, under HasVLX + HasAVX512.
multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr,
                                   SDNode OpNode, SDNode OpNodeRnd,
                                   AVX512VLVectorVTInfo _, string Suff> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info512, Suff>,
           avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, _.info512, Suff>,
           EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;

  let Predicates = [HasVLX, HasAVX512] in {
    defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info256, Suff>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info128, Suff>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}
6142
// Expands a 231-form packed FMA into its single-precision (PS) and
// double-precision (PD) families; the PD family additionally sets VEX_W.
multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode OpNodeRnd> {
  defm PS : avx512_fma3p_231_common<opc, !strconcat(OpcodeStr, "ps"),
                                    OpNode, OpNodeRnd,
                                    avx512vl_f32_info, "PS">;
  defm PD : avx512_fma3p_231_common<opc, !strconcat(OpcodeStr, "pd"),
                                    OpNode, OpNodeRnd,
                                    avx512vl_f64_info, "PD">,
            VEX_W;
}
6150
// Packed FMA instruction families, 231 operand order.
defm VFMADD231    : avx512_fma3p_231_f<0xB8, "vfmadd231",
                                       X86Fmadd, X86FmaddRnd>;
defm VFMSUB231    : avx512_fma3p_231_f<0xBA, "vfmsub231",
                                       X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231",
                                       X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231",
                                       X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD231   : avx512_fma3p_231_f<0xBC, "vfnmadd231",
                                       X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB231   : avx512_fma3p_231_f<0xBE, "vfnmsub231",
                                       X86Fnmsub, X86FnmsubRnd>;
6157
// Packed FMA, 132 operand order: register (r), full-vector memory (m) and
// broadcast-memory (mb) forms for one vector type. $src1 is tied to $dst.
// The register pattern applies OpNode as (src1, src3, src2); the two memory
// patterns use (mem, src1, src2) instead, see the notes on `m` and `mb`.
// The Suff parameter is not referenced inside this multiclass.
multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      hasSideEffects = 0 in {
    // Register form.
    defm r : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
               (ins _.RC:$src2, _.RC:$src3),
               OpcodeStr, "$src3, $src2", "$src2, $src3",
               (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)),
               NoItinerary, 1, 1, vselect, 1>,
             AVX512FMA3Base, Sched<[WriteFMA]>;

    // The pattern is written in 312 order so that the load sits in a
    // different position than in the 213 and 231 patterns; this helps
    // tablegen's duplicate pattern detection.
    defm m : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
               (ins _.RC:$src2, _.MemOp:$src3),
               OpcodeStr, "$src3, $src2", "$src2, $src3",
               (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1,
                             _.RC:$src2)),
               NoItinerary, 1, 0>,
             AVX512FMA3Base, Sched<[WriteFMALd, ReadAfterLd]>;

    // Broadcast form (EVEX_B). Also written in 312 order, for the same
    // duplicate-pattern-detection reason as `m`.
    defm mb : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src2, _.ScalarMemOp:$src3),
                OpcodeStr,
                !strconcat("${src3}", _.BroadcastStr, ", $src2"),
                !strconcat("$src2, ${src3}", _.BroadcastStr),
                (_.VT (OpNode
                        (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
                        _.RC:$src1, _.RC:$src2)),
                NoItinerary, 1, 0>,
              AVX512FMA3Base, EVEX_B, Sched<[WriteFMALd, ReadAfterLd]>;
  }
}
6186
// Packed FMA, 132 operand order, register form with an explicit rounding
// control operand ($rc). OpNode receives (src1, src3, src2, rc).
// The Suff parameter is not referenced inside this multiclass.
multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      hasSideEffects = 0 in {
    defm rb : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
                OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
                (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2,
                              (i32 imm:$rc))),
                NoItinerary, 1, 1, vselect, 1>,
              AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[WriteFMA]>;
  }
}
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006197
// Instantiates the 132-form packed FMA for all three vector widths of `_`:
//   Z    (512-bit): r/m/mb plus the rounding-control form, under HasAVX512.
//   Z256, Z128    : r/m/mb only, under HasVLX + HasAVX512.
multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr,
                                   SDNode OpNode, SDNode OpNodeRnd,
                                   AVX512VLVectorVTInfo _, string Suff> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info512, Suff>,
           avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, _.info512, Suff>,
           EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;

  let Predicates = [HasVLX, HasAVX512] in {
    defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info256, Suff>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info128, Suff>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}
6213
// Expands a 132-form packed FMA into its single-precision (PS) and
// double-precision (PD) families; the PD family additionally sets VEX_W.
multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode OpNodeRnd> {
  defm PS : avx512_fma3p_132_common<opc, !strconcat(OpcodeStr, "ps"),
                                    OpNode, OpNodeRnd,
                                    avx512vl_f32_info, "PS">;
  defm PD : avx512_fma3p_132_common<opc, !strconcat(OpcodeStr, "pd"),
                                    OpNode, OpNodeRnd,
                                    avx512vl_f64_info, "PD">,
            VEX_W;
}
6221
// Packed FMA instruction families, 132 operand order.
defm VFMADD132    : avx512_fma3p_132_f<0x98, "vfmadd132",
                                       X86Fmadd, X86FmaddRnd>;
defm VFMSUB132    : avx512_fma3p_132_f<0x9A, "vfmsub132",
                                       X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132",
                                       X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132",
                                       X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD132   : avx512_fma3p_132_f<0x9C, "vfnmadd132",
                                       X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB132   : avx512_fma3p_132_f<0x9E, "vfnmsub132",
                                       X86Fnmsub, X86FnmsubRnd>;
Elena Demikhovskyfcea06a2014-12-23 10:30:39 +00006228
// Scalar FMA
//
// Emits the five records that make up one scalar FMA opcode:
//   r_Int / m_Int / rb_Int : maskable forms on the vector register class
//                            (_.RC), using RHS_VEC_r / RHS_VEC_m / RHS_VEC_rb
//                            as their patterns; rb_Int carries a rounding
//                            control operand.
//   r / m                  : codegen-only, commutable forms on the scalar
//                            register class (_.FRC), using RHS_r / RHS_m.
// When MaskOnlyReg is set, the `r` record gets an empty pattern list instead
// of [RHS_r]. In all forms $src1 is tied to $dst.
multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr,
                               X86VectorVTInfo _,
                               dag RHS_VEC_r, dag RHS_VEC_m, dag RHS_VEC_rb,
                               dag RHS_r, dag RHS_m, bit MaskOnlyReg> {
  let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
    defm r_Int : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _,
                   (outs _.RC:$dst), (ins _.RC:$src2, _.RC:$src3),
                   OpcodeStr, "$src3, $src2", "$src2, $src3",
                   RHS_VEC_r, NoItinerary, 1, 1>,
                 AVX512FMA3Base, Sched<[WriteFMA]>;

    defm m_Int : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _,
                   (outs _.RC:$dst),
                   (ins _.RC:$src2, _.IntScalarMemOp:$src3),
                   OpcodeStr, "$src3, $src2", "$src2, $src3",
                   RHS_VEC_m, NoItinerary, 1, 1>,
                 AVX512FMA3Base, Sched<[WriteFMALd, ReadAfterLd]>;

    defm rb_Int : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _,
                    (outs _.RC:$dst),
                    (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
                    OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
                    RHS_VEC_rb, NoItinerary, 1, 1>,
                  AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[WriteFMA]>;

    let isCodeGenOnly = 1, isCommutable = 1 in {
      def r : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
                (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
                OpcodeStr # "\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                !if(MaskOnlyReg, [], [RHS_r])>,
              Sched<[WriteFMA]>;

      def m : AVX512FMA3S<opc, MRMSrcMem, (outs _.FRC:$dst),
                (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
                OpcodeStr # "\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                [RHS_m]>,
              Sched<[WriteFMALd, ReadAfterLd]>;
    } // isCodeGenOnly = 1, isCommutable = 1
  } // Constraints = "$src1 = $dst", hasSideEffects = 0
}
Igor Breger15820b02015-07-01 13:24:28 +00006264
// Builds the 213, 231 and 132 variants of one scalar FMA operation for the
// vector type `_`. Record names are NAME#<form>#SUFF#Z and mnemonics are
// OpcodeStr followed by the form number and _.Suffix.
//   OpNode                : node used by the scalar (_.FRC) patterns.
//   OpNodes1, OpNodeRnds1 : nodes used by the vector-register patterns of the
//                           213 form (and the 132 memory form).
//   OpNodes3, OpNodeRnds3 : nodes used by the vector-register patterns of the
//                           231 form.
// The last argument of each instantiation is avx512_fma3s_common's
// MaskOnlyReg bit (0 for 213, 1 for 231 and 132).
multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                            string OpcodeStr, SDNode OpNode, SDNode OpNodes1,
                            SDNode OpNodeRnds1, SDNode OpNodes3,
                            SDNode OpNodeRnds3, X86VectorVTInfo _,
                            string SUFF> {
  let ExeDomain = _.ExeDomain in {
    defm NAME#213#SUFF#Z : avx512_fma3s_common<opc213,
        !strconcat(OpcodeStr, "213", _.Suffix), _,
        // Operands for the intrinsic are in 123 order to preserve passthru
        // semantics.
        (_.VT (OpNodes1 _.RC:$src1, _.RC:$src2, _.RC:$src3)),
        (_.VT (OpNodes1 _.RC:$src1, _.RC:$src2, _.ScalarIntMemCPat:$src3)),
        (_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src2, _.RC:$src3,
                           (i32 imm:$rc))),
        (set _.FRC:$dst,
             (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1, _.FRC:$src3))),
        (set _.FRC:$dst,
             (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
                              (_.ScalarLdFrag addr:$src3)))),
        0>;

    defm NAME#231#SUFF#Z : avx512_fma3s_common<opc231,
        !strconcat(OpcodeStr, "231", _.Suffix), _,
        (_.VT (OpNodes3 _.RC:$src2, _.RC:$src3, _.RC:$src1)),
        (_.VT (OpNodes3 _.RC:$src2, _.ScalarIntMemCPat:$src3, _.RC:$src1)),
        (_.VT (OpNodeRnds3 _.RC:$src2, _.RC:$src3, _.RC:$src1,
                           (i32 imm:$rc))),
        (set _.FRC:$dst,
             (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3, _.FRC:$src1))),
        (set _.FRC:$dst,
             (_.EltVT (OpNode _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                              _.FRC:$src1))),
        1>;

    // One pattern is in 312 order so that the load sits in a different
    // position than in the 213 and 231 patterns; this helps tablegen's
    // duplicate pattern detection. The register and rounding vector forms
    // have no pattern (null_frag).
    defm NAME#132#SUFF#Z : avx512_fma3s_common<opc132,
        !strconcat(OpcodeStr, "132", _.Suffix), _,
        (null_frag),
        (_.VT (OpNodes1 _.RC:$src1, _.ScalarIntMemCPat:$src3, _.RC:$src2)),
        (null_frag),
        (set _.FRC:$dst,
             (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3, _.FRC:$src2))),
        (set _.FRC:$dst,
             (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3), _.FRC:$src1,
                              _.FRC:$src2))),
        1>;
  }
}
6308
// Instantiates a scalar FMA operation for single precision (f32x_info, "SS")
// and double precision (f64x_info, "SD"; additionally VEX_W). Both are
// VEX_LIG and require HasAVX512.
multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                        string OpcodeStr, SDNode OpNode, SDNode OpNodes1,
                        SDNode OpNodeRnds1, SDNode OpNodes3,
                        SDNode OpNodeRnds3> {
  let Predicates = [HasAVX512] in {
    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr,
                                 OpNode, OpNodes1, OpNodeRnds1,
                                 OpNodes3, OpNodeRnds3,
                                 f32x_info, "SS">,
                EVEX_CD8<32, CD8VT1>, VEX_LIG;

    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr,
                                 OpNode, OpNodes1, OpNodeRnds1,
                                 OpNodes3, OpNodeRnds3,
                                 f64x_info, "SD">,
                EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
  }
}
6324
// Scalar FMA instruction families. The three opcodes are given in
// 213, 231, 132 order.
defm VFMADD  : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd",
                            X86Fmadd, X86Fmadds1, X86FmaddRnds1,
                            X86Fmadds3, X86FmaddRnds3>;
defm VFMSUB  : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub",
                            X86Fmsub, X86Fmsubs1, X86FmsubRnds1,
                            X86Fmsubs3, X86FmsubRnds3>;
defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd",
                            X86Fnmadd, X86Fnmadds1, X86FnmaddRnds1,
                            X86Fnmadds3, X86FnmaddRnds3>;
defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub",
                            X86Fnmsub, X86Fnmsubs1, X86FnmsubRnds1,
                            X86Fnmsubs3, X86FnmsubRnds3>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006333
6334//===----------------------------------------------------------------------===//
Asaf Badouh655822a2016-01-25 11:14:24 +00006335// AVX-512 Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
6336//===----------------------------------------------------------------------===//
let Constraints = "$src1 = $dst" in {
// 52-bit integer multiply-add: register (r), full-vector memory (m) and
// broadcast-memory (mb) forms for one vector type. $src1 is tied to $dst.
multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                             OpndItins itins, X86VectorVTInfo _> {
  // NOTE: The SDNode takes the multiply operands first and the addend last.
  // This enables commuted load patterns to be autogenerated by tablegen.
  let ExeDomain = _.ExeDomain in {
    defm r : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
               (ins _.RC:$src2, _.RC:$src3),
               OpcodeStr, "$src3, $src2", "$src2, $src3",
               (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)),
               itins.rr, 1, 1>,
             AVX512FMA3Base, Sched<[itins.Sched]>;

    defm m : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
               (ins _.RC:$src2, _.MemOp:$src3),
               OpcodeStr, "$src3, $src2", "$src2, $src3",
               (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3),
                             _.RC:$src1)),
               itins.rm>,
             AVX512FMA3Base, Sched<[itins.Sched.Folded, ReadAfterLd]>;

    // Memory operand is a scalar load broadcast to the full vector (EVEX_B).
    defm mb : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src2, _.ScalarMemOp:$src3),
                OpcodeStr,
                "${src3}" # _.BroadcastStr # ", $src2",
                "$src2, ${src3}" # _.BroadcastStr,
                (OpNode _.RC:$src2,
                        (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
                        _.RC:$src1),
                itins.rm>,
              AVX512FMA3Base, EVEX_B,
              Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
} // Constraints = "$src1 = $dst"
6366
// Instantiates avx512_pmadd52_rm for all three vector widths of `_`:
//   Z (512-bit) under HasIFMA; Z256 and Z128 under HasVLX + HasIFMA.
multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr,
                                 SDNode OpNode, OpndItins itins,
                                 AVX512VLVectorVTInfo _> {
  let Predicates = [HasIFMA] in
  defm Z : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, itins, _.info512>,
           EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;

  let Predicates = [HasVLX, HasIFMA] in {
    defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, itins, _.info256>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, itins, _.info128>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}
6380
// IFMA instruction families on 64-bit integer vectors (VEX_W set).
defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
                                         SSE_PMADD, avx512vl_i64_info>,
                   VEX_W;
defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
                                         SSE_PMADD, avx512vl_i64_info>,
                   VEX_W;
Asaf Badouh655822a2016-01-25 11:14:24 +00006385
6386//===----------------------------------------------------------------------===//
// AVX-512 Scalar convert from signed integer to float/double
6388//===----------------------------------------------------------------------===//
6389
// Scalar integer-to-FP conversion, current-rounding-mode forms.
//   rr / rm         : operate on the scalar register class (DstVT.FRC) and
//                     carry no selection pattern; rm is marked mayLoad.
//   rr_Int / rm_Int : codegen-only forms on the vector register class
//                     (DstVT.RC), matched via OpNode with FROUND_CURRENT.
// SrcRC is the integer source register class; x86memop / ld_frag describe the
// memory source. `asm` is the mnemonic without operands.
multiclass avx512_vcvtsi<bits<8> opc, SDNode OpNode, OpndItins itins,
                         RegisterClass SrcRC, X86VectorVTInfo DstVT,
                         X86MemOperand x86memop, PatFrag ld_frag,
                         string asm> {
  let hasSideEffects = 0 in {
    def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
                (ins DstVT.FRC:$src1, SrcRC:$src),
                asm # "\t{$src, $src1, $dst|$dst, $src1, $src}",
                [], itins.rr>,
             EVEX_4V, Sched<[itins.Sched]>;

    let mayLoad = 1 in
    def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
                (ins DstVT.FRC:$src1, x86memop:$src),
                asm # "\t{$src, $src1, $dst|$dst, $src1, $src}",
                [], itins.rm>,
             EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  } // hasSideEffects = 0

  let isCodeGenOnly = 1 in {
    def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
                    (ins DstVT.RC:$src1, SrcRC:$src2),
                    asm # "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set DstVT.RC:$dst,
                          (OpNode (DstVT.VT DstVT.RC:$src1),
                                  SrcRC:$src2,
                                  (i32 FROUND_CURRENT)))],
                    itins.rr>,
                 EVEX_4V, Sched<[itins.Sched]>;

    def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
                    (ins DstVT.RC:$src1, x86memop:$src2),
                    asm # "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set DstVT.RC:$dst,
                          (OpNode (DstVT.VT DstVT.RC:$src1),
                                  (ld_frag addr:$src2),
                                  (i32 FROUND_CURRENT)))],
                    itins.rm>,
                 EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  } // isCodeGenOnly = 1
}
Elena Demikhovskyd8fda622015-03-30 09:29:28 +00006424
// Scalar integer-to-FP conversion, register form with an explicit rounding
// control operand ($rc) that is passed to OpNode as its last operand.
multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode, OpndItins itins,
                               RegisterClass SrcRC, X86VectorVTInfo DstVT,
                               string asm> {
  def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
                   (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
                   asm #
                   "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}",
                   [(set DstVT.RC:$dst,
                         (OpNode (DstVT.VT DstVT.RC:$src1),
                                 SrcRC:$src2,
                                 (i32 imm:$rc)))],
                   itins.rr>,
                EVEX_4V, EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
}
6437
// Convenience wrapper that instantiates, under a single NAME, both the
// explicit-rounding register form (avx512_vcvtsi_round) and the forms defined
// by avx512_vcvtsi.  Every resulting record is additionally tagged VEX_LIG.
//   x86memop - memory operand class for the integer source.
//   ld_frag  - load fragment used by the memory-form pattern.
// The remaining parameters are forwarded unchanged.
multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, OpndItins itins,
                                RegisterClass SrcRC, X86VectorVTInfo DstVT,
                                X86MemOperand x86memop, PatFrag ld_frag,
                                string asm> {
  defm NAME
      : avx512_vcvtsi_round<opc, OpNode, itins, SrcRC, DstVT, asm>,
        avx512_vcvtsi<opc, OpNode, itins, SrcRC, DstVT, x86memop, ld_frag, asm>,
        VEX_LIG;
}
6445
Andrew Trick15a47742013-10-09 05:11:10 +00006446let Predicates = [HasAVX512] in {
// Signed integer -> scalar FP conversions (opcode 0x2A, X86SintToFpRnd).
// GR32-source variants use the {l} mnemonic suffix; GR64-source variants use
// {q} and additionally carry VEX_W.
defm VCVTSI2SSZ
    : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, SSE_CVT_SI2SS, GR32,
                           v4f32x_info, i32mem, loadi32, "cvtsi2ss{l}">,
      XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SSZ
    : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, SSE_CVT_SI2SS, GR64,
                           v4f32x_info, i64mem, loadi64, "cvtsi2ss{q}">,
      XS, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSI2SDZ
    : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, SSE_CVT_SI2SD, GR32,
                           v2f64x_info, i32mem, loadi32, "cvtsi2sd{l}">,
      XD, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SDZ
    : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, SSE_CVT_SI2SD, GR64,
                           v2f64x_info, i64mem, loadi64, "cvtsi2sd{q}">,
      XD, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006459
// Suffix-less spellings of the memory forms: a bare "vcvtsi2ss"/"vcvtsi2sd"
// with a memory source resolves to the i32mem instruction.  The trailing 0 is
// InstAlias's Emit argument (alias is accepted by the parser only).
//
// The single-precision alias names FR32X operands so that it agrees with the
// f32 register operands VCVTSI2SSZrm is used with in the selection patterns
// (the previous FR64X spelling only worked because both classes contain the
// same registers).
def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTSI2SSZrm FR32X:$dst, FR32X:$src1, i32mem:$src), 0>;
def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0>;
6464
// Selection of the generic sint_to_fp node onto the non-_Int instructions.
// The first (pass-through) source is not meaningful for a plain scalar
// conversion, so it is supplied as IMPLICIT_DEF of the result type.

// Memory sources.
def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
          (VCVTSI2SSZrm   (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
          (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
          (VCVTSI2SDZrm   (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
          (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

// Register sources.
def : Pat<(f32 (sint_to_fp GR32:$src)),
          (VCVTSI2SSZrr   (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (sint_to_fp GR64:$src)),
          (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (sint_to_fp GR32:$src)),
          (VCVTSI2SDZrr   (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (sint_to_fp GR64:$src)),
          (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
6482
// Unsigned integer -> scalar FP conversions (opcode 0x7B, X86UintToFpRnd).
// GR32-source variants use the {l} mnemonic suffix; GR64-source variants use
// {q} and additionally carry VEX_W.
defm VCVTUSI2SSZ
    : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, SSE_CVT_SI2SS, GR32,
                           v4f32x_info, i32mem, loadi32, "cvtusi2ss{l}">,
      XS, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SSZ
    : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, SSE_CVT_SI2SS, GR64,
                           v4f32x_info, i64mem, loadi64, "cvtusi2ss{q}">,
      XS, VEX_W, EVEX_CD8<64, CD8VT1>;
// Unlike its siblings, the 32-bit -> f64 variant instantiates avx512_vcvtsi
// directly (with an explicit VEX_LIG), so no rrb_Int explicit-rounding form is
// created for it.
// NOTE(review): presumably because a 32-bit unsigned value is always exactly
// representable as f64 - confirm against the ISA reference.
defm VCVTUSI2SDZ
    : avx512_vcvtsi<0x7B, X86UintToFpRnd, SSE_CVT_SI2SD, GR32, v2f64x_info,
                    i32mem, loadi32, "cvtusi2sd{l}">,
      XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SDZ
    : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, SSE_CVT_SI2SD, GR64,
                           v2f64x_info, i64mem, loadi64, "cvtusi2sd{q}">,
      XD, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006495
// Suffix-less spellings of the memory forms: a bare "vcvtusi2ss"/"vcvtusi2sd"
// with a memory source resolves to the i32mem instruction.  The trailing 0 is
// InstAlias's Emit argument (alias is accepted by the parser only).
//
// The single-precision alias names FR32X operands so that it agrees with the
// f32 register operands VCVTUSI2SSZrm is used with in the selection patterns
// (the previous FR64X spelling only worked because both classes contain the
// same registers).
def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTUSI2SSZrm FR32X:$dst, FR32X:$src1, i32mem:$src), 0>;
def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTUSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0>;
6500
// Selection of the generic uint_to_fp node onto the non-_Int instructions.
// The first (pass-through) source is supplied as IMPLICIT_DEF of the result
// type.

// Memory sources.
def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
          (VCVTUSI2SSZrm   (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))),
          (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))),
          (VCVTUSI2SDZrm   (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))),
          (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

// Register sources.
def : Pat<(f32 (uint_to_fp GR32:$src)),
          (VCVTUSI2SSZrr   (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (uint_to_fp GR64:$src)),
          (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (uint_to_fp GR32:$src)),
          (VCVTUSI2SDZrr   (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (uint_to_fp GR64:$src)),
          (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
Andrew Trick15a47742013-10-09 05:11:10 +00006518}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006519
6520//===----------------------------------------------------------------------===//
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006521// AVX-512 Scalar convert from float/double to integer
6522//===----------------------------------------------------------------------===//
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006523
// Scalar FP -> integer conversion taking a vector-register (or intrinsic
// memory) source.  All records are predicated on HasAVX512.
//
// Defines:
//   rr_Int  - register source, OpNode called with FROUND_CURRENT.
//   rrb_Int - register source plus AVX512RC:$rc, forwarded to OpNode as the
//             i32 rounding argument (EVEX_B, EVEX_RC).
//   rm_Int  - memory source via SrcVT.ScalarIntMemCPat, FROUND_CURRENT.
//             isCodeGenOnly and ForceDisassemble are both set from the
//             CodeGenOnly parameter (default 1).
// plus two assembler-only aliases ("v" # asm # aliasStr) for the register
// forms.
multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
                                  X86VectorVTInfo DstVT, SDNode OpNode,
                                  OpndItins itins, string asm,
                                  string aliasStr,
                                  bit CodeGenOnly = 1> {
  let Predicates = [HasAVX512] in {
    def rr_Int
        : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
             asm # "\t{$src, $dst|$dst, $src}",
             [(set DstVT.RC:$dst,
                   (OpNode (SrcVT.VT SrcVT.RC:$src), (i32 FROUND_CURRENT)))],
             itins.rr>,
          EVEX, VEX_LIG, Sched<[itins.Sched]>;

    def rrb_Int
        : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
             (ins SrcVT.RC:$src, AVX512RC:$rc),
             asm # "\t{$rc, $src, $dst|$dst, $src, $rc}",
             [(set DstVT.RC:$dst,
                   (OpNode (SrcVT.VT SrcVT.RC:$src), (i32 imm:$rc)))],
             itins.rr>,
          EVEX, VEX_LIG, EVEX_B, EVEX_RC, Sched<[itins.Sched]>;

    let isCodeGenOnly = CodeGenOnly, ForceDisassemble = CodeGenOnly in {
      def rm_Int
          : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
               (ins SrcVT.IntScalarMemOp:$src),
               asm # "\t{$src, $dst|$dst, $src}",
               [(set DstVT.RC:$dst,
                     (OpNode (SrcVT.VT SrcVT.ScalarIntMemCPat:$src),
                             (i32 FROUND_CURRENT)))],
               itins.rm>,
            EVEX, VEX_LIG, Sched<[itins.Sched.Folded, ReadAfterLd]>;
    }

    // Suffixed spellings of the register forms.
    def : InstAlias<!strconcat("v", asm, aliasStr,
                               "\t{$src, $dst|$dst, $src}"),
                    (!cast<Instruction>(NAME # "rr_Int")
                         DstVT.RC:$dst, SrcVT.RC:$src), 0>;
    def : InstAlias<!strconcat("v", asm, aliasStr,
                               "\t{$rc, $src, $dst|$dst, $src, $rc}"),
                    (!cast<Instruction>(NAME # "rrb_Int")
                         DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0>;
  } // Predicates = [HasAVX512]
}
6553
// Same as avx512_cvt_s_int_round, but instantiated with CodeGenOnly = 0 and
// with one extra assembler-only alias ("v" # asm # aliasStr) for the rm_Int
// memory form.
multiclass avx512_cvt_s_int_round_aliases<bits<8> opc, X86VectorVTInfo SrcVT,
                                          X86VectorVTInfo DstVT, SDNode OpNode,
                                          OpndItins itins, string asm,
                                          string aliasStr> :
    avx512_cvt_s_int_round<opc, SrcVT, DstVT, OpNode, itins, asm, aliasStr, 0> {
  let Predicates = [HasAVX512] in
  def : InstAlias<!strconcat("v", asm, aliasStr,
                             "\t{$src, $dst|$dst, $src}"),
                  (!cast<Instruction>(NAME # "rm_Int")
                       DstVT.RC:$dst, SrcVT.IntScalarMemOp:$src), 0>;
}
Asaf Badouh2744d212015-09-20 14:31:19 +00006565
// Convert float/double to signed/unsigned int 32/64.
// Signed forms use opcode 0x2D with X86cvts2si; unsigned forms use opcode 0x79
// with X86cvts2usi and go through the _aliases multiclass.  64-bit-result
// variants carry VEX_W.

// f32 source.
defm VCVTSS2SIZ
    : avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info, X86cvts2si,
                             SSE_CVT_SS2SI_32, "cvtss2si", "{l}">,
      XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2SI64Z
    : avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
                             SSE_CVT_SS2SI_64, "cvtss2si", "{q}">,
      XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USIZ
    : avx512_cvt_s_int_round_aliases<0x79, f32x_info, i32x_info, X86cvts2usi,
                                     SSE_CVT_SS2SI_32, "cvtss2usi", "{l}">,
      XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USI64Z
    : avx512_cvt_s_int_round_aliases<0x79, f32x_info, i64x_info, X86cvts2usi,
                                     SSE_CVT_SS2SI_64, "cvtss2usi", "{q}">,
      XS, VEX_W, EVEX_CD8<32, CD8VT1>;

// f64 source.
defm VCVTSD2SIZ
    : avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
                             SSE_CVT_SD2SI, "cvtsd2si", "{l}">,
      XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2SI64Z
    : avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
                             SSE_CVT_SD2SI, "cvtsd2si", "{q}">,
      XD, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USIZ
    : avx512_cvt_s_int_round_aliases<0x79, f64x_info, i32x_info, X86cvts2usi,
                                     SSE_CVT_SD2SI, "cvtsd2usi", "{l}">,
      XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USI64Z
    : avx512_cvt_s_int_round_aliases<0x79, f64x_info, i64x_info, X86cvts2usi,
                                     SSE_CVT_SD2SI, "cvtsd2usi", "{q}">,
      XD, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006591
// The SSE versions of these instructions are disabled for AVX512.
// Therefore, the SSE cvts{s,d}2si intrinsics are mapped to the AVX512 _Int
// instructions (register and sse_load_f32/sse_load_f64 memory sources).
def : Pat<(i32 (int_x86_sse_cvtss2si (v4f32 VR128X:$src))),
          (VCVTSS2SIZrr_Int VR128X:$src)>,
      Requires<[HasAVX512]>;
def : Pat<(i32 (int_x86_sse_cvtss2si sse_load_f32:$src)),
          (VCVTSS2SIZrm_Int sse_load_f32:$src)>,
      Requires<[HasAVX512]>;
def : Pat<(i64 (int_x86_sse_cvtss2si64 (v4f32 VR128X:$src))),
          (VCVTSS2SI64Zrr_Int VR128X:$src)>,
      Requires<[HasAVX512]>;
def : Pat<(i64 (int_x86_sse_cvtss2si64 sse_load_f32:$src)),
          (VCVTSS2SI64Zrm_Int sse_load_f32:$src)>,
      Requires<[HasAVX512]>;
def : Pat<(i32 (int_x86_sse2_cvtsd2si (v2f64 VR128X:$src))),
          (VCVTSD2SIZrr_Int VR128X:$src)>,
      Requires<[HasAVX512]>;
def : Pat<(i32 (int_x86_sse2_cvtsd2si sse_load_f64:$src)),
          (VCVTSD2SIZrm_Int sse_load_f64:$src)>,
      Requires<[HasAVX512]>;
def : Pat<(i64 (int_x86_sse2_cvtsd2si64 (v2f64 VR128X:$src))),
          (VCVTSD2SI64Zrr_Int VR128X:$src)>,
      Requires<[HasAVX512]>;
def : Pat<(i64 (int_x86_sse2_cvtsd2si64 sse_load_f64:$src)),
          (VCVTSD2SI64Zrm_Int sse_load_f64:$src)>,
      Requires<[HasAVX512]>;
6612
// Map the SSE/SSE2 cvtsi2s{s,d} intrinsics, and the AVX512 cvtusi2sd
// intrinsic, onto the AVX512 _Int instructions.  Each intrinsic gets a
// register-source and a load-folding pattern.
let Predicates = [HasAVX512] in {
  // int -> f32
  def : Pat<(int_x86_sse_cvtsi2ss VR128X:$src1, GR32:$src2),
            (VCVTSI2SSZrr_Int VR128X:$src1, GR32:$src2)>;
  def : Pat<(int_x86_sse_cvtsi2ss VR128X:$src1, (loadi32 addr:$src2)),
            (VCVTSI2SSZrm_Int VR128X:$src1, addr:$src2)>;
  def : Pat<(int_x86_sse_cvtsi642ss VR128X:$src1, GR64:$src2),
            (VCVTSI642SSZrr_Int VR128X:$src1, GR64:$src2)>;
  def : Pat<(int_x86_sse_cvtsi642ss VR128X:$src1, (loadi64 addr:$src2)),
            (VCVTSI642SSZrm_Int VR128X:$src1, addr:$src2)>;

  // int -> f64
  def : Pat<(int_x86_sse2_cvtsi2sd VR128X:$src1, GR32:$src2),
            (VCVTSI2SDZrr_Int VR128X:$src1, GR32:$src2)>;
  def : Pat<(int_x86_sse2_cvtsi2sd VR128X:$src1, (loadi32 addr:$src2)),
            (VCVTSI2SDZrm_Int VR128X:$src1, addr:$src2)>;
  def : Pat<(int_x86_sse2_cvtsi642sd VR128X:$src1, GR64:$src2),
            (VCVTSI642SDZrr_Int VR128X:$src1, GR64:$src2)>;
  def : Pat<(int_x86_sse2_cvtsi642sd VR128X:$src1, (loadi64 addr:$src2)),
            (VCVTSI642SDZrm_Int VR128X:$src1, addr:$src2)>;

  // unsigned 32-bit int -> f64
  def : Pat<(int_x86_avx512_cvtusi2sd VR128X:$src1, GR32:$src2),
            (VCVTUSI2SDZrr_Int VR128X:$src1, GR32:$src2)>;
  def : Pat<(int_x86_avx512_cvtusi2sd VR128X:$src1, (loadi32 addr:$src2)),
            (VCVTUSI2SDZrm_Int VR128X:$src1, addr:$src2)>;
} // Predicates = [HasAVX512]
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006635
// Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
// which produce unnecessary vmovs{s,d} instructions: an X86Movss/X86Movsd of
// a scalar_to_vector'd sint_to_fp result into $dst is selected directly as
// the corresponding _Int conversion with $dst as the pass-through operand.
def : Pat<(v4f32 (X86Movss
                     (v4f32 VR128X:$dst),
                     (v4f32 (scalar_to_vector
                                (f32 (sint_to_fp GR64:$src)))))),
          (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>,
      Requires<[HasAVX512]>;

def : Pat<(v4f32 (X86Movss
                     (v4f32 VR128X:$dst),
                     (v4f32 (scalar_to_vector
                                (f32 (sint_to_fp GR32:$src)))))),
          (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>,
      Requires<[HasAVX512]>;

def : Pat<(v2f64 (X86Movsd
                     (v2f64 VR128X:$dst),
                     (v2f64 (scalar_to_vector
                                (f64 (sint_to_fp GR64:$src)))))),
          (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>,
      Requires<[HasAVX512]>;

def : Pat<(v2f64 (X86Movsd
                     (v2f64 VR128X:$dst),
                     (v2f64 (scalar_to_vector
                                (f64 (sint_to_fp GR32:$src)))))),
          (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>,
      Requires<[HasAVX512]>;
6659
// Convert float/double to signed/unsigned int 32/64 with truncation.
//
// All records are predicated on HasAVX512.  Defines:
//   rr, rm  - isCodeGenOnly forms on the scalar FP register class / scalar
//             load fragment, selected from the plain OpNode.
//   rr_Int  - vector-register source, OpNodeRnd with FROUND_CURRENT.
//   rrb_Int - vector-register source with the {sae} operand string, OpNodeRnd
//             with FROUND_NO_EXC (EVEX_B).
//   rm_Int  - intrinsic memory source, OpNodeRnd with FROUND_CURRENT;
//             isCodeGenOnly and ForceDisassemble are both set from the
//             CodeGenOnly parameter (default 1).
// plus assembler-only aliases (asm # aliasStr) for rr_Int and rrb_Int.
// Note that, unlike avx512_cvt_s_int_round, `asm` is used verbatim here.
multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
                            X86VectorVTInfo _DstRC, SDNode OpNode,
                            SDNode OpNodeRnd, OpndItins itins, string aliasStr,
                            bit CodeGenOnly = 1> {
  let Predicates = [HasAVX512] in {
    let isCodeGenOnly = 1 in {
      def rr
          : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst),
                   (ins _SrcRC.FRC:$src),
                   asm # "\t{$src, $dst|$dst, $src}",
                   [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))],
                   itins.rr>,
            EVEX, Sched<[itins.Sched]>;
      def rm
          : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
                   (ins _SrcRC.ScalarMemOp:$src),
                   asm # "\t{$src, $dst|$dst, $src}",
                   [(set _DstRC.RC:$dst,
                         (OpNode (_SrcRC.ScalarLdFrag addr:$src)))],
                   itins.rm>,
            EVEX, Sched<[itins.Sched.Folded, ReadAfterLd]>;
    }

    def rr_Int
        : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst),
                 (ins _SrcRC.RC:$src),
                 asm # "\t{$src, $dst|$dst, $src}",
                 [(set _DstRC.RC:$dst,
                       (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src),
                                  (i32 FROUND_CURRENT)))],
                 itins.rr>,
          EVEX, VEX_LIG, Sched<[itins.Sched]>;

    def rrb_Int
        : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst),
                 (ins _SrcRC.RC:$src),
                 asm # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
                 [(set _DstRC.RC:$dst,
                       (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src),
                                  (i32 FROUND_NO_EXC)))],
                 itins.rr>,
          EVEX, VEX_LIG, EVEX_B, Sched<[itins.Sched]>;

    let isCodeGenOnly = CodeGenOnly, ForceDisassemble = CodeGenOnly in {
      def rm_Int
          : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
                   (ins _SrcRC.IntScalarMemOp:$src),
                   asm # "\t{$src, $dst|$dst, $src}",
                   [(set _DstRC.RC:$dst,
                         (OpNodeRnd (_SrcRC.VT _SrcRC.ScalarIntMemCPat:$src),
                                    (i32 FROUND_CURRENT)))],
                   itins.rm>,
            EVEX, VEX_LIG, Sched<[itins.Sched.Folded, ReadAfterLd]>;
    }

    // Suffixed spellings of the vector-register forms.
    def : InstAlias<!strconcat(asm, aliasStr, "\t{$src, $dst|$dst, $src}"),
                    (!cast<Instruction>(NAME # "rr_Int")
                         _DstRC.RC:$dst, _SrcRC.RC:$src), 0>;
    def : InstAlias<!strconcat(asm, aliasStr,
                               "\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
                    (!cast<Instruction>(NAME # "rrb_Int")
                         _DstRC.RC:$dst, _SrcRC.RC:$src), 0>;
  } // Predicates = [HasAVX512]
}
6702
// Same as avx512_cvt_s_all, but instantiated with CodeGenOnly = 0 and with
// one extra assembler-only alias (asm # aliasStr) for the rm_Int memory form.
multiclass avx512_cvt_s_all_unsigned<bits<8> opc, string asm,
                                     X86VectorVTInfo _SrcRC,
                                     X86VectorVTInfo _DstRC, SDNode OpNode,
                                     SDNode OpNodeRnd, OpndItins itins,
                                     string aliasStr> :
    avx512_cvt_s_all<opc, asm, _SrcRC, _DstRC, OpNode, OpNodeRnd, itins,
                     aliasStr, 0> {
  let Predicates = [HasAVX512] in
  def : InstAlias<!strconcat(asm, aliasStr, "\t{$src, $dst|$dst, $src}"),
                  (!cast<Instruction>(NAME # "rm_Int")
                       _DstRC.RC:$dst, _SrcRC.IntScalarMemOp:$src), 0>;
}
Asaf Badouh2744d212015-09-20 14:31:19 +00006716
// Truncating scalar FP -> signed integer (opcode 0x2C; fp_to_sint for the
// plain forms, X86cvtts2IntRnd for the _Int forms).  64-bit-result variants
// carry VEX_W.
defm VCVTTSS2SIZ
    : avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
                       fp_to_sint, X86cvtts2IntRnd, SSE_CVT_SS2SI_32, "{l}">,
      XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2SI64Z
    : avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
                       fp_to_sint, X86cvtts2IntRnd, SSE_CVT_SS2SI_64, "{q}">,
      VEX_W, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2SIZ
    : avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
                       fp_to_sint, X86cvtts2IntRnd, SSE_CVT_SD2SI, "{l}">,
      XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2SI64Z
    : avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
                       fp_to_sint, X86cvtts2IntRnd, SSE_CVT_SD2SI, "{q}">,
      VEX_W, XD, EVEX_CD8<64, CD8VT1>;

// Truncating scalar FP -> unsigned integer (opcode 0x78; fp_to_uint for the
// plain forms, X86cvtts2UIntRnd for the _Int forms).
defm VCVTTSS2USIZ
    : avx512_cvt_s_all_unsigned<0x78, "vcvttss2usi", f32x_info, i32x_info,
                                fp_to_uint, X86cvtts2UIntRnd,
                                SSE_CVT_SS2SI_32, "{l}">,
      XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2USI64Z
    : avx512_cvt_s_all_unsigned<0x78, "vcvttss2usi", f32x_info, i64x_info,
                                fp_to_uint, X86cvtts2UIntRnd,
                                SSE_CVT_SS2SI_64, "{q}">,
      XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2USIZ
    : avx512_cvt_s_all_unsigned<0x78, "vcvttsd2usi", f64x_info, i32x_info,
                                fp_to_uint, X86cvtts2UIntRnd,
                                SSE_CVT_SD2SI, "{l}">,
      XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2USI64Z
    : avx512_cvt_s_all_unsigned<0x78, "vcvttsd2usi", f64x_info, i64x_info,
                                fp_to_uint, X86cvtts2UIntRnd,
                                SSE_CVT_SD2SI, "{q}">,
      XD, VEX_W, EVEX_CD8<64, CD8VT1>;
Craig Topper61d8a602018-01-06 21:27:25 +00006742
// Map the SSE/SSE2 truncating cvtts{s,d}2si intrinsics onto the AVX512 _Int
// instructions (register and sse_load_f32/sse_load_f64 memory sources).
def : Pat<(i32 (int_x86_sse_cvttss2si (v4f32 VR128X:$src))),
          (VCVTTSS2SIZrr_Int VR128X:$src)>,
      Requires<[HasAVX512]>;
def : Pat<(i32 (int_x86_sse_cvttss2si sse_load_f32:$src)),
          (VCVTTSS2SIZrm_Int ssmem:$src)>,
      Requires<[HasAVX512]>;
def : Pat<(i64 (int_x86_sse_cvttss2si64 (v4f32 VR128X:$src))),
          (VCVTTSS2SI64Zrr_Int VR128X:$src)>,
      Requires<[HasAVX512]>;
def : Pat<(i64 (int_x86_sse_cvttss2si64 sse_load_f32:$src)),
          (VCVTTSS2SI64Zrm_Int ssmem:$src)>,
      Requires<[HasAVX512]>;
def : Pat<(i32 (int_x86_sse2_cvttsd2si (v2f64 VR128X:$src))),
          (VCVTTSD2SIZrr_Int VR128X:$src)>,
      Requires<[HasAVX512]>;
def : Pat<(i32 (int_x86_sse2_cvttsd2si sse_load_f64:$src)),
          (VCVTTSD2SIZrm_Int sdmem:$src)>,
      Requires<[HasAVX512]>;
def : Pat<(i64 (int_x86_sse2_cvttsd2si64 (v2f64 VR128X:$src))),
          (VCVTTSD2SI64Zrr_Int VR128X:$src)>,
      Requires<[HasAVX512]>;
def : Pat<(i64 (int_x86_sse2_cvttsd2si64 sse_load_f64:$src)),
          (VCVTTSD2SI64Zrm_Int sdmem:$src)>,
      Requires<[HasAVX512]>;
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006761
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006762//===----------------------------------------------------------------------===//
// AVX-512 Convert from float to double and back
6764//===----------------------------------------------------------------------===//
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006765
// Scalar FP <-> FP conversion (float/double), FROUND_CURRENT forms.
//
// Defines:
//   rr_Int, rm_Int - maskable vector-register forms built with
//                    AVX512_maskable_scalar; OpNode receives the pass-through
//                    $src1, the source, and FROUND_CURRENT.
//   rr, rm         - isCodeGenOnly, pattern-less forms on the scalar FP
//                    register classes (hasSideEffects = 0; rm is mayLoad).
//   _    - vector type info for the destination.
//   _Src - vector type info for the source.
multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr,
                                X86VectorVTInfo _, X86VectorVTInfo _Src,
                                SDNode OpNode, OpndItins itins> {
  defm rr_Int
      : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                               (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                               "$src2, $src1", "$src1, $src2",
                               (_.VT (OpNode (_.VT _.RC:$src1),
                                             (_Src.VT _Src.RC:$src2),
                                             (i32 FROUND_CURRENT))),
                               itins.rr>,
        EVEX_4V, VEX_LIG, Sched<[itins.Sched]>;

  defm rm_Int
      : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                               (ins _.RC:$src1, _Src.IntScalarMemOp:$src2),
                               OpcodeStr,
                               "$src2, $src1", "$src1, $src2",
                               (_.VT (OpNode (_.VT _.RC:$src1),
                                             (_Src.VT
                                                 _Src.ScalarIntMemCPat:$src2),
                                             (i32 FROUND_CURRENT))),
                               itins.rm>,
        EVEX_4V, VEX_LIG, Sched<[itins.Sched.Folded, ReadAfterLd]>;

  let isCodeGenOnly = 1, hasSideEffects = 0 in {
    def rr
        : I<opc, MRMSrcReg, (outs _.FRC:$dst),
            (ins _.FRC:$src1, _Src.FRC:$src2),
            !strconcat(OpcodeStr,
                       "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
            [], itins.rr>,
          EVEX_4V, VEX_LIG, Sched<[itins.Sched]>;

    let mayLoad = 1 in {
      def rm
          : I<opc, MRMSrcMem, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
              !strconcat(OpcodeStr,
                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
              [], itins.rm>,
            EVEX_4V, VEX_LIG, Sched<[itins.Sched.Folded, ReadAfterLd]>;
    }
  }
}
6797
// Scalar Conversion with SAE - suppress all exceptions.
// Defines the maskable rrb_Int form: register source, "{sae}" in the operand
// string, OpNodeRnd invoked with FROUND_NO_EXC, tagged EVEX_B.
multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr,
                                    X86VectorVTInfo _, X86VectorVTInfo _Src,
                                    SDNode OpNodeRnd, OpndItins itins> {
  defm rrb_Int
      : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                               (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                               "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                               (_.VT (OpNodeRnd (_.VT _.RC:$src1),
                                                (_Src.VT _Src.RC:$src2),
                                                (i32 FROUND_NO_EXC))),
                               itins.rr>,
        EVEX_4V, VEX_LIG, EVEX_B, Sched<[itins.Sched]>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006809
// Scalar Conversion with rounding control (RC).
// Defines the maskable rrb_Int form: register source plus AVX512RC:$rc, which
// is forwarded to OpNodeRnd as its i32 rounding argument; tagged EVEX_B and
// EVEX_RC.
multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr,
                                   X86VectorVTInfo _, X86VectorVTInfo _Src,
                                   SDNode OpNodeRnd, OpndItins itins> {
  defm rrb_Int
      : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                               (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc),
                               OpcodeStr,
                               "$rc, $src2, $src1", "$src1, $src2, $rc",
                               (_.VT (OpNodeRnd (_.VT _.RC:$src1),
                                                (_Src.VT _Src.RC:$src2),
                                                (i32 imm:$rc))),
                               itins.rr>,
        EVEX_4V, VEX_LIG, Sched<[itins.Sched]>, EVEX_B, EVEX_RC;
}
// double -> float scalar conversion: under the "Z" suffix, instantiates the
// FROUND_CURRENT forms together with the rounding-control rrb_Int form.
// All records get VEX_W, EVEX_CD8<64, CD8VT1> and XD, and require HasAVX512.
multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
                                      SDNode OpNodeRnd, OpndItins itins,
                                      X86VectorVTInfo _src,
                                      X86VectorVTInfo _dst> {
  let Predicates = [HasAVX512] in
  defm Z
      : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, itins>,
        avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, itins>,
        VEX_W, EVEX_CD8<64, CD8VT1>, XD;
}
6831
// float -> double scalar conversion: under the "Z" suffix, instantiates the
// FROUND_CURRENT forms together with the {sae} rrb_Int form.
// All records get EVEX_CD8<32, CD8VT1> and XS, and require HasAVX512.
multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr,
                                      SDNode OpNodeRnd, OpndItins itins,
                                      X86VectorVTInfo _src,
                                      X86VectorVTInfo _dst> {
  let Predicates = [HasAVX512] in
  defm Z
      : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, itins>,
        avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, itins>,
        EVEX_CD8<32, CD8VT1>, XS;
}
// Both directions share opcode 0x5A; the template order is (source info,
// destination info).  Both are marked NotMemoryFoldable.
defm VCVTSD2SS
    : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss", X86froundRnd,
                                 SSE_CVT_SD2SS, f64x_info, f32x_info>,
      NotMemoryFoldable;
defm VCVTSS2SD
    : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", X86fpextRnd,
                                 SSE_CVT_SS2SD, f32x_info, f64x_info>,
      NotMemoryFoldable;
Asaf Badouh2744d212015-09-20 14:31:19 +00006847
// Generic fpextend selection; the pass-through operand is IMPLICIT_DEF.
let Predicates = [HasAVX512] in {
  def : Pat<(f64 (fpextend FR32X:$src)),
            (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>;
  def : Pat<(f64 (fpextend (loadf32 addr:$src))),
            (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
}

// Extending load: under OptForSize the load is folded into the conversion;
// under OptForSpeed it is loaded with VMOVSSZrm first and converted from the
// register.
let Predicates = [HasAVX512, OptForSize] in
def : Pat<(f64 (extloadf32 addr:$src)),
          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

let Predicates = [HasAVX512, OptForSpeed] in
def : Pat<(f64 (extloadf32 addr:$src)),
          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>;

// Generic fpround selection (register source only).
let Predicates = [HasAVX512] in
def : Pat<(f32 (fpround FR64X:$src)),
          (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>;
Elad Cohen0c260102017-01-11 09:11:48 +00006866
// An X86Movss/X86Movsd that inserts the fpround/fpextend of element 0 of $src
// into $dst is selected directly as the corresponding _Int conversion, with
// $dst as the pass-through operand.
let Predicates = [HasAVX512] in {
  def : Pat<(v4f32 (X86Movss
                       (v4f32 VR128X:$dst),
                       (v4f32 (scalar_to_vector
                                  (f32 (fpround
                                           (f64 (extractelt VR128X:$src,
                                                            (iPTR 0))))))))),
            (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>;

  def : Pat<(v2f64 (X86Movsd
                       (v2f64 VR128X:$dst),
                       (v2f64 (scalar_to_vector
                                  (f64 (fpextend
                                           (f32 (extractelt VR128X:$src,
                                                            (iPTR 0))))))))),
            (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>;
}
6880
Elena Demikhovsky0f370932015-07-13 13:26:20 +00006881//===----------------------------------------------------------------------===//
6882// AVX-512 Vector convert from signed/unsigned integer to float/double
6883// and from float/double to signed/unsigned integer
6884//===----------------------------------------------------------------------===//
6885
// Common building block for the packed conversion instructions in this
// section. Instantiates three forms through AVX512_maskable (defined
// elsewhere in this file):
//   rr  - source in a register of class _Src.RC.
//   rm  - source loaded from memory through _Src.LdFrag.
//   rmb - scalar element loaded through _Src.ScalarLdFrag and broadcast with
//         X86VBroadcast; carries EVEX_B.
// Parameters:
//   _         - type info of the destination vector.
//   _Src      - type info of the source vector.
//   OpNode    - conversion node applied to the source in all three patterns.
//   itins     - itineraries/scheduling info (rr for the register form, rm and
//               Sched.Folded for the memory forms).
//   Broadcast - decoration appended to the memory operand in the rmb asm
//               string; defaults to _.BroadcastStr.
//   Alias     - suffix appended to the mnemonic of the rm form only.
//   MemOp     - memory operand of the rm form; defaults to _Src.MemOp.
6886multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00006887 X86VectorVTInfo _Src, SDNode OpNode, OpndItins itins,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00006888 string Broadcast = _.BroadcastStr,
Coby Tayree97e9cf62016-11-20 17:09:56 +00006889 string Alias = "", X86MemOperand MemOp = _Src.MemOp> {
Elena Demikhovsky0f370932015-07-13 13:26:20 +00006890
  // Register-source form.
6891 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
6892 (ins _Src.RC:$src), OpcodeStr, "$src", "$src",
Simon Pilgrim465a88b2017-12-03 21:16:12 +00006893 (_.VT (OpNode (_Src.VT _Src.RC:$src))), itins.rr>,
6894 EVEX, Sched<[itins.Sched]>;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00006895
  // Memory-source form; the only form whose mnemonic gets the Alias suffix.
6896 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
Coby Tayree97e9cf62016-11-20 17:09:56 +00006897 (ins MemOp:$src), OpcodeStr#Alias, "$src", "$src",
Elena Demikhovsky0f370932015-07-13 13:26:20 +00006898 (_.VT (OpNode (_Src.VT
Simon Pilgrim465a88b2017-12-03 21:16:12 +00006899 (bitconvert (_Src.LdFrag addr:$src))))), itins.rm>,
6900 EVEX, Sched<[itins.Sched.Folded]>;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00006901
  // Broadcast-from-memory form; the operand is printed as ${src} followed by
  // the Broadcast string.
6902 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
Igor Breger4511e762016-02-22 11:48:27 +00006903 (ins _Src.ScalarMemOp:$src), OpcodeStr,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00006904 "${src}"##Broadcast, "${src}"##Broadcast,
6905 (_.VT (OpNode (_Src.VT
6906 (X86VBroadcast (_Src.ScalarLdFrag addr:$src)))
Simon Pilgrim465a88b2017-12-03 21:16:12 +00006907 )), itins.rm>, EVEX, EVEX_B,
6908 Sched<[itins.Sched.Folded]>;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00006909}
6910// Conversion with SAE - suppress all exceptions
// Defines a single register-source form, rrb, whose asm string carries the
// "{sae}" marker. The pattern passes FROUND_NO_EXC as the second operand of
// OpNodeRnd, and the instruction is tagged EVEX_B. `_` is the destination
// type info and `_Src` the source type info.
6911multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00006912 X86VectorVTInfo _Src, SDNode OpNodeRnd,
6913 OpndItins itins> {
Elena Demikhovsky0f370932015-07-13 13:26:20 +00006914 defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
6915 (ins _Src.RC:$src), OpcodeStr,
6916 "{sae}, $src", "$src, {sae}",
6917 (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src),
Simon Pilgrim465a88b2017-12-03 21:16:12 +00006918 (i32 FROUND_NO_EXC))), itins.rr>,
6919 EVEX, EVEX_B, Sched<[itins.Sched]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006920}
6921
Elena Demikhovsky0f370932015-07-13 13:26:20 +00006922// Conversion with rounding control (RC)
// Defines a single register-source form, rrb, that takes an extra
// AVX512RC:$rc operand. $rc is printed in the asm string and forwarded to
// OpNodeRnd as an i32 immediate; the instruction is tagged EVEX_B and EVEX_RC.
// `_` is the destination type info and `_Src` the source type info.
6923multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00006924 X86VectorVTInfo _Src, SDNode OpNodeRnd,
6925 OpndItins itins> {
Elena Demikhovsky0f370932015-07-13 13:26:20 +00006926 defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
6927 (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
6928 "$rc, $src", "$src, $rc",
Simon Pilgrim465a88b2017-12-03 21:16:12 +00006929 (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 imm:$rc))),
6930 itins.rr>, EVEX, EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00006931}
6932
Elena Demikhovsky0f370932015-07-13 13:26:20 +00006933// Extend Float to Double
Simon Pilgrim465a88b2017-12-03 21:16:12 +00006934multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
6935 OpndItins itins> {
Elena Demikhovsky0f370932015-07-13 13:26:20 +00006936 let Predicates = [HasAVX512] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00006937 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8f32x_info,
6938 fpextend, itins>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00006939 avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00006940 X86vfpextRnd, itins>, EVEX_V512;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00006941 }
6942 let Predicates = [HasVLX] in {
6943 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4f32x_info,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00006944 X86vfpext, itins, "{1to2}", "", f64mem>, EVEX_V128;
6945 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4f32x_info, fpextend,
6946 itins>, EVEX_V256;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00006947 }
6948}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006949
Elena Demikhovsky0f370932015-07-13 13:26:20 +00006950// Truncate Double to Float
Simon Pilgrim465a88b2017-12-03 21:16:12 +00006951multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, OpndItins itins> {
Elena Demikhovsky0f370932015-07-13 13:26:20 +00006952 let Predicates = [HasAVX512] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00006953 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, fpround, itins>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00006954 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00006955 X86vfproundRnd, itins>, EVEX_V512;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00006956 }
6957 let Predicates = [HasVLX] in {
6958 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00006959 X86vfpround, itins, "{1to2}", "{x}">, EVEX_V128;
Michael Kuperstein2bc3d4d2016-08-18 20:08:15 +00006960 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, fpround,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00006961 itins, "{1to4}", "{y}">, EVEX_V256;
Craig Topperb8596e42016-11-14 01:53:29 +00006962
6963 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
6964 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
6965 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
6966 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, f128mem:$src), 0>;
6967 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
6968 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
6969 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
6970 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0>;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00006971 }
6972}
6973
Simon Pilgrim465a88b2017-12-03 21:16:12 +00006974defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SSE_CVT_PD2PS>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00006975 VEX_W, PD, EVEX_CD8<64, CD8VF>;
Simon Pilgrim465a88b2017-12-03 21:16:12 +00006976defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SSE_CVT_PS2PD>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00006977 PS, EVEX_CD8<32, CD8VH>;
6978
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006979def : Pat<(v8f64 (extloadv8f32 addr:$src)),
6980 (VCVTPS2PDZrm addr:$src)>;
Michael Liao5bf95782014-12-04 05:20:33 +00006981
Elena Demikhovsky0f370932015-07-13 13:26:20 +00006982let Predicates = [HasVLX] in {
Craig Topperee277e12017-10-14 05:55:42 +00006983 let AddedComplexity = 15 in {
6984 def : Pat<(X86vzmovl (v2f64 (bitconvert
6985 (v4f32 (X86vfpround (v2f64 VR128X:$src)))))),
6986 (VCVTPD2PSZ128rr VR128X:$src)>;
6987 def : Pat<(X86vzmovl (v2f64 (bitconvert
6988 (v4f32 (X86vfpround (loadv2f64 addr:$src)))))),
6989 (VCVTPD2PSZ128rm addr:$src)>;
6990 }
Craig Topper5471fc22016-11-06 04:12:52 +00006991 def : Pat<(v2f64 (extloadv2f32 addr:$src)),
6992 (VCVTPS2PDZ128rm addr:$src)>;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00006993 def : Pat<(v4f64 (extloadv4f32 addr:$src)),
6994 (VCVTPS2PDZ256rm addr:$src)>;
6995}
Elena Demikhovsky3629b4a2014-01-06 08:45:54 +00006996
Elena Demikhovsky0f370932015-07-13 13:26:20 +00006997// Convert Signed/Unsigned Doubleword to Double
6998multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00006999 SDNode OpNode128, OpndItins itins> {
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007000 // No rounding in this op
7001 let Predicates = [HasAVX512] in
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007002 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
7003 itins>, EVEX_V512;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007004
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007005 let Predicates = [HasVLX] in {
7006 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007007 OpNode128, itins, "{1to2}", "", i64mem>, EVEX_V128;
7008 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
7009 itins>, EVEX_V256;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007010 }
7011}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007012
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007013// Convert Signed/Unsigned Doubleword to Float
7014multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007015 SDNode OpNodeRnd, OpndItins itins> {
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007016 let Predicates = [HasAVX512] in
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007017 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
7018 itins>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007019 avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007020 OpNodeRnd, itins>, EVEX_V512;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007021
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007022 let Predicates = [HasVLX] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007023 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
7024 itins>, EVEX_V128;
7025 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
7026 itins>, EVEX_V256;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007027 }
7028}
7029
7030// Convert Float to Signed/Unsigned Doubleword with truncation
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007031multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7032 SDNode OpNodeRnd, OpndItins itins> {
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007033 let Predicates = [HasAVX512] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007034 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
7035 itins>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007036 avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007037 OpNodeRnd, itins>, EVEX_V512;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007038 }
7039 let Predicates = [HasVLX] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007040 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
7041 itins>, EVEX_V128;
7042 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
7043 itins>, EVEX_V256;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007044 }
7045}
7046
7047// Convert Float to Signed/Unsigned Doubleword
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007048multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7049 SDNode OpNodeRnd, OpndItins itins> {
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007050 let Predicates = [HasAVX512] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007051 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
7052 itins>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007053 avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007054 OpNodeRnd, itins>, EVEX_V512;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007055 }
7056 let Predicates = [HasVLX] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007057 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
7058 itins>, EVEX_V128;
7059 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
7060 itins>, EVEX_V256;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007061 }
7062}
7063
7064// Convert Double to Signed/Unsigned Doubleword with truncation
Craig Topper731bf9c2016-11-09 07:31:32 +00007065multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007066 SDNode OpNode128, SDNode OpNodeRnd,
7067 OpndItins itins> {
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007068 let Predicates = [HasAVX512] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007069 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
7070 itins>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007071 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007072 OpNodeRnd, itins>, EVEX_V512;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007073 }
7074 let Predicates = [HasVLX] in {
7075 // we need "x"/"y" suffixes in order to distinguish between 128 and 256
Craig Topper731bf9c2016-11-09 07:31:32 +00007076 // memory forms of these instructions in Asm Parser. They have the same
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007077 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
7078 // due to the same reason.
Craig Topper731bf9c2016-11-09 07:31:32 +00007079 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007080 OpNode128, itins, "{1to2}", "{x}">, EVEX_V128;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007081 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007082 itins, "{1to4}", "{y}">, EVEX_V256;
Craig Topperb8596e42016-11-14 01:53:29 +00007083
7084 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7085 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
7086 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7087 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, i128mem:$src), 0>;
7088 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7089 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
7090 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7091 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, i256mem:$src), 0>;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007092 }
7093}
7094
7095// Convert Double to Signed/Unsigned Doubleword
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007096multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7097 SDNode OpNodeRnd, OpndItins itins> {
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007098 let Predicates = [HasAVX512] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007099 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
7100 itins>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007101 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007102 OpNodeRnd, itins>, EVEX_V512;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007103 }
7104 let Predicates = [HasVLX] in {
7105 // we need "x"/"y" suffixes in order to distinguish between 128 and 256
7106 // memory forms of these instructions in Asm Parser. They have the same
7107 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
7108 // due to the same reason.
7109 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info, OpNode,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007110 itins, "{1to2}", "{x}">, EVEX_V128;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007111 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007112 itins, "{1to4}", "{y}">, EVEX_V256;
Craig Topperb8596e42016-11-14 01:53:29 +00007113
7114 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7115 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
7116 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7117 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, f128mem:$src), 0>;
7118 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7119 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
7120 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7121 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0>;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007122 }
7123}
7124
7125// Convert Double to Signed/Unsigned Quadword
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007126multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7127 SDNode OpNodeRnd, OpndItins itins> {
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007128 let Predicates = [HasDQI] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007129 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
7130 itins>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007131 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007132 OpNodeRnd,itins>, EVEX_V512;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007133 }
7134 let Predicates = [HasDQI, HasVLX] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007135 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
7136 itins>, EVEX_V128;
7137 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
7138 itins>, EVEX_V256;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007139 }
7140}
7141
7142// Convert Double to Signed/Unsigned Quadword with truncation
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007143multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7144 SDNode OpNodeRnd, OpndItins itins> {
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007145 let Predicates = [HasDQI] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007146 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
7147 itins>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007148 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007149 OpNodeRnd, itins>, EVEX_V512;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007150 }
7151 let Predicates = [HasDQI, HasVLX] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007152 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
7153 itins>, EVEX_V128;
7154 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
7155 itins>, EVEX_V256;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007156 }
7157}
7158
7159// Convert Signed/Unsigned Quadword to Double
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007160multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
7161 SDNode OpNodeRnd, OpndItins itins> {
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007162 let Predicates = [HasDQI] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007163 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
7164 itins>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007165 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007166 OpNodeRnd, itins>, EVEX_V512;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007167 }
7168 let Predicates = [HasDQI, HasVLX] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007169 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
7170 itins>, EVEX_V128;
7171 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
7172 itins>, EVEX_V256;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007173 }
7174}
7175
7176// Convert Float to Signed/Unsigned Quadword
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007177multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7178 SDNode OpNodeRnd, OpndItins itins> {
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007179 let Predicates = [HasDQI] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007180 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
7181 itins>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007182 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007183 OpNodeRnd, itins>, EVEX_V512;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007184 }
7185 let Predicates = [HasDQI, HasVLX] in {
7186 // Explicitly specified broadcast string, since we take only 2 elements
7187 // from v4f32x_info source
7188 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007189 itins, "{1to2}", "", f64mem>, EVEX_V128;
7190 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
7191 itins>, EVEX_V256;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007192 }
7193}
7194
7195// Convert Float to Signed/Unsigned Quadword with truncation
Craig Toppera39b6502016-12-10 06:02:48 +00007196multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007197 SDNode OpNode128, SDNode OpNodeRnd, OpndItins itins> {
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007198 let Predicates = [HasDQI] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007199 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
7200 itins>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007201 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007202 OpNodeRnd, itins>, EVEX_V512;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007203 }
7204 let Predicates = [HasDQI, HasVLX] in {
7205 // Explicitly specified broadcast string, since we take only 2 elements
7206 // from v4f32x_info source
Craig Toppera39b6502016-12-10 06:02:48 +00007207 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode128,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007208 itins, "{1to2}", "", f64mem>, EVEX_V128;
7209 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
7210 itins>, EVEX_V256;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007211 }
7212}
7213
7214// Convert Signed/Unsigned Quadword to Float
Simon Pilgrima3af7962016-11-24 12:13:46 +00007215multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007216 SDNode OpNode128, SDNode OpNodeRnd, OpndItins itins> {
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007217 let Predicates = [HasDQI] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007218 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode,
7219 itins>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007220 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007221 OpNodeRnd, itins>, EVEX_V512;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007222 }
7223 let Predicates = [HasDQI, HasVLX] in {
7224 // we need "x"/"y" suffixes in order to distinguish between 128 and 256
7225 // memory forms of these instructions in Asm Parser. They have the same
7226 // dest type - 'v4f32x_info'. We also specify the broadcast string explicitly
7227 // due to the same reason.
Simon Pilgrima3af7962016-11-24 12:13:46 +00007228 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, OpNode128,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007229 itins, "{1to2}", "{x}">, EVEX_V128;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007230 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007231 itins, "{1to4}", "{y}">, EVEX_V256;
Craig Topperb8596e42016-11-14 01:53:29 +00007232
7233 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7234 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
7235 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7236 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, i128mem:$src), 0>;
7237 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7238 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
7239 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7240 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, i256mem:$src), 0>;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007241 }
7242}
7243
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007244defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", sint_to_fp, X86VSintToFP,
7245 SSE_CVT_I2PD>, XS, EVEX_CD8<32, CD8VH>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007246
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007247defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", sint_to_fp,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007248 X86VSintToFpRnd, SSE_CVT_I2PS>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007249 PS, EVEX_CD8<32, CD8VF>;
7250
7251defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", fp_to_sint,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007252 X86cvttp2siRnd, SSE_CVT_PS2I>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007253 XS, EVEX_CD8<32, CD8VF>;
7254
Simon Pilgrima3af7962016-11-24 12:13:46 +00007255defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", fp_to_sint, X86cvttp2si,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007256 X86cvttp2siRnd, SSE_CVT_PD2I>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007257 PD, VEX_W, EVEX_CD8<64, CD8VF>;
7258
7259defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", fp_to_uint,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007260 X86cvttp2uiRnd, SSE_CVT_PS2I>, PS,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007261 EVEX_CD8<32, CD8VF>;
7262
Craig Topperf334ac192016-11-09 07:48:51 +00007263defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", fp_to_uint,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007264 X86cvttp2ui, X86cvttp2uiRnd, SSE_CVT_PD2I>,
7265 PS, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007266
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007267defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp,
7268 X86VUintToFP, SSE_CVT_I2PD>, XS,
7269 EVEX_CD8<32, CD8VH>;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007270
7271defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", uint_to_fp,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007272 X86VUintToFpRnd, SSE_CVT_I2PS>, XD,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007273 EVEX_CD8<32, CD8VF>;
7274
Craig Topper19e04b62016-05-19 06:13:58 +00007275defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007276 X86cvtp2IntRnd, SSE_CVT_PS2I>, PD,
7277 EVEX_CD8<32, CD8VF>;
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00007278
Craig Topper19e04b62016-05-19 06:13:58 +00007279defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007280 X86cvtp2IntRnd, SSE_CVT_PD2I>, XD,
7281 VEX_W, EVEX_CD8<64, CD8VF>;
Michael Liao5bf95782014-12-04 05:20:33 +00007282
Craig Topper19e04b62016-05-19 06:13:58 +00007283defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007284 X86cvtp2UIntRnd, SSE_CVT_PS2I>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007285 PS, EVEX_CD8<32, CD8VF>;
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007286
Craig Topper19e04b62016-05-19 06:13:58 +00007287defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007288 X86cvtp2UIntRnd, SSE_CVT_PD2I>, VEX_W,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007289 PS, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00007290
Craig Topper19e04b62016-05-19 06:13:58 +00007291defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007292 X86cvtp2IntRnd, SSE_CVT_PD2I>, VEX_W,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007293 PD, EVEX_CD8<64, CD8VF>;
Michael Liao5bf95782014-12-04 05:20:33 +00007294
Craig Topper19e04b62016-05-19 06:13:58 +00007295defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007296 X86cvtp2IntRnd, SSE_CVT_PS2I>, PD,
7297 EVEX_CD8<32, CD8VH>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007298
Craig Topper19e04b62016-05-19 06:13:58 +00007299defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007300 X86cvtp2UIntRnd, SSE_CVT_PD2I>, VEX_W,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007301 PD, EVEX_CD8<64, CD8VF>;
7302
Craig Topper19e04b62016-05-19 06:13:58 +00007303defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007304 X86cvtp2UIntRnd, SSE_CVT_PS2I>, PD,
7305 EVEX_CD8<32, CD8VH>;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007306
7307defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", fp_to_sint,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007308 X86cvttp2siRnd, SSE_CVT_PD2I>, VEX_W,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007309 PD, EVEX_CD8<64, CD8VF>;
7310
Craig Toppera39b6502016-12-10 06:02:48 +00007311defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", fp_to_sint, X86cvttp2si,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007312 X86cvttp2siRnd, SSE_CVT_PS2I>, PD,
7313 EVEX_CD8<32, CD8VH>;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007314
7315defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", fp_to_uint,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007316 X86cvttp2uiRnd, SSE_CVT_PD2I>, VEX_W,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007317 PD, EVEX_CD8<64, CD8VF>;
7318
Craig Toppera39b6502016-12-10 06:02:48 +00007319defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", fp_to_uint, X86cvttp2ui,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007320 X86cvttp2uiRnd, SSE_CVT_PS2I>, PD,
7321 EVEX_CD8<32, CD8VH>;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007322
7323defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", sint_to_fp,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007324 X86VSintToFpRnd, SSE_CVT_I2PD>, VEX_W, XS,
7325 EVEX_CD8<64, CD8VF>;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007326
7327defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", uint_to_fp,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007328 X86VUintToFpRnd, SSE_CVT_I2PD>, VEX_W, XS,
7329 EVEX_CD8<64, CD8VF>;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007330
Simon Pilgrima3af7962016-11-24 12:13:46 +00007331defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp, X86VSintToFP,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007332 X86VSintToFpRnd, SSE_CVT_I2PS>, VEX_W, PS,
7333 EVEX_CD8<64, CD8VF>;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007334
Simon Pilgrima3af7962016-11-24 12:13:46 +00007335defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp, X86VUintToFP,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007336 X86VUintToFpRnd, SSE_CVT_I2PS>, VEX_W, XD,
7337 EVEX_CD8<64, CD8VF>;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007338
// Selection patterns for 128/256-bit unsigned conversions, active under
// [HasAVX512, NoVLX]. Each pattern places the narrow source into the low
// subregister (sub_xmm / sub_ymm) of an IMPLICIT_DEF wider value with
// INSERT_SUBREG, applies the corresponding Z-form instruction, and then takes
// the low part of the result with EXTRACT_SUBREG. The remaining lanes of the
// widened input are therefore undefined; only the extracted low part is used.
Craig Toppere38c57a2015-11-27 05:44:02 +00007339let Predicates = [HasAVX512, NoVLX] in {
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007340def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
Michael Liao5bf95782014-12-04 05:20:33 +00007341 (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
Craig Topper61403202016-09-19 02:53:43 +00007342 (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
7343 VR256X:$src1, sub_ymm)))), sub_ymm)>;
Michael Liao5bf95782014-12-04 05:20:33 +00007344
Elena Demikhovsky3dcfbdf2014-04-08 07:24:02 +00007345def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))),
7346 (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
Craig Topper61403202016-09-19 02:53:43 +00007347 (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
7348 VR128X:$src1, sub_xmm)))), sub_xmm)>;
Elena Demikhovsky3dcfbdf2014-04-08 07:24:02 +00007349
// f64 -> u32 narrows the result: a 256-bit source yields a 128-bit result.
Elena Demikhovsky95629ca2016-03-29 06:33:41 +00007350def : Pat<(v4i32 (fp_to_uint (v4f64 VR256X:$src1))),
7351 (EXTRACT_SUBREG (v8i32 (VCVTTPD2UDQZrr
Craig Topper61403202016-09-19 02:53:43 +00007352 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
7353 VR256X:$src1, sub_ymm)))), sub_xmm)>;
Elena Demikhovsky95629ca2016-03-29 06:33:41 +00007354
Elena Demikhovsky3dcfbdf2014-04-08 07:24:02 +00007355def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
7356 (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
Craig Topper61403202016-09-19 02:53:43 +00007357 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
7358 VR256X:$src1, sub_ymm)))), sub_ymm)>;
Michael Liao5bf95782014-12-04 05:20:33 +00007359
Elena Demikhovsky3dcfbdf2014-04-08 07:24:02 +00007360def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
7361 (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
Craig Topper61403202016-09-19 02:53:43 +00007362 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
7363 VR128X:$src1, sub_xmm)))), sub_xmm)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007364
// u32 -> f64 widens the result: a 128-bit source yields a 256-bit result.
Cameron McInallyf10a7c92014-06-18 14:04:37 +00007365def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
7366 (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
Craig Topper61403202016-09-19 02:53:43 +00007367 (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
7368 VR128X:$src1, sub_xmm)))), sub_ymm)>;
Simon Pilgrim096b6d42016-11-20 14:03:23 +00007369
// X86VUintToFP takes a v4i32 source and produces a v2f64 result.
Simon Pilgrima3af7962016-11-24 12:13:46 +00007370def : Pat<(v2f64 (X86VUintToFP (v4i32 VR128X:$src1))),
Simon Pilgrim096b6d42016-11-20 14:03:23 +00007371 (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
7372 (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
7373 VR128X:$src1, sub_xmm)))), sub_xmm)>;
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00007374}
7375
// 128-bit conversion patterns for targets with both AVX512 and VLX.
let Predicates = [HasAVX512, HasVLX] in {
  // A v2f64 -> v4i32 conversion wrapped in X86vzmovl is selected directly to
  // the Z128 conversion instruction. AddedComplexity raises the priority of
  // these patterns.
  let AddedComplexity = 15 in {
    // Rounding conversions (X86cvtp2Int / X86cvtp2UInt).
    def : Pat<(X86vzmovl
                (v2i64 (bitconvert
                  (v4i32 (X86cvtp2Int (v2f64 VR128X:$src)))))),
              (VCVTPD2DQZ128rr VR128X:$src)>;
    def : Pat<(X86vzmovl
                (v2i64 (bitconvert
                  (v4i32 (X86cvtp2Int (loadv2f64 addr:$src)))))),
              (VCVTPD2DQZ128rm addr:$src)>;
    def : Pat<(X86vzmovl
                (v2i64 (bitconvert
                  (v4i32 (X86cvtp2UInt (v2f64 VR128X:$src)))))),
              (VCVTPD2UDQZ128rr VR128X:$src)>;

    // Truncating conversions (X86cvttp2si / X86cvttp2ui).
    def : Pat<(X86vzmovl
                (v2i64 (bitconvert
                  (v4i32 (X86cvttp2si (v2f64 VR128X:$src)))))),
              (VCVTTPD2DQZ128rr VR128X:$src)>;
    def : Pat<(X86vzmovl
                (v2i64 (bitconvert
                  (v4i32 (X86cvttp2si (loadv2f64 addr:$src)))))),
              (VCVTTPD2DQZ128rm addr:$src)>;
    def : Pat<(X86vzmovl
                (v2i64 (bitconvert
                  (v4i32 (X86cvttp2ui (v2f64 VR128X:$src)))))),
              (VCVTTPD2UDQZ128rr VR128X:$src)>;
  }

  // i32 -> f64 conversions whose source is a 64-bit load (either
  // scalar_to_vector of loadi64 or X86vzload) use the memory form.
  def : Pat<(v2f64 (X86VSintToFP
              (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (VCVTDQ2PDZ128rm addr:$src)>;
  def : Pat<(v2f64 (X86VSintToFP
              (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
            (VCVTDQ2PDZ128rm addr:$src)>;

  def : Pat<(v2f64 (X86VUintToFP
              (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (VCVTUDQ2PDZ128rm addr:$src)>;
  def : Pat<(v2f64 (X86VUintToFP
              (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
            (VCVTUDQ2PDZ128rm addr:$src)>;
}
7408
// Select the memory forms of the 512-bit f64 <-> f32 conversions when the
// source is a load: fpround of a v8f64 load, and an extending v8f32 load.
let Predicates = [HasAVX512] in {
  def : Pat<(v8f32 (fpround (loadv8f64 addr:$src))),
            (VCVTPD2PSZrm addr:$src)>;

  def : Pat<(v8f64 (extloadv8f32 addr:$src)),
            (VCVTPS2PDZrm addr:$src)>;
}
7415
// With DQI and VLX, a v2i64 -> v4f32 conversion wrapped in X86vzmovl is
// selected directly to the Z128 register form. Both patterns carry
// AddedComplexity = 15.
let Predicates = [HasDQI, HasVLX], AddedComplexity = 15 in {
  def : Pat<(X86vzmovl
              (v2f64 (bitconvert
                (v4f32 (X86VSintToFP (v2i64 VR128X:$src)))))),
            (VCVTQQ2PSZ128rr VR128X:$src)>;

  def : Pat<(X86vzmovl
              (v2f64 (bitconvert
                (v4f32 (X86VUintToFP (v2i64 VR128X:$src)))))),
            (VCVTUQQ2PSZ128rr VR128X:$src)>;
}
7426
// Patterns used when DQI is available but VLX is not. 128/256-bit conversions
// to or from 64-bit integers are selected to the 512-bit (Z) instruction: the
// source goes into the low subregister of an IMPLICIT_DEF wide value and the
// low subregister of the result is extracted.
let Predicates = [HasDQI, NoVLX] in {
  // fp -> signed i64.
  def : Pat<(v2i64 (fp_to_sint (v2f64 VR128X:$src1))),
            (EXTRACT_SUBREG
               (v8i64 (VCVTTPD2QQZrr
                  (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                        VR128X:$src1, sub_xmm)))),
               sub_xmm)>;

  def : Pat<(v4i64 (fp_to_sint (v4f32 VR128X:$src1))),
            (EXTRACT_SUBREG
               (v8i64 (VCVTTPS2QQZrr
                  (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
                                        VR128X:$src1, sub_xmm)))),
               sub_ymm)>;

  def : Pat<(v4i64 (fp_to_sint (v4f64 VR256X:$src1))),
            (EXTRACT_SUBREG
               (v8i64 (VCVTTPD2QQZrr
                  (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                        VR256X:$src1, sub_ymm)))),
               sub_ymm)>;

  // fp -> unsigned i64.
  def : Pat<(v2i64 (fp_to_uint (v2f64 VR128X:$src1))),
            (EXTRACT_SUBREG
               (v8i64 (VCVTTPD2UQQZrr
                  (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                        VR128X:$src1, sub_xmm)))),
               sub_xmm)>;

  def : Pat<(v4i64 (fp_to_uint (v4f32 VR128X:$src1))),
            (EXTRACT_SUBREG
               (v8i64 (VCVTTPS2UQQZrr
                  (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
                                        VR128X:$src1, sub_xmm)))),
               sub_ymm)>;

  def : Pat<(v4i64 (fp_to_uint (v4f64 VR256X:$src1))),
            (EXTRACT_SUBREG
               (v8i64 (VCVTTPD2UQQZrr
                  (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                        VR256X:$src1, sub_ymm)))),
               sub_ymm)>;

  // signed i64 -> fp.
  def : Pat<(v4f32 (sint_to_fp (v4i64 VR256X:$src1))),
            (EXTRACT_SUBREG
               (v8f32 (VCVTQQ2PSZrr
                  (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                        VR256X:$src1, sub_ymm)))),
               sub_xmm)>;

  def : Pat<(v2f64 (sint_to_fp (v2i64 VR128X:$src1))),
            (EXTRACT_SUBREG
               (v8f64 (VCVTQQ2PDZrr
                  (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                        VR128X:$src1, sub_xmm)))),
               sub_xmm)>;

  def : Pat<(v4f64 (sint_to_fp (v4i64 VR256X:$src1))),
            (EXTRACT_SUBREG
               (v8f64 (VCVTQQ2PDZrr
                  (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                        VR256X:$src1, sub_ymm)))),
               sub_ymm)>;

  // unsigned i64 -> fp.
  def : Pat<(v4f32 (uint_to_fp (v4i64 VR256X:$src1))),
            (EXTRACT_SUBREG
               (v8f32 (VCVTUQQ2PSZrr
                  (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                        VR256X:$src1, sub_ymm)))),
               sub_xmm)>;

  def : Pat<(v2f64 (uint_to_fp (v2i64 VR128X:$src1))),
            (EXTRACT_SUBREG
               (v8f64 (VCVTUQQ2PDZrr
                  (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                        VR128X:$src1, sub_xmm)))),
               sub_xmm)>;

  def : Pat<(v4f64 (uint_to_fp (v4i64 VR256X:$src1))),
            (EXTRACT_SUBREG
               (v8f64 (VCVTUQQ2PDZrr
                  (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                        VR256X:$src1, sub_ymm)))),
               sub_ymm)>;
}
7488
Elena Demikhovskydd0794e2013-10-24 07:16:35 +00007489//===----------------------------------------------------------------------===//
7490// Half precision conversion instructions
7491//===----------------------------------------------------------------------===//
Simon Pilgrim569e53b2017-12-03 21:43:54 +00007492
// vcvtph2ps (opcode 0x13): maskable register and memory forms.
//   _dest    - vector info of the result.
//   _src     - vector info of the source.
//   x86memop - memory operand used by the rm form.
//   ld_frag  - load fragment matched (through a bitconvert) by the rm form.
//   itins    - itinerary / scheduling information.
multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                           X86MemOperand x86memop, PatFrag ld_frag,
                           OpndItins itins> {
  defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
                            (ins _src.RC:$src),
                            "vcvtph2ps", "$src", "$src",
                            (X86cvtph2ps (_src.VT _src.RC:$src)),
                            itins.rr>,
            T8PD, Sched<[itins.Sched]>;

  defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
                            (ins x86memop:$src),
                            "vcvtph2ps", "$src", "$src",
                            (X86cvtph2ps
                              (_src.VT (bitconvert (ld_frag addr:$src)))),
                            itins.rm>,
            T8PD, Sched<[itins.Sched.Folded]>;
}
7507
// vcvtph2ps {sae} register form: matches X86cvtph2psRnd with FROUND_NO_EXC
// and is tagged EVEX_B.
multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                               OpndItins itins> {
  defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
                             (ins _src.RC:$src),
                             "vcvtph2ps", "{sae}, $src", "$src, {sae}",
                             (X86cvtph2psRnd (_src.VT _src.RC:$src),
                                             (i32 FROUND_NO_EXC)),
                             itins.rr>,
             T8PD, EVEX_B, Sched<[itins.Sched]>;
}
7517
// 512-bit vcvtph2ps: the plain forms plus the {sae} register form.
let Predicates = [HasAVX512] in {
  defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem,
                                    loadv4i64, SSE_CVT_PH2PS>,
                    avx512_cvtph2ps_sae<v16f32_info, v16i16x_info,
                                        SSE_CVT_PH2PS>,
                    EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
}
Craig Toppere7fb3002017-11-07 07:13:07 +00007523
// 256-bit and 128-bit vcvtph2ps, available with VLX.
let Predicates = [HasVLX] in {
  defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
                                       loadv2i64, SSE_CVT_PH2PS>,
                       EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
  defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
                                       loadv2i64, SSE_CVT_PH2PS>,
                       EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;

  // Pattern match vcvtph2ps of a scalar i64 load: three ways a 64-bit load
  // can appear as the v8i16 source, all folded into the Z128 memory form.
  def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzmovl_v2i64 addr:$src)))),
            (VCVTPH2PSZ128rm addr:$src)>;
  def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzload_v2i64 addr:$src)))),
            (VCVTPH2PSZ128rm addr:$src)>;
  def : Pat<(v4f32 (X86cvtph2ps
              (v8i16 (bitconvert
                (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
            (VCVTPH2PSZ128rm addr:$src)>;
}
7541
// vcvtps2ph (opcode 0x1D): maskable register form plus two store forms.
//   _dest    - vector info of the half-precision result.
//   _src     - vector info of the single-precision source.
//   x86memop - destination memory operand of the store forms.
//   itins    - itinerary / scheduling information.
multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                           X86MemOperand x86memop, OpndItins itins> {
  // Register destination; $src2 is the immediate operand.
  defm rr : AVX512_maskable<0x1D, MRMDestReg, _dest, (outs _dest.RC:$dst),
                            (ins _src.RC:$src1, i32u8imm:$src2),
                            "vcvtps2ph", "$src2, $src1", "$src1, $src2",
                            (X86cvtps2ph (_src.VT _src.RC:$src1),
                                         (i32 imm:$src2)),
                            itins.rr, 0, 0>,
            AVX512AIi8Base, Sched<[itins.Sched]>;

  // Memory destination, unmasked (mr) and write-masked (mrk). These carry no
  // selection pattern of their own and are flagged mayStore.
  let hasSideEffects = 0, mayStore = 1 in {
    def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
                        (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
                        "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                        [], itins.rm>,
             Sched<[itins.Sched.Folded, ReadAfterLd]>;

    def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
                         (ins x86memop:$dst, _dest.KRCWM:$mask,
                              _src.RC:$src1, i32u8imm:$src2),
                         "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                         [], itins.rm>,
              EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
Simon Pilgrim569e53b2017-12-03 21:43:54 +00007561
// vcvtps2ph {sae} register form. It is defined through AVX512_maskable_in_asm
// with an empty pattern list and is tagged EVEX_B.
multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                               OpndItins itins> {
  let hasSideEffects = 0 in
  defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
                                    (outs _dest.RC:$dst),
                                    (ins _src.RC:$src1, i32u8imm:$src2),
                                    "vcvtps2ph",
                                    "$src2, {sae}, $src1",
                                    "$src1, {sae}, $src2",
                                    [], itins.rr>,
             EVEX_B, AVX512AIi8Base, Sched<[itins.Sched]>;
}
Simon Pilgrim569e53b2017-12-03 21:43:54 +00007571
// vcvtps2ph instruction definitions and the store patterns that fold a
// conversion followed by a store into the memory-destination (mr) form.
//
// Each store pattern is guarded by the same predicate as the instruction it
// selects. The 128-bit and 256-bit patterns select VCVTPS2PHZ128mr and
// VCVTPS2PHZ256mr, which are defined under HasVLX, so they must not be
// reachable with only HasAVX512.

// 512-bit form: requires AVX512 only.
let Predicates = [HasAVX512] in {
  defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
                                    SSE_CVT_PS2PH>,
                    avx512_cvtps2ph_sae<v16i16x_info, v16f32_info,
                                        SSE_CVT_PS2PH>,
                    EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;

  def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, i32:$src2)), addr:$dst),
            (VCVTPS2PHZmr addr:$dst, VR512:$src1, imm:$src2)>;
}

// 256-bit and 128-bit forms: require VLX.
let Predicates = [HasVLX] in {
  defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
                                       SSE_CVT_PS2PH>,
                       EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
  defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
                                       SSE_CVT_PS2PH>,
                       EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;

  // The 128-bit form writes 64 bits; match a store of element 0 of the
  // result viewed as v2f64 or as v2i64.
  def : Pat<(store (f64 (extractelt
                          (bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1,
                                                        i32:$src2))),
                          (iPTR 0))), addr:$dst),
            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
  def : Pat<(store (i64 (extractelt
                          (bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1,
                                                        i32:$src2))),
                          (iPTR 0))), addr:$dst),
            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;

  def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, i32:$src2)), addr:$dst),
            (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, imm:$src2)>;
}
Asaf Badouh2489f352015-12-02 08:17:51 +00007600
// Scalar float <-> half-float conversions, selected through the 128-bit
// vcvtps2ph / vcvtph2ps register forms.
let Predicates = [HasVLX] in {
  // The rounding immediate is 4 (0b100), i.e. "use MXCSR.RC", rather than an
  // explicit Nearest-Even (0). The two are equivalent in the configurations
  // we support (the default), but deferring to MXCSR is more consistent with
  // other instructions, which are always controlled by it.
  def : Pat<(fp_to_f16 FR32X:$src),
            (i16 (EXTRACT_SUBREG
                   (VMOVPDI2DIZrr
                     (VCVTPS2PHZ128rr (COPY_TO_REGCLASS FR32X:$src, VR128X),
                                      4)),
                   sub_16bit))>;

  def : Pat<(f16_to_fp GR16:$src),
            (f32 (COPY_TO_REGCLASS
                   (VCVTPH2PSZ128rr
                     (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128X)),
                   FR32X))>;

  // Round trip through f16: both conversions stay in the vector register.
  def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32X:$src))),
            (f32 (COPY_TO_REGCLASS
                   (VCVTPH2PSZ128rr
                     (VCVTPS2PHZ128rr (COPY_TO_REGCLASS FR32X:$src, VR128X),
                                      4)),
                   FR32X))>;
}
7620
// Unordered/ordered scalar fp compare with {sae}; the instantiations wrap
// this in Defs = [EFLAGS]. Only the register-register form is defined, and
// it has no selection pattern.
multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
                              string OpcodeStr, OpndItins itins> {
  let hasSideEffects = 0 in
  def rrb : AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
                   !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"),
                   [], itins.rr>,
            EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[itins.Sched]>;
}
7630
// {sae} variants of (u)comiss / (u)comisd. Opcode 0x2E is the unordered
// compare, 0x2F the ordered one.
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss",
                                      SSE_COMIS>,
                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd",
                                      SSE_COMIS>,
                   AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
  defm VCOMISSZ  : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss",
                                      SSE_COMIS>,
                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
  defm VCOMISDZ  : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd",
                                      SSE_COMIS>,
                   AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
}
7641
// EVEX-encoded (u)comiss / (u)comisd built from the shared sse12_ord_cmp
// multiclasses.
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  // Unordered compares on FR32X/FR64X, matched through X86cmp.
  defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
                                 "ucomiss", SSE_COMIS>,
                   PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64,
                                 "ucomisd", SSE_COMIS>,
                   PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;

  // Ordered compares on FR32X/FR64X: defined with an empty Pattern.
  let Pattern = []<dag> in {
    defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, undef, f32, f32mem, loadf32,
                                  "comiss", SSE_COMIS>,
                    PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
    defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, undef, f64, f64mem, loadf64,
                                  "comisd", SSE_COMIS>,
                    PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  }

  // VR128X variants matched through X86ucomi / X86comi; codegen-only.
  let isCodeGenOnly = 1 in {
    defm Int_VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32,
                                           ssmem, sse_load_f32, "ucomiss",
                                           SSE_COMIS>,
                         PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
    defm Int_VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64,
                                           sdmem, sse_load_f64, "ucomisd",
                                           SSE_COMIS>,
                         PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;

    defm Int_VCOMISSZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32,
                                           ssmem, sse_load_f32, "comiss",
                                           SSE_COMIS>,
                         PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
    defm Int_VCOMISDZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64,
                                           sdmem, sse_load_f64, "comisd",
                                           SSE_COMIS>,
                         PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  }
}
Michael Liao5bf95782014-12-04 05:20:33 +00007673
/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
///
/// Scalar two-source forms: register-register (rr) and register-memory (rm),
/// both maskable and both gated on HasAVX512.
multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         OpndItins itins, X86VectorVTInfo _> {
  let Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
    defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                     (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                                     "$src2, $src1", "$src1, $src2",
                                     (OpNode (_.VT _.RC:$src1),
                                             (_.VT _.RC:$src2)),
                                     itins.rr>,
              EVEX_4V, Sched<[itins.Sched]>;

    defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                     (ins _.RC:$src1, _.IntScalarMemOp:$src2),
                                     OpcodeStr,
                                     "$src2, $src1", "$src1, $src2",
                                     (OpNode (_.VT _.RC:$src1),
                                             _.ScalarIntMemCPat:$src2),
                                     itins.rm>,
              EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
7691
// Scalar 14-bit reciprocal (0x4D) and reciprocal square root (0x4F), in
// single and double precision. All four are marked NotMemoryFoldable.
defm VRCP14SS   : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SSE_RCPS,
                                f32x_info>,
                  EVEX_CD8<32, CD8VT1>, T8PD, NotMemoryFoldable;
defm VRCP14SD   : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SSE_RCPS,
                                f64x_info>,
                  VEX_W, EVEX_CD8<64, CD8VT1>, T8PD, NotMemoryFoldable;
defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s, SSE_RSQRTSS,
                                f32x_info>,
                  EVEX_CD8<32, CD8VT1>, T8PD, NotMemoryFoldable;
defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s, SSE_RSQRTSS,
                                f64x_info>,
                  VEX_W, EVEX_CD8<64, CD8VT1>, T8PD, NotMemoryFoldable;
Elena Demikhovskya3a71402013-10-09 08:16:14 +00007700
/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
///
/// Packed one-source forms: register (r), full-vector memory (m) and
/// broadcast memory (mb, tagged EVEX_B). All three are maskable.
multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                             (ins _.RC:$src), OpcodeStr, "$src", "$src",
                             (_.FloatVT (OpNode _.RC:$src)),
                             itins.rr>,
             EVEX, T8PD, Sched<[itins.Sched]>;

    defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                             (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                             (OpNode (_.FloatVT
                               (bitconvert (_.LdFrag addr:$src)))),
                             itins.rm>,
             EVEX, T8PD, Sched<[itins.Sched.Folded, ReadAfterLd]>;

    defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                              (ins _.ScalarMemOp:$src), OpcodeStr,
                              "${src}"##_.BroadcastStr,
                              "${src}"##_.BroadcastStr,
                              (OpNode (_.FloatVT
                                (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
                              itins.rm>,
              EVEX, T8PD, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
Robert Khasanov3e534c92014-10-28 16:37:13 +00007722
// Instantiates avx512_fp14_p for every vector length: the 512-bit ps/pd
// forms, plus the 128/256-bit forms under HasVLX. The mnemonic is OpcodeStr
// with "ps" or "pd" appended.
multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                SizeItins itins> {
  defm PSZ : avx512_fp14_p<opc, OpcodeStr#"ps", OpNode, itins.s, v16f32_info>,
             EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp14_p<opc, OpcodeStr#"pd", OpNode, itins.d, v8f64_info>,
             EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp14_p<opc, OpcodeStr#"ps", OpNode, itins.s,
                                v4f32x_info>,
                  EVEX_V128, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp14_p<opc, OpcodeStr#"ps", OpNode, itins.s,
                                v8f32x_info>,
                  EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp14_p<opc, OpcodeStr#"pd", OpNode, itins.d,
                                v2f64x_info>,
                  EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp14_p<opc, OpcodeStr#"pd", OpNode, itins.d,
                                v4f64x_info>,
                  EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
  }
}
7746
// Packed 14-bit reciprocal square root (0x4E) and reciprocal (0x4C), at all
// vector lengths.
defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14,
                                     SSE_RSQRT_P>;
defm VRCP14   : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SSE_RCP_P>;
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00007749
/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
///
/// Scalar two-source forms taking a rounding-control operand:
///   r  - register-register, FROUND_CURRENT.
///   rb - register-register with {sae} (FROUND_NO_EXC), tagged EVEX_B.
///   m  - register-memory, FROUND_CURRENT.
multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         SDNode OpNode, OpndItins itins> {
  let ExeDomain = _.ExeDomain in {
    defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                                    "$src2, $src1", "$src1, $src2",
                                    (OpNode (_.VT _.RC:$src1),
                                            (_.VT _.RC:$src2),
                                            (i32 FROUND_CURRENT)),
                                    itins.rr>,
             Sched<[itins.Sched]>;

    // rb has only register operands, so it takes the register itinerary
    // (itins.rr), matching its unfolded Sched class and the `r` form above.
    defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                     (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                                     "{sae}, $src2, $src1",
                                     "$src1, $src2, {sae}",
                                     (OpNode (_.VT _.RC:$src1),
                                             (_.VT _.RC:$src2),
                                             (i32 FROUND_NO_EXC)),
                                     itins.rr>,
              EVEX_B, Sched<[itins.Sched]>;

    defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                    (ins _.RC:$src1, _.IntScalarMemOp:$src2),
                                    OpcodeStr,
                                    "$src2, $src1", "$src1, $src2",
                                    (OpNode (_.VT _.RC:$src1),
                                            _.ScalarIntMemCPat:$src2,
                                            (i32 FROUND_CURRENT)),
                                    itins.rm>,
             Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
7776
// Instantiates avx512_fp28_s for single (SS) and double (SD) precision. The
// mnemonic is OpcodeStr with "ss" or "sd" appended.
multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                        SizeItins itins> {
  defm SS : avx512_fp28_s<opc, !strconcat(OpcodeStr, "ss"), f32x_info,
                          OpNode, itins.s>,
            EVEX_CD8<32, CD8VT1>;
  defm SD : avx512_fp28_s<opc, !strconcat(OpcodeStr, "sd"), f64x_info,
                          OpNode, itins.d>,
            EVEX_CD8<64, CD8VT1>, VEX_W;
}
Elena Demikhovskya3a71402013-10-09 08:16:14 +00007784
// Scalar vrcp28 / vrsqrt28 are gated on HasERI.
let Predicates = [HasERI] in {
  defm VRCP28   : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, SSE_RCP_S>,
                  T8PD, EVEX_4V;
  defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, SSE_RSQRT_S>,
                  T8PD, EVEX_4V;
}

// Scalar vgetexp reuses avx512_eri_s and is not placed under HasERI.
defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds, SSE_ALU_ITINS_S>,
               T8PD, EVEX_4V;
/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
///
/// Packed one-source forms, each matching OpNode with FROUND_CURRENT:
/// register (r), full-vector memory (m) and broadcast memory (mb, EVEX_B).
multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         SDNode OpNode, OpndItins itins> {
  let ExeDomain = _.ExeDomain in {
    defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                             (ins _.RC:$src), OpcodeStr, "$src", "$src",
                             (OpNode (_.VT _.RC:$src), (i32 FROUND_CURRENT)),
                             itins.rr>,
             Sched<[itins.Sched]>;

    defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                             (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                             (OpNode (_.FloatVT
                                       (bitconvert (_.LdFrag addr:$src))),
                                     (i32 FROUND_CURRENT)),
                             itins.rm>,
             Sched<[itins.Sched.Folded, ReadAfterLd]>;

    defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                              (ins _.ScalarMemOp:$src), OpcodeStr,
                              "${src}"##_.BroadcastStr,
                              "${src}"##_.BroadcastStr,
                              (OpNode (_.FloatVT
                                        (X86VBroadcast
                                          (_.ScalarLdFrag addr:$src))),
                                      (i32 FROUND_CURRENT)),
                              itins.rm>,
              EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
// Packed {sae} register form: matches OpNode with FROUND_NO_EXC and is
// tagged EVEX_B.
multiclass avx512_fp28_p_round<bits<8> opc, string OpcodeStr,
                               X86VectorVTInfo _, SDNode OpNode,
                               OpndItins itins> {
  let ExeDomain = _.ExeDomain in
  defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src), OpcodeStr,
                            "{sae}, $src", "$src, {sae}",
                            (OpNode (_.VT _.RC:$src), (i32 FROUND_NO_EXC)),
                            itins.rr>,
            EVEX_B, Sched<[itins.Sched]>;
}
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00007829
// 512-bit packed ps/pd instantiations combining the regular forms
// (avx512_fp28_p) with the {sae} register form (avx512_fp28_p_round).
multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
                      SizeItins itins> {
  defm PS : avx512_fp28_p<opc, !strconcat(OpcodeStr, "ps"), v16f32_info,
                          OpNode, itins.s>,
            avx512_fp28_p_round<opc, !strconcat(OpcodeStr, "ps"), v16f32_info,
                                OpNode, itins.s>,
            T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PD : avx512_fp28_p<opc, !strconcat(OpcodeStr, "pd"), v8f64_info,
                          OpNode, itins.d>,
            avx512_fp28_p_round<opc, !strconcat(OpcodeStr, "pd"), v8f64_info,
                                OpNode, itins.d>,
            T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00007839
// 128-bit and 256-bit packed instantiations of avx512_fp28_p. No {sae} form
// is added at these lengths.
multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode, SizeItins itins> {
  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp28_p<opc, !strconcat(OpcodeStr, "ps"), v4f32x_info,
                                OpNode, itins.s>,
                  EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp28_p<opc, !strconcat(OpcodeStr, "ps"), v8f32x_info,
                                OpNode, itins.s>,
                  EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp28_p<opc, !strconcat(OpcodeStr, "pd"), v2f64x_info,
                                OpNode, itins.d>,
                  EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp28_p<opc, !strconcat(OpcodeStr, "pd"), v4f64x_info,
                                OpNode, itins.d>,
                  EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
  }
}
// Instantiations of avx512_eri. VRSQRT28 (0xCC), VRCP28 (0xCA) and VEXP2
// (0xC8) are wrapped in a HasERI predicate block.
Craig Toppere1cac152016-06-07 07:27:54 +00007854let Predicates = [HasERI] in {
Michael Liao5bf95782014-12-04 05:20:33 +00007855
Simon Pilgrim3e5987c2017-11-30 10:48:47 +00007856 defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, SSE_RSQRT_P>, EVEX;
7857 defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, SSE_RCP_P>, EVEX;
7858 defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, SSE_ALU_ITINS_P>, EVEX;
Asaf Badouh402ebb32015-06-03 13:41:48 +00007859}
// VGETEXP (0x42) is declared after the HasERI block closes, so that predicate
// does not apply to it. It combines avx512_eri (512-bit forms) with
// avx512_fp_unaryop_packed (128/256-bit forms guarded by HasVLX).
Simon Pilgrim3e5987c2017-11-30 10:48:47 +00007860defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd, SSE_ALU_ITINS_P>,
7861 avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd,
7862 SSE_ALU_ITINS_P>, EVEX;
Asaf Badouh402ebb32015-06-03 13:41:48 +00007863
// avx512_sqrt_packed_round: emits one register-source record, `rb`, that takes
// an extra AVX512RC:$rc operand. The pattern matches X86fsqrtRnd with the
// rounding control supplied as (i32 imm:$rc); the record is tagged EVEX,
// EVEX_B and EVEX_RC.
Simon Pilgrim647dd6a2017-11-27 16:43:18 +00007864multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr, OpndItins itins,
Craig Topper80405072017-11-11 08:24:12 +00007865 X86VectorVTInfo _>{
Craig Topper176f3312017-02-25 19:18:11 +00007866 let ExeDomain = _.ExeDomain in
Asaf Badouh402ebb32015-06-03 13:41:48 +00007867 defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7868 (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
Simon Pilgrim647dd6a2017-11-27 16:43:18 +00007869 (_.VT (X86fsqrtRnd _.RC:$src, (i32 imm:$rc))), itins.rr>,
7870 EVEX, EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
Elena Demikhovskybe8808d2014-11-12 07:31:03 +00007871}
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00007872
// avx512_sqrt_packed: the three packed fsqrt forms for one vector type `_`:
//   r  - register source; pattern is fsqrt of _.RC:$src.
//   m  - full-vector memory source loaded through _.LdFrag and bitconverted
//        to _.FloatVT.
//   mb - scalar memory source (_.ScalarMemOp) expanded with X86VBroadcast;
//        the asm string appends _.BroadcastStr and the record sets EVEX_B.
// The memory forms schedule with itins.Sched.Folded plus ReadAfterLd.
Simon Pilgrim647dd6a2017-11-27 16:43:18 +00007873multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr, OpndItins itins,
Craig Topper80405072017-11-11 08:24:12 +00007874 X86VectorVTInfo _>{
Craig Topper176f3312017-02-25 19:18:11 +00007875 let ExeDomain = _.ExeDomain in {
Robert Khasanov1cf354c2014-10-28 18:22:41 +00007876 defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
Robert Khasanoveb126392014-10-28 18:15:20 +00007877 (ins _.RC:$src), OpcodeStr, "$src", "$src",
Simon Pilgrim647dd6a2017-11-27 16:43:18 +00007878 (_.FloatVT (fsqrt _.RC:$src)), itins.rr>, EVEX,
7879 Sched<[itins.Sched]>;
Craig Toppere1cac152016-06-07 07:27:54 +00007880 defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
7881 (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
Craig Topper80405072017-11-11 08:24:12 +00007882 (fsqrt (_.FloatVT
Simon Pilgrim647dd6a2017-11-27 16:43:18 +00007883 (bitconvert (_.LdFrag addr:$src)))), itins.rm>, EVEX,
7884 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Craig Toppere1cac152016-06-07 07:27:54 +00007885 defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
7886 (ins _.ScalarMemOp:$src), OpcodeStr,
7887 "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
Craig Topper80405072017-11-11 08:24:12 +00007888 (fsqrt (_.FloatVT
Simon Pilgrim647dd6a2017-11-27 16:43:18 +00007889 (X86VBroadcast (_.ScalarLdFrag addr:$src)))), itins.rm>,
7890 EVEX, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Craig Topper176f3312017-02-25 19:18:11 +00007891 }
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007892}
7893
// avx512_sqrt_packed_all: instantiates avx512_sqrt_packed for every packed
// width. PSZ/PDZ (512-bit) carry no extra predicate in this multiclass; the
// 128/256-bit variants are wrapped in HasVLX. "ps" variants use SSE_SQRTPS,
// the PS prefix and EVEX_CD8<32, CD8VF>; "pd" variants use SSE_SQRTPD, the PD
// prefix, VEX_W and EVEX_CD8<64, CD8VF>.
Craig Topper80405072017-11-11 08:24:12 +00007894multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr> {
Simon Pilgrim647dd6a2017-11-27 16:43:18 +00007895 defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"), SSE_SQRTPS, v16f32_info>,
Robert Khasanoveb126392014-10-28 18:15:20 +00007896 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
Simon Pilgrim647dd6a2017-11-27 16:43:18 +00007897 defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"), SSE_SQRTPD, v8f64_info>,
Robert Khasanoveb126392014-10-28 18:15:20 +00007898 EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
7899 // Define only if AVX512VL feature is present.
7900 let Predicates = [HasVLX] in {
7901 defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
Simon Pilgrim647dd6a2017-11-27 16:43:18 +00007902 SSE_SQRTPS, v4f32x_info>,
Robert Khasanoveb126392014-10-28 18:15:20 +00007903 EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
7904 defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
Simon Pilgrim647dd6a2017-11-27 16:43:18 +00007905 SSE_SQRTPS, v8f32x_info>,
Robert Khasanoveb126392014-10-28 18:15:20 +00007906 EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
7907 defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
Simon Pilgrim647dd6a2017-11-27 16:43:18 +00007908 SSE_SQRTPD, v2f64x_info>,
Robert Khasanoveb126392014-10-28 18:15:20 +00007909 EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
7910 defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
Simon Pilgrim647dd6a2017-11-27 16:43:18 +00007911 SSE_SQRTPD, v4f64x_info>,
Robert Khasanoveb126392014-10-28 18:15:20 +00007912 EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
7913 }
7914}
7915
// avx512_sqrt_packed_all_round: instantiates avx512_sqrt_packed_round for the
// two 512-bit types only (v16f32_info as PSZ, v8f64_info as PDZ). The defm
// names match those in avx512_sqrt_packed_all, so when both multiclasses are
// combined the `rb` record lands next to the r/m/mb records of the same prefix.
Craig Topper80405072017-11-11 08:24:12 +00007916multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr> {
Simon Pilgrim647dd6a2017-11-27 16:43:18 +00007917 defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"), SSE_SQRTPS,
Asaf Badouh402ebb32015-06-03 13:41:48 +00007918 v16f32_info>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
Simon Pilgrim647dd6a2017-11-27 16:43:18 +00007919 defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"), SSE_SQRTPD,
Asaf Badouh402ebb32015-06-03 13:41:48 +00007920 v8f64_info>, EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
7921}
7922
// avx512_sqrt_scalar: scalar square-root records plus the patterns that select
// them.
//   itins - itinerary bundle (rr / rm / Sched are read).
//   _     - X86VectorVTInfo for the scalar type (RC, FRC, VT, EltVT, ...).
//   SUFF  - string spliced into the record names looked up by the patterns
//           below (NAME#SUFF#Zr, NAME#SUFF#Zm, and the _Int variants).
//   Intr  - intrinsic matched by the two `Intr` patterns.
// Records produced:
//   r_Int / m_Int - maskable vector-register forms matching X86fsqrtRnds with
//                   FROUND_CURRENT.
//   rb_Int        - as r_Int but with an AVX512RC:$rc operand passed to
//                   X86fsqrtRnds as (i32 imm:$rc); tagged EVEX_B, EVEX_RC.
//   r / m         - isCodeGenOnly forms on _.FRC with an empty pattern list;
//                   they are only reachable through the Pat records below.
Simon Pilgrim647dd6a2017-11-27 16:43:18 +00007923multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, OpndItins itins,
7924 X86VectorVTInfo _, string SUFF, Intrinsic Intr> {
Craig Topper176f3312017-02-25 19:18:11 +00007925 let ExeDomain = _.ExeDomain in {
Clement Courbet41a13742018-01-15 12:05:33 +00007926 defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
Igor Breger4c4cd782015-09-20 09:13:41 +00007927 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
7928 "$src2, $src1", "$src1, $src2",
Craig Topper80405072017-11-11 08:24:12 +00007929 (X86fsqrtRnds (_.VT _.RC:$src1),
Igor Breger4c4cd782015-09-20 09:13:41 +00007930 (_.VT _.RC:$src2),
Simon Pilgrim647dd6a2017-11-27 16:43:18 +00007931 (i32 FROUND_CURRENT)), itins.rr>,
7932 Sched<[itins.Sched]>;
Clement Courbet41a13742018-01-15 12:05:33 +00007933 defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7934 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
7935 "$src2, $src1", "$src1, $src2",
7936 (X86fsqrtRnds (_.VT _.RC:$src1),
7937 _.ScalarIntMemCPat:$src2,
7938 (i32 FROUND_CURRENT)), itins.rm>,
7939 Sched<[itins.Sched.Folded, ReadAfterLd]>;
7940 defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
Igor Breger4c4cd782015-09-20 09:13:41 +00007941 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
7942 "$rc, $src2, $src1", "$src1, $src2, $rc",
Craig Topper80405072017-11-11 08:24:12 +00007943 (X86fsqrtRnds (_.VT _.RC:$src1),
Igor Breger4c4cd782015-09-20 09:13:41 +00007944 (_.VT _.RC:$src2),
Simon Pilgrim647dd6a2017-11-27 16:43:18 +00007945 (i32 imm:$rc)), itins.rr>,
Craig Toppera2f55282017-12-10 03:16:36 +00007946 EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
Igor Breger4c4cd782015-09-20 09:13:41 +00007947
Clement Courbet41a13742018-01-15 12:05:33 +00007948 let isCodeGenOnly = 1, hasSideEffects = 0, Predicates=[HasAVX512] in {
7949 def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
7950 (ins _.FRC:$src1, _.FRC:$src2),
7951 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
7952 itins.rr>, Sched<[itins.Sched]>;
7953 let mayLoad = 1 in
7954 def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
7955 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
7956 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
7957 itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
7958 }
Craig Topper176f3312017-02-25 19:18:11 +00007959 }
Igor Breger4c4cd782015-09-20 09:13:41 +00007960
// Register-source selection: plain fsqrt on _.FRC goes to the `r` form with
// IMPLICIT_DEF as the first source; the intrinsic goes to `r_Int` with the
// same VR128X:$src supplied for both source operands.
Clement Courbet41a13742018-01-15 12:05:33 +00007961 let Predicates = [HasAVX512] in {
7962 def : Pat<(_.EltVT (fsqrt _.FRC:$src)),
7963 (!cast<Instruction>(NAME#SUFF#Zr)
7964 (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
Igor Breger4c4cd782015-09-20 09:13:41 +00007965
Clement Courbet41a13742018-01-15 12:05:33 +00007966 def : Pat<(Intr VR128X:$src),
7967 (!cast<Instruction>(NAME#SUFF#Zr_Int) VR128X:$src,
Craig Toppereff606c2017-11-06 04:04:01 +00007968 VR128X:$src)>;
Clement Courbet41a13742018-01-15 12:05:33 +00007969 }
Craig Toppereff606c2017-11-06 04:04:01 +00007970
// Load-folding selection is additionally gated on OptForSize. Both patterns
// feed IMPLICIT_DEF as the first source operand.
Clement Courbet41a13742018-01-15 12:05:33 +00007971 let Predicates = [HasAVX512, OptForSize] in {
7972 def : Pat<(_.EltVT (fsqrt (load addr:$src))),
7973 (!cast<Instruction>(NAME#SUFF#Zm)
7974 (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
Craig Toppereff606c2017-11-06 04:04:01 +00007975
Clement Courbet41a13742018-01-15 12:05:33 +00007976 def : Pat<(Intr _.ScalarIntMemCPat:$src2),
7977 (!cast<Instruction>(NAME#SUFF#Zm_Int)
7978 (_.VT (IMPLICIT_DEF)), addr:$src2)>;
7979 }
Craig Topperd6471cb2017-11-05 21:14:06 +00007980}
Igor Breger4c4cd782015-09-20 09:13:41 +00007981
// avx512_sqrt_scalar_all: instantiates avx512_sqrt_scalar for f32 (SSZ, suffix
// "SS", intrinsic int_x86_sse_sqrt_ss, XS prefix) and f64 (SDZ, suffix "SD",
// intrinsic int_x86_sse2_sqrt_sd, XD prefix, VEX_W). Both are marked
// NotMemoryFoldable.
// NOTE(review): the scalar forms are given SSE_SQRTPS / SSE_SQRTPD, whose
// names read as the packed itineraries - confirm whether scalar-specific
// itinerary classes were intended here.
7982multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr> {
Simon Pilgrim647dd6a2017-11-27 16:43:18 +00007983 defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", SSE_SQRTPS, f32x_info, "SS",
Craig Topper80405072017-11-11 08:24:12 +00007984 int_x86_sse_sqrt_ss>,
Craig Toppereff606c2017-11-06 04:04:01 +00007985 EVEX_CD8<32, CD8VT1>, EVEX_4V, XS, NotMemoryFoldable;
Simon Pilgrim647dd6a2017-11-27 16:43:18 +00007986 defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", SSE_SQRTPD, f64x_info, "SD",
Craig Topper80405072017-11-11 08:24:12 +00007987 int_x86_sse2_sqrt_sd>,
Craig Toppereff606c2017-11-06 04:04:01 +00007988 EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W,
Ayman Musa5fc6dc52017-10-08 08:32:56 +00007989 NotMemoryFoldable;
Igor Breger4c4cd782015-09-20 09:13:41 +00007990}
7991
// VSQRT is instantiated twice with opcode 0x51: once for the packed forms
// (all widths plus the 512-bit rounding-control forms) and once for the
// scalar forms, which additionally carry VEX_LIG.
Craig Topper80405072017-11-11 08:24:12 +00007992defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt">,
7993 avx512_sqrt_packed_all_round<0x51, "vsqrt">;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007994
Igor Breger4c4cd782015-09-20 09:13:41 +00007995defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt">, VEX_LIG;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007996
// avx512_rndscale_scalar: scalar round-scale records plus selection patterns
// for the generic rounding nodes.
// Records produced:
//   r_Int  - maskable register form matching X86RndScales with (i32 imm:$src3).
//   rb_Int - as r_Int but printing "{sae}" and matching X86RndScalesRnd with
//            FROUND_NO_EXC; tagged EVEX_B.
//   m_Int  - maskable memory form using _.IntScalarMemOp /
//            _.ScalarIntMemCPat.
//   r / m  - isCodeGenOnly forms on _.FRC with an empty pattern list; they are
//            selected only through the Pat records further down.
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00007997multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
7998 OpndItins itins, X86VectorVTInfo _> {
Elena Demikhovsky52e81bc2015-02-23 15:12:31 +00007999 let ExeDomain = _.ExeDomain in {
Craig Topper0ccec702017-11-11 08:24:15 +00008000 defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
Elena Demikhovsky52e81bc2015-02-23 15:12:31 +00008001 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
8002 "$src3, $src2, $src1", "$src1, $src2, $src3",
Asaf Badouha5b2e5e2015-07-22 12:00:43 +00008003 (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00008004 (i32 imm:$src3))), itins.rr>,
8005 Sched<[itins.Sched]>;
Elena Demikhovsky52e81bc2015-02-23 15:12:31 +00008006
Craig Topper0ccec702017-11-11 08:24:15 +00008007 defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
Elena Demikhovsky52e81bc2015-02-23 15:12:31 +00008008 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
Asaf Badouha5b2e5e2015-07-22 12:00:43 +00008009 "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
Craig Topper0af48f12017-11-13 02:02:58 +00008010 (_.VT (X86RndScalesRnd (_.VT _.RC:$src1), (_.VT _.RC:$src2),
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00008011 (i32 imm:$src3), (i32 FROUND_NO_EXC))), itins.rr>, EVEX_B,
8012 Sched<[itins.Sched]>;
Elena Demikhovsky52e81bc2015-02-23 15:12:31 +00008013
Craig Topper0ccec702017-11-11 08:24:15 +00008014 defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
Craig Topperbece74c2017-11-19 06:24:26 +00008015 (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
Simon Pilgrimb13961d2016-06-11 14:34:10 +00008016 OpcodeStr,
Elena Demikhovsky52e81bc2015-02-23 15:12:31 +00008017 "$src3, $src2, $src1", "$src1, $src2, $src3",
Craig Topperdeee24b2017-11-13 02:03:01 +00008018 (_.VT (X86RndScales _.RC:$src1,
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00008019 _.ScalarIntMemCPat:$src2, (i32 imm:$src3))), itins.rm>,
8020 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Elena Demikhovsky52e81bc2015-02-23 15:12:31 +00008021
Clement Courbetda1fad32018-01-15 14:24:07 +00008022 let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
Craig Topper0ccec702017-11-11 08:24:15 +00008023 def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
8024 (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
8025 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00008026 [], itins.rr>, Sched<[itins.Sched]>;
Craig Topper0ccec702017-11-11 08:24:15 +00008027
8028 let mayLoad = 1 in
8029 def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
8030 (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
8031 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00008032 [], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Craig Topper0ccec702017-11-11 08:24:15 +00008033 }
8034 }
8035
// Register-source lowering of the generic rounding nodes onto the `r` form.
// Each node is given a fixed immediate: ffloor 0x9, fceil 0xa, ftrunc 0xb,
// frint 0x4, fnearbyint 0xc; the first source operand is IMPLICIT_DEF.
// NOTE(review): the immediates presumably combine a rounding-mode field with
// an exception-control bit - confirm against the VRNDSCALE imm8 definition.
8036 let Predicates = [HasAVX512] in {
8037 def : Pat<(ffloor _.FRC:$src),
8038 (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
8039 _.FRC:$src, (i32 0x9)))>;
8040 def : Pat<(fceil _.FRC:$src),
8041 (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
8042 _.FRC:$src, (i32 0xa)))>;
8043 def : Pat<(ftrunc _.FRC:$src),
8044 (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
8045 _.FRC:$src, (i32 0xb)))>;
8046 def : Pat<(frint _.FRC:$src),
8047 (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
8048 _.FRC:$src, (i32 0x4)))>;
8049 def : Pat<(fnearbyint _.FRC:$src),
8050 (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
8051 _.FRC:$src, (i32 0xc)))>;
8052 }
8053
// Same five nodes with a folded scalar load, selected onto the `m` form with
// the same immediates. These patterns are additionally gated on OptForSize.
8054 let Predicates = [HasAVX512, OptForSize] in {
8055 def : Pat<(ffloor (_.ScalarLdFrag addr:$src)),
8056 (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
8057 addr:$src, (i32 0x9)))>;
8058 def : Pat<(fceil (_.ScalarLdFrag addr:$src)),
8059 (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
8060 addr:$src, (i32 0xa)))>;
8061 def : Pat<(ftrunc (_.ScalarLdFrag addr:$src)),
8062 (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
8063 addr:$src, (i32 0xb)))>;
8064 def : Pat<(frint (_.ScalarLdFrag addr:$src)),
8065 (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
8066 addr:$src, (i32 0x4)))>;
8067 def : Pat<(fnearbyint (_.ScalarLdFrag addr:$src)),
8068 (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
8069 addr:$src, (i32 0xc)))>;
Elena Demikhovsky52e81bc2015-02-23 15:12:31 +00008070 }
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00008071}
8072
// Scalar round-scale instantiations: opcode 0x0A with f32x_info for the "ss"
// form and opcode 0x0B with f64x_info (plus VEX_W) for the "sd" form. Both are
// AVX512AIi8Base / EVEX_4V with a CD8VT1 displacement scale.
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00008073defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", SSE_ALU_F32S,
8074 f32x_info>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VT1>;
Michael Liao5bf95782014-12-04 05:20:33 +00008075
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00008076defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", SSE_ALU_F64S,
8077 f64x_info>, VEX_W, AVX512AIi8Base, EVEX_4V,
8078 EVEX_CD8<64, CD8VT1>;
Eric Christopher0d94fa92015-02-20 00:45:28 +00008079
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008080//-------------------------------------------------
8081// Integer truncate and extend operations
8082//-------------------------------------------------
8083
// Itinerary bundles for the extend and truncate instruction families. The two
// definitions are currently identical: both use WriteShuffle256 as Sched and
// the IIC_SSE_PSHUF_RI / IIC_SSE_PSHUF_MI itinerary classes. They are kept as
// separate records, so the two families can be given different scheduling
// data without touching the users.
Simon Pilgrim833c2602017-12-05 19:21:28 +00008084let Sched = WriteShuffle256 in
8085def AVX512_EXTEND : OpndItins<
8086 IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI
8087>;
8088
8089let Sched = WriteShuffle256 in
8090def AVX512_TRUNCATE : OpndItins<
8091 IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI
8092>;
8093
// avx512_trunc_common: the three records shared by every truncate width.
//   rr  - maskable register form (MRMDestReg); pattern applies OpNode to
//         SrcInfo.VT and yields DestInfo.VT.
//   mr  - store form (MRMDestMem) with an empty pattern list.
//   mrk - write-masked store form taking SrcInfo.KRCWM:$mask; tagged EVEX_K,
//         also with an empty pattern list.
// Because mr/mrk carry no patterns, store selection has to be supplied
// separately by Pat records that name them.
Igor Breger074a64e2015-07-24 17:24:15 +00008094multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim833c2602017-12-05 19:21:28 +00008095 OpndItins itins, X86VectorVTInfo SrcInfo,
8096 X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
Craig Topper52e2e832016-07-22 05:46:44 +00008097 let ExeDomain = DestInfo.ExeDomain in
Igor Breger074a64e2015-07-24 17:24:15 +00008098 defm rr : AVX512_maskable<opc, MRMDestReg, DestInfo, (outs DestInfo.RC:$dst),
8099 (ins SrcInfo.RC:$src1), OpcodeStr ,"$src1", "$src1",
Simon Pilgrim833c2602017-12-05 19:21:28 +00008100 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1))),
8101 itins.rr>, EVEX, T8XS, Sched<[itins.Sched]>;
Igor Breger074a64e2015-07-24 17:24:15 +00008102
// NOTE(review): both records below are MRMDestMem forms with no outputs, yet
// mayLoad = 1 is set alongside mayStore = 1 - confirm the load flag is
// intentional.
Craig Topper52e2e832016-07-22 05:46:44 +00008103 let mayStore = 1, mayLoad = 1, hasSideEffects = 0,
8104 ExeDomain = DestInfo.ExeDomain in {
Igor Breger074a64e2015-07-24 17:24:15 +00008105 def mr : AVX512XS8I<opc, MRMDestMem, (outs),
8106 (ins x86memop:$dst, SrcInfo.RC:$src),
Craig Topper9feea572016-01-11 00:44:58 +00008107 OpcodeStr # "\t{$src, $dst|$dst, $src}",
Simon Pilgrim833c2602017-12-05 19:21:28 +00008108 [], itins.rm>, EVEX, Sched<[itins.Sched.Folded]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008109
Igor Breger074a64e2015-07-24 17:24:15 +00008110 def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
8111 (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
Craig Topper9feea572016-01-11 00:44:58 +00008112 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
Simon Pilgrim833c2602017-12-05 19:21:28 +00008113 [], itins.rm>, EVEX, EVEX_K, Sched<[itins.Sched.Folded]>;
Craig Topper99f6b622016-05-01 01:03:56 +00008114 }//mayStore = 1, mayLoad = 1, hasSideEffects = 0
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008115}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008116
// avx512_trunc_mr_lowering: selection patterns for the pattern-less store
// records. truncFrag (value, address) selects the record named
// NAME#SrcInfo.ZSuffix##mr; mtruncFrag (address, mask, value) selects the
// record named NAME#SrcInfo.ZSuffix##mrk. DestInfo is accepted as a parameter
// but is not referenced in the body.
Igor Breger074a64e2015-07-24 17:24:15 +00008117multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
8118 X86VectorVTInfo DestInfo,
8119 PatFrag truncFrag, PatFrag mtruncFrag > {
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008120
Igor Breger074a64e2015-07-24 17:24:15 +00008121 def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
8122 (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mr)
8123 addr:$dst, SrcInfo.RC:$src)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008124
Igor Breger074a64e2015-07-24 17:24:15 +00008125 def : Pat<(mtruncFrag addr:$dst, SrcInfo.KRCWM:$mask,
8126 (SrcInfo.VT SrcInfo.RC:$src)),
8127 (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mrk)
8128 addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
8129}
8130
// avx512_trunc: instantiates avx512_trunc_common together with
// avx512_trunc_mr_lowering at each vector length.
//   OpNode128/256/512 - node matched by the rr pattern at each width; they are
//                       separate parameters so callers can pass a different
//                       node per width.
//   VTSrcInfo         - source type family (info128 / info256 / info512).
//   DestInfoZ128/Z256/Z, x86memopZ128/Z256/Z - destination type and store
//                       operand per width.
//   prd               - base feature predicate, HasAVX512 by default.
// Z128 and Z256 require [HasVLX, prd]; Z requires only [prd].
Craig Topperb2868232018-01-14 08:11:36 +00008131multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
8132 SDNode OpNode256, SDNode OpNode512, OpndItins itins,
8133 AVX512VLVectorVTInfo VTSrcInfo,
8134 X86VectorVTInfo DestInfoZ128,
8135 X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
8136 X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
8137 X86MemOperand x86memopZ, PatFrag truncFrag,
8138 PatFrag mtruncFrag, Predicate prd = HasAVX512>{
Igor Breger074a64e2015-07-24 17:24:15 +00008139
8140 let Predicates = [HasVLX, prd] in {
Craig Topperb2868232018-01-14 08:11:36 +00008141 defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode128, itins,
Simon Pilgrim833c2602017-12-05 19:21:28 +00008142 VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
Igor Breger074a64e2015-07-24 17:24:15 +00008143 avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
8144 truncFrag, mtruncFrag>, EVEX_V128;
8145
Craig Topperb2868232018-01-14 08:11:36 +00008146 defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode256, itins,
Simon Pilgrim833c2602017-12-05 19:21:28 +00008147 VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
Igor Breger074a64e2015-07-24 17:24:15 +00008148 avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
8149 truncFrag, mtruncFrag>, EVEX_V256;
8150 }
8151 let Predicates = [prd] in
Craig Topperb2868232018-01-14 08:11:36 +00008152 defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode512, itins,
Simon Pilgrim833c2602017-12-05 19:21:28 +00008153 VTSrcInfo.info512, DestInfoZ, x86memopZ>,
Igor Breger074a64e2015-07-24 17:24:15 +00008154 avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
8155 truncFrag, mtruncFrag>, EVEX_V512;
8156}
8157
// avx512_trunc_qb: i64 -> i8 truncation. InVecNode is used at all three
// widths and the destination is v16i8x_info at every width; OpNode is only
// used as the default value of InVecNode. Store operands are i16mem, i32mem
// and i64mem for the 128/256/512-bit sources.
Elena Demikhovsky7c7bf1b2016-12-21 10:43:36 +00008158multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim833c2602017-12-05 19:21:28 +00008159 OpndItins itins, PatFrag StoreNode,
Craig Topperb2868232018-01-14 08:11:36 +00008160 PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
8161 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode, itins,
8162 avx512vl_i64_info, v16i8x_info, v16i8x_info,
8163 v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
8164 MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
Igor Breger074a64e2015-07-24 17:24:15 +00008165}
8166
// avx512_trunc_qw: i64 -> i16 truncation. InVecNode is used for the 128- and
// 256-bit sources and OpNode for the 512-bit source; the destination is
// v8i16x_info at every width. Store operands are i32mem, i64mem and i128mem.
Elena Demikhovsky7c7bf1b2016-12-21 10:43:36 +00008167multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim833c2602017-12-05 19:21:28 +00008168 OpndItins itins, PatFrag StoreNode,
Craig Topperb2868232018-01-14 08:11:36 +00008169 PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
8170 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode, itins,
8171 avx512vl_i64_info, v8i16x_info, v8i16x_info,
8172 v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
8173 MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
Igor Breger074a64e2015-07-24 17:24:15 +00008174}
8175
// avx512_trunc_qd: i64 -> i32 truncation. InVecNode is used only for the
// 128-bit source; OpNode is used for 256 and 512. Destinations are
// v4i32x_info, v4i32x_info and v8i32x_info; store operands are i64mem,
// i128mem and i256mem.
Elena Demikhovsky7c7bf1b2016-12-21 10:43:36 +00008176multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim833c2602017-12-05 19:21:28 +00008177 OpndItins itins, PatFrag StoreNode,
Craig Topperb2868232018-01-14 08:11:36 +00008178 PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
8179 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, itins,
8180 avx512vl_i64_info, v4i32x_info, v4i32x_info,
8181 v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
8182 MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
Igor Breger074a64e2015-07-24 17:24:15 +00008183}
8184
// avx512_trunc_db: i32 -> i8 truncation. InVecNode is used for the 128- and
// 256-bit sources and OpNode for the 512-bit source; the destination is
// v16i8x_info at every width. Store operands are i32mem, i64mem and i128mem.
Elena Demikhovsky7c7bf1b2016-12-21 10:43:36 +00008185multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim833c2602017-12-05 19:21:28 +00008186 OpndItins itins, PatFrag StoreNode,
Craig Topperb2868232018-01-14 08:11:36 +00008187 PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
8188 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode, itins,
8189 avx512vl_i32_info, v16i8x_info, v16i8x_info,
8190 v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
8191 MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
Igor Breger074a64e2015-07-24 17:24:15 +00008192}
8193
// avx512_trunc_dw: i32 -> i16 truncation. InVecNode is used only for the
// 128-bit source; OpNode is used for 256 and 512. Destinations are
// v8i16x_info, v8i16x_info and v16i16x_info; store operands are i64mem,
// i128mem and i256mem.
Elena Demikhovsky7c7bf1b2016-12-21 10:43:36 +00008194multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim833c2602017-12-05 19:21:28 +00008195 OpndItins itins, PatFrag StoreNode,
Craig Topperb2868232018-01-14 08:11:36 +00008196 PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
8197 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, itins,
8198 avx512vl_i32_info, v8i16x_info, v8i16x_info,
8199 v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
8200 MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
Igor Breger074a64e2015-07-24 17:24:15 +00008201}
8202
// avx512_trunc_wb: i16 -> i8 truncation. InVecNode is used only for the
// 128-bit source; OpNode is used for 256 and 512. Destinations are
// v16i8x_info, v16i8x_info and v32i8x_info; store operands are i64mem,
// i128mem and i256mem. Unlike the other avx512_trunc_* wrappers, this one
// passes HasBWI as the base predicate instead of relying on the HasAVX512
// default.
Elena Demikhovsky7c7bf1b2016-12-21 10:43:36 +00008203multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim833c2602017-12-05 19:21:28 +00008204 OpndItins itins, PatFrag StoreNode,
Craig Topperb2868232018-01-14 08:11:36 +00008205 PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
8206 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
8207 itins, avx512vl_i16_info, v16i8x_info, v16i8x_info,
8208 v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
8209 MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
Igor Breger074a64e2015-07-24 17:24:15 +00008210}
8211
// Truncate instantiations. Each source/destination pair comes in three
// flavours that share a multiclass: the plain form (passes `trunc` as OpNode
// and X86vtrunc as the explicit InVecNode), the "s" form (X86vtruncs) and the
// "us" form (X86vtruncus). The s/us forms omit InVecNode, so it defaults to
// their OpNode. All of them use the AVX512_TRUNCATE itinerary bundle and a
// matching pair of truncating-store / masked-truncating-store fragments.
Craig Topperb2868232018-01-14 08:11:36 +00008212defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", trunc, AVX512_TRUNCATE,
8213 truncstorevi8, masked_truncstorevi8, X86vtrunc>;
Simon Pilgrim833c2602017-12-05 19:21:28 +00008214defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb", X86vtruncs, AVX512_TRUNCATE,
Elena Demikhovsky7c7bf1b2016-12-21 10:43:36 +00008215 truncstore_s_vi8, masked_truncstore_s_vi8>;
Simon Pilgrim833c2602017-12-05 19:21:28 +00008216defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus, AVX512_TRUNCATE,
Elena Demikhovsky7c7bf1b2016-12-21 10:43:36 +00008217 truncstore_us_vi8, masked_truncstore_us_vi8>;
Igor Breger074a64e2015-07-24 17:24:15 +00008218
Craig Topperb2868232018-01-14 08:11:36 +00008219defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", trunc, AVX512_TRUNCATE,
8220 truncstorevi16, masked_truncstorevi16, X86vtrunc>;
Simon Pilgrim833c2602017-12-05 19:21:28 +00008221defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, AVX512_TRUNCATE,
Elena Demikhovsky7c7bf1b2016-12-21 10:43:36 +00008222 truncstore_s_vi16, masked_truncstore_s_vi16>;
Simon Pilgrim833c2602017-12-05 19:21:28 +00008223defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus, AVX512_TRUNCATE,
Elena Demikhovsky7c7bf1b2016-12-21 10:43:36 +00008224 truncstore_us_vi16, masked_truncstore_us_vi16>;
Igor Breger074a64e2015-07-24 17:24:15 +00008225
Craig Topperb2868232018-01-14 08:11:36 +00008226defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", trunc, AVX512_TRUNCATE,
8227 truncstorevi32, masked_truncstorevi32, X86vtrunc>;
Simon Pilgrim833c2602017-12-05 19:21:28 +00008228defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, AVX512_TRUNCATE,
Elena Demikhovsky7c7bf1b2016-12-21 10:43:36 +00008229 truncstore_s_vi32, masked_truncstore_s_vi32>;
Simon Pilgrim833c2602017-12-05 19:21:28 +00008230defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus, AVX512_TRUNCATE,
Elena Demikhovsky7c7bf1b2016-12-21 10:43:36 +00008231 truncstore_us_vi32, masked_truncstore_us_vi32>;
Igor Breger074a64e2015-07-24 17:24:15 +00008232
Craig Topperb2868232018-01-14 08:11:36 +00008233defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", trunc, AVX512_TRUNCATE,
8234 truncstorevi8, masked_truncstorevi8, X86vtrunc>;
Simon Pilgrim833c2602017-12-05 19:21:28 +00008235defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, AVX512_TRUNCATE,
Elena Demikhovsky7c7bf1b2016-12-21 10:43:36 +00008236 truncstore_s_vi8, masked_truncstore_s_vi8>;
Simon Pilgrim833c2602017-12-05 19:21:28 +00008237defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus, AVX512_TRUNCATE,
Elena Demikhovsky7c7bf1b2016-12-21 10:43:36 +00008238 truncstore_us_vi8, masked_truncstore_us_vi8>;
Igor Breger074a64e2015-07-24 17:24:15 +00008239
Craig Topperb2868232018-01-14 08:11:36 +00008240defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", trunc, AVX512_TRUNCATE,
8241 truncstorevi16, masked_truncstorevi16, X86vtrunc>;
Simon Pilgrim833c2602017-12-05 19:21:28 +00008242defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, AVX512_TRUNCATE,
Elena Demikhovsky7c7bf1b2016-12-21 10:43:36 +00008243 truncstore_s_vi16, masked_truncstore_s_vi16>;
Simon Pilgrim833c2602017-12-05 19:21:28 +00008244defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus, AVX512_TRUNCATE,
Elena Demikhovsky7c7bf1b2016-12-21 10:43:36 +00008245 truncstore_us_vi16, masked_truncstore_us_vi16>;
Igor Breger074a64e2015-07-24 17:24:15 +00008246
Craig Topperb2868232018-01-14 08:11:36 +00008247defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", trunc, AVX512_TRUNCATE,
8248 truncstorevi8, masked_truncstorevi8, X86vtrunc>;
Simon Pilgrim833c2602017-12-05 19:21:28 +00008249defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, AVX512_TRUNCATE,
Elena Demikhovsky7c7bf1b2016-12-21 10:43:36 +00008250 truncstore_s_vi8, masked_truncstore_s_vi8>;
Simon Pilgrim833c2602017-12-05 19:21:28 +00008251defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus, AVX512_TRUNCATE,
Elena Demikhovsky7c7bf1b2016-12-21 10:43:36 +00008252 truncstore_us_vi8, masked_truncstore_us_vi8>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008253
// Without VLX, a 256-bit `trunc` is selected by widening: the VR256X source is
// inserted at sub_ymm into an IMPLICIT_DEF 512-bit value, the 512-bit
// truncate (VPMOVDWZrr / VPMOVQDZrr) runs on it, and the sub_xmm part of the
// result is extracted.
Elena Demikhovskydb738d92015-11-01 11:45:47 +00008254let Predicates = [HasAVX512, NoVLX] in {
Craig Topperb2868232018-01-14 08:11:36 +00008255def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
Elena Demikhovskydb738d92015-11-01 11:45:47 +00008256 (v8i16 (EXTRACT_SUBREG
Craig Topper61403202016-09-19 02:53:43 +00008257 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
Elena Demikhovskydb738d92015-11-01 11:45:47 +00008258 VR256X:$src, sub_ymm)))), sub_xmm))>;
Craig Topperb2868232018-01-14 08:11:36 +00008259def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
Elena Demikhovskydb738d92015-11-01 11:45:47 +00008260 (v4i32 (EXTRACT_SUBREG
Craig Topper61403202016-09-19 02:53:43 +00008261 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
Elena Demikhovskydb738d92015-11-01 11:45:47 +00008262 VR256X:$src, sub_ymm)))), sub_xmm))>;
8263}
8264
// Same widening scheme for v16i16 -> v16i8 when BWI is present but VLX is
// not: insert the VR256X source at sub_ymm into an IMPLICIT_DEF v32i16, run
// VPMOVWBZrr, and extract sub_xmm from the result.
8265let Predicates = [HasBWI, NoVLX] in {
Craig Topperb2868232018-01-14 08:11:36 +00008266def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
Craig Topper61403202016-09-19 02:53:43 +00008267 (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
Elena Demikhovskydb738d92015-11-01 11:45:47 +00008268 VR256X:$src, sub_ymm))), sub_xmm))>;
8269}
8270
// avx512_extend_common: the two maskable records shared by every extend
// width.
//   rr - register source; pattern applies OpNode to SrcInfo.VT and yields
//        DestInfo.VT.
//   rm - memory source; the pattern is LdFrag applied to the address, and
//        OpNode is not part of it.
// Note the parameter order: DestInfo comes before SrcInfo here.
Simon Pilgrim833c2602017-12-05 19:21:28 +00008271multiclass avx512_extend_common<bits<8> opc, string OpcodeStr, OpndItins itins,
Igor Breger2ba64ab2016-05-22 10:21:04 +00008272 X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
Craig Topper6840f112016-07-14 06:41:34 +00008273 X86MemOperand x86memop, PatFrag LdFrag, SDPatternOperator OpNode>{
Craig Topper52e2e832016-07-22 05:46:44 +00008274 let ExeDomain = DestInfo.ExeDomain in {
Elena Demikhovsky3948c592015-05-27 08:15:19 +00008275 defm rr : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
8276 (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
Simon Pilgrim833c2602017-12-05 19:21:28 +00008277 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))), itins.rr>,
8278 EVEX, Sched<[itins.Sched]>;
Robert Khasanov189e7fd2014-04-22 11:36:19 +00008279
Craig Toppere1cac152016-06-07 07:27:54 +00008280 defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
8281 (ins x86memop:$src), OpcodeStr ,"$src", "$src",
Simon Pilgrim833c2602017-12-05 19:21:28 +00008282 (DestInfo.VT (LdFrag addr:$src)), itins.rm>,
8283 EVEX, Sched<[itins.Sched.Folded]>;
Craig Topper52e2e832016-07-22 05:46:44 +00008284 }
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008285}
8286
// avx512_extend_BW: i8 -> i16 extension at all three widths.
//   ExtTy  - string prefix used to build the default load fragment name,
//            ExtTy#"extloadvi8".
//   LdFrag - load fragment for the rm forms; defaults to the name above.
// Z128 uses InVecNode with a v16i8x_info source and an i64mem operand; Z256
// and Z use OpNode with i128mem / i256mem. Z128 and Z256 require
// [HasVLX, HasBWI]; Z requires [HasBWI].
Simon Pilgrimb13961d2016-06-11 14:34:10 +00008287multiclass avx512_extend_BW<bits<8> opc, string OpcodeStr,
Simon Pilgrim833c2602017-12-05 19:21:28 +00008288 SDPatternOperator OpNode, SDPatternOperator InVecNode, string ExtTy,
8289 OpndItins itins, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
Elena Demikhovsky3948c592015-05-27 08:15:19 +00008290 let Predicates = [HasVLX, HasBWI] in {
Simon Pilgrim833c2602017-12-05 19:21:28 +00008291 defm Z128: avx512_extend_common<opc, OpcodeStr, itins, v8i16x_info,
Simon Pilgrim9f5c2512017-03-05 09:57:20 +00008292 v16i8x_info, i64mem, LdFrag, InVecNode>,
Craig Toppera33846a2017-10-22 06:18:23 +00008293 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
Robert Khasanov189e7fd2014-04-22 11:36:19 +00008294
Simon Pilgrim833c2602017-12-05 19:21:28 +00008295 defm Z256: avx512_extend_common<opc, OpcodeStr, itins, v16i16x_info,
Craig Topper6840f112016-07-14 06:41:34 +00008296 v16i8x_info, i128mem, LdFrag, OpNode>,
Craig Toppera33846a2017-10-22 06:18:23 +00008297 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
Elena Demikhovsky3948c592015-05-27 08:15:19 +00008298 }
8299 let Predicates = [HasBWI] in {
Simon Pilgrim833c2602017-12-05 19:21:28 +00008300 defm Z : avx512_extend_common<opc, OpcodeStr, itins, v32i16_info,
Craig Topper6840f112016-07-14 06:41:34 +00008301 v32i8x_info, i256mem, LdFrag, OpNode>,
Craig Toppera33846a2017-10-22 06:18:23 +00008302 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
Elena Demikhovsky3948c592015-05-27 08:15:19 +00008303 }
8304}
8305
// avx512_extend_BD - byte-to-dword extend family.
//
// Instantiates avx512_extend_common three times, always with v16i8x_info as
// the second (presumably source - TODO confirm) info:
//   Z128 : v4i32x_info, i32mem,  InVecNode  [HasVLX, HasAVX512]
//   Z256 : v8i32x_info, i64mem,  OpNode     [HasVLX, HasAVX512]
//   Z    : v16i32_info, i128mem, OpNode     [HasAVX512]
//
// LdFrag defaults to the PatFrag named ExtTy#"extloadvi8". Every variant adds
// EVEX_CD8<8, CD8VQ>, T8PD and VEX_WIG.
Simon Pilgrimb13961d2016-06-11 14:34:10 +00008306multiclass avx512_extend_BD<bits<8> opc, string OpcodeStr,
Simon Pilgrim833c2602017-12-05 19:21:28 +00008307 SDPatternOperator OpNode, SDPatternOperator InVecNode, string ExtTy,
8308 OpndItins itins, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
Elena Demikhovsky3948c592015-05-27 08:15:19 +00008309 let Predicates = [HasVLX, HasAVX512] in {
Simon Pilgrim833c2602017-12-05 19:21:28 +00008310 defm Z128: avx512_extend_common<opc, OpcodeStr, itins, v4i32x_info,
Simon Pilgrim9f5c2512017-03-05 09:57:20 +00008311 v16i8x_info, i32mem, LdFrag, InVecNode>,
Craig Toppera33846a2017-10-22 06:18:23 +00008312 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
Elena Demikhovsky3948c592015-05-27 08:15:19 +00008313
Simon Pilgrim833c2602017-12-05 19:21:28 +00008314 defm Z256: avx512_extend_common<opc, OpcodeStr, itins, v8i32x_info,
Craig Topper6840f112016-07-14 06:41:34 +00008315 v16i8x_info, i64mem, LdFrag, OpNode>,
Craig Toppera33846a2017-10-22 06:18:23 +00008316 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
Elena Demikhovsky3948c592015-05-27 08:15:19 +00008317 }
8318 let Predicates = [HasAVX512] in {
Simon Pilgrim833c2602017-12-05 19:21:28 +00008319 defm Z : avx512_extend_common<opc, OpcodeStr, itins, v16i32_info,
Craig Topper6840f112016-07-14 06:41:34 +00008320 v16i8x_info, i128mem, LdFrag, OpNode>,
Craig Toppera33846a2017-10-22 06:18:23 +00008321 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
Elena Demikhovsky3948c592015-05-27 08:15:19 +00008322 }
8323}
8324
// avx512_extend_BQ - byte-to-qword extend family.
//
// Instantiates avx512_extend_common three times, always with v16i8x_info as
// the second (presumably source - TODO confirm) info:
//   Z128 : v2i64x_info, i16mem, InVecNode  [HasVLX, HasAVX512]
//   Z256 : v4i64x_info, i32mem, OpNode     [HasVLX, HasAVX512]
//   Z    : v8i64_info,  i64mem, OpNode     [HasAVX512]
//
// LdFrag defaults to the PatFrag named ExtTy#"extloadvi8". Every variant adds
// EVEX_CD8<8, CD8VO>, T8PD and VEX_WIG.
Simon Pilgrimb13961d2016-06-11 14:34:10 +00008325multiclass avx512_extend_BQ<bits<8> opc, string OpcodeStr,
Simon Pilgrim833c2602017-12-05 19:21:28 +00008326 SDPatternOperator OpNode, SDPatternOperator InVecNode, string ExtTy,
8327 OpndItins itins, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
Elena Demikhovsky3948c592015-05-27 08:15:19 +00008328 let Predicates = [HasVLX, HasAVX512] in {
Simon Pilgrim833c2602017-12-05 19:21:28 +00008329 defm Z128: avx512_extend_common<opc, OpcodeStr, itins, v2i64x_info,
Simon Pilgrim9f5c2512017-03-05 09:57:20 +00008330 v16i8x_info, i16mem, LdFrag, InVecNode>,
Craig Toppera33846a2017-10-22 06:18:23 +00008331 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;
Elena Demikhovsky3948c592015-05-27 08:15:19 +00008332
Simon Pilgrim833c2602017-12-05 19:21:28 +00008333 defm Z256: avx512_extend_common<opc, OpcodeStr, itins, v4i64x_info,
Craig Topper6840f112016-07-14 06:41:34 +00008334 v16i8x_info, i32mem, LdFrag, OpNode>,
Craig Toppera33846a2017-10-22 06:18:23 +00008335 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
Elena Demikhovsky3948c592015-05-27 08:15:19 +00008336 }
8337 let Predicates = [HasAVX512] in {
Simon Pilgrim833c2602017-12-05 19:21:28 +00008338 defm Z : avx512_extend_common<opc, OpcodeStr, itins, v8i64_info,
Craig Topper6840f112016-07-14 06:41:34 +00008339 v16i8x_info, i64mem, LdFrag, OpNode>,
Craig Toppera33846a2017-10-22 06:18:23 +00008340 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
Elena Demikhovsky3948c592015-05-27 08:15:19 +00008341 }
8342}
8343
// avx512_extend_WD - word-to-dword extend family.
//
// Instantiates avx512_extend_common three times (first info presumably the
// destination, second the source - TODO confirm):
//   Z128 : v4i32x_info with v8i16x_info,  i64mem,  InVecNode [HasVLX, HasAVX512]
//   Z256 : v8i32x_info with v8i16x_info,  i128mem, OpNode    [HasVLX, HasAVX512]
//   Z    : v16i32_info with v16i16x_info, i256mem, OpNode    [HasAVX512]
//
// LdFrag defaults to the PatFrag named ExtTy#"extloadvi16". Every variant adds
// EVEX_CD8<16, CD8VH>, T8PD and VEX_WIG.
Simon Pilgrimb13961d2016-06-11 14:34:10 +00008344multiclass avx512_extend_WD<bits<8> opc, string OpcodeStr,
Simon Pilgrim833c2602017-12-05 19:21:28 +00008345 SDPatternOperator OpNode, SDPatternOperator InVecNode, string ExtTy,
8346 OpndItins itins, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
Elena Demikhovsky3948c592015-05-27 08:15:19 +00008347 let Predicates = [HasVLX, HasAVX512] in {
Simon Pilgrim833c2602017-12-05 19:21:28 +00008348 defm Z128: avx512_extend_common<opc, OpcodeStr, itins, v4i32x_info,
Simon Pilgrim9f5c2512017-03-05 09:57:20 +00008349 v8i16x_info, i64mem, LdFrag, InVecNode>,
Craig Toppera33846a2017-10-22 06:18:23 +00008350 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
Elena Demikhovsky3948c592015-05-27 08:15:19 +00008351
Simon Pilgrim833c2602017-12-05 19:21:28 +00008352 defm Z256: avx512_extend_common<opc, OpcodeStr, itins, v8i32x_info,
Craig Topper6840f112016-07-14 06:41:34 +00008353 v8i16x_info, i128mem, LdFrag, OpNode>,
Craig Toppera33846a2017-10-22 06:18:23 +00008354 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
Elena Demikhovsky3948c592015-05-27 08:15:19 +00008355 }
8356 let Predicates = [HasAVX512] in {
Simon Pilgrim833c2602017-12-05 19:21:28 +00008357 defm Z : avx512_extend_common<opc, OpcodeStr, itins, v16i32_info,
Craig Topper6840f112016-07-14 06:41:34 +00008358 v16i16x_info, i256mem, LdFrag, OpNode>,
Craig Toppera33846a2017-10-22 06:18:23 +00008359 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
Elena Demikhovsky3948c592015-05-27 08:15:19 +00008360 }
8361}
8362
// avx512_extend_WQ - word-to-qword extend family.
//
// Instantiates avx512_extend_common three times, always with v8i16x_info as
// the second (presumably source - TODO confirm) info:
//   Z128 : v2i64x_info, i32mem,  InVecNode  [HasVLX, HasAVX512]
//   Z256 : v4i64x_info, i64mem,  OpNode     [HasVLX, HasAVX512]
//   Z    : v8i64_info,  i128mem, OpNode     [HasAVX512]
//
// LdFrag defaults to the PatFrag named ExtTy#"extloadvi16". Every variant adds
// EVEX_CD8<16, CD8VQ>, T8PD and VEX_WIG.
Simon Pilgrimb13961d2016-06-11 14:34:10 +00008363multiclass avx512_extend_WQ<bits<8> opc, string OpcodeStr,
Simon Pilgrim833c2602017-12-05 19:21:28 +00008364 SDPatternOperator OpNode, SDPatternOperator InVecNode, string ExtTy,
8365 OpndItins itins, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
Elena Demikhovsky3948c592015-05-27 08:15:19 +00008366 let Predicates = [HasVLX, HasAVX512] in {
Simon Pilgrim833c2602017-12-05 19:21:28 +00008367 defm Z128: avx512_extend_common<opc, OpcodeStr, itins, v2i64x_info,
Simon Pilgrim9f5c2512017-03-05 09:57:20 +00008368 v8i16x_info, i32mem, LdFrag, InVecNode>,
Craig Toppera33846a2017-10-22 06:18:23 +00008369 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
Elena Demikhovsky3948c592015-05-27 08:15:19 +00008370
Simon Pilgrim833c2602017-12-05 19:21:28 +00008371 defm Z256: avx512_extend_common<opc, OpcodeStr, itins, v4i64x_info,
Craig Topper6840f112016-07-14 06:41:34 +00008372 v8i16x_info, i64mem, LdFrag, OpNode>,
Craig Toppera33846a2017-10-22 06:18:23 +00008373 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
Elena Demikhovsky3948c592015-05-27 08:15:19 +00008374 }
8375 let Predicates = [HasAVX512] in {
Simon Pilgrim833c2602017-12-05 19:21:28 +00008376 defm Z : avx512_extend_common<opc, OpcodeStr, itins, v8i64_info,
Craig Topper6840f112016-07-14 06:41:34 +00008377 v8i16x_info, i128mem, LdFrag, OpNode>,
Craig Toppera33846a2017-10-22 06:18:23 +00008378 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
Elena Demikhovsky3948c592015-05-27 08:15:19 +00008379 }
8380}
8381
// avx512_extend_DQ - dword-to-qword extend family.
//
// Instantiates avx512_extend_common three times (first info presumably the
// destination, second the source - TODO confirm):
//   Z128 : v2i64x_info with v4i32x_info, i64mem,  InVecNode [HasVLX, HasAVX512]
//   Z256 : v4i64x_info with v4i32x_info, i128mem, OpNode    [HasVLX, HasAVX512]
//   Z    : v8i64_info  with v8i32x_info, i256mem, OpNode    [HasAVX512]
//
// LdFrag defaults to the PatFrag named ExtTy#"extloadvi32". Every variant adds
// EVEX_CD8<32, CD8VH> and T8PD. Unlike the byte/word-source families in this
// file, no VEX_WIG is attached here.
Simon Pilgrimb13961d2016-06-11 14:34:10 +00008382multiclass avx512_extend_DQ<bits<8> opc, string OpcodeStr,
Simon Pilgrim833c2602017-12-05 19:21:28 +00008383 SDPatternOperator OpNode, SDPatternOperator InVecNode, string ExtTy,
8384 OpndItins itins, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
Elena Demikhovsky3948c592015-05-27 08:15:19 +00008385
8386 let Predicates = [HasVLX, HasAVX512] in {
Simon Pilgrim833c2602017-12-05 19:21:28 +00008387 defm Z128: avx512_extend_common<opc, OpcodeStr, itins, v2i64x_info,
Simon Pilgrim9f5c2512017-03-05 09:57:20 +00008388 v4i32x_info, i64mem, LdFrag, InVecNode>,
Elena Demikhovsky3948c592015-05-27 08:15:19 +00008389 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;
8390
Simon Pilgrim833c2602017-12-05 19:21:28 +00008391 defm Z256: avx512_extend_common<opc, OpcodeStr, itins, v4i64x_info,
Craig Topper6840f112016-07-14 06:41:34 +00008392 v4i32x_info, i128mem, LdFrag, OpNode>,
Elena Demikhovsky3948c592015-05-27 08:15:19 +00008393 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
8394 }
8395 let Predicates = [HasAVX512] in {
Simon Pilgrim833c2602017-12-05 19:21:28 +00008396 defm Z : avx512_extend_common<opc, OpcodeStr, itins, v8i64_info,
Craig Topper6840f112016-07-14 06:41:34 +00008397 v8i32x_info, i256mem, LdFrag, OpNode>,
Elena Demikhovsky3948c592015-05-27 08:15:19 +00008398 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
8399 }
8400}
8401
// Instantiate the six extend families for zero- and sign-extension.
//
// Zero-extend forms use opcodes 0x30-0x35 with X86vzext as OpNode, zext_invec
// as InVecNode and ExtTy "z"; sign-extend forms use opcodes 0x20-0x25 with
// X86vsext, sext_invec and ExtTy "s". Within each group the opcode increases
// in the order BW, BD, BQ, WD, WQ, DQ. All twelve use the AVX512_EXTEND
// itinerary and leave LdFrag at its ExtTy-derived default.
Simon Pilgrim833c2602017-12-05 19:21:28 +00008402defm VPMOVZXBW : avx512_extend_BW<0x30, "vpmovzxbw", X86vzext, zext_invec, "z", AVX512_EXTEND>;
8403defm VPMOVZXBD : avx512_extend_BD<0x31, "vpmovzxbd", X86vzext, zext_invec, "z", AVX512_EXTEND>;
8404defm VPMOVZXBQ : avx512_extend_BQ<0x32, "vpmovzxbq", X86vzext, zext_invec, "z", AVX512_EXTEND>;
8405defm VPMOVZXWD : avx512_extend_WD<0x33, "vpmovzxwd", X86vzext, zext_invec, "z", AVX512_EXTEND>;
8406defm VPMOVZXWQ : avx512_extend_WQ<0x34, "vpmovzxwq", X86vzext, zext_invec, "z", AVX512_EXTEND>;
8407defm VPMOVZXDQ : avx512_extend_DQ<0x35, "vpmovzxdq", X86vzext, zext_invec, "z", AVX512_EXTEND>;
Elena Demikhovsky3948c592015-05-27 08:15:19 +00008408
Simon Pilgrim833c2602017-12-05 19:21:28 +00008409defm VPMOVSXBW: avx512_extend_BW<0x20, "vpmovsxbw", X86vsext, sext_invec, "s", AVX512_EXTEND>;
8410defm VPMOVSXBD: avx512_extend_BD<0x21, "vpmovsxbd", X86vsext, sext_invec, "s", AVX512_EXTEND>;
8411defm VPMOVSXBQ: avx512_extend_BQ<0x22, "vpmovsxbq", X86vsext, sext_invec, "s", AVX512_EXTEND>;
8412defm VPMOVSXWD: avx512_extend_WD<0x23, "vpmovsxwd", X86vsext, sext_invec, "s", AVX512_EXTEND>;
8413defm VPMOVSXWQ: avx512_extend_WQ<0x24, "vpmovsxwq", X86vsext, sext_invec, "s", AVX512_EXTEND>;
8414defm VPMOVSXDQ: avx512_extend_DQ<0x25, "vpmovsxdq", X86vsext, sext_invec, "s", AVX512_EXTEND>;
Elena Demikhovsky3948c592015-05-27 08:15:19 +00008415
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008416
// AVX512_pmovx_patterns - extra load-folding selection patterns for the
// extend instructions.
//
// Every pattern matches an extend of some load form and selects the memory
// ("rm") form of the instruction, passing only addr:$src. The instruction is
// looked up by name as OpcPrefix # <BW|BD|BQ|WD|WQ|DQ> # <Z128|Z256|Z> # rm.
//
// Parameters:
//   OpcPrefix - instruction name prefix (instantiated below this multiclass
//               with "VPMOVSX" and "VPMOVZX").
//   ExtOp     - extend node matched by the 256-bit and 512-bit patterns.
//   InVecOp   - extend node matched by the 128-bit patterns.
//   ExtLoad16 - load fragment used only in the BQZ128 scalar_to_vector pattern.
//
// Load forms matched (the set varies per instruction): scalar_to_vector of
// loadi64 / loadf64 / loadi32 / ExtLoad16, vzmovl_v2i64 / vzmovl_v4i32,
// vzload_v2i64, and bitcasts of loadv2i64 / loadv4i64.
//
// Predicates: BW patterns need HasBWI (plus HasVLX for 128/256-bit); the other
// 128/256-bit patterns need HasVLX; the other 512-bit patterns need HasAVX512.
Simon Pilgrim9f5c2512017-03-05 09:57:20 +00008417multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
8418 SDNode InVecOp, PatFrag ExtLoad16> {
Craig Topper64378f42016-10-09 23:08:39 +00008419 // 128-bit patterns
8420 let Predicates = [HasVLX, HasBWI] in {
Simon Pilgrim9f5c2512017-03-05 09:57:20 +00008421 def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
Craig Topper64378f42016-10-09 23:08:39 +00008422 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
Simon Pilgrim9f5c2512017-03-05 09:57:20 +00008423 def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
Craig Topper64378f42016-10-09 23:08:39 +00008424 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
Simon Pilgrim9f5c2512017-03-05 09:57:20 +00008425 def : Pat<(v8i16 (InVecOp (v16i8 (vzmovl_v2i64 addr:$src)))),
Craig Topper64378f42016-10-09 23:08:39 +00008426 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
Simon Pilgrim9f5c2512017-03-05 09:57:20 +00008427 def : Pat<(v8i16 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
Craig Topper64378f42016-10-09 23:08:39 +00008428 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
Simon Pilgrim9f5c2512017-03-05 09:57:20 +00008429 def : Pat<(v8i16 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
Craig Topper64378f42016-10-09 23:08:39 +00008430 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
8431 }
8432 let Predicates = [HasVLX] in {
Simon Pilgrim9f5c2512017-03-05 09:57:20 +00008433 def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
Craig Topper64378f42016-10-09 23:08:39 +00008434 (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
Simon Pilgrim9f5c2512017-03-05 09:57:20 +00008435 def : Pat<(v4i32 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
Craig Topper64378f42016-10-09 23:08:39 +00008436 (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
Simon Pilgrim9f5c2512017-03-05 09:57:20 +00008437 def : Pat<(v4i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
Craig Topper64378f42016-10-09 23:08:39 +00008438 (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
Simon Pilgrim9f5c2512017-03-05 09:57:20 +00008439 def : Pat<(v4i32 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
Craig Topper64378f42016-10-09 23:08:39 +00008440 (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
8441
// The only use of ExtLoad16: the scalar load form for the byte-to-qword case.
Simon Pilgrim9f5c2512017-03-05 09:57:20 +00008442 def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (ExtLoad16 addr:$src)))))),
Craig Topper64378f42016-10-09 23:08:39 +00008443 (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
Simon Pilgrim9f5c2512017-03-05 09:57:20 +00008444 def : Pat<(v2i64 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
Craig Topper64378f42016-10-09 23:08:39 +00008445 (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
Simon Pilgrim9f5c2512017-03-05 09:57:20 +00008446 def : Pat<(v2i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
Craig Topper64378f42016-10-09 23:08:39 +00008447 (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
Simon Pilgrim9f5c2512017-03-05 09:57:20 +00008448 def : Pat<(v2i64 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
Craig Topper64378f42016-10-09 23:08:39 +00008449 (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
8450
Simon Pilgrim9f5c2512017-03-05 09:57:20 +00008451 def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
Craig Topper64378f42016-10-09 23:08:39 +00008452 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
Simon Pilgrim9f5c2512017-03-05 09:57:20 +00008453 def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
Craig Topper64378f42016-10-09 23:08:39 +00008454 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
Simon Pilgrim9f5c2512017-03-05 09:57:20 +00008455 def : Pat<(v4i32 (InVecOp (v8i16 (vzmovl_v2i64 addr:$src)))),
Craig Topper64378f42016-10-09 23:08:39 +00008456 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
Simon Pilgrim9f5c2512017-03-05 09:57:20 +00008457 def : Pat<(v4i32 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
Craig Topper64378f42016-10-09 23:08:39 +00008458 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
Simon Pilgrim9f5c2512017-03-05 09:57:20 +00008459 def : Pat<(v4i32 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
Craig Topper64378f42016-10-09 23:08:39 +00008460 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
8461
Simon Pilgrim9f5c2512017-03-05 09:57:20 +00008462 def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
Craig Topper64378f42016-10-09 23:08:39 +00008463 (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
Simon Pilgrim9f5c2512017-03-05 09:57:20 +00008464 def : Pat<(v2i64 (InVecOp (v8i16 (vzmovl_v4i32 addr:$src)))),
Craig Topper64378f42016-10-09 23:08:39 +00008465 (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
Simon Pilgrim9f5c2512017-03-05 09:57:20 +00008466 def : Pat<(v2i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
Craig Topper64378f42016-10-09 23:08:39 +00008467 (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
Simon Pilgrim9f5c2512017-03-05 09:57:20 +00008468 def : Pat<(v2i64 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
Craig Topper64378f42016-10-09 23:08:39 +00008469 (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
8470
Simon Pilgrim9f5c2512017-03-05 09:57:20 +00008471 def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
Craig Topper64378f42016-10-09 23:08:39 +00008472 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
Simon Pilgrim9f5c2512017-03-05 09:57:20 +00008473 def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
Craig Topper64378f42016-10-09 23:08:39 +00008474 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
Simon Pilgrim9f5c2512017-03-05 09:57:20 +00008475 def : Pat<(v2i64 (InVecOp (v4i32 (vzmovl_v2i64 addr:$src)))),
Craig Topper64378f42016-10-09 23:08:39 +00008476 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
Simon Pilgrim9f5c2512017-03-05 09:57:20 +00008477 def : Pat<(v2i64 (InVecOp (v4i32 (vzload_v2i64 addr:$src)))),
Craig Topper64378f42016-10-09 23:08:39 +00008478 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
Simon Pilgrim9f5c2512017-03-05 09:57:20 +00008479 def : Pat<(v2i64 (InVecOp (bc_v4i32 (loadv2i64 addr:$src)))),
Craig Topper64378f42016-10-09 23:08:39 +00008480 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
8481 }
8482 // 256-bit patterns
8483 let Predicates = [HasVLX, HasBWI] in {
8484 def : Pat<(v16i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
8485 (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
8486 def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
8487 (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
8488 def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
8489 (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
8490 }
8491 let Predicates = [HasVLX] in {
8492 def : Pat<(v8i32 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
8493 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
8494 def : Pat<(v8i32 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
8495 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
8496 def : Pat<(v8i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
8497 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
8498 def : Pat<(v8i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
8499 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
8500
8501 def : Pat<(v4i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
8502 (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
8503 def : Pat<(v4i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
8504 (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
8505 def : Pat<(v4i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
8506 (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
8507 def : Pat<(v4i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
8508 (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
8509
8510 def : Pat<(v8i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
8511 (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
8512 def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
8513 (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
8514 def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
8515 (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
8516
8517 def : Pat<(v4i64 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
8518 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
8519 def : Pat<(v4i64 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
8520 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
8521 def : Pat<(v4i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
8522 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
8523 def : Pat<(v4i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
8524 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
8525
8526 def : Pat<(v4i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
8527 (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
8528 def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
8529 (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
8530 def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
8531 (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
8532 }
8533 // 512-bit patterns
8534 let Predicates = [HasBWI] in {
8535 def : Pat<(v32i16 (ExtOp (bc_v32i8 (loadv4i64 addr:$src)))),
8536 (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
8537 }
8538 let Predicates = [HasAVX512] in {
8539 def : Pat<(v16i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
8540 (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
8541
8542 def : Pat<(v8i64 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
8543 (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
Craig Topper9ece2f72016-10-10 06:25:48 +00008544 def : Pat<(v8i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
8545 (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
Craig Topper64378f42016-10-09 23:08:39 +00008546
8547 def : Pat<(v16i32 (ExtOp (bc_v16i16 (loadv4i64 addr:$src)))),
8548 (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;
8549
8550 def : Pat<(v8i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
8551 (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;
8552
8553 def : Pat<(v8i64 (ExtOp (bc_v8i32 (loadv4i64 addr:$src)))),
8554 (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
8555 }
8556}
8557
// Emit the load-folding patterns for both extend flavours. The sign-extend
// set passes extloadi32i16 as ExtLoad16 and the zero-extend set passes
// loadi16_anyext; ExtLoad16 is consumed only by the BQZ128 scalar-load pattern.
Simon Pilgrim9f5c2512017-03-05 09:57:20 +00008558defm : AVX512_pmovx_patterns<"VPMOVSX", X86vsext, sext_invec, extloadi32i16>;
8559defm : AVX512_pmovx_patterns<"VPMOVZX", X86vzext, zext_invec, loadi16_anyext>;
Craig Topper64378f42016-10-09 23:08:39 +00008560
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008561//===----------------------------------------------------------------------===//
8562// GATHER - SCATTER Operations
8563
Simon Pilgrimb69dae42017-12-05 20:47:11 +00008564// FIXME: Improve scheduling of gather/scatter instructions.
// avx512_gather - defines the single "rm" record of one masked gather
// instruction.
//
// Parameters:
//   opc        - opcode byte.
//   OpcodeStr  - mnemonic; _.Suffix is appended to it for the asm string.
//   _          - vector type info for the gathered data ($dst / $src1).
//   memop      - memory operand class for $src2.
//   GatherNode - PatFrag matched by the selection pattern.
//   MaskRC     - mask register class for $mask / $mask_wb (default _.KRCWM).
//
// Outputs are the data register $dst and the mask $mask_wb. The constraint
// string ties $src1 to $dst and $mask to $mask_wb, and marks $dst
// early-clobber. Scheduling is WriteLoad (see the FIXME preceding this
// multiclass).
Elena Demikhovskye1eda8a2015-04-30 08:38:48 +00008565multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
Craig Topper16a91ce2017-11-15 07:46:43 +00008566 X86MemOperand memop, PatFrag GatherNode,
8567 RegisterClass MaskRC = _.KRCWM> {
Elena Demikhovsky6a1a3572015-06-28 10:53:29 +00008568 let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
8569 ExeDomain = _.ExeDomain in
Craig Topper16a91ce2017-11-15 07:46:43 +00008570 def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
8571 (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
Elena Demikhovsky6a1a3572015-06-28 10:53:29 +00008572 !strconcat(OpcodeStr#_.Suffix,
Craig Topperedb09112014-11-25 20:11:23 +00008573 "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
Craig Topper16a91ce2017-11-15 07:46:43 +00008574 [(set _.RC:$dst, MaskRC:$mask_wb,
8575 (GatherNode (_.VT _.RC:$src1), MaskRC:$mask,
Elena Demikhovskye1eda8a2015-04-30 08:38:48 +00008576 vectoraddr:$src2))]>, EVEX, EVEX_K,
Simon Pilgrimb69dae42017-12-05 20:47:11 +00008577 EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteLoad]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008578}
Cameron McInally45325962014-03-26 13:50:50 +00008579
// avx512_gather_q_pd - gather variants instantiated with VEX_W (used below for
// the PD and Q suffixes, i.e. avx512vl_f64_info / avx512vl_i64_info).
//
// For each vector length it emits a "D" record (opcode dopc, mnemonic
// OpcodeStr#"d") and a "Q" record (opcode qopc, mnemonic OpcodeStr#"q"), named
// NAME # D|Q # SUFF # Z|Z256|Z128. Memory operand / gather fragment per record:
//   Z     D: vy512mem,  mgatherv8i32     Q: vz512mem,  mgatherv8i64
//   Z256  D: vx256xmem, mgatherv4i32     Q: vy256xmem, mgatherv4i64
//   Z128  D: vx128xmem, mgatherv4i32     Q: vx128xmem, mgatherv2i64
// The data type info always matches the vector length (_.info512/256/128).
// The 256- and 128-bit records are additionally gated on HasVLX.
Elena Demikhovsky6a1a3572015-06-28 10:53:29 +00008580multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
8581 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
8582 defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512,
Igor Breger45ef10f2016-02-25 13:30:17 +00008583 vy512mem, mgatherv8i32>, EVEX_V512, VEX_W;
Elena Demikhovsky6a1a3572015-06-28 10:53:29 +00008584 defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info512,
Igor Breger45ef10f2016-02-25 13:30:17 +00008585 vz512mem, mgatherv8i64>, EVEX_V512, VEX_W;
Elena Demikhovsky6a1a3572015-06-28 10:53:29 +00008586let Predicates = [HasVLX] in {
8587 defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
Igor Breger45ef10f2016-02-25 13:30:17 +00008588 vx256xmem, mgatherv4i32>, EVEX_V256, VEX_W;
Elena Demikhovsky6a1a3572015-06-28 10:53:29 +00008589 defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info256,
Igor Breger45ef10f2016-02-25 13:30:17 +00008590 vy256xmem, mgatherv4i64>, EVEX_V256, VEX_W;
Elena Demikhovsky6a1a3572015-06-28 10:53:29 +00008591 defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
Igor Breger45ef10f2016-02-25 13:30:17 +00008592 vx128xmem, mgatherv4i32>, EVEX_V128, VEX_W;
Elena Demikhovsky6a1a3572015-06-28 10:53:29 +00008593 defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
Igor Breger45ef10f2016-02-25 13:30:17 +00008594 vx128xmem, mgatherv2i64>, EVEX_V128, VEX_W;
Elena Demikhovsky6a1a3572015-06-28 10:53:29 +00008595}
Cameron McInally45325962014-03-26 13:50:50 +00008596}
8597
// avx512_gather_d_ps - gather variants instantiated without VEX_W (used below
// for the PS and D suffixes, i.e. avx512vl_f32_info / avx512vl_i32_info).
//
// Record naming and opcode/mnemonic selection follow avx512_gather_q_pd. The
// "Q" records use a narrower data type info than their EVEX vector length:
//   Z     D: _.info512, vz512mem,  mgatherv16i32
//         Q: _.info256, vz256xmem, mgatherv8i64
//   Z256  D: _.info256, vy256xmem, mgatherv8i32
//         Q: _.info128, vy128xmem, mgatherv4i64
//   Z128  D: _.info128, vx128xmem, mgatherv4i32
//         Q: _.info128, vx64xmem,  mgatherv2i64, mask class overridden to VK2WM
// The Z128 "Q" record is the only one that overrides MaskRC; all others take
// the default _.KRCWM of the info they pass. The 256- and 128-bit records are
// additionally gated on HasVLX.
Elena Demikhovsky6a1a3572015-06-28 10:53:29 +00008598multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
8599 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
Igor Breger45ef10f2016-02-25 13:30:17 +00008600 defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512, vz512mem,
Elena Demikhovsky6a1a3572015-06-28 10:53:29 +00008601 mgatherv16i32>, EVEX_V512;
Craig Topper7dfd5832017-01-16 00:55:58 +00008602 defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info256, vz256xmem,
Elena Demikhovsky6a1a3572015-06-28 10:53:29 +00008603 mgatherv8i64>, EVEX_V512;
8604let Predicates = [HasVLX] in {
8605 defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
Igor Breger45ef10f2016-02-25 13:30:17 +00008606 vy256xmem, mgatherv8i32>, EVEX_V256;
Elena Demikhovsky6a1a3572015-06-28 10:53:29 +00008607 defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info128,
Igor Breger45ef10f2016-02-25 13:30:17 +00008608 vy128xmem, mgatherv4i64>, EVEX_V256;
Elena Demikhovsky6a1a3572015-06-28 10:53:29 +00008609 defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
Igor Breger45ef10f2016-02-25 13:30:17 +00008610 vx128xmem, mgatherv4i32>, EVEX_V128;
Elena Demikhovsky6a1a3572015-06-28 10:53:29 +00008611 defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
Craig Topperc1e7b3f2017-11-22 07:11:03 +00008612 vx64xmem, mgatherv2i64, VK2WM>,
Craig Topper16a91ce2017-11-15 07:46:43 +00008613 EVEX_V128;
Elena Demikhovsky6a1a3572015-06-28 10:53:29 +00008614}
Cameron McInally45325962014-03-26 13:50:50 +00008615}
Michael Liao5bf95782014-12-04 05:20:33 +00008616
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008617
// Gather instruction instantiations. Each defm combines the VEX_W family
// (avx512_gather_q_pd) with the non-VEX_W family (avx512_gather_d_ps):
//   VGATHER  - opcodes 0x92 (D records) / 0x93 (Q records), f64 "PD" and
//              f32 "PS" data.
//   VPGATHER - opcodes 0x90 (D records) / 0x91 (Q records), i64 "Q" and
//              i32 "D" data.
Elena Demikhovsky6a1a3572015-06-28 10:53:29 +00008618defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
8619 avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;
8620
8621defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
8622 avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008623
// avx512_scatter - defines the single "mr" record of one masked scatter
// instruction.
//
// Parameters:
//   opc         - opcode byte.
//   OpcodeStr   - mnemonic; _.Suffix is appended to it for the asm string.
//   _           - vector type info for the stored data ($src).
//   memop       - memory operand class for $dst.
//   ScatterNode - PatFrag matched by the selection pattern.
//   MaskRC      - mask register class for $mask / $mask_wb (default _.KRCWM).
//
// The only register output is the mask $mask_wb, which the constraint string
// ties to the $mask input. mayStore is set explicitly and scheduling is
// WriteStore.
Elena Demikhovskye1eda8a2015-04-30 08:38:48 +00008624multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
Craig Topper0b590342018-01-11 06:31:28 +00008625 X86MemOperand memop, PatFrag ScatterNode,
8626 RegisterClass MaskRC = _.KRCWM> {
Elena Demikhovskye1eda8a2015-04-30 08:38:48 +00008627
Elena Demikhovsky30bc4ca2015-06-29 12:14:24 +00008628let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain in
Elena Demikhovskye1eda8a2015-04-30 08:38:48 +00008629
Craig Topper0b590342018-01-11 06:31:28 +00008630 def mr : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
8631 (ins memop:$dst, MaskRC:$mask, _.RC:$src),
Elena Demikhovsky30bc4ca2015-06-29 12:14:24 +00008632 !strconcat(OpcodeStr#_.Suffix,
Elena Demikhovskye1eda8a2015-04-30 08:38:48 +00008633 "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
Craig Topper0b590342018-01-11 06:31:28 +00008634 [(set MaskRC:$mask_wb, (ScatterNode (_.VT _.RC:$src),
8635 MaskRC:$mask, vectoraddr:$dst))]>,
Simon Pilgrimb69dae42017-12-05 20:47:11 +00008636 EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
8637 Sched<[WriteStore]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008638}
8639
// avx512_scatter_q_pd - scatter variants instantiated with VEX_W (used below
// for the PD and Q suffixes, i.e. avx512vl_f64_info / avx512vl_i64_info).
//
// For each vector length it emits a "D" record (opcode dopc, mnemonic
// OpcodeStr#"d") and a "Q" record (opcode qopc, mnemonic OpcodeStr#"q"), named
// NAME # D|Q # SUFF # Z|Z256|Z128. Memory operand / scatter fragment per record:
//   Z     D: vy512mem,  mscatterv8i32    Q: vz512mem,  mscatterv8i64
//   Z256  D: vx256xmem, mscatterv4i32    Q: vy256xmem, mscatterv4i64
//   Z128  D: vx128xmem, mscatterv4i32    Q: vx128xmem, mscatterv2i64
// The data type info always matches the vector length (_.info512/256/128).
// The 256- and 128-bit records are additionally gated on HasVLX.
Elena Demikhovsky30bc4ca2015-06-29 12:14:24 +00008640multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
8641 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
8642 defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512,
Igor Breger45ef10f2016-02-25 13:30:17 +00008643 vy512mem, mscatterv8i32>, EVEX_V512, VEX_W;
Elena Demikhovsky30bc4ca2015-06-29 12:14:24 +00008644 defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info512,
Igor Breger45ef10f2016-02-25 13:30:17 +00008645 vz512mem, mscatterv8i64>, EVEX_V512, VEX_W;
Elena Demikhovsky30bc4ca2015-06-29 12:14:24 +00008646let Predicates = [HasVLX] in {
8647 defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
Igor Breger45ef10f2016-02-25 13:30:17 +00008648 vx256xmem, mscatterv4i32>, EVEX_V256, VEX_W;
Elena Demikhovsky30bc4ca2015-06-29 12:14:24 +00008649 defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info256,
Igor Breger45ef10f2016-02-25 13:30:17 +00008650 vy256xmem, mscatterv4i64>, EVEX_V256, VEX_W;
Elena Demikhovsky30bc4ca2015-06-29 12:14:24 +00008651 defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
Igor Breger45ef10f2016-02-25 13:30:17 +00008652 vx128xmem, mscatterv4i32>, EVEX_V128, VEX_W;
Elena Demikhovsky30bc4ca2015-06-29 12:14:24 +00008653 defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
Igor Breger45ef10f2016-02-25 13:30:17 +00008654 vx128xmem, mscatterv2i64>, EVEX_V128, VEX_W;
Elena Demikhovsky30bc4ca2015-06-29 12:14:24 +00008655}
Cameron McInally45325962014-03-26 13:50:50 +00008656}
8657
// avx512_scatter_d_ps - scatter variants instantiated without VEX_W (used
// below for the PS and D suffixes, i.e. avx512vl_f32_info / avx512vl_i32_info).
//
// Record naming and opcode/mnemonic selection follow avx512_scatter_q_pd. The
// "Q" records use a narrower data type info than their EVEX vector length:
//   Z     D: _.info512, vz512mem,  mscatterv16i32
//         Q: _.info256, vz256xmem, mscatterv8i64
//   Z256  D: _.info256, vy256xmem, mscatterv8i32
//         Q: _.info128, vy128xmem, mscatterv4i64
//   Z128  D: _.info128, vx128xmem, mscatterv4i32
//         Q: _.info128, vx64xmem,  mscatterv2i64, mask class overridden to VK2WM
// The Z128 "Q" record is the only one that overrides MaskRC; all others take
// the default _.KRCWM of the info they pass. The 256- and 128-bit records are
// additionally gated on HasVLX.
Elena Demikhovsky30bc4ca2015-06-29 12:14:24 +00008658multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
8659 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
Igor Breger45ef10f2016-02-25 13:30:17 +00008660 defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512, vz512mem,
Elena Demikhovsky30bc4ca2015-06-29 12:14:24 +00008661 mscatterv16i32>, EVEX_V512;
Craig Topper7dfd5832017-01-16 00:55:58 +00008662 defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info256, vz256xmem,
Elena Demikhovsky30bc4ca2015-06-29 12:14:24 +00008663 mscatterv8i64>, EVEX_V512;
8664let Predicates = [HasVLX] in {
8665 defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
Igor Breger45ef10f2016-02-25 13:30:17 +00008666 vy256xmem, mscatterv8i32>, EVEX_V256;
Elena Demikhovsky30bc4ca2015-06-29 12:14:24 +00008667 defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
Igor Breger45ef10f2016-02-25 13:30:17 +00008668 vy128xmem, mscatterv4i64>, EVEX_V256;
Elena Demikhovsky30bc4ca2015-06-29 12:14:24 +00008669 defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
Igor Breger45ef10f2016-02-25 13:30:17 +00008670 vx128xmem, mscatterv4i32>, EVEX_V128;
Elena Demikhovsky30bc4ca2015-06-29 12:14:24 +00008671 defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
Craig Topper0b590342018-01-11 06:31:28 +00008672 vx64xmem, mscatterv2i64, VK2WM>,
8673 EVEX_V128;
Elena Demikhovsky30bc4ca2015-06-29 12:14:24 +00008674}
Cameron McInally45325962014-03-26 13:50:50 +00008675}
8676
// Scatter instruction instantiations. Each defm combines the VEX_W family
// (avx512_scatter_q_pd) with the non-VEX_W family (avx512_scatter_d_ps):
//   VSCATTER  - opcodes 0xA2 (D records) / 0xA3 (Q records), f64 "PD" and
//               f32 "PS" data.
//   VPSCATTER - opcodes 0xA0 (D records) / 0xA1 (Q records), i64 "Q" and
//               i32 "D" data.
Elena Demikhovsky30bc4ca2015-06-29 12:14:24 +00008677defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
8678 avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008679
Elena Demikhovsky30bc4ca2015-06-29 12:14:24 +00008680defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
8681 avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008682
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00008683// prefetch
// avx512_gather_scatter_prefetch - defines the single "m" record of a
// gather/scatter prefetch instruction.
//
// Parameters:
//   opc       - opcode byte.
//   F         - instruction Format (callers pass MRM1m/MRM2m/MRM5m/MRM6m).
//   OpcodeStr - full mnemonic.
//   KRC       - mask register class for $mask.
//   memop     - memory operand class for $src.
//
// The record has no outputs and an empty selection pattern; it is gated on
// HasPFI and marked hasSideEffects. The same Sched<[WriteLoad]> is used for
// both the gather and the scatter prefetches built from this multiclass.
8684multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
8685 RegisterClass KRC, X86MemOperand memop> {
8686 let Predicates = [HasPFI], hasSideEffects = 1 in
8687 def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
Craig Topperedb09112014-11-25 20:11:23 +00008688 !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"),
Simon Pilgrimb69dae42017-12-05 20:47:11 +00008689 [], IIC_SSE_PREFETCH>, EVEX, EVEX_K, Sched<[WriteLoad]>;
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00008690}
8691
// Gather/scatter prefetch instantiations (all EVEX_V512).
//
// Opcode 0xC6 is used for the D-index forms and 0xC7 for the Q-index forms.
// The Format selects the operation:
//   MRM1m - vgatherpf0*     MRM2m - vgatherpf1*
//   MRM5m - vscatterpf0*    MRM6m - vscatterpf1*
// Per data/index combination:
//   DPS : VK16WM, vz512mem,  EVEX_CD8<32, CD8VT1>
//   QPS : VK8WM,  vz256xmem, EVEX_CD8<64, CD8VT1>
//   DPD : VK8WM,  vy512mem,  VEX_W, EVEX_CD8<32, CD8VT1>
//   QPD : VK8WM,  vz512mem,  VEX_W, EVEX_CD8<64, CD8VT1>
8692defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
Igor Breger45ef10f2016-02-25 13:30:17 +00008693 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00008694
8695defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
Craig Topper7dfd5832017-01-16 00:55:58 +00008696 VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00008697
8698defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
Igor Breger45ef10f2016-02-25 13:30:17 +00008699 VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00008700
8701defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
Igor Breger45ef10f2016-02-25 13:30:17 +00008702 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
Michael Liao5bf95782014-12-04 05:20:33 +00008703
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00008704defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
Igor Breger45ef10f2016-02-25 13:30:17 +00008705 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00008706
8707defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
Craig Topper7dfd5832017-01-16 00:55:58 +00008708 VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00008709
8710defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
Igor Breger45ef10f2016-02-25 13:30:17 +00008711 VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00008712
8713defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
Igor Breger45ef10f2016-02-25 13:30:17 +00008714 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00008715
8716defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
Igor Breger45ef10f2016-02-25 13:30:17 +00008717 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00008718
8719defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
Craig Topper7dfd5832017-01-16 00:55:58 +00008720 VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00008721
8722defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
Igor Breger45ef10f2016-02-25 13:30:17 +00008723 VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00008724
8725defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
Igor Breger45ef10f2016-02-25 13:30:17 +00008726 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00008727
8728defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
Igor Breger45ef10f2016-02-25 13:30:17 +00008729 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00008730
8731defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
Craig Topper7dfd5832017-01-16 00:55:58 +00008732 VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00008733
8734defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
Igor Breger45ef10f2016-02-25 13:30:17 +00008735 VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00008736
8737defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
Igor Breger45ef10f2016-02-25 13:30:17 +00008738 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008739
// Mask-register to vector-register conversion for a single vector width.
//   opc       - opcode byte.
//   Vec       - vector type info; supplies the destination register class
//               (Vec.RC), the source mask class (Vec.KRC) and the mnemonic
//               suffix (Vec.Suffix).
//   OpcodeStr - mnemonic stem; Vec.Suffix is appended to it.
// Defines `rr`, selected for X86vsext of the mask operand.
multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
                  !strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
                  [(set Vec.RC:$dst, (Vec.VT (X86vsext Vec.KRC:$src)))],
                  IIC_SSE_MOV_S_RR>, EVEX, Sched<[WriteMove]>;
}

// Instantiates cvt_by_vec_width for all three vector widths of VTInfo.
// The 512-bit form (Z) is guarded by `prd` alone; the 256-bit (Z256) and
// 128-bit (Z128) forms additionally require HasVLX.
multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
                                 string OpcodeStr, Predicate prd> {
  // Brace-less `let`: applies to the Z record only.
  let Predicates = [prd] in
  defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
    defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
  }
}

// Mask-to-vector moves. All four share the "vpmovm2" stem; the element suffix
// is taken from the vector type info by cvt_by_vec_width. The byte/word forms
// are predicated on HasBWI, the dword/qword forms on HasDQI.
defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , VEX_W;
defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , VEX_W;

// Vector-register to mask-register conversion for a single vector width.
// Defines `rr`, which reads a vector (_.RC) and writes a mask (_.KRC). It is
// selected for X86pcmpgtm with an all-zeros first operand and the source
// vector as the second operand.
multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
  def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                      [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))],
                      IIC_SSE_MOV_S_RR>, EVEX, Sched<[WriteMove]>;
}

// Use the 512-bit version to implement the 128/256-bit operation when VLX is
// not available.
//   ExtendInfo - type info of the wider vector the source is inserted into.
//   _          - type info of the narrow source vector.
// The narrow source is placed into an IMPLICIT_DEF wide register at
// _.SubRegIdx, the wide "Zrr" instruction is applied, and the resulting mask
// is copied to the narrow mask register class (_.KRC).
multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
                                           X86VectorVTInfo _> {

  def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
            (_.KVT (COPY_TO_REGCLASS
                     (!cast<Instruction>(NAME#"Zrr")
                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                      _.RC:$src, _.SubRegIdx)),
                     _.KRC))>;
}

// Vector-to-mask conversion for all three vector widths of VTInfo.
//   [prd]         - 512-bit instruction (Z).
//   [prd, HasVLX] - native 256/128-bit instructions (Z256, Z128).
//   [prd, NoVLX]  - patterns only (Z256_Alt, Z128_Alt) that widen the source
//                   and reuse the 512-bit instruction.
multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
                                         AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
  defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
                EVEX_V256;
    defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
                EVEX_V128;
  }
  let Predicates = [prd, NoVLX] in {
    defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256>;
    defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128>;
  }
}

// Vector-to-mask moves. The byte/word forms are predicated on HasBWI, the
// dword/qword forms on HasDQI; the word and qword forms carry VEX_W.
defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
                                              avx512vl_i8_info, HasBWI>;
defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
                                              avx512vl_i16_info, HasBWI>, VEX_W;
defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
                                              avx512vl_i32_info, HasDQI>;
defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
                                              avx512vl_i64_info, HasDQI>, VEX_W;

//===----------------------------------------------------------------------===//
// AVX-512 - COMPRESS and EXPAND
//

// Itinerary bundles used by the compress/expand multiclasses. Both records
// use the same register/memory itineraries and are scheduled as
// WriteShuffle256.
// FIXME: Is there a better scheduler itinerary for VPCOMPRESS/VPEXPAND?
let Sched = WriteShuffle256 in {
def AVX512_COMPRESS : OpndItins<
  IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
>;
def AVX512_EXPAND : OpndItins<
  IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
>;
}

// Compress instructions for a single vector width.
//   rr  - register destination, built through AVX512_maskable and selected
//         for X86compress.
//   mr  - memory destination, no mask operand, no selection pattern.
//   mrk - memory destination with a write-mask operand, no selection pattern
//         here (compress_by_vec_width_lowering references it by name).
multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
                                 string OpcodeStr, OpndItins itins> {
  defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
              (_.VT (X86compress _.RC:$src1)), itins.rr>, AVX5128IBase,
              Sched<[itins.Sched]>;

  // NOTE(review): this `let` has no braces, so it covers only `mr`. `mrk`
  // does not get mayStore/hasSideEffects from here - confirm those flags are
  // obtained another way (e.g. inferred from the lowering pattern).
  let mayStore = 1, hasSideEffects = 0 in
  def mr : AVX5128I<opc, MRMDestMem, (outs),
              (ins _.MemOp:$dst, _.RC:$src),
              OpcodeStr # "\t{$src, $dst|$dst, $src}",
              []>, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[itins.Sched.Folded]>;

  def mrk : AVX5128I<opc, MRMDestMem, (outs),
              (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
              OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
              []>,
              EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[itins.Sched.Folded]>;
}

// Selects the masked memory-destination compress instruction
// (NAME # _.ZSuffix # "mrk") for an X86mCompressingStore of a vector of type
// _.VT under write-mask _.KRCWM.
multiclass compress_by_vec_width_lowering<X86VectorVTInfo _ > {
  def : Pat<(X86mCompressingStore addr:$dst, _.KRCWM:$mask,
                                               (_.VT _.RC:$src)),
            (!cast<Instruction>(NAME#_.ZSuffix##mrk)
                            addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
}

// Compress instructions plus compressing-store lowering for all three vector
// widths of VTInfo.
//   Pred - feature predicate for the 512-bit form (defaults to HasAVX512);
//          the 256/128-bit forms additionally require HasVLX.
multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
                                 OpndItins itins,
                                 AVX512VLVectorVTInfo VTInfo,
                                 Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in
  defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, itins>,
           compress_by_vec_width_lowering<VTInfo.info512>, EVEX_V512;

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, itins>,
                compress_by_vec_width_lowering<VTInfo.info256>, EVEX_V256;
    defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, itins>,
                compress_by_vec_width_lowering<VTInfo.info128>, EVEX_V128;
  }
}

// Compress instantiations: integer forms use opcode 0x8B, floating-point
// forms 0x8A; the 64-bit element forms carry VEX_W. All use the default
// predicate of compress_by_elt_width.
defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", AVX512_COMPRESS,
                                          avx512vl_i32_info>, EVEX;
defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", AVX512_COMPRESS,
                                          avx512vl_i64_info>, EVEX, VEX_W;
defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", AVX512_COMPRESS,
                                          avx512vl_f32_info>, EVEX;
defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", AVX512_COMPRESS,
                                          avx512vl_f64_info>, EVEX, VEX_W;

// expand
// Expand instructions for a single vector width, both built through
// AVX512_maskable and selected for X86expand:
//   rr - register source.
//   rm - memory source (full-vector load via _.LdFrag).
multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
                               string OpcodeStr, OpndItins itins> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
              (_.VT (X86expand _.RC:$src1)), itins.rr>, AVX5128IBase,
              Sched<[itins.Sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
              (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
              (_.VT (X86expand (_.VT (bitconvert
                                      (_.LdFrag addr:$src1))))), itins.rm>,
            AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
            Sched<[itins.Sched.Folded, ReadAfterLd]>;
}

// Selection patterns mapping X86mExpandingLoad onto the masked memory-source
// expand instructions:
//   undef pass-through  -> NAME # _.ZSuffix # "rmkz" (mask, address)
//   vector pass-through -> NAME # _.ZSuffix # "rmk"  (src0, mask, address)
// NOTE(review): rmkz/rmk are presumably the zero-masking and merge-masking
// variants produced by AVX512_maskable - confirm against its definition.
multiclass expand_by_vec_width_lowering<X86VectorVTInfo _ > {

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(NAME#_.ZSuffix##rmkz)
                            _.KRCWM:$mask, addr:$src)>;

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
                                               (_.VT _.RC:$src0))),
            (!cast<Instruction>(NAME#_.ZSuffix##rmk)
                            _.RC:$src0, _.KRCWM:$mask, addr:$src)>;
}

// Expand instructions plus expanding-load lowering for all three vector
// widths of VTInfo.
//   Pred - feature predicate for the 512-bit form (defaults to HasAVX512);
//          the 256/128-bit forms additionally require HasVLX.
multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
                               OpndItins itins,
                               AVX512VLVectorVTInfo VTInfo,
                               Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in
  defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, itins>,
           expand_by_vec_width_lowering<VTInfo.info512>, EVEX_V512;

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, itins>,
                expand_by_vec_width_lowering<VTInfo.info256>, EVEX_V256;
    defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, itins>,
                expand_by_vec_width_lowering<VTInfo.info128>, EVEX_V128;
  }
}

// Expand instantiations: integer forms use opcode 0x89, floating-point forms
// 0x88; the 64-bit element forms carry VEX_W. All use the default predicate
// of expand_by_elt_width.
defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", AVX512_EXPAND,
                                      avx512vl_i32_info>, EVEX;
defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", AVX512_EXPAND,
                                      avx512vl_i64_info>, EVEX, VEX_W;
defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", AVX512_EXPAND,
                                      avx512vl_f32_info>, EVEX;
defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", AVX512_EXPAND,
                                      avx512vl_f64_info>, EVEX, VEX_W;

// Handle instruction  reg_vec1 = op(reg_vec,imm)
//                                op(mem_vec,imm)
//                                op(broadcast(eltVt),imm)
// All instructions are created with FROUND_CURRENT.
// Defines three AVX512_maskable forms whose mnemonic is OpcodeStr with
// _.Suffix appended:
//   rri  - register source.
//   rmi  - full-vector memory source.
//   rmbi - scalar memory source broadcast to the vector (EVEX_B).
multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                      OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
                      (OpNode (_.VT _.RC:$src1),
                              (i32 imm:$src2)), itins.rr>, Sched<[itins.Sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.MemOp:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
                      (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                              (i32 imm:$src2)), itins.rm>,
                      Sched<[itins.Sched.Folded, ReadAfterLd]>;
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr,
                      "${src1}"##_.BroadcastStr##", $src2",
                      (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src1))),
                              (i32 imm:$src2)), itins.rm>, EVEX_B,
                      Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}

// Handle instruction  reg_vec1 = op(reg_vec2,imm),{sae}
// Defines `rrib`: the register form with a single vector source and an
// immediate, printed with "{sae}" and selected for OpNode with
// FROUND_NO_EXC as the trailing operand. Encoded with EVEX_B.
multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                          SDNode OpNode, OpndItins itins,
                                          X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix, "$src2, {sae}, $src1",
                      "$src1, {sae}, $src2",
                      (OpNode (_.VT _.RC:$src1),
                              (i32 imm:$src2),
                              (i32 FROUND_NO_EXC)), itins.rr>,
                      EVEX_B, Sched<[itins.Sched]>;
}

// Unary packed FP op with immediate, for all three vector widths.
//   OpNode    - node used by the regular (rri/rmi/rmbi) forms.
//   OpNodeRnd - node used by the {sae} form.
// Only the 512-bit record (Z) also gets the {sae} form; Z128/Z256 require
// HasVLX in addition to `prd`.
multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
            AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
            SDNode OpNodeRnd, OpndItins itins, Predicate prd>{
  let Predicates = [prd] in {
    defm Z    : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, itins,
                                           _.info512>,
                avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd,
                                               itins, _.info512>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, itins,
                                           _.info128>, EVEX_V128;
    defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, itins,
                                           _.info256>, EVEX_V256;
  }
}

// Handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
//                                op(reg_vec2,mem_vec,imm)
//                                op(reg_vec2,broadcast(eltVt),imm)
// All instructions are created with FROUND_CURRENT.
// Defines three AVX512_maskable forms taking an i32u8imm immediate:
//   rri  - both sources in registers.
//   rmi  - second source is a full-vector memory operand.
//   rmbi - second source is a scalar memory operand broadcast to the vector
//          (EVEX_B).
multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                OpndItins itins, X86VectorVTInfo _>{
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 imm:$src3)), itins.rr>,
                      Sched<[itins.Sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (bitconvert (_.LdFrag addr:$src2))),
                            (i32 imm:$src3)), itins.rm>,
                    Sched<[itins.Sched.Folded, ReadAfterLd]>;
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr##", $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
                            (i32 imm:$src3)), itins.rm>, EVEX_B,
                    Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}

// Handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
//                                op(reg_vec2,mem_vec,imm)
// Two-source op with a u8imm immediate where the destination and source
// vector types may differ:
//   DestInfo - type info for the result (also supplies ExeDomain).
//   SrcInfo  - type info for both sources and for the memory operand.
// Defines `rri` (register sources) and `rmi` (second source from memory).
multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              OpndItins itins, X86VectorVTInfo DestInfo,
                              X86VectorVTInfo SrcInfo>{
  let ExeDomain = DestInfo.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                  (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                               (SrcInfo.VT SrcInfo.RC:$src2),
                               (i8 imm:$src3))), itins.rr>,
                  Sched<[itins.Sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                             (SrcInfo.VT (bitconvert
                                                (SrcInfo.LdFrag addr:$src2))),
                             (i8 imm:$src3))), itins.rm>,
                Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}

// Handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
//                                op(reg_vec2,mem_vec,imm)
//                                op(reg_vec2,broadcast(eltVt),imm)
// Inherits `rri` and `rmi` from avx512_3Op_rm_imm8 (with the same type info
// for destination and sources) and adds `rmbi`, whose second source is a
// scalar memory operand broadcast to the vector (EVEX_B).
multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           OpndItins itins, X86VectorVTInfo _>:
  avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, itins, _, _>{

  let ExeDomain = _.ExeDomain in
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                    OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr##", $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
                            (i8 imm:$src3)), itins.rm>, EVEX_B,
                    Sched<[itins.Sched.Folded, ReadAfterLd]>;
}

// Handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
//                                       op(reg_vec2,mem_scalar,imm)
// Built through AVX512_maskable_scalar with an i32u8imm immediate:
//   rri - both sources in registers.
//   rmi - second source is a scalar load wrapped in scalar_to_vector.
multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 imm:$src3)), itins.rr>,
                      Sched<[itins.Sched]>;
  defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (scalar_to_vector
                                      (_.ScalarLdFrag addr:$src2))),
                            (i32 imm:$src3)), itins.rm>,
                    Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}

// Handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// Defines `rrib`: the register form printed with "{sae}" and selected for
// OpNode with FROUND_NO_EXC as the trailing operand. Encoded with EVEX_B.
multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode, OpndItins itins,
                                    X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, {sae}, $src2, $src1",
                      "$src1, $src2, {sae}, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 imm:$src3),
                              (i32 FROUND_NO_EXC)), itins.rr>,
                      EVEX_B, Sched<[itins.Sched]>;
}

// Handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// Scalar counterpart of the packed {sae} form, built through
// AVX512_maskable_scalar and selected for OpNode with FROUND_NO_EXC as the
// trailing operand. Encoded with EVEX_B.
// NOTE(review): the record is spelled `NAME#rrib`, unlike the sibling
// multiclasses which use a bare `rrib`; left unchanged so the generated
// record names stay exactly as they were.
multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                    OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, {sae}, $src2, $src1",
                      "$src1, $src2, {sae}, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 imm:$src3),
                              (i32 FROUND_NO_EXC)), itins.rr>,
                      EVEX_B, Sched<[itins.Sched]>;
}

// Two-source packed FP op with immediate, for all three vector widths.
//   OpNode    - node used by the regular (rri/rmi/rmbi) forms.
//   OpNodeRnd - node used by the {sae} form.
// Only the 512-bit record (Z) also gets the {sae} form; Z128/Z256 require
// HasVLX in addition to `prd`.
multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
            AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
            SDNode OpNodeRnd, OpndItins itins, Predicate prd>{
  let Predicates = [prd] in {
    defm Z    : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, itins, _.info512>,
                avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd, itins, _.info512>,
                                  EVEX_V512;

  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, itins, _.info128>,
                                  EVEX_V128;
    defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, itins, _.info256>,
                                  EVEX_V256;
  }
}

// avx512_3Op_rm_imm8 for all three vector widths, with separate destination
// and source type families. Every record is tagged AVX512AIi8Base and
// EVEX_4V.
//   Pred - feature predicate for the 512-bit form (defaults to HasBWI);
//          the 128/256-bit forms additionally require HasVLX.
multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
                       OpndItins itins, AVX512VLVectorVTInfo DestInfo,
                       AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
  let Predicates = [Pred] in {
    defm Z    : avx512_3Op_rm_imm8<opc, OpStr, OpNode, itins, DestInfo.info512,
                           SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
  }
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, itins, DestInfo.info128,
                           SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
    defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, itins, DestInfo.info256,
                           SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
  }
}

// avx512_3Op_imm8 (register, memory and broadcast forms) for all three vector
// widths of `_`.
//   Pred - feature predicate for the 512-bit form (defaults to HasAVX512);
//          the 128/256-bit forms additionally require HasVLX.
multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
                                  bits<8> opc, SDNode OpNode, OpndItins itins,
                                  Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in {
    defm Z    : avx512_3Op_imm8<opc, OpcodeStr, OpNode, itins, _.info512>, EVEX_V512;
  }
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, itins, _.info128>, EVEX_V128;
    defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, itins, _.info256>, EVEX_V256;
  }
}

// Scalar two-source FP op with immediate: combines the regular forms
// (avx512_fp_scalar_imm, using OpNode) with the {sae} form
// (avx512_fp_sae_scalar_imm, using OpNodeRnd) under a single Z128 record
// prefix, guarded by `prd`.
multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
                  X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
                  SDNode OpNodeRnd, OpndItins itins, Predicate prd>{
  let Predicates = [prd] in {
     defm Z128 : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, itins, _>,
                 avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeRnd, itins, _>;
  }
}

// Instantiates the unary packed-FP-with-immediate family for both element
// types:
//   PS - avx512vl_f32_info, opcode opcPs, itins.s, EVEX_CD8<32, CD8VF>.
//   PD - avx512vl_f64_info, opcode opcPd, itins.d, EVEX_CD8<64, CD8VF>, VEX_W.
multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
                    bits<8> opcPs, bits<8> opcPd, SDNode OpNode,
                    SDNode OpNodeRnd, SizeItins itins, Predicate prd>{
  defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
                            opcPs, OpNode, OpNodeRnd, itins.s, prd>,
                            EVEX_CD8<32, CD8VF>;
  defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
                            opcPd, OpNode, OpNodeRnd, itins.d, prd>,
                            EVEX_CD8<64, CD8VF>, VEX_W;
}

// Unary packed FP ops with immediate (PS and PD variants of each).
// VREDUCE is predicated on HasDQI; VRNDSCALE and VGETMANT on HasAVX512.
// VRNDSCALE is the only one with different PS/PD opcodes (0x08 / 0x09).
defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
                              X86VReduce, X86VReduceRnd, SSE_ALU_ITINS_P, HasDQI>,
                              AVX512AIi8Base, EVEX;
defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
                              X86VRndScale, X86VRndScaleRnd, SSE_ALU_ITINS_P, HasAVX512>,
                              AVX512AIi8Base, EVEX;
defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
                              X86VGetMant, X86VGetMantRnd, SSE_ALU_ITINS_P, HasAVX512>,
                              AVX512AIi8Base, EVEX;

// Two-source FP ops with immediate.
// Packed VRANGE (opcode 0x50) is predicated on HasDQI and uses the full-vector
// CD8VF disp8 scaling; the scalar forms below use CD8VT1 and VEX_LIG. The
// 64-bit element variants carry VEX_W.
defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
                                                0x50, X86VRange, X86VRangeRnd,
                                                SSE_ALU_F64P, HasDQI>,
      AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
                                                0x50, X86VRange, X86VRangeRnd,
                                                SSE_ALU_F32P, HasDQI>,
      AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;

// Scalar VRANGE (opcode 0x51), predicated on HasDQI.
defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
      f64x_info, 0x51, X86Ranges, X86RangesRnd, SSE_ALU_F64S, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
      0x51, X86Ranges, X86RangesRnd, SSE_ALU_F32S, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;

// Scalar VREDUCE (opcode 0x57), predicated on HasDQI.
defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
      0x57, X86Reduces, X86ReducesRnd, SSE_ALU_F64S, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
      0x57, X86Reduces, X86ReducesRnd, SSE_ALU_F32S, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;

// Scalar VGETMANT, double-precision form (opcode 0x27), predicated on
// HasAVX512.
defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
      0x27, X86GetMants, X86GetMantsRnd, SSE_ALU_F64S, HasAVX512>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
9218defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00009219 0x27, X86GetMants, X86GetMantsRnd, SSE_ALU_F32S, HasAVX512>,
Igor Breger1e58e8a2015-09-02 11:18:55 +00009220 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
9221
// Select the generic rounding nodes on 512-bit vectors (v16f32, v8f64) to
// VRNDSCALEPS/PD Zrri with a fixed immediate:
//   ffloor -> 0x9, fceil -> 0xA, ftrunc -> 0xB, frint -> 0x4,
//   fnearbyint -> 0xC.
// NOTE(review): presumably imm8[1:0] is the rounding direction, bit 2 selects
// the MXCSR rounding mode and bit 3 suppresses the precision exception --
// confirm against the Intel SDM description of VRNDSCALE.
Asaf Badouha5b2e5e2015-07-22 12:00:43 +00009222let Predicates = [HasAVX512] in {
9223def : Pat<(v16f32 (ffloor VR512:$src)),
Ahmed Bougacha58a19742017-06-26 16:00:24 +00009224 (VRNDSCALEPSZrri VR512:$src, (i32 0x9))>;
Asaf Badouha5b2e5e2015-07-22 12:00:43 +00009225def : Pat<(v16f32 (fnearbyint VR512:$src)),
9226 (VRNDSCALEPSZrri VR512:$src, (i32 0xC))>;
9227def : Pat<(v16f32 (fceil VR512:$src)),
Ahmed Bougacha58a19742017-06-26 16:00:24 +00009228 (VRNDSCALEPSZrri VR512:$src, (i32 0xA))>;
Asaf Badouha5b2e5e2015-07-22 12:00:43 +00009229def : Pat<(v16f32 (frint VR512:$src)),
9230 (VRNDSCALEPSZrri VR512:$src, (i32 0x4))>;
9231def : Pat<(v16f32 (ftrunc VR512:$src)),
Ahmed Bougacha58a19742017-06-26 16:00:24 +00009232 (VRNDSCALEPSZrri VR512:$src, (i32 0xB))>;
Asaf Badouha5b2e5e2015-07-22 12:00:43 +00009233
9234def : Pat<(v8f64 (ffloor VR512:$src)),
Ahmed Bougacha58a19742017-06-26 16:00:24 +00009235 (VRNDSCALEPDZrri VR512:$src, (i32 0x9))>;
Asaf Badouha5b2e5e2015-07-22 12:00:43 +00009236def : Pat<(v8f64 (fnearbyint VR512:$src)),
9237 (VRNDSCALEPDZrri VR512:$src, (i32 0xC))>;
9238def : Pat<(v8f64 (fceil VR512:$src)),
Ahmed Bougacha58a19742017-06-26 16:00:24 +00009239 (VRNDSCALEPDZrri VR512:$src, (i32 0xA))>;
Asaf Badouha5b2e5e2015-07-22 12:00:43 +00009240def : Pat<(v8f64 (frint VR512:$src)),
9241 (VRNDSCALEPDZrri VR512:$src, (i32 0x4))>;
9242def : Pat<(v8f64 (ftrunc VR512:$src)),
Ahmed Bougacha58a19742017-06-26 16:00:24 +00009243 (VRNDSCALEPDZrri VR512:$src, (i32 0xB))>;
Asaf Badouha5b2e5e2015-07-22 12:00:43 +00009244}
Elena Demikhovsky9e380862015-06-03 10:56:40 +00009245
// 128- and 256-bit counterparts of the 512-bit rounding patterns, gated on
// HasVLX. The node -> immediate mapping is the same for every type:
//   ffloor -> 0x9, fnearbyint -> 0xC, fceil -> 0xA, frint -> 0x4,
//   ftrunc -> 0xB.
// Order of the groups below: v4f32, v2f64, v8f32, v4f64.
Craig Topperac2508252017-11-11 21:44:51 +00009246let Predicates = [HasVLX] in {
9247def : Pat<(v4f32 (ffloor VR128X:$src)),
9248 (VRNDSCALEPSZ128rri VR128X:$src, (i32 0x9))>;
9249def : Pat<(v4f32 (fnearbyint VR128X:$src)),
9250 (VRNDSCALEPSZ128rri VR128X:$src, (i32 0xC))>;
9251def : Pat<(v4f32 (fceil VR128X:$src)),
9252 (VRNDSCALEPSZ128rri VR128X:$src, (i32 0xA))>;
9253def : Pat<(v4f32 (frint VR128X:$src)),
9254 (VRNDSCALEPSZ128rri VR128X:$src, (i32 0x4))>;
9255def : Pat<(v4f32 (ftrunc VR128X:$src)),
9256 (VRNDSCALEPSZ128rri VR128X:$src, (i32 0xB))>;
9257
9258def : Pat<(v2f64 (ffloor VR128X:$src)),
9259 (VRNDSCALEPDZ128rri VR128X:$src, (i32 0x9))>;
9260def : Pat<(v2f64 (fnearbyint VR128X:$src)),
9261 (VRNDSCALEPDZ128rri VR128X:$src, (i32 0xC))>;
9262def : Pat<(v2f64 (fceil VR128X:$src)),
9263 (VRNDSCALEPDZ128rri VR128X:$src, (i32 0xA))>;
9264def : Pat<(v2f64 (frint VR128X:$src)),
9265 (VRNDSCALEPDZ128rri VR128X:$src, (i32 0x4))>;
9266def : Pat<(v2f64 (ftrunc VR128X:$src)),
9267 (VRNDSCALEPDZ128rri VR128X:$src, (i32 0xB))>;
9268
9269def : Pat<(v8f32 (ffloor VR256X:$src)),
9270 (VRNDSCALEPSZ256rri VR256X:$src, (i32 0x9))>;
9271def : Pat<(v8f32 (fnearbyint VR256X:$src)),
9272 (VRNDSCALEPSZ256rri VR256X:$src, (i32 0xC))>;
9273def : Pat<(v8f32 (fceil VR256X:$src)),
9274 (VRNDSCALEPSZ256rri VR256X:$src, (i32 0xA))>;
9275def : Pat<(v8f32 (frint VR256X:$src)),
9276 (VRNDSCALEPSZ256rri VR256X:$src, (i32 0x4))>;
9277def : Pat<(v8f32 (ftrunc VR256X:$src)),
9278 (VRNDSCALEPSZ256rri VR256X:$src, (i32 0xB))>;
9279
9280def : Pat<(v4f64 (ffloor VR256X:$src)),
9281 (VRNDSCALEPDZ256rri VR256X:$src, (i32 0x9))>;
9282def : Pat<(v4f64 (fnearbyint VR256X:$src)),
9283 (VRNDSCALEPDZ256rri VR256X:$src, (i32 0xC))>;
9284def : Pat<(v4f64 (fceil VR256X:$src)),
9285 (VRNDSCALEPDZ256rri VR256X:$src, (i32 0xA))>;
9286def : Pat<(v4f64 (frint VR256X:$src)),
9287 (VRNDSCALEPDZ256rri VR256X:$src, (i32 0x4))>;
9288def : Pat<(v4f64 (ftrunc VR256X:$src)),
9289 (VRNDSCALEPDZ256rri VR256X:$src, (i32 0xB))>;
9290}
9291
// Instantiates a three-operand imm8 shuffle built on X86Shuf128 for the
// 512-bit type (Z, needs HasAVX512) and the 256-bit type (Z256, needs
// HasAVX512 and HasVLX). No 128-bit form is instantiated here.
//   OpcodeStr - assembly mnemonic
//   itins     - itinerary/scheduling bundle forwarded to avx512_3Op_imm8
//   _         - per-width vector type info (info512 / info256 are used)
//   opc       - opcode byte
Simon Pilgrim36be8522017-11-29 18:52:20 +00009292multiclass avx512_shuff_packed_128<string OpcodeStr, OpndItins itins,
9293 AVX512VLVectorVTInfo _, bits<8> opc>{
Craig Topper42a53532017-08-16 23:38:25 +00009294 let Predicates = [HasAVX512] in {
Simon Pilgrim36be8522017-11-29 18:52:20 +00009295 defm Z : avx512_3Op_imm8<opc, OpcodeStr, X86Shuf128, itins, _.info512>, EVEX_V512;
Craig Topper42a53532017-08-16 23:38:25 +00009296
9297 }
9298 let Predicates = [HasAVX512, HasVLX] in {
Simon Pilgrim36be8522017-11-29 18:52:20 +00009299 defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, X86Shuf128, itins, _.info256>, EVEX_V256;
Craig Topper42a53532017-08-16 23:38:25 +00009300 }
9301}
9302
// 128-bit-lane shuffles. The FP forms use opcode 0x23 and the integer forms
// 0x43; the 64-bit-element variants add VEX_W. All four share SSE_SHUFP.
Simon Pilgrim36be8522017-11-29 18:52:20 +00009303defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", SSE_SHUFP,
9304 avx512vl_f32_info, 0x23>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
9305defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", SSE_SHUFP,
9306 avx512vl_f64_info, 0x23>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
9307defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", SSE_SHUFP,
9308 avx512vl_i32_info, 0x43>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
9309defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", SSE_SHUFP,
9310 avx512vl_i64_info, 0x43>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
Igor Breger00d9f842015-06-08 14:03:17 +00009311
// Register-source fallbacks for broadcasting a 128-bit subvector to 512 bits.
// Each pattern inserts the XMM source into the low subregister (sub_xmm) of
// an otherwise undefined 512-bit value, feeds that same value as both shuffle
// sources, and uses immediate 0.
// NOTE(review): immediate 0 presumably selects 128-bit lane 0 for every
// destination lane -- confirm against the VSHUFF32X4/VSHUFI32X4 encoding.
// v32i16 and v64i8 have no dedicated shuffle and reuse VSHUFI32X4.
Craig Topperb561e662017-01-19 02:34:29 +00009312let Predicates = [HasAVX512] in {
9313// Provide fallback in case the load node that is used in the broadcast
9314// patterns above is used by additional users, which prevents the pattern
9315// selection.
9316def : Pat<(v8f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
9317 (VSHUFF64X2Zrri (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9318 (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9319 0)>;
9320def : Pat<(v8i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
9321 (VSHUFI64X2Zrri (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9322 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9323 0)>;
9324
9325def : Pat<(v16f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
9326 (VSHUFF32X4Zrri (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9327 (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9328 0)>;
9329def : Pat<(v16i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
9330 (VSHUFI32X4Zrri (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9331 (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9332 0)>;
9333
9334def : Pat<(v32i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
9335 (VSHUFI32X4Zrri (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9336 (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9337 0)>;
9338
9339def : Pat<(v64i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
9340 (VSHUFI32X4Zrri (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9341 (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9342 0)>;
9343}
9344
// Thin wrapper that instantiates avx512_common_3Op_imm8 for VALIGN: opcode
// 0x03, node X86VAlign, with AVX512AIi8Base and EVEX_4V applied. The caller
// supplies the mnemonic, itineraries and the per-width integer type info.
Simon Pilgrim36be8522017-11-29 18:52:20 +00009345multiclass avx512_valign<string OpcodeStr, OpndItins itins,
9346 AVX512VLVectorVTInfo VTInfo_I> {
9347 defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_I, 0x03, X86VAlign, itins>,
Igor Breger00d9f842015-06-08 14:03:17 +00009348 AVX512AIi8Base, EVEX_4V;
Igor Breger00d9f842015-06-08 14:03:17 +00009349}
9350
// Element-granular aligns: VALIGND (32-bit elements) and VALIGNQ (64-bit
// elements, VEX_W), both using SSE_PALIGN itineraries.
Simon Pilgrim36be8522017-11-29 18:52:20 +00009351defm VALIGND: avx512_valign<"valignd", SSE_PALIGN, avx512vl_i32_info>,
Igor Breger00d9f842015-06-08 14:03:17 +00009352 EVEX_CD8<32, CD8VF>;
Simon Pilgrim36be8522017-11-29 18:52:20 +00009353defm VALIGNQ: avx512_valign<"valignq", SSE_PALIGN, avx512vl_i64_info>,
Igor Breger00d9f842015-06-08 14:03:17 +00009354 EVEX_CD8<64, CD8VF>, VEX_W;
Elena Demikhovsky5e2f8c42015-06-23 08:19:46 +00009355
// Byte-granular VPALIGNR: opcode 0x0F, node X86PAlignr, i8 type info passed
// for both type-info parameters.
Simon Pilgrim36be8522017-11-29 18:52:20 +00009356defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr", SSE_PALIGN,
Igor Breger2ae0fe32015-08-31 11:14:02 +00009357 avx512vl_i8_info, avx512vl_i8_info>,
Igor Breger2ae0fe32015-08-31 11:14:02 +00009358 EVEX_CD8<8, CD8VF>;
9359
Craig Topper333897e2017-11-03 06:48:02 +00009360// Fragments to help convert valignq into masked valignd. Or valignq/valignd
9361// into vpalignr.
// Each transform rescales the shift-count immediate to the narrower element
// unit of the target instruction and re-emits it via getI8Imm. No range check
// on the product is visible here.
// qword count -> dword count (x2).
9362def ValignqImm32XForm : SDNodeXForm<imm, [{
9363 return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
9364}]>;
// qword count -> byte count (x8).
9365def ValignqImm8XForm : SDNodeXForm<imm, [{
9366 return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
9367}]>;
// dword count -> byte count (x4).
9368def ValigndImm8XForm : SDNodeXForm<imm, [{
9369 return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
9370}]>;
9371
// Patterns that fold a vselect around an align computed in one element type
// (From) into a masked align instruction of another element type (To).
//   OpcodeStr - instruction name prefix; the suffixes rrik/rrikz/rmik/rmikz
//               are appended to find the instruction record
//   OpNode    - align node being matched
//   From      - type in which the align was originally expressed
//   To        - type of the vselect and of the emitted instruction
//   ImmXForm  - rescales the immediate from From units to To units
// Four patterns: register source with merge masking, register source with
// zero masking, full-vector load source with merge masking, and full-vector
// load source with zero masking.
9372multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
9373 X86VectorVTInfo From, X86VectorVTInfo To,
9374 SDNodeXForm ImmXForm> {
9375 def : Pat<(To.VT (vselect To.KRCWM:$mask,
9376 (bitconvert
9377 (From.VT (OpNode From.RC:$src1, From.RC:$src2,
9378 imm:$src3))),
9379 To.RC:$src0)),
9380 (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
9381 To.RC:$src1, To.RC:$src2,
9382 (ImmXForm imm:$src3))>;
9383
9384 def : Pat<(To.VT (vselect To.KRCWM:$mask,
9385 (bitconvert
9386 (From.VT (OpNode From.RC:$src1, From.RC:$src2,
9387 imm:$src3))),
9388 To.ImmAllZerosV)),
9389 (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
9390 To.RC:$src1, To.RC:$src2,
9391 (ImmXForm imm:$src3))>;
9392
9393 def : Pat<(To.VT (vselect To.KRCWM:$mask,
9394 (bitconvert
9395 (From.VT (OpNode From.RC:$src1,
9396 (bitconvert (To.LdFrag addr:$src2)),
9397 imm:$src3))),
9398 To.RC:$src0)),
9399 (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
9400 To.RC:$src1, addr:$src2,
9401 (ImmXForm imm:$src3))>;
9402
9403 def : Pat<(To.VT (vselect To.KRCWM:$mask,
9404 (bitconvert
9405 (From.VT (OpNode From.RC:$src1,
9406 (bitconvert (To.LdFrag addr:$src2)),
9407 imm:$src3))),
9408 To.ImmAllZerosV)),
9409 (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
9410 To.RC:$src1, addr:$src2,
9411 (ImmXForm imm:$src3))>;
9412}
9413
// Extends avx512_vpalign_mask_lowering with patterns whose second source is a
// scalar load broadcast in the To element type (X86VBroadcast of
// To.ScalarLdFrag). Three patterns are added: unmasked (rmbi, result in
// From.VT), merge-masked (rmbik) and zero-masked (rmbikz). Parameters have
// the same meaning as in the base multiclass.
9414multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
9415 X86VectorVTInfo From,
9416 X86VectorVTInfo To,
9417 SDNodeXForm ImmXForm> :
9418 avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
9419 def : Pat<(From.VT (OpNode From.RC:$src1,
9420 (bitconvert (To.VT (X86VBroadcast
9421 (To.ScalarLdFrag addr:$src2)))),
9422 imm:$src3)),
9423 (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
9424 (ImmXForm imm:$src3))>;
9425
9426 def : Pat<(To.VT (vselect To.KRCWM:$mask,
9427 (bitconvert
9428 (From.VT (OpNode From.RC:$src1,
9429 (bitconvert
9430 (To.VT (X86VBroadcast
9431 (To.ScalarLdFrag addr:$src2)))),
9432 imm:$src3))),
9433 To.RC:$src0)),
9434 (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
9435 To.RC:$src1, addr:$src2,
9436 (ImmXForm imm:$src3))>;
9437
9438 def : Pat<(To.VT (vselect To.KRCWM:$mask,
9439 (bitconvert
9440 (From.VT (OpNode From.RC:$src1,
9441 (bitconvert
9442 (To.VT (X86VBroadcast
9443 (To.ScalarLdFrag addr:$src2)))),
9444 imm:$src3))),
9445 To.ImmAllZerosV)),
9446 (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
9447 To.RC:$src1, addr:$src2,
9448 (ImmXForm imm:$src3))>;
9449}
9450
// Instantiations of the valign mask-lowering patterns. The first two groups
// re-express a qword align as VALIGND (immediate x2) at 512, 128 and 256
// bits, including the broadcast-load forms.
9451let Predicates = [HasAVX512] in {
9452 // For 512-bit we lower to the widest element type we can. So we only need
9453 // to handle converting valignq to valignd.
9454 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
9455 v16i32_info, ValignqImm32XForm>;
9456}
9457
9458let Predicates = [HasVLX] in {
9459 // For 128-bit we lower to the widest element type we can. So we only need
9460 // to handle converting valignq to valignd.
9461 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
9462 v4i32x_info, ValignqImm32XForm>;
9463 // For 256-bit we lower to the widest element type we can. So we only need
9464 // to handle converting valignq to valignd.
9465 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
9466 v8i32x_info, ValignqImm32XForm>;
9467}
9468
// NOTE(review): the comment below mentions 256 bit, but only the 128-bit
// VPALIGNRZ128 forms are instantiated in this group (qword x8 and dword x4
// immediates). The non-broadcast multiclass is used here.
9469let Predicates = [HasVLX, HasBWI] in {
9470 // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR.
9471 defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
9472 v16i8x_info, ValignqImm8XForm>;
9473 defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
9474 v16i8x_info, ValigndImm8XForm>;
9475}
9476
// VDBPSADBW: opcode 0x42, node X86dbpsadbw. The two type-info arguments
// differ (i16 then i8); EVEX_CD8 uses the 8-bit element size.
Simon Pilgrim36be8522017-11-29 18:52:20 +00009477defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
9478 SSE_INTMUL_ITINS_P, avx512vl_i16_info, avx512vl_i8_info>,
9479 EVEX_CD8<8, CD8VF>;
Igor Bregerf3ded812015-08-31 13:09:30 +00009480
// One-source vector operation for a single vector type `_`, built on
// AVX512_maskable (defined elsewhere in this file).
//   rr - register source, selects (OpNode _.RC:$src1)
//   rm - full-vector memory source loaded through _.LdFrag and bitconverted;
//        scheduled with the folded-load variant of itins.Sched
// Both forms run in the execution domain of the vector type.
Elena Demikhovsky5e2f8c42015-06-23 08:19:46 +00009481multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim756348c2017-11-29 13:49:51 +00009482 OpndItins itins, X86VectorVTInfo _> {
Craig Toppere9e84c82017-01-31 05:18:24 +00009483 let ExeDomain = _.ExeDomain in {
Elena Demikhovsky5e2f8c42015-06-23 08:19:46 +00009484 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
Igor Breger24cab0f2015-11-16 07:22:00 +00009485 (ins _.RC:$src1), OpcodeStr,
Elena Demikhovsky5e2f8c42015-06-23 08:19:46 +00009486 "$src1", "$src1",
Simon Pilgrim756348c2017-11-29 13:49:51 +00009487 (_.VT (OpNode _.RC:$src1)), itins.rr>, EVEX, AVX5128IBase,
9488 Sched<[itins.Sched]>;
Elena Demikhovsky5e2f8c42015-06-23 08:19:46 +00009489
Craig Toppere1cac152016-06-07 07:27:54 +00009490 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9491 (ins _.MemOp:$src1), OpcodeStr,
9492 "$src1", "$src1",
Simon Pilgrim756348c2017-11-29 13:49:51 +00009493 (_.VT (OpNode (bitconvert (_.LdFrag addr:$src1)))), itins.rm>,
9494 EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
9495 Sched<[itins.Sched.Folded]>;
Craig Toppere9e84c82017-01-31 05:18:24 +00009496 }
Elena Demikhovsky5e2f8c42015-06-23 08:19:46 +00009497}
9498
// avx512_unary_rm plus an rmb form whose source is a scalar load broadcast to
// every element (X86VBroadcast of _.ScalarLdFrag). The rmb form sets EVEX_B
// and appends _.BroadcastStr to the printed operand.
// NOTE(review): unlike rr/rm in avx512_unary_rm, rmb is not wrapped in
// `let ExeDomain = _.ExeDomain` -- confirm whether that is intentional.
9499multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim756348c2017-11-29 13:49:51 +00009500 OpndItins itins, X86VectorVTInfo _> :
9501 avx512_unary_rm<opc, OpcodeStr, OpNode, itins, _> {
Craig Toppere1cac152016-06-07 07:27:54 +00009502 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9503 (ins _.ScalarMemOp:$src1), OpcodeStr,
9504 "${src1}"##_.BroadcastStr,
9505 "${src1}"##_.BroadcastStr,
9506 (_.VT (OpNode (X86VBroadcast
Simon Pilgrim756348c2017-11-29 13:49:51 +00009507 (_.ScalarLdFrag addr:$src1)))), itins.rm>,
9508 EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
9509 Sched<[itins.Sched.Folded]>;
Elena Demikhovsky5e2f8c42015-06-23 08:19:46 +00009510}
9511
// Instantiates avx512_unary_rm (no broadcast form) at all three vector
// widths: Z (512-bit) under [prd], Z256 and Z128 under [prd, HasVLX].
9512multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim756348c2017-11-29 13:49:51 +00009513 OpndItins itins, AVX512VLVectorVTInfo VTInfo,
9514 Predicate prd> {
Elena Demikhovsky5e2f8c42015-06-23 08:19:46 +00009515 let Predicates = [prd] in
Simon Pilgrim756348c2017-11-29 13:49:51 +00009516 defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, itins, VTInfo.info512>,
9517 EVEX_V512;
Elena Demikhovsky5e2f8c42015-06-23 08:19:46 +00009518
9519 let Predicates = [prd, HasVLX] in {
Simon Pilgrim756348c2017-11-29 13:49:51 +00009520 defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, itins, VTInfo.info256>,
Elena Demikhovsky5e2f8c42015-06-23 08:19:46 +00009521 EVEX_V256;
Simon Pilgrim756348c2017-11-29 13:49:51 +00009522 defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, itins, VTInfo.info128>,
Elena Demikhovsky5e2f8c42015-06-23 08:19:46 +00009523 EVEX_V128;
9524 }
9525}
9526
// Same width fan-out as avx512_unary_rm_vl, but built on avx512_unary_rmb so
// every width also gets the broadcast-load (rmb) form.
9527multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim756348c2017-11-29 13:49:51 +00009528 OpndItins itins, AVX512VLVectorVTInfo VTInfo,
9529 Predicate prd> {
Elena Demikhovsky5e2f8c42015-06-23 08:19:46 +00009530 let Predicates = [prd] in
Simon Pilgrim756348c2017-11-29 13:49:51 +00009531 defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info512>,
Elena Demikhovsky5e2f8c42015-06-23 08:19:46 +00009532 EVEX_V512;
9533
9534 let Predicates = [prd, HasVLX] in {
Simon Pilgrim756348c2017-11-29 13:49:51 +00009535 defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info256>,
Elena Demikhovsky5e2f8c42015-06-23 08:19:46 +00009536 EVEX_V256;
Simon Pilgrim756348c2017-11-29 13:49:51 +00009537 defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info128>,
Elena Demikhovsky5e2f8c42015-06-23 08:19:46 +00009538 EVEX_V128;
9539 }
9540}
9541
// Dword/qword pair of a unary op. Q uses opc_q, the i64 type info, mnemonic
// suffix "q" and VEX_W; D uses opc_d, the i32 type info and suffix "d".
// Both include the broadcast-load forms.
9542multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
Simon Pilgrim756348c2017-11-29 13:49:51 +00009543 SDNode OpNode, OpndItins itins, Predicate prd> {
9544 defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, itins,
9545 avx512vl_i64_info, prd>, VEX_W;
9546 defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, itins,
9547 avx512vl_i32_info, prd>;
Elena Demikhovsky5e2f8c42015-06-23 08:19:46 +00009548}
9549
// Byte/word pair of a unary op. Built on avx512_unary_rm_vl, so there are no
// broadcast-load forms; both variants are marked VEX_WIG.
9550multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
Simon Pilgrim756348c2017-11-29 13:49:51 +00009551 SDNode OpNode, OpndItins itins, Predicate prd> {
9552 defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, itins,
9553 avx512vl_i16_info, prd>, VEX_WIG;
9554 defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, itins,
9555 avx512vl_i8_info, prd>, VEX_WIG;
Elena Demikhovsky5e2f8c42015-06-23 08:19:46 +00009556}
9557
// All four element sizes of a unary integer op under one NAME: the D/Q forms
// are gated on HasAVX512 and the B/W forms on HasBWI. Opcode parameters are
// given in the order byte, word, dword, qword.
9558multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
9559 bits<8> opc_d, bits<8> opc_q,
Simon Pilgrim756348c2017-11-29 13:49:51 +00009560 string OpcodeStr, SDNode OpNode,
9561 OpndItins itins> {
9562 defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, itins,
Elena Demikhovsky5e2f8c42015-06-23 08:19:46 +00009563 HasAVX512>,
Simon Pilgrim756348c2017-11-29 13:49:51 +00009564 avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, itins,
Elena Demikhovsky5e2f8c42015-06-23 08:19:46 +00009565 HasBWI>;
9566}
9567
// VPABS for all element sizes: B=0x1C, W=0x1D, D=0x1E, Q=0x1F, selecting the
// generic `abs` node.
Simon Pilgrim756348c2017-11-29 13:49:51 +00009568defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs, SSE_PABS>;
Igor Bregerf2460112015-07-26 14:41:44 +00009569
Simon Pilgrimfea153f2017-05-06 19:11:59 +00009570// VPABS: Use 512bit version to implement 128/256 bit in case NoVLX.
// Only the qword types (v4i64, v2i64) are handled here. The source is placed
// in the low subregister of an undefined 512-bit value, VPABSQZrr runs on the
// whole register, and the same subregister is extracted from the result.
9571let Predicates = [HasAVX512, NoVLX] in {
9572 def : Pat<(v4i64 (abs VR256X:$src)),
9573 (EXTRACT_SUBREG
9574 (VPABSQZrr
9575 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
9576 sub_ymm)>;
9577 def : Pat<(v2i64 (abs VR128X:$src)),
9578 (EXTRACT_SUBREG
9579 (VPABSQZrr
9580 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
9581 sub_xmm)>;
9582}
9583
Craig Topperc0896052017-12-16 02:40:28 +00009584// Use 512bit version to implement 128/256 bit.
// Patterns are active under [prd, NoVLX]. For the 256-bit and the 128-bit
// type in `_`, the source is inserted into an undefined 512-bit value at that
// type's SubRegIdx, the instruction named InstrStr#"Zrr" is applied, and the
// same subregister is extracted from the result.
//   InstrStr - instruction name prefix (without the "Zrr" suffix)
//   OpNode   - unary node being matched
//   _        - per-width vector type info
//   prd      - feature predicate required by the 512-bit instruction
9585multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
9586 AVX512VLVectorVTInfo _, Predicate prd> {
9587 let Predicates = [prd, NoVLX] in {
9588 def : Pat<(_.info256.VT(OpNode _.info256.RC:$src1)),
9589 (EXTRACT_SUBREG
9590 (!cast<Instruction>(InstrStr # "Zrr")
9591 (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
9592 _.info256.RC:$src1,
9593 _.info256.SubRegIdx)),
9594 _.info256.SubRegIdx)>;
9595
9596 def : Pat<(_.info128.VT(OpNode _.info128.RC:$src1)),
9597 (EXTRACT_SUBREG
9598 (!cast<Instruction>(InstrStr # "Zrr")
9599 (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
9600 _.info128.RC:$src1,
9601 _.info128.SubRegIdx)),
9602 _.info128.SubRegIdx)>;
9603 }
Igor Breger0dcd8bc2015-09-03 09:05:31 +00009604}
9605
// VPLZCNT and VPCONFLICT, D and Q forms only, both gated on HasCDI. Each uses
// the same opcode byte for D and Q (0x44 and 0xC4 respectively).
Simon Pilgrim756348c2017-11-29 13:49:51 +00009606// FIXME: Is there a better scheduler itinerary for VPLZCNT?
Craig Topperc0896052017-12-16 02:40:28 +00009607defm VPLZCNT : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
9608 SSE_INTALU_ITINS_P, HasCDI>;
Simon Pilgrim756348c2017-11-29 13:49:51 +00009609
9610// FIXME: Is there a better scheduler itinerary for VPCONFLICT?
9611defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
9612 SSE_INTALU_ITINS_P, HasCDI>;
Igor Breger0dcd8bc2015-09-03 09:05:31 +00009613
Simon Pilgrimc89aa0b2017-05-05 12:20:34 +00009614// VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
// No equivalent widening patterns are instantiated for VPCONFLICT here.
Craig Topperc0896052017-12-16 02:40:28 +00009615defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
9616defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;
Simon Pilgrimc89aa0b2017-05-05 12:20:34 +00009617
Igor Breger24cab0f2015-11-16 07:22:00 +00009618//===---------------------------------------------------------------------===//
Oren Ben Simhon7bf27f02017-05-25 13:45:23 +00009619// Counts number of ones - VPOPCNTD and VPOPCNTQ
9620//===---------------------------------------------------------------------===//
9621
// VPOPCNTD/VPOPCNTQ (opcode 0x55 for both) select `ctpop` and are gated on
// HasVPOPCNTDQ. The two lowering instantiations widen the 128/256-bit cases
// to the 512-bit instruction when VLX is unavailable.
Simon Pilgrim756348c2017-11-29 13:49:51 +00009622// FIXME: Is there a better scheduler itinerary for VPOPCNTD/VPOPCNTQ?
Craig Topperc0896052017-12-16 02:40:28 +00009623defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
9624 SSE_INTALU_ITINS_P, HasVPOPCNTDQ>;
Simon Pilgrim756348c2017-11-29 13:49:51 +00009625
Craig Topperc0896052017-12-16 02:40:28 +00009626defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
9627defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
Oren Ben Simhon7bf27f02017-05-25 13:45:23 +00009628
9629//===---------------------------------------------------------------------===//
Igor Breger24cab0f2015-11-16 07:22:00 +00009630// Replicate Single FP - MOVSHDUP and MOVSLDUP
9631//===---------------------------------------------------------------------===//
// Wrapper for the single-precision replicate instructions: a unary op over
// the f32 vector types at all three widths, gated on HasAVX512, with the XS
// prefix. No broadcast-load form is generated.
Simon Pilgrim756348c2017-11-29 13:49:51 +00009632multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
9633 OpndItins itins> {
9634 defm NAME: avx512_unary_rm_vl<opc, OpcodeStr, OpNode, itins,
9635 avx512vl_f32_info, HasAVX512>, XS;
Igor Breger24cab0f2015-11-16 07:22:00 +00009636}
9637
// VMOVSHDUP (0x16) and VMOVSLDUP (0x12); both reuse the SSE_MOVDDUP
// itineraries.
Simon Pilgrim756348c2017-11-29 13:49:51 +00009638defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup, SSE_MOVDDUP>;
9639defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup, SSE_MOVDDUP>;
Igor Breger1f782962015-11-19 08:26:56 +00009640
9641//===----------------------------------------------------------------------===//
9642// AVX-512 - MOVDDUP
9643//===----------------------------------------------------------------------===//
9644
// 128-bit MOVDDUP forms for a single vector type `_`.
//   rr - register source, selects (OpNode (_.VT _.RC:$src))
//   rm - the memory operand is a scalar (_.ScalarMemOp): the pattern loads
//        one element, wraps it with scalar_to_vector and applies OpNode.
//        EVEX_CD8 uses the half-vector tuple (CD8VH).
9645multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim756348c2017-11-29 13:49:51 +00009646 OpndItins itins, X86VectorVTInfo _> {
Craig Toppere9e84c82017-01-31 05:18:24 +00009647 let ExeDomain = _.ExeDomain in {
Igor Breger1f782962015-11-19 08:26:56 +00009648 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9649 (ins _.RC:$src), OpcodeStr, "$src", "$src",
Simon Pilgrim756348c2017-11-29 13:49:51 +00009650 (_.VT (OpNode (_.VT _.RC:$src))), itins.rr>, EVEX,
9651 Sched<[itins.Sched]>;
Craig Toppere1cac152016-06-07 07:27:54 +00009652 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9653 (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
9654 (_.VT (OpNode (_.VT (scalar_to_vector
Simon Pilgrim756348c2017-11-29 13:49:51 +00009655 (_.ScalarLdFrag addr:$src))))),
9656 itins.rm>, EVEX, EVEX_CD8<_.EltSize, CD8VH>,
9657 Sched<[itins.Sched.Folded]>;
Craig Toppere9e84c82017-01-31 05:18:24 +00009658 }
Igor Breger1f782962015-11-19 08:26:56 +00009659}
9660
// Instantiates MOVDDUP at all three vector widths.
//   opc       - opcode byte
//   OpcodeStr - assembly mnemonic
//   OpNode    - node selected by the 512-bit and 256-bit forms
//   itins     - itinerary/scheduling bundle
//   VTInfo    - per-width vector type info
// Z (512-bit) carries no extra predicate here; Z256 and Z128 require
// HasAVX512 and HasVLX. The 128-bit form deliberately selects X86VBroadcast
// rather than OpNode and takes a scalar memory operand (see
// avx512_movddup_128).
multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 OpndItins itins, AVX512VLVectorVTInfo VTInfo> {

  // Use the caller-supplied node instead of a hard-coded X86Movddup so the
  // OpNode parameter is actually honoured.
  defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, itins, VTInfo.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, itins, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_movddup_128<opc, OpcodeStr, X86VBroadcast, itins, VTInfo.info128>,
                EVEX_V128;
  }
}
9673
// Binds avx512_movddup_common to the f64 vector types and applies the XD
// prefix and VEX_W.
Simon Pilgrim756348c2017-11-29 13:49:51 +00009674multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode,
9675 OpndItins itins> {
9676 defm NAME: avx512_movddup_common<opc, OpcodeStr, OpNode, itins,
Igor Breger1f782962015-11-19 08:26:56 +00009677 avx512vl_f64_info>, XD, VEX_W;
Igor Breger1f782962015-11-19 08:26:56 +00009678}
9679
// VMOVDDUP: opcode 0x12, node X86Movddup.
Simon Pilgrim756348c2017-11-29 13:49:51 +00009680defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SSE_MOVDDUP>;
Igor Breger1f782962015-11-19 08:26:56 +00009681
// Extra selection patterns that map v2f64 X86VBroadcast onto VMOVDDUPZ128,
// gated on HasVLX. Three source shapes are covered: a scalar f64 load, a
// scalar f64 register (copied into VR128X first), and a full v2f64 load.
// The second half repeats them under a v2i1 vselect, choosing the
// merge-masked (k) form when the false operand is a register and the
// zero-masked (kz) form when it is all zeros.
Craig Topper7eb0e7c2016-09-29 05:54:43 +00009682let Predicates = [HasVLX] in {
Igor Breger1f782962015-11-19 08:26:56 +00009683def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
Craig Topper7eb0e7c2016-09-29 05:54:43 +00009684 (VMOVDDUPZ128rm addr:$src)>;
9685def : Pat<(v2f64 (X86VBroadcast f64:$src)),
9686 (VMOVDDUPZ128rr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
Craig Topperf6c69562017-10-13 21:56:48 +00009687def : Pat<(v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
9688 (VMOVDDUPZ128rm addr:$src)>;
Craig Topperda84ff32017-01-07 22:20:23 +00009689
9690def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
9691 (v2f64 VR128X:$src0)),
9692 (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
9693 (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
9694def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
9695 (bitconvert (v4i32 immAllZerosV))),
9696 (VMOVDDUPZ128rrkz VK2WM:$mask, (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
9697
9698def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
9699 (v2f64 VR128X:$src0)),
9700 (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9701def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
9702 (bitconvert (v4i32 immAllZerosV))),
9703 (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
Craig Topperf6c69562017-10-13 21:56:48 +00009704
9705def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
9706 (v2f64 VR128X:$src0)),
9707 (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9708def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
9709 (bitconvert (v4i32 immAllZerosV))),
9710 (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
Craig Topper7eb0e7c2016-09-29 05:54:43 +00009711}
Igor Breger1f782962015-11-19 08:26:56 +00009712
Igor Bregerf2460112015-07-26 14:41:44 +00009713//===----------------------------------------------------------------------===//
9714// AVX-512 - Unpack Instructions
9715//===----------------------------------------------------------------------===//
// Floating-point unpacks (opcodes 0x15 high, 0x14 low), gated on HasAVX512.
// NOTE(review): these packed instructions are given SSE_ALU_ITINS_S -- the
// name suggests a scalar itinerary bundle; confirm this is intended.
Craig Topper9433f972016-08-02 06:16:53 +00009716defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512,
9717 SSE_ALU_ITINS_S>;
9718defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512,
9719 SSE_ALU_ITINS_S>;
Igor Bregerf2460112015-07-26 14:41:44 +00009720
// Integer unpacks. The byte and word forms are gated on HasBWI.
9721defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
9722 SSE_INTALU_ITINS_P, HasBWI>;
9723defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
9724 SSE_INTALU_ITINS_P, HasBWI>;
9725defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
9726 SSE_INTALU_ITINS_P, HasBWI>;
9727defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
9728 SSE_INTALU_ITINS_P, HasBWI>;
9729
// The dword and qword forms are gated on HasAVX512.
9730defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
9731 SSE_INTALU_ITINS_P, HasAVX512>;
9732defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
9733 SSE_INTALU_ITINS_P, HasAVX512>;
9734defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
9735 SSE_INTALU_ITINS_P, HasAVX512>;
9736defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
9737 SSE_INTALU_ITINS_P, HasAVX512>;
Igor Bregerdefab3c2015-10-08 12:55:01 +00009738
9739//===----------------------------------------------------------------------===//
9740// AVX-512 - Extract & Insert Integer Instructions
9741//===----------------------------------------------------------------------===//
9742
// Store form (mr) shared by the byte and word element extracts. The pattern
// truncates the result of OpNode to the element type and stores it to $dst.
// Prefix/opcode-map modifiers are supplied by the instantiating defm.
9743multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
9744 X86VectorVTInfo _> {
Craig Toppere1cac152016-06-07 07:27:54 +00009745 def mr : AVX512Ii8<opc, MRMDestMem, (outs),
9746 (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
9747 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
Simon Pilgrim1dcb9132017-10-23 16:00:57 +00009748 [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), imm:$src2))),
9749 addr:$dst)]>,
Simon Pilgrimd255a622017-12-06 18:46:06 +00009750 EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteShuffleLd]>;
Igor Bregerdefab3c2015-10-08 12:55:01 +00009751}
9752
// Byte element extract, gated on HasBWI.
//   rr   - opcode 0x14, result in a GR32orGR64 register, selects X86pextrb
//   NAME - store form from avx512_extract_elt_bw_m with the same opcode
9753multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
9754 let Predicates = [HasBWI] in {
9755 def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
9756 (ins _.RC:$src1, u8imm:$src2),
9757 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
9758 [(set GR32orGR64:$dst,
9759 (X86pextrb (_.VT _.RC:$src1), imm:$src2))]>,
Simon Pilgrimd255a622017-12-06 18:46:06 +00009760 EVEX, TAPD, Sched<[WriteShuffle]>;
Igor Bregerdefab3c2015-10-08 12:55:01 +00009761
9762 defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
9763 }
9764}
9765
// Word element extract, gated on HasBWI. There are two register encodings:
//   rr     - opcode 0xC5, MRMSrcReg, PD prefix; carries the X86pextrw pattern
//   rr_REV - opcode 0x15, MRMDestReg, TAPD; no selection pattern, printed
//            with a ".s" mnemonic suffix, hasSideEffects = 0, and tied to rr
//            through FoldGenData
//   NAME   - store form from avx512_extract_elt_bw_m with opcode 0x15
9766multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
9767 let Predicates = [HasBWI] in {
9768 def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
9769 (ins _.RC:$src1, u8imm:$src2),
9770 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
9771 [(set GR32orGR64:$dst,
Simon Pilgrimd255a622017-12-06 18:46:06 +00009772 (X86pextrw (_.VT _.RC:$src1), imm:$src2))],
9773 IIC_SSE_PEXTRW>, EVEX, PD, Sched<[WriteShuffle]>;
Igor Bregerdefab3c2015-10-08 12:55:01 +00009774
Craig Topper99f6b622016-05-01 01:03:56 +00009775 let hasSideEffects = 0 in
Igor Breger55747302015-11-18 08:46:16 +00009776 def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
9777 (ins _.RC:$src1, u8imm:$src2),
Simon Pilgrimd255a622017-12-06 18:46:06 +00009778 OpcodeStr#".s\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
9779 IIC_SSE_PEXTRW>, EVEX, TAPD, FoldGenData<NAME#rr>,
9780 Sched<[WriteShuffle]>;
Igor Breger55747302015-11-18 08:46:16 +00009781
Igor Bregerdefab3c2015-10-08 12:55:01 +00009782 defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
9783 }
9784}
9785
// Dword/qword element extract (opcode 0x16), guarded by HasDQI.
//   rr - extracts the element selected by $src2 into a GRC register.
//   mr - stores the extracted element directly to memory.
// GRC is the general-purpose register class matching the element width
// (the instantiations in this file pass GR32 or GR64).
multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
                                 RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
                  (ins _.RC:$src1, u8imm:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set GRC:$dst,
                      (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
                  EVEX, TAPD, Sched<[WriteShuffle]>;

    def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
                (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                [(store (extractelt (_.VT _.RC:$src1),
                                    imm:$src2),addr:$dst)]>,
                EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
                Sched<[WriteShuffleLd]>;
  }
}
9805
// EVEX-encoded element extracts from 128-bit vectors, one per element width.
defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG;
defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG;
defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;
9810
// Memory-source form shared by the element-insert multiclasses.
//   rm - inserts the scalar loaded from $src2 (through LdFrag) into the vector
//        $src1 at the position given by the immediate $src3.
// OpNode is the insert node to match; LdFrag is the load fragment used for
// the scalar operand.
multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86VectorVTInfo _, PatFrag LdFrag> {
  def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
      OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set _.RC:$dst,
          (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), imm:$src3)))]>,
      EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteShuffleLd, ReadAfterLd]>;
}
9820
// Byte/word element insert, guarded by HasBWI.
//   rr   - inserts the value held in a GR32orGR64 register.
//   NAME - memory-source form instantiated from avx512_insert_elt_m.
// The opcode-map prefix is not set here; it is supplied where the multiclass
// is instantiated.
multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86VectorVTInfo _, PatFrag LdFrag> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
        (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set _.RC:$dst,
            (OpNode _.RC:$src1, GR32orGR64:$src2, imm:$src3))]>, EVEX_4V,
        Sched<[WriteShuffle]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag>;
  }
}
9834
// Dword/qword element insert, guarded by HasDQI. Both forms match the generic
// insertelt node.
//   rr   - inserts the value held in a GRC register.
//   NAME - memory-source form instantiated from avx512_insert_elt_m using the
//          type's scalar load fragment.
multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
                                X86VectorVTInfo _, RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
        (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set _.RC:$dst,
            (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
        EVEX_4V, TAPD, Sched<[WriteShuffle]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
                                    _.ScalarLdFrag>, TAPD;
  }
}
9849
// EVEX-encoded element inserts into 128-bit vectors, one per element width.
// The byte and word forms load their memory operand with an extending load.
defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
                                     extloadi8>, TAPD, VEX_WIG;
defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
                                     extloadi16>, PD, VEX_WIG;
defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;
Simon Pilgrim36be8522017-11-29 18:52:20 +00009856
//===----------------------------------------------------------------------===//
// VSHUFPS - VSHUFPD Operations
//===----------------------------------------------------------------------===//
Simon Pilgrim36be8522017-11-29 18:52:20 +00009860
// VSHUFPS/VSHUFPD (opcode 0xC6): instantiates the common three-operand
// immediate multiclass for the floating-point type family VTInfo_FP.
// VTInfo_I is accepted but not referenced in this body.
multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
                        AVX512VLVectorVTInfo VTInfo_FP>{
  defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
                                    SSE_SHUFP>, EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
                                    AVX512AIi8Base, EVEX_4V;
}
9867
// Single- and double-precision shuffle instantiations.
defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS;
defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W;
Simon Pilgrim36be8522017-11-29 18:52:20 +00009870
//===----------------------------------------------------------------------===//
// AVX-512 - Byte shift Left/Right
//===----------------------------------------------------------------------===//
9874
// Itinerary bundle for the whole-register byte shifts below: the same
// itinerary class is used for both entries, and the scheduling class is
// WriteVecShift.
let Sched = WriteVecShift in
def AVX512_BYTESHIFT : OpndItins<
  IIC_SSE_INTSHDQ_P_RI, IIC_SSE_INTSHDQ_P_RI
>;
9879
// Byte shift by an 8-bit immediate for a single vector width.
//   rr - source vector in a register.
//   rm - source vector loaded from memory.
// MRMr / MRMm are the ModRM formats of the register and memory forms; itins
// provides the itinerary classes and the scheduling class.
multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
                               Format MRMm, string OpcodeStr,
                               OpndItins itins, X86VectorVTInfo _>{
  def rr : AVX512<opc, MRMr,
             (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 imm:$src2))))],
             itins.rr>, Sched<[itins.Sched]>;
  def rm : AVX512<opc, MRMm,
           (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
           [(set _.RC:$dst,(_.VT (OpNode
                                 (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                 (i8 imm:$src2))))], itins.rm>,
           Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
9896
// Instantiates avx512_shift_packed for all three vector widths on byte
// vectors. The 512-bit form requires `prd`; the 256- and 128-bit forms
// additionally require HasVLX.
multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
                                   Format MRMm, string OpcodeStr,
                                   OpndItins itins, Predicate prd>{
  let Predicates = [prd] in
    defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm,
                                 OpcodeStr, itins, v64i8_info>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm,
                                    OpcodeStr, itins, v32i8x_info>, EVEX_V256;
    defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm,
                                    OpcodeStr, itins, v16i8x_info>, EVEX_V128;
  }
}
// Both byte shifts share opcode 0x73 and are distinguished by the ModRM
// format (MRM7 for the left shift, MRM3 for the right shift).
defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
                                       AVX512_BYTESHIFT, HasBWI>, AVX512PDIi8Base,
                                       EVEX_4V, VEX_WIG;
defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
                                       AVX512_BYTESHIFT, HasBWI>, AVX512PDIi8Base,
                                       EVEX_4V, VEX_WIG;
Asaf Badouhd2c35992015-09-02 14:21:54 +00009916
9917
// Two-source operation whose result type differs from its source type, for a
// single vector width.
//   rr - both sources in registers.
//   rm - second source loaded from memory.
// _dst describes the result vector and _src the source vectors.
multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
                                string OpcodeStr, OpndItins itins,
                                X86VectorVTInfo _dst, X86VectorVTInfo _src> {
  def rr : AVX512BI<opc, MRMSrcReg,
             (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _dst.RC:$dst,(_dst.VT
                                (OpNode (_src.VT _src.RC:$src1),
                                        (_src.VT _src.RC:$src2))))], itins.rr>,
             Sched<[itins.Sched]>;
  def rm : AVX512BI<opc, MRMSrcMem,
           (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
           [(set _dst.RC:$dst,(_dst.VT
                              (OpNode (_src.VT _src.RC:$src1),
                              (_src.VT (bitconvert
                                        (_src.LdFrag addr:$src2))))))], itins.rm>,
           Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
9937
// Instantiates avx512_psadbw_packed for all three vector widths: byte-vector
// sources producing 64-bit-element results. The 512-bit form requires `prd`;
// the narrower forms additionally require HasVLX.
multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
                                    string OpcodeStr, OpndItins itins,
                                    Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, itins, v8i64_info,
                                  v64i8_info>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, itins, v4i64x_info,
                                     v32i8x_info>, EVEX_V256;
    defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, itins, v2i64x_info,
                                     v16i8x_info>, EVEX_V128;
  }
}
9951
// VPSADBW (opcode 0xF6), available with BWI.
defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
                                        SSE_MPSADBW_ITINS, HasBWI>, EVEX_4V, VEX_WIG;
Igor Bregerb4bb1902015-10-15 12:33:24 +00009954
// Transforms that swizzle a VPTERNLOG immediate to enable better matching when
// the memory operand isn't in the right place.
// Rewrites a VPTERNLOG truth-table immediate for the operand order
// (op2, op1, op0). Bit i of the immediate is addressed by the three operand
// bits with operand 0 as the most significant index bit, so exchanging
// operands 0 and 2 exchanges indices 001<->100 and 011<->110.
def VPTERNLOG321_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 1/4 and 3/6. Bits 0, 2, 5 and 7 (mask 0xa5) are unaffected.
  uint8_t NewImm = Imm & 0xa5;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
// Rewrites a VPTERNLOG truth-table immediate for the operand order
// (op1, op0, op2). Operands are numbered from 0 here, as in
// VPTERNLOG321_imm8: exchanging operands 0 and 1 exchanges the two most
// significant index bits, i.e. indices 010<->100 and 011<->101.
def VPTERNLOG213_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 2/4 and 3/5. Bits 0, 1, 6 and 7 (mask 0xc3) are unaffected.
  uint8_t NewImm = Imm & 0xc3;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x20) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
// Rewrites a VPTERNLOG truth-table immediate for the operand order
// (op0, op2, op1). Exchanging operands 1 and 2 exchanges the two least
// significant index bits, i.e. indices 001<->010 and 101<->110.
def VPTERNLOG132_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 1/2 and 5/6. Bits 0, 3, 4 and 7 (mask 0x99) are unaffected.
  uint8_t NewImm = Imm & 0x99;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
// Rewrites a VPTERNLOG truth-table immediate for a rotation of the operands:
// with operands numbered from 0, old operand 1 becomes operand 0, old
// operand 2 becomes operand 1 and old operand 0 becomes operand 2 (the bit
// moves below send index 001->010, 010->100 and 100->001).
def VPTERNLOG231_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate by moving operand 0 to the end.
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
  // Bits 0 and 7 (mask 0x81) are unaffected by any operand permutation.
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x08;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
// Rewrites a VPTERNLOG truth-table immediate for the opposite rotation of the
// operands: with operands numbered from 0, old operand 2 becomes operand 0,
// old operand 0 becomes operand 1 and old operand 1 becomes operand 2 (the
// bit moves below send index 001->100, 010->001 and 100->010).
def VPTERNLOG312_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
  // Bits 0 and 7 (mask 0x81) are unaffected by any operand permutation.
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
Craig Topper4e794c72017-02-19 19:36:58 +000010016
// VPTERNLOG for a single vector width.
//
// Instruction forms ($src1 is tied to $dst):
//   rri  - all three sources in registers.
//   rmi  - third source loaded from memory.
//   rmbi - third source is a broadcast scalar load.
//
// The instruction always computes OpNode(src1, src2, src3, imm). The extra
// patterns below match nodes whose operands appear in a different order (so
// that the passthru or the load ends up in the position the instruction
// requires) and rewrite the immediate with a VPTERNLOG*_imm8 transform to
// compensate.
multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          OpndItins itins, X86VectorVTInfo _>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT _.RC:$src3),
                              (i8 imm:$src4)), itins.rr, 1, 1>,
                      AVX512AIi8Base, EVEX_4V, Sched<[itins.Sched]>;
  defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
                    OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT _.RC:$src2),
                            (_.VT (bitconvert (_.LdFrag addr:$src3))),
                            (i8 imm:$src4)), itins.rm, 1, 0>,
                    AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[itins.Sched.Folded, ReadAfterLd]>;
  defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
                    OpcodeStr, "$src4, ${src3}"##_.BroadcastStr##", $src2",
                    "$src2, ${src3}"##_.BroadcastStr##", $src4",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT _.RC:$src2),
                            (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
                            (i8 imm:$src4)), itins.rm, 1, 0>, EVEX_B,
                    AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }// Constraints = "$src1 = $dst"

  // Additional patterns for matching passthru operand in other positions.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(NAME#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(NAME#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 imm:$src4))>;

  // Additional patterns for matching loads in other positions.
  def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)),
                          _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
            (!cast<Instruction>(NAME#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
                                   addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (OpNode _.RC:$src1,
                          (bitconvert (_.LdFrag addr:$src3)),
                          _.RC:$src2, (i8 imm:$src4))),
            (!cast<Instruction>(NAME#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
                                   addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;

  // Additional patterns for matching zero masking with loads in other
  // positions.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(NAME#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, (i8 imm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(NAME#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;

  // Additional patterns for matching masked loads with different
  // operand orders.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1,
                    (bitconvert (_.LdFrag addr:$src3)), (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
  // Node order (src2, mem, src1): the instruction order (src1, src2, mem) is
  // reached by moving the node's last operand to the front, which is the
  // VPTERNLOG312 transform. Check: imm 0xF0 selects the node's first operand
  // (src2); the instruction must then use 0xCC, and 312(0xF0) == 0xCC.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src1, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
  // Node order (mem, src1, src2): the instruction order is reached by moving
  // the node's first operand to the end, which is the VPTERNLOG231 transform.
  // Check: imm 0xF0 selects the node's first operand (mem); the instruction
  // must then use 0xAA, and 231(0xF0) == 0xAA.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;

  // Additional patterns for matching broadcasts in other positions.
  def : Pat<(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                          _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
            (!cast<Instruction>(NAME#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
                                   addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (OpNode _.RC:$src1,
                          (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                          _.RC:$src2, (i8 imm:$src4))),
            (!cast<Instruction>(NAME#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
                                   addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;

  // Additional patterns for matching zero masking with broadcasts in other
  // positions.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(NAME#_.ZSuffix#rmbikz) _.RC:$src1,
             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1,
                    (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src2, (i8 imm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(NAME#_.ZSuffix#rmbikz) _.RC:$src1,
             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG132_imm8 imm:$src4))>;

  // Additional patterns for matching masked broadcasts with different
  // operand orders.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1,
                    (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src2, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1,
                    (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    (i8 imm:$src4)), _.RC:$src1)),
            (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
  // Node order (src2, bcast, src1): needs the VPTERNLOG312 transform (see the
  // matching masked-load pattern above for the derivation).
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2,
                    (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src1, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
  // Node order (bcast, src1, src2): needs the VPTERNLOG231 transform.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
}
10184
// Instantiates avx512_ternlog (opcode 0x25, node X86vpternlog) for the 512-,
// 128- and 256-bit members of the type family `_`. The narrower forms
// additionally require HasVLX.
multiclass avx512_common_ternlog<string OpcodeStr, OpndItins itins,
                                 AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
    defm Z    : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, itins, _.info512>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, itins, _.info128>, EVEX_V128;
    defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, itins, _.info256>, EVEX_V256;
  }
}
10194
// Dword and qword flavours of VPTERNLOG.
defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SSE_INTALU_ITINS_P,
                                        avx512vl_i32_info>;
defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SSE_INTALU_ITINS_P,
                                        avx512vl_i64_info>, VEX_W;
Igor Bregerb4bb1902015-10-15 12:33:24 +000010199
//===----------------------------------------------------------------------===//
// AVX-512 - FixupImm
//===----------------------------------------------------------------------===//
10203
// Packed VFIXUPIMM for a single vector width, using the current rounding
// environment (FROUND_CURRENT). $src1 is tied to $dst; the third source is
// matched as the integer vector type of `_`.
//   rri  - third source in a register.
//   rmi  - third source loaded from memory.
//   rmbi - third source is a broadcast scalar load.
multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  OpndItins itins, X86VectorVTInfo _>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
    defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                         OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                        (OpNode (_.VT _.RC:$src1),
                                (_.VT _.RC:$src2),
                                (_.IntVT _.RC:$src3),
                                (i32 imm:$src4),
                                (i32 FROUND_CURRENT)), itins.rr>, Sched<[itins.Sched]>;
    defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.IntVT (bitconvert (_.LdFrag addr:$src3))),
                              (i32 imm:$src4),
                              (i32 FROUND_CURRENT)), itins.rm>,
                      Sched<[itins.Sched.Folded, ReadAfterLd]>;
    defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                    OpcodeStr##_.Suffix, "$src4, ${src3}"##_.BroadcastStr##", $src2",
                    "$src2, ${src3}"##_.BroadcastStr##", $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.IntVT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
                              (i32 imm:$src4),
                              (i32 FROUND_CURRENT)), itins.rm>,
                    EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  } // Constraints = "$src1 = $dst"
}
10236
// Packed VFIXUPIMM register form with {sae}: matched with FROUND_NO_EXC and
// encoded with EVEX_B. $src1 is tied to $dst.
multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
                                      SDNode OpNode, OpndItins itins,
                                      X86VectorVTInfo _>{
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
                      "$src2, $src3, {sae}, $src4",
                      (OpNode (_.VT _.RC:$src1),
                                (_.VT _.RC:$src2),
                                (_.IntVT _.RC:$src3),
                                (i32 imm:$src4),
                                (i32 FROUND_NO_EXC)), itins.rr>,
                      EVEX_B, Sched<[itins.Sched]>;
  }
}
10253
// Scalar VFIXUPIMM, guarded by HasAVX512. $src1 is tied to $dst; `_` describes
// the floating-point operands and _src3VT the type the third source is
// matched as.
//   rri  - register form, current rounding environment (FROUND_CURRENT).
//   rrib - register form with {sae} (FROUND_NO_EXC, EVEX_B).
//   rmi  - third source is a scalar load wrapped in scalar_to_vector.
multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  OpndItins itins, X86VectorVTInfo _,
                                  X86VectorVTInfo _src3VT> {
  let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
      ExeDomain = _.ExeDomain in {
    defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_src3VT.VT _src3VT.RC:$src3),
                              (i32 imm:$src4),
                              (i32 FROUND_CURRENT)), itins.rr>, Sched<[itins.Sched]>;
    // rrib takes all of its sources from registers (MRMSrcReg), so it uses the
    // register itinerary and the unfolded scheduling class, like the rrib
    // form of avx512_fixupimm_packed_sae.
    defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
                      "$src2, $src3, {sae}, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_src3VT.VT _src3VT.RC:$src3),
                              (i32 imm:$src4),
                              (i32 FROUND_NO_EXC)), itins.rr>,
                      EVEX_B, Sched<[itins.Sched]>;
    defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                     (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                     OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                     (OpNode (_.VT _.RC:$src1),
                             (_.VT _.RC:$src2),
                             (_src3VT.VT (scalar_to_vector
                                       (_src3VT.ScalarLdFrag addr:$src3))),
                             (i32 imm:$src4),
                             (i32 FROUND_CURRENT)), itins.rm>,
                     Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
10289
// Instantiates packed VFIXUPIMM (opcode 0x54) for all three vector widths of
// the type family _Vec. Only the 512-bit form also gets the {sae} variant;
// the 128- and 256-bit forms additionally require HasVLX.
multiclass avx512_fixupimm_packed_all<OpndItins itins, AVX512VLVectorVTInfo _Vec> {
  let Predicates = [HasAVX512] in
    defm Z    : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, itins,
                                       _Vec.info512>,
                avx512_fixupimm_packed_sae<0x54, "vfixupimm", X86VFixupimm, itins,
                                _Vec.info512>, AVX512AIi8Base, EVEX_4V, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, itins,
                            _Vec.info128>, AVX512AIi8Base, EVEX_4V, EVEX_V128;
    defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, itins,
                            _Vec.info256>, AVX512AIi8Base, EVEX_4V, EVEX_V256;
  }
}
10303
// Scalar (opcode 0x55) and packed (opcode 0x54) VFIXUPIMM instantiations for
// single and double precision.
defm VFIXUPIMMSS : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar,
                                          SSE_ALU_F32S, f32x_info, v4i32x_info>,
                          AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VFIXUPIMMSD : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar,
                                          SSE_ALU_F64S, f64x_info, v2i64x_info>,
                          AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SSE_ALU_F32P, avx512vl_f32_info>,
                          EVEX_CD8<32, CD8VF>;
defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SSE_ALU_F64P, avx512vl_f64_info>,
                          EVEX_CD8<64, CD8VF>, VEX_W;
Craig Topper5625d242016-07-29 06:06:00 +000010314
10315
10316
10317// Patterns used to select SSE scalar fp arithmetic instructions from
10318// either:
10319//
10320// (1) a scalar fp operation followed by a blend
10321//
10322// The effect is that the backend no longer emits unnecessary vector
10323// insert instructions immediately after SSE scalar fp instructions
10324// like addss or mulss.
10325//
10326// For example, given the following code:
10327// __m128 foo(__m128 A, __m128 B) {
10328// A[0] += B[0];
10329// return A;
10330// }
10331//
10332// Previously we generated:
10333// addss %xmm0, %xmm1
10334// movss %xmm1, %xmm0
10335//
10336// We now generate:
10337// addss %xmm1, %xmm0
10338//
10339// (2) a vector packed single/double fp operation followed by a vector insert
10340//
10341// The effect is that the backend converts the packed fp instruction
10342// followed by a vector insert into a single SSE scalar fp instruction.
10343//
10344// For example, given the following code:
10345// __m128 foo(__m128 A, __m128 B) {
10346// __m128 C = A + B;
//     return (__m128) {C[0], A[1], A[2], A[3]};
10348// }
10349//
10350// Previously we generated:
10351// addps %xmm0, %xmm1
10352// movss %xmm1, %xmm0
10353//
10354// We now generate:
10355// addss %xmm1, %xmm0
10356
10357// TODO: Some canonicalization in lowering would simplify the number of
10358// patterns we have to try to match.
// Selection patterns that fold an f32 math node combined with an X86Movss
// insert into the "V<OpcPrefix>SSZrr_Int" (or masked "..._Intk") instruction.
//   Op        - SDNode to match; appears in scalar (f32) and packed (v4f32)
//               form in the patterns below.
//   OpcPrefix - mnemonic stem pasted into the instruction record name.
multiclass AVX512_scalar_math_f32_patterns<SDNode Op, string OpcPrefix> {
  let Predicates = [HasAVX512] in {
    // Scalar op on element 0 extracted from $dst, re-inserted into $dst
    // via movss.
    def : Pat<(v4f32 (X86Movss
                (v4f32 VR128X:$dst),
                (v4f32 (scalar_to_vector
                  (Op (f32 (extractelt (v4f32 VR128X:$dst), (iPTR 0))),
                      FR32X:$src))))),
              (!cast<I>("V"#OpcPrefix#SSZrr_Int)
                v4f32:$dst,
                (COPY_TO_REGCLASS FR32X:$src, VR128X))>;

    // Full-vector op whose result is merged back into $dst via movss.
    def : Pat<(v4f32 (X86Movss
                (v4f32 VR128X:$dst),
                (Op (v4f32 VR128X:$dst), (v4f32 VR128X:$src)))),
              (!cast<I>("V"#OpcPrefix#SSZrr_Int) v4f32:$dst, v4f32:$src)>;

    // Masked variant: X86selects chooses between the scalar op result and
    // $src0 under $mask before the movss insert. $src0 is passed as the
    // first operand of the _Intk instruction.
    def : Pat<(X86Movss
                (v4f32 VR128X:$src1),
                (scalar_to_vector
                  (X86selects VK1WM:$mask,
                    (Op (f32 (extractelt (v4f32 VR128X:$src1), (iPTR 0))),
                        FR32X:$src2),
                    FR32X:$src0))),
              (!cast<I>("V"#OpcPrefix#SSZrr_Intk)
                (COPY_TO_REGCLASS FR32X:$src0, VR128X),
                VK1WM:$mask, v4f32:$src1,
                (COPY_TO_REGCLASS FR32X:$src2, VR128X))>;
  }
}
10385
// Instantiate the f32 scalar-math folding patterns for add/sub/mul/div.
defm : AVX512_scalar_math_f32_patterns<fadd, "ADD">;
defm : AVX512_scalar_math_f32_patterns<fsub, "SUB">;
defm : AVX512_scalar_math_f32_patterns<fmul, "MUL">;
defm : AVX512_scalar_math_f32_patterns<fdiv, "DIV">;
10390
// Selection patterns that fold an f64 math node combined with an X86Movsd
// insert into the "V<OpcPrefix>SDZrr_Int" (or masked "..._Intk") instruction.
//   Op        - SDNode to match; appears in scalar (f64) and packed (v2f64)
//               form in the patterns below.
//   OpcPrefix - mnemonic stem pasted into the instruction record name.
multiclass AVX512_scalar_math_f64_patterns<SDNode Op, string OpcPrefix> {
  let Predicates = [HasAVX512] in {
    // Scalar op on element 0 extracted from $dst, re-inserted into $dst
    // via movsd.
    def : Pat<(v2f64 (X86Movsd
                (v2f64 VR128X:$dst),
                (v2f64 (scalar_to_vector
                  (Op (f64 (extractelt (v2f64 VR128X:$dst), (iPTR 0))),
                      FR64X:$src))))),
              (!cast<I>("V"#OpcPrefix#SDZrr_Int)
                v2f64:$dst,
                (COPY_TO_REGCLASS FR64X:$src, VR128X))>;

    // Full-vector op whose result is merged back into $dst via movsd.
    def : Pat<(v2f64 (X86Movsd
                (v2f64 VR128X:$dst),
                (Op (v2f64 VR128X:$dst), (v2f64 VR128X:$src)))),
              (!cast<I>("V"#OpcPrefix#SDZrr_Int) v2f64:$dst, v2f64:$src)>;

    // Masked variant: X86selects chooses between the scalar op result and
    // $src0 under $mask before the movsd insert. $src0 is passed as the
    // first operand of the _Intk instruction.
    def : Pat<(X86Movsd
                (v2f64 VR128X:$src1),
                (scalar_to_vector
                  (X86selects VK1WM:$mask,
                    (Op (f64 (extractelt (v2f64 VR128X:$src1), (iPTR 0))),
                        FR64X:$src2),
                    FR64X:$src0))),
              (!cast<I>("V"#OpcPrefix#SDZrr_Intk)
                (COPY_TO_REGCLASS FR64X:$src0, VR128X),
                VK1WM:$mask, v2f64:$src1,
                (COPY_TO_REGCLASS FR64X:$src2, VR128X))>;
  }
}
10417
// Instantiate the f64 scalar-math folding patterns for add/sub/mul/div.
defm : AVX512_scalar_math_f64_patterns<fadd, "ADD">;
defm : AVX512_scalar_math_f64_patterns<fsub, "SUB">;
defm : AVX512_scalar_math_f64_patterns<fmul, "MUL">;
defm : AVX512_scalar_math_f64_patterns<fdiv, "DIV">;
Coby Tayree2a1c02f2017-11-21 09:11:41 +000010422
10423//===----------------------------------------------------------------------===//
10424// AES instructions
10425//===----------------------------------------------------------------------===//
Coby Tayree7ca5e5872017-11-21 09:30:33 +000010426
// EVEX-encoded AES instructions at 128-, 256- and 512-bit widths, each built
// from AESI_binop_rm_int.
//   Op        - 8-bit opcode value forwarded to AESI_binop_rm_int.
//   OpStr     - assembly mnemonic.
//   IntPrefix - record name of the 128-bit intrinsic; "_256" and "_512" are
//               appended to name the wider intrinsics.
multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
  // 128- and 256-bit forms are predicated on VLX together with VAES.
  let Predicates = [HasVLX, HasVAES] in {
    defm Z128 : AESI_binop_rm_int<Op, OpStr, !cast<Intrinsic>(IntPrefix),
                                  loadv2i64, 0, VR128X, i128mem>,
                EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
    defm Z256 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix##"_256"),
                                  loadv4i64, 0, VR256X, i256mem>,
                EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
  }

  // 512-bit form is predicated on AVX512 together with VAES.
  let Predicates = [HasAVX512, HasVAES] in {
    defm Z : AESI_binop_rm_int<Op, OpStr,
                               !cast<Intrinsic>(IntPrefix##"_512"),
                               loadv8i64, 0, VR512, i512mem>,
             EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
  }
}
10444
// AES encrypt/decrypt round instructions (opcodes 0xDC..0xDF), each bound to
// the matching int_x86_aesni_* intrinsic family.
defm VAESENC     : avx512_vaes<0xDC, "vaesenc",     "int_x86_aesni_aesenc">;
defm VAESENCLAST : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
defm VAESDEC     : avx512_vaes<0xDE, "vaesdec",     "int_x86_aesni_aesdec">;
defm VAESDECLAST : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;
10449
Coby Tayree7ca5e5872017-11-21 09:30:33 +000010450//===----------------------------------------------------------------------===//
10451// PCLMUL instructions - Carry less multiplication
10452//===----------------------------------------------------------------------===//
10453
// 512-bit form: predicated on AVX512 together with VPCLMULQDQ.
let Predicates = [HasAVX512, HasVPCLMULQDQ] in {
  defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64,
                                int_x86_pclmulqdq_512>,
                     EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;
}

// 128- and 256-bit forms: predicated on VLX together with VPCLMULQDQ.
let Predicates = [HasVLX, HasVPCLMULQDQ] in {
  defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64,
                                   int_x86_pclmulqdq>,
                        EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;

  defm VPCLMULQDQZ256 : vpclmulqdq<VR256X, i256mem, loadv4i64,
                                   int_x86_pclmulqdq_256>,
                        EVEX_4V, EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_WIG;
}

// Aliases, one instantiation per vector width.
defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
10471
Coby Tayree71e37cc2017-11-21 09:48:44 +000010472//===----------------------------------------------------------------------===//
10473// VBMI2
10474//===----------------------------------------------------------------------===//
10475
// Register ("r") and full-vector memory ("m") forms of a three-source VBMI2
// variable shift. $src1 is tied to $dst.
//   Op     - 8-bit opcode value.
//   OpStr  - assembly mnemonic.
//   OpNode - SDNode matched with operands ($src1, $src2, $src3).
//   itins  - supplies the rr/rm itineraries and the scheduling class.
//   VTI    - vector type info giving register class, memory operand and
//            load fragment.
multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
                              OpndItins itins, X86VectorVTInfo VTI> {
  let Constraints = "$src1 = $dst", ExeDomain = VTI.ExeDomain in {
    // All three sources in registers.
    defm r : AVX512_maskable_3src<Op, MRMSrcReg, VTI,
               (outs VTI.RC:$dst),
               (ins VTI.RC:$src2, VTI.RC:$src3),
               OpStr, "$src3, $src2", "$src2, $src3",
               (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3)),
               itins.rr>,
             AVX512FMA3Base, Sched<[itins.Sched]>;

    // Third source loaded from memory through VTI.LdFrag.
    defm m : AVX512_maskable_3src<Op, MRMSrcMem, VTI,
               (outs VTI.RC:$dst),
               (ins VTI.RC:$src2, VTI.MemOp:$src3),
               OpStr, "$src3, $src2", "$src2, $src3",
               (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                        (VTI.VT (bitconvert (VTI.LdFrag addr:$src3))))),
               itins.rm>,
             AVX512FMA3Base, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
10494
// Extends VBMI2_shift_var_rm with a broadcast-memory ("mb") form: the third
// source is a scalar load wrapped in X86VBroadcast. Parameters are the same
// as for VBMI2_shift_var_rm and are forwarded to it unchanged.
multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                               OpndItins itins, X86VectorVTInfo VTI>
    : VBMI2_shift_var_rm<Op, OpStr, OpNode, itins, VTI> {
  let Constraints = "$src1 = $dst", ExeDomain = VTI.ExeDomain in {
    defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI,
                (outs VTI.RC:$dst),
                (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
                OpStr,
                "${src3}"##VTI.BroadcastStr##", $src2",
                "$src2, ${src3}"##VTI.BroadcastStr,
                (OpNode VTI.RC:$src1, VTI.RC:$src2,
                 (VTI.VT (X86VBroadcast (VTI.ScalarLdFrag addr:$src3)))),
                itins.rm>,
              AVX512FMA3Base, EVEX_B,
              Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
10509
// Expands VBMI2_shift_var_rm (no broadcast form) across the 512-, 256- and
// 128-bit members of an AVX512VLVectorVTInfo family.
multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     OpndItins itins,
                                     AVX512VLVectorVTInfo VTI> {
  // 512-bit form: VBMI2 only.
  let Predicates = [HasVBMI2] in {
    defm Z : VBMI2_shift_var_rm<Op, OpStr, OpNode, itins, VTI.info512>,
             EVEX_V512;
  }

  // 256/128-bit forms: VBMI2 plus VLX.
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, itins, VTI.info256>,
                EVEX_V256;
    defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, itins, VTI.info128>,
                EVEX_V128;
  }
}
10519
// Expands VBMI2_shift_var_rmb (including the broadcast form) across the 512-,
// 256- and 128-bit members of an AVX512VLVectorVTInfo family.
multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
                                      OpndItins itins,
                                      AVX512VLVectorVTInfo VTI> {
  // 512-bit form: VBMI2 only.
  let Predicates = [HasVBMI2] in {
    defm Z : VBMI2_shift_var_rmb<Op, OpStr, OpNode, itins, VTI.info512>,
             EVEX_V512;
  }

  // 256/128-bit forms: VBMI2 plus VLX.
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, itins, VTI.info256>,
                EVEX_V256;
    defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, itins, VTI.info128>,
                EVEX_V128;
  }
}
// Word / dword / qword variants of a VBMI2 variable shift.
//   wOp    - opcode used for the word ("w") variant.
//   dqOp   - opcode shared by the dword ("d") and qword ("q") variants.
//   Prefix - mnemonic stem; the element-size letter is appended.
// The word variant is built without a broadcast form; the dword and qword
// variants include one.
multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, OpndItins itins> {
  defm W : VBMI2_shift_var_rm_common<wOp, Prefix##"w", OpNode, itins,
                                     avx512vl_i16_info>,
           VEX_W, EVEX_CD8<16, CD8VF>;

  defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix##"d", OpNode, itins,
                                      avx512vl_i32_info>,
           EVEX_CD8<32, CD8VF>;

  defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix##"q", OpNode, itins,
                                      avx512vl_i64_info>,
           VEX_W, EVEX_CD8<64, CD8VF>;
}
10538
// Word / dword / qword variants of a VBMI2 immediate shift.
//   wOp    - opcode used for the word ("w") variant.
//   dqOp   - opcode shared by the dword ("d") and qword ("q") variants.
//   Prefix - mnemonic stem; the element-size letter is appended.
// The word variant goes through avx512_common_3Op_rm_imm8, the dword and
// qword variants through avx512_common_3Op_imm8.
multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, OpndItins itins> {
  defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix##"w", itins,
                                     avx512vl_i16_info, avx512vl_i16_info,
                                     HasVBMI2>,
           VEX_W, EVEX_CD8<16, CD8VF>;

  defm D : avx512_common_3Op_imm8<Prefix##"d", avx512vl_i32_info, dqOp,
                                  OpNode, itins, HasVBMI2>,
           AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;

  defm Q : avx512_common_3Op_imm8<Prefix##"q", avx512vl_i64_info, dqOp,
                                  OpNode, itins, HasVBMI2>,
           AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
}
10549
// Concat & Shift: variable-count (…V) and immediate-count forms.
defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv,
                               SSE_INTMUL_ITINS_P>;
defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv,
                               SSE_INTMUL_ITINS_P>;
defm VPSHLD  : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld,
                               SSE_INTMUL_ITINS_P>;
defm VPSHRD  : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd,
                               SSE_INTMUL_ITINS_P>;

// Compress: byte and word element widths share opcode 0x63; the word form
// additionally carries VEX_W.
defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", AVX512_COMPRESS,
                                         avx512vl_i8_info, HasVBMI2>,
                   EVEX;
defm VPCOMPRESSW : compress_by_elt_width<0x63, "vpcompressw", AVX512_COMPRESS,
                                         avx512vl_i16_info, HasVBMI2>,
                   EVEX, VEX_W;

// Expand: byte and word element widths share opcode 0x62; the word form
// additionally carries VEX_W.
defm VPEXPANDB : expand_by_elt_width<0x62, "vpexpandb", AVX512_EXPAND,
                                     avx512vl_i8_info, HasVBMI2>,
                 EVEX;
defm VPEXPANDW : expand_by_elt_width<0x62, "vpexpandw", AVX512_EXPAND,
                                     avx512vl_i16_info, HasVBMI2>,
                 EVEX, VEX_W;
Coby Tayree71e37cc2017-11-21 09:48:44 +000010566
Coby Tayree3880f2a2017-11-21 10:04:28 +000010567//===----------------------------------------------------------------------===//
10568// VNNI
10569//===----------------------------------------------------------------------===//
10570
// Register ("r"), full-vector memory ("m") and broadcast-memory ("mb") forms
// of a three-source VNNI instruction. $src1 is tied to $dst by the enclosing
// let.
//   Op     - 8-bit opcode value.
//   OpStr  - assembly mnemonic.
//   OpNode - SDNode matched with operands ($src1, $src2, $src3).
//   itins  - supplies the rr/rm itineraries and the scheduling class.
//   VTI    - vector type info for the instantiated width.
let Constraints = "$src1 = $dst" in
multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                    OpndItins itins, X86VectorVTInfo VTI> {
  // All sources in registers.
  defm r : AVX512_maskable_3src<Op, MRMSrcReg, VTI,
             (outs VTI.RC:$dst),
             (ins VTI.RC:$src2, VTI.RC:$src3),
             OpStr, "$src3, $src2", "$src2, $src3",
             (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3)),
             itins.rr>,
           EVEX_4V, T8PD, Sched<[itins.Sched]>;

  // Third source loaded as a full vector through VTI.LdFrag.
  defm m : AVX512_maskable_3src<Op, MRMSrcMem, VTI,
             (outs VTI.RC:$dst),
             (ins VTI.RC:$src2, VTI.MemOp:$src3),
             OpStr, "$src3, $src2", "$src2, $src3",
             (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                      (VTI.VT (bitconvert (VTI.LdFrag addr:$src3))))),
             itins.rm>,
           EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
           Sched<[itins.Sched.Folded, ReadAfterLd]>;

  // Third source is a scalar load wrapped in X86VBroadcast.
  defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI,
              (outs VTI.RC:$dst),
              (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
              OpStr,
              "${src3}"##VTI.BroadcastStr##", $src2",
              "$src2, ${src3}"##VTI.BroadcastStr,
              (OpNode VTI.RC:$src1, VTI.RC:$src2,
               (VTI.VT (X86VBroadcast (VTI.ScalarLdFrag addr:$src3)))),
              itins.rm>,
            EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B, T8PD,
            Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
10598
// Expands VNNI_rmb over the 512-, 256- and 128-bit i32 vector types.
multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
                       OpndItins itins> {
  // 512-bit form: VNNI only.
  let Predicates = [HasVNNI] in {
    defm Z : VNNI_rmb<Op, OpStr, OpNode, itins, v16i32_info>, EVEX_V512;
  }

  // 256/128-bit forms: VNNI plus VLX.
  let Predicates = [HasVNNI, HasVLX] in {
    defm Z256 : VNNI_rmb<Op, OpStr, OpNode, itins, v8i32x_info>, EVEX_V256;
    defm Z128 : VNNI_rmb<Op, OpStr, OpNode, itins, v4i32x_info>, EVEX_V128;
  }
}
10607
// FIXME: Is there a better scheduler itinerary for VPDP?
// The four VNNI instructions use consecutive opcodes 0x50..0x53 and all
// share the SSE_PMADD itinerary.
defm VPDPBUSD  : VNNI_common<0x50, "vpdpbusd",  X86Vpdpbusd,  SSE_PMADD>;
defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SSE_PMADD>;
defm VPDPWSSD  : VNNI_common<0x52, "vpdpwssd",  X86Vpdpwssd,  SSE_PMADD>;
defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SSE_PMADD>;
Coby Tayree3880f2a2017-11-21 10:04:28 +000010613
Coby Tayree5c7fe5d2017-11-21 10:32:42 +000010614//===----------------------------------------------------------------------===//
10615// Bit Algorithms
10616//===----------------------------------------------------------------------===//
10617
// FIXME: Is there a better scheduler itinerary for VPOPCNTB/VPOPCNTW?
// Byte and word population count; both use opcode 0x54 and match ctpop.
// The word form additionally carries VEX_W.
defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop,
                                   SSE_INTALU_ITINS_P, avx512vl_i8_info,
                                   HasBITALG>;
defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop,
                                   SSE_INTALU_ITINS_P, avx512vl_i16_info,
                                   HasBITALG>,
                VEX_W;

// Lowering patterns for ctpop on the byte and word vector families.
defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
Coby Tayree5c7fe5d2017-11-21 10:32:42 +000010626
// Register-register ("rr") and register-memory ("rm") forms of vpshufbitqmb
// (opcode 0x8F). The destination is a mask register of class VTI.KRC.
//   itins - supplies the rr/rm itineraries and the scheduling class.
//   VTI   - vector type info for the instantiated width.
multiclass VPSHUFBITQMB_rm<OpndItins itins, X86VectorVTInfo VTI> {
  // Both sources in vector registers.
  defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI,
              (outs VTI.KRC:$dst),
              (ins VTI.RC:$src1, VTI.RC:$src2),
              "vpshufbitqmb", "$src2, $src1", "$src1, $src2",
              (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
                               (VTI.VT VTI.RC:$src2)),
              itins.rr>,
            EVEX_4V, T8PD, Sched<[itins.Sched]>;

  // Second source loaded from memory through VTI.LdFrag.
  defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI,
              (outs VTI.KRC:$dst),
              (ins VTI.RC:$src1, VTI.MemOp:$src2),
              "vpshufbitqmb", "$src2, $src1", "$src1, $src2",
              (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
                               (VTI.VT (bitconvert
                                         (VTI.LdFrag addr:$src2)))),
              itins.rm>,
            EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
            Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
10644
// Expands VPSHUFBITQMB_rm across the 512-, 256- and 128-bit members of an
// AVX512VLVectorVTInfo family.
multiclass VPSHUFBITQMB_common<OpndItins itins, AVX512VLVectorVTInfo VTI> {
  // 512-bit form: BITALG only.
  let Predicates = [HasBITALG] in {
    defm Z : VPSHUFBITQMB_rm<itins, VTI.info512>, EVEX_V512;
  }

  // 256/128-bit forms: BITALG plus VLX.
  let Predicates = [HasBITALG, HasVLX] in {
    defm Z256 : VPSHUFBITQMB_rm<itins, VTI.info256>, EVEX_V256;
    defm Z128 : VPSHUFBITQMB_rm<itins, VTI.info128>, EVEX_V128;
  }
}
10653
// FIXME: Is there a better scheduler itinerary for VPSHUFBITQMB?
// Instantiated over the i8 vector family.
defm VPSHUFBITQMB : VPSHUFBITQMB_common<SSE_INTMUL_ITINS_P,
                                        avx512vl_i8_info>;
Coby Tayreee8bdd382017-11-23 11:15:50 +000010656
Coby Tayreed8b17be2017-11-26 09:36:41 +000010657//===----------------------------------------------------------------------===//
10658// GFNI
10659//===----------------------------------------------------------------------===//
10660
// GF2P8MULB at 512-, 256- and 128-bit widths over i8 vectors, built from
// avx512_binop_rm with SSE_INTALU_ITINS_P and a final argument of 1.
//   Op     - 8-bit opcode value.
//   OpStr  - assembly mnemonic.
//   OpNode - SDNode forwarded to avx512_binop_rm.
multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode> {
  // 512-bit form: GFNI, AVX512 and BWI.
  let Predicates = [HasGFNI, HasAVX512, HasBWI] in {
    defm Z : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info,
                             SSE_INTALU_ITINS_P, 1>,
             EVEX_V512;
  }

  // 256/128-bit forms: GFNI, VLX and BWI.
  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
    defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info,
                                SSE_INTALU_ITINS_P, 1>,
                EVEX_V256;
    defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info,
                                SSE_INTALU_ITINS_P, 1>,
                EVEX_V128;
  }
}
10672
// Galois-field byte multiply, opcode 0xCF.
defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb",
                                          X86GF2P8mulb>,
                  EVEX_CD8<8, CD8VF>, T8PD;
Coby Tayreed8b17be2017-11-26 09:36:41 +000010675
// Adds a broadcast-memory + immediate ("rmbi") form on top of the forms
// inherited from avx512_3Op_rm_imm8.
//   Op      - 8-bit opcode value.
//   OpStr   - assembly mnemonic.
//   OpNode  - SDNode matched with ($src1, broadcast $src2, imm8 $src3).
//   itins   - supplies the rm itinerary and the scheduling class.
//   VTI     - vector type of the result and of $src1.
//   BcstVTI - vector type describing the broadcast: the memory source is a
//             single 64-bit element (loadi64) broadcast to BcstVTI.VT and
//             then bitconverted for OpNode.
multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
                                      OpndItins itins, X86VectorVTInfo VTI,
                                      X86VectorVTInfo BcstVTI>
    : avx512_3Op_rm_imm8<Op, OpStr, OpNode, itins, VTI, VTI> {
  let ExeDomain = VTI.ExeDomain in
  // The memory operand is taken from BcstVTI rather than VTI: the pattern
  // loads a 64-bit scalar and the asm string uses BcstVTI.BroadcastStr, so
  // the operand must describe the broadcast element, not VTI's element.
  defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI,
                (outs VTI.RC:$dst),
                (ins VTI.RC:$src1, BcstVTI.ScalarMemOp:$src2, u8imm:$src3),
                OpStr,
                "$src3, ${src2}"##BcstVTI.BroadcastStr##", $src1",
                "$src1, ${src2}"##BcstVTI.BroadcastStr##", $src3",
                (OpNode (VTI.VT VTI.RC:$src1),
                 (bitconvert (BcstVTI.VT
                               (X86VBroadcast (loadi64 addr:$src2)))),
                 (i8 imm:$src3)),
                itins.rm>,
              EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
10690
// Expands GF2P8AFFINE_avx512_rmb_imm over the 512-, 256- and 128-bit widths.
// Each width pairs an i8 vector type with the i64 vector type of the same
// total size, which is used for the broadcast form.
multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     OpndItins itins> {
  // 512-bit form: GFNI, AVX512 and BWI.
  let Predicates = [HasGFNI, HasAVX512, HasBWI] in {
    defm Z : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, itins,
                                        v64i8_info, v8i64_info>,
             EVEX_V512;
  }

  // 256/128-bit forms: GFNI, VLX and BWI.
  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
    defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, itins,
                                           v32i8x_info, v4i64x_info>,
                EVEX_V256;
    defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, itins,
                                           v16i8x_info, v2i64x_info>,
                EVEX_V128;
  }
}
10703
// Galois-field affine transforms: inverse form at opcode 0xCF, plain form at
// opcode 0xCE. Both share the SSE_INTMUL_ITINS_P itinerary.
defm VGF2P8AFFINEINVQB
    : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
                                X86GF2P8affineinvqb, SSE_INTMUL_ITINS_P>,
      EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
defm VGF2P8AFFINEQB
    : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
                                X86GF2P8affineqb, SSE_INTMUL_ITINS_P>,
      EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
Coby Tayreed8b17be2017-11-26 09:36:41 +000010710