blob: 188a1679ef32a075cad1654a7bab5a9a00b742ba [file] [log] [blame]
//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 AVX512 instruction set, defining the
// instructions, and properties of the instructions which are needed for code
// generation, machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//
15
// Group template arguments that can be derived from the vector type (EltNum x
// EltVT).  These are things like the register class for the writemask, etc.
// The idea is to pass one of these as the template argument rather than the
// individual arguments.
// The template is also used for scalar types, in this case numelts is 1.
//
// Template arguments:
//   numelts - number of elements (1 for the scalar variants).
//   eltvt   - element value type (e.g. i32, f64).
//   rc      - register class that holds a value of this type.
//   suffix  - mnemonic suffix; defaults to the empty string.
class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
                      string suffix = ""> {
  RegisterClass RC = rc;
  ValueType EltVT = eltvt;
  int NumElts = numelts;

  // Corresponding mask register class.
  RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);

  // Corresponding write-mask register class.
  RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");

  // The mask VT.
  ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");

  // Suffix used in the instruction mnemonic.
  string Suffix = suffix;

  // VTName is a string name for vector VT. For vector types it will be
  // v # NumElts # EltVT, so for vector of 8 elements of i32 it will be v8i32
  // It is a little bit complex for scalar types, where NumElts = 1.
  // In this case we build v4f32 or v2f64, i.e. the element count becomes 4 for
  // a 32-bit element and 2 for a 64-bit element; any other element size keeps
  // NumElts unchanged.
  string VTName = "v" # !if (!eq (NumElts, 1),
                        !if (!eq (EltVT.Size, 32), 4,
                        !if (!eq (EltVT.Size, 64), 2, NumElts)), NumElts) # EltVT;

  // The vector VT.
  ValueType VT = !cast<ValueType>(VTName);

  // Name of the element type as a string, e.g. "i32" or "f64".
  string EltTypeName = !cast<string>(EltVT);
  // Size of the element type in bits, e.g. 32 for v16i32.
  // EltSizeName is the string form, obtained by stripping the "i"/"f" prefix
  // from EltTypeName; EltSize is the integer form.
  string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
  int EltSize = EltVT.Size;

  // "i" for integer types and "f" for floating-point types
  string TypeVariantName = !subst(EltSizeName, "", EltTypeName);

  // Size of RC in bits, e.g. 512 for VR512.  Taken from VT.Size.
  int Size = VT.Size;

  // The corresponding memory operand, e.g. i512mem for VR512.
  X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
  // Memory operand for a single element, e.g. i32mem.
  X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
  // FP scalar memory operand for intrinsics - ssmem/sdmem.
  // Left unset (?) for element types other than f32/f64.
  Operand IntScalarMemOp = !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
                           !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?));

  // Load patterns
  // Note: For 128/256-bit integer VT we choose loadv2i64/loadv4i64
  //       due to load promotion during legalization
  // The same choice is made for 512-bit integer VTs (loadv8i64).
  PatFrag LdFrag = !cast<PatFrag>("load" #
                                  !if (!eq (TypeVariantName, "i"),
                                       !if (!eq (Size, 128), "v2i64",
                                       !if (!eq (Size, 256), "v4i64",
                                       !if (!eq (Size, 512), "v8i64",
                                            VTName))), VTName));

  // Aligned counterpart of LdFrag; uses the same type-selection rule.
  PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" #
                                         !if (!eq (TypeVariantName, "i"),
                                              !if (!eq (Size, 128), "v2i64",
                                              !if (!eq (Size, 256), "v4i64",
                                              !if (!eq (Size, 512), "v8i64",
                                                   VTName))), VTName));

  // Load fragment for a single element, e.g. loadf32.
  PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);

  // Scalar intrinsic load complex pattern (sse_load_f32 / sse_load_f64).
  // Left unset (?) for element types other than f32/f64.
  ComplexPattern ScalarIntMemCPat = !if (!eq (EltTypeName, "f32"),
                                         !cast<ComplexPattern>("sse_load_f32"),
                                    !if (!eq (EltTypeName, "f64"),
                                         !cast<ComplexPattern>("sse_load_f64"),
                                         ?));

  // The corresponding float type, e.g. v16f32 for v16i32
  // Note: For EltSize < 32, FloatVT is illegal and TableGen
  //       fails to compile, so we choose FloatVT = VT
  ValueType FloatVT = !cast<ValueType>(
                        !if (!eq (!srl(EltSize,5),0),
                             VTName,
                             !if (!eq(TypeVariantName, "i"),
                                  "v" # NumElts # "f" # EltSize,
                                  VTName)));

  // The corresponding integer type, e.g. v16i32 for v16f32.  As with FloatVT,
  // element sizes below 32 bits simply map to VT.
  ValueType IntVT = !cast<ValueType>(
                        !if (!eq (!srl(EltSize,5),0),
                             VTName,
                             !if (!eq(TypeVariantName, "f"),
                                  "v" # NumElts # "i" # EltSize,
                                  VTName)));
  // The string to specify embedded broadcast in assembly.
  string BroadcastStr = "{1to" # NumElts # "}";

  // 8-bit compressed displacement tuple/subvector format.  This is only
  // defined for NumElts <= 8.
  CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
                               !cast<CD8VForm>("CD8VT" # NumElts), ?);

  // Sub-register index for 128-bit (sub_xmm) and 256-bit (sub_ymm) types;
  // left unset (?) for any other size.
  SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
                          !if (!eq (Size, 256), sub_ymm, ?));

  // Execution domain chosen from the element type; everything that is not
  // f32/f64 is SSEPackedInt.
  Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
                     !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
                     SSEPackedInt));

  // Scalar FP register class: FR32X for f32 elements, FR64X for everything
  // else.
  RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);

  // A vector type of the same width with element type i64.  This is used to
  // create patterns for logic ops.
  ValueType i64VT = !cast<ValueType>("v" # !srl(Size, 6) # "i64");

  // A vector type of the same width with element type i32.  This is used to
  // create the canonical constant zero node ImmAllZerosV.
  ValueType i32VT = !cast<ValueType>("v" # !srl(Size, 5) # "i32");
  dag ImmAllZerosV = (VT (bitconvert (i32VT immAllZerosV)));

  // Instruction-name suffix for the vector length: "Z128", "Z256" or "Z".
  string ZSuffix = !if (!eq (Size, 128), "Z128",
                   !if (!eq (Size, 256), "Z256", "Z"));
}
138
// 512-bit vector types (RC = VR512).
def v64i8_info  : X86VectorVTInfo<64,  i8, VR512, "b">;
def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
def v8i64_info  : X86VectorVTInfo<8,  i64, VR512, "q">;
def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
def v8f64_info  : X86VectorVTInfo<8,  f64, VR512, "pd">;

// "x" in v32i8x_info means RC = VR256X
def v32i8x_info  : X86VectorVTInfo<32,  i8, VR256X, "b">;
def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
def v8i32x_info  : X86VectorVTInfo<8,  i32, VR256X, "d">;
def v4i64x_info  : X86VectorVTInfo<4,  i64, VR256X, "q">;
def v8f32x_info  : X86VectorVTInfo<8,  f32, VR256X, "ps">;
def v4f64x_info  : X86VectorVTInfo<4,  f64, VR256X, "pd">;

// 128-bit vector types (RC = VR128X).
def v16i8x_info  : X86VectorVTInfo<16,  i8, VR128X, "b">;
def v8i16x_info  : X86VectorVTInfo<8,  i16, VR128X, "w">;
def v4i32x_info  : X86VectorVTInfo<4,  i32, VR128X, "d">;
def v2i64x_info  : X86VectorVTInfo<2,  i64, VR128X, "q">;
def v4f32x_info  : X86VectorVTInfo<4,  f32, VR128X, "ps">;
def v2f64x_info  : X86VectorVTInfo<2,  f64, VR128X, "pd">;

// We map scalar types to the smallest (128-bit) vector type
// with the appropriate element type. This allows to use the same masking logic.
def i32x_info    : X86VectorVTInfo<1,  i32, GR32, "si">;
def i64x_info    : X86VectorVTInfo<1,  i64, GR64, "sq">;
def f32x_info    : X86VectorVTInfo<1,  f32, VR128X, "ss">;
def f64x_info    : X86VectorVTInfo<1,  f64, VR128X, "sd">;
167
// Bundles the 512-, 256- and 128-bit X86VectorVTInfo records of one element
// type so that they can be passed around as a single template argument.
class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
                           X86VectorVTInfo i128> {
  X86VectorVTInfo info512 = i512;
  X86VectorVTInfo info256 = i256;
  X86VectorVTInfo info128 = i128;
}
174
// One AVX512VLVectorVTInfo per element type, listing the 512/256/128-bit
// records in that order.
def avx512vl_i8_info  : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
                                             v16i8x_info>;
def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
                                             v8i16x_info>;
def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
                                             v4i32x_info>;
def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
                                             v2i64x_info>;
def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
                                             v4f32x_info>;
def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
                                             v2f64x_info>;
Robert Khasanov2ea081d2014-08-25 14:49:34 +0000187
// Describes a mask (vXi1) type: its mask register class, the matching
// write-mask register class and the mask value type.
class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
                       ValueType _vt> {
  RegisterClass KRC = _krc;
  RegisterClass KRCWM = _krcwm;
  ValueType KVT = _vt;
}
194
// Mask type records for every mask width from 1 to 64 bits.
def v1i1_info  : X86KVectorVTInfo<VK1,  VK1WM,  v1i1>;
def v2i1_info  : X86KVectorVTInfo<VK2,  VK2WM,  v2i1>;
def v4i1_info  : X86KVectorVTInfo<VK4,  VK4WM,  v4i1>;
def v8i1_info  : X86KVectorVTInfo<VK8,  VK8WM,  v8i1>;
def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;
202
// This multiclass generates the masking variants from the non-masking
// variant.  It only provides the assembly pieces for the masking variants.
// It assumes custom ISel patterns for masking which can be provided as
// template arguments.
//
// Three instructions are produced: NAME (unmasked), NAME#k (merge-masking,
// EVEX_K) and NAME#kz (zero-masking, EVEX_KZ).  Ins/MaskingIns/ZeroMaskingIns
// and Pattern/MaskingPattern/ZeroMaskingPattern supply the operand list and
// ISel pattern of each variant respectively.  MaskingConstraint becomes the
// Constraints string of the NAME#k variant only.  IsCommutable controls
// isCommutable on the unmasked and zero-masked variants, IsKCommutable on the
// merge-masked variant.
multiclass AVX512_maskable_custom<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  list<dag> ZeroMaskingPattern,
                                  InstrItinClass itin,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0> {
  let isCommutable = IsCommutable in
    def NAME: AVX512<O, F, Outs, Ins,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                     "$dst, "#IntelSrcAsm#"}",
                       Pattern, itin>;

  // Prefer over VMOV*rrk Pat<>
  let isCommutable = IsKCommutable in
    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern, itin>,
              EVEX_K {
      // In case of the 3src subclass this is overridden with a let.
      string Constraints = MaskingConstraint;
    }

  // Zero mask does not add any restrictions to commute operands transformation.
  // So, it is Ok to use IsCommutable instead of IsKCommutable.
  let isCommutable = IsCommutable in // Prefer over VMOV*rrkz Pat<>
    def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
                                     "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
                       ZeroMaskingPattern,
                       itin>,
              EVEX_KZ;
}
246
Robert Khasanov2ea081d2014-08-25 14:49:34 +0000247
// Common base class of AVX512_maskable and AVX512_maskable_3src.
//
// Wraps the RHS dags into `set _.RC:$dst` patterns and forwards to
// AVX512_maskable_custom.  RHS is the unmasked result, MaskingRHS the
// merge-masked result (supplied by the caller), and the zero-masked result is
// built here as (Select mask, RHS, all-zeros).
multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  InstrItinClass itin,
                                  SDNode Select = vselect,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0> :
  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst, MaskingRHS)],
                         [(set _.RC:$dst,
                               (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
                         itin, MaskingConstraint, IsCommutable,
                         IsKCommutable>;
Adam Nemet2e2537f2014-08-07 17:53:55 +0000268
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction.  In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
// This version uses a separate dag for non-masking and masking.
//
// RHS is used only for the unmasked pattern; MaskRHS is used inside the Select
// of both the merge-masking and the zero-masking pattern.  The masking
// operand lists are formed by prepending ($src0, $mask) or ($mask) to Ins.
multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS, dag MaskRHS,
                           InstrItinClass itin,
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           SDNode Select = vselect> :
   AVX512_maskable_custom<O, F, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          [(set _.RC:$dst, RHS)],
                          [(set _.RC:$dst,
                              (Select _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
                          [(set _.RC:$dst,
                              (Select _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
                          itin, "$src0 = $dst", IsCommutable, IsKCommutable>;
Craig Topper3a622a12017-08-17 15:40:25 +0000290
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction.  In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
//
// The same RHS dag is used for all three variants; the merge-masking pattern
// is (Select mask, RHS, $src0) with the constraint "$src0 = $dst".
multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS,
                           InstrItinClass itin,
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           SDNode Select = vselect> :
   AVX512_maskable_common<O, F, _, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                          (Select _.KRCWM:$mask, RHS, _.RC:$src0), itin,
                          Select, "$src0 = $dst", IsCommutable, IsKCommutable>;
Elena Demikhovsky905a5a62014-11-26 10:46:49 +0000307
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the scalar instruction.
//
// It is AVX512_maskable with X86selects as the select node and IsKCommutable
// fixed to 0.
multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS,
                           InstrItinClass itin,
                           bit IsCommutable = 0> :
   AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
                   RHS, itin, IsCommutable, 0, X86selects>;
Adam Nemet2e91ee52014-08-14 17:13:19 +0000318
// Similar to AVX512_maskable but in this case one of the source operands
// ($src1) is already tied to $dst so we just use that for the preserved
// vector elements.  NOTE that the NonTiedIns (the ins dag) should exclude
// $src1.
//
// No masking constraint is passed here (""), and the merge-masking and
// zero-masking variants share the same operand list.  When MaskOnly is set,
// the RHS handed to AVX512_maskable_common (used there for the unmasked and
// zero-masking patterns) is replaced by (null_frag); the merge-masking
// pattern still uses RHS.
multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
                                dag Outs, dag NonTiedIns, string OpcodeStr,
                                string AttSrcAsm, string IntelSrcAsm,
                                dag RHS, InstrItinClass itin,
                                bit IsCommutable = 0,
                                bit IsKCommutable = 0,
                                SDNode Select = vselect,
                                bit MaskOnly = 0> :
   AVX512_maskable_common<O, F, _, Outs,
                          !con((ins _.RC:$src1), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          !if(MaskOnly, (null_frag), RHS),
                          (Select _.KRCWM:$mask, RHS, _.RC:$src1), itin,
                          Select, "", IsCommutable, IsKCommutable>;
Adam Nemet2e91ee52014-08-14 17:13:19 +0000339
// Scalar flavour of AVX512_maskable_3src: identical except that X86selects is
// used as the select node.
multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns, string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       dag RHS, InstrItinClass itin,
                                       bit IsCommutable = 0,
                                       bit IsKCommutable = 0,
                                       bit MaskOnly = 0> :
   AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
                        IntelSrcAsm, RHS, itin, IsCommutable, IsKCommutable,
                        X86selects, MaskOnly>;
Adam Nemet2b5cdbb2014-10-08 23:25:33 +0000350
// Generates the unmasked, merge-masking and zero-masking variants where only
// the unmasked variant carries an ISel pattern (Pattern); the two masking
// variants get empty pattern lists.  The merge-masking variant takes a $src0
// operand tied to $dst.
multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  InstrItinClass itin> :
   AVX512_maskable_custom<O, F, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                          itin, "$src0 = $dst">;
Adam Nemet2b5cdbb2014-10-08 23:25:33 +0000362
Elena Demikhovsky431b81e2015-04-21 13:13:46 +0000363
// Instruction with mask that puts result in mask register,
// like "compare" and "vptest"
//
// Produces NAME (unmasked) and NAME#k (masked, EVEX_K); there is no
// zero-masking variant.  IsCommutable is applied to the unmasked variant only.
multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  InstrItinClass itin,
                                  bit IsCommutable = 0> {
  let isCommutable = IsCommutable in
    def NAME: AVX512<O, F, Outs, Ins,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                     "$dst, "#IntelSrcAsm#"}",
                       Pattern, itin>;

    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern, itin>, EVEX_K;
}
386
// Wraps RHS / MaskingRHS into `set _.KRC:$dst` patterns (the result lives in
// a mask register) and forwards to AVX512_maskable_custom_cmp.
multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  InstrItinClass itin,
                                  bit IsCommutable = 0> :
  AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.KRC:$dst, RHS)],
                         [(set _.KRC:$dst, MaskingRHS)], itin, IsCommutable>;
Elena Demikhovsky431b81e2015-04-21 13:13:46 +0000399
// Mask-producing instruction whose masked form is the unmasked result ANDed
// with the write mask: the masked pattern is (and $mask, RHS) and the masked
// operand list is ($mask) prepended to Ins.
multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS, InstrItinClass itin,
                           bit IsCommutable = 0> :
   AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                          (and _.KRCWM:$mask, RHS), itin, IsCommutable>;
Elena Demikhovsky431b81e2015-04-21 13:13:46 +0000409
// Pattern-less variant of AVX512_maskable_cmp: defines the unmasked and
// masked instructions with empty ISel pattern lists.
multiclass AVX512_maskable_cmp_alt<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           InstrItinClass itin> :
   AVX512_maskable_custom_cmp<O, F, Outs,
                             Ins, !con((ins _.KRCWM:$mask),Ins), OpcodeStr,
                             AttSrcAsm, IntelSrcAsm, [],[], itin>;
Elena Demikhovsky29792e92015-05-07 11:24:42 +0000417
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction.  In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
//
// Unlike AVX512_maskable, the unmasked pattern (RHS) and the masked patterns
// (MaskedRHS) are supplied separately.  IsKCommutable is not exposed, so the
// merge-masking variant keeps AVX512_maskable_custom's default of 0.
multiclass AVX512_maskable_logic<bits<8> O, Format F, X86VectorVTInfo _,
                                 dag Outs, dag Ins, string OpcodeStr,
                                 string AttSrcAsm, string IntelSrcAsm,
                                 dag RHS, dag MaskedRHS,
                                 InstrItinClass itin,
                                 bit IsCommutable = 0, SDNode Select = vselect> :
   AVX512_maskable_custom<O, F, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          [(set _.RC:$dst, RHS)],
                          [(set _.RC:$dst,
                                (Select _.KRCWM:$mask, MaskedRHS, _.RC:$src0))],
                          [(set _.RC:$dst,
                                (Select _.KRCWM:$mask, MaskedRHS,
                                        _.ImmAllZerosV))],
                          itin, "$src0 = $dst", IsCommutable>;
Craig Topperabe80cc2016-08-28 06:06:28 +0000438
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000439
// Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
// swizzled by ExecutionDomainFix to pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
// AVX512_512_SETALLONES is the all-ones counterpart and shares the same
// properties.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
               [(set VR512:$dst, (v16i32 immAllZerosV))]>;
def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
               [(set VR512:$dst, (v16i32 immAllOnesV))]>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000452
// Alias instructions that allow VPTERNLOG to be used with a mask to create
// a mix of all ones and all zeros elements. This is done this way to force
// the same register to be used as input for all three sources.
// The _32 form selects per 32-bit element under a 16-bit mask, the _64 form
// per 64-bit element under an 8-bit mask.
let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK16WM:$mask), "",
                           [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
                                                      (v16i32 immAllOnesV),
                                                      (v16i32 immAllZerosV)))]>;
def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK8WM:$mask), "",
                [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
                                           (bc_v8i64 (v16i32 immAllOnesV)),
                                           (bc_v8i64 (v16i32 immAllZerosV))))]>;
}
468
// 128-bit and 256-bit all-zeros pseudo instructions (VR128X / VR256X), marked
// rematerializable, as cheap as a move and foldable as a load.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
               [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
               [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
}
476
// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// This is expanded by ExpandPostRAPseudos.
// The SS form produces an f32 zero in FR32X, the SD form an f64 zero in FR64X.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
  def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
                          [(set FR32X:$dst, fp32imm0)]>;
  def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
                          [(set FR64X:$dst, fpimm0)]>;
}
486
//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
//
Craig Topper3a622a12017-08-17 15:40:25 +0000490
// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
//
// Defines a register form (rr) and a memory form (rm) that insert a From-typed
// subvector into a To-typed vector at the position given by the immediate
// $src3.  The mnemonic is "vinsert" # From.EltTypeName # "x" # From.NumElts.
// vinsert_insert is used for the unmasked pattern and vinsert_for_mask for
// the masked patterns.
multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
                                  X86VectorVTInfo To,
                                  SDPatternOperator vinsert_insert,
                                  SDPatternOperator vinsert_for_mask,
                                  OpndItins itins> {
  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                         (From.VT From.RC:$src2),
                                         (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                           (From.VT From.RC:$src2),
                                           (iPTR imm)), itins.rr>,
                   AVX512AIi8Base, EVEX_4V, Sched<[itins.Sched]>;
    // The memory form loads the inserted subvector from $src2.
    let mayLoad = 1 in
    defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                               (From.VT (bitconvert (From.LdFrag addr:$src2))),
                               (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                               (From.VT (bitconvert (From.LdFrag addr:$src2))),
                               (iPTR imm)), itins.rm>, AVX512AIi8Base, EVEX_4V,
                   EVEX_CD8<From.EltSize, From.CD8TupleForm>,
                   Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
Adam Nemet4e2ef472014-10-02 23:18:28 +0000525
// Passes the same pattern operator for masked and unmasked ops.
multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
                            X86VectorVTInfo To,
                            SDPatternOperator vinsert_insert,
                            OpndItins itins> :
  vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, itins>;
Craig Topper3a622a12017-08-17 15:40:25 +0000532
// Emits selection patterns (no new instructions) that map a vinsert_insert
// node onto the existing instruction named InstrStr, in both its register
// ("rr") and memory ("rm") forms.  The immediate operand is produced by
// applying INSERT_get_vinsert_imm to the matched node ($ins).  The patterns
// are guarded by the predicate list p.
multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                       X86VectorVTInfo To, PatFrag vinsert_insert,
                       SDNodeXForm INSERT_get_vinsert_imm, list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(vinsert_insert:$ins
                     (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                     To.RC:$src1, From.RC:$src2,
                     (INSERT_get_vinsert_imm To.RC:$ins)))>;

    def : Pat<(vinsert_insert:$ins
                  (To.VT To.RC:$src1),
                  (From.VT (bitconvert (From.LdFrag addr:$src2))),
                  (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rm")
                  To.RC:$src1, addr:$src2,
                  (INSERT_get_vinsert_imm To.RC:$ins)))>;
  }
}
552
// Instantiates the subvector insert instructions for one pair of element
// types.  EltVT32/EltVT64 are the element types used for the 32x* and 64x*
// forms; Opcode128/Opcode256 are the opcodes used when the inserted subvector
// is 128 or 256 bits wide.
multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
                            ValueType EltVT64, int Opcode256,
                            OpndItins itins> {

  // 32x4: 128-bit subvector into a 256-bit vector.
  let Predicates = [HasVLX] in {
    defm NAME # "32x4Z256"
        : vinsert_for_size<Opcode128,
                           X86VectorVTInfo< 4, EltVT32, VR128X>,
                           X86VectorVTInfo< 8, EltVT32, VR256X>,
                           vinsert128_insert, itins>,
          EVEX_V256;
  }

  // 32x4: 128-bit subvector into a 512-bit vector.
  defm NAME # "32x4Z"
      : vinsert_for_size<Opcode128,
                         X86VectorVTInfo< 4, EltVT32, VR128X>,
                         X86VectorVTInfo<16, EltVT32, VR512>,
                         vinsert128_insert, itins>,
        EVEX_V512;

  // 64x4: 256-bit subvector into a 512-bit vector.
  defm NAME # "64x4Z"
      : vinsert_for_size<Opcode256,
                         X86VectorVTInfo< 4, EltVT64, VR256X>,
                         X86VectorVTInfo< 8, EltVT64, VR512>,
                         vinsert256_insert, itins>,
        VEX_W, EVEX_V512;

  // Even with DQI we'd like to only use these instructions for masking, so
  // null_frag is passed as the unmasked pattern operator.
  let Predicates = [HasVLX, HasDQI] in {
    defm NAME # "64x2Z256"
        : vinsert_for_size_split<Opcode128,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 null_frag, vinsert128_insert, itins>,
          VEX_W, EVEX_V256;
  }

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z"
        : vinsert_for_size_split<Opcode128,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 null_frag, vinsert128_insert, itins>,
          VEX_W, EVEX_V512;

    defm NAME # "32x8Z"
        : vinsert_for_size_split<Opcode256,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 null_frag, vinsert256_insert, itins>,
          EVEX_V512;
  }
}
596
// Itinerary/scheduling bundles for the subvector insert instructions.
// FIXME: Is there a better scheduler itinerary for VINSERTF/VINSERTI?
def AVX512_VINSERTF : OpndItins<IIC_SSE_SHUFP, IIC_SSE_SHUFP> {
  let Sched = WriteFShuffle256;
}
def AVX512_VINSERTI : OpndItins<IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI> {
  let Sched = WriteShuffle256;
}

// Floating point (f32/f64) and integer (i32/i64) instruction families.
defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, AVX512_VINSERTF>;
defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, AVX512_VINSERTI>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000609
// Codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.

// 64-bit element types, VEC128 into VEC256.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256",
                                 v2f64x_info, v4f64x_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256",
                                 v2i64x_info, v4i64x_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasVLX]>;

// 64-bit element types, VEC128 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z",
                                 v2f64x_info, v8f64_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z",
                                 v2i64x_info, v8i64_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasAVX512]>;

// 32-bit element types, VEC256 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTF64x4Z",
                                 v8f32x_info, v16f32_info,
                                 vinsert256_insert, INSERT_get_vinsert256_imm,
                                 [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z",
                                 v8i32x_info, v16i32_info,
                                 vinsert256_insert, INSERT_get_vinsert256_imm,
                                 [HasAVX512]>;

// Codegen pattern with the alternative types insert VEC128 into VEC256
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256",
                                 v8i16x_info, v16i16x_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256",
                                 v16i8x_info, v32i8x_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasVLX]>;

// Codegen pattern with the alternative types insert VEC128 into VEC512
defm : vinsert_for_size_lowering<"VINSERTI32x4Z",
                                 v8i16x_info, v32i16_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z",
                                 v16i8x_info, v64i8_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasAVX512]>;

// Codegen pattern with the alternative types insert VEC256 into VEC512
defm : vinsert_for_size_lowering<"VINSERTI64x4Z",
                                 v16i16x_info, v32i16_info,
                                 vinsert256_insert, INSERT_get_vinsert256_imm,
                                 [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z",
                                 v32i8x_info, v64i8_info,
                                 vinsert256_insert, INSERT_get_vinsert256_imm,
                                 [HasAVX512]>;
642
Craig Topperf7a19db2017-10-08 01:33:40 +0000643
// Additional patterns for handling a bitcast between the vselect and the
// insert.  The insert is matched on the From/To types, its result is
// bitconverted to Cast.VT, and the vselect is performed on Cast.VT with a
// Cast.KRCWM mask.  Four masked instruction forms of InstrStr are selected:
//   rrk  / rmk  - merge masking with passthru $src0 (register / memory source)
//   rrkz / rmkz - zero masking (register / memory source)
multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                 X86VectorVTInfo To, X86VectorVTInfo Cast,
                                 PatFrag vinsert_insert,
                                 SDNodeXForm INSERT_get_vinsert_imm,
                                 list<Predicate> p> {
  let Predicates = p in {
    // Merge masking, register source.
    def : Pat<(Cast.VT
               (vselect Cast.KRCWM:$mask,
                        (bitconvert
                         (vinsert_insert:$ins (To.VT To.RC:$src1),
                                              (From.VT From.RC:$src2),
                                              (iPTR imm))),
                        Cast.RC:$src0)),
              (!cast<Instruction>(InstrStr#"rrk")
                   Cast.RC:$src0, Cast.KRCWM:$mask,
                   To.RC:$src1, From.RC:$src2,
                   (INSERT_get_vinsert_imm To.RC:$ins))>;

    // Merge masking, memory source.
    def : Pat<(Cast.VT
               (vselect Cast.KRCWM:$mask,
                        (bitconvert
                         (vinsert_insert:$ins (To.VT To.RC:$src1),
                                              (From.VT
                                               (bitconvert
                                                (From.LdFrag addr:$src2))),
                                              (iPTR imm))),
                        Cast.RC:$src0)),
              (!cast<Instruction>(InstrStr#"rmk")
                   Cast.RC:$src0, Cast.KRCWM:$mask,
                   To.RC:$src1, addr:$src2,
                   (INSERT_get_vinsert_imm To.RC:$ins))>;

    // Zero masking, register source.
    def : Pat<(Cast.VT
               (vselect Cast.KRCWM:$mask,
                        (bitconvert
                         (vinsert_insert:$ins (To.VT To.RC:$src1),
                                              (From.VT From.RC:$src2),
                                              (iPTR imm))),
                        Cast.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr#"rrkz")
                   Cast.KRCWM:$mask,
                   To.RC:$src1, From.RC:$src2,
                   (INSERT_get_vinsert_imm To.RC:$ins))>;

    // Zero masking, memory source.
    def : Pat<(Cast.VT
               (vselect Cast.KRCWM:$mask,
                        (bitconvert
                         (vinsert_insert:$ins (To.VT To.RC:$src1),
                                              (From.VT
                                               (bitconvert
                                                (From.LdFrag addr:$src2))),
                                              (iPTR imm))),
                        Cast.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr#"rmkz")
                   Cast.KRCWM:$mask,
                   To.RC:$src1, addr:$src2,
                   (INSERT_get_vinsert_imm To.RC:$ins))>;
  }
}
697
// Masked FP inserts of VEC128 into VEC256 where the insert and the vselect
// use different element types.
defm : vinsert_for_mask_cast<"VINSERTF32x4Z256",
                             v2f64x_info, v4f64x_info, v8f32x_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256",
                             v4f32x_info, v8f32x_info, v4f64x_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasDQI, HasVLX]>;
704
// Masked integer inserts of VEC128 into VEC256 where the insert and the
// vselect use different element types.

// vselect on v8i32: use the 32x4 form.
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;

// vselect on v4i64: use the 64x2 form.  These are integer types, so select the
// integer instruction (VINSERTI64x2Z256) rather than the FP one, matching the
// 512-bit VINSERTI64x2Z patterns.
defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v4i32x_info, v8i32x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v8i16x_info, v16i16x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v16i8x_info, v32i8x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
723
// Masked inserts of VEC128 into VEC512 where the insert and the vselect use
// different element types.

// Floating point.
defm : vinsert_for_mask_cast<"VINSERTF32x4Z",
                             v2f64x_info, v8f64_info, v16f32_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z",
                             v4f32x_info, v16f32_info, v8f64_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasDQI]>;

// Integer, vselect on v16i32.
defm : vinsert_for_mask_cast<"VINSERTI32x4Z",
                             v2i64x_info, v8i64_info, v16i32_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z",
                             v8i16x_info, v32i16_info, v16i32_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z",
                             v16i8x_info, v64i8_info, v16i32_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasAVX512]>;

// Integer, vselect on v8i64.
defm : vinsert_for_mask_cast<"VINSERTI64x2Z",
                             v4i32x_info, v16i32_info, v8i64_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z",
                             v8i16x_info, v32i16_info, v8i64_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z",
                             v16i8x_info, v64i8_info, v8i64_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasDQI]>;
749
// Masked inserts of VEC256 into VEC512 where the insert and the vselect use
// different element types.

// Floating point.
defm : vinsert_for_mask_cast<"VINSERTF32x8Z",
                             v4f64x_info, v8f64_info, v16f32_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTF64x4Z",
                             v8f32x_info, v16f32_info, v8f64_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasAVX512]>;

// Integer, vselect on v16i32.
defm : vinsert_for_mask_cast<"VINSERTI32x8Z",
                             v4i64x_info, v8i64_info, v16i32_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z",
                             v16i16x_info, v32i16_info, v16i32_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z",
                             v32i8x_info, v64i8_info, v16i32_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasDQI]>;

// Integer, vselect on v8i64.
defm : vinsert_for_mask_cast<"VINSERTI64x4Z",
                             v8i32x_info, v16i32_info, v8i64_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z",
                             v16i16x_info, v32i16_info, v8i64_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z",
                             v32i8x_info, v64i8_info, v8i64_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasAVX512]>;
775
// vinsertps - insert f32 to XMM
let ExeDomain = SSEPackedSingle in {
  // Register source form.
  def VINSERTPSZrr
      : AVX512AIi8<0x21, MRMSrcReg,
                   (outs VR128X:$dst),
                   (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
                   "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                   [(set VR128X:$dst,
                         (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))],
                   IIC_SSE_INSERTPS_RR>,
        EVEX_4V, Sched<[WriteFShuffle]>;

  // Memory source form: the f32 operand is loaded and matched as a
  // scalar_to_vector.
  def VINSERTPSZrm
      : AVX512AIi8<0x21, MRMSrcMem,
                   (outs VR128X:$dst),
                   (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
                   "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                   [(set VR128X:$dst,
                         (X86insertps VR128X:$src1,
                                      (v4f32 (scalar_to_vector
                                              (loadf32 addr:$src2))),
                                      imm:$src3))],
                   IIC_SSE_INSERTPS_RM>,
        EVEX_4V, EVEX_CD8<32, CD8VT1>,
        Sched<[WriteFShuffleLd, ReadAfterLd]>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000791
792//===----------------------------------------------------------------------===//
793// AVX-512 VECTOR EXTRACT
794//---
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000795
// Defines the subvector extract instructions for one From -> To pair:
//   rr  - register destination, together with the masked variants generated
//         by AVX512_maskable_split.
//   mr  - store of the extracted subvector to memory.
//   mrk - masked store form; it has no selection pattern.
// Separate pattern operators are taken for the unmasked (vextract_extract)
// and masked (vextract_for_mask) register forms; null_frag may be passed for
// either one.
multiclass vextract_for_size_split<int Opcode,
                                   X86VectorVTInfo From, X86VectorVTInfo To,
                                   SDPatternOperator vextract_extract,
                                   SDPatternOperator vextract_for_mask,
                                   OpndItins itins> {

  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr
        : AVX512_maskable_split<Opcode, MRMDestReg, To,
              (outs To.RC:$dst),
              (ins From.RC:$src1, u8imm:$idx),
              "vextract" # To.EltTypeName # "x" # To.NumElts,
              "$idx, $src1", "$src1, $idx",
              (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
              (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm)),
              itins.rr>,
          AVX512AIi8Base, EVEX, Sched<[itins.Sched]>;

    def mr
        : AVX512AIi8<Opcode, MRMDestMem,
              (outs),
              (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
              "vextract" # To.EltTypeName # "x" # To.NumElts #
                  "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
              [(store (To.VT (vextract_extract:$idx
                              (From.VT From.RC:$src1), (iPTR imm))),
                      addr:$dst)],
              itins.rm>,
          EVEX, Sched<[itins.Sched.Folded, ReadAfterLd]>;

    // No pattern is attached, so the store property is stated explicitly.
    let mayStore = 1, hasSideEffects = 0 in
    def mrk
        : AVX512AIi8<Opcode, MRMDestMem,
              (outs),
              (ins To.MemOp:$dst, To.KRCWM:$mask,
                   From.RC:$src1, u8imm:$idx),
              "vextract" # To.EltTypeName # "x" # To.NumElts #
                  "\t{$idx, $src1, $dst {${mask}}|"
                  "$dst {${mask}}, $src1, $idx}",
              [],
              itins.rm>,
          EVEX_K, EVEX, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
833
// Convenience wrapper around vextract_for_size_split for the common case in
// which one pattern operator is used for both the masked and unmasked forms.
multiclass vextract_for_size<int Opcode,
                             X86VectorVTInfo From, X86VectorVTInfo To,
                             SDPatternOperator vextract_extract,
                             OpndItins itins>
  : vextract_for_size_split<Opcode, From, To,
                            /*unmasked*/ vextract_extract,
                            /*masked*/ vextract_extract,
                            itins>;
Craig Topper3a622a12017-08-17 15:40:25 +0000840
// Codegen pattern for the alternative types.
// Maps vextract_extract on the From/To vector types onto an existing extract
// instruction whose record name starts with InstrStr:
//   InstrStr#"rr" - result is produced in a register.
//   InstrStr#"mr" - result is stored directly to memory.
// EXTRACT_get_vextract_imm is applied to the matched node ($ext) to produce
// the instruction's immediate operand.  Both patterns are guarded by p.
multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                                      X86VectorVTInfo To,
                                      PatFrag vextract_extract,
                                      SDNodeXForm EXTRACT_get_vextract_imm,
                                      list<Predicate> p> {
  let Predicates = p in {
    // Register destination.
    def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                         From.RC:$src1,
                         (EXTRACT_get_vextract_imm To.RC:$ext)))>;

    // Extract followed by a store.
    def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
                                                   (iPTR imm))),
                     addr:$dst),
              (!cast<Instruction>(InstrStr#"mr")
                   addr:$dst, From.RC:$src1,
                   (EXTRACT_get_vextract_imm To.RC:$ext))>;
  }
}
856
// Instantiates the subvector extract instructions for one pair of element
// types.  EltVT32/EltVT64 are the element types used for the 32x* and 64x*
// forms; Opcode128/Opcode256 are the opcodes used when the extracted
// subvector is 128 or 256 bits wide.
multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
                             ValueType EltVT64, int Opcode256,
                             OpndItins itins> {
  let Predicates = [HasAVX512] in {
    // 32x4: 128-bit subvector from a 512-bit vector.
    defm NAME # "32x4Z"
        : vextract_for_size<Opcode128,
                            X86VectorVTInfo<16, EltVT32, VR512>,
                            X86VectorVTInfo< 4, EltVT32, VR128X>,
                            vextract128_extract, itins>,
          EVEX_V512, EVEX_CD8<32, CD8VT4>;

    // 64x4: 256-bit subvector from a 512-bit vector.
    defm NAME # "64x4Z"
        : vextract_for_size<Opcode256,
                            X86VectorVTInfo< 8, EltVT64, VR512>,
                            X86VectorVTInfo< 4, EltVT64, VR256X>,
                            vextract256_extract, itins>,
          VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
  }

  // 32x4: 128-bit subvector from a 256-bit vector.
  let Predicates = [HasVLX] in {
    defm NAME # "32x4Z256"
        : vextract_for_size<Opcode128,
                            X86VectorVTInfo< 8, EltVT32, VR256X>,
                            X86VectorVTInfo< 4, EltVT32, VR128X>,
                            vextract128_extract, itins>,
          EVEX_V256, EVEX_CD8<32, CD8VT4>;
  }

  // Even with DQI we'd like to only use these instructions for masking, so
  // null_frag is passed as the unmasked pattern operator.
  let Predicates = [HasVLX, HasDQI] in {
    defm NAME # "64x2Z256"
        : vextract_for_size_split<Opcode128,
                                  X86VectorVTInfo< 4, EltVT64, VR256X>,
                                  X86VectorVTInfo< 2, EltVT64, VR128X>,
                                  null_frag, vextract128_extract, itins>,
          VEX_W, EVEX_V256, EVEX_CD8<64, CD8VT2>;
  }

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z"
        : vextract_for_size_split<Opcode128,
                                  X86VectorVTInfo< 8, EltVT64, VR512>,
                                  X86VectorVTInfo< 2, EltVT64, VR128X>,
                                  null_frag, vextract128_extract, itins>,
          VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;

    defm NAME # "32x8Z"
        : vextract_for_size_split<Opcode256,
                                  X86VectorVTInfo<16, EltVT32, VR512>,
                                  X86VectorVTInfo< 8, EltVT32, VR256X>,
                                  null_frag, vextract256_extract, itins>,
          EVEX_V512, EVEX_CD8<32, CD8VT8>;
  }
}
901
// Itinerary/scheduling bundles for the subvector extract instructions.
// FIXME: Is there a better scheduler itinerary for VEXTRACTF/VEXTRACTI?
def AVX512_VEXTRACTF : OpndItins<IIC_SSE_SHUFP, IIC_SSE_SHUFP> {
  let Sched = WriteFShuffle256;
}
def AVX512_VEXTRACTI : OpndItins<IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI> {
  let Sched = WriteShuffle256;
}

// Floating point (f32/f64) and integer (i32/i64) instruction families.
defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, AVX512_VEXTRACTF>;
defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, AVX512_VEXTRACTI>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000914
// extract_subvector codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.

// 64-bit element types, VEC128 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z",
                                  v8f64_info, v2f64x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm,
                                  [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z",
                                  v8i64_info, v2i64x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm,
                                  [HasAVX512]>;

// 32-bit element types, VEC256 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTF64x4Z",
                                  v16f32_info, v8f32x_info,
                                  vextract256_extract,
                                  EXTRACT_get_vextract256_imm,
                                  [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z",
                                  v16i32_info, v8i32x_info,
                                  vextract256_extract,
                                  EXTRACT_get_vextract256_imm,
                                  [HasAVX512]>;

// 64-bit element types, VEC128 from VEC256.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256",
                                  v4f64x_info, v2f64x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm,
                                  [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256",
                                  v4i64x_info, v2i64x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm,
                                  [HasVLX]>;

// Codegen pattern with the alternative types extract VEC128 from VEC256
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256",
                                  v16i16x_info, v8i16x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm,
                                  [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256",
                                  v32i8x_info, v16i8x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm,
                                  [HasVLX]>;

// Codegen pattern with the alternative types extract VEC128 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z",
                                  v32i16_info, v8i16x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm,
                                  [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z",
                                  v64i8_info, v16i8x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm,
                                  [HasAVX512]>;

// Codegen pattern with the alternative types extract VEC256 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z",
                                  v32i16_info, v16i16x_info,
                                  vextract256_extract,
                                  EXTRACT_get_vextract256_imm,
                                  [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z",
                                  v64i8_info, v32i8x_info,
                                  vextract256_extract,
                                  EXTRACT_get_vextract256_imm,
                                  [HasAVX512]>;
948
Craig Topper5f3fef82016-05-22 07:40:58 +0000949
// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
// Each pattern takes the sub_ymm subregister of the 512-bit source and
// extracts its upper 128 bits (index 1) with VEXTRACT[IF]128rr.
let Predicates = [NoVLX] in {
  // 64-bit elements.
  def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
            (v2i64 (VEXTRACTI128rr
                    (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                    (iPTR 1)))>;
  def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
            (v2f64 (VEXTRACTF128rr
                    (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                    (iPTR 1)))>;

  // 32-bit elements.
  def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
            (v4i32 (VEXTRACTI128rr
                    (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                    (iPTR 1)))>;
  def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
            (v4f32 (VEXTRACTF128rr
                    (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                    (iPTR 1)))>;

  // 16-bit and 8-bit elements.
  def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
            (v8i16 (VEXTRACTI128rr
                    (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                    (iPTR 1)))>;
  def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
            (v16i8 (VEXTRACTI128rr
                    (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                    (iPTR 1)))>;
}
978
// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
// Each pattern takes the sub_ymm subregister of the 512-bit source and
// extracts its upper 128 bits (index 1) with VEXTRACT[IF]32x4Z256rr.
let Predicates = [HasVLX] in {
  // 64-bit elements.
  def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
            (v2i64 (VEXTRACTI32x4Z256rr
                    (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                    (iPTR 1)))>;
  def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
            (v2f64 (VEXTRACTF32x4Z256rr
                    (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                    (iPTR 1)))>;

  // 32-bit elements.
  def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
            (v4i32 (VEXTRACTI32x4Z256rr
                    (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                    (iPTR 1)))>;
  def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
            (v4f32 (VEXTRACTF32x4Z256rr
                    (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                    (iPTR 1)))>;

  // 16-bit and 8-bit elements.
  def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
            (v8i16 (VEXTRACTI32x4Z256rr
                    (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                    (iPTR 1)))>;
  def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
            (v16i8 (VEXTRACTI32x4Z256rr
                    (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                    (iPTR 1)))>;
}
1007
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001008
// Additional patterns for handling a bitcast between the vselect and the
// extract_subvector.
// The extract is matched on the From/To types, its result is bitconverted to
// Cast.VT, and the vselect is performed on Cast.VT with a Cast.KRCWM mask.
// Two masked register forms of InstrStr are selected:
//   rrk  - merge masking with passthru $src0
//   rrkz - zero masking
// EXTRACT_get_vextract_imm is applied to the matched extract node ($ext) to
// produce the instruction's immediate operand.
multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
                                  PatFrag vextract_extract,
                                  SDNodeXForm EXTRACT_get_vextract_imm,
                                  list<Predicate> p> {
let Predicates = p in {
  // Merge masking.  The passthru is a vselect operand and therefore has type
  // Cast.VT, so it is matched with Cast.RC, the same class the output pattern
  // uses (and the same convention as vinsert_for_mask_cast).
  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (vextract_extract:$ext
                                       (From.VT From.RC:$src), (iPTR imm)))),
                              Cast.RC:$src0)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
                      Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;

  // Zero masking.
  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (vextract_extract:$ext
                                       (From.VT From.RC:$src), (iPTR imm)))),
                              Cast.ImmAllZerosV)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
                      Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
}
}
1036
// Masked extracts of VEC128 from VEC256 where the extract and the vselect use
// different element types.

// Floating point.
defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256",
                              v4f64x_info, v2f64x_info, v4f32x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256",
                              v8f32x_info, v4f32x_info, v2f64x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasDQI, HasVLX]>;

// Integer, vselect on v4i32.
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256",
                              v4i64x_info, v2i64x_info, v4i32x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256",
                              v16i16x_info, v8i16x_info, v4i32x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256",
                              v32i8x_info, v16i8x_info, v4i32x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasVLX]>;

// Integer, vselect on v2i64.
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256",
                              v8i32x_info, v4i32x_info, v2i64x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256",
                              v16i16x_info, v8i16x_info, v2i64x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256",
                              v32i8x_info, v16i8x_info, v2i64x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasDQI, HasVLX]>;
1062
// Masked extracts of VEC128 from VEC512 where the extract and the vselect use
// different element types.

// Floating point.
defm : vextract_for_mask_cast<"VEXTRACTF32x4Z",
                              v8f64_info, v2f64x_info, v4f32x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z",
                              v16f32_info, v4f32x_info, v2f64x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasDQI]>;

// Integer, vselect on v4i32.
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z",
                              v8i64_info, v2i64x_info, v4i32x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z",
                              v32i16_info, v8i16x_info, v4i32x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z",
                              v64i8_info, v16i8x_info, v4i32x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasAVX512]>;

// Integer, vselect on v2i64.
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z",
                              v16i32_info, v4i32x_info, v2i64x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z",
                              v32i16_info, v8i16x_info, v2i64x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z",
                              v64i8_info, v16i8x_info, v2i64x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasDQI]>;
1088
// Masked extracts of VEC256 from VEC512 where the extract and the vselect use
// different element types.

// Floating point.
defm : vextract_for_mask_cast<"VEXTRACTF32x8Z",
                              v8f64_info, v4f64x_info, v8f32x_info,
                              vextract256_extract, EXTRACT_get_vextract256_imm,
                              [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x4Z",
                              v16f32_info, v8f32x_info, v4f64x_info,
                              vextract256_extract, EXTRACT_get_vextract256_imm,
                              [HasAVX512]>;

// Integer, vselect on v8i32.
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z",
                              v8i64_info, v4i64x_info, v8i32x_info,
                              vextract256_extract, EXTRACT_get_vextract256_imm,
                              [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z",
                              v32i16_info, v16i16x_info, v8i32x_info,
                              vextract256_extract, EXTRACT_get_vextract256_imm,
                              [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z",
                              v64i8_info, v32i8x_info, v8i32x_info,
                              vextract256_extract, EXTRACT_get_vextract256_imm,
                              [HasDQI]>;

// Integer, vselect on v4i64.
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z",
                              v16i32_info, v8i32x_info, v4i64x_info,
                              vextract256_extract, EXTRACT_get_vextract256_imm,
                              [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z",
                              v32i16_info, v16i16x_info, v4i64x_info,
                              vextract256_extract, EXTRACT_get_vextract256_imm,
                              [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z",
                              v64i8_info, v32i8x_info, v4i64x_info,
                              vextract256_extract, EXTRACT_get_vextract256_imm,
                              [HasAVX512]>;
1114
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001115// vextractps - extract 32 bits from XMM
Craig Topper03b849e2016-05-21 22:50:11 +00001116def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
Craig Topperfc946a02015-01-25 02:21:13 +00001117 (ins VR128X:$src1, u8imm:$src2),
Elena Demikhovskycf088092013-12-11 14:31:04 +00001118 "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
Simon Pilgrimd255a622017-12-06 18:46:06 +00001119 [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))],
1120 IIC_SSE_EXTRACTPS_RR>, EVEX, VEX_WIG, Sched<[WriteFShuffle]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001121
// Memory form of vextractps: stores the selected 32-bit element to $dst.
// The memory operand uses a 32-bit, single-element (CD8VT1) compressed
// displacement scale.
// NOTE(review): this is a store (MRMDestMem, no outs) yet it is scheduled with
// the load-folding class WriteFShuffleLd - confirm this is the intended
// scheduling class.
def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
      (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
                          addr:$dst)], IIC_SSE_EXTRACTPS_RM>,
      EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteFShuffleLd]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001128
//===---------------------------------------------------------------------===//
// AVX-512 BROADCAST
//---
// broadcast with a scalar argument.
// Emits selection patterns only (no instructions): a broadcast of a scalar
// FP register (SrcInfo.FRC) is selected to the register-source broadcast
// instruction NAME#ZSuffix#{r,rk,rkz} after copying the scalar into the
// vector register class SrcInfo.RC.
//   r   - unmasked
//   rk  - merge-masked (pass-through value in $src0)
//   rkz - zero-masked
// Note: opc and OpcodeStr are not referenced in the body.
multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
                            X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
  def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
            (!cast<Instruction>(NAME#DestInfo.ZSuffix#r)
             (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;
  def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
                                  (X86VBroadcast SrcInfo.FRC:$src),
                                  DestInfo.RC:$src0)),
            (!cast<Instruction>(NAME#DestInfo.ZSuffix#rk)
             DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
             (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;
  def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
                                  (X86VBroadcast SrcInfo.FRC:$src),
                                  DestInfo.ImmAllZerosV)),
            (!cast<Instruction>(NAME#DestInfo.ZSuffix#rkz)
             DestInfo.KRCWM:$mask, (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;
}
Robert Khasanovaf318f72014-10-30 14:21:47 +00001150
// Split version to allow mask and broadcast node to be different types. This
// helps support the 32x2 broadcasts.
//
// Parameters:
//   SchedRR/SchedRM - scheduling classes for the register and memory forms.
//   MaskInfo        - type the write-mask is applied in; also the result type.
//   DestInfo        - type produced by the broadcast node before the bitcast
//                     to MaskInfo.VT.
//   SrcInfo         - type of the broadcast source (register class, scalar
//                     memory operand and scalar load fragment come from here).
//   UnmaskedOp      - node used for the unmasked patterns; the masked patterns
//                     always use X86VBroadcast.
// Defines the 'r' (register) and 'm' (scalar memory) instruction families and
// three extra patterns that fold a scalar_to_vector of a scalar load into the
// memory form (unmasked, merge-masked, zero-masked).
multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
                                     SchedWrite SchedRR, SchedWrite SchedRM,
                                     X86VectorVTInfo MaskInfo,
                                     X86VectorVTInfo DestInfo,
                                     X86VectorVTInfo SrcInfo,
                                     SDPatternOperator UnmaskedOp = X86VBroadcast> {
  let ExeDomain = DestInfo.ExeDomain, hasSideEffects = 0 in {
  defm r : AVX512_maskable_split<opc, MRMSrcReg, MaskInfo,
                   (outs MaskInfo.RC:$dst),
                   (ins SrcInfo.RC:$src), OpcodeStr, "$src", "$src",
                   (MaskInfo.VT
                    (bitconvert
                     (DestInfo.VT
                      (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))),
                   (MaskInfo.VT
                    (bitconvert
                     (DestInfo.VT
                      (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
                   NoItinerary>, T8PD, EVEX, Sched<[SchedRR]>;
  let mayLoad = 1 in
  defm m : AVX512_maskable_split<opc, MRMSrcMem, MaskInfo,
                   (outs MaskInfo.RC:$dst),
                   (ins SrcInfo.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
                   (MaskInfo.VT
                    (bitconvert
                     (DestInfo.VT (UnmaskedOp
                                   (SrcInfo.ScalarLdFrag addr:$src))))),
                   (MaskInfo.VT
                    (bitconvert
                     (DestInfo.VT (X86VBroadcast
                                   (SrcInfo.ScalarLdFrag addr:$src))))),
                   NoItinerary>, T8PD, EVEX, EVEX_CD8<SrcInfo.EltSize, CD8VT1>,
                   Sched<[SchedRM]>;
  }

  // Fold (scalar_to_vector (scalar load)) feeding the broadcast into the
  // memory form. All three patterns name the instruction via MaskInfo.ZSuffix;
  // MaskInfo and DestInfo are related by a bitconvert and so have the same
  // vector width.
  def : Pat<(MaskInfo.VT
             (bitconvert
              (DestInfo.VT (UnmaskedOp
                            (SrcInfo.VT (scalar_to_vector
                                         (SrcInfo.ScalarLdFrag addr:$src))))))),
            (!cast<Instruction>(NAME#MaskInfo.ZSuffix#m) addr:$src)>;
  def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask,
                          (bitconvert
                           (DestInfo.VT
                            (X86VBroadcast
                             (SrcInfo.VT (scalar_to_vector
                                          (SrcInfo.ScalarLdFrag addr:$src)))))),
                          MaskInfo.RC:$src0)),
            (!cast<Instruction>(NAME#MaskInfo.ZSuffix#mk)
             MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask, addr:$src)>;
  def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask,
                          (bitconvert
                           (DestInfo.VT
                            (X86VBroadcast
                             (SrcInfo.VT (scalar_to_vector
                                          (SrcInfo.ScalarLdFrag addr:$src)))))),
                          MaskInfo.ImmAllZerosV)),
            (!cast<Instruction>(NAME#MaskInfo.ZSuffix#mkz)
             MaskInfo.KRCWM:$mask, addr:$src)>;
}
Robert Khasanovaf318f72014-10-30 14:21:47 +00001213
// Helper class to force mask and broadcast result to same type.
// Forwards to avx512_broadcast_rm_split with DestInfo used for both the mask
// type and the broadcast result type; UnmaskedOp keeps its default.
multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
                               SchedWrite SchedRR, SchedWrite SchedRM,
                               X86VectorVTInfo DestInfo,
                               X86VectorVTInfo SrcInfo> :
  avx512_broadcast_rm_split<opc, OpcodeStr, SchedRR, SchedRM,
                            DestInfo, DestInfo, SrcInfo>;
Craig Topper17854ec2017-08-30 07:48:39 +00001221
// Scalar-double FP broadcast: defines the 512-bit (Z, HasAVX512) and 256-bit
// (Z256, HasVLX) variants, each with the instruction definitions plus the
// scalar-register selection patterns. The source is always _.info128.
// No 128-bit (Z128) variant is defined here.
multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                  WriteFShuffle256Ld, _.info512, _.info128>,
              avx512_broadcast_scalar<opc, OpcodeStr, _.info512, _.info128>,
              EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                     WriteFShuffle256Ld, _.info256, _.info128>,
                 avx512_broadcast_scalar<opc, OpcodeStr, _.info256, _.info128>,
                 EVEX_V256;
  }
}
1238
// Scalar-single FP broadcast: defines the 512-bit (Z, HasAVX512) and the
// 256-bit / 128-bit (Z256 / Z128, HasVLX) variants, each with the instruction
// definitions plus the scalar-register selection patterns. The source is
// always _.info128.
// NOTE(review): Z128 uses the WriteFShuffle256 / WriteFShuffle256Ld classes,
// whereas the integer Z128 broadcasts in this file use WriteShuffle /
// WriteShuffleLd - confirm the 256-bit class is intended for the 128-bit form.
multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                  WriteFShuffle256Ld, _.info512, _.info128>,
              avx512_broadcast_scalar<opc, OpcodeStr, _.info512, _.info128>,
              EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                     WriteFShuffle256Ld, _.info256, _.info128>,
                 avx512_broadcast_scalar<opc, OpcodeStr, _.info256, _.info128>,
                 EVEX_V256;
    defm Z128  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                     WriteFShuffle256Ld, _.info128, _.info128>,
                 avx512_broadcast_scalar<opc, OpcodeStr, _.info128, _.info128>,
                 EVEX_V128;
  }
}
// FP scalar broadcasts: vbroadcastss (opcode 0x18) and vbroadcastsd (opcode
// 0x19, VEX_W).
defm VBROADCASTSS  : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
                                            avx512vl_f32_info>;
defm VBROADCASTSD  : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
                                            avx512vl_f64_info>, VEX_W;

// Select the 512-bit broadcast-from-memory intrinsics directly to the
// unmasked memory forms.
def : Pat<(int_x86_avx512_vbroadcast_ss_512 addr:$src),
          (VBROADCASTSSZm addr:$src)>;
def : Pat<(int_x86_avx512_vbroadcast_sd_512 addr:$src),
          (VBROADCASTSDZm addr:$src)>;
Quentin Colombet4bf1c282013-10-25 17:47:18 +00001268
// Integer broadcast from a general purpose register (SrcRC). Defines the
// maskable 'r' instruction family with mnemonic "vpbroadcast" followed by
// the element suffix of the destination type, matching (OpNode SrcRC:$src).
multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
                                    X86VectorVTInfo _, SDPatternOperator OpNode,
                                    RegisterClass SrcRC> {
  let ExeDomain = _.ExeDomain in
  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins SrcRC:$src),
                           "vpbroadcast"##_.Suffix, "$src", "$src",
                           (_.VT (OpNode SrcRC:$src)), NoItinerary>, T8PD, EVEX,
                           Sched<[SchedRR]>;
}
1279
// Byte/word broadcast from a general purpose register. The instruction itself
// always takes a GR32 operand and is defined without selection patterns (the
// three pattern lists are empty); separate Pats then match a broadcast of the
// narrower SrcRC and widen it to 32 bits by inserting it into an undefined
// i32 at sub-register index Subreg.
//   Name - full record name prefix used to look up the r/rk/rkz instructions.
multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
                            X86VectorVTInfo _, SDPatternOperator OpNode,
                            RegisterClass SrcRC, SubRegIndex Subreg> {
  let hasSideEffects = 0, ExeDomain = _.ExeDomain in
  defm r : AVX512_maskable_custom<opc, MRMSrcReg,
                        (outs _.RC:$dst), (ins GR32:$src),
                        !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
                        !con((ins _.KRCWM:$mask), (ins GR32:$src)),
                        "vpbroadcast"##_.Suffix, "$src", "$src", [], [], [],
                        NoItinerary, "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;

  // Unmasked.
  def : Pat <(_.VT (OpNode SrcRC:$src)),
             (!cast<Instruction>(Name#r)
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  // Merge-masked: $src0 supplies the pass-through elements.
  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
             (!cast<Instruction>(Name#rk) _.RC:$src0, _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  // Zero-masked.
  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
             (!cast<Instruction>(Name#rkz) _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
}
1303
// Instantiates avx512_int_broadcastbw_reg for all three vector lengths:
// Z (512-bit) under [prd]; Z256 and Z128 under [prd, HasVLX]. The length
// suffix is appended to Name so the patterns can find the matching records.
// The 128-bit form uses WriteShuffle; the wider forms use WriteShuffle256.
multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
                      AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
                      RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
              OpNode, SrcRC, Subreg>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
              _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
    defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
              _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
  }
}
1317
// Instantiates avx512_int_broadcast_reg for all three vector lengths:
// Z (512-bit) under [prd]; Z256 and Z128 under [prd, HasVLX].
// The 128-bit form uses WriteShuffle; the wider forms use WriteShuffle256.
multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
                                       SDPatternOperator OpNode,
                                       RegisterClass SrcRC, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
                                      SrcRC>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
                                         SrcRC>, EVEX_V256;
    defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
                                         SrcRC>, EVEX_V128;
  }
}
1331
// Integer broadcasts from a general purpose register.
// Byte/word forms require BWI and go through the GR32-widening multiclass;
// dword/qword forms require only AVX512. D and Q share opcode 0x7C and are
// distinguished by VEX_W.
defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
                       avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
                       avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
                       HasBWI>;
defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
                                                 X86VBroadcast, GR32, HasAVX512>;
defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
                                                 X86VBroadcast, GR64, HasAVX512>, VEX_W;
Michael Liao5bf95782014-12-04 05:20:33 +00001341
// Provide aliases for broadcast from the same register class that
// automatically does the extract.
// A broadcast whose source is a wider vector (SrcInfo) is selected to the
// register form NAME#ZSuffix#r, taking the sub_xmm sub-register of the source
// as the instruction operand.
multiclass avx512_int_broadcast_rm_lowering<X86VectorVTInfo DestInfo,
                                            X86VectorVTInfo SrcInfo> {
  def : Pat<(DestInfo.VT (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))),
            (!cast<Instruction>(NAME#DestInfo.ZSuffix#"r")
             (EXTRACT_SUBREG (SrcInfo.VT SrcInfo.RC:$src), sub_xmm))>;
}
1350
// Integer broadcast from an XMM register or scalar memory, for all three
// vector lengths. Z is guarded by [prd]; Z256 and Z128 by [prd, HasVLX].
// The Z and Z256 variants additionally get lowering patterns that accept a
// 256-bit or 512-bit source register and extract its low XMM.
multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
                                      AVX512VLVectorVTInfo _, Predicate prd> {
  let Predicates = [prd] in {
    defm Z :   avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
                                   WriteShuffle256Ld, _.info512, _.info128>,
               avx512_int_broadcast_rm_lowering<_.info512, _.info256>,
               EVEX_V512;
    // Defined separately to avoid redefinition.
    defm Z_Alt : avx512_int_broadcast_rm_lowering<_.info512, _.info512>;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
                                    WriteShuffle256Ld, _.info256, _.info128>,
                avx512_int_broadcast_rm_lowering<_.info256, _.info256>,
                EVEX_V256;
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle,
                                    WriteShuffleLd, _.info128, _.info128>,
                EVEX_V128;
  }
}
1371
// Integer broadcasts from an XMM register or memory.
// Byte/word forms require BWI; dword/qword forms require only AVX512.
defm VPBROADCASTB  : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
                                                avx512vl_i8_info, HasBWI>;
defm VPBROADCASTW  : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
                                                avx512vl_i16_info, HasBWI>;
defm VPBROADCASTD  : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
                                                avx512vl_i32_info, HasAVX512>;
defm VPBROADCASTQ  : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
                                                avx512vl_i64_info, HasAVX512>, VEX_W;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001380
// Subvector broadcast from memory: loads a _Src-typed vector and broadcasts
// it into a _Dst-typed register. Defines the maskable 'rm' family; the same
// X86SubVBroadcast pattern is used for the unmasked and masked forms.
multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
                          X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                           (_Dst.VT (X86SubVBroadcast
                             (_Src.VT (bitconvert (_Src.LdFrag addr:$src))))),
                           NoItinerary>, AVX5128IBase, EVEX,
                           Sched<[WriteShuffleLd]>;
}
1390
// This should be used for the AVX512DQ broadcast instructions. It disables
// the unmasked patterns so that we only use the DQ instructions when masking
// is requested.
// The unmasked pattern is null_frag, so hasSideEffects/mayLoad are set
// explicitly rather than derived from a pattern.
multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
                          X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
  let hasSideEffects = 0, mayLoad = 1 in
  defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                           (null_frag),
                           (_Dst.VT (X86SubVBroadcast
                             (_Src.VT (bitconvert (_Src.LdFrag addr:$src))))),
                           NoItinerary>, AVX5128IBase, EVEX,
                           Sched<[WriteShuffleLd]>;
}
1405
let Predicates = [HasAVX512] in {
  // 32-bit targets will fail to load an i64 directly but can use ZEXT_LOAD.
  // Fold a zero-extending vector load feeding a v8i64 broadcast into the
  // memory form of vpbroadcastq.
  def : Pat<(v8i64 (X86VBroadcast (v8i64 (X86vzload addr:$src)))),
            (VPBROADCASTQZm addr:$src)>;
}
1411
let Predicates = [HasVLX] in {
  // 32-bit targets will fail to load an i64 directly but can use ZEXT_LOAD.
  // Same folding as the 512-bit case, for the 128-bit and 256-bit forms.
  def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))),
            (VPBROADCASTQZ128m addr:$src)>;
  def : Pat<(v4i64 (X86VBroadcast (v4i64 (X86vzload addr:$src)))),
            (VPBROADCASTQZ256m addr:$src)>;
}
let Predicates = [HasVLX, HasBWI] in {
  // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
  // This means we'll encounter truncated i32 loads; match that here.
  // Only the 128-bit and 256-bit word broadcasts are covered by this block.
  def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
            (VPBROADCASTWZ128m addr:$src)>;
  def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
            (VPBROADCASTWZ256m addr:$src)>;
  // Same, when the i32 comes from a zero-extending 16-bit load.
  def : Pat<(v8i16 (X86VBroadcast
              (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
            (VPBROADCASTWZ128m addr:$src)>;
  def : Pat<(v16i16 (X86VBroadcast
              (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
            (VPBROADCASTWZ256m addr:$src)>;
}
1433
//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST SUBVECTORS
//

// 512-bit subvector broadcasts from memory: 128-bit (32x4) and 256-bit (64x4)
// sources. The 64x4 forms set VEX_W. The compressed displacement scale uses
// the element size with the CD8VT4 tuple.
defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                       v16i32_info, v4i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                       v16f32_info, v4f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
                       v8i64_info, v4i64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
                       v8f64_info, v4f64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
1450
let Predicates = [HasAVX512] in {
// Unmasked 256-bit subvector broadcasts from memory for element types other
// than the instruction's own: the element type is irrelevant without a mask,
// so all of these map onto the 64x4 forms.
def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))),
          (VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src)))),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcast (bc_v16i16 (loadv4i64 addr:$src)))),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcast (bc_v32i8 (loadv4i64 addr:$src)))),
          (VBROADCASTI64X4rm addr:$src)>;

// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
// The register source is placed in the low half via INSERT_SUBREG and then
// inserted again at index 1 with a 64x4 insert.
def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))),
          (VINSERTF64x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v4f64 VR256X:$src), 1)>;
def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
          (VINSERTF64x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v8f32 VR256X:$src), 1)>;
def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v4i64 VR256X:$src), 1)>;
def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v8i32 VR256X:$src), 1)>;
def : Pat<(v32i16 (X86SubVBroadcast (v16i16 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v16i16 VR256X:$src), 1)>;
def : Pat<(v64i8 (X86SubVBroadcast (v32i8 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v32i8 VR256X:$src), 1)>;

// Unmasked 128-bit subvector broadcasts from memory for element types other
// than 32-bit: all map onto the 32x4 forms.
def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
          (VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
          (VBROADCASTI32X4rm addr:$src)>;

// Patterns for selects of bitcasted operations.
// The broadcast was formed with 64-bit elements but the select masks 32-bit
// elements: use the masked 32x4 forms.
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                   (bc_v16f32 (v16i32 immAllZerosV))),
          (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                   (v16i32 immAllZerosV)),
          (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

// The broadcast was formed with 32-bit elements but the select masks 64-bit
// elements: use the masked 64x4 forms.
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
                   (bc_v8f64 (v16i32 immAllZerosV))),
          (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src))))),
                   (bc_v8i64 (v16i32 immAllZerosV))),
          (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src))))),
                   VR512:$src0),
          (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
1526
let Predicates = [HasVLX] in {
// 256-bit subvector broadcasts of a 128-bit memory source.
defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                           v8i32x_info, v4i32x_info>,
                           EVEX_V256, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                           v8f32x_info, v4f32x_info>,
                           EVEX_V256, EVEX_CD8<32, CD8VT4>;

// Unmasked broadcasts for the remaining element types map onto the 32x4
// forms.
def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
          (VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v16i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v32i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
          (VBROADCASTI32X4Z256rm addr:$src)>;

// Patterns for selects of bitcasted operations.
// The broadcast was formed with 64-bit elements but the select masks 32-bit
// elements.
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                   (bc_v8f32 (v8i32 immAllZerosV))),
          (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                   VR256X:$src0),
          (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                   (v8i32 immAllZerosV)),
          (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                   VR256X:$src0),
          (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;


// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
// The register source is placed in the low half via INSERT_SUBREG and then
// inserted again at index 1 with a 32x4 insert.
def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
          (VINSERTF32x4Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v2f64 VR128X:$src), 1)>;
def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
          (VINSERTF32x4Z256rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v4f32 VR128X:$src), 1)>;
def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v2i64 VR128X:$src), 1)>;
def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v4i32 VR128X:$src), 1)>;
def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v8i16 VR128X:$src), 1)>;
def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v16i8 VR128X:$src), 1)>;
}
Simon Pilgrimea0d4f92016-07-22 13:58:44 +00001584
let Predicates = [HasVLX, HasDQI] in {
// 64x2 subvector broadcasts into a 256-bit destination (masked use only).
// Note: despite the "Z128" suffix in the record names, these are the 256-bit
// forms - the destination is v4i64x/v4f64x and the encoding is EVEX_V256.
defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                           v4i64x_info, v2i64x_info>, VEX_W,
                           EVEX_V256, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                           v4f64x_info, v2f64x_info>, VEX_W,
                           EVEX_V256, EVEX_CD8<64, CD8VT2>;

// Patterns for selects of bitcasted operations.
// The broadcast was formed with 32-bit elements but the select masks 64-bit
// elements.
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   (bc_v4f64 (v8i32 immAllZerosV))),
          (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   VR256X:$src0),
          (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4i64 (v8i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
                   (bc_v4i64 (v8i32 immAllZerosV))),
          (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4i64 (v8i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
                   VR256X:$src0),
          (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
}
1611
let Predicates = [HasDQI] in {
// 512-bit DQ subvector broadcasts (masked use only): 64x2 from a 128-bit
// source and 32x8 from a 256-bit source.
defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                       v8i64_info, v2i64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
                       v16i32_info, v8i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                       v8f64_info, v2f64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
                       v16f32_info, v8f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;

// Patterns for selects of bitcasted operations.
// 256-bit source broadcast formed with 64-bit elements, masked as 32-bit
// elements: use the 32x8 forms.
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
                   (bc_v16f32 (v16i32 immAllZerosV))),
          (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
                   (v16i32 immAllZerosV)),
          (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

// 128-bit source broadcast formed with 32-bit elements, masked as 64-bit
// elements: use the 64x2 forms.
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   (bc_v8f64 (v16i32 immAllZerosV))),
          (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
                   (bc_v8i64 (v16i32 immAllZerosV))),
          (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
                   VR512:$src0),
          (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
Adam Nemet73f72e12014-06-27 00:43:38 +00001661
// Instantiates the 512-bit (Z) and 256-bit (Z256) variants of a 32x2
// broadcast through avx512_broadcast_rm_split.
//   _Dst - vector-info family whose infoN is passed as the first VT-info
//          argument of the split multiclass.
//   _Src - vector-info family whose infoN and info128 are passed as the
//          second and third VT-info arguments.
// null_frag is passed as the final argument in both variants. Z requires
// HasDQI; Z256 additionally requires HasVLX.
multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
                                        AVX512VLVectorVTInfo _Dst,
                                        AVX512VLVectorVTInfo _Src> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_broadcast_rm_split<opc, OpcodeStr,
                                       WriteShuffle256, WriteShuffle256Ld,
                                       _Dst.info512, _Src.info512,
                                       _Src.info128, null_frag>,
             EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr,
                                          WriteShuffle256, WriteShuffle256Ld,
                                          _Dst.info256, _Src.info256,
                                          _Src.info128, null_frag>,
                EVEX_V256;
  }
}
1675
// Extends avx512_common_broadcast_32x2 (which supplies Z and Z256) with a
// 128-bit Z128 variant. Z128 uses the WriteShuffle/WriteShuffleLd scheduling
// classes and passes _Src.info128 for both source-side VT-info arguments.
// It requires HasDQI and HasVLX.
multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
                                         AVX512VLVectorVTInfo _Dst,
                                         AVX512VLVectorVTInfo _Src>
    : avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr,
                                          WriteShuffle, WriteShuffleLd,
                                          _Dst.info128, _Src.info128,
                                          _Src.info128, null_frag>,
                EVEX_V128;
  }
}
1686
// 32x2 broadcasts. The integer form goes through the i32x2 multiclass and so
// also gets a Z128 variant; the floating-point form only gets Z and Z256.
defm VBROADCASTI32X2 :
  avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
                                avx512vl_i32_info, avx512vl_i64_info>;
defm VBROADCASTF32X2 :
  avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
                               avx512vl_f32_info, avx512vl_f64_info>;

// X86VBroadcast of a vector register: extract the xmm subregister and use the
// register form of the scalar broadcast instruction.
let Predicates = [HasVLX] in {
  def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256X:$src))),
            (VBROADCASTSSZ256r
              (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm))>;
  def : Pat<(v4f64 (X86VBroadcast (v4f64 VR256X:$src))),
            (VBROADCASTSDZ256r
              (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm))>;
}

// 512-bit results, from either a 512-bit or a 256-bit source register.
def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))),
          (VBROADCASTSSZr (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
def : Pat<(v16f32 (X86VBroadcast (v8f32 VR256X:$src))),
          (VBROADCASTSSZr (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm))>;

def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))),
          (VBROADCASTSDZr (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>;
def : Pat<(v8f64 (X86VBroadcast (v4f64 VR256X:$src))),
          (VBROADCASTSDZr (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm))>;
Robert Khasanovdd09a8f2014-10-28 12:28:51 +00001708
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001709//===----------------------------------------------------------------------===//
1710// AVX-512 BROADCAST MASK TO VECTOR REGISTER
1711//---
// Defines the single register form (rr) of a mask-to-vector broadcast.
//   _   - vector info supplying the destination register class and VT.
//   KRC - mask register class of the source operand.
// The selection pattern is X86VBroadcastm applied to the mask operand.
multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
                                  X86VectorVTInfo _, RegisterClass KRC> {
  def rr : AVX512XS8I<opc, MRMSrcReg,
                      (outs _.RC:$dst), (ins KRC:$src),
                      OpcodeStr # "\t{$src, $dst|$dst, $src}",
                      [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))],
                      IIC_SSE_PSHUF_RI>,
           EVEX, Sched<[WriteShuffle]>;
}
1719
// Instantiates avx512_mask_broadcastm for all three vector widths of VTInfo.
// Z (info512) requires HasCDI; Z256 and Z128 additionally require HasVLX.
// The same mask register class KRC is used at every width.
multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo VTInfo,
                                 RegisterClass KRC> {
  let Predicates = [HasCDI] in {
    defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>,
             EVEX_V512;
  }
  let Predicates = [HasCDI, HasVLX] in {
    defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>,
                EVEX_V256;
    defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>,
                EVEX_V128;
  }
}
1729
// Mask-to-vector broadcasts: a VK16 mask into i32 vectors and a VK8 mask into
// i64 vectors (the latter with VEX_W).
defm VPBROADCASTMW2D :
  avx512_mask_broadcast<0x3A, "vpbroadcastmw2d", avx512vl_i32_info, VK16>;
defm VPBROADCASTMB2Q :
  avx512_mask_broadcast<0x2A, "vpbroadcastmb2q", avx512vl_i64_info, VK8>,
  VEX_W;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001734
1735//===----------------------------------------------------------------------===//
Craig Topperaad5f112015-11-30 00:13:24 +00001736// -- VPERMI2 - 3 source operands form --
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001737
// Itinerary/scheduling bundles shared by the VPERMI2*/VPERMT2* definitions.
// The two OpndItins template arguments are the itinerary classes; Sched is
// overridden per bundle.

// Floating-point flavour.
def AVX512_PERM2_F : OpndItins<IIC_SSE_SHUFP, IIC_SSE_SHUFP> {
  let Sched = WriteFShuffle256;
}

// Integer flavour.
def AVX512_PERM2_I : OpndItins<IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI> {
  let Sched = WriteShuffle256;
}
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001747
// Register (rr) and full-vector memory (rm) forms of a VPERMI2-style
// three-source permute. $src1 is tied to $dst.
//   itins - supplies the rr/rm itineraries and the scheduling class.
//   _     - vector info for every operand, including the index in $src1.
multiclass avx512_perm_i<bits<8> opc, string OpcodeStr, OpndItins itins,
                         X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst" in
  let ExeDomain = _.ExeDomain in {
    // Ideally the index operand in the pattern would have an integer type.
    // If it did and the index came from a bitcast, that bitcast would be
    // folded into the index operand, which makes it hard to find the bitcast
    // needed to convert the index to the destination type for the passthru.
    defm rr : AVX512_maskable_3src<opc, MRMSrcReg, _,
                (outs _.RC:$dst),
                (ins _.RC:$src2, _.RC:$src3),
                OpcodeStr, "$src3, $src2", "$src2, $src3",
                (_.VT (X86VPermi2X _.RC:$src1, _.RC:$src2, _.RC:$src3)),
                itins.rr, 1>,
              EVEX_4V, AVX5128IBase, Sched<[itins.Sched]>;

    defm rm : AVX512_maskable_3src<opc, MRMSrcMem, _,
                (outs _.RC:$dst),
                (ins _.RC:$src2, _.MemOp:$src3),
                OpcodeStr, "$src3, $src2", "$src2, $src3",
                (_.VT (X86VPermi2X
                        _.RC:$src1, _.RC:$src2,
                        (_.VT (bitconvert (_.LdFrag addr:$src3))))),
                itins.rm, 1>,
              EVEX_4V, AVX5128IBase,
              Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001770
// Embedded-broadcast memory form (rmb) of a VPERMI2-style permute: $src3 is a
// scalar memory operand wrapped in X86VBroadcast, and the assembly operand
// carries the _.BroadcastStr suffix. $src1 is tied to $dst.
multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr, OpndItins itins,
                            X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst" in
  let ExeDomain = _.ExeDomain in {
    defm rmb : AVX512_maskable_3src<opc, MRMSrcMem, _,
                 (outs _.RC:$dst),
                 (ins _.RC:$src2, _.ScalarMemOp:$src3),
                 OpcodeStr,
                 "${src3}" # _.BroadcastStr # ", $src2",
                 "$src2, ${src3}" # _.BroadcastStr,
                 (_.VT (X86VPermi2X
                         _.RC:$src1, _.RC:$src2,
                         (_.VT (X86VBroadcast
                                 (_.ScalarLdFrag addr:$src3))))),
                 itins.rm, 1>,
               AVX5128IBase, EVEX_4V, EVEX_B,
               Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
1783
// Instantiates avx512_perm_i plus its broadcast companion avx512_perm_i_mb at
// all three vector widths of VTInfo. The 512-bit records use NAME directly;
// the 128/256-bit records get a "128"/"256" name suffix and require HasVLX.
multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr, OpndItins itins,
                               AVX512VLVectorVTInfo VTInfo> {
  defm NAME : avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info512>,
              avx512_perm_i_mb<opc, OpcodeStr, itins, VTInfo.info512>,
              EVEX_V512;

  let Predicates = [HasVLX] in {
    defm NAME#128 : avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info128>,
                    avx512_perm_i_mb<opc, OpcodeStr, itins, VTInfo.info128>,
                    EVEX_V128;
    defm NAME#256 : avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info256>,
                    avx512_perm_i_mb<opc, OpcodeStr, itins, VTInfo.info256>,
                    EVEX_V256;
  }
}
Elena Demikhovskyf07df9f2015-11-25 08:17:56 +00001795
// Like avx512_perm_i_sizes, but without the embedded-broadcast (rmb) forms and
// with a caller-supplied feature predicate.
//   Prd - predicate required by every width; the 128/256-bit variants also
//         require HasVLX.
multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
                                  OpndItins itins,
                                  AVX512VLVectorVTInfo VTInfo,
                                  Predicate Prd> {
  let Predicates = [Prd] in {
    defm NAME : avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info512>,
                EVEX_V512;
  }
  let Predicates = [Prd, HasVLX] in {
    defm NAME#128 : avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info128>,
                    EVEX_V128;
    defm NAME#256 : avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info256>,
                    EVEX_V256;
  }
}
Elena Demikhovskyd3057e52015-06-18 08:56:19 +00001807
// VPERMI2 instantiations. D/Q/PS/PD include the embedded-broadcast forms;
// W and B go through the _bw multiclass, gated on HasBWI and HasVBMI
// respectively.
defm VPERMI2D  : avx512_perm_i_sizes<0x76, "vpermi2d", AVX512_PERM2_I,
                                     avx512vl_i32_info>,
                 EVEX_CD8<32, CD8VF>;
defm VPERMI2Q  : avx512_perm_i_sizes<0x76, "vpermi2q", AVX512_PERM2_I,
                                     avx512vl_i64_info>,
                 VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMI2W  : avx512_perm_i_sizes_bw<0x75, "vpermi2w", AVX512_PERM2_I,
                                        avx512vl_i16_info, HasBWI>,
                 VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMI2B  : avx512_perm_i_sizes_bw<0x75, "vpermi2b", AVX512_PERM2_I,
                                        avx512vl_i8_info, HasVBMI>,
                 EVEX_CD8<8, CD8VF>;
defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", AVX512_PERM2_F,
                                     avx512vl_f32_info>,
                 EVEX_CD8<32, CD8VF>;
defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", AVX512_PERM2_F,
                                     avx512vl_f64_info>,
                 VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyd3057e52015-06-18 08:56:19 +00001822
Craig Topperaad5f112015-11-30 00:13:24 +00001823// VPERMT2
// Register (rr) and full-vector memory (rm) forms of a VPERMT2-style
// three-source permute. $src1 is tied to $dst.
//   _     - vector info for the data operands ($src1/$dst and $src3).
//   IdxVT - vector info whose register class is used for $src2.
multiclass avx512_perm_t<bits<8> opc, string OpcodeStr, OpndItins itins,
                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst" in
  let ExeDomain = _.ExeDomain in {
    defm rr : AVX512_maskable_3src<opc, MRMSrcReg, _,
                (outs _.RC:$dst),
                (ins IdxVT.RC:$src2, _.RC:$src3),
                OpcodeStr, "$src3, $src2", "$src2, $src3",
                (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)),
                itins.rr, 1>,
              EVEX_4V, AVX5128IBase, Sched<[itins.Sched]>;

    defm rm : AVX512_maskable_3src<opc, MRMSrcMem, _,
                (outs _.RC:$dst),
                (ins IdxVT.RC:$src2, _.MemOp:$src3),
                OpcodeStr, "$src3, $src2", "$src2, $src3",
                (_.VT (X86VPermt2
                        _.RC:$src1, IdxVT.RC:$src2,
                        (bitconvert (_.LdFrag addr:$src3)))),
                itins.rm, 1>,
              EVEX_4V, AVX5128IBase,
              Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
// Embedded-broadcast memory form (rmb) of a VPERMT2-style permute: $src3 is a
// scalar memory operand wrapped in X86VBroadcast, and the assembly operand
// carries the _.BroadcastStr suffix. $src1 is tied to $dst; $src2 uses the
// register class of IdxVT.
multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr, OpndItins itins,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst" in
  let ExeDomain = _.ExeDomain in {
    defm rmb : AVX512_maskable_3src<opc, MRMSrcMem, _,
                 (outs _.RC:$dst),
                 (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
                 OpcodeStr,
                 "${src3}" # _.BroadcastStr # ", $src2",
                 "$src2, ${src3}" # _.BroadcastStr,
                 (_.VT (X86VPermt2
                         _.RC:$src1, IdxVT.RC:$src2,
                         (_.VT (X86VBroadcast
                                 (_.ScalarLdFrag addr:$src3))))),
                 itins.rm, 1>,
               AVX5128IBase, EVEX_4V, EVEX_B,
               Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
1853
// Instantiates avx512_perm_t plus avx512_perm_t_mb at all three vector widths.
//   VTInfo      - vector-info family for the data operands.
//   ShuffleMask - vector-info family passed as the IdxVT argument.
// The 512-bit records use NAME directly; the 128/256-bit records get a
// "128"/"256" name suffix and require HasVLX.
multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr, OpndItins itins,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  defm NAME : avx512_perm_t<opc, OpcodeStr, itins,
                            VTInfo.info512, ShuffleMask.info512>,
              avx512_perm_t_mb<opc, OpcodeStr, itins,
                               VTInfo.info512, ShuffleMask.info512>,
              EVEX_V512;

  let Predicates = [HasVLX] in {
    defm NAME#128 : avx512_perm_t<opc, OpcodeStr, itins,
                                  VTInfo.info128, ShuffleMask.info128>,
                    avx512_perm_t_mb<opc, OpcodeStr, itins,
                                     VTInfo.info128, ShuffleMask.info128>,
                    EVEX_V128;
    defm NAME#256 : avx512_perm_t<opc, OpcodeStr, itins,
                                  VTInfo.info256, ShuffleMask.info256>,
                    avx512_perm_t_mb<opc, OpcodeStr, itins,
                                     VTInfo.info256, ShuffleMask.info256>,
                    EVEX_V256;
  }
}
1872
// Like avx512_perm_t_sizes, but without the embedded-broadcast (rmb) forms and
// with a caller-supplied feature predicate.
//   Idx - vector-info family passed as the IdxVT argument.
//   Prd - predicate required by every width; the 128/256-bit variants also
//         require HasVLX.
multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
                                  OpndItins itins,
                                  AVX512VLVectorVTInfo VTInfo,
                                  AVX512VLVectorVTInfo Idx,
                                  Predicate Prd> {
  let Predicates = [Prd] in {
    defm NAME : avx512_perm_t<opc, OpcodeStr, itins,
                              VTInfo.info512, Idx.info512>,
                EVEX_V512;
  }
  let Predicates = [Prd, HasVLX] in {
    defm NAME#128 : avx512_perm_t<opc, OpcodeStr, itins,
                                  VTInfo.info128, Idx.info128>,
                    EVEX_V128;
    defm NAME#256 : avx512_perm_t<opc, OpcodeStr, itins,
                                  VTInfo.info256, Idx.info256>,
                    EVEX_V256;
  }
}
Simon Pilgrim8d5e4692017-12-01 17:24:15 +00001887
// VPERMT2 instantiations. The second vector-info argument is the index type:
// the floating-point forms pair f32/f64 data with i32/i64 indices. W and B go
// through the _bw multiclass, gated on HasBWI and HasVBMI respectively.
defm VPERMT2D  : avx512_perm_t_sizes<0x7E, "vpermt2d", AVX512_PERM2_I,
                                     avx512vl_i32_info, avx512vl_i32_info>,
                 EVEX_CD8<32, CD8VF>;
defm VPERMT2Q  : avx512_perm_t_sizes<0x7E, "vpermt2q", AVX512_PERM2_I,
                                     avx512vl_i64_info, avx512vl_i64_info>,
                 VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMT2W  : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", AVX512_PERM2_I,
                                        avx512vl_i16_info, avx512vl_i16_info,
                                        HasBWI>,
                 VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMT2B  : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", AVX512_PERM2_I,
                                        avx512vl_i8_info, avx512vl_i8_info,
                                        HasVBMI>,
                 EVEX_CD8<8, CD8VF>;
defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", AVX512_PERM2_F,
                                     avx512vl_f32_info, avx512vl_i32_info>,
                 EVEX_CD8<32, CD8VF>;
defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", AVX512_PERM2_F,
                                     avx512vl_f64_info, avx512vl_i64_info>,
                 VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovsky299cf5112014-04-29 09:09:15 +00001902
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001903//===----------------------------------------------------------------------===//
1904// AVX-512 - BLEND using mask
1905//
Simon Pilgrimd4953012017-12-05 21:05:25 +00001906
// Itinerary/scheduling bundles for the mask-blend instructions. The two
// OpndItins template arguments are the itinerary classes; Sched is overridden
// per bundle.

// Floating-point blends.
def AVX512_BLENDM : OpndItins<IIC_SSE_ALU_F32P_RR, IIC_SSE_ALU_F32P_RM> {
  let Sched = WriteFVarBlend;
}

// Integer blends.
def AVX512_PBLENDM : OpndItins<IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM> {
  let Sched = WriteVarBlend;
}
1916
// Register and full-vector memory forms of a mask blend, each in unmasked,
// merge-masked (k) and zero-masked (kz) flavours. None of the records carries
// a selection pattern (the pattern list is empty), so hasSideEffects is
// cleared explicitly and mayLoad is set explicitly on the memory forms.
multiclass avx512_blendmask<bits<8> opc, string OpcodeStr, OpndItins itins,
                            X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  let hasSideEffects = 0 in {
    // Register-register forms.
    def rr : AVX5128I<opc, MRMSrcReg,
               (outs _.RC:$dst),
               (ins _.RC:$src1, _.RC:$src2),
               OpcodeStr #
                 "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}",
               [], itins.rr>,
             EVEX_4V, Sched<[itins.Sched]>;
    def rrk : AVX5128I<opc, MRMSrcReg,
                (outs _.RC:$dst),
                (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
                OpcodeStr #
                  "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}",
                [], itins.rr>,
              EVEX_4V, EVEX_K, Sched<[itins.Sched]>;
    def rrkz : AVX5128I<opc, MRMSrcReg,
                 (outs _.RC:$dst),
                 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
                 OpcodeStr #
                   "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}",
                 [], itins.rr>,
               EVEX_4V, EVEX_KZ, Sched<[itins.Sched]>;

    // Register-memory forms.
    let mayLoad = 1 in {
      def rm : AVX5128I<opc, MRMSrcMem,
                 (outs _.RC:$dst),
                 (ins _.RC:$src1, _.MemOp:$src2),
                 OpcodeStr #
                   "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}",
                 [], itins.rm>,
               EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
               Sched<[itins.Sched.Folded, ReadAfterLd]>;
      def rmk : AVX5128I<opc, MRMSrcMem,
                  (outs _.RC:$dst),
                  (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
                  OpcodeStr #
                    "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}",
                  [], itins.rm>,
                EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
                Sched<[itins.Sched.Folded, ReadAfterLd]>;
      def rmkz : AVX5128I<opc, MRMSrcMem,
                   (outs _.RC:$dst),
                   (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
                   OpcodeStr #
                     "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}",
                   [], itins.rm>,
                 EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
                 Sched<[itins.Sched.Folded, ReadAfterLd]>;
    }
  }
}
// Embedded-broadcast memory forms of a mask blend: merge-masked (rmbk),
// zero-masked (rmbkz) and unmasked (rmb). $src2 is a scalar memory operand and
// its assembly text carries the _.BroadcastStr suffix. None of the records has
// a selection pattern, so mayLoad is set and hasSideEffects cleared
// explicitly.
//
// ExeDomain is taken from the vector info, matching avx512_blendmask, so the
// broadcast forms of e.g. vblendmps/vblendmpd are tagged with the same
// execution domain as their register and full-vector memory forms rather than
// with whatever default the AVX5128I format class provides.
multiclass avx512_blendmask_rmb<bits<8> opc, string OpcodeStr, OpndItins itins,
                                X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in {
    def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
        (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
        !strconcat(OpcodeStr,
            "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
            "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
        [], itins.rm>, EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
        Sched<[itins.Sched.Folded, ReadAfterLd]>;

    def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
        (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
        !strconcat(OpcodeStr,
            "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
            "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"),
        [], itins.rm>, EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
        Sched<[itins.Sched.Folded, ReadAfterLd]>;

    def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
        (ins _.RC:$src1, _.ScalarMemOp:$src2),
        !strconcat(OpcodeStr,
            "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
            "$dst, $src1, ${src2}", _.BroadcastStr, "}"),
        [], itins.rm>, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
        Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
1985
// Mask blends for element types that also have embedded-broadcast forms:
// instantiates avx512_blendmask and avx512_blendmask_rmb at 512 bits (Z) and,
// under HasVLX, at 256 (Z256) and 128 (Z128) bits.
multiclass blendmask_dq<bits<8> opc, string OpcodeStr, OpndItins itins,
                        AVX512VLVectorVTInfo VTInfo> {
  defm Z : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info512>,
           avx512_blendmask_rmb<opc, OpcodeStr, itins, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z256 : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info256>,
                avx512_blendmask_rmb<opc, OpcodeStr, itins, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info128>,
                avx512_blendmask_rmb<opc, OpcodeStr, itins, VTInfo.info128>,
                EVEX_V128;
  }
}
Elena Demikhovsky172a27c2014-01-08 10:54:22 +00001998
// Mask blends without embedded-broadcast forms: instantiates avx512_blendmask
// only. Z requires HasBWI; Z256 and Z128 additionally require HasVLX.
multiclass blendmask_bw<bits<8> opc, string OpcodeStr, OpndItins itins,
                        AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasBWI] in {
    defm Z : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info512>,
             EVEX_V512;
  }

  let Predicates = [HasBWI, HasVLX] in {
    defm Z256 : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info128>,
                EVEX_V128;
  }
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002009
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002010
// Mask-blend instantiations. PS/PD use the floating-point itinerary bundle,
// the VPBLENDM* forms the integer one; B and W go through blendmask_bw.
defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", AVX512_BLENDM,
                              avx512vl_f32_info>;
defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", AVX512_BLENDM,
                              avx512vl_f64_info>, VEX_W;
defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", AVX512_PBLENDM,
                              avx512vl_i32_info>;
defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", AVX512_PBLENDM,
                              avx512vl_i64_info>, VEX_W;
defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", AVX512_PBLENDM,
                              avx512vl_i8_info>;
defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", AVX512_PBLENDM,
                              avx512vl_i16_info>, VEX_W;
Elena Demikhovsky172a27c2014-01-08 10:54:22 +00002017
Elena Demikhovsky172a27c2014-01-08 10:54:22 +00002018
Elena Demikhovsky47fc44e2013-12-16 13:52:35 +00002019//===----------------------------------------------------------------------===//
2020// Compare Instructions
2021//===----------------------------------------------------------------------===//
2022
2023// avx512_cmp_scalar - AVX512 CMPSS and CMPSD
Igor Bregerb7e1f9d2015-09-20 15:15:10 +00002024
// Scalar floating-point compare into a mask register (opcode 0xC2).
//   _         - scalar vector info (register classes, suffix, memory operands).
//   OpNode    - compare node used by the non-{sae} patterns.
//   OpNodeRnd - compare node used by the {sae} pattern; it takes an extra
//               (i32 FROUND_NO_EXC) operand.
//   itins     - supplies the rr/rm itineraries and the scheduling class.
//
// Three groups of records are produced:
//   * rr_Int / rm_Int / rrb_Int - maskable forms on the vector register class
//     that take a condition-code operand (AVXCC).
//   * rri_alt / rmi_alt / rrb_alt - assembler-only forms that take the
//     condition as an explicit u8 immediate and have no patterns.
//   * rr / rm - codegen-only forms on the scalar FP register class.
multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd,
                             OpndItins itins> {
  defm rr_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                      (outs _.KRC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
                      "vcmp${cc}"#_.Suffix,
                      "$src2, $src1", "$src1, $src2",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              imm:$cc), itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
  let mayLoad = 1 in
  defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                    (outs _.KRC:$dst),
                    (ins _.RC:$src1, _.IntScalarMemOp:$src2, AVXCC:$cc),
                    "vcmp${cc}"#_.Suffix,
                    "$src2, $src1", "$src1, $src2",
                    (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
                        imm:$cc), itins.rm>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
                    Sched<[itins.Sched.Folded, ReadAfterLd]>;

  defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                     (outs _.KRC:$dst),
                     (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
                     "vcmp${cc}"#_.Suffix,
                     "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                     (OpNodeRnd (_.VT _.RC:$src1),
                                (_.VT _.RC:$src2),
                                imm:$cc,
                                (i32 FROUND_NO_EXC)), itins.rr>,
                     EVEX_4V, EVEX_B, Sched<[itins.Sched]>;
  // Accept explicit immediate argument form instead of comparison code.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    // The result register class comes from the vector info like every other
    // def in this multiclass; for scalar infos (NumElts == 1) _.KRC is VK1.
    defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
                        (outs _.KRC:$dst),
                        (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                        "vcmp"#_.Suffix,
                        "$cc, $src2, $src1", "$src1, $src2, $cc", itins.rr>, EVEX_4V,
                        Sched<[itins.Sched]>;
    let mayLoad = 1 in
    defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
                        (outs _.KRC:$dst),
                        (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
                        "vcmp"#_.Suffix,
                        "$cc, $src2, $src1", "$src1, $src2, $cc", itins.rm>,
                        EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
                        Sched<[itins.Sched.Folded, ReadAfterLd]>;

    defm rrb_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
                       (outs _.KRC:$dst),
                       (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                       "vcmp"#_.Suffix,
                       "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc", itins.rr>,
                       EVEX_4V, EVEX_B, Sched<[itins.Sched]>;
  }// let isAsmParserOnly = 1, hasSideEffects = 0

  let isCodeGenOnly = 1 in {
    // Only the register-register form is marked commutable.
    let isCommutable = 1 in
    def rr : AVX512Ii8<0xC2, MRMSrcReg,
                (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, AVXCC:$cc),
                !strconcat("vcmp${cc}", _.Suffix,
                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                          _.FRC:$src2,
                                          imm:$cc))],
                itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
    def rm : AVX512Ii8<0xC2, MRMSrcMem,
              (outs _.KRC:$dst),
              (ins _.FRC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
              !strconcat("vcmp${cc}", _.Suffix,
                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
              [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                        (_.ScalarLdFrag addr:$src2),
                                        imm:$cc))],
              itins.rm>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
2102
// Scalar compare instantiations. Both use X86cmpms / X86cmpmsRnd; the single
// and double forms differ in vector info, itinerary bundle, execution domain
// and prefix class (the double form also adds VEX_W).
let Predicates = [HasAVX512] in {
  let ExeDomain = SSEPackedSingle in {
    defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsRnd,
                                     SSE_ALU_F32S>,
                   AVX512XSIi8Base;
  }
  let ExeDomain = SSEPackedDouble in {
    defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsRnd,
                                     SSE_ALU_F64S>,
                   AVX512XDIi8Base, VEX_W;
  }
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002111
// Integer compare-into-mask records for a single vector type `_`.
//
// Parameters:
//   opc          - opcode byte shared by all four records.
//   OpcodeStr    - mnemonic used in the assembly strings.
//   OpNode       - two-operand PatFrag matched by the selection patterns.
//   itins        - itinerary / scheduling info (rr, rm, Sched).
//   _            - vector type description (RC, KRC, KRCWM, MemOp, LdFrag...).
//   IsCommutable - forwarded to isCommutable on the register-register forms.
//
// Records defined:
//   rr  / rm   - unmasked, second operand in a register / full-vector load.
//   rrk / rmk  - same, but the pattern ANDs the result with $mask (EVEX_K).
// Only rr and rrk receive isCommutable; the memory forms do not.
Craig Topper513d3fa2018-01-27 20:19:02 +00002112multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, PatFrag OpNode,
Simon Pilgrima2b58622017-12-05 12:02:22 +00002113 OpndItins itins, X86VectorVTInfo _, bit IsCommutable> {
Craig Topper392cd032016-09-03 16:28:03 +00002114 let isCommutable = IsCommutable in
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002115 def rr : AVX512BI<opc, MRMSrcReg,
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002116 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
2117 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2118 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))],
Simon Pilgrima2b58622017-12-05 12:02:22 +00002119 itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002120 def rm : AVX512BI<opc, MRMSrcMem,
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002121 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
2122 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2123 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
2124 (_.VT (bitconvert (_.LdFrag addr:$src2)))))],
Simon Pilgrima2b58622017-12-05 12:02:22 +00002125 itins.rm>, EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  // Write-masked forms: the compare result is ANDed with $mask.
Craig Toppere1d81032017-06-13 07:13:47 +00002126 let isCommutable = IsCommutable in
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002127 def rrk : AVX512BI<opc, MRMSrcReg,
2128 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
2129 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2130 "$dst {${mask}}, $src1, $src2}"),
2131 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2132 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))))],
Simon Pilgrima2b58622017-12-05 12:02:22 +00002133 itins.rr>, EVEX_4V, EVEX_K, Sched<[itins.Sched]>;
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002134 def rmk : AVX512BI<opc, MRMSrcMem,
2135 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2136 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2137 "$dst {${mask}}, $src1, $src2}"),
2138 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2139 (OpNode (_.VT _.RC:$src1),
2140 (_.VT (bitconvert
2141 (_.LdFrag addr:$src2))))))],
Simon Pilgrima2b58622017-12-05 12:02:22 +00002142 itins.rm>, EVEX_4V, EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002143}
2144
// Extends avx512_icmp_packed (same parameters, forwarded unchanged) with the
// embedded-broadcast memory forms:
//   rmb  - second operand is X86VBroadcast of a scalar load; tagged EVEX_B.
//   rmbk - same, with the result ANDed with $mask; tagged EVEX_K and EVEX_B.
// The assembly strings append _.BroadcastStr after the memory operand.
Craig Topper513d3fa2018-01-27 20:19:02 +00002145multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
Simon Pilgrima2b58622017-12-05 12:02:22 +00002146 OpndItins itins, X86VectorVTInfo _, bit IsCommutable> :
2147 avx512_icmp_packed<opc, OpcodeStr, OpNode, itins, _, IsCommutable> {
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002148 def rmb : AVX512BI<opc, MRMSrcMem,
2149 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
2150 !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
2151 "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
2152 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
2153 (X86VBroadcast (_.ScalarLdFrag addr:$src2))))],
Simon Pilgrima2b58622017-12-05 12:02:22 +00002154 itins.rm>, EVEX_4V, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002155 def rmbk : AVX512BI<opc, MRMSrcMem,
2156 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2157 _.ScalarMemOp:$src2),
2158 !strconcat(OpcodeStr,
2159 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2160 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
2161 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2162 (OpNode (_.VT _.RC:$src1),
2163 (X86VBroadcast
2164 (_.ScalarLdFrag addr:$src2)))))],
Simon Pilgrima2b58622017-12-05 12:02:22 +00002165 itins.rm>, EVEX_4V, EVEX_K, EVEX_B,
2166 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002167}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002168
// Instantiates avx512_icmp_packed at all three vector lengths of VTInfo:
//   Z    - info512, EVEX_V512, requires [prd].
//   Z256 - info256, EVEX_V256, requires [prd, HasVLX].
//   Z128 - info128, EVEX_V128, requires [prd, HasVLX].
// IsCommutable defaults to 0 and is forwarded to every instantiation.
Craig Topper513d3fa2018-01-27 20:19:02 +00002169multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, PatFrag OpNode,
Simon Pilgrima2b58622017-12-05 12:02:22 +00002170 OpndItins itins, AVX512VLVectorVTInfo VTInfo,
2171 Predicate prd, bit IsCommutable = 0> {
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002172 let Predicates = [prd] in
Simon Pilgrima2b58622017-12-05 12:02:22 +00002173 defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, itins, VTInfo.info512,
Craig Topper392cd032016-09-03 16:28:03 +00002174 IsCommutable>, EVEX_V512;
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002175
2176 let Predicates = [prd, HasVLX] in {
Simon Pilgrima2b58622017-12-05 12:02:22 +00002177 defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, itins, VTInfo.info256,
Craig Topper392cd032016-09-03 16:28:03 +00002178 IsCommutable>, EVEX_V256;
Simon Pilgrima2b58622017-12-05 12:02:22 +00002179 defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, itins, VTInfo.info128,
Craig Topper392cd032016-09-03 16:28:03 +00002180 IsCommutable>, EVEX_V128;
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002181 }
2182}
2183
// Same three-length expansion as avx512_icmp_packed_vl, but built on
// avx512_icmp_packed_rmb so each length also gets the broadcast (rmb/rmbk)
// forms. Z requires [prd]; Z256 and Z128 require [prd, HasVLX].
2184multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
Craig Topper513d3fa2018-01-27 20:19:02 +00002185 PatFrag OpNode, OpndItins itins,
Simon Pilgrima2b58622017-12-05 12:02:22 +00002186 AVX512VLVectorVTInfo VTInfo,
2187 Predicate prd, bit IsCommutable = 0> {
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002188 let Predicates = [prd] in
Simon Pilgrima2b58622017-12-05 12:02:22 +00002189 defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info512,
Craig Topper392cd032016-09-03 16:28:03 +00002190 IsCommutable>, EVEX_V512;
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002191
2192 let Predicates = [prd, HasVLX] in {
Simon Pilgrima2b58622017-12-05 12:02:22 +00002193 defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info256,
Craig Topper392cd032016-09-03 16:28:03 +00002194 IsCommutable>, EVEX_V256;
Simon Pilgrima2b58622017-12-05 12:02:22 +00002195 defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info128,
Craig Topper392cd032016-09-03 16:28:03 +00002196 IsCommutable>, EVEX_V128;
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002197 }
2198}
2199
Craig Topper9471a7c2018-02-19 19:23:31 +00002200// This fragment treats X86cmpm as commutable to help match loads in both
2201// operands for PCMPEQ. It matches X86cmpm_c with condition immediate 0.
2202def X86pcmpeqm_c : PatFrag<(ops node:$src1, node:$src2),
2203 (X86cmpm_c node:$src1, node:$src2, (i8 0))>;
// X86pcmpgtm matches the plain (non-commutable) X86cmpm node with condition
// immediate 6.
// NOTE(review): 6 is presumably the signed greater-than code, judging by the
// fragment name - confirm against the VPCMP predicate encoding.
Craig Topper513d3fa2018-01-27 20:19:02 +00002204def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
2205 (X86cmpm node:$src1, node:$src2, (i8 6))>;
2206
// Fixed-predicate integer compares (equal / greater-than) into a mask.
//  - Byte and word variants use avx512_icmp_packed_vl (no broadcast forms),
//    require HasBWI and are tagged VEX_WIG.
//  - Dword and qword variants use avx512_icmp_packed_rmb_vl (adds broadcast
//    forms) and require HasAVX512; the qword variants add T8PD and VEX_W.
//  - The EQ variants pass IsCommutable = 1 and match via X86pcmpeqm_c; the GT
//    variants leave IsCommutable at its default of 0 and match via X86pcmpgtm.
Simon Pilgrima2b58622017-12-05 12:02:22 +00002207// FIXME: Is there a better scheduler itinerary for VPCMP?
Craig Topper9471a7c2018-02-19 19:23:31 +00002208defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm_c,
Simon Pilgrima2b58622017-12-05 12:02:22 +00002209 SSE_ALU_F32P, avx512vl_i8_info, HasBWI, 1>,
Craig Toppera33846a2017-10-22 06:18:23 +00002210 EVEX_CD8<8, CD8VF>, VEX_WIG;
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002211
Craig Topper9471a7c2018-02-19 19:23:31 +00002212defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm_c,
Simon Pilgrima2b58622017-12-05 12:02:22 +00002213 SSE_ALU_F32P, avx512vl_i16_info, HasBWI, 1>,
Craig Toppera33846a2017-10-22 06:18:23 +00002214 EVEX_CD8<16, CD8VF>, VEX_WIG;
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002215
Craig Topper9471a7c2018-02-19 19:23:31 +00002216defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm_c,
Simon Pilgrima2b58622017-12-05 12:02:22 +00002217 SSE_ALU_F32P, avx512vl_i32_info, HasAVX512, 1>,
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002218 EVEX_CD8<32, CD8VF>;
2219
Craig Topper9471a7c2018-02-19 19:23:31 +00002220defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm_c,
Simon Pilgrima2b58622017-12-05 12:02:22 +00002221 SSE_ALU_F32P, avx512vl_i64_info, HasAVX512, 1>,
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002222 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
2223
2224defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm,
Simon Pilgrima2b58622017-12-05 12:02:22 +00002225 SSE_ALU_F32P, avx512vl_i8_info, HasBWI>,
Craig Toppera33846a2017-10-22 06:18:23 +00002226 EVEX_CD8<8, CD8VF>, VEX_WIG;
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002227
2228defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm,
Simon Pilgrima2b58622017-12-05 12:02:22 +00002229 SSE_ALU_F32P, avx512vl_i16_info, HasBWI>,
Craig Toppera33846a2017-10-22 06:18:23 +00002230 EVEX_CD8<16, CD8VF>, VEX_WIG;
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002231
Robert Khasanovf70f7982014-09-18 14:06:55 +00002232defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm,
Simon Pilgrima2b58622017-12-05 12:02:22 +00002233 SSE_ALU_F32P, avx512vl_i32_info, HasAVX512>,
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002234 EVEX_CD8<32, CD8VF>;
2235
Robert Khasanovf70f7982014-09-18 14:06:55 +00002236defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm,
Simon Pilgrima2b58622017-12-05 12:02:22 +00002237 SSE_ALU_F32P, avx512vl_i64_info, HasAVX512>,
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002238 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002239
Craig Toppera88306e2017-10-10 06:36:46 +00002240// Transforms to swizzle an immediate to help matching memory operand in first
2241// operand.
// Only the low three bits of the matched immediate are kept; they are mapped
// through X86::getSwappedVPCMPImm and re-emitted as an i8 target immediate.
2242def CommutePCMPCC : SDNodeXForm<imm, [{
2243 uint8_t Imm = N->getZExtValue() & 0x7;
Craig Topper9b64bf52018-02-20 03:58:11 +00002244 Imm = X86::getSwappedVPCMPImm(Imm);
Craig Toppera88306e2017-10-10 06:36:46 +00002245 return getI8Imm(Imm, SDLoc(N));
2246}]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002247
// Integer compare-into-mask records whose predicate is an immediate operand.
//
// Parameters:
//   opc    - opcode byte shared by every record.
//   Suffix - element-size suffix appended to "vpcmp" / "vpcmp${cc}".
//   OpNode - three-operand SDNode (lhs, rhs, imm condition code).
//   itins  - itinerary / scheduling info.
//   _      - vector type description.
//
// Records defined:
//   rri / rmi / rrik / rmik - condition code is an AVX512ICC operand printed
//       as part of the mnemonic; the k forms AND the result with $mask.
//       rri and rrik are marked isCommutable.
//   *_alt - assembler-only twins (isAsmParserOnly, no patterns) that take the
//       condition as a trailing u8imm operand; memory forms set mayLoad.
// The two anonymous patterns at the end match a load in the FIRST operand and
// select rmi / rmik with the operands swapped and the immediate rewritten by
// CommutePCMPCC.
Robert Khasanov29e3b962014-08-27 09:34:37 +00002248multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode,
Simon Pilgrimaa911552017-12-05 12:14:36 +00002249 OpndItins itins, X86VectorVTInfo _> {
Craig Topper149e6bd2016-09-09 01:36:10 +00002250 let isCommutable = 1 in
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002251 def rri : AVX512AIi8<opc, MRMSrcReg,
Craig Topper7d3c6d32015-01-28 10:09:56 +00002252 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, AVX512ICC:$cc),
Adam Nemet1efcb902014-07-01 18:03:43 +00002253 !strconcat("vpcmp${cc}", Suffix,
2254 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
Robert Khasanov29e3b962014-08-27 09:34:37 +00002255 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2256 imm:$cc))],
Simon Pilgrimaa911552017-12-05 12:14:36 +00002257 itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002258 def rmi : AVX512AIi8<opc, MRMSrcMem,
Craig Topper7d3c6d32015-01-28 10:09:56 +00002259 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, AVX512ICC:$cc),
Adam Nemet1efcb902014-07-01 18:03:43 +00002260 !strconcat("vpcmp${cc}", Suffix,
2261 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
Robert Khasanov29e3b962014-08-27 09:34:37 +00002262 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
2263 (_.VT (bitconvert (_.LdFrag addr:$src2))),
Craig Topper6e3a5822014-12-27 20:08:45 +00002264 imm:$cc))],
Simon Pilgrimaa911552017-12-05 12:14:36 +00002265 itins.rm>, EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Craig Topper8b876762017-06-13 07:13:50 +00002266 let isCommutable = 1 in
Robert Khasanov29e3b962014-08-27 09:34:37 +00002267 def rrik : AVX512AIi8<opc, MRMSrcReg,
2268 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
Craig Topper7d3c6d32015-01-28 10:09:56 +00002269 AVX512ICC:$cc),
Robert Khasanov29e3b962014-08-27 09:34:37 +00002270 !strconcat("vpcmp${cc}", Suffix,
2271 "\t{$src2, $src1, $dst {${mask}}|",
2272 "$dst {${mask}}, $src1, $src2}"),
2273 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2274 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
Craig Topper6e3a5822014-12-27 20:08:45 +00002275 imm:$cc)))],
Simon Pilgrimaa911552017-12-05 12:14:36 +00002276 itins.rr>, EVEX_4V, EVEX_K, Sched<[itins.Sched]>;
Robert Khasanov29e3b962014-08-27 09:34:37 +00002277 def rmik : AVX512AIi8<opc, MRMSrcMem,
2278 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
Craig Topper7d3c6d32015-01-28 10:09:56 +00002279 AVX512ICC:$cc),
Robert Khasanov29e3b962014-08-27 09:34:37 +00002280 !strconcat("vpcmp${cc}", Suffix,
2281 "\t{$src2, $src1, $dst {${mask}}|",
2282 "$dst {${mask}}, $src1, $src2}"),
2283 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2284 (OpNode (_.VT _.RC:$src1),
2285 (_.VT (bitconvert (_.LdFrag addr:$src2))),
Craig Topper6e3a5822014-12-27 20:08:45 +00002286 imm:$cc)))],
Simon Pilgrimaa911552017-12-05 12:14:36 +00002287 itins.rm>, EVEX_4V, EVEX_K,
2288 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Robert Khasanov29e3b962014-08-27 09:34:37 +00002289
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002290 // Accept explicit immediate argument form instead of comparison code.
Craig Topper0550ce72014-01-05 04:55:55 +00002291 let isAsmParserOnly = 1, hasSideEffects = 0 in {
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002292 def rri_alt : AVX512AIi8<opc, MRMSrcReg,
Craig Topper7ff6ab32015-01-21 08:43:49 +00002293 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
Robert Khasanov29e3b962014-08-27 09:34:37 +00002294 !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
2295 "$dst, $src1, $src2, $cc}"),
Simon Pilgrimaa911552017-12-05 12:14:36 +00002296 [], itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
Craig Topper9f4d4852015-01-20 12:15:30 +00002297 let mayLoad = 1 in
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002298 def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
Craig Topper7ff6ab32015-01-21 08:43:49 +00002299 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
Robert Khasanov29e3b962014-08-27 09:34:37 +00002300 !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
2301 "$dst, $src1, $src2, $cc}"),
Simon Pilgrimaa911552017-12-05 12:14:36 +00002302 [], itins.rm>, EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Robert Khasanov29e3b962014-08-27 09:34:37 +00002303 def rrik_alt : AVX512AIi8<opc, MRMSrcReg,
2304 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
Craig Topper7ff6ab32015-01-21 08:43:49 +00002305 u8imm:$cc),
Adam Nemet16de2482014-07-01 18:03:45 +00002306 !strconcat("vpcmp", Suffix,
Robert Khasanov29e3b962014-08-27 09:34:37 +00002307 "\t{$cc, $src2, $src1, $dst {${mask}}|",
2308 "$dst {${mask}}, $src1, $src2, $cc}"),
Simon Pilgrimaa911552017-12-05 12:14:36 +00002309 [], itins.rr>, EVEX_4V, EVEX_K, Sched<[itins.Sched]>;
Craig Topper9f4d4852015-01-20 12:15:30 +00002310 let mayLoad = 1 in
Robert Khasanov29e3b962014-08-27 09:34:37 +00002311 def rmik_alt : AVX512AIi8<opc, MRMSrcMem,
2312 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
Craig Topper7ff6ab32015-01-21 08:43:49 +00002313 u8imm:$cc),
Robert Khasanov29e3b962014-08-27 09:34:37 +00002314 !strconcat("vpcmp", Suffix,
2315 "\t{$cc, $src2, $src1, $dst {${mask}}|",
2316 "$dst {${mask}}, $src1, $src2, $cc}"),
Simon Pilgrimaa911552017-12-05 12:14:36 +00002317 [], itins.rm>, EVEX_4V, EVEX_K,
2318 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002319 }
Craig Toppera88306e2017-10-10 06:36:46 +00002320
  // Load appears as the first compare operand: select the memory form with
  // the operands swapped and the condition code swapped to compensate.
2321 def : Pat<(OpNode (bitconvert (_.LdFrag addr:$src2)),
2322 (_.VT _.RC:$src1), imm:$cc),
2323 (!cast<Instruction>(NAME#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2324 (CommutePCMPCC imm:$cc))>;
2325
2326 def : Pat<(and _.KRCWM:$mask, (OpNode (bitconvert (_.LdFrag addr:$src2)),
2327 (_.VT _.RC:$src1), imm:$cc)),
2328 (!cast<Instruction>(NAME#_.ZSuffix#"rmik") _.KRCWM:$mask,
2329 _.RC:$src1, addr:$src2,
2330 (CommutePCMPCC imm:$cc))>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002331}
2332
// Extends avx512_icmp_cc (same parameters, forwarded unchanged) with the
// embedded-broadcast memory forms:
//   rmib / rmibk         - second operand is X86VBroadcast of a scalar load,
//                          tagged EVEX_B; the k form ANDs with $mask.
//   rmib_alt / rmibk_alt - assembler-only twins taking the condition as a
//                          trailing u8imm operand (no patterns, mayLoad set).
// The trailing anonymous patterns match a broadcast load in the FIRST operand
// and select rmib / rmibk with the immediate rewritten by CommutePCMPCC.
Robert Khasanov29e3b962014-08-27 09:34:37 +00002333multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, SDNode OpNode,
Simon Pilgrimaa911552017-12-05 12:14:36 +00002334 OpndItins itins, X86VectorVTInfo _> :
2335 avx512_icmp_cc<opc, Suffix, OpNode, itins, _> {
Robert Khasanov29e3b962014-08-27 09:34:37 +00002336 def rmib : AVX512AIi8<opc, MRMSrcMem,
2337 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
Craig Topper7d3c6d32015-01-28 10:09:56 +00002338 AVX512ICC:$cc),
Robert Khasanov29e3b962014-08-27 09:34:37 +00002339 !strconcat("vpcmp${cc}", Suffix,
2340 "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
2341 "$dst, $src1, ${src2}", _.BroadcastStr, "}"),
2342 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
2343 (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
Craig Topper6e3a5822014-12-27 20:08:45 +00002344 imm:$cc))],
Simon Pilgrimaa911552017-12-05 12:14:36 +00002345 itins.rm>, EVEX_4V, EVEX_B,
2346 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Robert Khasanov29e3b962014-08-27 09:34:37 +00002347 def rmibk : AVX512AIi8<opc, MRMSrcMem,
2348 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
Craig Topper7d3c6d32015-01-28 10:09:56 +00002349 _.ScalarMemOp:$src2, AVX512ICC:$cc),
Robert Khasanov29e3b962014-08-27 09:34:37 +00002350 !strconcat("vpcmp${cc}", Suffix,
2351 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2352 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
2353 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2354 (OpNode (_.VT _.RC:$src1),
2355 (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
Craig Topper6e3a5822014-12-27 20:08:45 +00002356 imm:$cc)))],
Simon Pilgrimaa911552017-12-05 12:14:36 +00002357 itins.rm>, EVEX_4V, EVEX_K, EVEX_B,
2358 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002359
Robert Khasanov29e3b962014-08-27 09:34:37 +00002360 // Accept explicit immediate argument form instead of comparison code.
Craig Topper9f4d4852015-01-20 12:15:30 +00002361 let isAsmParserOnly = 1, hasSideEffects = 0, mayLoad = 1 in {
Robert Khasanov29e3b962014-08-27 09:34:37 +00002362 def rmib_alt : AVX512AIi8<opc, MRMSrcMem,
2363 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
Craig Topper7ff6ab32015-01-21 08:43:49 +00002364 u8imm:$cc),
Robert Khasanov29e3b962014-08-27 09:34:37 +00002365 !strconcat("vpcmp", Suffix,
2366 "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
2367 "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
Simon Pilgrimaa911552017-12-05 12:14:36 +00002368 [], itins.rm>, EVEX_4V, EVEX_B,
2369 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Robert Khasanov29e3b962014-08-27 09:34:37 +00002370 def rmibk_alt : AVX512AIi8<opc, MRMSrcMem,
2371 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
Craig Topper7ff6ab32015-01-21 08:43:49 +00002372 _.ScalarMemOp:$src2, u8imm:$cc),
Robert Khasanov29e3b962014-08-27 09:34:37 +00002373 !strconcat("vpcmp", Suffix,
2374 "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2375 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
Simon Pilgrimaa911552017-12-05 12:14:36 +00002376 [], itins.rm>, EVEX_4V, EVEX_K, EVEX_B,
2377 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Robert Khasanov29e3b962014-08-27 09:34:37 +00002378 }
Craig Toppera88306e2017-10-10 06:36:46 +00002379
  // Broadcast load appears as the first compare operand: select the broadcast
  // form with the operands swapped and the condition code swapped to match.
2380 def : Pat<(OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
2381 (_.VT _.RC:$src1), imm:$cc),
2382 (!cast<Instruction>(NAME#_.ZSuffix#"rmib") _.RC:$src1, addr:$src2,
2383 (CommutePCMPCC imm:$cc))>;
2384
2385 def : Pat<(and _.KRCWM:$mask, (OpNode (X86VBroadcast
2386 (_.ScalarLdFrag addr:$src2)),
2387 (_.VT _.RC:$src1), imm:$cc)),
2388 (!cast<Instruction>(NAME#_.ZSuffix#"rmibk") _.KRCWM:$mask,
2389 _.RC:$src1, addr:$src2,
2390 (CommutePCMPCC imm:$cc))>;
Robert Khasanov29e3b962014-08-27 09:34:37 +00002391}
2392
// Instantiates avx512_icmp_cc at all three vector lengths of VTInfo:
//   Z    - info512, EVEX_V512, requires [prd].
//   Z256 - info256, EVEX_V256, requires [prd, HasVLX].
//   Z128 - info128, EVEX_V128, requires [prd, HasVLX].
2393multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, SDNode OpNode,
Simon Pilgrimaa911552017-12-05 12:14:36 +00002394 OpndItins itins, AVX512VLVectorVTInfo VTInfo,
2395 Predicate prd> {
Robert Khasanov29e3b962014-08-27 09:34:37 +00002396 let Predicates = [prd] in
Simon Pilgrimaa911552017-12-05 12:14:36 +00002397 defm Z : avx512_icmp_cc<opc, Suffix, OpNode, itins, VTInfo.info512>,
2398 EVEX_V512;
Robert Khasanov29e3b962014-08-27 09:34:37 +00002399
2400 let Predicates = [prd, HasVLX] in {
Simon Pilgrimaa911552017-12-05 12:14:36 +00002401 defm Z256 : avx512_icmp_cc<opc, Suffix, OpNode, itins, VTInfo.info256>,
2402 EVEX_V256;
2403 defm Z128 : avx512_icmp_cc<opc, Suffix, OpNode, itins, VTInfo.info128>,
2404 EVEX_V128;
Robert Khasanov29e3b962014-08-27 09:34:37 +00002405 }
2406}
2407
// Same three-length expansion as avx512_icmp_cc_vl, but built on
// avx512_icmp_cc_rmb so each length also gets the broadcast forms.
// Z requires [prd]; Z256 and Z128 require [prd, HasVLX].
2408multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, SDNode OpNode,
Simon Pilgrimaa911552017-12-05 12:14:36 +00002409 OpndItins itins, AVX512VLVectorVTInfo VTInfo,
2410 Predicate prd> {
Robert Khasanov29e3b962014-08-27 09:34:37 +00002411 let Predicates = [prd] in
Simon Pilgrimaa911552017-12-05 12:14:36 +00002412 defm Z : avx512_icmp_cc_rmb<opc, Suffix, OpNode, itins, VTInfo.info512>,
Robert Khasanov29e3b962014-08-27 09:34:37 +00002413 EVEX_V512;
2414
2415 let Predicates = [prd, HasVLX] in {
Simon Pilgrimaa911552017-12-05 12:14:36 +00002416 defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, itins, VTInfo.info256>,
Robert Khasanov29e3b962014-08-27 09:34:37 +00002417 EVEX_V256;
Simon Pilgrimaa911552017-12-05 12:14:36 +00002418 defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, itins, VTInfo.info128>,
Robert Khasanov29e3b962014-08-27 09:34:37 +00002419 EVEX_V128;
2420 }
2421}
2422
// Immediate-predicate integer compares into a mask. The VPCMP<sfx> variants
// select on X86cmpm and the VPCMPU<sfx> variants on X86cmpmu.
//  - Byte/word variants: avx512_icmp_cc_vl (no broadcast forms), HasBWI,
//    opcodes 0x3F / 0x3E; the word variants add VEX_W.
//  - Dword/qword variants: avx512_icmp_cc_rmb_vl (adds broadcast forms),
//    HasAVX512, opcodes 0x1F / 0x1E; the qword variants add VEX_W.
Simon Pilgrimaa911552017-12-05 12:14:36 +00002423// FIXME: Is there a better scheduler itinerary for VPCMP/VPCMPU?
2424defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86cmpm, SSE_ALU_F32P,
2425 avx512vl_i8_info, HasBWI>, EVEX_CD8<8, CD8VF>;
2426defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86cmpmu, SSE_ALU_F32P,
2427 avx512vl_i8_info, HasBWI>, EVEX_CD8<8, CD8VF>;
Robert Khasanov29e3b962014-08-27 09:34:37 +00002428
Simon Pilgrimaa911552017-12-05 12:14:36 +00002429defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86cmpm, SSE_ALU_F32P,
2430 avx512vl_i16_info, HasBWI>,
2431 VEX_W, EVEX_CD8<16, CD8VF>;
2432defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86cmpmu, SSE_ALU_F32P,
2433 avx512vl_i16_info, HasBWI>,
2434 VEX_W, EVEX_CD8<16, CD8VF>;
Robert Khasanov29e3b962014-08-27 09:34:37 +00002435
Simon Pilgrimaa911552017-12-05 12:14:36 +00002436defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86cmpm, SSE_ALU_F32P,
2437 avx512vl_i32_info, HasAVX512>,
2438 EVEX_CD8<32, CD8VF>;
2439defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86cmpmu, SSE_ALU_F32P,
2440 avx512vl_i32_info, HasAVX512>,
2441 EVEX_CD8<32, CD8VF>;
Robert Khasanov29e3b962014-08-27 09:34:37 +00002442
Simon Pilgrimaa911552017-12-05 12:14:36 +00002443defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86cmpm, SSE_ALU_F32P,
2444 avx512vl_i64_info, HasAVX512>,
2445 VEX_W, EVEX_CD8<64, CD8VF>;
2446defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86cmpmu, SSE_ALU_F32P,
2447 avx512vl_i64_info, HasAVX512>,
2448 VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002449
Ayman Musa721d97f2017-06-27 12:08:37 +00002450
// Packed FP compare-into-mask records (opcode 0xC2) for one vector type `_`.
//
// Records defined through AVX512_maskable_cmp (pattern-bearing, selecting on
// X86cmpm with an AVXCC condition operand printed inside the mnemonic):
//   rri  - register second operand.
//   rmi  - full-vector memory second operand.
//   rmbi - broadcast scalar memory second operand, tagged EVEX_B.
// Records defined through AVX512_maskable_cmp_alt (assembler-only, condition
// given as a trailing u8imm): rri_alt, rmi_alt, rmbi_alt.
// The trailing anonymous patterns handle a (broadcast) load appearing as the
// FIRST compare operand; they only fire for CommutableCMPCC condition codes
// and pass the immediate through unchanged.
// NOTE(review): the trailing `1` on rri is presumably the commutable flag of
// AVX512_maskable_cmp, and the "rmik"/"rmbik" names referenced by the patterns
// are presumably the masked records that helper generates - confirm against
// its definition.
Simon Pilgrimbb791b32017-11-30 13:18:06 +00002451multiclass avx512_vcmp_common<OpndItins itins, X86VectorVTInfo _> {
Elena Demikhovsky29792e92015-05-07 11:24:42 +00002452 defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2453 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,AVXCC:$cc),
2454 "vcmp${cc}"#_.Suffix,
2455 "$src2, $src1", "$src1, $src2",
2456 (X86cmpm (_.VT _.RC:$src1),
2457 (_.VT _.RC:$src2),
Simon Pilgrimbb791b32017-11-30 13:18:06 +00002458 imm:$cc), itins.rr, 1>,
2459 Sched<[itins.Sched]>;
Elena Demikhovsky29792e92015-05-07 11:24:42 +00002460
Craig Toppere1cac152016-06-07 07:27:54 +00002461 defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2462 (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc),
2463 "vcmp${cc}"#_.Suffix,
2464 "$src2, $src1", "$src1, $src2",
2465 (X86cmpm (_.VT _.RC:$src1),
2466 (_.VT (bitconvert (_.LdFrag addr:$src2))),
Simon Pilgrimbb791b32017-11-30 13:18:06 +00002467 imm:$cc), itins.rm>,
2468 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Elena Demikhovsky29792e92015-05-07 11:24:42 +00002469
Craig Toppere1cac152016-06-07 07:27:54 +00002470 defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2471 (outs _.KRC:$dst),
2472 (ins _.RC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
2473 "vcmp${cc}"#_.Suffix,
2474 "${src2}"##_.BroadcastStr##", $src1",
2475 "$src1, ${src2}"##_.BroadcastStr,
2476 (X86cmpm (_.VT _.RC:$src1),
2477 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
Simon Pilgrimbb791b32017-11-30 13:18:06 +00002478 imm:$cc), itins.rm>,
2479 EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002480 // Accept explicit immediate argument form instead of comparison code.
Craig Topper0550ce72014-01-05 04:55:55 +00002481 let isAsmParserOnly = 1, hasSideEffects = 0 in {
Elena Demikhovsky29792e92015-05-07 11:24:42 +00002482 defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
2483 (outs _.KRC:$dst),
2484 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2485 "vcmp"#_.Suffix,
Simon Pilgrimbb791b32017-11-30 13:18:06 +00002486 "$cc, $src2, $src1", "$src1, $src2, $cc", itins.rr>,
2487 Sched<[itins.Sched]>;
Elena Demikhovsky29792e92015-05-07 11:24:42 +00002488
2489 let mayLoad = 1 in {
2490 defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
2491 (outs _.KRC:$dst),
2492 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2493 "vcmp"#_.Suffix,
Simon Pilgrimbb791b32017-11-30 13:18:06 +00002494 "$cc, $src2, $src1", "$src1, $src2, $cc", itins.rm>,
2495 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Elena Demikhovsky29792e92015-05-07 11:24:42 +00002496
2497 defm rmbi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
2498 (outs _.KRC:$dst),
2499 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2500 "vcmp"#_.Suffix,
2501 "$cc, ${src2}"##_.BroadcastStr##", $src1",
Simon Pilgrimbb791b32017-11-30 13:18:06 +00002502 "$src1, ${src2}"##_.BroadcastStr##", $cc", itins.rm>,
2503 EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Elena Demikhovsky29792e92015-05-07 11:24:42 +00002504 }
Craig Topper61956982017-09-30 17:02:39 +00002505 }
2506
2507 // Patterns for selecting with loads in other operand.
2508 def : Pat<(X86cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
2509 CommutableCMPCC:$cc),
2510 (!cast<Instruction>(NAME#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2511 imm:$cc)>;
2512
2513 def : Pat<(and _.KRCWM:$mask, (X86cmpm (_.LdFrag addr:$src2),
2514 (_.VT _.RC:$src1),
2515 CommutableCMPCC:$cc)),
2516 (!cast<Instruction>(NAME#_.ZSuffix#"rmik") _.KRCWM:$mask,
2517 _.RC:$src1, addr:$src2,
2518 imm:$cc)>;
2519
2520 def : Pat<(X86cmpm (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
2521 (_.VT _.RC:$src1), CommutableCMPCC:$cc),
2522 (!cast<Instruction>(NAME#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2523 imm:$cc)>;
2524
2525 def : Pat<(and _.KRCWM:$mask, (X86cmpm (X86VBroadcast
2526 (_.ScalarLdFrag addr:$src2)),
2527 (_.VT _.RC:$src1),
2528 CommutableCMPCC:$cc)),
2529 (!cast<Instruction>(NAME#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2530 _.RC:$src1, addr:$src2,
2531 imm:$cc)>;
Elena Demikhovsky29792e92015-05-07 11:24:42 +00002532}
2533
// Register-register packed FP compare with the {sae} modifier (opcode 0xC2,
// tagged EVEX_B).
//   rrib     - selects on X86cmpmRnd with rounding operand FROUND_NO_EXC.
//   rrib_alt - assembler-only twin taking the condition as a u8imm operand.
Simon Pilgrimbb791b32017-11-30 13:18:06 +00002534multiclass avx512_vcmp_sae<OpndItins itins, X86VectorVTInfo _> {
Elena Demikhovsky29792e92015-05-07 11:24:42 +00002535 // Comparison code form (VCMP[EQ/LT/LE/...]).
2536 defm rrib : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2537 (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
2538 "vcmp${cc}"#_.Suffix,
Craig Topperbfe13ff2016-01-11 00:44:52 +00002539 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
Elena Demikhovsky29792e92015-05-07 11:24:42 +00002540 (X86cmpmRnd (_.VT _.RC:$src1),
2541 (_.VT _.RC:$src2),
2542 imm:$cc,
Simon Pilgrimbb791b32017-11-30 13:18:06 +00002543 (i32 FROUND_NO_EXC)), itins.rr>,
2544 EVEX_B, Sched<[itins.Sched]>;
Elena Demikhovsky29792e92015-05-07 11:24:42 +00002545
  // Explicit immediate form, accepted by the assembler only.
2546 let isAsmParserOnly = 1, hasSideEffects = 0 in {
2547 defm rrib_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
2548 (outs _.KRC:$dst),
2549 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2550 "vcmp"#_.Suffix,
Craig Topperbfe13ff2016-01-11 00:44:52 +00002551 "$cc, {sae}, $src2, $src1",
Simon Pilgrimbb791b32017-11-30 13:18:06 +00002552 "$src1, $src2, {sae}, $cc", itins.rr>,
2553 EVEX_B, Sched<[itins.Sched]>;
Elena Demikhovsky29792e92015-05-07 11:24:42 +00002554 }
2555}
2556
// Expands the packed FP compare across vector lengths of `_`:
//   Z    - info512: avx512_vcmp_common plus avx512_vcmp_sae, requires
//          [HasAVX512], EVEX_V512.
//   Z128 - info128: avx512_vcmp_common only, requires [HasAVX512, HasVLX].
//   Z256 - info256: avx512_vcmp_common only, requires [HasAVX512, HasVLX].
// Only the 512-bit instantiation receives the {sae} forms.
Simon Pilgrimbb791b32017-11-30 13:18:06 +00002557multiclass avx512_vcmp<OpndItins itins, AVX512VLVectorVTInfo _> {
Elena Demikhovsky29792e92015-05-07 11:24:42 +00002558 let Predicates = [HasAVX512] in {
Simon Pilgrimbb791b32017-11-30 13:18:06 +00002559 defm Z : avx512_vcmp_common<itins, _.info512>,
2560 avx512_vcmp_sae<itins, _.info512>, EVEX_V512;
Elena Demikhovsky29792e92015-05-07 11:24:42 +00002561
2562 }
2563 let Predicates = [HasAVX512,HasVLX] in {
Simon Pilgrimbb791b32017-11-30 13:18:06 +00002564 defm Z128 : avx512_vcmp_common<itins, _.info128>, EVEX_V128;
2565 defm Z256 : avx512_vcmp_common<itins, _.info256>, EVEX_V256;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002566 }
2567}
2568
// Packed FP compare instantiations: VCMPPD over the f64 vector types (with
// VEX_W and 64-bit CD8 scaling) and VCMPPS over the f32 vector types (32-bit
// CD8 scaling).
Simon Pilgrimbb791b32017-11-30 13:18:06 +00002569defm VCMPPD : avx512_vcmp<SSE_ALU_F64P, avx512vl_f64_info>,
Elena Demikhovsky29792e92015-05-07 11:24:42 +00002570 AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
Simon Pilgrimbb791b32017-11-30 13:18:06 +00002571defm VCMPPS : avx512_vcmp<SSE_ALU_F32P, avx512vl_f32_info>,
Elena Demikhovsky29792e92015-05-07 11:24:42 +00002572 AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002573
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00002574
Craig Topper61956982017-09-30 17:02:39 +00002575// Patterns to select fp compares with load as first operand.
// The scalar load is folded into VCMPSDZrm / VCMPSSZrm with the two compare
// operands swapped. The patterns only match CommutableCMPCC condition codes,
// and the immediate is passed through unchanged.
2576let Predicates = [HasAVX512] in {
2577 def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1,
2578 CommutableCMPCC:$cc)),
2579 (VCMPSDZrm FR64X:$src1, addr:$src2, imm:$cc)>;
2580
2581 def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1,
2582 CommutableCMPCC:$cc)),
2583 (VCMPSSZrm FR32X:$src1, addr:$src2, imm:$cc)>;
2584}
2585
Asaf Badouh572bbce2015-09-20 08:46:07 +00002586// ----------------------------------------------------------------
2587// FPClass
Asaf Badouh696e8e02015-10-18 11:04:38 +00002588// Handle the scalar fpclass instruction: mask = op(reg_scalar, imm)
2589//                                        mask = op(mem_scalar, imm)
// Scalar fpclass records producing a mask from one source and an immediate.
//
// Parameters:
//   opc       - opcode byte.
//   OpcodeStr - mnemonic stem; _.Suffix is appended to it.
//   OpNode    - two-operand SDNode (source, i32 immediate).
//   itins     - itinerary / scheduling info.
//   _         - type description supplying RC, KRC, KRCWM, IntScalarMemOp,
//               ScalarIntMemCPat and ExeDomain.
//   prd       - predicate gating every record.
//
// Records defined:
//   rr / rrk - register source; rrk ANDs the result with $mask (EVEX_K).
//   rm / rmk - memory source matched via _.ScalarIntMemCPat; rmk ANDs the
//              result with $mask (EVEX_K).
2590multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim54c60832017-12-01 16:51:48 +00002591 OpndItins itins, X86VectorVTInfo _,
2592 Predicate prd> {
Craig Topper4a638432017-11-11 06:57:44 +00002593 let Predicates = [prd], ExeDomain = _.ExeDomain in {
Craig Topper702097d2017-08-20 18:30:24 +00002594 def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
Asaf Badouh696e8e02015-10-18 11:04:38 +00002595 (ins _.RC:$src1, i32u8imm:$src2),
Craig Topper048e7002016-01-08 06:09:20 +00002596 OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
Asaf Badouh696e8e02015-10-18 11:04:38 +00002597 [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
Simon Pilgrim54c60832017-12-01 16:51:48 +00002598 (i32 imm:$src2)))], itins.rr>,
2599 Sched<[itins.Sched]>;
Asaf Badouh696e8e02015-10-18 11:04:38 +00002600 def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2601 (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2602 OpcodeStr##_.Suffix#
Craig Topper048e7002016-01-08 06:09:20 +00002603 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
Craig Topperac799b02018-02-28 06:19:55 +00002604 [(set _.KRC:$dst,(and _.KRCWM:$mask,
Asaf Badouh696e8e02015-10-18 11:04:38 +00002605 (OpNode (_.VT _.RC:$src1),
Simon Pilgrim54c60832017-12-01 16:51:48 +00002606 (i32 imm:$src2))))], itins.rr>,
2607 EVEX_K, Sched<[itins.Sched]>;
Craig Topper63801df2017-02-19 21:44:35 +00002608 def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
Craig Topperca8abed2017-11-13 06:46:48 +00002609 (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
Craig Topper63801df2017-02-19 21:44:35 +00002610 OpcodeStr##_.Suffix##
2611 "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2612 [(set _.KRC:$dst,
Craig Topperca8abed2017-11-13 06:46:48 +00002613 (OpNode _.ScalarIntMemCPat:$src1,
Simon Pilgrim54c60832017-12-01 16:51:48 +00002614 (i32 imm:$src2)))], itins.rm>,
2615 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Craig Topper63801df2017-02-19 21:44:35 +00002616 def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
Craig Topperca8abed2017-11-13 06:46:48 +00002617 (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
Craig Topper63801df2017-02-19 21:44:35 +00002618 OpcodeStr##_.Suffix##
2619 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
Craig Topperac799b02018-02-28 06:19:55 +00002620 [(set _.KRC:$dst,(and _.KRCWM:$mask,
Craig Topperca8abed2017-11-13 06:46:48 +00002621 (OpNode _.ScalarIntMemCPat:$src1,
Simon Pilgrim54c60832017-12-01 16:51:48 +00002622 (i32 imm:$src2))))], itins.rm>,
2623 EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Asaf Badouh696e8e02015-10-18 11:04:38 +00002624 }
2625}
2626
Asaf Badouh572bbce2015-09-20 08:46:07 +00002627// Handle the vector fpclass instruction: mask = fpclass(reg_vec, imm)
2628// mask = fpclass(mem_vec, imm)
2629// mask = fpclass(broadcast(eltVt), imm)
//
// Emits six defs for the vector type described by `_`. Every def writes a mask
// register of class _.KRC and takes the immediate as $src2:
//   rr  / rrk  - vector register source.
//   rm  / rmk  - full-vector load; `mem` is appended to the mnemonic.
//   rmb / rmbk - scalar load broadcast to the vector (EVEX_B); `broadcast` is
//                appended to the mnemonic and _.BroadcastStr to the operand.
// The *k forms add a _.KRCWM write-mask operand (EVEX_K) and match
// (and $mask, (OpNode ...)).
2630multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim54c60832017-12-01 16:51:48 +00002631 OpndItins itins, X86VectorVTInfo _,
2632 string mem, string broadcast>{
Craig Topper4a638432017-11-11 06:57:44 +00002633 let ExeDomain = _.ExeDomain in {
  // Register source, unmasked.
Asaf Badouh572bbce2015-09-20 08:46:07 +00002634 def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2635 (ins _.RC:$src1, i32u8imm:$src2),
Craig Topper048e7002016-01-08 06:09:20 +00002636 OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
Asaf Badouh572bbce2015-09-20 08:46:07 +00002637 [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
Simon Pilgrim54c60832017-12-01 16:51:48 +00002638 (i32 imm:$src2)))], itins.rr>,
2639 Sched<[itins.Sched]>;
  // Register source, result ANDed with the write-mask.
Asaf Badouh572bbce2015-09-20 08:46:07 +00002640 def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2641 (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2642 OpcodeStr##_.Suffix#
Craig Topper048e7002016-01-08 06:09:20 +00002643 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
Craig Topperac799b02018-02-28 06:19:55 +00002644 [(set _.KRC:$dst,(and _.KRCWM:$mask,
Asaf Badouh572bbce2015-09-20 08:46:07 +00002645 (OpNode (_.VT _.RC:$src1),
Simon Pilgrim54c60832017-12-01 16:51:48 +00002646 (i32 imm:$src2))))], itins.rr>,
2647 EVEX_K, Sched<[itins.Sched]>;
  // Full-vector memory source, unmasked.
Craig Toppere1cac152016-06-07 07:27:54 +00002648 def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2649 (ins _.MemOp:$src1, i32u8imm:$src2),
2650 OpcodeStr##_.Suffix##mem#
2651 "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
Simon Pilgrimb13961d2016-06-11 14:34:10 +00002652 [(set _.KRC:$dst,(OpNode
Craig Toppere1cac152016-06-07 07:27:54 +00002653 (_.VT (bitconvert (_.LdFrag addr:$src1))),
Simon Pilgrim54c60832017-12-01 16:51:48 +00002654 (i32 imm:$src2)))], itins.rm>,
2655 Sched<[itins.Sched.Folded, ReadAfterLd]>;
  // Full-vector memory source, masked.
Craig Toppere1cac152016-06-07 07:27:54 +00002656 def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2657 (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
2658 OpcodeStr##_.Suffix##mem#
2659 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
Craig Topperac799b02018-02-28 06:19:55 +00002660 [(set _.KRC:$dst, (and _.KRCWM:$mask, (OpNode
Craig Toppere1cac152016-06-07 07:27:54 +00002661 (_.VT (bitconvert (_.LdFrag addr:$src1))),
Simon Pilgrim54c60832017-12-01 16:51:48 +00002662 (i32 imm:$src2))))], itins.rm>,
2663 EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  // Broadcast scalar memory source, unmasked.
Craig Toppere1cac152016-06-07 07:27:54 +00002664 def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2665 (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
2666 OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
2667 _.BroadcastStr##", $dst|$dst, ${src1}"
2668 ##_.BroadcastStr##", $src2}",
Simon Pilgrimb13961d2016-06-11 14:34:10 +00002669 [(set _.KRC:$dst,(OpNode
2670 (_.VT (X86VBroadcast
Craig Toppere1cac152016-06-07 07:27:54 +00002671 (_.ScalarLdFrag addr:$src1))),
Simon Pilgrim54c60832017-12-01 16:51:48 +00002672 (i32 imm:$src2)))], itins.rm>,
2673 EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  // Broadcast scalar memory source, masked.
Craig Toppere1cac152016-06-07 07:27:54 +00002674 def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2675 (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
2676 OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
2677 _.BroadcastStr##", $dst {${mask}}|$dst {${mask}}, ${src1}"##
2678 _.BroadcastStr##", $src2}",
Craig Topperac799b02018-02-28 06:19:55 +00002679 [(set _.KRC:$dst,(and _.KRCWM:$mask, (OpNode
Simon Pilgrimb13961d2016-06-11 14:34:10 +00002680 (_.VT (X86VBroadcast
Craig Toppere1cac152016-06-07 07:27:54 +00002681 (_.ScalarLdFrag addr:$src1))),
Simon Pilgrim54c60832017-12-01 16:51:48 +00002682 (i32 imm:$src2))))], itins.rm>,
2683 EVEX_B, EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Craig Topper4a638432017-11-11 06:57:44 +00002684 }
Asaf Badouh572bbce2015-09-20 08:46:07 +00002685}
2686
// Instantiates avx512_vector_fpclass for each vector width in `_`:
//   Z    - _.info512, guarded by `prd`, EVEX_V512.
//   Z128 - _.info128, guarded by `prd` and HasVLX, EVEX_V128.
//   Z256 - _.info256, guarded by `prd` and HasVLX, EVEX_V256.
// "{z}", "{x}" and "{y}" are passed as the `mem` string, i.e. the suffix that
// avx512_vector_fpclass appends to the mnemonic of its rm/rmk forms.
Simon Pilgrim54c60832017-12-01 16:51:48 +00002687multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
2688 bits<8> opc, SDNode OpNode,
2689 OpndItins itins, Predicate prd,
2690 string broadcast>{
Asaf Badouh572bbce2015-09-20 08:46:07 +00002691 let Predicates = [prd] in {
Simon Pilgrim54c60832017-12-01 16:51:48 +00002692 defm Z : avx512_vector_fpclass<opc, OpcodeStr, OpNode, itins,
2693 _.info512, "{z}", broadcast>, EVEX_V512;
Asaf Badouh572bbce2015-09-20 08:46:07 +00002694 }
2695 let Predicates = [prd, HasVLX] in {
Simon Pilgrim54c60832017-12-01 16:51:48 +00002696 defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, itins,
2697 _.info128, "{x}", broadcast>, EVEX_V128;
2698 defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, itins,
2699 _.info256, "{y}", broadcast>, EVEX_V256;
Asaf Badouh572bbce2015-09-20 08:46:07 +00002700 }
2701}
2702
Simon Pilgrim54c60832017-12-01 16:51:48 +00002703// FIXME: Is there a better scheduler itinerary for VFPCLASS?
// Builds the four element-type families of an fpclass instruction:
//   PS / PD - packed forms via avx512_vector_fpclass_all, using opcVec and the
//             broadcast mnemonic suffix "{l}" / "{q}".
//   SS / SD - scalar forms via avx512_scalar_fpclass, using opcScalar.
// The 64-bit element variants (PD, SD) additionally set VEX_W.
Asaf Badouh572bbce2015-09-20 08:46:07 +00002704multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
Asaf Badouh696e8e02015-10-18 11:04:38 +00002705 bits<8> opcScalar, SDNode VecOpNode, SDNode ScalarOpNode, Predicate prd>{
Simon Pilgrim18bcf932016-02-03 09:41:59 +00002706 defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec,
Simon Pilgrim54c60832017-12-01 16:51:48 +00002707 VecOpNode, SSE_ALU_F32P, prd, "{l}">,
2708 EVEX_CD8<32, CD8VF>;
Simon Pilgrim18bcf932016-02-03 09:41:59 +00002709 defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec,
Simon Pilgrim54c60832017-12-01 16:51:48 +00002710 VecOpNode, SSE_ALU_F64P, prd, "{q}">,
2711 EVEX_CD8<64, CD8VF> , VEX_W;
Asaf Badouh696e8e02015-10-18 11:04:38 +00002712 defm SS : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
Simon Pilgrim54c60832017-12-01 16:51:48 +00002713 SSE_ALU_F32S, f32x_info, prd>,
2714 EVEX_CD8<32, CD8VT1>;
Asaf Badouh696e8e02015-10-18 11:04:38 +00002715 defm SD : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
Simon Pilgrim54c60832017-12-01 16:51:48 +00002716 SSE_ALU_F64S, f64x_info, prd>,
2717 EVEX_CD8<64, CD8VT1>, VEX_W;
Asaf Badouh572bbce2015-09-20 08:46:07 +00002718}
2719
// VFPCLASS{PS,PD,SS,SD}: packed opcode 0x66, scalar opcode 0x67, all guarded
// by HasDQI.
Asaf Badouh696e8e02015-10-18 11:04:38 +00002720defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, X86Vfpclass,
2721 X86Vfpclasss, HasDQI>, AVX512AIi8Base,EVEX;
Asaf Badouh572bbce2015-09-20 08:46:07 +00002722
Elena Demikhovsky29792e92015-05-07 11:24:42 +00002723//-----------------------------------------------------------------
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002724// Mask register copy, including
2725// - copy between mask registers
2726// - load/store mask registers
2727// - copy from GPR to mask register and vice versa
2728//
// avx512_mask_mov covers the first two groups for one mask register class:
//   kk - mask register to mask register (opc_kk); no selection pattern.
//   km - load a `vvt` value from `x86memop` into a mask register (opc_km).
//   mk - store a mask register to memory (opc_mk).
2729multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
2730 string OpcodeStr, RegisterClass KRC,
Elena Demikhovskyba846722015-02-17 09:20:12 +00002731 ValueType vvt, X86MemOperand x86memop> {
  // The brace-less `let` applies to `kk` only. SchedRW is named twice for it
  // (here and through the Sched<> mixin), both times as WriteMove.
Simon Pilgrim9afbe772017-12-06 19:36:00 +00002732 let hasSideEffects = 0, SchedRW = [WriteMove] in
Craig Toppere1cac152016-06-07 07:27:54 +00002733 def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
Simon Pilgrim9afbe772017-12-06 19:36:00 +00002734 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
Simon Pilgrim07e13372018-02-12 16:59:04 +00002735 IIC_SSE_MOVDQ>, Sched<[WriteMove]>;
Craig Toppere1cac152016-06-07 07:27:54 +00002736 def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
2737 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
Simon Pilgrim07e13372018-02-12 16:59:04 +00002738 [(set KRC:$dst, (vvt (load addr:$src)))], IIC_SSE_MOVDQ>,
2739 Sched<[WriteLoad]>;
Craig Toppere1cac152016-06-07 07:27:54 +00002740 def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
2741 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
Simon Pilgrim07e13372018-02-12 16:59:04 +00002742 [(store KRC:$src, addr:$dst)], IIC_SSE_MOVDQ>,
2743 Sched<[WriteStore]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002744}
2745
// Copies between a general-purpose register class (GRC) and a mask register
// class (KRC):
//   kr - GPR to mask register (opc_kr).
//   rk - mask register to GPR (opc_rk).
// Neither def carries a selection pattern; both are marked side-effect free.
2746multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
2747 string OpcodeStr,
2748 RegisterClass KRC, RegisterClass GRC> {
Elena Demikhovskyf404e052014-01-05 14:21:07 +00002749 let hasSideEffects = 0 in {
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002750 def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
Simon Pilgrim9afbe772017-12-06 19:36:00 +00002751 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
2752 IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002753 def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
Simon Pilgrim9afbe772017-12-06 19:36:00 +00002754 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
2755 IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002756 }
2757}
2758
// KMOV instantiations. KMOVB needs HasDQI, KMOVW needs HasAVX512, and
// KMOVD/KMOVQ need HasBWI. The B, W and D GPR forms use GR32; Q uses GR64.
// For B and W the mask/memory and GPR forms share one defm and one prefix.
Robert Khasanov74acbb72014-07-23 14:49:42 +00002759let Predicates = [HasDQI] in
Elena Demikhovskyba846722015-02-17 09:20:12 +00002760 defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
Robert Khasanov74acbb72014-07-23 14:49:42 +00002761 avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
2762 VEX, PD;
2763
2764let Predicates = [HasAVX512] in
Elena Demikhovskyba846722015-02-17 09:20:12 +00002765 defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
Robert Khasanov74acbb72014-07-23 14:49:42 +00002766 avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
Craig Topper5ccb6172014-02-18 00:21:49 +00002767 VEX, PS;
Robert Khasanov74acbb72014-07-23 14:49:42 +00002768
// For D and Q the mask/memory forms and the GPR forms are separate defm's
// because they are given different prefix/VEX_W settings.
2769let Predicates = [HasBWI] in {
Elena Demikhovskyba846722015-02-17 09:20:12 +00002770 defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
2771 VEX, PD, VEX_W;
Robert Khasanov74acbb72014-07-23 14:49:42 +00002772 defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
2773 VEX, XD;
Elena Demikhovskyba846722015-02-17 09:20:12 +00002774 defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
2775 VEX, PS, VEX_W;
Robert Khasanov74acbb72014-07-23 14:49:42 +00002776 defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
2777 VEX, XD, VEX_W;
2778}
2779
2780// GR from/to mask register
// 16-bit and 8-bit bitconverts are routed through a 32-bit GPR:
//   GPR -> mask: insert the narrow GPR into an undefined i32 (INSERT_SUBREG
//                on IMPLICIT_DEF), then COPY_TO_REGCLASS to the mask class.
//   mask -> GPR: COPY_TO_REGCLASS to GR32, then EXTRACT_SUBREG the low part.
Elena Demikhovskydca03be2016-08-07 13:05:58 +00002781def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
Craig Topper058f2f62017-03-28 16:35:29 +00002782 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
Elena Demikhovskydca03be2016-08-07 13:05:58 +00002783def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
Craig Topper058f2f62017-03-28 16:35:29 +00002784 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
Elena Demikhovskydca03be2016-08-07 13:05:58 +00002785
2786def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
Craig Topper058f2f62017-03-28 16:35:29 +00002787 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
Elena Demikhovskydca03be2016-08-07 13:05:58 +00002788def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
Craig Topper058f2f62017-03-28 16:35:29 +00002789 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;
Elena Demikhovskydca03be2016-08-07 13:05:58 +00002790
// Extending a bitconverted mask to i32: zext selects a real KMOV to GPR
// (KMOVWrk, or KMOVBrk which requires HasDQI); anyext is a plain register
// class copy.
2791def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
Igor Bregera2f8ca92016-09-05 08:26:51 +00002792 (KMOVWrk VK16:$src)>;
Elena Demikhovskydca03be2016-08-07 13:05:58 +00002793def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
Craig Topper058f2f62017-03-28 16:35:29 +00002794 (COPY_TO_REGCLASS VK16:$src, GR32)>;
Elena Demikhovskydca03be2016-08-07 13:05:58 +00002795
2796def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
Igor Bregera2f8ca92016-09-05 08:26:51 +00002797 (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
Elena Demikhovskydca03be2016-08-07 13:05:58 +00002798def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
Craig Topper058f2f62017-03-28 16:35:29 +00002799 (COPY_TO_REGCLASS VK8:$src, GR32)>;
Elena Demikhovskydca03be2016-08-07 13:05:58 +00002800
// 32-bit and 64-bit bitconverts are direct register class copies.
2801def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
2802 (COPY_TO_REGCLASS GR32:$src, VK32)>;
2803def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
2804 (COPY_TO_REGCLASS VK32:$src, GR32)>;
2805def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
2806 (COPY_TO_REGCLASS GR64:$src, VK64)>;
2807def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
2808 (COPY_TO_REGCLASS VK64:$src, GR64)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002809
Robert Khasanov74acbb72014-07-23 14:49:42 +00002810// Load/store kreg
// With DQI, masks narrower than 8 bits are moved with the byte-sized KMOVB:
// a VK1 store first copies the value to VK8, and v1i1/v2i1/v4i1 loads copy
// the KMOVBkm result down to the narrower mask class.
2811let Predicates = [HasDQI] in {
Igor Bregerd6c187b2016-01-27 08:43:25 +00002812 def : Pat<(store VK1:$src, addr:$dst),
2813 (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;
Elena Demikhovsky5e426f72016-04-03 08:41:12 +00002814
Craig Topperbe315852018-03-04 01:48:00 +00002815 def : Pat<(v1i1 (load addr:$src)),
2816 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
Elena Demikhovsky5e426f72016-04-03 08:41:12 +00002817 def : Pat<(v2i1 (load addr:$src)),
2818 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
2819 def : Pat<(v4i1 (load addr:$src)),
2820 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
Elena Demikhovskyba846722015-02-17 09:20:12 +00002821}
Elena Demikhovsky5e426f72016-04-03 08:41:12 +00002822
// A v8i1 bitconvert of an i8 load is selected as a zero-extending GPR load
// (MOVZX32rm8) followed by a copy into VK8.
Robert Khasanov74acbb72014-07-23 14:49:42 +00002823let Predicates = [HasAVX512] in {
Craig Topper876ec0b2017-12-31 07:38:41 +00002824 def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
2825 (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
Robert Khasanov74acbb72014-07-23 14:49:42 +00002826}
Elena Demikhovskyc5f67262013-12-17 08:33:15 +00002827
// scalar_to_vector from a GPR into each mask type, plus one special case for
// inserting a single bit into an all-zero v16i1.
Robert Khasanov74acbb72014-07-23 14:49:42 +00002828let Predicates = [HasAVX512] in {
  // For mask type maskVT / class maskRC: a GR32 source is copied straight into
  // the mask class; a GR8 source is first placed in the low byte of an
  // undefined i32.
Guy Blank548e22a2017-05-19 12:35:15 +00002829 multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
2830 def : Pat<(maskVT (scalar_to_vector GR32:$src)),
2831 (COPY_TO_REGCLASS GR32:$src, maskRC)>;
Elena Demikhovsky6e9b1602016-07-31 06:48:01 +00002832
Guy Blank548e22a2017-05-19 12:35:15 +00002833 def : Pat<(maskVT (scalar_to_vector GR8:$src)),
2834 (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
Guy Blank548e22a2017-05-19 12:35:15 +00002835 }
Elena Demikhovsky6e9b1602016-07-31 06:48:01 +00002836
Guy Blank548e22a2017-05-19 12:35:15 +00002837 defm : operation_gpr_mask_copy_lowering<VK1, v1i1>;
2838 defm : operation_gpr_mask_copy_lowering<VK2, v2i1>;
2839 defm : operation_gpr_mask_copy_lowering<VK4, v4i1>;
2840 defm : operation_gpr_mask_copy_lowering<VK8, v8i1>;
2841 defm : operation_gpr_mask_copy_lowering<VK16, v16i1>;
2842 defm : operation_gpr_mask_copy_lowering<VK32, v32i1>;
2843 defm : operation_gpr_mask_copy_lowering<VK64, v64i1>;
Elena Demikhovskyb906df92016-09-13 07:57:00 +00002844
  // Inserting a v1i1 built from GR8 at index 0 of an all-zero v16i1: AND the
  // widened GPR with 1 so only bit 0 survives, then move it with KMOVWkr.
Craig Topper26a701f2018-01-23 05:36:53 +00002845 def : Pat<(insert_subvector (v16i1 immAllZerosV),
2846 (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
Guy Blank548e22a2017-05-19 12:35:15 +00002847 (COPY_TO_REGCLASS
Craig Topper26a701f2018-01-23 05:36:53 +00002848 (KMOVWkr (AND32ri8
2849 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
2850 (i32 1))), VK16)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002851}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002852
2853// Mask unary operation
2854// - KNOT
// Defines a single `rr` instruction that applies OpNode to one mask register
// of class KRC and writes a register of the same class, guarded by `prd`.
2855multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
Robert Khasanov74acbb72014-07-23 14:49:42 +00002856 RegisterClass KRC, SDPatternOperator OpNode,
Simon Pilgrim9afbe772017-12-06 19:36:00 +00002857 OpndItins itins, Predicate prd> {
Robert Khasanov74acbb72014-07-23 14:49:42 +00002858 let Predicates = [prd] in
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002859 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
Craig Topperedb09112014-11-25 20:11:23 +00002860 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
Simon Pilgrim9afbe772017-12-06 19:36:00 +00002861 [(set KRC:$dst, (OpNode KRC:$src))], itins.rr>,
2862 Sched<[itins.Sched]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002863}
2864
// Instantiates avx512_mask_unop for all four mask widths, appending the width
// letter to the mnemonic:
//   B - VK8,  HasDQI,    PD        W - VK16, HasAVX512, PS
//   D - VK32, HasBWI,    PD, VEX_W Q - VK64, HasBWI,    PS, VEX_W
Robert Khasanov74acbb72014-07-23 14:49:42 +00002865multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
Simon Pilgrim9afbe772017-12-06 19:36:00 +00002866 SDPatternOperator OpNode, OpndItins itins> {
Robert Khasanov74acbb72014-07-23 14:49:42 +00002867 defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
Simon Pilgrim9afbe772017-12-06 19:36:00 +00002868 itins, HasDQI>, VEX, PD;
Robert Khasanov74acbb72014-07-23 14:49:42 +00002869 defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
Simon Pilgrim9afbe772017-12-06 19:36:00 +00002870 itins, HasAVX512>, VEX, PS;
Robert Khasanov74acbb72014-07-23 14:49:42 +00002871 defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
Simon Pilgrim9afbe772017-12-06 19:36:00 +00002872 itins, HasBWI>, VEX, PD, VEX_W;
Robert Khasanov74acbb72014-07-23 14:49:42 +00002873 defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
Simon Pilgrim9afbe772017-12-06 19:36:00 +00002874 itins, HasBWI>, VEX, PS, VEX_W;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002875}
2876
// KNOT{B,W,D,Q}: opcode 0x44, selected for `vnot` on a mask register.
Simon Pilgrim9afbe772017-12-06 19:36:00 +00002877defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SSE_BIT_ITINS_P>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002878
Robert Khasanov74acbb72014-07-23 14:49:42 +00002879// KNL does not support KNOTB (it requires DQI), 8-bit mask is promoted to 16-bit
// The `let` below has no braces, so [HasAVX512, NoDQI] guards only the VK8
// pattern that immediately follows it.
Craig Topper7b9cc142016-11-03 06:04:28 +00002880let Predicates = [HasAVX512, NoDQI] in
2881def : Pat<(vnot VK8:$src),
2882 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
2883
// VK4 and VK2 are likewise widened to VK16, inverted with KNOTW, and copied
// back to their own class.
2884def : Pat<(vnot VK4:$src),
2885 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
2886def : Pat<(vnot VK2:$src),
2887 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002888
2889// Mask binary operation
Elena Demikhovskye382c3f2013-12-10 13:53:10 +00002890// - KAND, KANDN, KOR, KXNOR, KXOR
// Defines a single `rr` instruction that applies OpNode to two mask registers
// of class KRC and writes a register of the same class. `prd` guards the def
// and IsCommutable is forwarded to its isCommutable flag.
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002891multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
Robert Khasanov595683d2014-07-28 13:46:45 +00002892 RegisterClass KRC, SDPatternOperator OpNode,
Simon Pilgrim9afbe772017-12-06 19:36:00 +00002893 OpndItins itins, Predicate prd, bit IsCommutable> {
Elena Demikhovskyd1084c52015-04-27 12:57:59 +00002894 let Predicates = [prd], isCommutable = IsCommutable in
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002895 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
2896 !strconcat(OpcodeStr,
Craig Topperedb09112014-11-25 20:11:23 +00002897 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
Simon Pilgrim9afbe772017-12-06 19:36:00 +00002898 [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))], itins.rr>,
2899 Sched<[itins.Sched]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002900}
2901
// Instantiates avx512_mask_binop for all four mask widths, appending the width
// letter to the mnemonic. B uses HasDQI and D/Q use HasBWI; the predicate for
// W is the `prdW` parameter, which defaults to HasAVX512. D and Q add VEX_W.
Robert Khasanov595683d2014-07-28 13:46:45 +00002902multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
Simon Pilgrim9afbe772017-12-06 19:36:00 +00002903 SDPatternOperator OpNode, OpndItins itins,
2904 bit IsCommutable, Predicate prdW = HasAVX512> {
Robert Khasanov595683d2014-07-28 13:46:45 +00002905 defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
Simon Pilgrim9afbe772017-12-06 19:36:00 +00002906 itins, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
Robert Khasanov595683d2014-07-28 13:46:45 +00002907 defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
Simon Pilgrim9afbe772017-12-06 19:36:00 +00002908 itins, prdW, IsCommutable>, VEX_4V, VEX_L, PS;
Robert Khasanov595683d2014-07-28 13:46:45 +00002909 defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
Simon Pilgrim9afbe772017-12-06 19:36:00 +00002910 itins, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
Robert Khasanov595683d2014-07-28 13:46:45 +00002911 defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
Simon Pilgrim9afbe772017-12-06 19:36:00 +00002912 itins, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002913}
2914
// Pattern fragments for the composite mask operations:
//   andn(i0, i1) = and(not(i0), i1)      xnor(i0, i1) = not(xor(i0, i1))
2915def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
2916def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
Craig Topper7b9cc142016-11-03 06:04:28 +00002917// These nodes use 'vnot' instead of 'not' to support vectors.
2918def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
2919def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002920
// Mask binary instructions. The trailing 1/0 is IsCommutable: KANDN is the
// only non-commutable one. KADD overrides the W-form predicate with HasDQI;
// the others keep the default (HasAVX512).
Simon Pilgrim9afbe772017-12-06 19:36:00 +00002921defm KAND : avx512_mask_binop_all<0x41, "kand", and, SSE_BIT_ITINS_P, 1>;
2922defm KOR : avx512_mask_binop_all<0x45, "kor", or, SSE_BIT_ITINS_P, 1>;
2923defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, SSE_BIT_ITINS_P, 1>;
2924defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, SSE_BIT_ITINS_P, 1>;
2925defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, SSE_BIT_ITINS_P, 0>;
Craig Topper3ce035a2018-02-12 01:33:38 +00002926defm KADD : avx512_mask_binop_all<0x4A, "kadd", X86kadd, SSE_BIT_ITINS_P, 1, HasDQI>;
Elena Demikhovskyb64d7e82013-12-25 10:06:40 +00002927
// Selection patterns that lower a binary mask operation on a narrow mask type
// to the 16-bit instruction `Inst`: both operands are copied into VK16, `Inst`
// is applied, and the result is copied back to the operand's own mask class.
//   VOpNode - operator matched for the VK8, VK2 and VK4 patterns.
//   OpNode  - operator matched for the VK1 pattern.
//   Inst    - the 16-bit (W) mask instruction to emit.
Craig Topper7b9cc142016-11-03 06:04:28 +00002928multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode,
2929 Instruction Inst> {
Elena Demikhovskyd1084c52015-04-27 12:57:59 +00002930 // With AVX512F, 8-bit mask is promoted to 16-bit mask,
2931 // for the DQI set, this type is legal and KxxxB instruction is used
  // (the brace-less `let` guards only the VK8 pattern that follows it)
2932 let Predicates = [NoDQI] in
Craig Topper7b9cc142016-11-03 06:04:28 +00002933 def : Pat<(VOpNode VK8:$src1, VK8:$src2),
Elena Demikhovskyd1084c52015-04-27 12:57:59 +00002934 (COPY_TO_REGCLASS
2935 (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
2936 (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
2937
2938 // All types smaller than 8 bits require conversion anyway
2939 def : Pat<(OpNode VK1:$src1, VK1:$src2),
2940 (COPY_TO_REGCLASS (Inst
2941 (COPY_TO_REGCLASS VK1:$src1, VK16),
2942 (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
  // The result goes back to the same class as the operands (VK2 / VK4), not
  // VK1 as a copy of the pattern above would have it.
Craig Topper7b9cc142016-11-03 06:04:28 +00002943 def : Pat<(VOpNode VK2:$src1, VK2:$src2),
Elena Demikhovskyd1084c52015-04-27 12:57:59 +00002944 (COPY_TO_REGCLASS (Inst
2945 (COPY_TO_REGCLASS VK2:$src1, VK16),
2946 (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
Craig Topper7b9cc142016-11-03 06:04:28 +00002947 def : Pat<(VOpNode VK4:$src1, VK4:$src2),
Elena Demikhovskyd1084c52015-04-27 12:57:59 +00002948 (COPY_TO_REGCLASS (Inst
2949 (COPY_TO_REGCLASS VK4:$src1, VK16),
2950 (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002951}
2952
// Narrow-mask lowering for each logical mask operation, using the W-form
// instruction. The first argument is matched for the VK8/VK2/VK4 patterns and
// the second for the VK1 pattern.
Craig Topper7b9cc142016-11-03 06:04:28 +00002953defm : avx512_binop_pat<and, and, KANDWrr>;
2954defm : avx512_binop_pat<vandn, andn, KANDNWrr>;
2955defm : avx512_binop_pat<or, or, KORWrr>;
2956defm : avx512_binop_pat<vxnor, xnor, KXNORWrr>;
2957defm : avx512_binop_pat<xor, xor, KXORWrr>;
Elena Demikhovskyd1084c52015-04-27 12:57:59 +00002958
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002959// Mask unpacking
// Defines the KUNPCK<Suffix> `rr` instruction (opcode 0x4b) on mask class KRC,
// without a pattern of its own, plus a separate pattern that selects it for a
// concat_vectors of two KRCSrc masks producing VT.
Igor Bregera54a1a82015-09-08 13:10:00 +00002960multiclass avx512_mask_unpck<string Suffix,RegisterClass KRC, ValueType VT,
Simon Pilgrim9afbe772017-12-06 19:36:00 +00002961 RegisterClass KRCSrc, OpndItins itins, Predicate prd> {
Igor Bregera54a1a82015-09-08 13:10:00 +00002962 let Predicates = [prd] in {
Craig Topperad2ce362016-01-05 07:44:08 +00002963 let hasSideEffects = 0 in
Igor Bregera54a1a82015-09-08 13:10:00 +00002964 def rr : I<0x4b, MRMSrcReg, (outs KRC:$dst),
2965 (ins KRC:$src1, KRC:$src2),
Simon Pilgrim9afbe772017-12-06 19:36:00 +00002966 "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
2967 itins.rr>, VEX_4V, VEX_L, Sched<[itins.Sched]>;
Igor Bregera54a1a82015-09-08 13:10:00 +00002968
  // Note the operand swap: the second concat_vectors operand is passed as the
  // instruction's first source and vice versa.
2969 def : Pat<(VT (concat_vectors KRCSrc:$src1, KRCSrc:$src2)),
2970 (!cast<Instruction>(NAME##rr)
2971 (COPY_TO_REGCLASS KRCSrc:$src2, KRC),
2972 (COPY_TO_REGCLASS KRCSrc:$src1, KRC))>;
2973 }
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002974}
2975
// KUNPCKBW concatenates two VK8 into v16i1 (HasAVX512); KUNPCKWD two VK16 into
// v32i1 and KUNPCKDQ two VK32 into v64i1 (both HasBWI).
Simon Pilgrim9afbe772017-12-06 19:36:00 +00002976defm KUNPCKBW : avx512_mask_unpck<"bw", VK16, v16i1, VK8, SSE_UNPCK, HasAVX512>, PD;
2977defm KUNPCKWD : avx512_mask_unpck<"wd", VK32, v32i1, VK16, SSE_UNPCK, HasBWI>, PS;
2978defm KUNPCKDQ : avx512_mask_unpck<"dq", VK64, v64i1, VK32, SSE_UNPCK, HasBWI>, PS, VEX_W;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002979
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002980// Mask bit testing
// Defines a single `rr` instruction that applies OpNode to two mask registers
// of class KRC. It has no register output; the result is written to EFLAGS.
2981multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
Simon Pilgrim9afbe772017-12-06 19:36:00 +00002982 SDNode OpNode, OpndItins itins, Predicate prd> {
Igor Breger5ea0a6812015-08-31 13:30:19 +00002983 let Predicates = [prd], Defs = [EFLAGS] in
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002984 def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
Craig Topperedb09112014-11-25 20:11:23 +00002985 !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
Simon Pilgrim9afbe772017-12-06 19:36:00 +00002986 [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))], itins.rr>,
2987 Sched<[itins.Sched]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002988}
2989
// Instantiates avx512_mask_testop for all four mask widths. B uses HasDQI and
// Q/D use HasBWI; the predicate for W is `prdW`, which defaults to HasAVX512.
// Q and D add VEX_W.
Igor Breger5ea0a6812015-08-31 13:30:19 +00002990multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim9afbe772017-12-06 19:36:00 +00002991 OpndItins itins, Predicate prdW = HasAVX512> {
2992 defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, itins, HasDQI>,
Igor Breger5ea0a6812015-08-31 13:30:19 +00002993 VEX, PD;
Simon Pilgrim9afbe772017-12-06 19:36:00 +00002994 defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, itins, prdW>,
Igor Breger5ea0a6812015-08-31 13:30:19 +00002995 VEX, PS;
Simon Pilgrim9afbe772017-12-06 19:36:00 +00002996 defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, itins, HasBWI>,
Igor Breger5ea0a6812015-08-31 13:30:19 +00002997 VEX, PS, VEX_W;
Simon Pilgrim9afbe772017-12-06 19:36:00 +00002998 defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, itins, HasBWI>,
Igor Breger5ea0a6812015-08-31 13:30:19 +00002999 VEX, PD, VEX_W;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003000}
3001
// KORTEST keeps the default W-form predicate (HasAVX512); KTEST overrides it
// with HasDQI.
Simon Pilgrim9afbe772017-12-06 19:36:00 +00003002defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SSE_PTEST>;
3003defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, SSE_PTEST, HasDQI>;
Elena Demikhovsky47fc44e2013-12-16 13:52:35 +00003004
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003005// Mask shift
// Defines a single `ri` instruction that applies OpNode to a mask register of
// class KRC and an 8-bit immediate, writing a register of the same class.
// The def itself is tagged with Predicates = [HasAVX512].
3006multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
Simon Pilgrim9afbe772017-12-06 19:36:00 +00003007 SDNode OpNode, OpndItins itins> {
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003008 let Predicates = [HasAVX512] in
Craig Topper7ff6ab32015-01-21 08:43:49 +00003009 def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003010 !strconcat(OpcodeStr,
Craig Topperedb09112014-11-25 20:11:23 +00003011 "\t{$imm, $src, $dst|$dst, $src, $imm}"),
Simon Pilgrim9afbe772017-12-06 19:36:00 +00003012 [(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))],
3013 itins.rr>, Sched<[itins.Sched]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003014}
3015
// Instantiates avx512_mask_shiftop for all four mask widths. W and B share
// opc1 and Q and D share opc2; within each pair VEX_W tells them apart
// (set for W and Q, clear for B and D).
// NOTE(review): avx512_mask_shiftop also sets Predicates = [HasAVX512] on its
// def; confirm which `let` takes effect for the B, Q and D forms wrapped in
// HasDQI / HasBWI below.
3016multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
Simon Pilgrim9afbe772017-12-06 19:36:00 +00003017 SDNode OpNode, OpndItins itins> {
3018 defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
3019 itins>, VEX, TAPD, VEX_W;
Elena Demikhovsky1a603b32015-01-25 12:47:15 +00003020 let Predicates = [HasDQI] in
Simon Pilgrim9afbe772017-12-06 19:36:00 +00003021 defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
3022 itins>, VEX, TAPD;
Elena Demikhovsky1a603b32015-01-25 12:47:15 +00003023 let Predicates = [HasBWI] in {
Simon Pilgrim9afbe772017-12-06 19:36:00 +00003024 defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
3025 itins>, VEX, TAPD, VEX_W;
3026 defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
3027 itins>, VEX, TAPD;
Michael Liao66233b72015-08-06 09:06:20 +00003028 }
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003029}
3030
// KSHIFTL uses opcodes 0x32 (W/B) and 0x33 (Q/D); KSHIFTR uses 0x30 and 0x31.
Simon Pilgrim9afbe772017-12-06 19:36:00 +00003031defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, SSE_PSHUF>;
3032defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, SSE_PSHUF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003033
// Lowers a packed compare `Frag` on the narrow vector type `Narrow` to the
// instruction named InstStr + "Zrr" / "Zrrk", which operates on `Wide`.
// Each narrow source is inserted into an undefined Wide register
// (INSERT_SUBREG on IMPLICIT_DEF at Narrow.SubRegIdx) and the resulting mask
// is copied to Narrow.KRC.
Craig Topper513d3fa2018-01-27 20:19:02 +00003034multiclass axv512_icmp_packed_no_vlx_lowering<PatFrag Frag, string InstStr,
Craig Topperd58c1652018-01-07 18:20:37 +00003035 X86VectorVTInfo Narrow,
3036 X86VectorVTInfo Wide> {
  // Unmasked compare.
Craig Topper5e4b4532018-01-27 23:49:14 +00003037 def : Pat<(Narrow.KVT (Frag (Narrow.VT Narrow.RC:$src1),
Craig Topperd58c1652018-01-07 18:20:37 +00003038 (Narrow.VT Narrow.RC:$src2))),
3039 (COPY_TO_REGCLASS
Craig Topper5e4b4532018-01-27 23:49:14 +00003040 (!cast<Instruction>(InstStr#"Zrr")
Craig Topperd58c1652018-01-07 18:20:37 +00003041 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3042 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))),
3043 Narrow.KRC)>;
Ayman Musa721d97f2017-06-27 12:08:37 +00003044
  // Compare ANDed with a mask: the narrow mask is copied to Wide.KRC and used
  // as the write-mask of the "Zrrk" form.
Craig Topper5e4b4532018-01-27 23:49:14 +00003045 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3046 (Frag (Narrow.VT Narrow.RC:$src1),
Craig Topperd58c1652018-01-07 18:20:37 +00003047 (Narrow.VT Narrow.RC:$src2)))),
Craig Toppereb5c4112017-09-24 05:24:52 +00003048 (COPY_TO_REGCLASS
Craig Topper5e4b4532018-01-27 23:49:14 +00003049 (!cast<Instruction>(InstStr#"Zrrk")
Craig Topperd58c1652018-01-07 18:20:37 +00003050 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3051 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3052 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))),
3053 Narrow.KRC)>;
Ayman Musa721d97f2017-06-27 12:08:37 +00003054}
3055
// Same widening scheme as axv512_icmp_packed_no_vlx_lowering, for compares
// that carry a condition-code immediate ($cc): selects InstStr + Zrri
// (unmasked) or InstStr + Zrrik (ANDed with $mask) on the Wide type and copies
// the resulting mask to Narrow.KRC.
3056multiclass axv512_icmp_packed_cc_no_vlx_lowering<SDNode OpNode, string InstStr,
Craig Topperd58c1652018-01-07 18:20:37 +00003057 X86VectorVTInfo Narrow,
3058 X86VectorVTInfo Wide> {
3059def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1),
3060 (Narrow.VT Narrow.RC:$src2), imm:$cc)),
3061 (COPY_TO_REGCLASS
3062 (!cast<Instruction>(InstStr##Zrri)
3063 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3064 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3065 imm:$cc), Narrow.KRC)>;
Ayman Musa721d97f2017-06-27 12:08:37 +00003066
Craig Topperd58c1652018-01-07 18:20:37 +00003067def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3068 (OpNode (Narrow.VT Narrow.RC:$src1),
3069 (Narrow.VT Narrow.RC:$src2), imm:$cc))),
3070 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik)
3071 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3072 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3073 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3074 imm:$cc), Narrow.KRC)>;
Ayman Musa721d97f2017-06-27 12:08:37 +00003075}
3076
// Without VLX, 128-bit and 256-bit dword/qword (and float/double) compares are
// widened to the 512-bit instructions. Each group below pairs a narrow type
// with its 512-bit counterpart.
3077let Predicates = [HasAVX512, NoVLX] in {
  // Greater-than / equality compares without a condition-code operand.
Craig Topperd58c1652018-01-07 18:20:37 +00003078 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTD", v8i32x_info, v16i32_info>;
Craig Topper9471a7c2018-02-19 19:23:31 +00003079 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQD", v8i32x_info, v16i32_info>;
Ayman Musa721d97f2017-06-27 12:08:37 +00003080
Craig Topperd58c1652018-01-07 18:20:37 +00003081 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTD", v4i32x_info, v16i32_info>;
Craig Topper9471a7c2018-02-19 19:23:31 +00003082 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQD", v4i32x_info, v16i32_info>;
Craig Topperd58c1652018-01-07 18:20:37 +00003083
3084 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTQ", v4i64x_info, v8i64_info>;
Craig Topper9471a7c2018-02-19 19:23:31 +00003085 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQQ", v4i64x_info, v8i64_info>;
Craig Topperd58c1652018-01-07 18:20:37 +00003086
3087 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTQ", v2i64x_info, v8i64_info>;
Craig Topper9471a7c2018-02-19 19:23:31 +00003088 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQQ", v2i64x_info, v8i64_info>;
Craig Topperd58c1652018-01-07 18:20:37 +00003089
  // Compares with a condition-code immediate: FP (VCMPPS/VCMPPD), signed
  // integer (X86cmpm -> VPCMPD/VPCMPQ) and X86cmpmu -> VPCMPUD/VPCMPUQ.
3090 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS", v8f32x_info, v16f32_info>;
3091 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPD", v8i32x_info, v16i32_info>;
3092 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUD", v8i32x_info, v16i32_info>;
3093
3094 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS", v4f32x_info, v16f32_info>;
3095 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPD", v4i32x_info, v16i32_info>;
3096 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUD", v4i32x_info, v16i32_info>;
3097
3098 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPD", v4f64x_info, v8f64_info>;
3099 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPQ", v4i64x_info, v8i64_info>;
3100 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUQ", v4i64x_info, v8i64_info>;
3101
3102 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPD", v2f64x_info, v8f64_info>;
3103 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPQ", v2i64x_info, v8i64_info>;
3104 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUQ", v2i64x_info, v8i64_info>;
Ayman Musa721d97f2017-06-27 12:08:37 +00003105}
3106
// Byte and word counterparts of the no-VLX compare lowerings. These use the
// BWI 512-bit compare instructions, so they are predicated on HasBWI instead
// of HasAVX512. Template arguments are: compare node, mnemonic stem of the
// 512-bit instruction, narrow vector type, 512-bit vector type.
let Predicates = [HasBWI, NoVLX] in {
  // Greater-than and equality compares.
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTB", v32i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQB", v32i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTB", v16i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQB", v16i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTW", v16i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQW", v16i16x_info, v32i16_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTW", v8i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQW", v8i16x_info, v32i16_info>;

  // Compares with an immediate condition code (X86cmpm / X86cmpmu).
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPB", v32i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUB", v32i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPB", v16i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUB", v16i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPW", v16i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUW", v16i16x_info, v32i16_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPW", v8i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUW", v8i16x_info, v32i16_info>;
}
3132
// Mask setting all 0s or 1s.
//
// Defines one pseudo instruction, named after the enclosing defm, that takes
// no inputs and produces the constant mask `Val` in a register of class KRC.
//   KRC - mask register class of the result.
//   VT  - value type of the mask constant.
//   Val - pattern fragment matching the constant (all zeros or all ones).
// The pseudo is marked rematerializable and as cheap as a move, and is
// scheduled as WriteZero.
multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
  let Predicates = [HasAVX512] in
    let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
        SchedRW = [WriteZero] in
      def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
                     [(set KRC:$dst, (VT Val))]>;
}
3141
// Instantiates avx512_mask_setop for the 16-, 32- and 64-bit mask register
// classes, appending the suffixes W, D and Q to the defm name.
//   Val - pattern fragment matching the mask constant to materialize.
multiclass avx512_mask_setop_w<PatFrag Val> {
  defm W : avx512_mask_setop<VK16, v16i1, Val>;
  defm D : avx512_mask_setop<VK32, v32i1, Val>;
  defm Q : avx512_mask_setop<VK64, v64i1, Val>;
}
3147
// All-zeros (KSET0W/D/Q) and all-ones (KSET1W/D/Q) mask pseudos.
defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
3150
// With AVX-512 only, masks narrower than 16 bits (v8i1, v4i1, v2i1, v1i1) are
// promoted to a 16-bit mask: the constant is materialized with the 16-bit
// KSET0W/KSET1W pseudo and then copied to the narrower mask register class.
let Predicates = [HasAVX512] in {
  def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
  def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
  def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
  def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
  def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;
  def : Pat<(v4i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK4)>;
  def : Pat<(v2i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK2)>;
  def : Pat<(v1i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK1)>;
}
Igor Bregerf1bd7612016-03-06 07:46:03 +00003162
// Patterns for kmask insert_subvector/extract_subvector to/from index=0.
//
// Both operations are selected as a plain register-class copy:
//   - extracting the low subVT from a VT value copies RC -> subRC;
//   - inserting a subVT value at index 0 of an undef VT copies subRC -> RC.
//   subRC/subVT - register class and value type of the narrow mask.
//   RC/VT       - register class and value type of the wide mask.
multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
                                             RegisterClass RC, ValueType VT> {
  def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
            (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;

  def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
            (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
}
// Instantiate the index-0 insert/extract patterns for every pair of a
// narrower and a wider mask type among v1i1 ... v64i1.
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK2,  v2i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK4,  v4i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK4,  v4i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003198
//===----------------------------------------------------------------------===//
// AVX-512 - Aligned and unaligned load and store
//
3202
Elena Demikhovsky2689d782015-03-02 12:46:21 +00003203
// Register-move and load forms of an EVEX vector move for one vector type.
//
//   opc         - opcode byte.
//   OpcodeStr   - assembly mnemonic.
//   itins       - itineraries; .rr is used for register forms, .rm for loads.
//   _           - vector type info (register class, mask class, memory
//                 operand, execution domain, ...).
//   ld_frag     - load fragment used in the rm/rmk/rmkz patterns.
//   mload       - masked-load fragment used by the anonymous patterns.
//   NoRMPattern - when set, the unmasked load `rm` gets no selection pattern.
//   SelectOprr  - select operator used in the patterns of the masked
//                 register forms (rrk, rrkz); the masked memory forms always
//                 use vselect.
//
// Defines rr, rrkz, rm, rrk, rmk and rmkz, followed by patterns mapping
// masked loads onto rmkz (undef or all-zeros pass-through) and rmk (register
// pass-through).
multiclass avx512_load<bits<8> opc, string OpcodeStr, MoveLoadStoreItins itins,
                       X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
                       bit NoRMPattern = 0,
                       SDPatternOperator SelectOprr = vselect> {
  let hasSideEffects = 0 in {
  // Plain register-to-register move; no selection pattern.
  def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
                    _.ExeDomain, itins.rr>, EVEX, Sched<[WriteMove]>;
  // Zero-masked register move: unselected elements become zero.
  def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                      (ins _.KRCWM:$mask, _.RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                                 "${dst} {${mask}} {z}, $src}"),
                      [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
                                              (_.VT _.RC:$src),
                                              _.ImmAllZerosV)))], _.ExeDomain,
                      itins.rr>, EVEX, EVEX_KZ, Sched<[WriteMove]>;

  // Unmasked load. This brace-less `let` applies to `rm` only.
  let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
  def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    !if(NoRMPattern, [],
                        [(set _.RC:$dst,
                          (_.VT (bitconvert (ld_frag addr:$src))))]),
                    _.ExeDomain, itins.rm>, EVEX, Sched<[WriteLoad]>;

  // Merge-masked forms: $src0 supplies the unselected elements and is tied
  // to the destination register.
  let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
    def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                       (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
                       !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
                                  "${dst} {${mask}}, $src1}"),
                       [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
                                               (_.VT _.RC:$src1),
                                               (_.VT _.RC:$src0))))], _.ExeDomain,
                       itins.rr>, EVEX, EVEX_K, Sched<[WriteMove]>;
    def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                       (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
                       !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
                                  "${dst} {${mask}}, $src1}"),
                       [(set _.RC:$dst, (_.VT
                           (vselect _.KRCWM:$mask,
                            (_.VT (bitconvert (ld_frag addr:$src1))),
                            (_.VT _.RC:$src0))))], _.ExeDomain, itins.rm>,
                       EVEX, EVEX_K, Sched<[WriteLoad]>;
  }
  // Zero-masked load.
  def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                      (ins _.KRCWM:$mask, _.MemOp:$src),
                      OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
                                 "${dst} {${mask}} {z}, $src}",
                      [(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask,
                        (_.VT (bitconvert (ld_frag addr:$src))), _.ImmAllZerosV)))],
                      _.ExeDomain, itins.rm>, EVEX, EVEX_KZ, Sched<[WriteLoad]>;
  }

  // Masked load with an undef pass-through: use the zero-masking form.
  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(NAME#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;

  // Masked load with an all-zeros pass-through.
  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
            (!cast<Instruction>(NAME#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;

  // Masked load with a register pass-through: use the merge-masking form.
  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
            (!cast<Instruction>(NAME#_.ZSuffix##rmk) _.RC:$src0,
             _.KRCWM:$mask, addr:$ptr)>;
}
3266
// Aligned-load instantiation of avx512_load at all three vector lengths.
//
//   opc, OpcodeStr - opcode byte and mnemonic forwarded to avx512_load.
//   _              - bundle of the 512/256/128-bit vector type infos.
//   prd            - predicate required by the instruction; the 256-bit and
//                    128-bit forms additionally require HasVLX.
//   NoRMPattern    - forwarded to avx512_load to suppress the `rm` pattern.
//
// Each width uses its own AlignedLdFrag and width-specific
// masked_load_aligned fragment, with the SSE_MOVA itineraries.
multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo _,
                                 Predicate prd,
                                 bit NoRMPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_load<opc, OpcodeStr, SSE_MOVA, _.info512,
                       _.info512.AlignedLdFrag, masked_load_aligned512,
                       NoRMPattern>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_load<opc, OpcodeStr, SSE_MOVA, _.info256,
                          _.info256.AlignedLdFrag, masked_load_aligned256,
                          NoRMPattern>, EVEX_V256;
  defm Z128 : avx512_load<opc, OpcodeStr, SSE_MOVA, _.info128,
                          _.info128.AlignedLdFrag, masked_load_aligned128,
                          NoRMPattern>, EVEX_V128;
  }
}
3285
// Unaligned-load instantiation of avx512_load at all three vector lengths.
//
//   opc, OpcodeStr - opcode byte and mnemonic forwarded to avx512_load.
//   _              - bundle of the 512/256/128-bit vector type infos.
//   prd            - predicate required by the instruction; the 256-bit and
//                    128-bit forms additionally require HasVLX.
//   NoRMPattern    - forwarded to avx512_load to suppress the `rm` pattern.
//   SelectOprr     - forwarded to avx512_load as the select operator of the
//                    masked register-move patterns.
//
// Every width uses its LdFrag, the masked_load_unaligned fragment and the
// SSE_MOVU itineraries.
multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
                          AVX512VLVectorVTInfo _,
                          Predicate prd,
                          bit NoRMPattern = 0,
                          SDPatternOperator SelectOprr = vselect> {
  let Predicates = [prd] in
  defm Z : avx512_load<opc, OpcodeStr, SSE_MOVU, _.info512, _.info512.LdFrag,
                       masked_load_unaligned, NoRMPattern,
                       SelectOprr>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_load<opc, OpcodeStr, SSE_MOVU, _.info256, _.info256.LdFrag,
                          masked_load_unaligned, NoRMPattern,
                          SelectOprr>, EVEX_V256;
  defm Z128 : avx512_load<opc, OpcodeStr, SSE_MOVU, _.info128, _.info128.LdFrag,
                          masked_load_unaligned, NoRMPattern,
                          SelectOprr>, EVEX_V128;
  }
}
3305
// Store forms of an EVEX vector move for one vector type, plus the
// MRMDestReg ("_REV") register-to-register encodings of the same opcode.
//
//   opc         - opcode byte.
//   OpcodeStr   - assembly mnemonic; the _REV forms print it with ".s".
//   itins       - itineraries; .rr for register forms, .mr for stores.
//   _           - vector type info.
//   st_frag     - store fragment used in the `mr` pattern.
//   mstore      - masked-store fragment matched onto `mrk`.
//   Name        - full instruction name stem; Name#rr, Name#rrk and
//                 Name#rrkz are passed to FoldGenData on the _REV forms.
//   NoMRPattern - when set, the unmasked store `mr` gets no selection
//                 pattern.
multiclass avx512_store<bits<8> opc, string OpcodeStr, MoveLoadStoreItins itins,
                        X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
                        string Name, bit NoMRPattern = 0> {
  // Register forms using the store opcode; none has a selection pattern.
  let hasSideEffects = 0 in {
  def rr_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
                        OpcodeStr # ".s\t{$src, $dst|$dst, $src}",
                        [], _.ExeDomain, itins.rr>, EVEX, FoldGenData<Name#rr>,
                        Sched<[WriteMove]>;
  def rrk_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
                         (ins _.KRCWM:$mask, _.RC:$src),
                         OpcodeStr # ".s\t{$src, ${dst} {${mask}}|"#
                         "${dst} {${mask}}, $src}",
                         [], _.ExeDomain, itins.rr>, EVEX, EVEX_K,
                         FoldGenData<Name#rrk>, Sched<[WriteMove]>;
  def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
                          (ins _.KRCWM:$mask, _.RC:$src),
                          OpcodeStr # ".s\t{$src, ${dst} {${mask}} {z}|" #
                          "${dst} {${mask}} {z}, $src}",
                          [], _.ExeDomain, itins.rr>, EVEX, EVEX_KZ,
                          FoldGenData<Name#rrkz>, Sched<[WriteMove]>;
  }

  // NOTE(review): this brace-less `let` covers only `mr`. `mrk` below has an
  // empty pattern list and no explicit mayStore; presumably the flag is
  // inferred from the mstore Pat that follows - confirm.
  let hasSideEffects = 0, mayStore = 1 in
  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    !if(NoMRPattern, [],
                        [(st_frag (_.VT _.RC:$src), addr:$dst)]),
                    _.ExeDomain, itins.mr>, EVEX, Sched<[WriteStore]>;
  def mrk : AVX512PI<opc, MRMDestMem, (outs),
                     (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
              OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
              [], _.ExeDomain, itins.mr>, EVEX, EVEX_K, Sched<[WriteStore]>;

  // Masked store selects the merge-masked store form.
  def: Pat<(mstore addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src)),
           (!cast<Instruction>(NAME#_.ZSuffix##mrk) addr:$ptr,
                                                    _.KRCWM:$mask, _.RC:$src)>;
}
3343
Robert Khasanov7ca7df02014-08-04 14:35:15 +00003344
// Unaligned-store instantiation of avx512_store at all three vector lengths.
//
//   opc, OpcodeStr - opcode byte and mnemonic forwarded to avx512_store.
//   _              - bundle of the 512/256/128-bit vector type infos.
//   prd            - predicate required by the instruction; the 256-bit and
//                    128-bit forms additionally require HasVLX.
//   Name           - instruction name stem; the width suffix (Z, Z256, Z128)
//                    is appended before it is passed to avx512_store.
//   NoMRPattern    - forwarded to avx512_store to suppress the `mr` pattern.
//
// Every width uses the `store` and masked_store_unaligned fragments with the
// SSE_MOVU itineraries.
multiclass avx512_store_vl<bits<8> opc, string OpcodeStr,
                           AVX512VLVectorVTInfo _, Predicate prd,
                           string Name, bit NoMRPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_store<opc, OpcodeStr, SSE_MOVU, _.info512, store,
                        masked_store_unaligned, Name#Z, NoMRPattern>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, SSE_MOVU, _.info256, store,
                             masked_store_unaligned, Name#Z256,
                             NoMRPattern>, EVEX_V256;
    defm Z128 : avx512_store<opc, OpcodeStr, SSE_MOVU, _.info128, store,
                             masked_store_unaligned, Name#Z128,
                             NoMRPattern>, EVEX_V128;
  }
}
3361
// Aligned-store instantiation of avx512_store at all three vector lengths.
//
//   opc, OpcodeStr - opcode byte and mnemonic forwarded to avx512_store.
//   _              - bundle of the 512/256/128-bit vector type infos.
//   prd            - predicate required by the instruction; the 256-bit and
//                    128-bit forms additionally require HasVLX.
//   Name           - instruction name stem; the width suffix (Z, Z256, Z128)
//                    is appended before it is passed to avx512_store.
//   NoMRPattern    - forwarded to avx512_store to suppress the `mr` pattern.
//
// Every width uses `alignedstore` and its width-specific
// masked_store_aligned fragment, with the SSE_MOVA itineraries.
multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _, Predicate prd,
                                  string Name, bit NoMRPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_store<opc, OpcodeStr, SSE_MOVA, _.info512, alignedstore,
                        masked_store_aligned512, Name#Z,
                        NoMRPattern>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, SSE_MOVA, _.info256, alignedstore,
                             masked_store_aligned256, Name#Z256,
                             NoMRPattern>, EVEX_V256;
    defm Z128 : avx512_store<opc, OpcodeStr, SSE_MOVA, _.info128, alignedstore,
                             masked_store_aligned128, Name#Z128,
                             NoMRPattern>, EVEX_V128;
  }
}
3379
// Aligned and unaligned EVEX vector moves. Each defm combines a load
// multiclass (first opcode) with a store multiclass (second opcode) and then
// applies the prefix / VEX_W / EVEX_CD8 encoding classes.

// Aligned FP moves.
defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
                                     HasAVX512>,
               avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
                                      HasAVX512, "VMOVAPS">,
               PS, EVEX_CD8<32, CD8VF>;

defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
                                     HasAVX512>,
               avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
                                      HasAVX512, "VMOVAPD">,
               PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Unaligned FP moves. null_frag is passed as SelectOprr, i.e. as the select
// operator of the masked register-move patterns.
defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
                              0, null_frag>,
               avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
                               "VMOVUPS">,
               PS, EVEX_CD8<32, CD8VF>;

defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
                              0, null_frag>,
               avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
                               "VMOVUPD">,
               PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Aligned integer moves. VMOVDQA32 passes 1 for NoRMPattern/NoMRPattern, so
// its unmasked load and store carry no selection patterns of their own.
defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
                                       HasAVX512, 1>,
                 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
                                        HasAVX512, "VMOVDQA32", 1>,
                 PD, EVEX_CD8<32, CD8VF>;

defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
                                       HasAVX512>,
                 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
                                        HasAVX512, "VMOVDQA64">,
                 PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Unaligned integer moves. The byte and word forms require BWI. All but
// VMOVDQU64 suppress the unmasked load/store patterns.
defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI, 1>,
                avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info,
                                HasBWI, "VMOVDQU8", 1>,
                XD, EVEX_CD8<8, CD8VF>;

defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI, 1>,
                 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info,
                                 HasBWI, "VMOVDQU16", 1>,
                 XD, VEX_W, EVEX_CD8<16, CD8VF>;

defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
                                1, null_frag>,
                 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info,
                                 HasAVX512, "VMOVDQU32", 1>,
                 XS, EVEX_CD8<32, CD8VF>;

defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
                                0, null_frag>,
                 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info,
                                 HasAVX512, "VMOVDQU64">,
                 XS, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovsky1f3ed412013-10-22 09:19:28 +00003437
// Special instructions to help with spilling when we don't have VLX. We need
// to load or store from a ZMM register instead. These are converted in
// expandPostRAPseudos.
//
// Load pseudos for 128-bit and 256-bit registers, in aligned (VMOVAPS) and
// unaligned (VMOVUPS) flavours. They have no assembly string and no
// selection pattern.
let isReMaterializable = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteLoad], mayLoad = 1, hasSideEffects = 0 in {
def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
                            "", [], IIC_SSE_MOVA_P_RM>;
def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
                            "", [], IIC_SSE_MOVA_P_RM>;
def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
                            "", [], IIC_SSE_MOVA_P_RM>;
def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
                            "", [], IIC_SSE_MOVA_P_RM>;
}
3452
// Store pseudos for 128-bit and 256-bit registers on targets without VLX, in
// aligned (VMOVAPS) and unaligned (VMOVUPS) flavours. They have no assembly
// string and no selection pattern.
let isPseudo = 1, SchedRW = [WriteStore], mayStore = 1, hasSideEffects = 0 in {
def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
                            "", [], IIC_SSE_MOVA_P_MR>;
def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
                            "", [], IIC_SSE_MOVA_P_MR>;
def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
                            "", [], IIC_SSE_MOVA_P_MR>;
def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
                            "", [], IIC_SSE_MOVA_P_MR>;
}
3463
// A select that yields zero where the mask bit is set and $src elsewhere is
// a zero-masked move of $src under the inverted mask. The mask is inverted
// with KNOTWrr; for v8i64 the VK8 mask is first copied to VK16 and the
// result copied back to VK8.
def : Pat<(v8i64 (vselect VK8WM:$mask, (bc_v8i64 (v16i32 immAllZerosV)),
                          (v8i64 VR512:$src))),
          (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
                                            VK8), VR512:$src)>;

def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
                           (v16i32 VR512:$src))),
          (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;

// These patterns exist to prevent the above patterns from introducing a second
// mask inversion when one already exists.
def : Pat<(v8i64 (vselect (xor VK8:$mask, (v8i1 immAllOnesV)),
                          (bc_v8i64 (v16i32 immAllZerosV)),
                          (v8i64 VR512:$src))),
          (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
                           (v16i32 immAllZerosV),
                           (v16i32 VR512:$src))),
          (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
3483
// Lower a masked select on a narrow vector type by performing it with a
// masked move on a wider type.
//
//   InstrStr - name stem of the wide move instruction; "rrk" / "rrkz" is
//              appended to pick the merge-masked / zero-masked register form.
//   Narrow   - type info of the vector being selected.
//   Wide     - type info of the vector the operation is performed on.
//
// The narrow operands are inserted at Narrow.SubRegIdx into IMPLICIT_DEF wide
// values, the mask is copied to the wide write-mask register class, and the
// narrow result is extracted from the wide result at the same sub-register.
multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
                              X86VectorVTInfo Wide> {
  // select(mask, src1, src0): merge-masked move of src1 over src0.
  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                                Narrow.RC:$src1, Narrow.RC:$src0)),
            (EXTRACT_SUBREG
             (Wide.VT
              (!cast<Instruction>(InstrStr#"rrk")
               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
               (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
             Narrow.SubRegIdx)>;

  // select(mask, src1, 0): zero-masked move of src1.
  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                                Narrow.RC:$src1, Narrow.ImmAllZerosV)),
            (EXTRACT_SUBREG
             (Wide.VT
              (!cast<Instruction>(InstrStr#"rrkz")
               (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
             Narrow.SubRegIdx)>;
}
3505
// Patterns for handling masked selects of 128-bit and 256-bit vectors with
// 32-bit and 64-bit elements when VLX isn't available. Use a 512-bit
// operation and extract.
let Predicates = [HasAVX512, NoVLX] in {
  defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
  defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
  defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
  defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;

  defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
  defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
  defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
  defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
}
3519
// Byte and word masked selects of 128-bit and 256-bit vectors without VLX:
// use the 512-bit VMOVDQU8/VMOVDQU16 masked moves, which require BWI.
let Predicates = [HasBWI, NoVLX] in {
  defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
  defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;

  defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
  defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
}
3527
// Unmasked 512-bit stores of v16i32, v32i16 and v64i8 are all selected to
// the 64-bit-element move: VMOVDQA64Zmr for aligned stores and VMOVDQU64Zmr
// otherwise.
let Predicates = [HasAVX512] in {
  // 512-bit store.
  def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v16i32 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v32i16 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v64i8 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
}
3543
// With VLX, unmasked 128-bit and 256-bit stores of 32-, 16- and 8-bit
// element integer vectors are all selected to the 64-bit-element move of the
// matching width: VMOVDQA64 for aligned stores and VMOVDQU64 otherwise.
let Predicates = [HasVLX] in {
  // 128-bit store.
  def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;

  // 256-bit store.
  def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
}
3573
// Patterns that turn a vselect over the low subvector of a wider register
// into a masked register-to-register move.
//   InstrStr - name prefix of the move instruction; "rrk" / "rrkz" is appended.
//   From     - type info of the wide source vector.
//   To       - type info of the subvector extracted at index 0.
//   Cast     - type info the extracted value is bitconverted to; it supplies
//              the result type and the write-mask register class.
multiclass masked_move_for_extract<string InstrStr, X86VectorVTInfo From,
                                   X86VectorVTInfo To, X86VectorVTInfo Cast> {
  // Merge-masking: elements not selected by $mask come from $src0.
  def : Pat<
    (Cast.VT
      (vselect Cast.KRCWM:$mask,
               (bitconvert
                 (To.VT (extract_subvector (From.VT From.RC:$src), (iPTR 0)))),
               To.RC:$src0)),
    (Cast.VT
      (!cast<Instruction>(InstrStr#"rrk")
         Cast.RC:$src0,
         Cast.KRCWM:$mask,
         (EXTRACT_SUBREG From.RC:$src, To.SubRegIdx)))>;

  // Zero-masking: elements not selected by $mask are Cast.ImmAllZerosV.
  def : Pat<
    (Cast.VT
      (vselect Cast.KRCWM:$mask,
               (bitconvert
                 (To.VT (extract_subvector (From.VT From.RC:$src), (iPTR 0)))),
               Cast.ImmAllZerosV)),
    (Cast.VT
      (!cast<Instruction>(InstrStr#"rrkz")
         Cast.KRCWM:$mask,
         (EXTRACT_SUBREG From.RC:$src, To.SubRegIdx)))>;
}
3594
3595
let Predicates = [HasVLX] in {
  // Low 128 bits of a 256-bit vector: a masked extract is lowered to a
  // masked 128-bit move.
  defm : masked_move_for_extract<"VMOVDQA64Z128", v4i64x_info,  v2i64x_info, v2i64x_info>;
  defm : masked_move_for_extract<"VMOVDQA64Z128", v8i32x_info,  v4i32x_info, v2i64x_info>;
  defm : masked_move_for_extract<"VMOVDQA64Z128", v16i16x_info, v8i16x_info, v2i64x_info>;
  defm : masked_move_for_extract<"VMOVDQA64Z128", v32i8x_info,  v16i8x_info, v2i64x_info>;
  defm : masked_move_for_extract<"VMOVDQA32Z128", v4i64x_info,  v2i64x_info, v4i32x_info>;
  defm : masked_move_for_extract<"VMOVDQA32Z128", v8i32x_info,  v4i32x_info, v4i32x_info>;
  defm : masked_move_for_extract<"VMOVDQA32Z128", v16i16x_info, v8i16x_info, v4i32x_info>;
  defm : masked_move_for_extract<"VMOVDQA32Z128", v32i8x_info,  v16i8x_info, v4i32x_info>;
  defm : masked_move_for_extract<"VMOVAPDZ128",   v4f64x_info,  v2f64x_info, v2f64x_info>;
  defm : masked_move_for_extract<"VMOVAPDZ128",   v8f32x_info,  v4f32x_info, v2f64x_info>;
  defm : masked_move_for_extract<"VMOVAPSZ128",   v4f64x_info,  v2f64x_info, v4f32x_info>;
  defm : masked_move_for_extract<"VMOVAPSZ128",   v8f32x_info,  v4f32x_info, v4f32x_info>;

  // Low 128 bits of a 512-bit vector: a masked extract is lowered to a
  // masked 128-bit move.
  defm : masked_move_for_extract<"VMOVDQA64Z128", v8i64_info,  v2i64x_info, v2i64x_info>;
  defm : masked_move_for_extract<"VMOVDQA64Z128", v16i32_info, v4i32x_info, v2i64x_info>;
  defm : masked_move_for_extract<"VMOVDQA64Z128", v32i16_info, v8i16x_info, v2i64x_info>;
  defm : masked_move_for_extract<"VMOVDQA64Z128", v64i8_info,  v16i8x_info, v2i64x_info>;
  defm : masked_move_for_extract<"VMOVDQA32Z128", v8i64_info,  v2i64x_info, v4i32x_info>;
  defm : masked_move_for_extract<"VMOVDQA32Z128", v16i32_info, v4i32x_info, v4i32x_info>;
  defm : masked_move_for_extract<"VMOVDQA32Z128", v32i16_info, v8i16x_info, v4i32x_info>;
  defm : masked_move_for_extract<"VMOVDQA32Z128", v64i8_info,  v16i8x_info, v4i32x_info>;
  defm : masked_move_for_extract<"VMOVAPDZ128",   v8f64_info,  v2f64x_info, v2f64x_info>;
  defm : masked_move_for_extract<"VMOVAPDZ128",   v16f32_info, v4f32x_info, v2f64x_info>;
  defm : masked_move_for_extract<"VMOVAPSZ128",   v8f64_info,  v2f64x_info, v4f32x_info>;
  defm : masked_move_for_extract<"VMOVAPSZ128",   v16f32_info, v4f32x_info, v4f32x_info>;

  // Low 256 bits of a 512-bit vector: a masked extract is lowered to a
  // masked 256-bit move.
  defm : masked_move_for_extract<"VMOVDQA64Z256", v8i64_info,  v4i64x_info,  v4i64x_info>;
  defm : masked_move_for_extract<"VMOVDQA64Z256", v16i32_info, v8i32x_info,  v4i64x_info>;
  defm : masked_move_for_extract<"VMOVDQA64Z256", v32i16_info, v16i16x_info, v4i64x_info>;
  defm : masked_move_for_extract<"VMOVDQA64Z256", v64i8_info,  v32i8x_info,  v4i64x_info>;
  defm : masked_move_for_extract<"VMOVDQA32Z256", v8i64_info,  v4i64x_info,  v8i32x_info>;
  defm : masked_move_for_extract<"VMOVDQA32Z256", v16i32_info, v8i32x_info,  v8i32x_info>;
  defm : masked_move_for_extract<"VMOVDQA32Z256", v32i16_info, v16i16x_info, v8i32x_info>;
  defm : masked_move_for_extract<"VMOVDQA32Z256", v64i8_info,  v32i8x_info,  v8i32x_info>;
  defm : masked_move_for_extract<"VMOVAPDZ256",   v8f64_info,  v4f64x_info,  v4f64x_info>;
  defm : masked_move_for_extract<"VMOVAPDZ256",   v16f32_info, v8f32x_info,  v4f64x_info>;
  defm : masked_move_for_extract<"VMOVAPSZ256",   v8f64_info,  v4f64x_info,  v8f32x_info>;
  defm : masked_move_for_extract<"VMOVAPSZ256",   v16f32_info, v8f32x_info,  v8f32x_info>;
}
Simon Pilgrimb2a80952017-01-08 16:45:39 +00003642
// Move Int Doubleword / Quadword to Packed Int, plus the codegen-only
// GR64 <-> FR64X bitconvert moves that share the vmovq mnemonic.
//
let ExeDomain = SSEPackedInt in {
  // vmovd: GR32 / 32-bit memory -> v4i32 via scalar_to_vector.
  def VMOVDI2PDIZrr
    : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
               "vmovd\t{$src, $dst|$dst, $src}",
               [(set VR128X:$dst, (v4i32 (scalar_to_vector GR32:$src)))],
               IIC_SSE_MOVDQ>,
      EVEX, Sched<[WriteMove]>;
  def VMOVDI2PDIZrm
    : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
               "vmovd\t{$src, $dst|$dst, $src}",
               [(set VR128X:$dst,
                     (v4i32 (scalar_to_vector (loadi32 addr:$src))))],
               IIC_SSE_MOVDQ>,
      EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteLoad]>;

  // vmovq: GR64 -> v2i64 via scalar_to_vector.
  def VMOV64toPQIZrr
    : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
               "vmovq\t{$src, $dst|$dst, $src}",
               [(set VR128X:$dst, (v2i64 (scalar_to_vector GR64:$src)))],
               IIC_SSE_MOVDQ>,
      EVEX, VEX_W, Sched<[WriteMove]>;

  // Memory form of the above; it carries no selection pattern.
  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
      mayLoad = 1 in
  def VMOV64toPQIZrm
    : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i64mem:$src),
               "vmovq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVDQ>,
      EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteLoad]>;

  // Codegen-only bitconverts between GR64 / i64 memory and FR64X.
  let isCodeGenOnly = 1 in {
    def VMOV64toSDZrr
      : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
                 "vmovq\t{$src, $dst|$dst, $src}",
                 [(set FR64X:$dst, (bitconvert GR64:$src))],
                 IIC_SSE_MOVDQ>,
        EVEX, VEX_W, Sched<[WriteMove]>;
    def VMOV64toSDZrm
      : AVX512XSI<0x7E, MRMSrcMem, (outs FR64X:$dst), (ins i64mem:$src),
                  "vmovq\t{$src, $dst|$dst, $src}",
                  [(set FR64X:$dst, (bitconvert (loadi64 addr:$src)))]>,
        EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteLoad]>;
    def VMOVSDto64Zrr
      : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
                 "vmovq\t{$src, $dst|$dst, $src}",
                 [(set GR64:$dst, (bitconvert FR64X:$src))],
                 IIC_SSE_MOVDQ>,
        EVEX, VEX_W, Sched<[WriteMove]>;
    def VMOVSDto64Zmr
      : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64X:$src),
                 "vmovq\t{$src, $dst|$dst, $src}",
                 [(store (i64 (bitconvert FR64X:$src)), addr:$dst)],
                 IIC_SSE_MOVDQ>,
        EVEX, VEX_W, Sched<[WriteStore]>, EVEX_CD8<64, CD8VT1>;
  } // isCodeGenOnly = 1
} // ExeDomain = SSEPackedInt
3686
// Move Int Doubleword to Single Scalar
// Codegen-only vmovd forms that bitconvert a GR32 or a 32-bit load to FR32X.
//
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
  def VMOVDI2SSZrr
    : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
               "vmovd\t{$src, $dst|$dst, $src}",
               [(set FR32X:$dst, (bitconvert GR32:$src))],
               IIC_SSE_MOVDQ>,
      EVEX, Sched<[WriteMove]>;

  def VMOVDI2SSZrm
    : AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src),
               "vmovd\t{$src, $dst|$dst, $src}",
               [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))],
               IIC_SSE_MOVDQ>,
      EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteLoad]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3700
// Move doubleword from xmm register to r/m32
// Both forms match extractelt of element 0 from a v4i32.
//
let ExeDomain = SSEPackedInt in {
  def VMOVPDI2DIZrr
    : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
               "vmovd\t{$src, $dst|$dst, $src}",
               [(set GR32:$dst,
                     (extractelt (v4i32 VR128X:$src), (iPTR 0)))],
               IIC_SSE_MOVD_ToGP>,
      EVEX, Sched<[WriteMove]>;
  def VMOVPDI2DIZmr
    : AVX512BI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128X:$src),
               "vmovd\t{$src, $dst|$dst, $src}",
               [(store (i32 (extractelt (v4i32 VR128X:$src), (iPTR 0))),
                       addr:$dst)],
               IIC_SSE_MOVDQ>,
      EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteStore]>;
} // ExeDomain = SSEPackedInt
3716
// Move quadword from xmm1 register to r/m64
//
let ExeDomain = SSEPackedInt in {
// Register form: extract element 0 of a v2i64 into a GR64.
def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
                                                   (iPTR 0)))],
                      IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_W, Sched<[WriteMove]>,
                      Requires<[HasAVX512, In64BitMode]>;

// Memory form of the 0x7E encoding; it carries no selection pattern.
// EVEX_CD8<64, CD8VT1> is required here like on every other EVEX memory form
// in this group (compare VMOVSDto64Zmr, which has the same opcode and format):
// without it the instruction carries no disp8*N scale, so an 8-bit
// displacement would be encoded/decoded unscaled.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [], IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_W,
                      EVEX_CD8<64, CD8VT1>, Sched<[WriteStore]>,
                      Requires<[HasAVX512, In64BitMode]>;

// 0xD6 store form: store element 0 of a v2i64 to 64-bit memory.
def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
                      (ins i64mem:$dst, VR128X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
                              addr:$dst)], IIC_SSE_MOVDQ>,
                      EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
                      Sched<[WriteStore]>, Requires<[HasAVX512, In64BitMode]>;

// 0xD6 register-to-register form, printed as "vmovq.s"; no selection pattern.
let hasSideEffects = 0 in
def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
                             (ins VR128X:$src),
                             "vmovq.s\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVDQ>,
                             EVEX, VEX_W, Sched<[WriteMove]>;
} // ExeDomain = SSEPackedInt
3747
// Move Scalar Single to Double Int
// Codegen-only vmovd forms that bitconvert FR32X to a GR32 or to a 32-bit
// store.
//
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
  def VMOVSS2DIZrr
    : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32X:$src),
               "vmovd\t{$src, $dst|$dst, $src}",
               [(set GR32:$dst, (bitconvert FR32X:$src))],
               IIC_SSE_MOVD_ToGP>,
      EVEX, Sched<[WriteMove]>;
  def VMOVSS2DIZmr
    : AVX512BI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32X:$src),
               "vmovd\t{$src, $dst|$dst, $src}",
               [(store (i32 (bitconvert FR32X:$src)), addr:$dst)],
               IIC_SSE_MOVDQ>,
      EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteStore]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3762
// Move Quadword Int to Packed Quadword Int
// Loads an i64 from memory and places it in a v2i64 via scalar_to_vector.
//
let ExeDomain = SSEPackedInt in {
  def VMOVQI2PQIZrm
    : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst), (ins i64mem:$src),
                "vmovq\t{$src, $dst|$dst, $src}",
                [(set VR128X:$dst,
                      (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
      EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteLoad]>;
} // ExeDomain = SSEPackedInt
3773
// Accept the "vmovd" spelling for the GR64 <-> XMM moves; the trailing 0 keeps
// "vmovq" as the printed mnemonic.
def : InstAlias<
  "vmovd\t{$src, $dst|$dst, $src}",
  (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
def : InstAlias<
  "vmovd\t{$src, $dst|$dst, $src}",
  (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;
3779
Simon Pilgrimb2a80952017-01-08 16:45:39 +00003780//===----------------------------------------------------------------------===//
3781// AVX-512 MOVSS, MOVSD
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003782//===----------------------------------------------------------------------===//
3783
// Defines the scalar move family for one element type:
//   rr / rrk / rrkz  - register forms (opcode 0x10) built on OpNode, unmasked,
//                      merge-masked and zero-masked respectively.
//   rm / rmk / rmkz  - load forms (opcode 0x10); only the unmasked form has a
//                      selection pattern.
//   mr / mrk         - store forms (opcode 0x11); only the unmasked form has a
//                      selection pattern.
//   asm    - mnemonic.
//   OpNode - SDNode matched by the register forms.
//   _      - vector type info supplying register classes, VT and domain.
multiclass avx512_move_scalar<string asm, SDNode OpNode,
                              X86VectorVTInfo _> {
  // Unmasked register move.
  def rr
    : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
               (ins _.RC:$src1, _.RC:$src2),
               !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
               [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
               _.ExeDomain, IIC_SSE_MOV_S_RR>,
      EVEX_4V, Sched<[WriteMove]>;

  // Zero-masked register move.
  def rrkz
    : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
               (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
               !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
                          "$dst {${mask}} {z}, $src1, $src2}"),
               [(set _.RC:$dst,
                     (_.VT (X86selects _.KRCWM:$mask,
                                       (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                                       _.ImmAllZerosV)))],
               _.ExeDomain, IIC_SSE_MOV_S_RR>,
      EVEX_4V, EVEX_KZ, Sched<[WriteMove]>;

  // Merge-masked register move; $src0 is tied to $dst.
  let Constraints = "$src0 = $dst" in
  def rrk
    : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
               (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
               !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, $src2}"),
               [(set _.RC:$dst,
                     (_.VT (X86selects _.KRCWM:$mask,
                                       (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                                       (_.VT _.RC:$src0))))],
               _.ExeDomain, IIC_SSE_MOV_S_RR>,
      EVEX_4V, EVEX_K, Sched<[WriteMove]>;

  // Unmasked scalar load into the scalar FP register class.
  let canFoldAsLoad = 1, isReMaterializable = 1 in
  def rm
    : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
               !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
               [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
               _.ExeDomain, IIC_SSE_MOV_S_RM>,
      EVEX, Sched<[WriteLoad]>;

  // Masked loads; pattern-less, so mayLoad is stated explicitly.
  let mayLoad = 1, hasSideEffects = 0 in {
    let Constraints = "$src0 = $dst" in
    def rmk
      : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
                 (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
                 !strconcat(asm, "\t{$src, $dst {${mask}}|",
                            "$dst {${mask}}, $src}"),
                 [], _.ExeDomain, IIC_SSE_MOV_S_RM>,
        EVEX, EVEX_K, Sched<[WriteLoad]>;
    def rmkz
      : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
                 (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
                 !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
                            "$dst {${mask}} {z}, $src}"),
                 [], _.ExeDomain, IIC_SSE_MOV_S_RM>,
        EVEX, EVEX_KZ, Sched<[WriteLoad]>;
  }

  // Unmasked scalar store.
  def mr
    : AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
               !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
               [(store _.FRC:$src, addr:$dst)],
               _.ExeDomain, IIC_SSE_MOV_S_MR>,
      EVEX, Sched<[WriteStore]>;

  // Masked store; pattern-less, so mayStore is stated explicitly.
  let mayStore = 1, hasSideEffects = 0 in
  def mrk
    : AVX512PI<0x11, MRMDestMem, (outs),
               (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.FRC:$src),
               !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
               [], _.ExeDomain, IIC_SSE_MOV_S_MR>,
      EVEX, EVEX_K, Sched<[WriteStore]>;
}
3836
// Instantiate the scalar move family for f32 (vmovss) and f64 (vmovsd).
defm VMOVSSZ
  : avx512_move_scalar<"vmovss", X86Movss, f32x_info>,
    VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;

defm VMOVSDZ
  : avx512_move_scalar<"vmovsd", X86Movsd, f64x_info>,
    VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003842
Ayman Musa46af8f92016-11-13 14:29:32 +00003843
// Folds a scalar X86selects that feeds OpNode (through scalar_to_vector) into
// the masked register form of the move instruction named by InstrStr.
//   ZeroFP - PatLeaf matched as the "false" operand for the zero-masked form.
//   _      - vector type info of the 128-bit result.
multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
                                       PatLeaf ZeroFP, X86VectorVTInfo _> {

  // select(mask, src1, src2) -> merge-masked move with $src2 as passthru.
  def : Pat<
    (_.VT (OpNode _.RC:$src0,
                  (_.VT (scalar_to_vector
                          (_.EltVT (X86selects VK1WM:$mask,
                                               (_.EltVT _.FRC:$src1),
                                               (_.EltVT _.FRC:$src2))))))),
    (!cast<Instruction>(InstrStr#rrk)
       (COPY_TO_REGCLASS _.FRC:$src2, _.RC),
       VK1WM:$mask,
       (_.VT _.RC:$src0),
       (COPY_TO_REGCLASS _.FRC:$src1, _.RC))>;

  // select(mask, src1, ZeroFP) -> zero-masked move.
  def : Pat<
    (_.VT (OpNode _.RC:$src0,
                  (_.VT (scalar_to_vector
                          (_.EltVT (X86selects VK1WM:$mask,
                                               (_.EltVT _.FRC:$src1),
                                               (_.EltVT ZeroFP))))))),
    (!cast<Instruction>(InstrStr#rrkz)
       VK1WM:$mask,
       (_.VT _.RC:$src0),
       (COPY_TO_REGCLASS _.FRC:$src1, _.RC))>;
}
3868
// Lowers a 512-bit masked_store whose value is a 128-bit vector inserted at
// index 0 of undef to the masked scalar store InstrStr#mrk.
//   Mask   - dag the store mask must match; it binds $mask.
//   MaskRC - register class of $mask, copied directly to VK1WM.
multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
                                        dag Mask, RegisterClass MaskRC> {

  def : Pat<
    (masked_store addr:$dst, Mask,
                  (_.info512.VT (insert_subvector undef,
                                   (_.info128.VT _.info128.RC:$src),
                                   (iPTR 0)))),
    (!cast<Instruction>(InstrStr#mrk)
       addr:$dst,
       (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
       (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;

}
3881
// Same lowering as avx512_store_scalar_lowering, for masks held in a narrower
// GPR class: $mask is first inserted into an i32 IMPLICIT_DEF at `subreg`,
// and that i32 is then copied to VK1WM.
multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
                                               AVX512VLVectorVTInfo _,
                                               dag Mask, RegisterClass MaskRC,
                                               SubRegIndex subreg> {

  def : Pat<
    (masked_store addr:$dst, Mask,
                  (_.info512.VT (insert_subvector undef,
                                   (_.info128.VT _.info128.RC:$src),
                                   (iPTR 0)))),
    (!cast<Instruction>(InstrStr#mrk)
       addr:$dst,
       (COPY_TO_REGCLASS
          (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)),
          VK1WM),
       (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;

}
3896
// Lowers the low 128 bits of a 512-bit masked_load to a masked scalar load.
//   Mask   - dag the load mask must match; it binds $mask.
//   MaskRC - register class of $mask, copied directly to VK1WM.
multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
                                       dag Mask, RegisterClass MaskRC> {

  // All-zeros passthru -> zero-masked load (rmkz).
  def : Pat<
    (_.info128.VT
      (extract_subvector
        (_.info512.VT
          (masked_load addr:$srcAddr, Mask,
                       (_.info512.VT (bitconvert (v16i32 immAllZerosV))))),
        (iPTR 0))),
    (!cast<Instruction>(InstrStr#rmkz)
       (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
       addr:$srcAddr)>;

  // Passthru is X86vzmovl($src) inserted at index 0 of undef
  // -> merge-masked load (rmk) with $src as passthru.
  def : Pat<
    (_.info128.VT
      (extract_subvector
        (_.info512.VT
          (masked_load addr:$srcAddr, Mask,
                       (_.info512.VT
                         (insert_subvector undef,
                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                            (iPTR 0))))),
        (iPTR 0))),
    (!cast<Instruction>(InstrStr#rmk)
       _.info128.RC:$src,
       (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
       addr:$srcAddr)>;

}
3920
// Same lowering as avx512_load_scalar_lowering, for masks held in a narrower
// GPR class: $mask is first inserted into an i32 IMPLICIT_DEF at `subreg`,
// and that i32 is then copied to VK1WM.
multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
                                              AVX512VLVectorVTInfo _,
                                              dag Mask, RegisterClass MaskRC,
                                              SubRegIndex subreg> {

  // All-zeros passthru -> zero-masked load (rmkz).
  def : Pat<
    (_.info128.VT
      (extract_subvector
        (_.info512.VT
          (masked_load addr:$srcAddr, Mask,
                       (_.info512.VT (bitconvert (v16i32 immAllZerosV))))),
        (iPTR 0))),
    (!cast<Instruction>(InstrStr#rmkz)
       (COPY_TO_REGCLASS
          (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)),
          VK1WM),
       addr:$srcAddr)>;

  // Passthru is X86vzmovl($src) inserted at index 0 of undef
  // -> merge-masked load (rmk) with $src as passthru.
  def : Pat<
    (_.info128.VT
      (extract_subvector
        (_.info512.VT
          (masked_load addr:$srcAddr, Mask,
                       (_.info512.VT
                         (insert_subvector undef,
                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                            (iPTR 0))))),
        (iPTR 0))),
    (!cast<Instruction>(InstrStr#rmk)
       _.info128.RC:$src,
       (COPY_TO_REGCLASS
          (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)),
          VK1WM),
       addr:$srcAddr)>;

}
3946
// Masked register-move lowerings for vmovss / vmovsd.
defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;

// Masked scalar stores; the mask is a GPR ANDed with 1 (GR32, GR16 or GR8).
defm : avx512_store_scalar_lowering<
         "VMOVSSZ", avx512vl_f32_info,
         (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))),
         GR32>;
defm : avx512_store_scalar_lowering_subreg<
         "VMOVSSZ", avx512vl_f32_info,
         (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))),
         GR16, sub_16bit>;
defm : avx512_store_scalar_lowering_subreg<
         "VMOVSDZ", avx512vl_f64_info,
         (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
         GR8, sub_8bit>;

// Masked scalar loads; same mask shapes as the stores above.
defm : avx512_load_scalar_lowering<
         "VMOVSSZ", avx512vl_f32_info,
         (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))),
         GR32>;
defm : avx512_load_scalar_lowering_subreg<
         "VMOVSSZ", avx512vl_f32_info,
         (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))),
         GR16, sub_16bit>;
defm : avx512_load_scalar_lowering_subreg<
         "VMOVSDZ", avx512vl_f64_info,
         (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
         GR8, sub_8bit>;
Ayman Musa46af8f92016-11-13 14:29:32 +00003963
// Scalar f32/f64 X86selects lowered to the merge-masked VMOVSSZrrk/VMOVSDZrrk.
// In every pattern $src2 (the "false" value) becomes the tied passthru,
// $src1 is the moved value, and the middle vector source is IMPLICIT_DEF.

// f32, mask given as scalar_to_vector of a GR8.
def : Pat<
  (f32 (X86selects (scalar_to_vector GR8:$mask),
                   (f32 FR32X:$src1), (f32 FR32X:$src2))),
  (COPY_TO_REGCLASS
     (VMOVSSZrrk
        (COPY_TO_REGCLASS FR32X:$src2, VR128X),
        (COPY_TO_REGCLASS
           (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$mask, sub_8bit)),
           VK1WM),
        (v4f32 (IMPLICIT_DEF)),
        (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
     FR32X)>;

// f32, mask already in VK1WM.
def : Pat<
  (f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
  (COPY_TO_REGCLASS
     (VMOVSSZrrk
        (COPY_TO_REGCLASS FR32X:$src2, VR128X),
        VK1WM:$mask,
        (v4f32 (IMPLICIT_DEF)),
        (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
     FR32X)>;

// f64, mask given as scalar_to_vector of a GR8.
def : Pat<
  (f64 (X86selects (scalar_to_vector GR8:$mask),
                   (f64 FR64X:$src1), (f64 FR64X:$src2))),
  (COPY_TO_REGCLASS
     (VMOVSDZrrk
        (COPY_TO_REGCLASS FR64X:$src2, VR128X),
        (COPY_TO_REGCLASS
           (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$mask, sub_8bit)),
           VK1WM),
        (v2f64 (IMPLICIT_DEF)),
        (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
     FR64X)>;

// f64, mask already in VK1WM.
def : Pat<
  (f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
  (COPY_TO_REGCLASS
     (VMOVSDZrrk
        (COPY_TO_REGCLASS FR64X:$src2, VR128X),
        VK1WM:$mask,
        (v2f64 (IMPLICIT_DEF)),
        (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
     FR64X)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003991
// The mask_store_ss intrinsic selects VMOVSSZmrk: the GR8 mask is widened to
// i32 through INSERT_SUBREG and copied to VK1WM, and the source vector is
// copied to FR32X.
def : Pat<
  (int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask),
  (VMOVSSZmrk
     addr:$dst,
     (COPY_TO_REGCLASS
        (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$mask, sub_8bit)),
        VK1WM),
     (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
3995
// Pattern-less register-to-register vmovss / vmovsd definitions using opcode
// 0x11 with MRMDestReg, printed with a ".s" suffix. Each one names its
// counterpart through FoldGenData.
let hasSideEffects = 0 in {
  // --- vmovss.s ---
  def VMOVSSZrr_REV
    : AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
             (ins VR128X:$src1, VR128X:$src2),
             "vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
             [], IIC_SSE_MOV_S_RR>,
      XS, EVEX_4V, VEX_LIG,
      FoldGenData<"VMOVSSZrr">, Sched<[WriteMove]>;

  let Constraints = "$src0 = $dst" in
  def VMOVSSZrrk_REV
    : AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
             (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
                  VR128X:$src1, VR128X:$src2),
             "vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
             "$dst {${mask}}, $src1, $src2}",
             [], IIC_SSE_MOV_S_RR>,
      EVEX_K, XS, EVEX_4V, VEX_LIG,
      FoldGenData<"VMOVSSZrrk">, Sched<[WriteMove]>;

  def VMOVSSZrrkz_REV
    : AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
             (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
             "vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
             "$dst {${mask}} {z}, $src1, $src2}",
             [], IIC_SSE_MOV_S_RR>,
      EVEX_KZ, XS, EVEX_4V, VEX_LIG,
      FoldGenData<"VMOVSSZrrkz">, Sched<[WriteMove]>;

  // --- vmovsd.s ---
  def VMOVSDZrr_REV
    : AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
             (ins VR128X:$src1, VR128X:$src2),
             "vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
             [], IIC_SSE_MOV_S_RR>,
      XD, EVEX_4V, VEX_LIG, VEX_W,
      FoldGenData<"VMOVSDZrr">, Sched<[WriteMove]>;

  let Constraints = "$src0 = $dst" in
  def VMOVSDZrrk_REV
    : AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
             (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
                  VR128X:$src1, VR128X:$src2),
             "vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
             "$dst {${mask}}, $src1, $src2}",
             [], IIC_SSE_MOV_S_RR>,
      EVEX_K, XD, EVEX_4V, VEX_LIG,
      VEX_W, FoldGenData<"VMOVSDZrrk">, Sched<[WriteMove]>;

  def VMOVSDZrrkz_REV
    : AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
             (ins f64x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
             "vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
             "$dst {${mask}} {z}, $src1, $src2}",
             [], IIC_SSE_MOV_S_RR>,
      EVEX_KZ, XD, EVEX_4V, VEX_LIG,
      VEX_W, FoldGenData<"VMOVSDZrrkz">, Sched<[WriteMove]>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004042
// Selection patterns that map "move low element, zero the rest" (X86vzmovl /
// X86vzload), scalar_to_vector loads, low-element stores and MOVSS/MOVSD-style
// shuffles onto the EVEX scalar move instructions. All of them require
// HasAVX512.
let Predicates = [HasAVX512] in {
  let AddedComplexity = 15 in {
    // Register forms: blend the low element of $src into an all-zero vector.
    def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
              (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
    def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
              (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;
    // The scalar is re-classed to VR128X, the class VMOVSDZrr actually takes
    // and the one used by the other FR64X copies in this block. (The previous
    // VR128 target restricted the copy to the narrower register class.)
    def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64X:$src)))),
              (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
                         (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
  }

  // Move low f32 and clear high bits. Wider vectors operate on the xmm
  // sub-register and are widened again with SUBREG_TO_REG.
  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
              (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)), sub_xmm)>;
  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
              (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)), sub_xmm)>;
  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
              (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
              (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)), sub_xmm)>;

  let AddedComplexity = 20 in {
    // The scalar single load zeroes the high parts of the register, so a
    // zero-extending load needs no extra instruction. For 128-bit results the
    // load is simply re-classed to VR128X with COPY_TO_REGCLASS.
    def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
              (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
    def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
              (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
    def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
              (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
    def : Pat<(v4f32 (X86vzload addr:$src)),
              (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;

    // Same idea for the scalar double load: it zeroes the high parts of the
    // register and the 128-bit result is re-classed with COPY_TO_REGCLASS.
    def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
              (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
    def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
              (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
    def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
              (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
    def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
              (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
    def : Pat<(v2f64 (X86vzload addr:$src)),
              (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;

    // Represent the same patterns above but in the form they appear for
    // 256-bit types. Here the zeroed upper part is expressed with
    // SUBREG_TO_REG around the 128-bit load.
    def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
                     (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
              (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
    def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
                     (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
              (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
    def : Pat<(v8f32 (X86vzload addr:$src)),
              (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
    def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
                     (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
              (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
    def : Pat<(v4f64 (X86vzload addr:$src)),
              (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;

    // Represent the same patterns above but in the form they appear for
    // 512-bit types.
    def : Pat<(v16i32 (X86vzmovl (insert_subvector undef,
                      (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
              (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
    def : Pat<(v16f32 (X86vzmovl (insert_subvector undef,
                      (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
              (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
    def : Pat<(v16f32 (X86vzload addr:$src)),
              (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
    def : Pat<(v8f64 (X86vzmovl (insert_subvector undef,
                     (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
              (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
    def : Pat<(v8f64 (X86vzload addr:$src)),
              (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
  }

  // 256-bit zero-extending i64 load (default pattern complexity).
  def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
                   (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;

  // Move low f64 and clear high bits.
  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
              (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)), sub_xmm)>;
  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
              (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)), sub_xmm)>;

  // Move low i64 and clear high bits (same instruction as the f64 case).
  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
              (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)), sub_xmm)>;
  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
              (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)), sub_xmm)>;

  // Extract and store.
  def : Pat<(store (f32 (extractelt (v4f32 VR128X:$src), (iPTR 0))),
                   addr:$dst),
            (VMOVSSZmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>;

  // Shuffle with VMOVSS
  def : Pat<(v4i32 (X86Movss VR128X:$src1, VR128X:$src2)),
            (VMOVSSZrr (v4i32 VR128X:$src1), VR128X:$src2)>;

  def : Pat<(v4f32 (X86Movss VR128X:$src1, (scalar_to_vector FR32X:$src2))),
            (VMOVSSZrr VR128X:$src1,
                       (COPY_TO_REGCLASS FR32X:$src2, VR128X))>;

  // Shuffle with VMOVSD
  def : Pat<(v2i64 (X86Movsd VR128X:$src1, VR128X:$src2)),
            (VMOVSDZrr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v2f64 (X86Movsd VR128X:$src1, (scalar_to_vector FR64X:$src2))),
            (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS FR64X:$src2, VR128X))>;

  // X86Movlpd / X86Movlps with register operands are also selected to
  // VMOVSDZrr.
  def : Pat<(v2f64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
            (VMOVSDZrr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v4f32 (X86Movlps VR128X:$src1, VR128X:$src2)),
            (VMOVSDZrr VR128X:$src1, VR128X:$src2)>;
}
4175
// Register-to-register "vmovq" that keeps the low i64 of $src and zeroes the
// rest of the vector (selected for X86vzmovl on v2i64). It is placed in the
// packed-integer execution domain and scheduled as WriteVecLogic.
let ExeDomain = SSEPackedInt, SchedRW = [WriteVecLogic],
    AddedComplexity = 15 in {
  def VMOVZPQILo2PQIZrr :
      AVX512XSI<0x7E, MRMSrcReg,
                (outs VR128X:$dst),
                (ins VR128X:$src),
                "vmovq\t{$src, $dst|$dst, $src}",
                [(set VR128X:$dst,
                      (v2i64 (X86vzmovl (v2i64 VR128X:$src))))],
                IIC_SSE_MOVQ_RR>,
      EVEX, VEX_W;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004185
// Integer "move scalar into the low element and zero the rest" patterns,
// selected onto the EVEX movd/movq instructions. All require HasAVX512.
let Predicates = [HasAVX512] in {
  let AddedComplexity = 15 in {
    // GPR sources.
    def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
              (VMOVDI2PDIZrr GR32:$src)>;

    def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
              (VMOV64toPQIZrr GR64:$src)>;

    // 256-bit and 512-bit results built from a 64-bit GPR: do the 128-bit
    // move and widen with SUBREG_TO_REG.
    def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
                     (v2i64 (scalar_to_vector GR64:$src)), (iPTR 0)))),
              (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;

    def : Pat<(v8i64 (X86vzmovl (insert_subvector undef,
                     (v2i64 (scalar_to_vector GR64:$src)), (iPTR 0)))),
              (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
  }

  // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
  let AddedComplexity = 20 in {
    // 32-bit loads -> vmovd.
    def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector
                                        (zextloadi64i32 addr:$src))))),
              (VMOVDI2PDIZrm addr:$src)>;
    def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector
                                        (loadi32 addr:$src))))),
              (VMOVDI2PDIZrm addr:$src)>;
    def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
              (VMOVDI2PDIZrm addr:$src)>;
    def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
              (VMOVDI2PDIZrm addr:$src)>;
    def : Pat<(v4i32 (X86vzload addr:$src)),
              (VMOVDI2PDIZrm addr:$src)>;
    def : Pat<(v8i32 (X86vzload addr:$src)),
              (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;

    // 64-bit loads and register moves -> vmovq.
    def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
              (VMOVQI2PQIZrm addr:$src)>;
    def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
              (VMOVZPQILo2PQIZrr VR128X:$src)>;
    def : Pat<(v2i64 (X86vzload addr:$src)),
              (VMOVQI2PQIZrm addr:$src)>;
    def : Pat<(v4i64 (X86vzload addr:$src)),
              (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
  }

  // Use regular 128-bit instructions to match 256-bit and 512-bit
  // scalar_to_vec+zext from a 32-bit GPR.
  def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
                   (v4i32 (scalar_to_vector GR32:$src)), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (insert_subvector undef,
                    (v4i32 (scalar_to_vector GR32:$src)), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>;

  // Use regular 128-bit load instructions to match 512-bit zero-extending
  // loads.
  def : Pat<(v16i32 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
  def : Pat<(v8i64 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
}
//===----------------------------------------------------------------------===//
// AVX-512 - Non-temporals
//===----------------------------------------------------------------------===//
// Non-temporal vector loads (vmovntdqa) for 512-, 256- and 128-bit operands.
// The defs carry no selection pattern. The 256/128-bit forms additionally
// require HasVLX. Everything here is scheduled as WriteLoad.
let SchedRW = [WriteLoad] in {
  def VMOVNTDQAZrm :
      AVX512PI<0x2A, MRMSrcMem,
               (outs VR512:$dst), (ins i512mem:$src),
               "vmovntdqa\t{$src, $dst|$dst, $src}",
               [], SSEPackedInt>,
      EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;

  let Predicates = [HasVLX] in {
    def VMOVNTDQAZ256rm :
        AVX512PI<0x2A, MRMSrcMem,
                 (outs VR256X:$dst), (ins i256mem:$src),
                 "vmovntdqa\t{$src, $dst|$dst, $src}",
                 [], SSEPackedInt>,
        EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;

    def VMOVNTDQAZ128rm :
        AVX512PI<0x2A, MRMSrcMem,
                 (outs VR128X:$dst), (ins i128mem:$src),
                 "vmovntdqa\t{$src, $dst|$dst, $src}",
                 [], SSEPackedInt>,
        EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
  }
}
4263
// One non-temporal store instruction ("mr": memory destination, register
// source) for the vector type described by `_`.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic; the operand syntax is appended here.
//   st_frag   - store fragment the instruction is selected for
//               (defaults to alignednontemporalstore).
//   itin      - itinerary class (defaults to IIC_SSE_MOVNT).
multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                        PatFrag st_frag = alignednontemporalstore,
                        InstrItinClass itin = IIC_SSE_MOVNT> {
  let SchedRW = [WriteStore], AddedComplexity = 400 in {
    def mr : AVX512PI<opc, MRMDestMem,
                      (outs), (ins _.MemOp:$dst, _.RC:$src),
                      OpcodeStr # "\t{$src, $dst|$dst, $src}",
                      [(st_frag (_.VT _.RC:$src), addr:$dst)],
                      _.ExeDomain, itin>,
             EVEX, EVEX_CD8<_.EltSize, CD8VF>;
  }
}
4273
// Instantiates avx512_movnt for all three vector lengths of VTInfo:
// Z (512-bit, HasAVX512) and Z256 / Z128 (HasAVX512 + HasVLX).
multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
                           AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512>, EVEX_V512;
  }

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256>, EVEX_V256;
    defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128>, EVEX_V128;
  }
}
4284
// Non-temporal stores: integer (i64 element info), packed double and packed
// single. Each expands to Z / Z256 / Z128 "mr" instructions.
defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info>,
                PD;
defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info>,
                PD, VEX_W;
defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info>,
                PS;
Robert Khasanoved882972014-08-13 10:46:00 +00004288
// 512-bit non-temporal patterns for the element types that the instruction
// definitions themselves do not cover.
let Predicates = [HasAVX512], AddedComplexity = 400 in {
  // Stores of i32/i16/i8 vectors all go through VMOVNTDQZmr.
  def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;

  // Aligned non-temporal loads are selected to VMOVNTDQAZrm.
  def : Pat<(v8f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v16f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v8i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
}
4304
// 256-bit and 128-bit counterparts of the non-temporal patterns; these need
// HasVLX.
let Predicates = [HasVLX], AddedComplexity = 400 in {
  // 256-bit stores of i32/i16/i8 vectors.
  def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;

  // 256-bit loads.
  def : Pat<(v4f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v8f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v4i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;

  // 128-bit stores of i32/i16/i8 vectors.
  def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;

  // 128-bit loads.
  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
}
4334
//===----------------------------------------------------------------------===//
// AVX-512 - Integer arithmetic
//
// Two-operand integer operation on vector type `_`, built with
// AVX512_maskable:
//   rr - both sources in registers; IsCommutable is forwarded to this form.
//   rm - second source is a full-vector load (_.LdFrag wrapped in
//        bitconvert).
// Itineraries and scheduling information come from `itins`.
multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86VectorVTInfo _, OpndItins itins,
                           bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _,
                            (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2),
                            OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                            itins.rr, IsCommutable>,
            AVX512BIBase, EVEX_4V,
            Sched<[itins.Sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _,
                            (outs _.RC:$dst),
                            (ins _.RC:$src1, _.MemOp:$src2),
                            OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_.VT (OpNode _.RC:$src1,
                                          (bitconvert (_.LdFrag addr:$src2)))),
                            itins.rm>,
            AVX512BIBase, EVEX_4V,
            Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
4356
// Extends avx512_binop_rm (rr + rm) with an "rmb" form whose second source is
// a scalar load broadcast to the whole vector (X86VBroadcast of
// _.ScalarLdFrag). The broadcast suffix in the assembly string comes from
// _.BroadcastStr, and the form is tagged EVEX_B.
multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _, OpndItins itins,
                            bit IsCommutable = 0> :
    avx512_binop_rm<opc, OpcodeStr, OpNode, _, itins, IsCommutable> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _,
                             (outs _.RC:$dst),
                             (ins _.RC:$src1, _.ScalarMemOp:$src2),
                             OpcodeStr,
                             "${src2}"##_.BroadcastStr##", $src1",
                             "$src1, ${src2}"##_.BroadcastStr,
                             (_.VT (OpNode _.RC:$src1,
                                           (X86VBroadcast
                                               (_.ScalarLdFrag addr:$src2)))),
                             itins.rm>,
             AVX512BIBase, EVEX_4V, EVEX_B,
             Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
Elena Demikhovskybb2f6b72014-03-27 09:45:08 +00004371
// Instantiates avx512_binop_rm (no broadcast form) at all three vector
// lengths of VTInfo. Z needs `prd`; Z256 and Z128 need `prd` and HasVLX.
multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              AVX512VLVectorVTInfo VTInfo, OpndItins itins,
                              Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in {
    defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512,
                             itins, IsCommutable>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
                                itins, IsCommutable>,
                EVEX_V256;
    defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
                                itins, IsCommutable>,
                EVEX_V128;
  }
}
4386
// Instantiates avx512_binop_rmb (rr, rm and broadcast rmb) at all three
// vector lengths of VTInfo. Z needs `prd`; Z256 and Z128 need `prd` and
// HasVLX.
multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               AVX512VLVectorVTInfo VTInfo, OpndItins itins,
                               Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in {
    defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512,
                              itins, IsCommutable>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
                                 itins, IsCommutable>,
                EVEX_V256;
    defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
                                 itins, IsCommutable>,
                EVEX_V128;
  }
}
4401
// 64-bit-element flavour: broadcast-capable binop over avx512vl_i64_info,
// tagged VEX_W and EVEX_CD8<64, CD8VF>.
multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                OpndItins itins, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode,
                                  avx512vl_i64_info, itins, prd,
                                  IsCommutable>,
              VEX_W, EVEX_CD8<64, CD8VF>;
}
4409
// 32-bit-element flavour: broadcast-capable binop over avx512vl_i32_info,
// tagged EVEX_CD8<32, CD8VF>.
multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                OpndItins itins, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode,
                                  avx512vl_i32_info, itins, prd,
                                  IsCommutable>,
              EVEX_CD8<32, CD8VF>;
}
4416
// 16-bit-element flavour: binop over avx512vl_i16_info without a broadcast
// form (uses avx512_binop_rm_vl), tagged EVEX_CD8<16, CD8VF> and VEX_WIG.
multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                OpndItins itins, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode,
                                 avx512vl_i16_info, itins, prd,
                                 IsCommutable>,
              EVEX_CD8<16, CD8VF>, VEX_WIG;
}
4424
// 8-bit-element flavour: binop over avx512vl_i8_info without a broadcast
// form (uses avx512_binop_rm_vl), tagged EVEX_CD8<8, CD8VF> and VEX_WIG.
multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                OpndItins itins, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode,
                                 avx512vl_i8_info, itins, prd,
                                 IsCommutable>,
              EVEX_CD8<8, CD8VF>, VEX_WIG;
}
4432
// Dword + qword pair: Q uses opc_q with mnemonic suffix "q", D uses opc_d
// with suffix "d". Both share the node, itineraries and predicate.
multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, OpndItins itins, Predicate prd,
                                 bit IsCommutable = 0> {
  defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode,
                                itins, prd, IsCommutable>;

  defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode,
                                itins, prd, IsCommutable>;
}
4442
// Byte + word pair: W uses opc_w with mnemonic suffix "w", B uses opc_b with
// suffix "b". Both share the node, itineraries and predicate.
multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, OpndItins itins, Predicate prd,
                                 bit IsCommutable = 0> {
  defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode,
                                itins, prd, IsCommutable>;

  defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode,
                                itins, prd, IsCommutable>;
}
4452
// All four element widths at once: the d/q forms are predicated on
// HasAVX512, the b/w forms on HasBWI.
multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  OpndItins itins, bit IsCommutable = 0> {
  defm NAME :
      avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
                            itins, HasAVX512, IsCommutable>,
      avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
                            itins, HasBWI, IsCommutable>;
}
4462
// Binary operation whose source type (_Src), destination type (_Dst) and
// broadcast type (_Brdct) may all differ.
//   rr  - register sources; IsCommutable is forwarded to this form.
//   rm  - second source is a full-vector load of the source type.
//   rmb - second source is a scalar load broadcast as _Brdct and then
//         bitconverted for the operation; tagged EVEX_B.
multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, OpndItins itins,
                            SDNode OpNode, X86VectorVTInfo _Src,
                            X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
                            bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst,
                            (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2),
                            OpcodeStr,
                            "$src2, $src1","$src1, $src2",
                            (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                             (_Src.VT _Src.RC:$src2))),
                            itins.rr, IsCommutable>,
            AVX512BIBase, EVEX_4V, Sched<[itins.Sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst,
                            (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.MemOp:$src2),
                            OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                             (bitconvert
                                                 (_Src.LdFrag addr:$src2)))),
                            itins.rm>,
            AVX512BIBase, EVEX_4V,
            Sched<[itins.Sched.Folded, ReadAfterLd]>;

  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst,
                             (outs _Dst.RC:$dst),
                             (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
                             OpcodeStr,
                             "${src2}"##_Brdct.BroadcastStr##", $src1",
                             "$src1, ${src2}"##_Brdct.BroadcastStr,
                             (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                 (bitconvert
                                     (_Brdct.VT (X86VBroadcast
                                         (_Brdct.ScalarLdFrag addr:$src2)))))),
                             itins.rm>,
             AVX512BIBase, EVEX_4V, EVEX_B,
             Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
4494
// Plain add / subtract for every element width. Opcode order is b, w, d, q.
defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4,
                                    "vpadd", add,
                                    SSE_INTALU_ITINS_P, 1>;
defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB,
                                    "vpsub", sub,
                                    SSE_INTALU_ITINS_P, 0>;

// Byte/word add and subtract through the X86adds / X86subs / X86addus /
// X86subus nodes (HasBWI). Opcode order is b, w.
defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED,
                                    "vpadds", X86adds,
                                    SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9,
                                    "vpsubs", X86subs,
                                    SSE_INTALU_ITINS_P, HasBWI, 0>;
defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD,
                                     "vpaddus", X86addus,
                                     SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9,
                                     "vpsubus", X86subus,
                                     SSE_INTALU_ITINS_P, HasBWI, 0>;

// Multiplies selected for the generic `mul` node, by element width.
defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
                                    SSE_PMULLD_ITINS, HasAVX512, 1>,
               T8PD;
defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
                                    SSE_INTMUL_ITINS_P, HasBWI, 1>;
defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
                                    SSE_INTMUL_ITINS_P, HasDQI, 1>,
               T8PD;

// Word multiplies selected for mulhs / mulhu / X86mulhrs (HasBWI).
defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs,
                                    SSE_INTMUL_ITINS_P, HasBWI, 1>;
defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu,
                                     SSE_INTMUL_ITINS_P, HasBWI, 1>;
defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
                                      SSE_INTMUL_ITINS_P, HasBWI, 1>,
                 T8PD;

// Byte/word X86avg (HasBWI). Opcode order is b, w.
defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3,
                                   "vpavg", X86avg,
                                   SSE_INTALU_ITINS_P, HasBWI, 1>;

// X86pmuldq / X86pmuludq, instantiated over the 64-bit element info.
defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
                                    SSE_INTMUL_ITINS_P, HasAVX512, 1>,
               T8PD;
defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
                                     SSE_INTMUL_ITINS_P, HasAVX512, 1>;
Michael Liao66233b72015-08-06 09:06:20 +00004525
// Instantiates avx512_binop_rm2 at 512/256/128 bits with independent source
// and destination type families. The broadcast type is fixed to the 64-bit
// integer info of each width, and every form is tagged EVEX_CD8<64, CD8VF>
// and VEX_W. Z needs `prd`; Z256 and Z128 need HasVLX and `prd`.
multiclass avx512_binop_all<bits<8> opc, string OpcodeStr, OpndItins itins,
                            AVX512VLVectorVTInfo _SrcVTInfo,
                            AVX512VLVectorVTInfo _DstVTInfo,
                            SDNode OpNode, Predicate prd,
                            bit IsCommutable = 0> {
  let Predicates = [prd] in {
    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode,
                                   _SrcVTInfo.info512, _DstVTInfo.info512,
                                   v8i64_info, IsCommutable>,
                  EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
  }

  let Predicates = [HasVLX, prd] in {
    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode,
                                      _SrcVTInfo.info256, _DstVTInfo.info256,
                                      v4i64x_info, IsCommutable>,
                     EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode,
                                      _SrcVTInfo.info128, _DstVTInfo.info128,
                                      v2i64x_info, IsCommutable>,
                     EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
  }
}
Elena Demikhovsky50b88dd2015-04-21 10:27:40 +00004545
// VPMULTISHIFTQB: i8 vector info is used for both source and destination;
// guarded by HasVBMI and passed IsCommutable = 0.
defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb",
                                       SSE_INTALU_ITINS_P,
                                       avx512vl_i8_info, avx512vl_i8_info,
                                       X86multishift, HasVBMI, 0>, T8PD;
Elena Demikhovsky172a27c2014-01-08 10:54:22 +00004549
// Broadcast-memory ("rmb") form of an operation whose source and destination
// vector types differ.  The second operand is a scalar load of the source
// element type that is broadcast (X86VBroadcast) to _Src.VT and bitconverted
// before being handed to OpNode; the result has type _Dst.VT.
//   _Src / _Dst - source and destination vector type descriptions.
//   itins       - only the memory itinerary (itins.rm) and the folded
//                 scheduling class are used here.
multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
                            OpndItins itins> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst,
                 (outs _Dst.RC:$dst),
                 (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
                 OpcodeStr,
                 "${src2}"##_Src.BroadcastStr##", $src1",
                 "$src1, ${src2}"##_Src.BroadcastStr,
                 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                  (bitconvert
                                    (_Src.VT (X86VBroadcast
                                      (_Src.ScalarLdFrag addr:$src2)))))),
                 itins.rm>,
             EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
             Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
4564
// Register-register ("rr") and register-memory ("rm") forms of an operation
// that reads two _Src vectors and produces a _Dst vector.
//   IsCommutable - forwarded to the rr form only (defaults to 0).
//   itins        - itins.rr / itins.rm select the itinerary; itins.Sched and
//                  itins.Sched.Folded select the scheduling class.
multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, X86VectorVTInfo _Src,
                           X86VectorVTInfo _Dst, OpndItins itins,
                           bit IsCommutable = 0> {
  // Both operands in registers.
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst,
                (outs _Dst.RC:$dst),
                (ins _Src.RC:$src1, _Src.RC:$src2),
                OpcodeStr,
                "$src2, $src1", "$src1, $src2",
                (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                 (_Src.VT _Src.RC:$src2))),
                itins.rr, IsCommutable>,
            EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[itins.Sched]>;

  // Second operand loaded from memory as a full vector.
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst,
                (outs _Dst.RC:$dst),
                (ins _Src.RC:$src1, _Src.MemOp:$src2),
                OpcodeStr,
                "$src2, $src1", "$src1, $src2",
                (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                 (bitconvert (_Src.LdFrag addr:$src2)))),
                itins.rm>,
            EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
            Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
4585
// i32 -> i16 variants at 512/256/128 bits.  Each width combines the rr/rm
// forms (avx512_packs_rm) with the broadcast form (avx512_packs_rmb), all
// using the SSE_PACK itinerary.  512-bit needs HasBWI; the narrower widths
// need HasBWI and HasVLX.
multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode> {
  let Predicates = [HasBWI] in {
    defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
                                  v32i16_info, SSE_PACK>,
                  avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
                                   v32i16_info, SSE_PACK>,
                  EVEX_V512;
  }

  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
                                     v16i16x_info, SSE_PACK>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
                                      v16i16x_info, SSE_PACK>,
                     EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
                                     v8i16x_info, SSE_PACK>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
                                      v8i16x_info, SSE_PACK>,
                     EVEX_V128;
  }
}
// i16 -> i8 variants at 512/256/128 bits.  Only the rr/rm forms from
// avx512_packs_rm are defined (no broadcast form), and every variant is
// tagged VEX_WIG.  512-bit needs HasBWI; the narrower widths need HasBWI and
// HasVLX.
multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
                                   SDNode OpNode> {
  let Predicates = [HasBWI] in {
    defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info,
                                  v64i8_info, SSE_PACK>,
                  EVEX_V512, VEX_WIG;
  }

  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
                                     v32i8x_info, SSE_PACK>,
                     EVEX_V256, VEX_WIG;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
                                     v16i8x_info, SSE_PACK>,
                     EVEX_V128, VEX_WIG;
  }
}
Igor Bregerf7fd5472015-07-21 07:11:28 +00004616
// rr/rm forms (via avx512_packs_rm) at 512/256/128 bits for an operation whose
// source and destination element types come from two separate type families.
// Uses the SSE_PMADD itinerary.  512-bit needs HasBWI; the narrower widths
// need HasBWI and HasVLX.  IsCommutable is forwarded unchanged (default 0).
multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
                         SDNode OpNode, AVX512VLVectorVTInfo _Src,
                         AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
  let Predicates = [HasBWI] in {
    defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
                                  _Dst.info512, SSE_PMADD, IsCommutable>,
                  EVEX_V512;
  }

  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
                                     _Dst.info256, SSE_PMADD, IsCommutable>,
                     EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
                                     _Dst.info128, SSE_PMADD, IsCommutable>,
                     EVEX_V128;
  }
}
4630
// Pack instructions.  VPACKUSDW is the only one here that uses AVX5128IBase;
// the other three use AVX512BIBase.
// NOTE(review): AVX5128IBase is assumed to be a distinct class declared with
// the instruction formats, not a misspelling - confirm before "fixing" it.
defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>,
                 AVX512BIBase;
defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>,
                 AVX5128IBase;
defm VPACKSSWB : avx512_packs_all_i16_i8<0x63, "vpacksswb", X86Packss>,
                 AVX512BIBase;
defm VPACKUSWB : avx512_packs_all_i16_i8<0x67, "vpackuswb", X86Packus>,
                 AVX512BIBase;

// Multiply-add instructions.  Only VPMADDWD passes IsCommutable = 1;
// VPMADDUBSW relies on the multiclass default of 0.
defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
                                avx512vl_i8_info, avx512vl_i16_info>,
                  AVX512BIBase, T8PD, VEX_WIG;
defm VPMADDWD   : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
                                avx512vl_i16_info, avx512vl_i32_info, 1>,
                  AVX512BIBase, VEX_WIG;
Elena Demikhovsky2557a222015-05-04 09:14:02 +00004640
// Integer min/max instruction definitions, grouped by operation.  Byte and
// word forms are guarded by HasBWI; the combined dword/qword forms (the
// *_vl_dq multiclass takes two opcodes) are guarded by HasAVX512.

// Signed maximum.
defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
                                    SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
                                    SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPMAXS  : avx512_binop_rm_vl_dq<0x3D, 0x3D, "vpmaxs", smax,
                                     SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;

// Unsigned maximum.
defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
                                    SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
                                    SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
defm VPMAXU  : avx512_binop_rm_vl_dq<0x3F, 0x3F, "vpmaxu", umax,
                                     SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;

// Signed minimum.
defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
                                    SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
                                    SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPMINS  : avx512_binop_rm_vl_dq<0x39, 0x39, "vpmins", smin,
                                     SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;

// Unsigned minimum.
defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
                                    SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
                                    SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
defm VPMINU  : avx512_binop_rm_vl_dq<0x3B, 0x3B, "vpminu", umin,
                                     SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
Craig Topperabe80cc2016-08-28 06:06:28 +00004668
// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX.
// Each pattern inserts the narrow operands into the low subregister of an
// undefined v8i64 value, multiplies with VPMULLQZrr, and extracts the same
// subregister from the result.
let Predicates = [HasDQI, NoVLX] in {
  // 256-bit multiply through the ymm subregister.
  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;

  // 128-bit multiply through the xmm subregister.
  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
}
4702
// Selection patterns that implement a v4i64 / v2i64 OpNode with the given
// instruction on v8i64: the operands are inserted into the low subregister of
// an undefined v8i64 value, Instr is applied, and the same subregister is
// extracted from the result.
//   Instr  - instruction taking two v8i64 register operands.
//   OpNode - selection node being matched.
multiclass avx512_min_max_lowering<Instruction Instr, SDNode OpNode> {
  // 256-bit case, via sub_ymm.
  def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
            (EXTRACT_SUBREG
                (Instr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;

  // 128-bit case, via sub_xmm.
  def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
            (EXTRACT_SUBREG
                (Instr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
}
4718
// Without VLX, match 64-bit-element min/max on 128/256-bit vectors using the
// 512-bit register-register instructions.
let Predicates = [HasAVX512, NoVLX] in {
  defm : avx512_min_max_lowering<VPMAXUQZrr, umax>;
  defm : avx512_min_max_lowering<VPMINUQZrr, umin>;
  defm : avx512_min_max_lowering<VPMAXSQZrr, smax>;
  defm : avx512_min_max_lowering<VPMINSQZrr, smin>;
}
4725
//===----------------------------------------------------------------------===//
// AVX-512 Logical Instructions
//===----------------------------------------------------------------------===//
4729
// OpNodeMsk is the OpNode to use when element size is important. OpNode will
// be set to null_frag for 32-bit elements.
//
// Defines the register-register ("rr") and register-memory ("rm") forms of a
// logic operation.  Two patterns are given to AVX512_maskable_logic per form:
// one built from OpNode on bitconverted operands with an i64 vector result,
// and one built from OpNodeMsk whose i64 vector result is bitconverted back
// to _.VT.
multiclass avx512_logic_rm<bits<8> opc, string OpcodeStr,
                           SDPatternOperator OpNode,
                           SDNode OpNodeMsk, OpndItins itins,
                           X86VectorVTInfo _, bit IsCommutable = 0> {
  let hasSideEffects = 0 in {
    defm rr : AVX512_maskable_logic<opc, MRMSrcReg, _,
                  (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2),
                  OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
                                   (bitconvert (_.VT _.RC:$src2)))),
                  (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
                                                        _.RC:$src2)))),
                  itins.rr, IsCommutable>,
              AVX512BIBase, EVEX_4V, Sched<[itins.Sched]>;

    // The memory form additionally sets mayLoad.
    let mayLoad = 1 in
    defm rm : AVX512_maskable_logic<opc, MRMSrcMem, _,
                  (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2),
                  OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
                                   (bitconvert (_.LdFrag addr:$src2)))),
                  (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
                                      (bitconvert (_.LdFrag addr:$src2)))))),
                  itins.rm>,
              AVX512BIBase, EVEX_4V,
              Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
4758
// OpNodeMsk is the OpNode to use where element size is important. So use
// for all of the broadcast patterns.
//
// Inherits the rr/rm forms from avx512_logic_rm and adds the broadcast-memory
// ("rmb") form, in which the second operand is a scalar load broadcast with
// X86VBroadcast.  Both patterns of the rmb form use OpNodeMsk.
multiclass avx512_logic_rmb<bits<8> opc, string OpcodeStr,
                            SDPatternOperator OpNode,
                            SDNode OpNodeMsk, OpndItins itins,
                            X86VectorVTInfo _, bit IsCommutable = 0> :
           avx512_logic_rm<opc, OpcodeStr, OpNode, OpNodeMsk, itins, _,
                           IsCommutable> {
  defm rmb : AVX512_maskable_logic<opc, MRMSrcMem, _,
                 (outs _.RC:$dst),
                 (ins _.RC:$src1, _.ScalarMemOp:$src2),
                 OpcodeStr,
                 "${src2}"##_.BroadcastStr##", $src1",
                 "$src1, ${src2}"##_.BroadcastStr,
                 (_.i64VT (OpNodeMsk _.RC:$src1,
                            (bitconvert
                              (_.VT (X86VBroadcast
                                      (_.ScalarLdFrag addr:$src2)))))),
                 (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
                            (bitconvert
                              (_.VT (X86VBroadcast
                                      (_.ScalarLdFrag addr:$src2)))))))),
                 itins.rm>,
             AVX512BIBase, EVEX_4V, EVEX_B,
             Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
4782
// Expands avx512_logic_rmb at 512 (Z), 256 (Z256) and 128 (Z128) bits using
// the matching member of VTInfo.  The 512-bit variant requires HasAVX512; the
// narrower ones require HasAVX512 and HasVLX.
multiclass avx512_logic_rmb_vl<bits<8> opc, string OpcodeStr,
                               SDPatternOperator OpNode,
                               SDNode OpNodeMsk, OpndItins itins,
                               AVX512VLVectorVTInfo VTInfo,
                               bit IsCommutable = 0> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, itins,
                              VTInfo.info512, IsCommutable>,
             EVEX_V512;
  }

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, itins,
                                 VTInfo.info256, IsCommutable>,
                EVEX_V256;
    defm Z128 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, itins,
                                 VTInfo.info128, IsCommutable>,
                EVEX_V128;
  }
}
4799
// Defines the qword ("q" suffix, VEX_W, CD8<64>) and dword ("d" suffix,
// CD8<32>) flavours of a logic operation.  The Q flavour passes OpNode as
// both the plain and the masked node; the D flavour passes null_frag as the
// plain node so that only the OpNodeMsk-based patterns use OpNode.
multiclass avx512_logic_rm_vl_dq<bits<8> opc_d, bits<8> opc_q,
                                 string OpcodeStr, SDNode OpNode,
                                 OpndItins itins, bit IsCommutable = 0> {
  defm Q : avx512_logic_rmb_vl<opc_q, OpcodeStr#"q", OpNode, OpNode, itins,
                               avx512vl_i64_info, IsCommutable>,
           VEX_W, EVEX_CD8<64, CD8VF>;
  defm D : avx512_logic_rmb_vl<opc_d, OpcodeStr#"d", null_frag, OpNode, itins,
                               avx512vl_i32_info, IsCommutable>,
           EVEX_CD8<32, CD8VF>;
}
4810
// Bitwise logic instructions.  AND/OR/XOR pass IsCommutable = 1; VPANDN uses
// the multiclass default of 0.
defm VPAND  : avx512_logic_rm_vl_dq<0xDB, 0xDB, "vpand", and,
                                    SSE_BIT_ITINS_P, 1>;
defm VPOR   : avx512_logic_rm_vl_dq<0xEB, 0xEB, "vpor", or,
                                    SSE_BIT_ITINS_P, 1>;
defm VPXOR  : avx512_logic_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
                                    SSE_BIT_ITINS_P, 1>;
defm VPANDN : avx512_logic_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
                                    SSE_BIT_ITINS_P>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004815
//===----------------------------------------------------------------------===//
// AVX-512 FP arithmetic
//===----------------------------------------------------------------------===//
// Scalar FP binary operation.  Defines four records:
//   rr_Int / rm_Int - maskable forms on the vector register class, matched
//                     with VecNode and an explicit FROUND_CURRENT operand.
//   rr / rm         - isCodeGenOnly forms on the scalar register class (FRC),
//                     matched with OpNode and guarded by HasAVX512.
// IsCommutable is applied to the FRC rr form only.
multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                            SDNode OpNode, SDNode VecNode, OpndItins itins,
                            bit IsCommutable> {
  let ExeDomain = _.ExeDomain in {
    defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _,
                      (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2),
                      OpcodeStr,
                      "$src2, $src1", "$src1, $src2",
                      (_.VT (VecNode _.RC:$src1, _.RC:$src2,
                                     (i32 FROUND_CURRENT))),
                      itins.rr>,
                  Sched<[itins.Sched]>;

    defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _,
                      (outs _.RC:$dst),
                      (ins _.RC:$src1, _.IntScalarMemOp:$src2),
                      OpcodeStr,
                      "$src2, $src1", "$src1, $src2",
                      (_.VT (VecNode _.RC:$src1,
                                     _.ScalarIntMemCPat:$src2,
                                     (i32 FROUND_CURRENT))),
                      itins.rm>,
                  Sched<[itins.Sched.Folded, ReadAfterLd]>;

    let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
      def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
                 (ins _.FRC:$src1, _.FRC:$src2),
                 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
                 itins.rr>,
               Sched<[itins.Sched]> {
        let isCommutable = IsCommutable;
      }

      def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
                 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                 [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                           (_.ScalarLdFrag addr:$src2)))],
                 itins.rm>,
               Sched<[itins.Sched.Folded, ReadAfterLd]>;
    }
  }
}
4854
// Adds the "rrb_Int" form of a scalar FP operation: a register-register
// maskable form that takes an extra AVX512RC:$rc operand, passes it to
// VecNode as an i32 immediate, and is tagged EVEX_B and EVEX_RC.
multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,
                                  X86VectorVTInfo _, SDNode VecNode,
                                  OpndItins itins, bit IsCommutable = 0> {
  let ExeDomain = _.ExeDomain in {
    defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _,
                       (outs _.RC:$dst),
                       (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc),
                       OpcodeStr,
                       "$rc, $src2, $src1", "$src1, $src2, $rc",
                       (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                (i32 imm:$rc)),
                       itins.rr, IsCommutable>,
                   EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
  }
}
// Scalar FP binary operation with a {sae} variant.  Defines five records:
//   rr_Int / rm_Int - maskable forms on the vector register class, matched
//                     with VecNode (no rounding operand).
//   rr / rm         - isCodeGenOnly forms on the scalar register class (FRC),
//                     matched with OpNode and guarded by HasAVX512.
//   rrb_Int         - maskable register form printed with "{sae}", matched
//                     with SaeNode and FROUND_NO_EXC, tagged EVEX_B.
// IsCommutable is applied to the FRC rr form only.
multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,
                                X86VectorVTInfo _, SDNode OpNode,
                                SDNode VecNode, SDNode SaeNode,
                                OpndItins itins, bit IsCommutable> {
  let ExeDomain = _.ExeDomain in {
    defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _,
                      (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2),
                      OpcodeStr,
                      "$src2, $src1", "$src1, $src2",
                      (_.VT (VecNode _.RC:$src1, _.RC:$src2)),
                      itins.rr>,
                  Sched<[itins.Sched]>;

    defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _,
                      (outs _.RC:$dst),
                      (ins _.RC:$src1, _.IntScalarMemOp:$src2),
                      OpcodeStr,
                      "$src2, $src1", "$src1, $src2",
                      (_.VT (VecNode _.RC:$src1,
                                     _.ScalarIntMemCPat:$src2)),
                      itins.rm>,
                  Sched<[itins.Sched.Folded, ReadAfterLd]>;

    let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
      def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
                 (ins _.FRC:$src1, _.FRC:$src2),
                 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
                 itins.rr>,
               Sched<[itins.Sched]> {
        let isCommutable = IsCommutable;
      }

      def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
                 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                 [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                           (_.ScalarLdFrag addr:$src2)))],
                 itins.rm>,
               Sched<[itins.Sched.Folded, ReadAfterLd]>;
    }

    defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _,
                       (outs _.RC:$dst),
                       (ins _.RC:$src1, _.RC:$src2),
                       OpcodeStr,
                       "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                       (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                (i32 FROUND_NO_EXC)),
                       itins.rr>,
                   EVEX_B, Sched<[itins.Sched]>;
  }
}
4906
// Single ("ss", f32x_info, XS) and double ("sd", f64x_info, XD + VEX_W)
// precision instantiations of a scalar FP operation.  Each combines the
// avx512_fp_scalar forms with the avx512_fp_scalar_round form.
multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                SDNode VecNode, SizeItins itins,
                                bit IsCommutable> {
  defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
                              itins.s, IsCommutable>,
             avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, VecNode,
                                    itins.s, IsCommutable>,
             XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;

  defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
                              itins.d, IsCommutable>,
             avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, VecNode,
                                    itins.d, IsCommutable>,
             XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
4921
// Single ("ss", f32x_info, XS) and double ("sd", f64x_info, XD + VEX_W)
// precision instantiations of avx512_fp_scalar_sae.
multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode VecNode, SDNode SaeNode,
                              SizeItins itins, bit IsCommutable> {
  defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
                                  VecNode, SaeNode, itins.s, IsCommutable>,
             XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;

  defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
                                  VecNode, SaeNode, itins.d, IsCommutable>,
             XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
// Scalar FP arithmetic.  ADD and MUL are instantiated as commutable (last
// argument 1); SUB, DIV, MIN and MAX are not.
defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnds,
                                 SSE_ALU_ITINS_S, 1>;
defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnds,
                                 SSE_MUL_ITINS_S, 1>;
defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnds,
                                 SSE_ALU_ITINS_S, 0>;
defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnds,
                                 SSE_DIV_ITINS_S, 0>;
defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminRnds,
                               SSE_ALU_ITINS_S, 0>;
defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxRnds,
                               SSE_ALU_ITINS_S, 0>;
Elena Demikhovskyd84f3372016-07-11 06:08:06 +00004940
// MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
// X86fminc and X86fmaxc instead of X86fmin and X86fmax.
//
// Defines isCodeGenOnly rr/rm forms on the scalar register class (FRC),
// guarded by HasAVX512.  The rr form is unconditionally marked commutable.
multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
                                    X86VectorVTInfo _, SDNode OpNode,
                                    OpndItins itins> {
  let isCodeGenOnly = 1, Predicates = [HasAVX512],
      ExeDomain = _.ExeDomain in {
    def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _.FRC:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
               itins.rr>,
             Sched<[itins.Sched]> {
      let isCommutable = 1;
    }

    def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _.ScalarMemOp:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                         (_.ScalarLdFrag addr:$src2)))],
               itins.rm>,
             Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
// Commutable scalar min/max variants, matched with X86fminc / X86fmaxc.  They
// reuse the vminss/vminsd/vmaxss/vmaxsd mnemonics and opcodes 0x5D / 0x5F.
defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
                                         SSE_ALU_ITINS_S.s>,
                XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;

defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
                                         SSE_ALU_ITINS_S.d>,
                XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;

defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
                                         SSE_ALU_ITINS_S.s>,
                XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;

defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
                                         SSE_ALU_ITINS_S.d>,
                XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
Elena Demikhovsky02ffd262015-03-01 07:44:04 +00004976
// Packed FP binary operation on one vector type.  Defines three maskable
// forms, all with hasSideEffects = 0 and the type's execution domain:
//   rr  - both operands in registers; receives IsCommutable.
//   rm  - second operand is a full-vector load (mayLoad).
//   rmb - second operand is a scalar load broadcast with X86VBroadcast
//         (mayLoad, EVEX_B).
// The mnemonic is OpcodeStr with the type's suffix appended.
multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr,
                            SDPatternOperator OpNode, X86VectorVTInfo _,
                            OpndItins itins, bit IsCommutable> {
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
    defm rr : AVX512_maskable<opc, MRMSrcReg, _,
                  (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2),
                  OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                  itins.rr, IsCommutable>,
              EVEX_4V, Sched<[itins.Sched]>;

    let mayLoad = 1 in {
      defm rm : AVX512_maskable<opc, MRMSrcMem, _,
                    (outs _.RC:$dst),
                    (ins _.RC:$src1, _.MemOp:$src2),
                    OpcodeStr##_.Suffix,
                    "$src2, $src1", "$src1, $src2",
                    (OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
                    itins.rm>,
                EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;

      defm rmb : AVX512_maskable<opc, MRMSrcMem, _,
                     (outs _.RC:$dst),
                     (ins _.RC:$src1, _.ScalarMemOp:$src2),
                     OpcodeStr##_.Suffix,
                     "${src2}"##_.BroadcastStr##", $src1",
                     "$src1, ${src2}"##_.BroadcastStr,
                     (OpNode _.RC:$src1,
                             (_.VT (X86VBroadcast
                                     (_.ScalarLdFrag addr:$src2)))),
                     itins.rm>,
                 EVEX_4V, EVEX_B,
                 Sched<[itins.Sched.Folded, ReadAfterLd]>;
    }
  }
}
Elena Demikhovskyf7c1b162014-03-06 08:45:30 +00005003
// Packed FP "rrb" form with an explicit rounding operand: takes an extra
// AVX512RC:$rc input, passes it to OpNodeRnd as an i32 immediate, and is
// tagged EVEX_B and EVEX_RC.
multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
                                  SDPatternOperator OpNodeRnd,
                                  OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm rrb : AVX512_maskable<opc, MRMSrcReg, _,
                   (outs _.RC:$dst),
                   (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc),
                   OpcodeStr##_.Suffix,
                   "$rc, $src2, $src1", "$src1, $src2, $rc",
                   (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 imm:$rc))),
                   itins.rr>,
               EVEX_4V, EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
  }
}
5013
// Packed FP binary operation in its "{sae}" form: the pattern passes the
// constant FROUND_NO_EXC as the third operand of OpNodeRnd, and the assembly
// string carries a literal "{sae}" instead of a rounding-control operand.
multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNodeRnd,
                                OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm rrb: AVX512_maskable<opc, MRMSrcReg, _,
                  (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2),
                  OpcodeStr##_.Suffix,
                  "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                  (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2,
                                   (i32 FROUND_NO_EXC))),
                  itins.rr>,
              EVEX_4V, EVEX_B, Sched<[itins.Sched]>;
  }
}
5023
// Instantiates avx512_fp_packed for every packed single/double vector width.
//   prd          - predicate guarding all forms.
//   itins        - itins.s is used for the PS forms, itins.d for the PD forms.
//   IsCommutable - forwarded unchanged to avx512_fp_packed.
multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr,
                             SDPatternOperator OpNode,
                             Predicate prd, SizeItins itins,
                             bit IsCommutable = 0> {
  // 512-bit forms.
  let Predicates = [prd] in {
    defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info,
                                itins.s, IsCommutable>,
               EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
    defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info,
                                itins.d, IsCommutable>,
               EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
  }

  // 128-bit and 256-bit forms: define only if AVX512VL feature is present.
  let Predicates = [prd, HasVLX] in {
    defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info,
                                   itins.s, IsCommutable>,
                  EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info,
                                   itins.s, IsCommutable>,
                  EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info,
                                   itins.d, IsCommutable>,
                  EVEX_V128, PD, VEX_W, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info,
                                   itins.d, IsCommutable>,
                  EVEX_V256, PD, VEX_W, EVEX_CD8<64, CD8VF>;
  }
}
5052
// 512-bit single and double precision instantiations of
// avx512_fp_round_packed (explicit rounding-control forms).
multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr,
                                   SDNode OpNodeRnd, SizeItins itins> {
  defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, itins.s,
                                    v16f32_info>,
             EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, itins.d,
                                    v8f64_info>,
             EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
}
5060
// 512-bit single and double precision instantiations of
// avx512_fp_sae_packed ("{sae}" forms).
multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr,
                                 SDNode OpNodeRnd, SizeItins itins> {
  defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, itins.s,
                                  v16f32_info>,
             EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, itins.d,
                                  v8f64_info>,
             EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
}
5068
// Packed FP arithmetic. Each instruction combines the plain packed forms with
// either the rounding-control forms or the "{sae}" forms.
defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512,
                              SSE_ALU_ITINS_P, 1>,
            avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd,
                                    SSE_ALU_ITINS_P>;
defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512,
                              SSE_MUL_ITINS_P, 1>,
            avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd,
                                    SSE_MUL_ITINS_P>;
defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512,
                              SSE_ALU_ITINS_P>,
            avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd,
                                    SSE_ALU_ITINS_P>;
defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512,
                              SSE_DIV_ITINS_P>,
            avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd,
                                    SSE_DIV_ITINS_P>;
defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512,
                              SSE_ALU_ITINS_P, 0>,
            avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd,
                                  SSE_ALU_ITINS_P>;
defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512,
                              SSE_ALU_ITINS_P, 0>,
            avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd,
                                  SSE_ALU_ITINS_P>;

// Code-gen-only variants of vmin/vmax that are instantiated as commutable
// (same opcodes and mnemonics as VMIN/VMAX above).
let isCodeGenOnly = 1 in {
  defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512,
                                 SSE_ALU_ITINS_P, 1>;
  defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512,
                                 SSE_ALU_ITINS_P, 1>;
}

// FP-domain logical instructions. They are instantiated with null_frag, so no
// selection pattern is attached to them here.
defm VAND  : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI,
                               SSE_ALU_ITINS_P, 1>;
defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI,
                               SSE_ALU_ITINS_P, 0>;
defm VOR   : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
                               SSE_ALU_ITINS_P, 1>;
defm VXOR  : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
                               SSE_ALU_ITINS_P, 1>;
Elena Demikhovsky52e4a0e2014-01-05 10:46:09 +00005099
// Patterns catch floating point selects with bitcasted integer logic ops.
//   InstrStr - name prefix of the instruction records to select; the form
//              suffix (rrk, rrkz, rmk, rmkz, rmb, rmbk, rmbkz) is appended.
//   OpNode   - the integer logic node being matched.
//   _        - FP vector type description.
//   prd      - predicate guarding all patterns.
multiclass avx512_fp_logical_lowering<string InstrStr, SDNode OpNode,
                                      X86VectorVTInfo _, Predicate prd> {
  let Predicates = [prd] in {
    // Masked register-register logical operations (merge-masking).
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                     (bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
                     _.RC:$src0)),
              (!cast<Instruction>(InstrStr#rrk)
                  _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2)>;
    // Masked register-register logical operations (zero-masking).
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                     (bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
                     _.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr#rrkz)
                  _.KRCWM:$mask, _.RC:$src1, _.RC:$src2)>;

    // Masked register-memory logical operations (merge-masking).
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                     (bitconvert (_.i64VT (OpNode _.RC:$src1,
                                           (load addr:$src2)))),
                     _.RC:$src0)),
              (!cast<Instruction>(InstrStr#rmk)
                  _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, addr:$src2)>;
    // Masked register-memory logical operations (zero-masking).
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                     (bitconvert (_.i64VT (OpNode _.RC:$src1,
                                           (load addr:$src2)))),
                     _.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr#rmkz)
                  _.KRCWM:$mask, _.RC:$src1, addr:$src2)>;

    // Register-broadcast logical operations (unmasked).
    def : Pat<(_.i64VT (OpNode _.RC:$src1,
                        (bitconvert (_.VT (X86VBroadcast
                                           (_.ScalarLdFrag addr:$src2)))))),
              (!cast<Instruction>(InstrStr#rmb) _.RC:$src1, addr:$src2)>;
    // Register-broadcast logical operations (merge-masking).
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                     (bitconvert
                      (_.i64VT (OpNode _.RC:$src1,
                                (bitconvert (_.VT
                                             (X86VBroadcast
                                              (_.ScalarLdFrag addr:$src2))))))),
                     _.RC:$src0)),
              (!cast<Instruction>(InstrStr#rmbk)
                  _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, addr:$src2)>;
    // Register-broadcast logical operations (zero-masking).
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                     (bitconvert
                      (_.i64VT (OpNode _.RC:$src1,
                                (bitconvert (_.VT
                                             (X86VBroadcast
                                              (_.ScalarLdFrag addr:$src2))))))),
                     _.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr#rmbkz)
                  _.KRCWM:$mask, _.RC:$src1, addr:$src2)>;
  }
}
5152
// Instantiates avx512_fp_logical_lowering for every vector width. The f32
// vector types map onto the "D" instruction names and the f64 vector types
// onto the "Q" names; 128/256-bit widths are guarded by HasVLX, the 512-bit
// width by HasAVX512.
multiclass avx512_fp_logical_lowering_sizes<string InstrStr, SDNode OpNode> {
  defm : avx512_fp_logical_lowering<InstrStr#DZ128, OpNode, v4f32x_info,
                                    HasVLX>;
  defm : avx512_fp_logical_lowering<InstrStr#QZ128, OpNode, v2f64x_info,
                                    HasVLX>;
  defm : avx512_fp_logical_lowering<InstrStr#DZ256, OpNode, v8f32x_info,
                                    HasVLX>;
  defm : avx512_fp_logical_lowering<InstrStr#QZ256, OpNode, v4f64x_info,
                                    HasVLX>;
  defm : avx512_fp_logical_lowering<InstrStr#DZ, OpNode, v16f32_info,
                                    HasAVX512>;
  defm : avx512_fp_logical_lowering<InstrStr#QZ, OpNode, v8f64_info,
                                    HasAVX512>;
}
5161
// Lower FP-typed selects of bitcasted integer logic onto the VPAND / VPOR /
// VPXOR / VPANDN instruction families.
defm : avx512_fp_logical_lowering_sizes<"VPAND",  and>;
defm : avx512_fp_logical_lowering_sizes<"VPOR",   or>;
defm : avx512_fp_logical_lowering_sizes<"VPXOR",  xor>;
defm : avx512_fp_logical_lowering_sizes<"VPANDN", X86andnp>;
5166
let Predicates = [HasVLX,HasDQI] in {
  // Use packed logical operations for scalar ops: both scalar sources are
  // copied into VR128X, the 128-bit packed instruction is applied, and the
  // result is copied back to the scalar register class.

  // f64 operations.
  def : Pat<(f64 (X86fand FR64X:$src1, FR64X:$src2)),
            (COPY_TO_REGCLASS
               (VANDPDZ128rr (COPY_TO_REGCLASS FR64X:$src1, VR128X),
                             (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
               FR64X)>;
  def : Pat<(f64 (X86for FR64X:$src1, FR64X:$src2)),
            (COPY_TO_REGCLASS
               (VORPDZ128rr (COPY_TO_REGCLASS FR64X:$src1, VR128X),
                            (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
               FR64X)>;
  def : Pat<(f64 (X86fxor FR64X:$src1, FR64X:$src2)),
            (COPY_TO_REGCLASS
               (VXORPDZ128rr (COPY_TO_REGCLASS FR64X:$src1, VR128X),
                             (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
               FR64X)>;
  def : Pat<(f64 (X86fandn FR64X:$src1, FR64X:$src2)),
            (COPY_TO_REGCLASS
               (VANDNPDZ128rr (COPY_TO_REGCLASS FR64X:$src1, VR128X),
                              (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
               FR64X)>;

  // f32 operations.
  def : Pat<(f32 (X86fand FR32X:$src1, FR32X:$src2)),
            (COPY_TO_REGCLASS
               (VANDPSZ128rr (COPY_TO_REGCLASS FR32X:$src1, VR128X),
                             (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
               FR32X)>;
  def : Pat<(f32 (X86for FR32X:$src1, FR32X:$src2)),
            (COPY_TO_REGCLASS
               (VORPSZ128rr (COPY_TO_REGCLASS FR32X:$src1, VR128X),
                            (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
               FR32X)>;
  def : Pat<(f32 (X86fxor FR32X:$src1, FR32X:$src2)),
            (COPY_TO_REGCLASS
               (VXORPSZ128rr (COPY_TO_REGCLASS FR32X:$src1, VR128X),
                             (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
               FR32X)>;
  def : Pat<(f32 (X86fandn FR32X:$src1, FR32X:$src2)),
            (COPY_TO_REGCLASS
               (VANDNPSZ128rr (COPY_TO_REGCLASS FR32X:$src1, VR128X),
                              (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
               FR32X)>;
}
5203
// Packed scalef-style operation: register, full-vector memory and
// broadcast-memory forms. Every pattern passes FROUND_CURRENT as the third
// operand of OpNode.
multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    // Register-register form.
    defm rr: AVX512_maskable<opc, MRMSrcReg, _,
                 (outs _.RC:$dst),
                 (ins _.RC:$src1, _.RC:$src2),
                 OpcodeStr##_.Suffix,
                 "$src2, $src1", "$src1, $src2",
                 (_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT))),
                 itins.rr>,
             EVEX_4V, Sched<[itins.Sched]>;

    // Register-memory form (full vector load).
    defm rm: AVX512_maskable<opc, MRMSrcMem, _,
                 (outs _.RC:$dst),
                 (ins _.RC:$src1, _.MemOp:$src2),
                 OpcodeStr##_.Suffix,
                 "$src2, $src1", "$src1, $src2",
                 (OpNode _.RC:$src1, (_.LdFrag addr:$src2),
                         (i32 FROUND_CURRENT)),
                 itins.rm>,
             EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;

    // Register-memory form with a broadcast scalar load.
    defm rmb: AVX512_maskable<opc, MRMSrcMem, _,
                  (outs _.RC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2),
                  OpcodeStr##_.Suffix,
                  "${src2}"##_.BroadcastStr##", $src1",
                  "$src1, ${src2}"##_.BroadcastStr,
                  (OpNode _.RC:$src1,
                          (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
                          (i32 FROUND_CURRENT)),
                  itins.rm>,
              EVEX_4V, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
5227
// Scalar scalef-style operation: register and memory forms built on
// AVX512_maskable_scalar. Both patterns pass FROUND_CURRENT as the third
// operand of OpNode.
multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr,
                                   SDNode OpNode, OpndItins itins,
                                   X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    // Register-register form.
    defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _,
                 (outs _.RC:$dst),
                 (ins _.RC:$src1, _.RC:$src2),
                 OpcodeStr##_.Suffix,
                 "$src2, $src1", "$src1, $src2",
                 (_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT))),
                 itins.rr>,
             Sched<[itins.Sched]>;

    // Register-memory form; the memory operand is matched through
    // _.ScalarIntMemCPat.
    defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _,
                 (outs _.RC:$dst),
                 (ins _.RC:$src1, _.IntScalarMemOp:$src2),
                 OpcodeStr##_.Suffix,
                 "$src2, $src1", "$src1, $src2",
                 (OpNode _.RC:$src1, _.ScalarIntMemCPat:$src2,
                         (i32 FROUND_CURRENT)),
                 itins.rm>,
             Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
5244
// All forms of a scalef-style instruction.
//   opc        - opcode of the packed forms.
//   opcScaler  - opcode of the scalar forms.
//   OpNode     - node used by the packed forms.
//   OpNodeScal - node used by the scalar forms.
multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler,
                                string OpcodeStr, SDNode OpNode,
                                SDNode OpNodeScal> {
  // 512-bit packed forms, including the rounding-control variants.
  defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F32P,
                                v16f32_info>,
             avx512_fp_round_packed<opc, OpcodeStr, OpNode, SSE_ALU_F32P,
                                    v16f32_info>,
             EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F64P,
                                v8f64_info>,
             avx512_fp_round_packed<opc, OpcodeStr, OpNode, SSE_ALU_F64P,
                                    v8f64_info>,
             EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

  // Scalar forms, including the rounding-control variants.
  defm SSZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal,
                                        SSE_ALU_F32S, f32x_info>,
                avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info,
                                       OpNodeScal, SSE_ALU_ITINS_S.s>,
                EVEX_4V,EVEX_CD8<32, CD8VT1>;
  defm SDZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal,
                                        SSE_ALU_F64S, f64x_info>,
                avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info,
                                       OpNodeScal, SSE_ALU_ITINS_S.d>,
                EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;

  // 128-bit and 256-bit packed forms: define only if AVX512VL feature is
  // present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F32P,
                                     v4f32x_info>,
                  EVEX_V128, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F32P,
                                     v8f32x_info>,
                  EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F64P,
                                     v2f64x_info>,
                  EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F64P,
                                     v4f64x_info>,
                  EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
  }
}
// vscalef: packed forms use opcode 0x2C, scalar forms use opcode 0x2D.
defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef",
                                    X86scalef, X86scalefs>, T8PD;
Asaf Badouh7ec4b7a2015-06-28 14:30:39 +00005272
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00005273//===----------------------------------------------------------------------===//
5274// AVX-512 VPTESTM instructions
5275//===----------------------------------------------------------------------===//
5276
// Register and memory forms of a vptest-style instruction producing a mask.
// The selection pattern is OpNode applied to (src1 AND src2) and an all-zeros
// vector.
//   Suffix - element-size letter used, together with NAME and _.ZSuffix, to
//            name the instruction records referenced by the extra patterns.
multiclass avx512_vptest<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                         OpndItins itins, X86VectorVTInfo _, string Suffix> {
  let ExeDomain = _.ExeDomain in {
    // Register-register form (commutable).
    let isCommutable = 1 in {
      defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _,
                    (outs _.KRC:$dst),
                    (ins _.RC:$src1, _.RC:$src2),
                    OpcodeStr,
                    "$src2, $src1", "$src1, $src2",
                    (OpNode (bitconvert
                             (_.i64VT (and _.RC:$src1, _.RC:$src2))),
                            _.ImmAllZerosV),
                    itins.rr>,
                EVEX_4V, Sched<[itins.Sched]>;
    }

    // Register-memory form.
    defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _,
                  (outs _.KRC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2),
                  OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (OpNode (bitconvert
                           (_.i64VT (and _.RC:$src1,
                                         (bitconvert
                                          (_.LdFrag addr:$src2))))),
                          _.ImmAllZerosV),
                  itins.rm>,
              EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
              Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }

  // Patterns for compare with 0 that just use the same source twice.
  def : Pat<(_.KVT (OpNode _.RC:$src, _.ImmAllZerosV)),
            (_.KVT (!cast<Instruction>(NAME # Suffix # _.ZSuffix # "rr")
                        _.RC:$src, _.RC:$src))>;

  // Same as above, with the result AND-ed with a mask register.
  def : Pat<(_.KVT (and _.KRC:$mask, (OpNode _.RC:$src, _.ImmAllZerosV))),
            (_.KVT (!cast<Instruction>(NAME # Suffix # _.ZSuffix # "rrk")
                        _.KRC:$mask, _.RC:$src, _.RC:$src))>;
}
5307
// Broadcast-memory form of a vptest-style instruction: the second source is a
// scalar load broadcast to the full vector.
multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                            OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _,
                   (outs _.KRC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2),
                   OpcodeStr,
                   "${src2}"##_.BroadcastStr##", $src1",
                   "$src1, ${src2}"##_.BroadcastStr,
                   (OpNode (and _.RC:$src1,
                                (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
                           _.ImmAllZerosV),
                   itins.rm>,
               EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
               Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
Igor Bregerfca0a342016-01-28 13:19:25 +00005322
// Use 512bit version to implement 128/256 bit in case NoVLX.
//   ExtendInfo - description of the wide vector type the sources are inserted
//                into.
//   _          - description of the narrow vector type being lowered.
//   Suffix     - element-size letter used to name the wide instruction
//                (NAME # Suffix # "Zrr" / "Zrrk").
// Each source is placed into an IMPLICIT_DEF wide register with
// INSERT_SUBREG, and the resulting mask is copied to the narrow mask class.
multiclass avx512_vptest_lowering<PatFrag OpNode, X86VectorVTInfo ExtendInfo,
                                  X86VectorVTInfo _, string Suffix> {
  // Unmasked test of two sources.
  def : Pat<(_.KVT (OpNode (bitconvert
                            (_.i64VT (and _.RC:$src1, _.RC:$src2))),
                           _.ImmAllZerosV)),
            (_.KVT (COPY_TO_REGCLASS
                     (!cast<Instruction>(NAME # Suffix # "Zrr")
                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                      _.RC:$src1, _.SubRegIdx),
                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                      _.RC:$src2, _.SubRegIdx)),
                     _.KRC))>;

  // Masked test of two sources.
  def : Pat<(_.KVT (and _.KRC:$mask,
                        (OpNode (bitconvert
                                 (_.i64VT (and _.RC:$src1, _.RC:$src2))),
                                _.ImmAllZerosV))),
            (COPY_TO_REGCLASS
              (!cast<Instruction>(NAME # Suffix # "Zrrk")
                (COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC),
                (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                               _.RC:$src1, _.SubRegIdx),
                (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                               _.RC:$src2, _.SubRegIdx)),
              _.KRC)>;

  // Unmasked compare of a single source with zero: the source is used twice.
  def : Pat<(_.KVT (OpNode _.RC:$src, _.ImmAllZerosV)),
            (_.KVT (COPY_TO_REGCLASS
                     (!cast<Instruction>(NAME # Suffix # "Zrr")
                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                      _.RC:$src, _.SubRegIdx),
                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                      _.RC:$src, _.SubRegIdx)),
                     _.KRC))>;

  // Masked compare of a single source with zero: the source is used twice.
  def : Pat<(_.KVT (and _.KRC:$mask, (OpNode _.RC:$src, _.ImmAllZerosV))),
            (COPY_TO_REGCLASS
              (!cast<Instruction>(NAME # Suffix # "Zrrk")
                (COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC),
                (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                               _.RC:$src, _.SubRegIdx),
                (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                               _.RC:$src, _.SubRegIdx)),
              _.KRC)>;
}
5367
// All vector widths of a dword/qword vptest-style instruction. Without VLX,
// the 128/256-bit cases are lowered onto the 512-bit instruction.
multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
                                  PatFrag OpNode, OpndItins itins,
                                  AVX512VLVectorVTInfo _, string Suffix> {
  // 512-bit forms.
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info512, Suffix>,
             avx512_vptest_mb<opc, OpcodeStr, OpNode, itins, _.info512>,
             EVEX_V512;
  }

  // Native 256-bit and 128-bit forms.
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info256,
                              Suffix>,
                avx512_vptest_mb<opc, OpcodeStr, OpNode,itins, _.info256>,
                EVEX_V256;
    defm Z128 : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info128,
                              Suffix>,
                avx512_vptest_mb<opc, OpcodeStr, OpNode, itins, _.info128>,
                EVEX_V128;
  }

  // Pattern-only lowering of the 256/128-bit cases through the 512-bit form.
  let Predicates = [HasAVX512, NoVLX] in {
    defm Z256_Alt : avx512_vptest_lowering< OpNode, _.info512, _.info256,
                                            Suffix>;
    defm Z128_Alt : avx512_vptest_lowering< OpNode, _.info512, _.info128,
                                            Suffix>;
  }
}
5386
// Dword ("d") and qword ("q") variants of a vptest-style instruction; the
// qword variant additionally carries VEX_W.
multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                            OpndItins itins> {
  defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", OpNode, itins,
                                  avx512vl_i32_info, "D">;
  defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", OpNode, itins,
                                  avx512vl_i64_info, "Q">,
           VEX_W;
}
5394
// Word ("w") and byte ("b") variants of a vptest-style instruction for all
// vector widths; the word variants additionally carry VEX_W.
multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
                            PatFrag OpNode, OpndItins itins> {
  // 512-bit forms.
  let Predicates = [HasBWI] in {
  defm WZ: avx512_vptest<opc, OpcodeStr#"w", OpNode, itins, v32i16_info, "W">,
           EVEX_V512, VEX_W;
  defm BZ: avx512_vptest<opc, OpcodeStr#"b", OpNode, itins, v64i8_info, "B">,
           EVEX_V512;
  }

  // Native 256-bit and 128-bit forms.
  let Predicates = [HasVLX, HasBWI] in {
  defm WZ256: avx512_vptest<opc, OpcodeStr#"w", OpNode, itins, v16i16x_info, "W">,
              EVEX_V256, VEX_W;
  defm WZ128: avx512_vptest<opc, OpcodeStr#"w", OpNode, itins, v8i16x_info, "W">,
              EVEX_V128, VEX_W;
  defm BZ256: avx512_vptest<opc, OpcodeStr#"b", OpNode, itins, v32i8x_info, "B">,
              EVEX_V256;
  defm BZ128: avx512_vptest<opc, OpcodeStr#"b", OpNode, itins, v16i8x_info, "B">,
              EVEX_V128;
  }

  // Pattern-only lowering of the 256/128-bit cases through the 512-bit form
  // when VLX is unavailable. The patterns select the WZ/BZ instructions
  // defined above under HasBWI, so they must be guarded by HasBWI as well
  // (not merely HasAVX512); otherwise a BWI-only instruction could be
  // selected on a target without BWI.
  let Predicates = [HasBWI, NoVLX] in {
  defm BZ256_Alt : avx512_vptest_lowering<OpNode, v64i8_info, v32i8x_info, "B">;
  defm BZ128_Alt : avx512_vptest_lowering<OpNode, v64i8_info, v16i8x_info, "B">;
  defm WZ256_Alt : avx512_vptest_lowering<OpNode, v32i16_info, v16i16x_info, "W">;
  defm WZ128_Alt : avx512_vptest_lowering<OpNode, v32i16_info, v8i16x_info, "W">;
  }
}
5422
// These patterns are used to match vptestm/vptestnm. We don't treat pcmpeqm
// as commutable here because we already canonicalized all zeros vectors to the
// RHS during lowering.

// X86cmpm with condition immediate 0.
def X86pcmpeqm : PatFrag<(ops node:$src1, node:$src2),
                         (X86cmpm node:$src1, node:$src2, (i8 0))>;

// X86cmpm with condition immediate 4.
def X86pcmpnem : PatFrag<(ops node:$src1, node:$src2),
                         (X86cmpm node:$src1, node:$src2, (i8 4))>;
5430
// Combines the word/byte forms (opcode opc_wb) and the dword/qword forms
// (opcode opc_dq) of a vptest-style instruction.
multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq,
                                   string OpcodeStr, PatFrag OpNode,
                                   OpndItins itins>
    : avx512_vptest_wb <opc_wb, OpcodeStr, OpNode, itins>,
      avx512_vptest_dq<opc_dq, OpcodeStr, OpNode, itins>;
Elena Demikhovsky431b81e2015-04-21 13:13:46 +00005435
// vptestm matches X86pcmpnem and vptestnm matches X86pcmpeqm. Both share
// opcodes 0x26 (byte/word) and 0x27 (dword/qword) and differ in their prefix
// class (T8PD vs. T8XS).
defm VPTESTM  : avx512_vptest_all_forms<0x26, 0x27, "vptestm", X86pcmpnem,
                                        SSE_BIT_ITINS_P>,
                T8PD;
defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86pcmpeqm,
                                        SSE_BIT_ITINS_P>,
                T8XS;
Elena Demikhovskya30e4372014-02-05 07:05:03 +00005440
Cameron McInally9b7c15a2014-11-25 20:41:51 +00005441
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00005442//===----------------------------------------------------------------------===//
5443// AVX-512 Shift instructions
5444//===----------------------------------------------------------------------===//
// Shift by an 8-bit immediate: register-source and memory-source forms.
//   ImmFormR - instruction format of the register form.
//   ImmFormM - instruction format of the memory form.
multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
                            string OpcodeStr, SDNode OpNode, OpndItins itins,
                            X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    // Register source, immediate shift amount.
    defm ri : AVX512_maskable<opc, ImmFormR, _,
                  (outs _.RC:$dst),
                  (ins _.RC:$src1, u8imm:$src2),
                  OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, (i8 imm:$src2))),
                  itins.rr>,
              Sched<[itins.Sched]>;

    // Memory source, immediate shift amount.
    defm mi : AVX512_maskable<opc, ImmFormM, _,
                  (outs _.RC:$dst),
                  (ins _.MemOp:$src1, u8imm:$src2),
                  OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                (i8 imm:$src2))),
                  itins.rm>,
              Sched<[itins.Sched.Folded]>;
  }
}
5462
// Shift by an 8-bit immediate where the source is a scalar load broadcast to
// the full vector.
multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
                             string OpcodeStr, SDNode OpNode, OpndItins itins,
                             X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  // The only inputs are the memory operand and the immediate, so there is no
  // register source to tag with ReadAfterLd; the scheduling list matches the
  // one used by the `mi` form of avx512_shift_rmi.
  defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
                   (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
      "$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2",
     (_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src1)), (i8 imm:$src2))),
     itins.rm>, EVEX_B, Sched<[itins.Sched.Folded]>;
}
5473
// Shift by a count held in a vector register or loaded from memory.
//   SrcVT   - value type of the shift-count operand in the register form.
//   bc_frag - fragment applied to the loaded count (a loadv2i64) in the
//             memory form.
multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            OpndItins itins, ValueType SrcVT, PatFrag bc_frag,
                            X86VectorVTInfo _> {
  // src2 is always 128-bit
  let ExeDomain = _.ExeDomain in {
    // Count in a VR128X register.
    defm rr : AVX512_maskable<opc, MRMSrcReg, _,
                  (outs _.RC:$dst),
                  (ins _.RC:$src1, VR128X:$src2),
                  OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2))),
                  itins.rr>,
              AVX512BIBase, EVEX_4V, Sched<[itins.Sched]>;

    // Count loaded from a 128-bit memory operand.
    defm rm : AVX512_maskable<opc, MRMSrcMem, _,
                  (outs _.RC:$dst),
                  (ins _.RC:$src1, i128mem:$src2),
                  OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1,
                                (bc_frag (loadv2i64 addr:$src2)))),
                  itins.rm>,
              AVX512BIBase, EVEX_4V,
              Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
5492
// Instantiates avx512_shift_rrm for the 512-, 256- and 128-bit members of
// VTInfo. The 512-bit form requires only prd; the 256/128-bit forms also
// require HasVLX. The EVEX_CD8 tuple kind changes with the vector width
// (CD8VQ / CD8VH / CD8VF) while the count operand stays 128-bit in every form.
multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              OpndItins itins, ValueType SrcVT, PatFrag bc_frag,
                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
  defm Z    : avx512_shift_rrm<opc, OpcodeStr, OpNode, itins, SrcVT, bc_frag,
                               VTInfo.info512>, EVEX_V512,
                               EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, itins, SrcVT, bc_frag,
                               VTInfo.info256>, EVEX_V256,
                               EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
  defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, itins, SrcVT, bc_frag,
                               VTInfo.info128>, EVEX_V128,
                               EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
  }
}
5509
// Instantiates avx512_shift_sizes for dword (D), qword (Q) and word (W)
// elements, appending "d"/"q"/"w" to OpcodeStr. D and Q are predicated on
// HasAVX512, W on HasBWI; Q additionally carries VEX_W.
// NOTE(review): W passes SrcVT = v8i16 together with bc_v2i64, whereas D pairs
// v4i32 with bc_v4i32 -- confirm the mixed pairing is intended.
multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
                              string OpcodeStr, SDNode OpNode,
                              OpndItins itins> {
  defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, itins, v4i32,
                              bc_v4i32, avx512vl_i32_info, HasAVX512>;
  defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, itins, v2i64,
                              bc_v2i64, avx512vl_i64_info, HasAVX512>, VEX_W;
  defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, itins, v8i16,
                              bc_v2i64, avx512vl_i16_info, HasBWI>;
}
5520
// Instantiates the immediate-operand multiclasses avx512_shift_rmi and
// avx512_shift_rmbi (both defined earlier in this file) for the 512-, 256-
// and 128-bit members of VTInfo. The 512-bit form requires HasAVX512; the
// narrower forms additionally require HasVLX.
multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
                                  string OpcodeStr, SDNode OpNode,
                                  OpndItins itins, AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in
  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, itins,
                              VTInfo.info512>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, itins,
                               VTInfo.info512>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, itins,
                              VTInfo.info256>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, itins,
                               VTInfo.info256>, EVEX_V256;
  defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              itins, VTInfo.info128>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, itins,
                               VTInfo.info128>, EVEX_V128;
  }
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00005540
// Word-element immediate forms: instantiates avx512_shift_rmi only (no
// avx512_shift_rmbi) for v32i16 / v16i16 / v8i16. The 512-bit form requires
// HasBWI; the narrower forms require HasVLX and HasBWI. All are VEX_WIG.
multiclass avx512_shift_rmi_w<bits<8> opcw,
                              Format ImmFormR, Format ImmFormM,
                              string OpcodeStr, SDNode OpNode,
                              OpndItins itins> {
  let Predicates = [HasBWI] in
  defm WZ:    avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                               itins, v32i16_info>, EVEX_V512, VEX_WIG;
  let Predicates = [HasVLX, HasBWI] in {
  defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                               itins, v16i16x_info>, EVEX_V256, VEX_WIG;
  defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                               itins, v8i16x_info>, EVEX_V128, VEX_WIG;
  }
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00005555
// Dword (D) and qword (Q) immediate forms built on avx512_shift_rmi_sizes.
// "d"/"q" is appended to OpcodeStr; Q additionally carries VEX_W.
multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
                               Format ImmFormR, Format ImmFormM,
                               string OpcodeStr, SDNode OpNode, OpndItins itins> {
  defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
                                 itins, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
  defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
                                 itins, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
}
Cameron McInally9b7c15a2014-11-25 20:41:51 +00005564
// Shift-by-immediate instructions: D/Q forms plus a separate W form.
defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
                                 SSE_INTSHIFT_P>,
             avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
                                SSE_INTSHIFT_P>, AVX512BIi8Base, EVEX_4V;

defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
                                 SSE_INTSHIFT_P>,
             avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
                                SSE_INTSHIFT_P>, AVX512BIi8Base, EVEX_4V;

defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
                                 SSE_INTSHIFT_P>,
             avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
                                SSE_INTSHIFT_P>, AVX512BIi8Base, EVEX_4V;

// Rotate-by-immediate instructions: D/Q forms only.
defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
                                 SSE_INTSHIFT_P>, AVX512BIi8Base, EVEX_4V;
defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
                                 SSE_INTSHIFT_P>, AVX512BIi8Base, EVEX_4V;

// Shift-by-128-bit-count instructions (D/Q/W forms).
defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl, SSE_INTSHIFT_P>;
defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra, SSE_INTSHIFT_P>;
defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl, SSE_INTSHIFT_P>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00005588
// Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX.
// Each pattern places $src1 in the low subregister of an IMPLICIT_DEF v8i64,
// runs the 512-bit VPSRAQZrr / VPSRAQZri, and extracts the same subregister
// from the result. The count operand ($src2) is passed through unchanged.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                 VR128X:$src2)), sub_ymm)>;

  def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                 VR128X:$src2)), sub_xmm)>;

  def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                 imm:$src2)), sub_ymm)>;

  def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                 imm:$src2)), sub_xmm)>;
}
5615
//===-------------------------------------------------------------------===//
// Variable Bit Shifts
//===-------------------------------------------------------------------===//

// Two-operand vector op where both operands have type _.VT. Defines a
// register/register form (rr) and a register/full-vector-load form (rm), both
// through AVX512_maskable. Only the rm form carries EVEX_CD8.
multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2))),
                   itins.rr>, AVX5128IBase, EVEX_4V,
                   Sched<[itins.Sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1,
                   (_.VT (bitconvert (_.LdFrag addr:$src2))))),
                   itins.rm>, AVX5128IBase, EVEX_4V,
                   EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
5638
// Broadcast-memory form (rmb) of the two-operand vector op: $src2 is a scalar
// memory operand loaded with _.ScalarLdFrag and wrapped in X86VBroadcast. The
// assembly string appends _.BroadcastStr to the memory operand, and the
// record is tagged EVEX_B.
multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                    "${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr,
                    (_.VT (OpNode _.RC:$src1, (_.VT (X86VBroadcast
                                                (_.ScalarLdFrag addr:$src2))))),
                    itins.rm>, AVX5128IBase, EVEX_B,
                    EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
Simon Pilgrim7f2a6d52017-01-13 13:16:19 +00005652
// Instantiates avx512_var_shift and avx512_var_shift_mb for the 512-, 256-
// and 128-bit members of the VL info. The 512-bit form requires HasAVX512;
// the narrower forms additionally require HasVLX.
multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  OpndItins itins, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info512>,
           avx512_var_shift_mb<opc, OpcodeStr, OpNode, itins, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info256>,
              avx512_var_shift_mb<opc, OpcodeStr, OpNode, itins, _.info256>, EVEX_V256;
  defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info128>,
              avx512_var_shift_mb<opc, OpcodeStr, OpNode, itins, _.info128>, EVEX_V128;
  }
}
5666
// Dword (D) and qword (Q) instantiations of avx512_var_shift_sizes sharing one
// opcode; "d"/"q" is appended to OpcodeStr and Q additionally carries VEX_W.
multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode, OpndItins itins> {
  defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, itins,
                                  avx512vl_i32_info>;
  defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, itins,
                                  avx512vl_i64_info>, VEX_W;
}
5674
// Use 512bit version to implement 128/256 bit in case NoVLX.
// For the 256- and 128-bit types of the VL info, both operands are inserted
// into IMPLICIT_DEF 512-bit values, the instruction named OpcodeStr#"Zrr" is
// applied, and the low subregister of the result is extracted. The patterns
// are guarded by the predicate list p.
multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
                                     SDNode OpNode, list<Predicate> p> {
  let Predicates = p in {
  def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
                                  (_.info256.VT _.info256.RC:$src2))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(OpcodeStr#"Zrr")
                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;

  def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
                                  (_.info128.VT _.info128.RC:$src2))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(OpcodeStr#"Zrr")
                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
  }
}
// Word-element instantiations of avx512_var_shift (rr/rm only; the broadcast
// multiclass is not used here). The 512-bit form requires HasBWI; the
// narrower forms require HasVLX and HasBWI. All forms carry VEX_W.
multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
                              SDNode OpNode, OpndItins itins> {
  let Predicates = [HasBWI] in
  defm WZ:    avx512_var_shift<opc, OpcodeStr, OpNode, itins, v32i16_info>,
              EVEX_V512, VEX_W;
  let Predicates = [HasVLX, HasBWI] in {

  defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, itins, v16i16x_info>,
              EVEX_V256, VEX_W;
  defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, itins, v8i16x_info>,
              EVEX_V128, VEX_W;
  }
}
5709
// Per-element variable shifts: D/Q forms plus a W form with its own opcode.
defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl, SSE_INTSHIFT_P>,
              avx512_var_shift_w<0x12, "vpsllvw", shl, SSE_INTSHIFT_P>;

defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra, SSE_INTSHIFT_P>,
              avx512_var_shift_w<0x11, "vpsravw", sra, SSE_INTSHIFT_P>;

defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl, SSE_INTSHIFT_P>,
              avx512_var_shift_w<0x10, "vpsrlvw", srl, SSE_INTSHIFT_P>;

// Per-element variable rotates: D/Q forms only.
defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SSE_INTSHIFT_P>;
defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SSE_INTSHIFT_P>;

// Widen 128/256-bit operations to the 512-bit instruction when VLX is absent.
defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", sra, [HasAVX512, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", shl, [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", sra, [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", srl, [HasBWI, NoVLX]>;
5726
// Special handling for VPSRAV intrinsics.
// Maps X86vsrav with a register or full-vector-load second operand onto the
// instruction named InstrStr # _.ZSuffix # <suffix>, in three flavours:
//   unmasked                              -> rr   / rm
//   vselect(mask, op, $src0)              -> rrk  / rmk
//   vselect(mask, op, _.ImmAllZerosV)     -> rrkz / rmkz
// The suffixes are written as quoted strings so the paste does not depend on
// TableGen treating an undefined bare identifier as a string.
multiclass avx512_var_shift_int_lowering<string InstrStr, X86VectorVTInfo _,
                                         list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(_.VT (X86vsrav _.RC:$src1, _.RC:$src2)),
              (!cast<Instruction>(InstrStr#_.ZSuffix#"rr") _.RC:$src1,
               _.RC:$src2)>;
    def : Pat<(_.VT (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2)))),
              (!cast<Instruction>(InstrStr#_.ZSuffix##"rm")
               _.RC:$src1, addr:$src2)>;
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                     (X86vsrav _.RC:$src1, _.RC:$src2), _.RC:$src0)),
              (!cast<Instruction>(InstrStr#_.ZSuffix#"rrk") _.RC:$src0,
               _.KRC:$mask, _.RC:$src1, _.RC:$src2)>;
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                     (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))),
                     _.RC:$src0)),
              (!cast<Instruction>(InstrStr#_.ZSuffix##"rmk") _.RC:$src0,
               _.KRC:$mask, _.RC:$src1, addr:$src2)>;
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                     (X86vsrav _.RC:$src1, _.RC:$src2), _.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr#_.ZSuffix#"rrkz") _.KRC:$mask,
               _.RC:$src1, _.RC:$src2)>;
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                     (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))),
                     _.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr#_.ZSuffix##"rmkz") _.KRC:$mask,
               _.RC:$src1, addr:$src2)>;
  }
}
5757
// Extends avx512_var_shift_int_lowering with patterns whose second operand is
// an X86VBroadcast of a scalar load, selecting the rmb / rmbk / rmbkz
// instructions (unmasked, vselect with $src0, vselect with _.ImmAllZerosV).
// Suffixes are quoted so the paste does not rely on undefined bare
// identifiers being treated as strings.
multiclass avx512_var_shift_int_lowering_mb<string InstrStr, X86VectorVTInfo _,
                                            list<Predicate> p> :
           avx512_var_shift_int_lowering<InstrStr, _, p> {
  let Predicates = p in {
    def : Pat<(_.VT (X86vsrav _.RC:$src1,
                     (X86VBroadcast (_.ScalarLdFrag addr:$src2)))),
              (!cast<Instruction>(InstrStr#_.ZSuffix##"rmb")
               _.RC:$src1, addr:$src2)>;
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                     (X86vsrav _.RC:$src1,
                      (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
                     _.RC:$src0)),
              (!cast<Instruction>(InstrStr#_.ZSuffix##"rmbk") _.RC:$src0,
               _.KRC:$mask, _.RC:$src1, addr:$src2)>;
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                     (X86vsrav _.RC:$src1,
                      (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
                     _.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr#_.ZSuffix##"rmbkz") _.KRC:$mask,
               _.RC:$src1, addr:$src2)>;
  }
}
5780
// X86vsrav selection patterns. The W forms use the lowering multiclass
// without broadcast patterns; the D and Q forms use the _mb variant.
defm : avx512_var_shift_int_lowering<"VPSRAVW", v8i16x_info, [HasVLX, HasBWI]>;
defm : avx512_var_shift_int_lowering<"VPSRAVW", v16i16x_info, [HasVLX, HasBWI]>;
defm : avx512_var_shift_int_lowering<"VPSRAVW", v32i16_info, [HasBWI]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v4i32x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v8i32x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v16i32_info, [HasAVX512]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v2i64x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v4i64x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v8i64_info, [HasAVX512]>;
5790
Simon Pilgrim1cbe8c22017-07-17 14:11:30 +00005791
// Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
// Vector operands are inserted into IMPLICIT_DEF 512-bit values, the 512-bit
// instruction is applied, and the low subregister of the result is extracted.
// In the immediate forms only $src1 is widened.
let Predicates = [HasAVX512, NoVLX] in {
  // rotl with a vector count -> VPROLVQZrr / VPROLVDZrr.
  def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
                       sub_xmm)>;
  def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
                       sub_ymm)>;

  def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
                        sub_xmm)>;
  def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
                        sub_ymm)>;

  // X86vrotli with an i8 immediate -> VPROLQZri / VPROLDZri.
  def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                        imm:$src2)), sub_xmm)>;
  def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                       imm:$src2)), sub_ymm)>;

  def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                        imm:$src2)), sub_xmm)>;
  def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                        imm:$src2)), sub_ymm)>;
}
5842
// Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
// Vector operands are inserted into IMPLICIT_DEF 512-bit values, the 512-bit
// instruction is applied, and the low subregister of the result is extracted.
// In the immediate forms only $src1 is widened.
let Predicates = [HasAVX512, NoVLX] in {
  // rotr with a vector count -> VPRORVQZrr / VPRORVDZrr.
  def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
                       sub_xmm)>;
  def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
                       sub_ymm)>;

  def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
                        sub_xmm)>;
  def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
                        sub_ymm)>;

  // X86vrotri with an i8 immediate -> VPRORQZri / VPRORDZri.
  def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                        imm:$src2)), sub_xmm)>;
  def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                       imm:$src2)), sub_ymm)>;

  def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                        imm:$src2)), sub_xmm)>;
  def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                        imm:$src2)), sub_ymm)>;
}
5893
//===-------------------------------------------------------------------===//
// 1-src variable permutation VPERMW/D/Q
//===-------------------------------------------------------------------===//

// Reuses avx512_var_shift / avx512_var_shift_mb to define the rr, rm and rmb
// forms for the 512-bit (HasAVX512) and 256-bit (HasAVX512 + HasVLX) members
// of the VL info. No 128-bit form is defined here.
multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 OpndItins itins, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info512>,
           avx512_var_shift_mb<opc, OpcodeStr, OpNode, itins, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in
  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info256>,
              avx512_var_shift_mb<opc, OpcodeStr, OpNode, itins, _.info256>, EVEX_V256;
}
5907
// Immediate-operand counterpart of avx512_vperm_dq_sizes: instantiates
// avx512_shift_rmi and avx512_shift_rmbi for the 512-bit (HasAVX512) and
// 256-bit (HasAVX512 + HasVLX) members of VTInfo. No 128-bit form is defined.
multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
                                  string OpcodeStr, SDNode OpNode,
                                  OpndItins itins, AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in
  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              itins, VTInfo.info512>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                               itins, VTInfo.info512>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in
  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              itins, VTInfo.info256>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                               itins, VTInfo.info256>, EVEX_V256;
}
5922
// Instantiates avx512_var_shift (rr/rm forms only) for all three vector
// widths. The 512-bit form requires prd; the 256/128-bit forms require
// HasVLX and prd.
multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
                           Predicate prd, SDNode OpNode,
                           OpndItins itins, AVX512VLVectorVTInfo _> {
  let Predicates = [prd] in
  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info512>,
             EVEX_V512 ;
  let Predicates = [HasVLX, prd] in {
  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info256>,
             EVEX_V256 ;
  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info128>,
             EVEX_V128 ;
  }
}
Elena Demikhovsky4078c752015-06-04 07:07:13 +00005936
// Byte/word permutes with a vector control operand.
defm VPERMW  : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
                               AVX2_PERMV_I, avx512vl_i16_info>, VEX_W;
defm VPERMB  : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
                               AVX2_PERMV_I, avx512vl_i8_info>;

// Dword/qword and single/double permutes with a vector control operand.
defm VPERMD  : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
                                     AVX2_PERMV_I, avx512vl_i32_info>;
defm VPERMQ  : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
                                     AVX2_PERMV_I, avx512vl_i64_info>, VEX_W;
defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
                                     AVX2_PERMV_F, avx512vl_f32_info>;
defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
                                     AVX2_PERMV_F, avx512vl_f64_info>, VEX_W;

// Qword/double permutes with an immediate control operand.
defm VPERMQ  : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
                             X86VPermi, AVX2_PERMV_I, avx512vl_i64_info>,
                             EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
                             X86VPermi, AVX2_PERMV_F, avx512vl_f64_info>,
                             EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
//===----------------------------------------------------------------------===//
// AVX-512 - VPERMIL
//===----------------------------------------------------------------------===//

// Forms whose control operand is a vector. The data operand uses the info
// "_" while the control operand uses the separate info "Ctrl" for its
// register class, memory operand, load fragments and value type. Defines
// register (rr), full-vector-load (rm) and broadcast-load (rmb) forms via
// AVX512_maskable. The rmb form takes its memory operand class and broadcast
// string from "_" rather than from "Ctrl".
multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
                             OpndItins itins, X86VectorVTInfo _,
                             X86VectorVTInfo Ctrl> {
  defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1,
                               (Ctrl.VT Ctrl.RC:$src2))), itins.rr>,
                  T8PD, EVEX_4V, Sched<[itins.Sched]>;
  defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode
                           _.RC:$src1,
                           (Ctrl.VT (bitconvert(Ctrl.LdFrag addr:$src2))))),
                  itins.rm>, T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                  Sched<[itins.Sched.Folded, ReadAfterLd]>;
  defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                   "${src2}"##_.BroadcastStr##", $src1",
                   "$src1, ${src2}"##_.BroadcastStr,
                   (_.VT (OpNode
                            _.RC:$src1,
                            (Ctrl.VT (X86VBroadcast
                                       (Ctrl.ScalarLdFrag addr:$src2))))),
                   itins.rm>, T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
5989
// Instantiates avx512_permil_vec with X86VPermilpv for the 512-bit form
// (HasAVX512) and the 128/256-bit forms (HasAVX512 + HasVLX), pairing each
// width of the data info "_" with the same width of the control info "Ctrl".
multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
                                    OpndItins itins, AVX512VLVectorVTInfo _,
                                    AVX512VLVectorVTInfo Ctrl> {
  let Predicates = [HasAVX512] in {
    defm Z    : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, itins,
                                  _.info512, Ctrl.info512>, EVEX_V512;
  }
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, itins,
                                  _.info128, Ctrl.info128>, EVEX_V128;
    defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, itins,
                                  _.info256, Ctrl.info256>, EVEX_V256;
  }
}
6004
// Defines both flavours of a permil instruction under the same NAME:
//   - the variable-control forms (opcode OpcVar) via avx512_permil_vec_common;
//   - the immediate-control forms (opcode OpcImm) via avx512_shift_rmi_sizes
//     with the X86VPermilpi node.
// Both flavours use the AVX_VPERMILV itinerary. The EVEX_CD8 element size for
// the immediate forms is taken from the 128-bit type info.
6005multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
6006 AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
Simon Pilgrim1401a752017-11-29 14:58:34 +00006007 defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, AVX_VPERMILV, _, Ctrl>;
Igor Breger78741a12015-10-04 07:20:41 +00006008 defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
Simon Pilgrim2dc4ff12017-12-01 13:25:54 +00006009 X86VPermilpi, AVX_VPERMILV, _>,
Igor Breger78741a12015-10-04 07:20:41 +00006010 EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
Igor Breger78741a12015-10-04 07:20:41 +00006011}
6012
// VPERMILPS: immediate opcode 0x04, variable opcode 0x0C; f32 data vectors
// with i32 control vectors, in the packed-single execution domain.
Craig Topper05948fb2016-08-02 05:11:15 +00006013let ExeDomain = SSEPackedSingle in
Igor Breger78741a12015-10-04 07:20:41 +00006014defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
6015 avx512vl_i32_info>;
// VPERMILPD: immediate opcode 0x05, variable opcode 0x0D; f64 data vectors
// with i64 control vectors, in the packed-double execution domain (VEX_W set).
Craig Topper05948fb2016-08-02 05:11:15 +00006016let ExeDomain = SSEPackedDouble in
Igor Breger78741a12015-10-04 07:20:41 +00006017defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
6018 avx512vl_i64_info>, VEX_W;
Simon Pilgrim1401a752017-11-29 14:58:34 +00006019
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006020//===----------------------------------------------------------------------===//
Elena Demikhovsky75ede682015-06-01 07:17:23 +00006021// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
6022//===----------------------------------------------------------------------===//
6023
// Immediate-controlled shuffles. All three share opcode 0x70 and the
// SSE_PSHUF itinerary; they differ in the selection node and in the
// instruction base class (AVX512BIi8Base / AVX512XSIi8Base / AVX512XDIi8Base).
// Note that the record prefixes VPSHUFH / VPSHUFL carry the mnemonics
// "vpshufhw" / "vpshuflw".
6024defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
Simon Pilgrim2dc4ff12017-12-01 13:25:54 +00006025 X86PShufd, SSE_PSHUF, avx512vl_i32_info>,
Elena Demikhovsky75ede682015-06-01 07:17:23 +00006026 EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
6027defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
Simon Pilgrim2dc4ff12017-12-01 13:25:54 +00006028 X86PShufhw, SSE_PSHUF>, EVEX, AVX512XSIi8Base;
Elena Demikhovsky75ede682015-06-01 07:17:23 +00006029defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
Simon Pilgrim2dc4ff12017-12-01 13:25:54 +00006030 X86PShuflw, SSE_PSHUF>, EVEX, AVX512XDIi8Base;
Michael Liao66233b72015-08-06 09:06:20 +00006031
// Instantiates avx512_var_shift on byte-element vectors for each width:
//   Z          - v64i8, requires HasBWI.
//   Z256, Z128 - v32i8 / v16i8, require HasVLX and HasBWI.
Simon Pilgrim2dc4ff12017-12-01 13:25:54 +00006032multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6033 OpndItins itins> {
Elena Demikhovsky55a99742015-06-22 13:00:42 +00006034 let Predicates = [HasBWI] in
Simon Pilgrim2dc4ff12017-12-01 13:25:54 +00006035 defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, itins, v64i8_info>, EVEX_V512;
Elena Demikhovsky55a99742015-06-22 13:00:42 +00006036
6037 let Predicates = [HasVLX, HasBWI] in {
Simon Pilgrim2dc4ff12017-12-01 13:25:54 +00006038 defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, itins, v32i8x_info>, EVEX_V256;
6039 defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, itins, v16i8x_info>, EVEX_V128;
Elena Demikhovsky55a99742015-06-22 13:00:42 +00006040 }
6041}
6042
// VPSHUFB: opcode 0x00, selected from X86pshufb, SSE_PSHUFB itinerary; marked
// VEX_WIG.
Simon Pilgrim2dc4ff12017-12-01 13:25:54 +00006043defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb, SSE_PSHUFB>, VEX_WIG;
Elena Demikhovsky55a99742015-06-22 13:00:42 +00006044
Elena Demikhovsky75ede682015-06-01 07:17:23 +00006045//===----------------------------------------------------------------------===//
Elena Demikhovsky0a74b7d2013-11-14 11:29:27 +00006046// Move Low to High and High to Low packed FP Instructions
6047//===----------------------------------------------------------------------===//
// Register-register VMOVLHPS (opcode 0x16): both sources and the result are
// VR128X; selected from a v4f32 X86Movlhps node.
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006048def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
6049 (ins VR128X:$src1, VR128X:$src2),
Elena Demikhovskycf088092013-12-11 14:31:04 +00006050 "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006051 [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))],
Simon Pilgrim369e59d2018-02-12 16:18:36 +00006052 IIC_SSE_MOV_LH>, Sched<[WriteFShuffle]>, EVEX_4V;
// Register-register VMOVHLPS (opcode 0x12): same operand shape as above,
// selected from a v4f32 X86Movhlps node.
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006053def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
6054 (ins VR128X:$src1, VR128X:$src2),
Elena Demikhovskycf088092013-12-11 14:31:04 +00006055 "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006056 [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))],
Simon Pilgrim369e59d2018-02-12 16:18:36 +00006057 IIC_SSE_MOV_LH>, Sched<[WriteFShuffle]>, EVEX_4V;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006058
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006059//===----------------------------------------------------------------------===//
Igor Bregerb6b27af2015-11-10 07:09:07 +00006060// VMOVHPS/PD VMOVLPS Instructions
6061// All patterns were taken from the SSE implementation.
6062//===----------------------------------------------------------------------===//
// Defines a single load-folding form `rm`: $src1 is a register of class _.RC
// and $src2 is an f64 memory operand. The selection pattern loads an f64,
// wraps it with scalar_to_vector as v2f64, bitconverts that to _.VT and feeds
// it as the second operand of OpNode.
6063multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
6064 X86VectorVTInfo _> {
Craig Toppere70231b2017-02-26 06:45:54 +00006065 let ExeDomain = _.ExeDomain in
Craig Toppere1cac152016-06-07 07:27:54 +00006066 def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
6067 (ins _.RC:$src1, f64mem:$src2),
6068 !strconcat(OpcodeStr,
6069 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6070 [(set _.RC:$dst,
6071 (OpNode _.RC:$src1,
6072 (_.VT (bitconvert
6073 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))],
Simon Pilgrim369e59d2018-02-12 16:18:36 +00006074 IIC_SSE_MOV_LH>, Sched<[WriteFShuffleLd, ReadAfterLd]>, EVEX_4V;
Igor Bregerb6b27af2015-11-10 07:09:07 +00006075}
6076
// 128-bit load forms. The "high" moves use opcode 0x16 and the "low" moves use
// opcode 0x12. PS variants use v4f32 with EVEX_CD8<32, CD8VT2>; PD variants
// use v2f64 with EVEX_CD8<64, CD8VT1> and VEX_W.
6077defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", X86Movlhps,
6078 v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
Craig Topper3b11fca2017-09-18 00:20:53 +00006079defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
Igor Bregerb6b27af2015-11-10 07:09:07 +00006080 v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
6081defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", X86Movlps,
6082 v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6083defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movlpd,
6084 v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
6085
// Additional selection patterns (HasAVX512) that map other DAG shapes of the
// same operations onto the 128-bit `rm` instructions defined above: integer
// (i64) loads bitcast to FP vectors, X86vzload, and plain vector loads.
6086let Predicates = [HasAVX512] in {
6087 // VMOVHPS patterns
6088 def : Pat<(X86Movlhps VR128X:$src1,
6089 (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
6090 (VMOVHPSZ128rm VR128X:$src1, addr:$src2)>;
6091 def : Pat<(X86Movlhps VR128X:$src1,
Craig Topper0a197df2017-09-17 18:59:32 +00006092 (bc_v4f32 (v2i64 (X86vzload addr:$src2)))),
Igor Bregerb6b27af2015-11-10 07:09:07 +00006093 (VMOVHPSZ128rm VR128X:$src1, addr:$src2)>;
6094 // VMOVHPD patterns
6095 def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
Igor Bregerb6b27af2015-11-10 07:09:07 +00006096 (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
6097 (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
6098 // VMOVLPS patterns
6099 def : Pat<(v4f32 (X86Movlps VR128X:$src1, (load addr:$src2))),
6100 (VMOVLPSZ128rm VR128X:$src1, addr:$src2)>;
Igor Bregerb6b27af2015-11-10 07:09:07 +00006101 // VMOVLPD patterns
6102 def : Pat<(v2f64 (X86Movlpd VR128X:$src1, (load addr:$src2))),
6103 (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
// An X86Movsd whose second operand is a scalar f64 load is also selected as
// VMOVLPD with a folded load.
Igor Bregerb6b27af2015-11-10 07:09:07 +00006104 def : Pat<(v2f64 (X86Movsd VR128X:$src1,
6105 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
6106 (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
6107}
6108
// Store forms (MRMDestMem, scheduled as WriteStore). Each stores one f64
// extracted from element 0 of a v2f64 value derived from $src:
//   - the "high" stores (opcode 0x17) first apply X86Unpckh of $src with
//     itself;
//   - the "low" stores (opcode 0x13) extract directly from $src.
// The PS variants view the v4f32 register as v2f64 via bc_v2f64.
Simon Pilgrim369e59d2018-02-12 16:18:36 +00006109let SchedRW = [WriteStore] in {
Igor Bregerb6b27af2015-11-10 07:09:07 +00006110def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
6111 (ins f64mem:$dst, VR128X:$src),
6112 "vmovhps\t{$src, $dst|$dst, $src}",
Craig Topperc9b19232016-05-01 04:59:44 +00006113 [(store (f64 (extractelt
Igor Bregerb6b27af2015-11-10 07:09:07 +00006114 (X86Unpckh (bc_v2f64 (v4f32 VR128X:$src)),
6115 (bc_v2f64 (v4f32 VR128X:$src))),
6116 (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>,
6117 EVEX, EVEX_CD8<32, CD8VT2>;
6118def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
6119 (ins f64mem:$dst, VR128X:$src),
6120 "vmovhpd\t{$src, $dst|$dst, $src}",
Craig Topperc9b19232016-05-01 04:59:44 +00006121 [(store (f64 (extractelt
Igor Bregerb6b27af2015-11-10 07:09:07 +00006122 (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
6123 (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>,
6124 EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
6125def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
6126 (ins f64mem:$dst, VR128X:$src),
6127 "vmovlps\t{$src, $dst|$dst, $src}",
Craig Topperc9b19232016-05-01 04:59:44 +00006128 [(store (f64 (extractelt (bc_v2f64 (v4f32 VR128X:$src)),
Igor Bregerb6b27af2015-11-10 07:09:07 +00006129 (iPTR 0))), addr:$dst)],
6130 IIC_SSE_MOV_LH>,
6131 EVEX, EVEX_CD8<32, CD8VT2>;
6132def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
6133 (ins f64mem:$dst, VR128X:$src),
6134 "vmovlpd\t{$src, $dst|$dst, $src}",
Craig Topperc9b19232016-05-01 04:59:44 +00006135 [(store (f64 (extractelt (v2f64 VR128X:$src),
Igor Bregerb6b27af2015-11-10 07:09:07 +00006136 (iPTR 0))), addr:$dst)],
6137 IIC_SSE_MOV_LH>,
6138 EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
Simon Pilgrim369e59d2018-02-12 16:18:36 +00006139} // SchedRW
Craig Toppere1cac152016-06-07 07:27:54 +00006140
// Additional store-side selection patterns (HasAVX512) for the `mr`
// instructions defined above.
Igor Bregerb6b27af2015-11-10 07:09:07 +00006141let Predicates = [HasAVX512] in {
6142 // VMOVHPD patterns
// Storing element 0 of X86VPermilpi($src, 1) is selected as VMOVHPD.
Craig Topperc9b19232016-05-01 04:59:44 +00006143 def : Pat<(store (f64 (extractelt
Igor Bregerb6b27af2015-11-10 07:09:07 +00006144 (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
6145 (iPTR 0))), addr:$dst),
6146 (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
6147 // VMOVLPS patterns
// Load / X86Movlps / store back to the same address ($src1) is selected as a
// single VMOVLPS store.
6148 def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128X:$src2)),
6149 addr:$src1),
6150 (VMOVLPSZ128mr addr:$src1, VR128X:$src2)>;
Igor Bregerb6b27af2015-11-10 07:09:07 +00006151 // VMOVLPD patterns
// Same read-modify-write shape as above, for X86Movlpd on v2f64.
6152 def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128X:$src2)),
6153 addr:$src1),
6154 (VMOVLPDZ128mr addr:$src1, VR128X:$src2)>;
Igor Bregerb6b27af2015-11-10 07:09:07 +00006155}
6156//===----------------------------------------------------------------------===//
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006157// FMA - Fused Multiply Operations
6158//
Adam Nemet26371ce2014-10-24 00:02:55 +00006159
// Packed FMA, "213" operand form: the selection patterns pass operands to
// OpNode in the order ($src2, $src1, $src3), with $src1 tied to $dst.
// Defines three maskable variants:
//   r  - all-register;
//   m  - $src3 is a full-vector memory operand;
//   mb - $src3 is a scalar memory operand that is broadcast (EVEX_B).
// NOTE(review): the trailing `1, 1` / `1, 0` template arguments are forwarded
// to AVX512_maskable_3src, which is defined outside this chunk; their meaning
// should be confirmed there. `Suff` is not referenced in this body.
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006160multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
Craig Topper318e40b2016-07-25 07:20:31 +00006161 X86VectorVTInfo _, string Suff> {
Craig Topperb16598d2017-09-01 07:58:16 +00006162 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
Adam Nemet34801422014-10-08 23:25:39 +00006163 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
Adam Nemet6bddb8c2014-09-29 22:54:41 +00006164 (ins _.RC:$src2, _.RC:$src3),
Adam Nemet2e91ee52014-08-14 17:13:19 +00006165 OpcodeStr, "$src3, $src2", "$src2, $src3",
Simon Pilgrim6a009702017-11-29 17:21:15 +00006166 (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), NoItinerary, 1, 1>,
Simon Pilgrim97160be2017-11-27 10:41:32 +00006167 AVX512FMA3Base, Sched<[WriteFMA]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006168
Craig Toppere1cac152016-06-07 07:27:54 +00006169 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6170 (ins _.RC:$src2, _.MemOp:$src3),
6171 OpcodeStr, "$src3, $src2", "$src2, $src3",
Simon Pilgrim6a009702017-11-29 17:21:15 +00006172 (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))),
Craig Topper468a8132017-12-12 07:06:35 +00006173 NoItinerary, 1, 0>, AVX512FMA3Base, Sched<[WriteFMALd, ReadAfterLd]>;
Elena Demikhovskyfcea06a2014-12-23 10:30:39 +00006174
Craig Toppere1cac152016-06-07 07:27:54 +00006175 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6176 (ins _.RC:$src2, _.ScalarMemOp:$src3),
6177 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
6178 !strconcat("$src2, ${src3}", _.BroadcastStr ),
Craig Topper6bcbf532016-07-25 07:20:28 +00006179 (OpNode _.RC:$src2,
Simon Pilgrim6a009702017-11-29 17:21:15 +00006180 _.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))),
6181 NoItinerary, 1, 0>, AVX512FMA3Base, EVEX_B,
Craig Topper468a8132017-12-12 07:06:35 +00006182 Sched<[WriteFMALd, ReadAfterLd]>;
Craig Topper5ec33a92016-07-22 05:00:42 +00006183 }
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006184}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006185
// Packed FMA, "213" form with an explicit rounding-control operand: defines
// the register-only `rb` variant, which takes an extra AVX512RC:$rc input and
// passes it to OpNode as an i32 immediate after ($src2, $src1, $src3).
// Encoded with EVEX_B and EVEX_RC; $src1 is tied to $dst.
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006186multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
Craig Topper318e40b2016-07-25 07:20:31 +00006187 X86VectorVTInfo _, string Suff> {
Craig Topperb16598d2017-09-01 07:58:16 +00006188 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006189 defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
Elena Demikhovsky7b0dd392015-01-28 10:21:27 +00006190 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6191 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
Simon Pilgrim6a009702017-11-29 17:21:15 +00006192 (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 imm:$rc))),
6193 NoItinerary, 1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[WriteFMA]>;
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006194}
Elena Demikhovsky7b0dd392015-01-28 10:21:27 +00006195
// Per-width instantiation of the "213" packed FMA forms:
//   Z          - 512-bit r/m/mb forms plus the rounding `rb` form (which uses
//                OpNodeRnd); requires HasAVX512.
//   Z256, Z128 - r/m/mb forms only; require HasVLX and HasAVX512.
// The EVEX_CD8 element size comes from the matching per-width type info.
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006196multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
Craig Topper318e40b2016-07-25 07:20:31 +00006197 SDNode OpNodeRnd, AVX512VLVectorVTInfo _,
6198 string Suff> {
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006199 let Predicates = [HasAVX512] in {
Craig Topper318e40b2016-07-25 07:20:31 +00006200 defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info512, Suff>,
6201 avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, _.info512,
6202 Suff>, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
Elena Demikhovskyfcea06a2014-12-23 10:30:39 +00006203 }
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006204 let Predicates = [HasVLX, HasAVX512] in {
Craig Topper318e40b2016-07-25 07:20:31 +00006205 defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info256, Suff>,
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006206 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
Craig Topper318e40b2016-07-25 07:20:31 +00006207 defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info128, Suff>,
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006208 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
Elena Demikhovskyfcea06a2014-12-23 10:30:39 +00006209 }
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006210}
6211
// Element-type fan-out for the "213" packed FMA forms: PS appends "ps" to the
// mnemonic and uses the f32 type info; PD appends "pd", uses the f64 type
// info and sets VEX_W.
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006212multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
Craig Topper318e40b2016-07-25 07:20:31 +00006213 SDNode OpNodeRnd > {
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006214 defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
Craig Topper318e40b2016-07-25 07:20:31 +00006215 avx512vl_f32_info, "PS">;
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006216 defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
Craig Topper318e40b2016-07-25 07:20:31 +00006217 avx512vl_f64_info, "PD">, VEX_W;
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006218}
6219
// Packed "213" FMA family: one defm per operation, each given its opcode, the
// non-rounding selection node and the rounding selection node.
Craig Topperaf0b9922017-09-04 06:59:50 +00006220defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", X86Fmadd, X86FmaddRnd>;
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006221defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>;
6222defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>;
6223defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>;
6224defm VFNMADD213 : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86Fnmadd, X86FnmaddRnd>;
6225defm VFNMSUB213 : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>;
6226
Elena Demikhovskyfcea06a2014-12-23 10:30:39 +00006227
// Packed FMA, "231" operand form: the selection patterns pass operands to
// OpNode in the order ($src2, $src3, $src1), with $src1 tied to $dst.
// Defines the same three maskable variants as the 213 form (r, m, mb).
// The `r` variant additionally passes `vselect, 1` to AVX512_maskable_3src.
// NOTE(review): the meaning of those extra template arguments is defined
// outside this chunk — confirm against AVX512_maskable_3src.
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006228multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
Craig Topper318e40b2016-07-25 07:20:31 +00006229 X86VectorVTInfo _, string Suff> {
Craig Topperb16598d2017-09-01 07:58:16 +00006230 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006231 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6232 (ins _.RC:$src2, _.RC:$src3),
6233 OpcodeStr, "$src3, $src2", "$src2, $src3",
Simon Pilgrim6a009702017-11-29 17:21:15 +00006234 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), NoItinerary, 1, 1,
6235 vselect, 1>, AVX512FMA3Base, Sched<[WriteFMA]>;
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006236
Craig Toppere1cac152016-06-07 07:27:54 +00006237 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6238 (ins _.RC:$src2, _.MemOp:$src3),
6239 OpcodeStr, "$src3, $src2", "$src2, $src3",
Simon Pilgrim6a009702017-11-29 17:21:15 +00006240 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
Craig Topper468a8132017-12-12 07:06:35 +00006241 NoItinerary, 1, 0>, AVX512FMA3Base, Sched<[WriteFMALd, ReadAfterLd]>;
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006242
Craig Toppere1cac152016-06-07 07:27:54 +00006243 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6244 (ins _.RC:$src2, _.ScalarMemOp:$src3),
6245 OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
6246 "$src2, ${src3}"##_.BroadcastStr,
6247 (_.VT (OpNode _.RC:$src2,
6248 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
Simon Pilgrim6a009702017-11-29 17:21:15 +00006249 _.RC:$src1)), NoItinerary, 1, 0>, AVX512FMA3Base, EVEX_B,
Craig Topper468a8132017-12-12 07:06:35 +00006250 Sched<[WriteFMALd, ReadAfterLd]>;
Craig Topper5ec33a92016-07-22 05:00:42 +00006251 }
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006252}
6253
// Packed FMA, "231" form with an explicit rounding-control operand: defines
// the register-only `rb` variant. OpNode receives ($src2, $src3, $src1)
// followed by the i32 immediate $rc. Encoded with EVEX_B and EVEX_RC; $src1 is
// tied to $dst.
6254multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
Craig Topper318e40b2016-07-25 07:20:31 +00006255 X86VectorVTInfo _, string Suff> {
Craig Topperb16598d2017-09-01 07:58:16 +00006256 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006257 defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6258 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6259 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
Simon Pilgrim6a009702017-11-29 17:21:15 +00006260 (_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 imm:$rc))),
6261 NoItinerary, 1, 1, vselect, 1>,
Simon Pilgrim97160be2017-11-27 10:41:32 +00006262 AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[WriteFMA]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006263}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006264
// Per-width instantiation of the "231" packed FMA forms:
//   Z          - 512-bit r/m/mb forms plus the rounding `rb` form (which uses
//                OpNodeRnd); requires HasAVX512.
//   Z256, Z128 - r/m/mb forms only; require HasVLX and HasAVX512.
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006265multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
Craig Topper318e40b2016-07-25 07:20:31 +00006266 SDNode OpNodeRnd, AVX512VLVectorVTInfo _,
6267 string Suff> {
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006268 let Predicates = [HasAVX512] in {
Craig Topper318e40b2016-07-25 07:20:31 +00006269 defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info512, Suff>,
6270 avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, _.info512,
6271 Suff>, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
Elena Demikhovskyfcea06a2014-12-23 10:30:39 +00006272 }
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006273 let Predicates = [HasVLX, HasAVX512] in {
Craig Topper318e40b2016-07-25 07:20:31 +00006274 defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info256, Suff>,
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006275 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
Craig Topper318e40b2016-07-25 07:20:31 +00006276 defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info128, Suff>,
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006277 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
Elena Demikhovskyfcea06a2014-12-23 10:30:39 +00006278 }
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006279}
6280
// Element-type fan-out for the "231" packed FMA forms: PS appends "ps" to the
// mnemonic and uses the f32 type info; PD appends "pd", uses the f64 type
// info and sets VEX_W.
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006281multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
Craig Topper318e40b2016-07-25 07:20:31 +00006282 SDNode OpNodeRnd > {
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006283 defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
Craig Topper318e40b2016-07-25 07:20:31 +00006284 avx512vl_f32_info, "PS">;
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006285 defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
Craig Topper318e40b2016-07-25 07:20:31 +00006286 avx512vl_f64_info, "PD">, VEX_W;
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006287}
6288
// Packed "231" FMA family: one defm per operation, each given its opcode, the
// non-rounding selection node and the rounding selection node.
Craig Topperaf0b9922017-09-04 06:59:50 +00006289defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", X86Fmadd, X86FmaddRnd>;
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006290defm VFMSUB231 : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>;
6291defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>;
6292defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>;
6293defm VFNMADD231 : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>;
6294defm VFNMSUB231 : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>;
6295
// Packed FMA, "132" operand form, with $src1 tied to $dst. The register
// variant `r` passes ($src1, $src3, $src2) to OpNode. The memory variants `m`
// and `mb` instead place the loaded/broadcast $src3 first and pass
// ($src3, $src1, $src2); the in-body comments explain why.
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006296multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
Craig Topper318e40b2016-07-25 07:20:31 +00006297 X86VectorVTInfo _, string Suff> {
Craig Topperb16598d2017-09-01 07:58:16 +00006298 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006299 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
Craig Topper6bcbf532016-07-25 07:20:28 +00006300 (ins _.RC:$src2, _.RC:$src3),
6301 OpcodeStr, "$src3, $src2", "$src2, $src3",
Simon Pilgrim6a009702017-11-29 17:21:15 +00006302 (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), NoItinerary,
6303 1, 1, vselect, 1>, AVX512FMA3Base, Sched<[WriteFMA]>;
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006304
Craig Topper69e22782017-09-04 07:35:05 +00006305 // Pattern is in 312 order so that the load is in a different place from the
6306 // 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
Craig Toppere1cac152016-06-07 07:27:54 +00006307 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
Craig Topper6bcbf532016-07-25 07:20:28 +00006308 (ins _.RC:$src2, _.MemOp:$src3),
6309 OpcodeStr, "$src3, $src2", "$src2, $src3",
Simon Pilgrim6a009702017-11-29 17:21:15 +00006310 (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)),
Craig Topper468a8132017-12-12 07:06:35 +00006311 NoItinerary, 1, 0>, AVX512FMA3Base, Sched<[WriteFMALd, ReadAfterLd]>;
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006312
Craig Topper69e22782017-09-04 07:35:05 +00006313 // Pattern is in 312 order so that the load is in a different place from the
6314 // 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
Craig Toppere1cac152016-06-07 07:27:54 +00006315 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
Craig Topper6bcbf532016-07-25 07:20:28 +00006316 (ins _.RC:$src2, _.ScalarMemOp:$src3),
6317 OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
6318 "$src2, ${src3}"##_.BroadcastStr,
Craig Topper69e22782017-09-04 07:35:05 +00006319 (_.VT (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
Simon Pilgrim6a009702017-11-29 17:21:15 +00006320 _.RC:$src1, _.RC:$src2)), NoItinerary, 1, 0>,
Craig Topper468a8132017-12-12 07:06:35 +00006321 AVX512FMA3Base, EVEX_B, Sched<[WriteFMALd, ReadAfterLd]>;
Craig Topper5ec33a92016-07-22 05:00:42 +00006322 }
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006323}
6324
// Packed FMA, "132" form with an explicit rounding-control operand: defines
// the register-only `rb` variant. OpNode receives ($src1, $src3, $src2)
// followed by the i32 immediate $rc. Encoded with EVEX_B and EVEX_RC; $src1 is
// tied to $dst.
6325multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
Craig Topper318e40b2016-07-25 07:20:31 +00006326 X86VectorVTInfo _, string Suff> {
Craig Topperb16598d2017-09-01 07:58:16 +00006327 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006328 defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
Craig Topper6bcbf532016-07-25 07:20:28 +00006329 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6330 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
Simon Pilgrim6a009702017-11-29 17:21:15 +00006331 (_.VT ( OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 imm:$rc))),
6332 NoItinerary, 1, 1, vselect, 1>,
Simon Pilgrim97160be2017-11-27 10:41:32 +00006333 AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[WriteFMA]>;
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006334}
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006335
// Per-width instantiation of the "132" packed FMA forms:
//   Z          - 512-bit r/m/mb forms plus the rounding `rb` form (which uses
//                OpNodeRnd); requires HasAVX512.
//   Z256, Z128 - r/m/mb forms only; require HasVLX and HasAVX512.
6336multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
Craig Topper318e40b2016-07-25 07:20:31 +00006337 SDNode OpNodeRnd, AVX512VLVectorVTInfo _,
6338 string Suff> {
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006339 let Predicates = [HasAVX512] in {
Craig Topper318e40b2016-07-25 07:20:31 +00006340 defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info512, Suff>,
6341 avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, _.info512,
6342 Suff>, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006343 }
6344 let Predicates = [HasVLX, HasAVX512] in {
Craig Topper318e40b2016-07-25 07:20:31 +00006345 defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info256, Suff>,
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006346 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
Craig Topper318e40b2016-07-25 07:20:31 +00006347 defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info128, Suff>,
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006348 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6349 }
6350}
6351
// Element-type fan-out for the "132" packed FMA forms: PS appends "ps" to the
// mnemonic and uses the f32 type info; PD appends "pd", uses the f64 type
// info and sets VEX_W.
6352multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
Craig Topper318e40b2016-07-25 07:20:31 +00006353 SDNode OpNodeRnd > {
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006354 defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
Craig Topper318e40b2016-07-25 07:20:31 +00006355 avx512vl_f32_info, "PS">;
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006356 defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
Craig Topper318e40b2016-07-25 07:20:31 +00006357 avx512vl_f64_info, "PD">, VEX_W;
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006358}
6359
// Packed "132" FMA family: one defm per operation, each given its opcode, the
// non-rounding selection node and the rounding selection node.
Craig Topperaf0b9922017-09-04 06:59:50 +00006360defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", X86Fmadd, X86FmaddRnd>;
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006361defm VFMSUB132 : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>;
6362defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>;
6363defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>;
6364defm VFNMADD132 : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86Fnmadd, X86FnmaddRnd>;
6365defm VFNMSUB132 : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86Fnmsub, X86FnmsubRnd>;
Elena Demikhovskyfcea06a2014-12-23 10:30:39 +00006366
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006367// Scalar FMA
// Shared body for one scalar FMA operand form. The caller supplies the
// selection DAGs; this multiclass only wires them to instruction variants:
//   r_Int  - maskable, register operands on _.RC, pattern RHS_VEC_r;
//   m_Int  - maskable, $src3 is _.IntScalarMemOp, pattern RHS_VEC_m;
//   rb_Int - maskable, extra AVX512RC:$rc operand, pattern RHS_VEC_rb
//            (EVEX_B, EVEX_RC);
//   r, m   - isCodeGenOnly forms on _.FRC, patterns RHS_r and RHS_m.
// When MaskOnlyReg is set, `r` gets an empty pattern list instead of RHS_r.
// All variants tie $src1 to $dst.
Igor Breger15820b02015-07-01 13:24:28 +00006368multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6369 dag RHS_VEC_r, dag RHS_VEC_m, dag RHS_VEC_rb,
Craig Topper69e22782017-09-04 07:35:05 +00006370 dag RHS_r, dag RHS_m, bit MaskOnlyReg> {
Craig Topperb16598d2017-09-01 07:58:16 +00006371let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
Igor Breger15820b02015-07-01 13:24:28 +00006372 defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6373 (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
Simon Pilgrim6a009702017-11-29 17:21:15 +00006374 "$src3, $src2", "$src2, $src3", RHS_VEC_r, NoItinerary, 1, 1>,
6375 AVX512FMA3Base, Sched<[WriteFMA]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006376
Craig Toppere1cac152016-06-07 07:27:54 +00006377 defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
Craig Topperd9fe6642017-02-21 04:26:10 +00006378 (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
Simon Pilgrim6a009702017-11-29 17:21:15 +00006379 "$src3, $src2", "$src2, $src3", RHS_VEC_m, NoItinerary, 1, 1>,
Craig Topper468a8132017-12-12 07:06:35 +00006380 AVX512FMA3Base, Sched<[WriteFMALd, ReadAfterLd]>;
Igor Breger15820b02015-07-01 13:24:28 +00006381
6382 defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6383 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
Simon Pilgrim6a009702017-11-29 17:21:15 +00006384 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", RHS_VEC_rb,
6385 NoItinerary, 1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC,
Craig Toppera2f55282017-12-10 03:16:36 +00006386 Sched<[WriteFMA]>;
Igor Breger15820b02015-07-01 13:24:28 +00006387
Craig Toppereafdbec2016-08-13 06:48:41 +00006388 let isCodeGenOnly = 1, isCommutable = 1 in {
Craig Topper5bfa5ff2017-11-09 08:26:26 +00006389 def r : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
Igor Breger15820b02015-07-01 13:24:28 +00006390 (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
6391 !strconcat(OpcodeStr,
6392 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
Simon Pilgrim97160be2017-11-27 10:41:32 +00006393 !if(MaskOnlyReg, [], [RHS_r])>, Sched<[WriteFMA]>;
Craig Topper5bfa5ff2017-11-09 08:26:26 +00006394 def m : AVX512FMA3S<opc, MRMSrcMem, (outs _.FRC:$dst),
Craig Toppere1cac152016-06-07 07:27:54 +00006395 (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
6396 !strconcat(OpcodeStr,
6397 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
Craig Topper468a8132017-12-12 07:06:35 +00006398 [RHS_m]>, Sched<[WriteFMALd, ReadAfterLd]>;
Igor Breger15820b02015-07-01 13:24:28 +00006399 }// isCodeGenOnly = 1
Igor Breger15820b02015-07-01 13:24:28 +00006400}// Constraints = "$src1 = $dst"
Craig Topperb16598d2017-09-01 07:58:16 +00006401}
Igor Breger15820b02015-07-01 13:24:28 +00006402
// Instantiates avx512_fma3s_common three times, once per operand form, naming
// the results NAME#213#SUFF#Z, NAME#231#SUFF#Z and NAME#132#SUFF#Z.
//   - 213: intrinsic patterns use OpNodes1 / OpNodeRnds1; MaskOnlyReg = 0.
//   - 231: intrinsic patterns use OpNodes3 / OpNodeRnds3; MaskOnlyReg = 1.
//   - 132: only the memory intrinsic pattern is provided (via OpNodes1); the
//     register and rounding intrinsic patterns are null_frag; MaskOnlyReg = 1.
// The _.FRC patterns all use the plain OpNode.
6403multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
Craig Topper07dac552017-11-06 05:48:25 +00006404 string OpcodeStr, SDNode OpNode, SDNode OpNodes1,
6405 SDNode OpNodeRnds1, SDNode OpNodes3,
6406 SDNode OpNodeRnds3, X86VectorVTInfo _,
6407 string SUFF> {
Craig Topper2caa97c2017-02-25 19:36:28 +00006408 let ExeDomain = _.ExeDomain in {
Craig Topperb16598d2017-09-01 07:58:16 +00006409 defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
Craig Toppera55b4832016-12-09 06:42:28 +00006410 // Operands for intrinsic are in 123 order to preserve passthru
6411 // semantics.
Craig Topper07dac552017-11-06 05:48:25 +00006412 (_.VT (OpNodes1 _.RC:$src1, _.RC:$src2, _.RC:$src3)),
6413 (_.VT (OpNodes1 _.RC:$src1, _.RC:$src2,
6414 _.ScalarIntMemCPat:$src3)),
Craig Toppera55b4832016-12-09 06:42:28 +00006415 (_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src2, _.RC:$src3,
Igor Breger15820b02015-07-01 13:24:28 +00006416 (i32 imm:$rc))),
6417 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6418 _.FRC:$src3))),
6419 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
Craig Topper69e22782017-09-04 07:35:05 +00006420 (_.ScalarLdFrag addr:$src3)))), 0>;
Igor Breger15820b02015-07-01 13:24:28 +00006421
Craig Topperb16598d2017-09-01 07:58:16 +00006422 defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
Craig Topper07dac552017-11-06 05:48:25 +00006423 (_.VT (OpNodes3 _.RC:$src2, _.RC:$src3, _.RC:$src1)),
6424 (_.VT (OpNodes3 _.RC:$src2, _.ScalarIntMemCPat:$src3,
6425 _.RC:$src1)),
Craig Toppera55b4832016-12-09 06:42:28 +00006426 (_.VT ( OpNodeRnds3 _.RC:$src2, _.RC:$src3, _.RC:$src1,
Igor Breger15820b02015-07-01 13:24:28 +00006427 (i32 imm:$rc))),
6428 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
6429 _.FRC:$src1))),
6430 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
Craig Topper69e22782017-09-04 07:35:05 +00006431 (_.ScalarLdFrag addr:$src3), _.FRC:$src1))), 1>;
Igor Breger15820b02015-07-01 13:24:28 +00006432
Craig Toppereec768b2017-09-06 03:35:58 +00006433 // One pattern is in 312 order so that the load is in a different place from
6434 // the 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
Craig Topperb16598d2017-09-01 07:58:16 +00006435 defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
Craig Topper69e22782017-09-04 07:35:05 +00006436 (null_frag),
Craig Topper07dac552017-11-06 05:48:25 +00006437 (_.VT (OpNodes1 _.RC:$src1, _.ScalarIntMemCPat:$src3,
6438 _.RC:$src2)),
Craig Topper69e22782017-09-04 07:35:05 +00006439 (null_frag),
Igor Breger15820b02015-07-01 13:24:28 +00006440 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
6441 _.FRC:$src2))),
Craig Toppereec768b2017-09-06 03:35:58 +00006442 (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
6443 _.FRC:$src1, _.FRC:$src2))), 1>;
Craig Topper2caa97c2017-02-25 19:36:28 +00006444 }
Igor Breger15820b02015-07-01 13:24:28 +00006445}
6446
// Instantiates avx512_fma3s_all twice under HasAVX512, once per scalar
// element type:
//   - f32x_info with suffix "SS", EVEX_CD8<32, CD8VT1>, VEX_LIG
//   - f64x_info with suffix "SD", EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W
// The three opcode bytes (opc213, opc231, opc132), the mnemonic stem
// (OpcodeStr) and all SDNodes are forwarded unchanged to avx512_fma3s_all.
6447multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
Craig Topper07dac552017-11-06 05:48:25 +00006448 string OpcodeStr, SDNode OpNode, SDNode OpNodes1,
6449 SDNode OpNodeRnds1, SDNode OpNodes3,
Craig Toppera55b4832016-12-09 06:42:28 +00006450 SDNode OpNodeRnds3> {
Igor Breger15820b02015-07-01 13:24:28 +00006451 let Predicates = [HasAVX512] in {
6452 defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
Craig Topper07dac552017-11-06 05:48:25 +00006453 OpNodes1, OpNodeRnds1, OpNodes3, OpNodeRnds3,
6454 f32x_info, "SS">,
Craig Toppera55b4832016-12-09 06:42:28 +00006455 EVEX_CD8<32, CD8VT1>, VEX_LIG;
Igor Breger15820b02015-07-01 13:24:28 +00006456 defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
Craig Topper07dac552017-11-06 05:48:25 +00006457 OpNodes1, OpNodeRnds1, OpNodes3, OpNodeRnds3,
6458 f64x_info, "SD">,
Craig Toppera55b4832016-12-09 06:42:28 +00006459 EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
Igor Breger15820b02015-07-01 13:24:28 +00006460 }
6461}
6462
// Scalar FMA3 instantiations. The three opcode bytes are given in
// opc213, opc231, opc132 order (the avx512_fma3s parameter order), followed
// by the mnemonic stem and the five SDNodes that avx512_fma3s forwards to
// avx512_fma3s_all.
Craig Topper07dac552017-11-06 05:48:25 +00006463defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86Fmadds1,
6464 X86FmaddRnds1, X86Fmadds3, X86FmaddRnds3>;
6465defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86Fmsubs1,
6466 X86FmsubRnds1, X86Fmsubs3, X86FmsubRnds3>;
6467defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86Fnmadds1,
6468 X86FnmaddRnds1, X86Fnmadds3, X86FnmaddRnds3>;
6469defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86Fnmsubs1,
6470 X86FnmsubRnds1, X86Fnmsubs3, X86FnmsubRnds3>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006471
6472//===----------------------------------------------------------------------===//
Asaf Badouh655822a2016-01-25 11:14:24 +00006473// AVX-512 Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
6474//===----------------------------------------------------------------------===//
// Register, memory and broadcast forms of one IFMA multiply-add opcode for a
// single vector type `_`. All three records are built with
// AVX512_maskable_3src, and $src1 is tied to $dst by the enclosing
// Constraints:
//   r  - $src3 is a register (_.RC)
//   m  - $src3 is a full-width load (_.LdFrag)
//   mb - $src3 is an X86VBroadcast of a scalar load (_.ScalarLdFrag); EVEX_B
// NOTE(review): only the register form passes the trailing `1, 1` arguments;
// their meaning is defined by AVX512_maskable_3src, which is not visible in
// this part of the file.
6475let Constraints = "$src1 = $dst" in {
6476multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim4d08aed2017-12-05 14:59:40 +00006477 OpndItins itins, X86VectorVTInfo _> {
Craig Topper47e14ea2017-09-24 19:30:55 +00006478 // NOTE: The SDNodes have the multiply operands first with the add last.
6479 // This enables commuted load patterns to be autogenerated by tablegen.
Craig Topper6bf9b802017-02-26 06:45:45 +00006480 let ExeDomain = _.ExeDomain in {
Asaf Badouh655822a2016-01-25 11:14:24 +00006481 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6482 (ins _.RC:$src2, _.RC:$src3),
6483 OpcodeStr, "$src3, $src2", "$src2, $src3",
Simon Pilgrim4d08aed2017-12-05 14:59:40 +00006484 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), itins.rr, 1, 1>,
6485 AVX512FMA3Base, Sched<[itins.Sched]>;
Asaf Badouh655822a2016-01-25 11:14:24 +00006486
Craig Toppere1cac152016-06-07 07:27:54 +00006487 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6488 (ins _.RC:$src2, _.MemOp:$src3),
6489 OpcodeStr, "$src3, $src2", "$src2, $src3",
Simon Pilgrim4d08aed2017-12-05 14:59:40 +00006490 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
6491 itins.rm>, AVX512FMA3Base, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Asaf Badouh655822a2016-01-25 11:14:24 +00006492
Craig Toppere1cac152016-06-07 07:27:54 +00006493 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6494 (ins _.RC:$src2, _.ScalarMemOp:$src3),
6495 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
6496 !strconcat("$src2, ${src3}", _.BroadcastStr ),
Craig Topper47e14ea2017-09-24 19:30:55 +00006497 (OpNode _.RC:$src2,
6498 (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
Simon Pilgrim4d08aed2017-12-05 14:59:40 +00006499 _.RC:$src1), itins.rm>,
6500 AVX512FMA3Base, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Craig Topper6bf9b802017-02-26 06:45:45 +00006501 }
Asaf Badouh655822a2016-01-25 11:14:24 +00006502}
6503} // Constraints = "$src1 = $dst"
6504
// Instantiates avx512_pmadd52_rm for each vector width described by the
// AVX512VLVectorVTInfo `_`:
//   Z    - _.info512, EVEX_V512, requires HasIFMA
//   Z256 - _.info256, EVEX_V256, requires HasVLX and HasIFMA
//   Z128 - _.info128, EVEX_V128, requires HasVLX and HasIFMA
// Each width uses EVEX_CD8<EltSize, CD8VF> with its own info's EltSize.
6505multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim4d08aed2017-12-05 14:59:40 +00006506 OpndItins itins, AVX512VLVectorVTInfo _> {
Asaf Badouh655822a2016-01-25 11:14:24 +00006507 let Predicates = [HasIFMA] in {
Simon Pilgrim4d08aed2017-12-05 14:59:40 +00006508 defm Z : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, itins, _.info512>,
Asaf Badouh655822a2016-01-25 11:14:24 +00006509 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6510 }
6511 let Predicates = [HasVLX, HasIFMA] in {
Simon Pilgrim4d08aed2017-12-05 14:59:40 +00006512 defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, itins, _.info256>,
Asaf Badouh655822a2016-01-25 11:14:24 +00006513 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
Simon Pilgrim4d08aed2017-12-05 14:59:40 +00006514 defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, itins, _.info128>,
Asaf Badouh655822a2016-01-25 11:14:24 +00006515 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6516 }
6517}
6518
// The two IFMA instructions: opcode 0xb4 selects x86vpmadd52l and opcode 0xb5
// selects x86vpmadd52h. Both use the i64 vector infos (avx512vl_i64_info), the
// SSE_PMADD itineraries, and VEX_W.
6519defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
Simon Pilgrim4d08aed2017-12-05 14:59:40 +00006520 SSE_PMADD, avx512vl_i64_info>, VEX_W;
Asaf Badouh655822a2016-01-25 11:14:24 +00006521defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
Simon Pilgrim4d08aed2017-12-05 14:59:40 +00006522 SSE_PMADD, avx512vl_i64_info>, VEX_W;
Asaf Badouh655822a2016-01-25 11:14:24 +00006523
6524//===----------------------------------------------------------------------===//
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006525// AVX-512 Scalar convert from signed/unsigned integer to float/double
6526//===----------------------------------------------------------------------===//
6527
// Scalar integer -> fp conversion records without an explicit rounding
// operand. Four records are defined:
//   rr, rm         - operate on DstVT.FRC, carry no selection pattern ([]),
//                    and are marked hasSideEffects = 0 (rm also mayLoad = 1).
//   rr_Int, rm_Int - isCodeGenOnly; operate on DstVT.RC and select
//                    OpNode(src1, src2, FROUND_CURRENT), with src2 either
//                    SrcRC or a ld_frag load from x86memop.
// All four are EVEX_4V; the memory forms use the folded scheduling class.
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006528multiclass avx512_vcvtsi<bits<8> opc, SDNode OpNode, OpndItins itins,
6529 RegisterClass SrcRC, X86VectorVTInfo DstVT,
6530 X86MemOperand x86memop, PatFrag ld_frag, string asm> {
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006531 let hasSideEffects = 0 in {
6532 def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
6533 (ins DstVT.FRC:$src1, SrcRC:$src),
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006534 !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), [],
6535 itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006536 let mayLoad = 1 in
6537 def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
6538 (ins DstVT.FRC:$src1, x86memop:$src),
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006539 !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), [],
6540 itins.rm>, EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006541 } // hasSideEffects = 0
6542 let isCodeGenOnly = 1 in {
6543 def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
6544 (ins DstVT.RC:$src1, SrcRC:$src2),
6545 !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6546 [(set DstVT.RC:$dst,
6547 (OpNode (DstVT.VT DstVT.RC:$src1),
6548 SrcRC:$src2,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006549 (i32 FROUND_CURRENT)))], itins.rr>,
6550 EVEX_4V, Sched<[itins.Sched]>;
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006551
6552 def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
6553 (ins DstVT.RC:$src1, x86memop:$src2),
6554 !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6555 [(set DstVT.RC:$dst,
6556 (OpNode (DstVT.VT DstVT.RC:$src1),
6557 (ld_frag addr:$src2),
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006558 (i32 FROUND_CURRENT)))], itins.rm>,
6559 EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006560 }//isCodeGenOnly = 1
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006561}
Elena Demikhovskyd8fda622015-03-30 09:29:28 +00006562
// Register-only scalar integer -> fp conversion with an explicit rounding
// operand. Defines rrb_Int, which takes an extra AVX512RC:$rc input, selects
// OpNode(src1, src2, imm:$rc), and is encoded with EVEX_B and EVEX_RC.
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006563multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode, OpndItins itins,
6564 RegisterClass SrcRC, X86VectorVTInfo DstVT, string asm> {
Igor Bregerabe4a792015-06-14 12:44:55 +00006565 def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
6566 (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006567 !strconcat(asm,
6568 "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
Igor Bregerabe4a792015-06-14 12:44:55 +00006569 [(set DstVT.RC:$dst,
6570 (OpNode (DstVT.VT DstVT.RC:$src1),
6571 SrcRC:$src2,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006572 (i32 imm:$rc)))], itins.rr>,
6573 EVEX_4V, EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
Igor Bregerabe4a792015-06-14 12:44:55 +00006574}
6575
// Combines avx512_vcvtsi_round (rrb_Int) and avx512_vcvtsi (rr, rm, rr_Int,
// rm_Int) under the same NAME and marks the combined defm VEX_LIG.
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006576multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, OpndItins itins,
6577 RegisterClass SrcRC, X86VectorVTInfo DstVT,
6578 X86MemOperand x86memop, PatFrag ld_frag, string asm> {
6579 defm NAME : avx512_vcvtsi_round<opc, OpNode, itins, SrcRC, DstVT, asm>,
6580 avx512_vcvtsi<opc, OpNode, itins, SrcRC, DstVT, x86memop,
6581 ld_frag, asm>, VEX_LIG;
Igor Bregerabe4a792015-06-14 12:44:55 +00006582}
6583
// Scalar integer -> fp conversions, all guarded by HasAVX512:
//   - signed (opcode 0x2A, X86SintToFpRnd) and unsigned (opcode 0x7B,
//     X86UintToFpRnd) instruction definitions for GR32/GR64 sources;
//   - InstAliases that accept the suffix-less mnemonic for the 32-bit memory
//     forms;
//   - selection patterns mapping plain sint_to_fp / uint_to_fp onto the
//     rr / rm records, with IMPLICIT_DEF supplied as the $src1 operand.
Andrew Trick15a47742013-10-09 05:11:10 +00006584let Predicates = [HasAVX512] in {
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006585defm VCVTSI2SSZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, SSE_CVT_SI2SS, GR32,
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006586 v4f32x_info, i32mem, loadi32, "cvtsi2ss{l}">,
6587 XS, EVEX_CD8<32, CD8VT1>;
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006588defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, SSE_CVT_SI2SS, GR64,
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006589 v4f32x_info, i64mem, loadi64, "cvtsi2ss{q}">,
6590 XS, VEX_W, EVEX_CD8<64, CD8VT1>;
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006591defm VCVTSI2SDZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, SSE_CVT_SI2SD, GR32,
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006592 v2f64x_info, i32mem, loadi32, "cvtsi2sd{l}">,
6593 XD, EVEX_CD8<32, CD8VT1>;
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006594defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, SSE_CVT_SI2SD, GR64,
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006595 v2f64x_info, i64mem, loadi64, "cvtsi2sd{q}">,
6596 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006597
// NOTE(review): the vcvtsi2ss alias names FR64X operands although VCVTSI2SSZ
// is instantiated with v4f32x_info; confirm this register class is intended.
Craig Topper8f85ad12016-11-14 02:46:58 +00006598def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
6599 (VCVTSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0>;
6600def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
6601 (VCVTSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0>;
6602
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006603def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
6604 (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
6605def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006606 (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006607def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
6608 (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
6609def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006610 (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006611
6612def : Pat<(f32 (sint_to_fp GR32:$src)),
6613 (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
6614def : Pat<(f32 (sint_to_fp GR64:$src)),
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006615 (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006616def : Pat<(f64 (sint_to_fp GR32:$src)),
6617 (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
6618def : Pat<(f64 (sint_to_fp GR64:$src)),
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006619 (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
6620
// Unsigned variants. VCVTUSI2SDZ is instantiated from avx512_vcvtsi directly
// rather than avx512_vcvtsi_common, so it gets no rrb_Int record and has
// VEX_LIG attached explicitly here.
// NOTE(review): presumably because a 32-bit unsigned -> f64 conversion needs
// no rounding control - confirm.
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006621defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, SSE_CVT_SI2SS, GR32,
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006622 v4f32x_info, i32mem, loadi32,
6623 "cvtusi2ss{l}">, XS, EVEX_CD8<32, CD8VT1>;
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006624defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, SSE_CVT_SI2SS, GR64,
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006625 v4f32x_info, i64mem, loadi64, "cvtusi2ss{q}">,
6626 XS, VEX_W, EVEX_CD8<64, CD8VT1>;
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006627defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, X86UintToFpRnd, SSE_CVT_SI2SD, GR32, v2f64x_info,
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006628 i32mem, loadi32, "cvtusi2sd{l}">,
6629 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006630defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, SSE_CVT_SI2SD, GR64,
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006631 v2f64x_info, i64mem, loadi64, "cvtusi2sd{q}">,
6632 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006633
// NOTE(review): as with the signed alias above, the vcvtusi2ss alias names
// FR64X operands for an instruction built from v4f32x_info; confirm.
Craig Topper8f85ad12016-11-14 02:46:58 +00006634def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
6635 (VCVTUSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0>;
6636def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
6637 (VCVTUSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0>;
6638
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006639def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
6640 (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
6641def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))),
6642 (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
6643def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))),
6644 (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
6645def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))),
6646 (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
6647
6648def : Pat<(f32 (uint_to_fp GR32:$src)),
6649 (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
6650def : Pat<(f32 (uint_to_fp GR64:$src)),
6651 (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
6652def : Pat<(f64 (uint_to_fp GR32:$src)),
6653 (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
6654def : Pat<(f64 (uint_to_fp GR64:$src)),
6655 (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
Andrew Trick15a47742013-10-09 05:11:10 +00006656}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006657
6658//===----------------------------------------------------------------------===//
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006659// AVX-512 Scalar convert from float/double to integer
6660//===----------------------------------------------------------------------===//
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006661
// Scalar fp -> integer conversion using either the current rounding mode or
// an explicit rounding operand. Under HasAVX512 this defines:
//   rr_Int  - register source; selects OpNode(src, FROUND_CURRENT)
//   rrb_Int - register source plus AVX512RC:$rc; selects OpNode(src, imm:$rc)
//             and is encoded with EVEX_B, EVEX_RC
//   rm_Int  - SrcVT.IntScalarMemOp source matched via SrcVT.ScalarIntMemCPat;
//             selects OpNode(src, FROUND_CURRENT)
// plus two InstAliases spelled "v" # asm # aliasStr for rr_Int and rrb_Int.
// CodeGenOnly (default 1) is applied to rm_Int only, where it sets both
// isCodeGenOnly and ForceDisassemble.
6662multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
6663 X86VectorVTInfo DstVT, SDNode OpNode,
Craig Toppera49c3542018-01-06 19:20:33 +00006664 OpndItins itins, string asm,
6665 string aliasStr,
6666 bit CodeGenOnly = 1> {
Craig Toppere1cac152016-06-07 07:27:54 +00006667 let Predicates = [HasAVX512] in {
Craig Toppera0be5a02017-12-10 19:47:56 +00006668 def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
Asaf Badouh2744d212015-09-20 14:31:19 +00006669 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006670 [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 FROUND_CURRENT)))],
6671 itins.rr>, EVEX, VEX_LIG, Sched<[itins.Sched]>;
Craig Toppera0be5a02017-12-10 19:47:56 +00006672 def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
Craig Topper1de942b2017-12-10 17:42:44 +00006673 !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
6674 [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 imm:$rc)))],
6675 itins.rr>, EVEX, VEX_LIG, EVEX_B, EVEX_RC,
6676 Sched<[itins.Sched]>;
Craig Toppera49c3542018-01-06 19:20:33 +00006677 let isCodeGenOnly = CodeGenOnly, ForceDisassemble = CodeGenOnly in
Craig Toppera0be5a02017-12-10 19:47:56 +00006678 def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
Asaf Badouhad5c3fc2016-02-07 14:59:13 +00006679 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
Simon Pilgrimb13961d2016-06-11 14:34:10 +00006680 [(set DstVT.RC:$dst, (OpNode
Craig Topper5a63ca22017-03-13 03:59:06 +00006681 (SrcVT.VT SrcVT.ScalarIntMemCPat:$src),
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006682 (i32 FROUND_CURRENT)))], itins.rm>,
6683 EVEX, VEX_LIG, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Craig Toppere2659d82018-01-05 23:13:54 +00006684
6685 def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
6686 (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0>;
6687 def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
6688 (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0>;
Craig Toppera49c3542018-01-06 19:20:33 +00006689 } // Predicates = [HasAVX512]
6690}
6691
// Same records as avx512_cvt_s_int_round, but instantiated with
// CodeGenOnly = 0 (so rm_Int gets isCodeGenOnly = 0 and ForceDisassemble = 0),
// plus an additional "v" # asm # aliasStr InstAlias for the rm_Int memory form.
6692multiclass avx512_cvt_s_int_round_aliases<bits<8> opc, X86VectorVTInfo SrcVT,
6693 X86VectorVTInfo DstVT, SDNode OpNode,
6694 OpndItins itins, string asm,
6695 string aliasStr> :
6696 avx512_cvt_s_int_round<opc, SrcVT, DstVT, OpNode, itins, asm, aliasStr, 0> {
6697 let Predicates = [HasAVX512] in {
Craig Toppere2659d82018-01-05 23:13:54 +00006698 def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
6699 (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
6700 SrcVT.IntScalarMemOp:$src), 0>;
Simon Pilgrimb13961d2016-06-11 14:34:10 +00006701 } // Predicates = [HasAVX512]
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006702}
Asaf Badouh2744d212015-09-20 14:31:19 +00006703
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006704// Convert float/double to signed/unsigned int 32/64
// The signed forms (opcode 0x2D, X86cvts2si) use avx512_cvt_s_int_round; the
// unsigned forms (opcode 0x79, X86cvts2usi) use avx512_cvt_s_int_round_aliases.
// EVEX_CD8 follows the fp source width (32 for SS, 64 for SD), and VEX_W is
// added for the i64x_info destinations.
Simon Pilgrimb13961d2016-06-11 14:34:10 +00006705defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,
Craig Toppere2659d82018-01-05 23:13:54 +00006706 X86cvts2si, SSE_CVT_SS2SI_32, "cvtss2si", "{l}">,
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006707 XS, EVEX_CD8<32, CD8VT1>;
Simon Pilgrimb13961d2016-06-11 14:34:10 +00006708defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info,
Craig Toppere2659d82018-01-05 23:13:54 +00006709 X86cvts2si, SSE_CVT_SS2SI_64, "cvtss2si", "{q}">,
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006710 XS, VEX_W, EVEX_CD8<32, CD8VT1>;
Craig Toppera49c3542018-01-06 19:20:33 +00006711defm VCVTSS2USIZ: avx512_cvt_s_int_round_aliases<0x79, f32x_info, i32x_info,
Craig Toppere2659d82018-01-05 23:13:54 +00006712 X86cvts2usi, SSE_CVT_SS2SI_32, "cvtss2usi", "{l}">,
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006713 XS, EVEX_CD8<32, CD8VT1>;
Craig Toppera49c3542018-01-06 19:20:33 +00006714defm VCVTSS2USI64Z: avx512_cvt_s_int_round_aliases<0x79, f32x_info, i64x_info,
Craig Toppere2659d82018-01-05 23:13:54 +00006715 X86cvts2usi, SSE_CVT_SS2SI_64, "cvtss2usi", "{q}">,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006716 XS, VEX_W, EVEX_CD8<32, CD8VT1>;
Simon Pilgrimb13961d2016-06-11 14:34:10 +00006717defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info,
Craig Toppere2659d82018-01-05 23:13:54 +00006718 X86cvts2si, SSE_CVT_SD2SI, "cvtsd2si", "{l}">,
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006719 XD, EVEX_CD8<64, CD8VT1>;
Asaf Badouhad5c3fc2016-02-07 14:59:13 +00006720defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info,
Craig Toppere2659d82018-01-05 23:13:54 +00006721 X86cvts2si, SSE_CVT_SD2SI, "cvtsd2si", "{q}">,
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006722 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
Craig Toppera49c3542018-01-06 19:20:33 +00006723defm VCVTSD2USIZ: avx512_cvt_s_int_round_aliases<0x79, f64x_info, i32x_info,
Craig Toppere2659d82018-01-05 23:13:54 +00006724 X86cvts2usi, SSE_CVT_SD2SI, "cvtsd2usi", "{l}">,
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006725 XD, EVEX_CD8<64, CD8VT1>;
Craig Toppera49c3542018-01-06 19:20:33 +00006726defm VCVTSD2USI64Z: avx512_cvt_s_int_round_aliases<0x79, f64x_info, i64x_info,
Craig Toppere2659d82018-01-05 23:13:54 +00006727 X86cvts2usi, SSE_CVT_SD2SI, "cvtsd2usi", "{q}">,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006728 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006729
Asaf Badouhad5c3fc2016-02-07 14:59:13 +00006730// The SSE versions of these instructions are disabled for AVX512.
6731// Therefore, the SSE intrinsics are mapped to the AVX512 instructions.
// Map the SSE/SSE2 cvtss2si / cvtsd2si intrinsics (32- and 64-bit results)
// onto the EVEX rr_Int records for a VR128X source and onto the rm_Int
// records for an sse_load_f32 / sse_load_f64 source.
6732let Predicates = [HasAVX512] in {
6733 def : Pat<(i32 (int_x86_sse_cvtss2si (v4f32 VR128X:$src))),
Craig Toppera0be5a02017-12-10 19:47:56 +00006734 (VCVTSS2SIZrr_Int VR128X:$src)>;
Craig Topper5a63ca22017-03-13 03:59:06 +00006735 def : Pat<(i32 (int_x86_sse_cvtss2si sse_load_f32:$src)),
Craig Toppera0be5a02017-12-10 19:47:56 +00006736 (VCVTSS2SIZrm_Int sse_load_f32:$src)>;
Asaf Badouhad5c3fc2016-02-07 14:59:13 +00006737 def : Pat<(i64 (int_x86_sse_cvtss2si64 (v4f32 VR128X:$src))),
Craig Toppera0be5a02017-12-10 19:47:56 +00006738 (VCVTSS2SI64Zrr_Int VR128X:$src)>;
Craig Topper5a63ca22017-03-13 03:59:06 +00006739 def : Pat<(i64 (int_x86_sse_cvtss2si64 sse_load_f32:$src)),
Craig Toppera0be5a02017-12-10 19:47:56 +00006740 (VCVTSS2SI64Zrm_Int sse_load_f32:$src)>;
Asaf Badouhad5c3fc2016-02-07 14:59:13 +00006741 def : Pat<(i32 (int_x86_sse2_cvtsd2si (v2f64 VR128X:$src))),
Craig Toppera0be5a02017-12-10 19:47:56 +00006742 (VCVTSD2SIZrr_Int VR128X:$src)>;
Craig Topper5a63ca22017-03-13 03:59:06 +00006743 def : Pat<(i32 (int_x86_sse2_cvtsd2si sse_load_f64:$src)),
Craig Toppera0be5a02017-12-10 19:47:56 +00006744 (VCVTSD2SIZrm_Int sse_load_f64:$src)>;
Asaf Badouhad5c3fc2016-02-07 14:59:13 +00006745 def : Pat<(i64 (int_x86_sse2_cvtsd2si64 (v2f64 VR128X:$src))),
Craig Toppera0be5a02017-12-10 19:47:56 +00006746 (VCVTSD2SI64Zrr_Int VR128X:$src)>;
Craig Topper5a63ca22017-03-13 03:59:06 +00006747 def : Pat<(i64 (int_x86_sse2_cvtsd2si64 sse_load_f64:$src)),
Craig Toppera0be5a02017-12-10 19:47:56 +00006748 (VCVTSD2SI64Zrm_Int sse_load_f64:$src)>;
Asaf Badouhad5c3fc2016-02-07 14:59:13 +00006749} // HasAVX512
6750
// Map the SSE/SSE2 cvtsi2ss / cvtsi2sd intrinsics (GR32 and GR64 sources) and
// the AVX-512 cvtusi2sd intrinsic (GR32 source) onto the corresponding
// rr_Int records, and onto rm_Int when the integer operand is a
// loadi32 / loadi64.
Craig Topperac941b92016-09-25 16:33:53 +00006751let Predicates = [HasAVX512] in {
6752 def : Pat<(int_x86_sse_cvtsi2ss VR128X:$src1, GR32:$src2),
6753 (VCVTSI2SSZrr_Int VR128X:$src1, GR32:$src2)>;
6754 def : Pat<(int_x86_sse_cvtsi2ss VR128X:$src1, (loadi32 addr:$src2)),
6755 (VCVTSI2SSZrm_Int VR128X:$src1, addr:$src2)>;
6756 def : Pat<(int_x86_sse_cvtsi642ss VR128X:$src1, GR64:$src2),
6757 (VCVTSI642SSZrr_Int VR128X:$src1, GR64:$src2)>;
6758 def : Pat<(int_x86_sse_cvtsi642ss VR128X:$src1, (loadi64 addr:$src2)),
6759 (VCVTSI642SSZrm_Int VR128X:$src1, addr:$src2)>;
6760 def : Pat<(int_x86_sse2_cvtsi2sd VR128X:$src1, GR32:$src2),
6761 (VCVTSI2SDZrr_Int VR128X:$src1, GR32:$src2)>;
6762 def : Pat<(int_x86_sse2_cvtsi2sd VR128X:$src1, (loadi32 addr:$src2)),
6763 (VCVTSI2SDZrm_Int VR128X:$src1, addr:$src2)>;
6764 def : Pat<(int_x86_sse2_cvtsi642sd VR128X:$src1, GR64:$src2),
6765 (VCVTSI642SDZrr_Int VR128X:$src1, GR64:$src2)>;
6766 def : Pat<(int_x86_sse2_cvtsi642sd VR128X:$src1, (loadi64 addr:$src2)),
6767 (VCVTSI642SDZrm_Int VR128X:$src1, addr:$src2)>;
6768 def : Pat<(int_x86_avx512_cvtusi2sd VR128X:$src1, GR32:$src2),
6769 (VCVTUSI2SDZrr_Int VR128X:$src1, GR32:$src2)>;
6770 def : Pat<(int_x86_avx512_cvtusi2sd VR128X:$src1, (loadi32 addr:$src2)),
6771 (VCVTUSI2SDZrm_Int VR128X:$src1, addr:$src2)>;
6772} // Predicates = [HasAVX512]
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006773
Elad Cohen0c260102017-01-11 09:11:48 +00006774// Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
6775// which produce unnecessary vmovs{s,d} instructions
// Each pattern matches X86Movss/X86Movsd of $dst with a scalar_to_vector of
// sint_to_fp(GR32/GR64) and selects the single rr_Int record, passing $dst as
// its first source operand.
6776let Predicates = [HasAVX512] in {
6777def : Pat<(v4f32 (X86Movss
6778 (v4f32 VR128X:$dst),
6779 (v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
6780 (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
6781
6782def : Pat<(v4f32 (X86Movss
6783 (v4f32 VR128X:$dst),
6784 (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
6785 (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
6786
6787def : Pat<(v2f64 (X86Movsd
6788 (v2f64 VR128X:$dst),
6789 (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
6790 (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
6791
6792def : Pat<(v2f64 (X86Movsd
6793 (v2f64 VR128X:$dst),
6794 (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
6795 (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
6796} // Predicates = [HasAVX512]
6797
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006798// Convert float/double to signed/unsigned int 32/64 with truncation
// Under HasAVX512 this multiclass defines:
//   rr, rm  - isCodeGenOnly records that select plain OpNode on a scalar
//             _SrcRC.FRC register or a _SrcRC.ScalarLdFrag load
//   rr_Int  - vector-register source; OpNodeRnd(src, FROUND_CURRENT)
//   rrb_Int - "{sae}" form; OpNodeRnd(src, FROUND_NO_EXC), EVEX_B
//   rm_Int  - _SrcRC.IntScalarMemOp source; OpNodeRnd(src, FROUND_CURRENT)
// plus two InstAliases spelled asm # aliasStr for rr_Int and rrb_Int.
// CodeGenOnly (default 1) is applied to rm_Int only, where it sets both
// isCodeGenOnly and ForceDisassemble.
Simon Pilgrim18bcf932016-02-03 09:41:59 +00006799multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
6800 X86VectorVTInfo _DstRC, SDNode OpNode,
Craig Topper61d8a602018-01-06 21:27:25 +00006801 SDNode OpNodeRnd, OpndItins itins, string aliasStr,
6802 bit CodeGenOnly = 1>{
Asaf Badouh2744d212015-09-20 14:31:19 +00006803let Predicates = [HasAVX512] in {
Craig Topper90353a92018-01-06 21:02:22 +00006804 let isCodeGenOnly = 1 in {
Igor Bregerc59b3a22016-08-03 10:58:05 +00006805 def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
Asaf Badouh2744d212015-09-20 14:31:19 +00006806 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006807 [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))], itins.rr>,
6808 EVEX, Sched<[itins.Sched]>;
Igor Bregerc59b3a22016-08-03 10:58:05 +00006809 def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
Asaf Badouh2744d212015-09-20 14:31:19 +00006810 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006811 [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))],
6812 itins.rm>, EVEX, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Craig Topper90353a92018-01-06 21:02:22 +00006813 }
6814
6815 def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
6816 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
6817 [(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src),
6818 (i32 FROUND_CURRENT)))], itins.rr>,
6819 EVEX, VEX_LIG, Sched<[itins.Sched]>;
6820 def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
6821 !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
6822 [(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src),
6823 (i32 FROUND_NO_EXC)))], itins.rr>,
6824 EVEX,VEX_LIG , EVEX_B, Sched<[itins.Sched]>;
Craig Topper61d8a602018-01-06 21:27:25 +00006825 let isCodeGenOnly = CodeGenOnly, ForceDisassemble = CodeGenOnly in
Craig Topper0f4ccb72018-01-06 21:02:26 +00006826 def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
6827 (ins _SrcRC.IntScalarMemOp:$src),
6828 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
6829 [(set _DstRC.RC:$dst, (OpNodeRnd
6830 (_SrcRC.VT _SrcRC.ScalarIntMemCPat:$src),
6831 (i32 FROUND_CURRENT)))], itins.rm>,
6832 EVEX, VEX_LIG, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Simon Pilgrim916485c2016-08-18 11:22:22 +00006833
Igor Bregerc59b3a22016-08-03 10:58:05 +00006834 def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
Craig Topper90353a92018-01-06 21:02:22 +00006835 (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0>;
Craig Toppere2659d82018-01-05 23:13:54 +00006836 def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
Craig Topper90353a92018-01-06 21:02:22 +00006837 (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0>;
Asaf Badouh2744d212015-09-20 14:31:19 +00006838} //HasAVX512
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006839}
6840
// Same records as avx512_cvt_s_all, but instantiated with CodeGenOnly = 0 (so
// rm_Int gets isCodeGenOnly = 0 and ForceDisassemble = 0), plus an additional
// asm # aliasStr InstAlias for the rm_Int memory form.
Craig Topper61d8a602018-01-06 21:27:25 +00006841multiclass avx512_cvt_s_all_unsigned<bits<8> opc, string asm,
6842 X86VectorVTInfo _SrcRC,
6843 X86VectorVTInfo _DstRC, SDNode OpNode,
6844 SDNode OpNodeRnd, OpndItins itins,
6845 string aliasStr> :
6846 avx512_cvt_s_all<opc, asm, _SrcRC, _DstRC, OpNode, OpNodeRnd, itins,
6847 aliasStr, 0> {
6848let Predicates = [HasAVX512] in {
6849 def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
6850 (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
6851 _SrcRC.IntScalarMemOp:$src), 0>;
6852}
6853}
Asaf Badouh2744d212015-09-20 14:31:19 +00006854
// Truncating scalar fp -> integer instantiations. The signed forms (opcode
// 0x2C, fp_to_sint / X86cvtts2IntRnd) use avx512_cvt_s_all; the unsigned forms
// (opcode 0x78, fp_to_uint / X86cvtts2UIntRnd) use avx512_cvt_s_all_unsigned.
// EVEX_CD8 follows the fp source width (32 for SS, 64 for SD), and VEX_W is
// added for the i64x_info destinations.
Igor Bregerc59b3a22016-08-03 10:58:05 +00006855defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006856 fp_to_sint, X86cvtts2IntRnd, SSE_CVT_SS2SI_32, "{l}">,
Asaf Badouh2744d212015-09-20 14:31:19 +00006857 XS, EVEX_CD8<32, CD8VT1>;
Igor Bregerc59b3a22016-08-03 10:58:05 +00006858defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006859 fp_to_sint, X86cvtts2IntRnd, SSE_CVT_SS2SI_64, "{q}">,
Asaf Badouh2744d212015-09-20 14:31:19 +00006860 VEX_W, XS, EVEX_CD8<32, CD8VT1>;
Igor Bregerc59b3a22016-08-03 10:58:05 +00006861defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006862 fp_to_sint, X86cvtts2IntRnd, SSE_CVT_SD2SI, "{l}">,
Asaf Badouh2744d212015-09-20 14:31:19 +00006863 XD, EVEX_CD8<64, CD8VT1>;
Igor Bregerc59b3a22016-08-03 10:58:05 +00006864defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006865 fp_to_sint, X86cvtts2IntRnd, SSE_CVT_SD2SI, "{q}">,
Asaf Badouh2744d212015-09-20 14:31:19 +00006866 VEX_W, XD, EVEX_CD8<64, CD8VT1>;
6867
Craig Topper61d8a602018-01-06 21:27:25 +00006868defm VCVTTSS2USIZ: avx512_cvt_s_all_unsigned<0x78, "vcvttss2usi", f32x_info, i32x_info,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006869 fp_to_uint, X86cvtts2UIntRnd, SSE_CVT_SS2SI_32, "{l}">,
Asaf Badouh2744d212015-09-20 14:31:19 +00006870 XS, EVEX_CD8<32, CD8VT1>;
Craig Topper61d8a602018-01-06 21:27:25 +00006871defm VCVTTSS2USI64Z: avx512_cvt_s_all_unsigned<0x78, "vcvttss2usi", f32x_info, i64x_info,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006872 fp_to_uint, X86cvtts2UIntRnd, SSE_CVT_SS2SI_64, "{q}">,
Asaf Badouh2744d212015-09-20 14:31:19 +00006873 XS,VEX_W, EVEX_CD8<32, CD8VT1>;
Craig Topper61d8a602018-01-06 21:27:25 +00006874defm VCVTTSD2USIZ: avx512_cvt_s_all_unsigned<0x78, "vcvttsd2usi", f64x_info, i32x_info,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006875 fp_to_uint, X86cvtts2UIntRnd, SSE_CVT_SD2SI, "{l}">,
Asaf Badouh2744d212015-09-20 14:31:19 +00006876 XD, EVEX_CD8<64, CD8VT1>;
Craig Topper61d8a602018-01-06 21:27:25 +00006877defm VCVTTSD2USI64Z: avx512_cvt_s_all_unsigned<0x78, "vcvttsd2usi", f64x_info, i64x_info,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006878 fp_to_uint, X86cvtts2UIntRnd, SSE_CVT_SD2SI, "{q}">,
Asaf Badouh2744d212015-09-20 14:31:19 +00006879 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
Craig Topper61d8a602018-01-06 21:27:25 +00006880
// Map the SSE/SSE2 truncating cvttss2si / cvttsd2si intrinsics (32- and
// 64-bit results) onto the EVEX rr_Int records for a VR128X source and onto
// the rm_Int records for an sse_load_f32 / sse_load_f64 source.
// NOTE(review): the memory patterns here name the output operand
// ssmem:$src / sdmem:$src, whereas the non-truncating cvtss2si / cvtsd2si
// patterns repeat sse_load_f32:$src / sse_load_f64:$src in the output.
// Confirm whether the two spellings are meant to differ.
Asaf Badouh2744d212015-09-20 14:31:19 +00006881let Predicates = [HasAVX512] in {
6882 def : Pat<(i32 (int_x86_sse_cvttss2si (v4f32 VR128X:$src))),
Craig Topper8c252bc2016-09-18 18:59:33 +00006883 (VCVTTSS2SIZrr_Int VR128X:$src)>;
Ayman Musaf77219e2017-02-13 09:55:48 +00006884 def : Pat<(i32 (int_x86_sse_cvttss2si sse_load_f32:$src)),
6885 (VCVTTSS2SIZrm_Int ssmem:$src)>;
Asaf Badouh2744d212015-09-20 14:31:19 +00006886 def : Pat<(i64 (int_x86_sse_cvttss2si64 (v4f32 VR128X:$src))),
Craig Topper8c252bc2016-09-18 18:59:33 +00006887 (VCVTTSS2SI64Zrr_Int VR128X:$src)>;
Ayman Musaf77219e2017-02-13 09:55:48 +00006888 def : Pat<(i64 (int_x86_sse_cvttss2si64 sse_load_f32:$src)),
6889 (VCVTTSS2SI64Zrm_Int ssmem:$src)>;
Asaf Badouh2744d212015-09-20 14:31:19 +00006890 def : Pat<(i32 (int_x86_sse2_cvttsd2si (v2f64 VR128X:$src))),
Craig Topper8c252bc2016-09-18 18:59:33 +00006891 (VCVTTSD2SIZrr_Int VR128X:$src)>;
Ayman Musaf77219e2017-02-13 09:55:48 +00006892 def : Pat<(i32 (int_x86_sse2_cvttsd2si sse_load_f64:$src)),
6893 (VCVTTSD2SIZrm_Int sdmem:$src)>;
Asaf Badouh2744d212015-09-20 14:31:19 +00006894 def : Pat<(i64 (int_x86_sse2_cvttsd2si64 (v2f64 VR128X:$src))),
Craig Topper8c252bc2016-09-18 18:59:33 +00006895 (VCVTTSD2SI64Zrr_Int VR128X:$src)>;
Ayman Musaf77219e2017-02-13 09:55:48 +00006896 def : Pat<(i64 (int_x86_sse2_cvttsd2si64 sse_load_f64:$src)),
6897 (VCVTTSD2SI64Zrm_Int sdmem:$src)>;
Elena Demikhovskycf088092013-12-11 14:31:04 +00006898} // HasAVX512
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006899
//===----------------------------------------------------------------------===//
// AVX-512 Convert from float to double and back
//===----------------------------------------------------------------------===//
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006903
// Scalar FP-to-FP conversion, current-rounding-mode forms.
//   rr_Int / rm_Int : built through AVX512_maskable_scalar on the vector
//                     register class (_.RC); selected from OpNode with the
//                     FROUND_CURRENT rounding operand.
//   rr / rm         : isCodeGenOnly variants on the scalar FP register class
//                     (_.FRC); they carry an empty pattern list.
// `_` describes the destination type, `_Src` the source type.
multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr,
                                X86VectorVTInfo _, X86VectorVTInfo _Src,
                                SDNode OpNode, OpndItins itins> {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode (_.VT _.RC:$src1),
                                (_Src.VT _Src.RC:$src2),
                                (i32 FROUND_CURRENT))),
                  itins.rr>,
                EVEX_4V, VEX_LIG, Sched<[itins.Sched]>;

  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode (_.VT _.RC:$src1),
                                (_Src.VT _Src.ScalarIntMemCPat:$src2),
                                (i32 FROUND_CURRENT))),
                  itins.rm>,
                EVEX_4V, VEX_LIG,
                Sched<[itins.Sched.Folded, ReadAfterLd]>;

  let isCodeGenOnly = 1, hasSideEffects = 0 in {
    def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _Src.FRC:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
               itins.rr>,
             EVEX_4V, VEX_LIG, Sched<[itins.Sched]>;

    // The memory form has no pattern, so mayLoad must be stated explicitly.
    let mayLoad = 1 in
    def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
               itins.rm>,
             EVEX_4V, VEX_LIG,
             Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
6935
// Scalar conversion with SAE (suppress all exceptions).
// Adds a register-register "rrb_Int" form that prints "{sae}" and is selected
// from OpNodeRnd with the FROUND_NO_EXC rounding operand.
multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr,
                                    X86VectorVTInfo _, X86VectorVTInfo _Src,
                                    SDNode OpNodeRnd, OpndItins itins> {
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                   "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                   (_.VT (OpNodeRnd (_.VT _.RC:$src1),
                                    (_Src.VT _Src.RC:$src2),
                                    (i32 FROUND_NO_EXC))),
                   itins.rr>,
                 EVEX_4V, VEX_LIG, EVEX_B, Sched<[itins.Sched]>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006947
// Scalar conversion with explicit rounding control (RC).
// Adds a register-register "rrb_Int" form that takes an extra AVX512RC:$rc
// operand and forwards it to OpNodeRnd as an i32 immediate.
multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr,
                                   X86VectorVTInfo _, X86VectorVTInfo _Src,
                                   SDNode OpNodeRnd, OpndItins itins> {
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
                   "$rc, $src2, $src1", "$src1, $src2, $rc",
                   (_.VT (OpNodeRnd (_.VT _.RC:$src1),
                                    (_Src.VT _Src.RC:$src2),
                                    (i32 imm:$rc))),
                   itins.rr>,
                 EVEX_4V, VEX_LIG, Sched<[itins.Sched]>,
                 EVEX_B, EVEX_RC;
}
// Scalar double -> single wrapper: combines the current-rounding forms with
// the rounding-control form under HasAVX512. Note the parameter order here is
// (_src, _dst) while the inner multiclasses take (dst, src).
multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
                                      SDNode OpNodeRnd, OpndItins itins,
                                      X86VectorVTInfo _src,
                                      X86VectorVTInfo _dst> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd,
                                  itins>,
             avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd,
                                     itins>,
             VEX_W, EVEX_CD8<64, CD8VT1>, XD;
  }
}
6969
// Scalar single -> double wrapper: combines the current-rounding forms with
// the SAE form under HasAVX512. Note the parameter order here is
// (_src, _dst) while the inner multiclasses take (dst, src).
multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr,
                                      SDNode OpNodeRnd, OpndItins itins,
                                      X86VectorVTInfo _src,
                                      X86VectorVTInfo _dst> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd,
                                  itins>,
             avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd,
                                      itins>,
             EVEX_CD8<32, CD8VT1>, XS;
  }
}
// Scalar double <-> single conversions; both share opcode 0x5A and are
// tagged NotMemoryFoldable.
defm VCVTSD2SS :
    avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss", X86froundRnd, SSE_CVT_SD2SS,
                               f64x_info, f32x_info>,
    NotMemoryFoldable;
defm VCVTSS2SD :
    avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", X86fpextRnd, SSE_CVT_SS2SD,
                               f32x_info, f64x_info>,
    NotMemoryFoldable;
Asaf Badouh2744d212015-09-20 14:31:19 +00006985
// Scalar f32 -> f64 extension from a register. The first source operand of
// VCVTSS2SDZrr is left undefined (IMPLICIT_DEF).
def : Pat<(f64 (fpextend FR32X:$src)),
          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
      Requires<[HasAVX512]>;

// Fold the load into the convert only when optimizing for size, in line with
// the extloadf32 patterns below. Without OptForSize this pattern folded the
// load unconditionally, which disagreed with the OptForSpeed extloadf32
// pattern that deliberately keeps the load separate.
// NOTE(review): presumably the separate load avoids a dependency on the
// undefined pass-through operand - confirm against the scheduling model.
// When not optimizing for size, the register pattern above still matches
// with the load selected on its own.
def : Pat<(f64 (fpextend (loadf32 addr:$src))),
          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
      Requires<[HasAVX512, OptForSize]>;

// Extending load, size-optimized: single instruction with folded load.
def : Pat<(f64 (extloadf32 addr:$src)),
          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
      Requires<[HasAVX512, OptForSize]>;

// Extending load, speed-optimized: VMOVSSZrm load followed by the
// register-register convert.
def : Pat<(f64 (extloadf32 addr:$src)),
          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>,
      Requires<[HasAVX512, OptForSpeed]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007000
// Scalar f64 -> f32 rounding from a register; the first source operand is
// left undefined (IMPLICIT_DEF).
def : Pat<(f32 (fpround FR64X:$src)),
          (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
      Requires<[HasAVX512]>;

// Element 0 of $src rounded to f32 and inserted into $dst via X86Movss:
// select the "_Int" form directly with $dst as the first source.
def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector
                     (f32 (fpround
                       (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
          (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
      Requires<[HasAVX512]>;

// Element 0 of $src extended to f64 and inserted into $dst via X86Movsd:
// select the "_Int" form directly with $dst as the first source.
def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector
                     (f64 (fpextend
                       (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
          (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
      Requires<[HasAVX512]>;
7018
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007019//===----------------------------------------------------------------------===//
7020// AVX-512 Vector convert from signed/unsigned integer to float/double
7021// and from float/double to signed/unsigned integer
7022//===----------------------------------------------------------------------===//
7023
// Common vector conversion multiclass. `_` is the destination type info and
// `_Src` the source type info. Produces three forms:
//   rr  : register source
//   rm  : full-width memory source (operand class MemOp; mnemonic gets Alias
//         appended)
//   rmb : scalar memory source broadcast through X86VBroadcast (EVEX_B), with
//         the Broadcast string appended to the operand in the asm string
// Broadcast defaults to _.BroadcastStr, Alias to "" and MemOp to _Src.MemOp.
multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86VectorVTInfo _Src, SDNode OpNode,
                          OpndItins itins,
                          string Broadcast = _.BroadcastStr,
                          string Alias = "",
                          X86MemOperand MemOp = _Src.MemOp> {

  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
              (ins _Src.RC:$src), OpcodeStr, "$src", "$src",
              (_.VT (OpNode (_Src.VT _Src.RC:$src))),
              itins.rr>,
            EVEX, Sched<[itins.Sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
              (ins MemOp:$src), OpcodeStr#Alias, "$src", "$src",
              (_.VT (OpNode (_Src.VT
                              (bitconvert (_Src.LdFrag addr:$src))))),
              itins.rm>,
            EVEX, Sched<[itins.Sched.Folded]>;

  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
               (ins _Src.ScalarMemOp:$src), OpcodeStr,
               "${src}"##Broadcast, "${src}"##Broadcast,
               (_.VT (OpNode (_Src.VT
                               (X86VBroadcast (_Src.ScalarLdFrag addr:$src)))
                             )),
               itins.rm>,
             EVEX, EVEX_B, Sched<[itins.Sched.Folded]>;
}
// Conversion with SAE (suppress all exceptions).
// Adds a register-source "rrb" form that prints "{sae}" and is selected from
// OpNodeRnd with the FROUND_NO_EXC rounding operand.
multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr,
                              X86VectorVTInfo _, X86VectorVTInfo _Src,
                              SDNode OpNodeRnd, OpndItins itins> {
  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
               (ins _Src.RC:$src), OpcodeStr,
               "{sae}, $src", "$src, {sae}",
               (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src),
                                (i32 FROUND_NO_EXC))),
               itins.rr>,
             EVEX, EVEX_B, Sched<[itins.Sched]>;
}
7059
// Conversion with explicit rounding control (RC).
// Adds a register-source "rrb" form that takes an extra AVX512RC:$rc operand
// and forwards it to OpNodeRnd as an i32 immediate.
multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr,
                             X86VectorVTInfo _, X86VectorVTInfo _Src,
                             SDNode OpNodeRnd, OpndItins itins> {
  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
               (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
               "$rc, $src", "$src, $rc",
               (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 imm:$rc))),
               itins.rr>,
             EVEX, EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
}
7070
// Extend packed float to packed double.
//   Z    : 512-bit, fpextend plus an SAE form (X86vfpextRnd); HasAVX512.
//   Z128 : 128-bit, X86vfpext; broadcast string "{1to2}" and an f64mem
//          operand are passed explicitly; HasVLX.
//   Z256 : 256-bit, fpextend; HasVLX.
multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr, OpndItins itins> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8f32x_info,
                            fpextend, itins>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
                                X86vfpextRnd, itins>,
             EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4f32x_info,
                               X86vfpext, itins, "{1to2}", "", f64mem>,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4f32x_info,
                               fpextend, itins>,
                EVEX_V256;
  }
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007087
// Truncate packed double to packed float.
//   Z    : 512-bit, fpround plus a rounding-control form (X86vfproundRnd).
//   Z128 : 128-bit, X86vfpround, broadcast "{1to2}", mnemonic alias "{x}".
//   Z256 : 256-bit, fpround, broadcast "{1to4}", mnemonic alias "{y}".
// Both VLX forms produce v4f32x_info, hence the explicit "x"/"y" mnemonic
// spellings registered through the InstAliases at the end.
multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, OpndItins itins> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info,
                            fpround, itins>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
                               X86vfproundRnd, itins>,
             EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
                               X86vfpround, itins, "{1to2}", "{x}">,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info,
                               fpround, itins, "{1to4}", "{y}">,
                EVEX_V256;

    // Suffixed spellings for the 128-bit source forms.
    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rr")
                       VR128X:$dst, VR128X:$src), 0>;
    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rm")
                       VR128X:$dst, f128mem:$src), 0>;
    // Suffixed spellings for the 256-bit source forms.
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rr")
                       VR128X:$dst, VR256X:$src), 0>;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rm")
                       VR128X:$dst, f256mem:$src), 0>;
  }
}
7111
// Packed double <-> float conversions; both use opcode 0x5A.
defm VCVTPD2PS :
    avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SSE_CVT_PD2PS>,
    VEX_W, PD, EVEX_CD8<64, CD8VF>;
defm VCVTPS2PD :
    avx512_cvtps2pd<0x5A, "vcvtps2pd", SSE_CVT_PS2PD>,
    PS, EVEX_CD8<32, CD8VH>;
7116
// Extending load v8f32 -> v8f64 selects the 512-bit VCVTPS2PD memory form.
// VCVTPS2PDZrm is defined under HasAVX512, so the pattern carries the same
// predicate; it was previously the only pattern in this group left unguarded.
let Predicates = [HasAVX512] in {
  def : Pat<(v8f64 (extloadv8f32 addr:$src)),
            (VCVTPS2PDZrm addr:$src)>;
}
Michael Liao5bf95782014-12-04 05:20:33 +00007119
// 128/256-bit selection patterns for the packed double<->float conversions.
let Predicates = [HasVLX] in {
  // An X86vzmovl wrapped around the 128-bit pd->ps result selects to the
  // plain instruction. AddedComplexity raises the priority of these patterns.
  let AddedComplexity = 15 in {
    def : Pat<(X86vzmovl
                (v2f64 (bitconvert
                  (v4f32 (X86vfpround (v2f64 VR128X:$src)))))),
              (VCVTPD2PSZ128rr VR128X:$src)>;
    def : Pat<(X86vzmovl
                (v2f64 (bitconvert
                  (v4f32 (X86vfpround (loadv2f64 addr:$src)))))),
              (VCVTPD2PSZ128rm addr:$src)>;
  }

  // Extending loads fold into the memory forms of VCVTPS2PD.
  def : Pat<(v2f64 (extloadv2f32 addr:$src)),
            (VCVTPS2PDZ128rm addr:$src)>;
  def : Pat<(v4f64 (extloadv4f32 addr:$src)),
            (VCVTPS2PDZ256rm addr:$src)>;
}
Elena Demikhovsky3629b4a2014-01-06 08:45:54 +00007134
// Convert signed/unsigned doubleword to double.
// Only the plain avx512_vcvt_fp forms are instantiated (no rounding-control
// or SAE variant). The 128-bit form uses OpNode128 and explicitly passes the
// "{1to2}" broadcast string and an i64mem operand.
multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNode128, OpndItins itins> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
                          itins>,
           EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
                               OpNode128, itins, "{1to2}", "", i64mem>,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info,
                               OpNode, itins>,
                EVEX_V256;
  }
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007150
// Convert signed/unsigned doubleword to float.
// The 512-bit form additionally gets a rounding-control variant (OpNodeRnd).
multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, OpndItins itins> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
                          itins>,
           avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
                             OpNodeRnd, itins>,
           EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info,
                               OpNode, itins>,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info,
                               OpNode, itins>,
                EVEX_V256;
  }
}
7167
// Convert float to signed/unsigned doubleword with truncation.
// The 512-bit form additionally gets an SAE variant (OpNodeRnd).
multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeRnd, OpndItins itins> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
                            itins>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
                                OpNodeRnd, itins>,
             EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info,
                               OpNode, itins>,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info,
                               OpNode, itins>,
                EVEX_V256;
  }
}
7184
// Convert float to signed/unsigned doubleword.
// The 512-bit form additionally gets a rounding-control variant (OpNodeRnd).
multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, OpndItins itins> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
                            itins>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
                               OpNodeRnd, itins>,
             EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info,
                               OpNode, itins>,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info,
                               OpNode, itins>,
                EVEX_V256;
  }
}
7201
// Convert double to signed/unsigned doubleword with truncation.
// The 512-bit form additionally gets an SAE variant (OpNodeRnd); the 128-bit
// form is selected from OpNode128.
multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNode128, SDNode OpNodeRnd,
                            OpndItins itins> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
                            itins>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
                                OpNodeRnd, itins>,
             EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // The 128- and 256-bit forms share the destination type 'v4i32x_info',
    // so the asm parser needs the "x"/"y" suffixes to tell their memory forms
    // apart. The broadcast strings are given explicitly for the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
                               OpNode128, itins, "{1to2}", "{x}">,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info,
                               OpNode, itins, "{1to4}", "{y}">,
                EVEX_V256;

    // Suffixed spellings for the 128-bit source forms.
    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rr")
                       VR128X:$dst, VR128X:$src), 0>;
    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rm")
                       VR128X:$dst, i128mem:$src), 0>;
    // Suffixed spellings for the 256-bit source forms.
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rr")
                       VR128X:$dst, VR256X:$src), 0>;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rm")
                       VR128X:$dst, i256mem:$src), 0>;
  }
}
7232
// Convert double to signed/unsigned doubleword.
// The 512-bit form additionally gets a rounding-control variant (OpNodeRnd).
multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, OpndItins itins> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
                            itins>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
                               OpNodeRnd, itins>,
             EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // The 128- and 256-bit forms share the destination type 'v4i32x_info',
    // so the asm parser needs the "x"/"y" suffixes to tell their memory forms
    // apart. The broadcast strings are given explicitly for the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
                               OpNode, itins, "{1to2}", "{x}">,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info,
                               OpNode, itins, "{1to4}", "{y}">,
                EVEX_V256;

    // Suffixed spellings for the 128-bit source forms.
    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rr")
                       VR128X:$dst, VR128X:$src), 0>;
    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rm")
                       VR128X:$dst, f128mem:$src), 0>;
    // Suffixed spellings for the 256-bit source forms.
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rr")
                       VR128X:$dst, VR256X:$src), 0>;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rm")
                       VR128X:$dst, f256mem:$src), 0>;
  }
}
7262
// Convert double to signed/unsigned quadword.
// Requires HasDQI (plus HasVLX for the 128/256-bit forms). The 512-bit form
// additionally gets a rounding-control variant (OpNodeRnd).
multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, OpndItins itins> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
                            itins>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
                               OpNodeRnd,itins>,
             EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info,
                               OpNode, itins>,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info,
                               OpNode, itins>,
                EVEX_V256;
  }
}
7279
// Convert double to signed/unsigned quadword with truncation.
// Requires HasDQI (plus HasVLX for the 128/256-bit forms). The 512-bit form
// additionally gets an SAE variant (OpNodeRnd).
multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeRnd, OpndItins itins> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
                            itins>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
                                OpNodeRnd, itins>,
             EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info,
                               OpNode, itins>,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info,
                               OpNode, itins>,
                EVEX_V256;
  }
}
7296
// Convert signed/unsigned quadword to double.
// Requires HasDQI (plus HasVLX for the 128/256-bit forms). The 512-bit form
// additionally gets a rounding-control variant (OpNodeRnd).
multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, OpndItins itins> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
                            itins>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
                               OpNodeRnd, itins>,
             EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info,
                               OpNode, itins>,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info,
                               OpNode, itins>,
                EVEX_V256;
  }
}
7313
// Convert float to signed/unsigned quadword.
// Requires HasDQI (plus HasVLX for the 128/256-bit forms). The 512-bit form
// additionally gets a rounding-control variant (OpNodeRnd).
multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, OpndItins itins> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
                            itins>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
                               OpNodeRnd, itins>,
             EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // Only two elements of the v4f32x_info source are consumed by the
    // 128-bit form, so the broadcast string "{1to2}" is given explicitly
    // (along with an f64mem memory operand).
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info,
                               OpNode, itins, "{1to2}", "", f64mem>,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info,
                               OpNode, itins>,
                EVEX_V256;
  }
}
7332
// Convert float to signed/unsigned quadword with truncation.
// Requires HasDQI (plus HasVLX for the 128/256-bit forms). The 512-bit form
// additionally gets an SAE variant (OpNodeRnd); the 128-bit form is selected
// from OpNode128.
multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNode128, SDNode OpNodeRnd,
                            OpndItins itins> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
                            itins>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
                                OpNodeRnd, itins>,
             EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // Only two elements of the v4f32x_info source are consumed by the
    // 128-bit form, so the broadcast string "{1to2}" is given explicitly
    // (along with an f64mem memory operand).
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info,
                               OpNode128, itins, "{1to2}", "", f64mem>,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info,
                               OpNode, itins>,
                EVEX_V256;
  }
}
7351
// Convert signed/unsigned quadword to float.
// Requires HasDQI (plus HasVLX for the 128/256-bit forms). The 512-bit form
// additionally gets a rounding-control variant (OpNodeRnd); the 128-bit form
// is selected from OpNode128.
multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNode128, SDNode OpNodeRnd,
                           OpndItins itins> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode,
                            itins>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
                               OpNodeRnd, itins>,
             EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // The 128- and 256-bit forms share the destination type 'v4f32x_info',
    // so the asm parser needs the "x"/"y" suffixes to tell their memory forms
    // apart. The broadcast strings are given explicitly for the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info,
                               OpNode128, itins, "{1to2}", "{x}">,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info,
                               OpNode, itins, "{1to4}", "{y}">,
                EVEX_V256;

    // Suffixed spellings for the 128-bit source forms.
    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rr")
                       VR128X:$dst, VR128X:$src), 0>;
    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rm")
                       VR128X:$dst, i128mem:$src), 0>;
    // Suffixed spellings for the 256-bit source forms.
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rr")
                       VR128X:$dst, VR256X:$src), 0>;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rm")
                       VR128X:$dst, i256mem:$src), 0>;
  }
}
7381
// Doubleword <-> floating-point conversions.
// Each defm passes (opcode, mnemonic, SDNode(s), itinerary) to its multiclass
// and then appends the prefix class (PS/PD/XS/XD), VEX_W where present, and
// the EVEX compressed-displacement tuple.

// Signed i32 -> f64.
defm VCVTDQ2PD
    : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", sint_to_fp, X86VSintToFP,
                      SSE_CVT_I2PD>,
      XS, EVEX_CD8<32, CD8VH>;

// Signed i32 -> f32.
defm VCVTDQ2PS
    : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", sint_to_fp, X86VSintToFpRnd,
                      SSE_CVT_I2PS>,
      PS, EVEX_CD8<32, CD8VF>;

// f32 -> signed i32, truncating.
defm VCVTTPS2DQ
    : avx512_cvttps2dq<0x5B, "vcvttps2dq", fp_to_sint, X86cvttp2siRnd,
                       SSE_CVT_PS2I>,
      XS, EVEX_CD8<32, CD8VF>;

// f64 -> signed i32, truncating.
defm VCVTTPD2DQ
    : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", fp_to_sint, X86cvttp2si,
                       X86cvttp2siRnd, SSE_CVT_PD2I>,
      PD, VEX_W, EVEX_CD8<64, CD8VF>;

// f32 -> unsigned i32, truncating.
defm VCVTTPS2UDQ
    : avx512_cvttps2dq<0x78, "vcvttps2udq", fp_to_uint, X86cvttp2uiRnd,
                       SSE_CVT_PS2I>,
      PS, EVEX_CD8<32, CD8VF>;

// f64 -> unsigned i32, truncating.
defm VCVTTPD2UDQ
    : avx512_cvttpd2dq<0x78, "vcvttpd2udq", fp_to_uint, X86cvttp2ui,
                       X86cvttp2uiRnd, SSE_CVT_PD2I>,
      PS, VEX_W, EVEX_CD8<64, CD8VF>;

// Unsigned i32 -> f64.
defm VCVTUDQ2PD
    : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp, X86VUintToFP,
                      SSE_CVT_I2PD>,
      XS, EVEX_CD8<32, CD8VH>;

// Unsigned i32 -> f32.
defm VCVTUDQ2PS
    : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", uint_to_fp, X86VUintToFpRnd,
                      SSE_CVT_I2PS>,
      XD, EVEX_CD8<32, CD8VF>;
7412
// Floating-point -> doubleword conversions built on the X86cvtp2Int /
// X86cvtp2UInt nodes (as opposed to the fp_to_sint/fp_to_uint based "T"
// variants).

// f32 -> signed i32.
defm VCVTPS2DQ
    : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int, X86cvtp2IntRnd,
                      SSE_CVT_PS2I>,
      PD, EVEX_CD8<32, CD8VF>;

// f64 -> signed i32.
defm VCVTPD2DQ
    : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int, X86cvtp2IntRnd,
                      SSE_CVT_PD2I>,
      XD, VEX_W, EVEX_CD8<64, CD8VF>;

// f32 -> unsigned i32.
defm VCVTPS2UDQ
    : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt, X86cvtp2UIntRnd,
                      SSE_CVT_PS2I>,
      PS, EVEX_CD8<32, CD8VF>;

// f64 -> unsigned i32.
defm VCVTPD2UDQ
    : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt, X86cvtp2UIntRnd,
                      SSE_CVT_PD2I>,
      VEX_W, PS, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00007428
// Floating-point -> quadword conversions.
// The f64-source forms carry VEX_W and a full-vector CD8 tuple (CD8VF); the
// f32-source forms use the half-vector tuple (CD8VH).

// f64 -> signed i64.
defm VCVTPD2QQ
    : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int, X86cvtp2IntRnd,
                      SSE_CVT_PD2I>,
      VEX_W, PD, EVEX_CD8<64, CD8VF>;

// f32 -> signed i64.
defm VCVTPS2QQ
    : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, X86cvtp2IntRnd,
                      SSE_CVT_PS2I>,
      PD, EVEX_CD8<32, CD8VH>;

// f64 -> unsigned i64.
defm VCVTPD2UQQ
    : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt, X86cvtp2UIntRnd,
                      SSE_CVT_PD2I>,
      VEX_W, PD, EVEX_CD8<64, CD8VF>;

// f32 -> unsigned i64.
defm VCVTPS2UQQ
    : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, X86cvtp2UIntRnd,
                      SSE_CVT_PS2I>,
      PD, EVEX_CD8<32, CD8VH>;

// f64 -> signed i64, truncating.
defm VCVTTPD2QQ
    : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", fp_to_sint, X86cvttp2siRnd,
                       SSE_CVT_PD2I>,
      VEX_W, PD, EVEX_CD8<64, CD8VF>;

// f32 -> signed i64, truncating.
defm VCVTTPS2QQ
    : avx512_cvttps2qq<0x7A, "vcvttps2qq", fp_to_sint, X86cvttp2si,
                       X86cvttp2siRnd, SSE_CVT_PS2I>,
      PD, EVEX_CD8<32, CD8VH>;

// f64 -> unsigned i64, truncating.
defm VCVTTPD2UQQ
    : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", fp_to_uint, X86cvttp2uiRnd,
                       SSE_CVT_PD2I>,
      VEX_W, PD, EVEX_CD8<64, CD8VF>;

// f32 -> unsigned i64, truncating.
defm VCVTTPS2UQQ
    : avx512_cvttps2qq<0x78, "vcvttps2uqq", fp_to_uint, X86cvttp2ui,
                       X86cvttp2uiRnd, SSE_CVT_PS2I>,
      PD, EVEX_CD8<32, CD8VH>;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007460
// Quadword -> floating-point conversions. All four carry VEX_W and a
// 64-bit-element full-vector CD8 tuple.

// Signed i64 -> f64.
defm VCVTQQ2PD
    : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", sint_to_fp, X86VSintToFpRnd,
                      SSE_CVT_I2PD>,
      VEX_W, XS, EVEX_CD8<64, CD8VF>;

// Unsigned i64 -> f64.
defm VCVTUQQ2PD
    : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", uint_to_fp, X86VUintToFpRnd,
                      SSE_CVT_I2PD>,
      VEX_W, XS, EVEX_CD8<64, CD8VF>;

// Signed i64 -> f32.
defm VCVTQQ2PS
    : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp, X86VSintToFP,
                      X86VSintToFpRnd, SSE_CVT_I2PS>,
      VEX_W, PS, EVEX_CD8<64, CD8VF>;

// Unsigned i64 -> f32.
defm VCVTUQQ2PS
    : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp, X86VUintToFP,
                      X86VUintToFpRnd, SSE_CVT_I2PS>,
      VEX_W, XD, EVEX_CD8<64, CD8VF>;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007476
// Unsigned doubleword conversions on AVX512 targets without VLX.
//
// Every pattern follows the same recipe: place the narrow source into the low
// subregister of an otherwise undefined (IMPLICIT_DEF) 512-bit value, run the
// 512-bit "Z" register-register instruction, then extract the low subregister
// holding the result. Only the extracted result lanes are used.
let Predicates = [HasAVX512, NoVLX] in {
  // v8f32 -> v8i32 (unsigned, truncating).
  def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
            (EXTRACT_SUBREG
              (v16i32 (VCVTTPS2UDQZrr
                (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
                                       VR256X:$src1, sub_ymm)))),
              sub_ymm)>;

  // v4f32 -> v4i32 (unsigned, truncating).
  def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))),
            (EXTRACT_SUBREG
              (v16i32 (VCVTTPS2UDQZrr
                (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
                                       VR128X:$src1, sub_xmm)))),
              sub_xmm)>;

  // v4f64 -> v4i32 (unsigned, truncating); result is half the source width.
  def : Pat<(v4i32 (fp_to_uint (v4f64 VR256X:$src1))),
            (EXTRACT_SUBREG
              (v8i32 (VCVTTPD2UDQZrr
                (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                      VR256X:$src1, sub_ymm)))),
              sub_xmm)>;

  // v8i32 -> v8f32 (unsigned).
  def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
            (EXTRACT_SUBREG
              (v16f32 (VCVTUDQ2PSZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                       VR256X:$src1, sub_ymm)))),
              sub_ymm)>;

  // v4i32 -> v4f32 (unsigned).
  def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
            (EXTRACT_SUBREG
              (v16f32 (VCVTUDQ2PSZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                       VR128X:$src1, sub_xmm)))),
              sub_xmm)>;

  // v4i32 -> v4f64 (unsigned); result is twice the source width.
  def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
            (EXTRACT_SUBREG
              (v8f64 (VCVTUDQ2PDZrr
                (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                      VR128X:$src1, sub_xmm)))),
              sub_ymm)>;

  // X86VUintToFP of a v4i32 producing v2f64.
  def : Pat<(v2f64 (X86VUintToFP (v4i32 VR128X:$src1))),
            (EXTRACT_SUBREG
              (v8f64 (VCVTUDQ2PDZrr
                (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                      VR128X:$src1, sub_xmm)))),
              sub_xmm)>;
}
7513
// 128-bit conversion patterns available with AVX512F + AVX512VL.
let Predicates = [HasAVX512, HasVLX] in {
  // An X86vzmovl wrapped around a v2f64 -> v4i32 conversion is matched
  // directly to the 128-bit conversion instruction, so no separate move is
  // emitted. AddedComplexity raises the priority of these patterns.
  let AddedComplexity = 15 in {
    // Signed, register and load sources.
    def : Pat<(X86vzmovl
                (v2i64 (bitconvert
                  (v4i32 (X86cvtp2Int (v2f64 VR128X:$src)))))),
              (VCVTPD2DQZ128rr VR128X:$src)>;
    def : Pat<(X86vzmovl
                (v2i64 (bitconvert
                  (v4i32 (X86cvtp2Int (loadv2f64 addr:$src)))))),
              (VCVTPD2DQZ128rm addr:$src)>;

    // Unsigned, register source.
    def : Pat<(X86vzmovl
                (v2i64 (bitconvert
                  (v4i32 (X86cvtp2UInt (v2f64 VR128X:$src)))))),
              (VCVTPD2UDQZ128rr VR128X:$src)>;

    // Signed truncating, register and load sources.
    def : Pat<(X86vzmovl
                (v2i64 (bitconvert
                  (v4i32 (X86cvttp2si (v2f64 VR128X:$src)))))),
              (VCVTTPD2DQZ128rr VR128X:$src)>;
    def : Pat<(X86vzmovl
                (v2i64 (bitconvert
                  (v4i32 (X86cvttp2si (loadv2f64 addr:$src)))))),
              (VCVTTPD2DQZ128rm addr:$src)>;

    // Unsigned truncating, register source.
    def : Pat<(X86vzmovl
                (v2i64 (bitconvert
                  (v4i32 (X86cvttp2ui (v2f64 VR128X:$src)))))),
              (VCVTTPD2UDQZ128rr VR128X:$src)>;
  }

  // Fold a 64-bit scalar load (either as scalar_to_vector of loadi64 or as
  // X86vzload) feeding a v4i32 -> v2f64 conversion into the memory form.
  def : Pat<(v2f64 (X86VSintToFP
              (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (VCVTDQ2PDZ128rm addr:$src)>;
  def : Pat<(v2f64 (X86VSintToFP
              (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
            (VCVTDQ2PDZ128rm addr:$src)>;

  def : Pat<(v2f64 (X86VUintToFP
              (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (VCVTUDQ2PDZ128rm addr:$src)>;
  def : Pat<(v2f64 (X86VUintToFP
              (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
            (VCVTUDQ2PDZ128rm addr:$src)>;
}
7546
// 512-bit f64 <-> f32 conversions with the source folded from memory.
let Predicates = [HasAVX512] in {
  // Round a loaded v8f64 down to v8f32.
  def : Pat<(v8f32 (fpround (loadv8f64 addr:$src))),
            (VCVTPD2PSZrm addr:$src)>;

  // Extending load of v8f32 producing v8f64.
  def : Pat<(v8f64 (extloadv8f32 addr:$src)),
            (VCVTPS2PDZrm addr:$src)>;
}
7553
// With AVX512DQ + AVX512VL, an X86vzmovl wrapped around a v2i64 -> v4f32
// conversion is matched directly to the 128-bit conversion instruction.
let Predicates = [HasDQI, HasVLX] in {
  // Prefer these over the individual vzmovl / conversion patterns.
  let AddedComplexity = 15 in {
    // Signed source.
    def : Pat<(X86vzmovl
                (v2f64 (bitconvert
                  (v4f32 (X86VSintToFP (v2i64 VR128X:$src)))))),
              (VCVTQQ2PSZ128rr VR128X:$src)>;

    // Unsigned source.
    def : Pat<(X86vzmovl
                (v2f64 (bitconvert
                  (v4f32 (X86VUintToFP (v2i64 VR128X:$src)))))),
              (VCVTUQQ2PSZ128rr VR128X:$src)>;
  }
}
7564
// Quadword conversions on AVX512DQ targets without VLX.
//
// Same widening recipe for every pattern: insert the narrow source into the
// low subregister of an IMPLICIT_DEF 512-bit value, execute the 512-bit "Z"
// register-register instruction, and extract the low subregister that holds
// the result.
let Predicates = [HasDQI, NoVLX] in {
  // ---- fp -> signed i64 (truncating) ----
  def : Pat<(v2i64 (fp_to_sint (v2f64 VR128X:$src1))),
            (EXTRACT_SUBREG
              (v8i64 (VCVTTPD2QQZrr
                (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                      VR128X:$src1, sub_xmm)))),
              sub_xmm)>;

  def : Pat<(v4i64 (fp_to_sint (v4f32 VR128X:$src1))),
            (EXTRACT_SUBREG
              (v8i64 (VCVTTPS2QQZrr
                (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
                                      VR128X:$src1, sub_xmm)))),
              sub_ymm)>;

  def : Pat<(v4i64 (fp_to_sint (v4f64 VR256X:$src1))),
            (EXTRACT_SUBREG
              (v8i64 (VCVTTPD2QQZrr
                (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                      VR256X:$src1, sub_ymm)))),
              sub_ymm)>;

  // ---- fp -> unsigned i64 (truncating) ----
  def : Pat<(v2i64 (fp_to_uint (v2f64 VR128X:$src1))),
            (EXTRACT_SUBREG
              (v8i64 (VCVTTPD2UQQZrr
                (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                      VR128X:$src1, sub_xmm)))),
              sub_xmm)>;

  def : Pat<(v4i64 (fp_to_uint (v4f32 VR128X:$src1))),
            (EXTRACT_SUBREG
              (v8i64 (VCVTTPS2UQQZrr
                (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
                                      VR128X:$src1, sub_xmm)))),
              sub_ymm)>;

  def : Pat<(v4i64 (fp_to_uint (v4f64 VR256X:$src1))),
            (EXTRACT_SUBREG
              (v8i64 (VCVTTPD2UQQZrr
                (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                      VR256X:$src1, sub_ymm)))),
              sub_ymm)>;

  // ---- signed i64 -> fp ----
  def : Pat<(v4f32 (sint_to_fp (v4i64 VR256X:$src1))),
            (EXTRACT_SUBREG
              (v8f32 (VCVTQQ2PSZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                      VR256X:$src1, sub_ymm)))),
              sub_xmm)>;

  def : Pat<(v2f64 (sint_to_fp (v2i64 VR128X:$src1))),
            (EXTRACT_SUBREG
              (v8f64 (VCVTQQ2PDZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                      VR128X:$src1, sub_xmm)))),
              sub_xmm)>;

  def : Pat<(v4f64 (sint_to_fp (v4i64 VR256X:$src1))),
            (EXTRACT_SUBREG
              (v8f64 (VCVTQQ2PDZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                      VR256X:$src1, sub_ymm)))),
              sub_ymm)>;

  // ---- unsigned i64 -> fp ----
  def : Pat<(v4f32 (uint_to_fp (v4i64 VR256X:$src1))),
            (EXTRACT_SUBREG
              (v8f32 (VCVTUQQ2PSZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                      VR256X:$src1, sub_ymm)))),
              sub_xmm)>;

  def : Pat<(v2f64 (uint_to_fp (v2i64 VR128X:$src1))),
            (EXTRACT_SUBREG
              (v8f64 (VCVTUQQ2PDZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                      VR128X:$src1, sub_xmm)))),
              sub_xmm)>;

  def : Pat<(v4f64 (uint_to_fp (v4i64 VR256X:$src1))),
            (EXTRACT_SUBREG
              (v8f64 (VCVTUQQ2PDZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                      VR256X:$src1, sub_ymm)))),
              sub_ymm)>;
}
7626
Elena Demikhovskydd0794e2013-10-24 07:16:35 +00007627//===----------------------------------------------------------------------===//
7628// Half precision conversion instructions
7629//===----------------------------------------------------------------------===//
Simon Pilgrim569e53b2017-12-03 21:43:54 +00007630
// vcvtph2ps register and memory forms (opcode 0x13), both maskable.
//
// Template arguments:
//   _dest    - vector type info of the f32 result.
//   _src     - vector type info of the half-precision source.
//   x86memop - memory operand class used by the load form.
//   ld_frag  - load fragment; its result is bitconverted to _src.VT.
//   itins    - itinerary/scheduling bundle (rr, rm, Sched).
multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                           X86MemOperand x86memop, PatFrag ld_frag,
                           OpndItins itins> {
  // Register source.
  defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
                            (ins _src.RC:$src),
                            "vcvtph2ps", "$src", "$src",
                            (X86cvtph2ps (_src.VT _src.RC:$src)),itins.rr>,
            T8PD, Sched<[itins.Sched]>;

  // Memory source, scheduled with the folded-load variant of the class.
  defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
                            (ins x86memop:$src),
                            "vcvtph2ps", "$src", "$src",
                            (X86cvtph2ps
                              (_src.VT (bitconvert (ld_frag addr:$src)))),
                            itins.rm>,
            T8PD, Sched<[itins.Sched.Folded]>;
}
7645
// vcvtph2ps register form with the "{sae}" operand: selects X86cvtph2psRnd
// with FROUND_NO_EXC and sets EVEX_B.
multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                               OpndItins itins> {
  defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
                             (ins _src.RC:$src),
                             "vcvtph2ps", "{sae}, $src", "$src, {sae}",
                             (X86cvtph2psRnd (_src.VT _src.RC:$src),
                                             (i32 FROUND_NO_EXC)),
                             itins.rr>,
             T8PD, EVEX_B, Sched<[itins.Sched]>;
}
7655
// 512-bit vcvtph2ps: v16i16 source (256-bit memory operand) -> v16f32, plus
// the {sae} register form. Requires AVX512F only.
let Predicates = [HasAVX512] in
  defm VCVTPH2PSZ
      : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, loadv4i64,
                        SSE_CVT_PH2PS>,
        avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, SSE_CVT_PH2PS>,
        EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
Craig Toppere7fb3002017-11-07 07:13:07 +00007661
// 256-bit and 128-bit vcvtph2ps forms and their load-folding patterns; all
// gated on AVX512VL.
let Predicates = [HasVLX] in {
  // 256-bit result from a 128-bit memory operand.
  defm VCVTPH2PSZ256
      : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem, loadv2i64,
                        SSE_CVT_PH2PS>,
        EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;

  // 128-bit result from a 64-bit memory operand.
  defm VCVTPH2PSZ128
      : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem, loadv2i64,
                        SSE_CVT_PH2PS>,
        EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;

  // Pattern match vcvtph2ps of a scalar i64 load, in each of the three shapes
  // the load can take.
  def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzmovl_v2i64 addr:$src)))),
            (VCVTPH2PSZ128rm addr:$src)>;
  def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzload_v2i64 addr:$src)))),
            (VCVTPH2PSZ128rm addr:$src)>;
  def : Pat<(v4f32 (X86cvtph2ps
              (v8i16 (bitconvert
                (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
            (VCVTPH2PSZ128rm addr:$src)>;
}
7679
// vcvtps2ph register-destination and store forms (opcode 0x1D).
//
// Template arguments:
//   _dest    - vector type info of the half-precision result; also supplies
//              the write-mask register class for the masked store.
//   _src     - vector type info of the f32 source.
//   x86memop - memory operand class of the store destination.
//   itins    - itinerary/scheduling bundle (rr, rm, Sched).
multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                           X86MemOperand x86memop, OpndItins itins> {
  // Register destination; $src2 is the 8-bit immediate operand.
  defm rr : AVX512_maskable<0x1D, MRMDestReg, _dest ,(outs _dest.RC:$dst),
                            (ins _src.RC:$src1, i32u8imm:$src2),
                            "vcvtps2ph", "$src2, $src1", "$src1, $src2",
                            (X86cvtps2ph (_src.VT _src.RC:$src1),
                                         (i32 imm:$src2)),
                            itins.rr, 0, 0>,
            AVX512AIi8Base, Sched<[itins.Sched]>;

  // Store forms carry no selection pattern ([]), so their store behaviour is
  // declared explicitly via mayStore.
  let hasSideEffects = 0, mayStore = 1 in {
    // Unmasked store.
    def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
                        (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
                        "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                        [], itins.rm>,
             Sched<[itins.Sched.Folded, ReadAfterLd]>;

    // Write-masked store.
    def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
                         (ins x86memop:$dst, _dest.KRCWM:$mask,
                              _src.RC:$src1, i32u8imm:$src2),
                         "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                         [], itins.rm>,
              EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
Simon Pilgrim569e53b2017-12-03 21:43:54 +00007699
// vcvtps2ph register form with the "{sae}" operand. Defined through
// AVX512_maskable_in_asm with an empty pattern list, so it carries no
// selection pattern; hasSideEffects is cleared explicitly for that reason.
multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                               OpndItins itins> {
  let hasSideEffects = 0 in
  defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
                                    (outs _dest.RC:$dst),
                                    (ins _src.RC:$src1, i32u8imm:$src2),
                                    "vcvtps2ph",
                                    "$src2, {sae}, $src1",
                                    "$src1, {sae}, $src2",
                                    [], itins.rr>,
             EVEX_B, AVX512AIi8Base, Sched<[itins.Sched]>;
}
Simon Pilgrim569e53b2017-12-03 21:43:54 +00007709
// vcvtps2ph instantiations and the store-folding patterns that select them.
//
// Each store pattern lives under the same predicate as the instruction it
// selects: the 512-bit form only needs AVX512F, while the 256/128-bit forms
// need AVX512VL. Keeping the Z128mr/Z256mr patterns under HasAVX512 alone
// would let instruction selection pick VL-only instructions on a target that
// lacks VLX.
let Predicates = [HasAVX512] in {
  // 512-bit source -> 256-bit half-precision result, plus the {sae} form.
  defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
                                    SSE_CVT_PS2PH>,
                    avx512_cvtps2ph_sae<v16i16x_info, v16f32_info,
                                        SSE_CVT_PS2PH>,
                    EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;

  // Fold a full-width store of the 512-bit conversion result.
  def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, i32:$src2)), addr:$dst),
            (VCVTPS2PHZmr addr:$dst, VR512:$src1, imm:$src2)>;
}

let Predicates = [HasVLX] in {
  // 256-bit source -> 128-bit result.
  defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
                                       SSE_CVT_PS2PH>,
                       EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
  // 128-bit source -> 64-bit result.
  defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
                                       SSE_CVT_PS2PH>,
                       EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;

  // Fold a store of element 0 (viewed as f64 or i64) of the 128-bit
  // conversion result into the 64-bit store form.
  def : Pat<(store (f64 (extractelt
                         (bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
  def : Pat<(store (i64 (extractelt
                         (bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;

  // Fold a full-width store of the 256-bit-source conversion result.
  def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, i32:$src2)), addr:$dst),
            (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, imm:$src2)>;
}
Asaf Badouh2489f352015-12-02 08:17:51 +00007738
// Patterns for matching scalar conversions between float and half-float,
// implemented with the 128-bit vector instructions (hence HasVLX).
let Predicates = [HasVLX] in {
  // Use MXCSR.RC for rounding instead of explicitly specifying the default
  // rounding mode (Nearest-Even, encoded as 0). Both are equivalent in the
  // configurations we support (the default). However, falling back to MXCSR is
  // more consistent with other instructions, which are always controlled by
  // it. It's encoded as 0b100, i.e. the immediate 4 used below.

  // f32 -> f16: convert in an xmm register, move the low dword to a GPR and
  // take its low 16 bits.
  def : Pat<(fp_to_f16 FR32X:$src),
            (i16 (EXTRACT_SUBREG
                   (VMOVPDI2DIZrr
                     (VCVTPS2PHZ128rr (COPY_TO_REGCLASS FR32X:$src, VR128X),
                                      4)),
                   sub_16bit))>;

  // f16 -> f32: extend the 16-bit GPR to 32 bits (MOVSX32rr16), copy it into
  // an xmm register, and convert.
  def : Pat<(f16_to_fp GR16:$src),
            (f32 (COPY_TO_REGCLASS
                   (VCVTPH2PSZ128rr
                     (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128X)),
                   FR32X)) >;

  // f32 -> f16 -> f32 round trip: stay in xmm registers for both steps.
  def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32X:$src))),
            (f32 (COPY_TO_REGCLASS
                   (VCVTPH2PSZ128rr
                     (VCVTPS2PHZ128rr (COPY_TO_REGCLASS FR32X:$src, VR128X),
                                      4)),
                   FR32X)) >;
}
7758
// Unordered/Ordered scalar fp compare with SAE ({sae} operand).
// Defines only the register-register "rrb" form with an empty pattern list;
// the EFLAGS def is supplied by the enclosing `let` at the instantiation site.
//
// Template arguments:
//   opc       - opcode byte.
//   _         - vector type info supplying the register class of both sources.
//   OpcodeStr - assembly mnemonic.
//   itins     - itinerary/scheduling bundle.
multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
                              string OpcodeStr, OpndItins itins> {
  let hasSideEffects = 0 in
  def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
                  !strconcat(OpcodeStr,
                             "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"),
                  [], itins.rr>,
           EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[itins.Sched]>;
}
7768
// {sae} forms of the scalar compare instructions. All of them define EFLAGS.
// 0x2E is used for the vucomis* mnemonics, 0x2F for vcomis*.
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  defm VUCOMISSZ
      : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSE_COMIS>,
        AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ
      : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSE_COMIS>,
        AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
  defm VCOMISSZ
      : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSE_COMIS>,
        AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
  defm VCOMISDZ
      : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSE_COMIS>,
        AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
}
7779
// EVEX-encoded scalar compare instructions that define EFLAGS.
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  // Scalar-register (FR32X/FR64X) forms selected through X86cmp.
  defm VUCOMISSZ
      : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
                      "ucomiss", SSE_COMIS>,
        PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ
      : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64,
                      "ucomisd", SSE_COMIS>,
        PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;

  // The vcomis* scalar-register forms are defined with an empty pattern list
  // (and an undef node), so they have no selection pattern here.
  let Pattern = []<dag> in {
    defm VCOMISSZ
        : sse12_ord_cmp<0x2F, FR32X, undef, f32, f32mem, loadf32,
                        "comiss", SSE_COMIS>,
          PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
    defm VCOMISDZ
        : sse12_ord_cmp<0x2F, FR64X, undef, f64, f64mem, loadf64,
                        "comisd", SSE_COMIS>,
          PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  }

  // Vector-register (VR128X) variants selected through X86ucomi / X86comi.
  // Marked isCodeGenOnly.
  let isCodeGenOnly = 1 in {
    defm VUCOMISSZ
        : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
                            sse_load_f32, "ucomiss", SSE_COMIS>,
          PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
    defm VUCOMISDZ
        : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
                            sse_load_f64, "ucomisd", SSE_COMIS>,
          PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;

    defm VCOMISSZ
        : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
                            sse_load_f32, "comiss", SSE_COMIS>,
          PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
    defm VCOMISDZ
        : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
                            sse_load_f64, "comisd", SSE_COMIS>,
          PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  }
}
Michael Liao5bf95782014-12-04 05:20:33 +00007811
/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
///
/// Scalar two-source forms, register and memory, both maskable and gated on
/// HasAVX512.
///   opc       - opcode byte.
///   OpcodeStr - assembly mnemonic.
///   OpNode    - SDNode applied to ($src1, $src2).
///   itins     - itinerary/scheduling bundle (rr, rm, Sched).
///   _         - type info supplying RC, VT, ExeDomain and the scalar memory
///               operand / complex pattern.
multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         OpndItins itins, X86VectorVTInfo _> {
  let Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
    // Both sources in registers.
    defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                     (ins _.RC:$src1, _.RC:$src2),
                                     OpcodeStr,
                                     "$src2, $src1", "$src1, $src2",
                                     (OpNode (_.VT _.RC:$src1),
                                             (_.VT _.RC:$src2)),
                                     itins.rr>,
              EVEX_4V, Sched<[itins.Sched]>;

    // Second source from memory, matched via the scalar-intrinsic complex
    // pattern.
    defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                     (ins _.RC:$src1, _.IntScalarMemOp:$src2),
                                     OpcodeStr,
                                     "$src2, $src1", "$src1, $src2",
                                     (OpNode (_.VT _.RC:$src1),
                                             _.ScalarIntMemCPat:$src2),
                                     itins.rm>,
              EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
7829
// Scalar rcp14 / rsqrt14 instantiations. The "sd" forms add VEX_W and use a
// 64-bit CD8 element size; all four are tagged NotMemoryFoldable.
defm VRCP14SS
    : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SSE_RCPS, f32x_info>,
      EVEX_CD8<32, CD8VT1>, T8PD, NotMemoryFoldable;
defm VRCP14SD
    : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SSE_RCPS, f64x_info>,
      VEX_W, EVEX_CD8<64, CD8VT1>, T8PD, NotMemoryFoldable;
defm VRSQRT14SS
    : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s, SSE_RSQRTSS, f32x_info>,
      EVEX_CD8<32, CD8VT1>, T8PD, NotMemoryFoldable;
defm VRSQRT14SD
    : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s, SSE_RSQRTSS, f64x_info>,
      VEX_W, EVEX_CD8<64, CD8VT1>, T8PD, NotMemoryFoldable;
Elena Demikhovskya3a71402013-10-09 08:16:14 +00007838
/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
///
/// Packed single-source forms for one vector width: register (r), full-vector
/// memory (m) and broadcast-from-scalar memory (mb). All are maskable.
///   opc       - opcode byte.
///   OpcodeStr - assembly mnemonic.
///   OpNode    - SDNode applied to the source.
///   itins     - itinerary/scheduling bundle (rr, rm, Sched).
///   _         - vector type info for this width.
multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    // Register source.
    defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src),
                            OpcodeStr, "$src", "$src",
                            (_.FloatVT (OpNode _.RC:$src)),
                            itins.rr>,
            EVEX, T8PD, Sched<[itins.Sched]>;

    // Full-vector load source.
    defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                            (ins _.MemOp:$src),
                            OpcodeStr, "$src", "$src",
                            (OpNode (_.FloatVT
                              (bitconvert (_.LdFrag addr:$src)))),
                            itins.rm>,
            EVEX, T8PD, Sched<[itins.Sched.Folded, ReadAfterLd]>;

    // Scalar load broadcast to every element (EVEX_B); the asm operand gets
    // the broadcast suffix appended.
    defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                             (ins _.ScalarMemOp:$src),
                             OpcodeStr,
                             "${src}"##_.BroadcastStr,
                             "${src}"##_.BroadcastStr,
                             (OpNode (_.FloatVT
                               (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
                             itins.rm>,
             EVEX, T8PD, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
Robert Khasanov3e534c92014-10-28 16:37:13 +00007860
// Instantiates avx512_fp14_p for every vector length and for both element
// types. "ps"/"pd" is appended to OpcodeStr; itins.s is used for the f32
// forms and itins.d for the f64 forms. The f64 forms add VEX_W.
multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                SizeItins itins> {
  // 512-bit forms.
  defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode,
                           itins.s, v16f32_info>,
             EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode,
                           itins.d, v8f64_info>,
             EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode,
                                itins.s, v4f32x_info>,
                  EVEX_V128, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode,
                                itins.s, v8f32x_info>,
                  EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode,
                                itins.d, v2f64x_info>,
                  EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode,
                                itins.d, v4f64x_info>,
                  EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
  }
}
7884
// Packed vrsqrt14 (opcode 0x4E) and vrcp14 (opcode 0x4C) at all widths
// produced by avx512_fp14_p_vl_all.
Simon Pilgrim3e5987c2017-11-30 10:48:47 +00007885defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, SSE_RSQRT_P>;
7886defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SSE_RCP_P>;
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00007887
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00007888/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
// Defines three maskable scalar forms of the binary node OpNode for the
// type described by `_`:
//   r  - two register sources, OpNode called with FROUND_CURRENT;
//   rb - two register sources with {sae} in the assembly string, OpNode
//        called with FROUND_NO_EXC, EVEX_B set;
//   m  - second source from memory (_.IntScalarMemOp, matched through
//        _.ScalarIntMemCPat), OpNode called with FROUND_CURRENT.
// Register forms are scheduled with itins.Sched; the memory form uses
// itins.Sched.Folded together with ReadAfterLd.
Elena Demikhovsky905a5a62014-11-26 10:46:49 +00007889multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
Simon Pilgrim3e5987c2017-11-30 10:48:47 +00007890 SDNode OpNode, OpndItins itins> {
Craig Topper176f3312017-02-25 19:18:11 +00007891 let ExeDomain = _.ExeDomain in {
Elena Demikhovsky905a5a62014-11-26 10:46:49 +00007892 defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7893 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
7894 "$src2, $src1", "$src1, $src2",
7895 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
Simon Pilgrim3e5987c2017-11-30 10:48:47 +00007896 (i32 FROUND_CURRENT)), itins.rr>,
7897 Sched<[itins.Sched]>;
Elena Demikhovsky905a5a62014-11-26 10:46:49 +00007898
// rb is a register-register form (MRMSrcReg, unfolded Sched), so it takes the
// register itinerary itins.rr, matching the r form above.
7899 defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7900 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
Elena Demikhovsky0d7e9362015-05-11 06:05:05 +00007901 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
Elena Demikhovsky905a5a62014-11-26 10:46:49 +00007902 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
Simon Pilgrim3e5987c2017-11-30 10:48:47 +00007903 (i32 FROUND_NO_EXC)), itins.rr>, EVEX_B,
Craig Toppera2f55282017-12-10 03:16:36 +00007904 Sched<[itins.Sched]>;
Elena Demikhovsky905a5a62014-11-26 10:46:49 +00007905
7906 defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
Craig Topper512e9e72017-11-19 05:42:54 +00007907 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
Elena Demikhovsky905a5a62014-11-26 10:46:49 +00007908 "$src2, $src1", "$src1, $src2",
Craig Topper512e9e72017-11-19 05:42:54 +00007909 (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
Simon Pilgrim3e5987c2017-11-30 10:48:47 +00007910 (i32 FROUND_CURRENT)), itins.rm>,
7911 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Craig Topper176f3312017-02-25 19:18:11 +00007912 }
Elena Demikhovskya3a71402013-10-09 08:16:14 +00007913}
7914
// Instantiates avx512_fp28_s for both scalar element types: SS appends "ss"
// to OpcodeStr and uses f32x_info / itins.s; SD appends "sd" and uses
// f64x_info / itins.d with VEX_W.
Simon Pilgrim3e5987c2017-11-30 10:48:47 +00007915multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
7916 SizeItins itins> {
7917 defm SS : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, itins.s>,
Elena Demikhovsky905a5a62014-11-26 10:46:49 +00007918 EVEX_CD8<32, CD8VT1>;
Simon Pilgrim3e5987c2017-11-30 10:48:47 +00007919 defm SD : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, itins.d>,
Elena Demikhovsky905a5a62014-11-26 10:46:49 +00007920 EVEX_CD8<64, CD8VT1>, VEX_W;
7921}
Elena Demikhovskya3a71402013-10-09 08:16:14 +00007922
// Scalar vrcp28 (0xCB) and vrsqrt28 (0xCD) are only defined under the HasERI
// predicate.
Craig Toppere1cac152016-06-07 07:27:54 +00007923let Predicates = [HasERI] in {
Simon Pilgrim3e5987c2017-11-30 10:48:47 +00007924 defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, SSE_RCP_S>,
7925 T8PD, EVEX_4V;
7926 defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, SSE_RSQRT_S>,
7927 T8PD, EVEX_4V;
Elena Demikhovsky905a5a62014-11-26 10:46:49 +00007928}
Igor Breger8352a0d2015-07-28 06:53:28 +00007929
// Scalar vgetexp (0x43) reuses avx512_eri_s but sits outside the HasERI block.
Simon Pilgrim3e5987c2017-11-30 10:48:47 +00007930defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds, SSE_ALU_ITINS_S>,
7931 T8PD, EVEX_4V;
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00007932/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
Elena Demikhovskybe8808d2014-11-12 07:31:03 +00007933
// Defines three maskable packed forms of OpNode, each passing
// (i32 FROUND_CURRENT) as the trailing operand:
//   r  - register source;
//   m  - full-vector memory source (loaded with _.LdFrag and bitconverted);
//   mb - scalar memory source broadcast through X86VBroadcast, with EVEX_B.
// The register form is scheduled with itins.Sched; memory forms use
// itins.Sched.Folded together with ReadAfterLd.
7934multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
Simon Pilgrim3e5987c2017-11-30 10:48:47 +00007935 SDNode OpNode, OpndItins itins> {
Craig Topper176f3312017-02-25 19:18:11 +00007936 let ExeDomain = _.ExeDomain in {
Elena Demikhovskybe8808d2014-11-12 07:31:03 +00007937 defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7938 (ins _.RC:$src), OpcodeStr, "$src", "$src",
Simon Pilgrim3e5987c2017-11-30 10:48:47 +00007939 (OpNode (_.VT _.RC:$src), (i32 FROUND_CURRENT)),
7940 itins.rr>, Sched<[itins.Sched]>;
Elena Demikhovskybe8808d2014-11-12 07:31:03 +00007941
Elena Demikhovskybe8808d2014-11-12 07:31:03 +00007942 defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
7943 (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
7944 (OpNode (_.FloatVT
Elena Demikhovsky905a5a62014-11-26 10:46:49 +00007945 (bitconvert (_.LdFrag addr:$src))),
Simon Pilgrim3e5987c2017-11-30 10:48:47 +00007946 (i32 FROUND_CURRENT)), itins.rm>,
7947 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Elena Demikhovskybe8808d2014-11-12 07:31:03 +00007948
7949 defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
Igor Breger4511e762016-02-22 11:48:27 +00007950 (ins _.ScalarMemOp:$src), OpcodeStr,
Asaf Badouh402ebb32015-06-03 13:41:48 +00007951 "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
Elena Demikhovskybe8808d2014-11-12 07:31:03 +00007952 (OpNode (_.FloatVT
7953 (X86VBroadcast (_.ScalarLdFrag addr:$src))),
Simon Pilgrim3e5987c2017-11-30 10:48:47 +00007954 (i32 FROUND_CURRENT)), itins.rm>, EVEX_B,
7955 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Craig Topper176f3312017-02-25 19:18:11 +00007956 }
Elena Demikhovskya3a71402013-10-09 08:16:14 +00007957}
// Defines the single maskable register form `rb`: {sae} appears in the
// assembly string, OpNode is called with (i32 FROUND_NO_EXC), and EVEX_B is
// set. It uses the register itinerary and the unfolded itins.Sched.
Asaf Badouh402ebb32015-06-03 13:41:48 +00007958multiclass avx512_fp28_p_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
Simon Pilgrim3e5987c2017-11-30 10:48:47 +00007959 SDNode OpNode, OpndItins itins> {
Craig Topper176f3312017-02-25 19:18:11 +00007960 let ExeDomain = _.ExeDomain in
Asaf Badouh402ebb32015-06-03 13:41:48 +00007961 defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7962 (ins _.RC:$src), OpcodeStr,
7963 "{sae}, $src", "$src, {sae}",
Simon Pilgrim3e5987c2017-11-30 10:48:47 +00007964 (OpNode (_.VT _.RC:$src), (i32 FROUND_NO_EXC)),
7965 itins.rr>, EVEX_B, Sched<[itins.Sched]>;
Asaf Badouh402ebb32015-06-03 13:41:48 +00007966}
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00007967
// 512-bit packed instantiation: each of PS (v16f32_info, itins.s) and
// PD (v8f64_info, itins.d, VEX_W) combines avx512_fp28_p (r/m/mb forms) with
// avx512_fp28_p_round (the {sae} rb form). Only EVEX_V512 variants are
// produced by this multiclass.
Simon Pilgrim3e5987c2017-11-30 10:48:47 +00007968multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
7969 SizeItins itins> {
7970 defm PS : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, itins.s>,
7971 avx512_fp28_p_round<opc, OpcodeStr#"ps", v16f32_info, OpNode, itins.s>,
Asaf Badouh402ebb32015-06-03 13:41:48 +00007972 T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
Simon Pilgrim3e5987c2017-11-30 10:48:47 +00007973 defm PD : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, itins.d>,
7974 avx512_fp28_p_round<opc, OpcodeStr#"pd", v8f64_info, OpNode, itins.d>,
Asaf Badouh402ebb32015-06-03 13:41:48 +00007975 T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskybe8808d2014-11-12 07:31:03 +00007976}
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00007977
// 128- and 256-bit packed instantiations of avx512_fp28_p (no {sae} form),
// all wrapped in Predicates = [HasVLX]. "ps" variants use itins.s; "pd"
// variants use itins.d and add VEX_W.
Asaf Badouh402ebb32015-06-03 13:41:48 +00007978multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
Simon Pilgrim3e5987c2017-11-30 10:48:47 +00007979 SDNode OpNode, SizeItins itins> {
Asaf Badouh402ebb32015-06-03 13:41:48 +00007980 // Define only if AVX512VL feature is present.
7981 let Predicates = [HasVLX] in {
Simon Pilgrim3e5987c2017-11-30 10:48:47 +00007982 defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode, itins.s>,
Asaf Badouh402ebb32015-06-03 13:41:48 +00007983 EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
Simon Pilgrim3e5987c2017-11-30 10:48:47 +00007984 defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode, itins.s>,
Asaf Badouh402ebb32015-06-03 13:41:48 +00007985 EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
Simon Pilgrim3e5987c2017-11-30 10:48:47 +00007986 defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode, itins.d>,
Asaf Badouh402ebb32015-06-03 13:41:48 +00007987 EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
Simon Pilgrim3e5987c2017-11-30 10:48:47 +00007988 defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode, itins.d>,
Asaf Badouh402ebb32015-06-03 13:41:48 +00007989 EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
7990 }
7991}
// Packed vrsqrt28 (0xCC), vrcp28 (0xCA) and vexp2 (0xC8) are only defined
// under the HasERI predicate, via avx512_eri (512-bit forms only).
Craig Toppere1cac152016-06-07 07:27:54 +00007992let Predicates = [HasERI] in {
Michael Liao5bf95782014-12-04 05:20:33 +00007993
Simon Pilgrim3e5987c2017-11-30 10:48:47 +00007994 defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, SSE_RSQRT_P>, EVEX;
7995 defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, SSE_RCP_P>, EVEX;
7996 defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, SSE_ALU_ITINS_P>, EVEX;
Asaf Badouh402ebb32015-06-03 13:41:48 +00007997}
// Packed vgetexp (0x42) sits outside the HasERI block and combines avx512_eri
// (512-bit forms) with avx512_fp_unaryop_packed (128/256-bit HasVLX forms).
Simon Pilgrim3e5987c2017-11-30 10:48:47 +00007998defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd, SSE_ALU_ITINS_P>,
7999 avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd,
8000 SSE_ALU_ITINS_P>, EVEX;
Asaf Badouh402ebb32015-06-03 13:41:48 +00008001
// Defines the single maskable register form `rb` of packed sqrt with an
// explicit rounding-control operand: AVX512RC:$rc is an input, the pattern is
// X86fsqrtRnd with (i32 imm:$rc), and the record carries EVEX_B and EVEX_RC.
Simon Pilgrim647dd6a2017-11-27 16:43:18 +00008002multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr, OpndItins itins,
Craig Topper80405072017-11-11 08:24:12 +00008003 X86VectorVTInfo _>{
Craig Topper176f3312017-02-25 19:18:11 +00008004 let ExeDomain = _.ExeDomain in
Asaf Badouh402ebb32015-06-03 13:41:48 +00008005 defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8006 (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
Simon Pilgrim647dd6a2017-11-27 16:43:18 +00008007 (_.VT (X86fsqrtRnd _.RC:$src, (i32 imm:$rc))), itins.rr>,
8008 EVEX, EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
Elena Demikhovskybe8808d2014-11-12 07:31:03 +00008009}
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00008010
// Defines three maskable packed forms selecting the generic fsqrt node:
//   r  - register source;
//   m  - full-vector memory source (loaded with _.LdFrag and bitconverted);
//   mb - scalar memory source broadcast through X86VBroadcast, with EVEX_B.
// The register form is scheduled with itins.Sched; memory forms use
// itins.Sched.Folded together with ReadAfterLd.
Simon Pilgrim647dd6a2017-11-27 16:43:18 +00008011multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr, OpndItins itins,
Craig Topper80405072017-11-11 08:24:12 +00008012 X86VectorVTInfo _>{
Craig Topper176f3312017-02-25 19:18:11 +00008013 let ExeDomain = _.ExeDomain in {
Robert Khasanov1cf354c2014-10-28 18:22:41 +00008014 defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
Robert Khasanoveb126392014-10-28 18:15:20 +00008015 (ins _.RC:$src), OpcodeStr, "$src", "$src",
Simon Pilgrim647dd6a2017-11-27 16:43:18 +00008016 (_.FloatVT (fsqrt _.RC:$src)), itins.rr>, EVEX,
8017 Sched<[itins.Sched]>;
Craig Toppere1cac152016-06-07 07:27:54 +00008018 defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8019 (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
Craig Topper80405072017-11-11 08:24:12 +00008020 (fsqrt (_.FloatVT
Simon Pilgrim647dd6a2017-11-27 16:43:18 +00008021 (bitconvert (_.LdFrag addr:$src)))), itins.rm>, EVEX,
8022 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Craig Toppere1cac152016-06-07 07:27:54 +00008023 defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8024 (ins _.ScalarMemOp:$src), OpcodeStr,
8025 "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
Craig Topper80405072017-11-11 08:24:12 +00008026 (fsqrt (_.FloatVT
Simon Pilgrim647dd6a2017-11-27 16:43:18 +00008027 (X86VBroadcast (_.ScalarLdFrag addr:$src)))), itins.rm>,
8028 EVEX, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Craig Topper176f3312017-02-25 19:18:11 +00008029 }
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008030}
8031
// Instantiates avx512_sqrt_packed for every packed FP width. "ps" variants use
// SSE_SQRTPS and the PS prefix; "pd" variants use SSE_SQRTPD, the PD prefix
// and VEX_W. The 512-bit PSZ/PDZ variants are defined unconditionally here;
// the 128- and 256-bit variants are wrapped in Predicates = [HasVLX].
Craig Topper80405072017-11-11 08:24:12 +00008032multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr> {
Simon Pilgrim647dd6a2017-11-27 16:43:18 +00008033 defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"), SSE_SQRTPS, v16f32_info>,
Robert Khasanoveb126392014-10-28 18:15:20 +00008034 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
Simon Pilgrim647dd6a2017-11-27 16:43:18 +00008035 defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"), SSE_SQRTPD, v8f64_info>,
Robert Khasanoveb126392014-10-28 18:15:20 +00008036 EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8037 // Define only if AVX512VL feature is present.
8038 let Predicates = [HasVLX] in {
8039 defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
Simon Pilgrim647dd6a2017-11-27 16:43:18 +00008040 SSE_SQRTPS, v4f32x_info>,
Robert Khasanoveb126392014-10-28 18:15:20 +00008041 EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
8042 defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
Simon Pilgrim647dd6a2017-11-27 16:43:18 +00008043 SSE_SQRTPS, v8f32x_info>,
Robert Khasanoveb126392014-10-28 18:15:20 +00008044 EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
8045 defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
Simon Pilgrim647dd6a2017-11-27 16:43:18 +00008046 SSE_SQRTPD, v2f64x_info>,
Robert Khasanoveb126392014-10-28 18:15:20 +00008047 EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8048 defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
Simon Pilgrim647dd6a2017-11-27 16:43:18 +00008049 SSE_SQRTPD, v4f64x_info>,
Robert Khasanoveb126392014-10-28 18:15:20 +00008050 EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8051 }
8052}
8053
// Instantiates the rounding-control form (avx512_sqrt_packed_round) for the
// 512-bit "ps" and "pd" variants only. The PSZ/PDZ names match those used in
// avx512_sqrt_packed_all.
Craig Topper80405072017-11-11 08:24:12 +00008054multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr> {
Simon Pilgrim647dd6a2017-11-27 16:43:18 +00008055 defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"), SSE_SQRTPS,
Asaf Badouh402ebb32015-06-03 13:41:48 +00008056 v16f32_info>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
Simon Pilgrim647dd6a2017-11-27 16:43:18 +00008057 defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"), SSE_SQRTPD,
Asaf Badouh402ebb32015-06-03 13:41:48 +00008058 v8f64_info>, EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8059}
8060
// Scalar sqrt definitions for the type described by `_`.
//   r_Int / m_Int / rb_Int - maskable forms on _.RC selecting X86fsqrtRnds;
//     r_Int and m_Int pass FROUND_CURRENT, rb_Int takes AVX512RC:$rc and
//     passes (i32 imm:$rc) with EVEX_B and EVEX_RC.
//   r / m - isCodeGenOnly forms on _.FRC with empty pattern lists; they are
//     selected only through the Pat records at the end of this multiclass.
// SUFF is spliced into NAME#SUFF#Z... to look up the generated instruction
// records; Intr is the intrinsic matched by the VR128X / memory patterns.
Simon Pilgrim647dd6a2017-11-27 16:43:18 +00008061multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, OpndItins itins,
8062 X86VectorVTInfo _, string SUFF, Intrinsic Intr> {
Craig Topper176f3312017-02-25 19:18:11 +00008063 let ExeDomain = _.ExeDomain in {
Clement Courbet41a13742018-01-15 12:05:33 +00008064 defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
Igor Breger4c4cd782015-09-20 09:13:41 +00008065 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8066 "$src2, $src1", "$src1, $src2",
Craig Topper80405072017-11-11 08:24:12 +00008067 (X86fsqrtRnds (_.VT _.RC:$src1),
Igor Breger4c4cd782015-09-20 09:13:41 +00008068 (_.VT _.RC:$src2),
Simon Pilgrim647dd6a2017-11-27 16:43:18 +00008069 (i32 FROUND_CURRENT)), itins.rr>,
8070 Sched<[itins.Sched]>;
Clement Courbet41a13742018-01-15 12:05:33 +00008071 defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8072 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
8073 "$src2, $src1", "$src1, $src2",
8074 (X86fsqrtRnds (_.VT _.RC:$src1),
8075 _.ScalarIntMemCPat:$src2,
8076 (i32 FROUND_CURRENT)), itins.rm>,
8077 Sched<[itins.Sched.Folded, ReadAfterLd]>;
8078 defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
Igor Breger4c4cd782015-09-20 09:13:41 +00008079 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
8080 "$rc, $src2, $src1", "$src1, $src2, $rc",
Craig Topper80405072017-11-11 08:24:12 +00008081 (X86fsqrtRnds (_.VT _.RC:$src1),
Igor Breger4c4cd782015-09-20 09:13:41 +00008082 (_.VT _.RC:$src2),
Simon Pilgrim647dd6a2017-11-27 16:43:18 +00008083 (i32 imm:$rc)), itins.rr>,
Craig Toppera2f55282017-12-10 03:16:36 +00008084 EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
Igor Breger4c4cd782015-09-20 09:13:41 +00008085
// FRC-based forms: no selection pattern of their own ([]), so mayLoad is
// stated explicitly on the memory form.
Clement Courbet41a13742018-01-15 12:05:33 +00008086 let isCodeGenOnly = 1, hasSideEffects = 0, Predicates=[HasAVX512] in {
8087 def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
8088 (ins _.FRC:$src1, _.FRC:$src2),
8089 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
8090 itins.rr>, Sched<[itins.Sched]>;
8091 let mayLoad = 1 in
8092 def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
8093 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
8094 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
8095 itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
8096 }
Craig Topper176f3312017-02-25 19:18:11 +00008097 }
Igor Breger4c4cd782015-09-20 09:13:41 +00008098
// Register patterns: plain fsqrt feeds IMPLICIT_DEF as the first source of the
// Zr form; the intrinsic passes the same VR128X register as both sources of
// Zr_Int.
Clement Courbet41a13742018-01-15 12:05:33 +00008099 let Predicates = [HasAVX512] in {
8100 def : Pat<(_.EltVT (fsqrt _.FRC:$src)),
8101 (!cast<Instruction>(NAME#SUFF#Zr)
8102 (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
Igor Breger4c4cd782015-09-20 09:13:41 +00008103
Clement Courbet41a13742018-01-15 12:05:33 +00008104 def : Pat<(Intr VR128X:$src),
8105 (!cast<Instruction>(NAME#SUFF#Zr_Int) VR128X:$src,
Craig Toppereff606c2017-11-06 04:04:01 +00008106 VR128X:$src)>;
Clement Courbet41a13742018-01-15 12:05:33 +00008107 }
Craig Toppereff606c2017-11-06 04:04:01 +00008108
// Load-folding patterns are additionally gated on OptForSize; both use
// IMPLICIT_DEF for the first source operand.
Clement Courbet41a13742018-01-15 12:05:33 +00008109 let Predicates = [HasAVX512, OptForSize] in {
8110 def : Pat<(_.EltVT (fsqrt (load addr:$src))),
8111 (!cast<Instruction>(NAME#SUFF#Zm)
8112 (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
Craig Toppereff606c2017-11-06 04:04:01 +00008113
Clement Courbet41a13742018-01-15 12:05:33 +00008114 def : Pat<(Intr _.ScalarIntMemCPat:$src2),
8115 (!cast<Instruction>(NAME#SUFF#Zm_Int)
8116 (_.VT (IMPLICIT_DEF)), addr:$src2)>;
8117 }
Craig Topperd6471cb2017-11-05 21:14:06 +00008118}
Igor Breger4c4cd782015-09-20 09:13:41 +00008119
// Instantiates avx512_sqrt_scalar for "ss" (f32x_info, int_x86_sse_sqrt_ss,
// XS) and "sd" (f64x_info, int_x86_sse2_sqrt_sd, XD, VEX_W). Both are marked
// NotMemoryFoldable.
// NOTE(review): the scalar forms are given the packed itineraries SSE_SQRTPS
// and SSE_SQRTPD — confirm whether scalar sqrt itineraries were intended.
8120multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr> {
Simon Pilgrim647dd6a2017-11-27 16:43:18 +00008121 defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", SSE_SQRTPS, f32x_info, "SS",
Craig Topper80405072017-11-11 08:24:12 +00008122 int_x86_sse_sqrt_ss>,
Craig Toppereff606c2017-11-06 04:04:01 +00008123 EVEX_CD8<32, CD8VT1>, EVEX_4V, XS, NotMemoryFoldable;
Simon Pilgrim647dd6a2017-11-27 16:43:18 +00008124 defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", SSE_SQRTPD, f64x_info, "SD",
Craig Topper80405072017-11-11 08:24:12 +00008125 int_x86_sse2_sqrt_sd>,
Craig Toppereff606c2017-11-06 04:04:01 +00008126 EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W,
Ayman Musa5fc6dc52017-10-08 08:32:56 +00008127 NotMemoryFoldable;
Igor Breger4c4cd782015-09-20 09:13:41 +00008128}
8129
// Packed vsqrt (opcode 0x51): regular forms at all widths plus the 512-bit
// rounding-control forms, combined into one defm.
Craig Topper80405072017-11-11 08:24:12 +00008130defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt">,
8131 avx512_sqrt_packed_all_round<0x51, "vsqrt">;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008132
// Scalar vsqrt (same opcode 0x51), marked VEX_LIG.
Igor Breger4c4cd782015-09-20 09:13:41 +00008133defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt">, VEX_LIG;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008134
// Scalar rndscale definitions for the type described by `_`.
//   r_Int  - maskable register form selecting X86RndScales;
//   rb_Int - maskable register form with {sae}, selecting X86RndScalesRnd
//            with FROUND_NO_EXC, EVEX_B set;
//   m_Int  - maskable memory form selecting X86RndScales through
//            _.ScalarIntMemCPat;
//   r / m  - isCodeGenOnly forms on _.FRC with empty pattern lists, selected
//            only through the Pat records below.
// The trailing Pat records map ffloor/fceil/ftrunc/frint/fnearbyint onto the
// r (and, under OptForSize, m) forms with a fixed i32 immediate each.
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00008135multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
8136 OpndItins itins, X86VectorVTInfo _> {
Elena Demikhovsky52e81bc2015-02-23 15:12:31 +00008137 let ExeDomain = _.ExeDomain in {
Craig Topper0ccec702017-11-11 08:24:15 +00008138 defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
Elena Demikhovsky52e81bc2015-02-23 15:12:31 +00008139 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
8140 "$src3, $src2, $src1", "$src1, $src2, $src3",
Asaf Badouha5b2e5e2015-07-22 12:00:43 +00008141 (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00008142 (i32 imm:$src3))), itins.rr>,
8143 Sched<[itins.Sched]>;
Elena Demikhovsky52e81bc2015-02-23 15:12:31 +00008144
Craig Topper0ccec702017-11-11 08:24:15 +00008145 defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
Elena Demikhovsky52e81bc2015-02-23 15:12:31 +00008146 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
Asaf Badouha5b2e5e2015-07-22 12:00:43 +00008147 "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
Craig Topper0af48f12017-11-13 02:02:58 +00008148 (_.VT (X86RndScalesRnd (_.VT _.RC:$src1), (_.VT _.RC:$src2),
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00008149 (i32 imm:$src3), (i32 FROUND_NO_EXC))), itins.rr>, EVEX_B,
8150 Sched<[itins.Sched]>;
Elena Demikhovsky52e81bc2015-02-23 15:12:31 +00008151
Craig Topper0ccec702017-11-11 08:24:15 +00008152 defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
Craig Topperbece74c2017-11-19 06:24:26 +00008153 (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
Simon Pilgrimb13961d2016-06-11 14:34:10 +00008154 OpcodeStr,
Elena Demikhovsky52e81bc2015-02-23 15:12:31 +00008155 "$src3, $src2, $src1", "$src1, $src2, $src3",
Craig Topperdeee24b2017-11-13 02:03:01 +00008156 (_.VT (X86RndScales _.RC:$src1,
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00008157 _.ScalarIntMemCPat:$src2, (i32 imm:$src3))), itins.rm>,
8158 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Elena Demikhovsky52e81bc2015-02-23 15:12:31 +00008159
// FRC-based forms: no selection pattern of their own ([]), so mayLoad is
// stated explicitly on the memory form.
Clement Courbetda1fad32018-01-15 14:24:07 +00008160 let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
Craig Topper0ccec702017-11-11 08:24:15 +00008161 def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
8162 (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
8163 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00008164 [], itins.rr>, Sched<[itins.Sched]>;
Craig Topper0ccec702017-11-11 08:24:15 +00008165
8166 let mayLoad = 1 in
8167 def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
8168 (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
8169 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00008170 [], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Craig Topper0ccec702017-11-11 08:24:15 +00008171 }
8172 }
8173
// Register patterns. Immediates used: ffloor 0x9, fceil 0xa, ftrunc 0xb,
// frint 0x4, fnearbyint 0xc (presumably rndscale rounding-control encodings —
// confirm against the instruction reference). The first source is
// IMPLICIT_DEF.
8174 let Predicates = [HasAVX512] in {
8175 def : Pat<(ffloor _.FRC:$src),
8176 (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
8177 _.FRC:$src, (i32 0x9)))>;
8178 def : Pat<(fceil _.FRC:$src),
8179 (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
8180 _.FRC:$src, (i32 0xa)))>;
8181 def : Pat<(ftrunc _.FRC:$src),
8182 (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
8183 _.FRC:$src, (i32 0xb)))>;
8184 def : Pat<(frint _.FRC:$src),
8185 (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
8186 _.FRC:$src, (i32 0x4)))>;
8187 def : Pat<(fnearbyint _.FRC:$src),
8188 (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
8189 _.FRC:$src, (i32 0xc)))>;
8190 }
8191
// Load-folding counterparts of the patterns above, additionally gated on
// OptForSize; same immediates, targeting the m form.
8192 let Predicates = [HasAVX512, OptForSize] in {
8193 def : Pat<(ffloor (_.ScalarLdFrag addr:$src)),
8194 (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
8195 addr:$src, (i32 0x9)))>;
8196 def : Pat<(fceil (_.ScalarLdFrag addr:$src)),
8197 (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
8198 addr:$src, (i32 0xa)))>;
8199 def : Pat<(ftrunc (_.ScalarLdFrag addr:$src)),
8200 (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
8201 addr:$src, (i32 0xb)))>;
8202 def : Pat<(frint (_.ScalarLdFrag addr:$src)),
8203 (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
8204 addr:$src, (i32 0x4)))>;
8205 def : Pat<(fnearbyint (_.ScalarLdFrag addr:$src)),
8206 (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
8207 addr:$src, (i32 0xc)))>;
Elena Demikhovsky52e81bc2015-02-23 15:12:31 +00008208 }
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00008209}
8210
// vrndscaless (opcode 0x0A) on f32x_info with SSE_ALU_F32S itineraries.
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00008211defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", SSE_ALU_F32S,
8212 f32x_info>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VT1>;
Michael Liao5bf95782014-12-04 05:20:33 +00008213
// vrndscalesd (opcode 0x0B) on f64x_info with SSE_ALU_F64S itineraries; adds
// VEX_W.
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00008214defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", SSE_ALU_F64S,
8215 f64x_info>, VEX_W, AVX512AIi8Base, EVEX_4V,
8216 EVEX_CD8<64, CD8VT1>;
Eric Christopher0d94fa92015-02-20 00:45:28 +00008217
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008218//-------------------------------------------------
8219// Integer truncate and extend operations
8220//-------------------------------------------------
8221
// Itinerary bundle for the extend instructions: IIC_SSE_PSHUF_RI /
// IIC_SSE_PSHUF_MI with Sched = WriteShuffle256.
Simon Pilgrim833c2602017-12-05 19:21:28 +00008222let Sched = WriteShuffle256 in
8223def AVX512_EXTEND : OpndItins<
8224 IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI
8225>;
8226
// Itinerary bundle for the truncate instructions; identical contents to
// AVX512_EXTEND above.
8227let Sched = WriteShuffle256 in
8228def AVX512_TRUNCATE : OpndItins<
8229 IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI
8230>;
8231
// Defines the instruction records for one truncate at one vector width:
//   rr  - maskable register form (MRMDestReg) selecting
//         (DestInfo.VT (OpNode (SrcInfo.VT src)));
//   mr  - store form to x86memop:$dst, no selection pattern;
//   mrk - store form under write-mask SrcInfo.KRCWM:$mask (EVEX_K), no
//         selection pattern.
// The store forms are selected through avx512_trunc_mr_lowering.
// NOTE(review): mayLoad = 1 is set on the mr/mrk store forms alongside
// mayStore = 1 — confirm this is intentional.
Igor Breger074a64e2015-07-24 17:24:15 +00008232multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim833c2602017-12-05 19:21:28 +00008233 OpndItins itins, X86VectorVTInfo SrcInfo,
8234 X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
Craig Topper52e2e832016-07-22 05:46:44 +00008235 let ExeDomain = DestInfo.ExeDomain in
Igor Breger074a64e2015-07-24 17:24:15 +00008236 defm rr : AVX512_maskable<opc, MRMDestReg, DestInfo, (outs DestInfo.RC:$dst),
8237 (ins SrcInfo.RC:$src1), OpcodeStr ,"$src1", "$src1",
Simon Pilgrim833c2602017-12-05 19:21:28 +00008238 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1))),
8239 itins.rr>, EVEX, T8XS, Sched<[itins.Sched]>;
Igor Breger074a64e2015-07-24 17:24:15 +00008240
Craig Topper52e2e832016-07-22 05:46:44 +00008241 let mayStore = 1, mayLoad = 1, hasSideEffects = 0,
8242 ExeDomain = DestInfo.ExeDomain in {
Igor Breger074a64e2015-07-24 17:24:15 +00008243 def mr : AVX512XS8I<opc, MRMDestMem, (outs),
8244 (ins x86memop:$dst, SrcInfo.RC:$src),
Craig Topper9feea572016-01-11 00:44:58 +00008245 OpcodeStr # "\t{$src, $dst|$dst, $src}",
Simon Pilgrim833c2602017-12-05 19:21:28 +00008246 [], itins.rm>, EVEX, Sched<[itins.Sched.Folded]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008247
Igor Breger074a64e2015-07-24 17:24:15 +00008248 def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
8249 (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
Craig Topper9feea572016-01-11 00:44:58 +00008250 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
Simon Pilgrim833c2602017-12-05 19:21:28 +00008251 [], itins.rm>, EVEX, EVEX_K, Sched<[itins.Sched.Folded]>;
Craig Topper99f6b622016-05-01 01:03:56 +00008252 }//mayStore = 1, mayLoad = 1, hasSideEffects = 0
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008253}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008254
// Selection patterns for the store forms defined by avx512_trunc_common:
// truncFrag maps to the NAME#SrcInfo.ZSuffix##mr record and mtruncFrag (which
// carries a SrcInfo.KRCWM mask) maps to the ...##mrk record.
// DestInfo is accepted but not referenced in this multiclass body.
Igor Breger074a64e2015-07-24 17:24:15 +00008255multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
8256 X86VectorVTInfo DestInfo,
8257 PatFrag truncFrag, PatFrag mtruncFrag > {
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008258
Igor Breger074a64e2015-07-24 17:24:15 +00008259 def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
8260 (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mr)
8261 addr:$dst, SrcInfo.RC:$src)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008262
Igor Breger074a64e2015-07-24 17:24:15 +00008263 def : Pat<(mtruncFrag addr:$dst, SrcInfo.KRCWM:$mask,
8264 (SrcInfo.VT SrcInfo.RC:$src)),
8265 (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mrk)
8266 addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
8267}
8268
// Instantiates one truncate at all three source widths, pairing
// avx512_trunc_common (instruction records) with avx512_trunc_mr_lowering
// (store patterns) for each:
//   Z128 / Z256 - under Predicates = [HasVLX, prd], using OpNode128/OpNode256;
//   Z           - under Predicates = [prd], using OpNode512.
// Each width gets its own destination VT info and memory operand. prd
// defaults to HasAVX512.
Craig Topperb2868232018-01-14 08:11:36 +00008269multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
8270 SDNode OpNode256, SDNode OpNode512, OpndItins itins,
8271 AVX512VLVectorVTInfo VTSrcInfo,
8272 X86VectorVTInfo DestInfoZ128,
8273 X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
8274 X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
8275 X86MemOperand x86memopZ, PatFrag truncFrag,
8276 PatFrag mtruncFrag, Predicate prd = HasAVX512>{
Igor Breger074a64e2015-07-24 17:24:15 +00008277
8278 let Predicates = [HasVLX, prd] in {
Craig Topperb2868232018-01-14 08:11:36 +00008279 defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode128, itins,
Simon Pilgrim833c2602017-12-05 19:21:28 +00008280 VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
Igor Breger074a64e2015-07-24 17:24:15 +00008281 avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
8282 truncFrag, mtruncFrag>, EVEX_V128;
8283
Craig Topperb2868232018-01-14 08:11:36 +00008284 defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode256, itins,
Simon Pilgrim833c2602017-12-05 19:21:28 +00008285 VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
Igor Breger074a64e2015-07-24 17:24:15 +00008286 avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
8287 truncFrag, mtruncFrag>, EVEX_V256;
8288 }
8289 let Predicates = [prd] in
Craig Topperb2868232018-01-14 08:11:36 +00008290 defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode512, itins,
Simon Pilgrim833c2602017-12-05 19:21:28 +00008291 VTSrcInfo.info512, DestInfoZ, x86memopZ>,
Igor Breger074a64e2015-07-24 17:24:15 +00008292 avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
8293 truncFrag, mtruncFrag>, EVEX_V512;
8294}
8295
// i64 -> i8 truncate. All three widths select InVecNode (which defaults to
// OpNode) and produce v16i8x_info; memory operands are i16mem/i32mem/i64mem
// for the 128/256/512-bit sources respectively.
Elena Demikhovsky7c7bf1b2016-12-21 10:43:36 +00008296multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim833c2602017-12-05 19:21:28 +00008297 OpndItins itins, PatFrag StoreNode,
Craig Topperb2868232018-01-14 08:11:36 +00008298 PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
8299 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode, itins,
8300 avx512vl_i64_info, v16i8x_info, v16i8x_info,
8301 v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
8302 MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
Igor Breger074a64e2015-07-24 17:24:15 +00008303}
8304
// i64 -> i16 truncate. The 128- and 256-bit sources select InVecNode; the
// 512-bit source selects OpNode. Every width produces v8i16x_info; memory
// operands are i32mem/i64mem/i128mem.
Elena Demikhovsky7c7bf1b2016-12-21 10:43:36 +00008305multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim833c2602017-12-05 19:21:28 +00008306 OpndItins itins, PatFrag StoreNode,
Craig Topperb2868232018-01-14 08:11:36 +00008307 PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
8308 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode, itins,
8309 avx512vl_i64_info, v8i16x_info, v8i16x_info,
8310 v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
8311 MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
Igor Breger074a64e2015-07-24 17:24:15 +00008312}
8313
// i64 -> i32 truncate. Only the 128-bit source selects InVecNode; the 256- and
// 512-bit sources select OpNode. Destinations are v4i32x_info, v4i32x_info and
// v8i32x_info; memory operands are i64mem/i128mem/i256mem.
Elena Demikhovsky7c7bf1b2016-12-21 10:43:36 +00008314multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim833c2602017-12-05 19:21:28 +00008315 OpndItins itins, PatFrag StoreNode,
Craig Topperb2868232018-01-14 08:11:36 +00008316 PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
8317 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, itins,
8318 avx512vl_i64_info, v4i32x_info, v4i32x_info,
8319 v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
8320 MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
Igor Breger074a64e2015-07-24 17:24:15 +00008321}
8322
// i32 -> i8 truncate. The 128- and 256-bit sources select InVecNode; the
// 512-bit source selects OpNode. Every width produces v16i8x_info; memory
// operands are i32mem/i64mem/i128mem.
Elena Demikhovsky7c7bf1b2016-12-21 10:43:36 +00008323multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim833c2602017-12-05 19:21:28 +00008324 OpndItins itins, PatFrag StoreNode,
Craig Topperb2868232018-01-14 08:11:36 +00008325 PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
8326 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode, itins,
8327 avx512vl_i32_info, v16i8x_info, v16i8x_info,
8328 v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
8329 MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
Igor Breger074a64e2015-07-24 17:24:15 +00008330}
8331
// i32 -> i16 truncate. Only the 128-bit source selects InVecNode; the 256- and
// 512-bit sources select OpNode. Destinations are v8i16x_info, v8i16x_info and
// v16i16x_info; memory operands are i64mem/i128mem/i256mem.
Elena Demikhovsky7c7bf1b2016-12-21 10:43:36 +00008332multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim833c2602017-12-05 19:21:28 +00008333 OpndItins itins, PatFrag StoreNode,
Craig Topperb2868232018-01-14 08:11:36 +00008334 PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
8335 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, itins,
8336 avx512vl_i32_info, v8i16x_info, v8i16x_info,
8337 v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
8338 MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
Igor Breger074a64e2015-07-24 17:24:15 +00008339}
8340
// Word -> byte truncate wrapper.
//
// Forwards to avx512_trunc with the source type family fixed to
// avx512vl_i16_info, destination infos v16i8x/v16i8x/v32i8x, memory operands
// i64mem/i128mem/i256mem, and EVEX_CD8<16, CD8VH>. Unlike the sibling
// wrappers it also passes HasBWI as a trailing argument to avx512_trunc.
//
//   OpNode          - node forwarded in the second and third node slots.
//   InVecNode       - node forwarded in the first node slot; defaults to
//                     OpNode.
//   StoreNode       - truncating-store fragment.
//   MaskedStoreNode - masked truncating-store fragment.
//
// NOTE(review): avx512_trunc is defined outside this chunk; the trailing
// argument is presumably its predicate parameter - confirm.
multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           OpndItins itins, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
  defm NAME : avx512_trunc<opc, OpcodeStr,
                           InVecNode, OpNode, OpNode,
                           itins, avx512vl_i16_info,
                           v16i8x_info, v16i8x_info, v32i8x_info,
                           i64mem, i128mem, i256mem,
                           StoreNode, MaskedStoreNode, HasBWI>,
              EVEX_CD8<16, CD8VH>;
}
8349
// Truncating-move instantiations, one group per source/destination element
// pair. In every group the plain form passes `trunc` as OpNode and X86vtrunc
// as the explicit InVecNode, while the vpmovs*/vpmovus* forms pass a single
// node (X86vtruncs / X86vtruncus) and let InVecNode default to it.

// Quadword -> byte.
defm VPMOVQB   : avx512_trunc_qb<0x32, "vpmovqb", trunc, AVX512_TRUNCATE,
                                 truncstorevi8, masked_truncstorevi8,
                                 X86vtrunc>;
defm VPMOVSQB  : avx512_trunc_qb<0x22, "vpmovsqb", X86vtruncs, AVX512_TRUNCATE,
                                 truncstore_s_vi8, masked_truncstore_s_vi8>;
defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus, AVX512_TRUNCATE,
                                 truncstore_us_vi8, masked_truncstore_us_vi8>;

// Quadword -> word.
defm VPMOVQW   : avx512_trunc_qw<0x34, "vpmovqw", trunc, AVX512_TRUNCATE,
                                 truncstorevi16, masked_truncstorevi16,
                                 X86vtrunc>;
defm VPMOVSQW  : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, AVX512_TRUNCATE,
                                 truncstore_s_vi16, masked_truncstore_s_vi16>;
defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus, AVX512_TRUNCATE,
                                 truncstore_us_vi16, masked_truncstore_us_vi16>;

// Quadword -> doubleword.
defm VPMOVQD   : avx512_trunc_qd<0x35, "vpmovqd", trunc, AVX512_TRUNCATE,
                                 truncstorevi32, masked_truncstorevi32,
                                 X86vtrunc>;
defm VPMOVSQD  : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, AVX512_TRUNCATE,
                                 truncstore_s_vi32, masked_truncstore_s_vi32>;
defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus, AVX512_TRUNCATE,
                                 truncstore_us_vi32, masked_truncstore_us_vi32>;

// Doubleword -> byte.
defm VPMOVDB   : avx512_trunc_db<0x31, "vpmovdb", trunc, AVX512_TRUNCATE,
                                 truncstorevi8, masked_truncstorevi8,
                                 X86vtrunc>;
defm VPMOVSDB  : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, AVX512_TRUNCATE,
                                 truncstore_s_vi8, masked_truncstore_s_vi8>;
defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus, AVX512_TRUNCATE,
                                 truncstore_us_vi8, masked_truncstore_us_vi8>;

// Doubleword -> word.
defm VPMOVDW   : avx512_trunc_dw<0x33, "vpmovdw", trunc, AVX512_TRUNCATE,
                                 truncstorevi16, masked_truncstorevi16,
                                 X86vtrunc>;
defm VPMOVSDW  : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, AVX512_TRUNCATE,
                                 truncstore_s_vi16, masked_truncstore_s_vi16>;
defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus, AVX512_TRUNCATE,
                                 truncstore_us_vi16, masked_truncstore_us_vi16>;

// Word -> byte.
defm VPMOVWB   : avx512_trunc_wb<0x30, "vpmovwb", trunc, AVX512_TRUNCATE,
                                 truncstorevi8, masked_truncstorevi8,
                                 X86vtrunc>;
defm VPMOVSWB  : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, AVX512_TRUNCATE,
                                 truncstore_s_vi8, masked_truncstore_s_vi8>;
defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus, AVX512_TRUNCATE,
                                 truncstore_us_vi8, masked_truncstore_us_vi8>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008391
// Truncation of a 256-bit source when AVX512 is present but VLX is not.
// Each pattern places the ymm source into an otherwise-undefined 512-bit
// value (INSERT_SUBREG into IMPLICIT_DEF at sub_ymm), truncates it with the
// 512-bit register-register instruction, and takes the sub_xmm part of the
// result.
let Predicates = [HasAVX512, NoVLX] in {
  // v8i32 -> v8i16 through VPMOVDWZrr.
  def : Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
            (v8i16 (EXTRACT_SUBREG
                     (v16i16 (VPMOVDWZrr
                               (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                                      VR256X:$src, sub_ymm)))),
                     sub_xmm))>;

  // v4i64 -> v4i32 through VPMOVQDZrr.
  def : Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
            (v4i32 (EXTRACT_SUBREG
                     (v8i32 (VPMOVQDZrr
                              (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                                    VR256X:$src, sub_ymm)))),
                     sub_xmm))>;
}
8402
// v16i16 -> v16i8 truncation when BWI is present but VLX is not: the ymm
// source is inserted into an undefined 512-bit value, truncated with
// VPMOVWBZrr, and the sub_xmm part of the result is extracted.
let Predicates = [HasBWI, NoVLX] in {
  def : Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
            (v16i8 (EXTRACT_SUBREG
                     (VPMOVWBZrr
                       (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
                                              VR256X:$src, sub_ymm))),
                     sub_xmm))>;
}
8408
// Shared body for the extension multiclasses that instantiate it in this
// file: emits a register-source (rr) and a memory-source (rm) maskable
// instruction for one vector length.
//
//   DestInfo / SrcInfo - vector type info of the result and of the register
//                        source.
//   x86memop           - memory operand used by the rm form.
//   LdFrag             - load fragment matched by the rm form.
//   OpNode             - extension node matched by the rr form.
//   itins              - supplies the rr/rm itineraries and the scheduling
//                        class (Sched for rr, Sched.Folded for rm).
multiclass avx512_extend_common<bits<8> opc, string OpcodeStr, OpndItins itins,
                                X86VectorVTInfo DestInfo,
                                X86VectorVTInfo SrcInfo,
                                X86MemOperand x86memop, PatFrag LdFrag,
                                SDNode OpNode> {
  let ExeDomain = DestInfo.ExeDomain in {
    // Register source: apply OpNode to the SrcInfo-typed register.
    defm rr : AVX512_maskable<opc, MRMSrcReg, DestInfo,
                              (outs DestInfo.RC:$dst),
                              (ins SrcInfo.RC:$src),
                              OpcodeStr, "$src", "$src",
                              (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))),
                              itins.rr>,
              EVEX, Sched<[itins.Sched]>;

    // Memory source: the pattern is the load fragment applied to the address.
    defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo,
                              (outs DestInfo.RC:$dst),
                              (ins x86memop:$src),
                              OpcodeStr, "$src", "$src",
                              (DestInfo.VT (LdFrag addr:$src)),
                              itins.rm>,
              EVEX, Sched<[itins.Sched.Folded]>;
  }
}
8424
// Byte -> word extension at all three vector lengths.
//
//   OpNode    - node used for the Z256 and Z forms.
//   InVecNode - node used for the Z128 form.
//   ExtTy     - prefix used to look up the default load fragment
//               (ExtTy # "extloadvi8").
//   LdFrag    - load fragment for the memory forms; defaults as above.
//
// Z128/Z256 require HasVLX and HasBWI; Z requires HasBWI.
multiclass avx512_extend_BW<bits<8> opc, string OpcodeStr,
                            SDNode OpNode, SDNode InVecNode, string ExtTy,
                            OpndItins itins,
                            PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasBWI] in {
    defm Z128 : avx512_extend_common<opc, OpcodeStr, itins,
                                     v8i16x_info, v16i8x_info,
                                     i64mem, LdFrag, InVecNode>,
                EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256 : avx512_extend_common<opc, OpcodeStr, itins,
                                     v16i16x_info, v16i8x_info,
                                     i128mem, LdFrag, OpNode>,
                EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasBWI] in {
    defm Z : avx512_extend_common<opc, OpcodeStr, itins,
                                  v32i16_info, v32i8x_info,
                                  i256mem, LdFrag, OpNode>,
             EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
  }
}
8443
// Byte -> doubleword extension at all three vector lengths.
//
//   OpNode    - node used for the Z256 and Z forms.
//   InVecNode - node used for the Z128 form.
//   ExtTy     - prefix used to look up the default load fragment
//               (ExtTy # "extloadvi8").
//   LdFrag    - load fragment for the memory forms; defaults as above.
//
// Every form takes its register source as v16i8x_info; the memory operand
// grows i32mem -> i64mem -> i128mem with the vector length.
// Z128/Z256 require HasVLX and HasAVX512; Z requires HasAVX512.
multiclass avx512_extend_BD<bits<8> opc, string OpcodeStr,
                            SDNode OpNode, SDNode InVecNode, string ExtTy,
                            OpndItins itins,
                            PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128 : avx512_extend_common<opc, OpcodeStr, itins,
                                     v4i32x_info, v16i8x_info,
                                     i32mem, LdFrag, InVecNode>,
                EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256 : avx512_extend_common<opc, OpcodeStr, itins,
                                     v8i32x_info, v16i8x_info,
                                     i64mem, LdFrag, OpNode>,
                EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : avx512_extend_common<opc, OpcodeStr, itins,
                                  v16i32_info, v16i8x_info,
                                  i128mem, LdFrag, OpNode>,
             EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
  }
}
8462
// Byte -> quadword extension at all three vector lengths.
//
//   OpNode    - node used for the Z256 and Z forms.
//   InVecNode - node used for the Z128 form.
//   ExtTy     - prefix used to look up the default load fragment
//               (ExtTy # "extloadvi8").
//   LdFrag    - load fragment for the memory forms; defaults as above.
//
// Every form takes its register source as v16i8x_info; the memory operand
// grows i16mem -> i32mem -> i64mem with the vector length.
// Z128/Z256 require HasVLX and HasAVX512; Z requires HasAVX512.
multiclass avx512_extend_BQ<bits<8> opc, string OpcodeStr,
                            SDNode OpNode, SDNode InVecNode, string ExtTy,
                            OpndItins itins,
                            PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128 : avx512_extend_common<opc, OpcodeStr, itins,
                                     v2i64x_info, v16i8x_info,
                                     i16mem, LdFrag, InVecNode>,
                EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256 : avx512_extend_common<opc, OpcodeStr, itins,
                                     v4i64x_info, v16i8x_info,
                                     i32mem, LdFrag, OpNode>,
                EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : avx512_extend_common<opc, OpcodeStr, itins,
                                  v8i64_info, v16i8x_info,
                                  i64mem, LdFrag, OpNode>,
             EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
  }
}
8481
// Word -> doubleword extension at all three vector lengths.
//
//   OpNode    - node used for the Z256 and Z forms.
//   InVecNode - node used for the Z128 form.
//   ExtTy     - prefix used to look up the default load fragment
//               (ExtTy # "extloadvi16").
//   LdFrag    - load fragment for the memory forms; defaults as above.
//
// Z128/Z256 require HasVLX and HasAVX512; Z requires HasAVX512.
multiclass avx512_extend_WD<bits<8> opc, string OpcodeStr,
                            SDNode OpNode, SDNode InVecNode, string ExtTy,
                            OpndItins itins,
                            PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128 : avx512_extend_common<opc, OpcodeStr, itins,
                                     v4i32x_info, v8i16x_info,
                                     i64mem, LdFrag, InVecNode>,
                EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256 : avx512_extend_common<opc, OpcodeStr, itins,
                                     v8i32x_info, v8i16x_info,
                                     i128mem, LdFrag, OpNode>,
                EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : avx512_extend_common<opc, OpcodeStr, itins,
                                  v16i32_info, v16i16x_info,
                                  i256mem, LdFrag, OpNode>,
             EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
  }
}
8500
// Word -> quadword extension at all three vector lengths.
//
//   OpNode    - node used for the Z256 and Z forms.
//   InVecNode - node used for the Z128 form.
//   ExtTy     - prefix used to look up the default load fragment
//               (ExtTy # "extloadvi16").
//   LdFrag    - load fragment for the memory forms; defaults as above.
//
// Every form takes its register source as v8i16x_info; the memory operand
// grows i32mem -> i64mem -> i128mem with the vector length.
// Z128/Z256 require HasVLX and HasAVX512; Z requires HasAVX512.
multiclass avx512_extend_WQ<bits<8> opc, string OpcodeStr,
                            SDNode OpNode, SDNode InVecNode, string ExtTy,
                            OpndItins itins,
                            PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128 : avx512_extend_common<opc, OpcodeStr, itins,
                                     v2i64x_info, v8i16x_info,
                                     i32mem, LdFrag, InVecNode>,
                EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256 : avx512_extend_common<opc, OpcodeStr, itins,
                                     v4i64x_info, v8i16x_info,
                                     i64mem, LdFrag, OpNode>,
                EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : avx512_extend_common<opc, OpcodeStr, itins,
                                  v8i64_info, v8i16x_info,
                                  i128mem, LdFrag, OpNode>,
             EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
  }
}
8519
// Doubleword -> quadword extension at all three vector lengths.
//
//   OpNode    - node used for the Z256 and Z forms.
//   InVecNode - node used for the Z128 form.
//   ExtTy     - prefix used to look up the default load fragment
//               (ExtTy # "extloadvi32").
//   LdFrag    - load fragment for the memory forms; defaults as above.
//
// Unlike the byte/word source variants, none of these forms carries VEX_WIG.
// Z128/Z256 require HasVLX and HasAVX512; Z requires HasAVX512.
multiclass avx512_extend_DQ<bits<8> opc, string OpcodeStr,
                            SDNode OpNode, SDNode InVecNode, string ExtTy,
                            OpndItins itins,
                            PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {

  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128 : avx512_extend_common<opc, OpcodeStr, itins,
                                     v2i64x_info, v4i32x_info,
                                     i64mem, LdFrag, InVecNode>,
                EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;

    defm Z256 : avx512_extend_common<opc, OpcodeStr, itins,
                                     v4i64x_info, v4i32x_info,
                                     i128mem, LdFrag, OpNode>,
                EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
  }
  let Predicates = [HasAVX512] in {
    defm Z : avx512_extend_common<opc, OpcodeStr, itins,
                                  v8i64_info, v8i32x_info,
                                  i256mem, LdFrag, OpNode>,
             EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
  }
}
8539
// Zero-extending moves: X86vzext for the 256/512-bit forms, zext_invec for
// the 128-bit form, and "z" as the load-fragment prefix.
defm VPMOVZXBW : avx512_extend_BW<0x30, "vpmovzxbw", X86vzext, zext_invec,
                                  "z", AVX512_EXTEND>;
defm VPMOVZXBD : avx512_extend_BD<0x31, "vpmovzxbd", X86vzext, zext_invec,
                                  "z", AVX512_EXTEND>;
defm VPMOVZXBQ : avx512_extend_BQ<0x32, "vpmovzxbq", X86vzext, zext_invec,
                                  "z", AVX512_EXTEND>;
defm VPMOVZXWD : avx512_extend_WD<0x33, "vpmovzxwd", X86vzext, zext_invec,
                                  "z", AVX512_EXTEND>;
defm VPMOVZXWQ : avx512_extend_WQ<0x34, "vpmovzxwq", X86vzext, zext_invec,
                                  "z", AVX512_EXTEND>;
defm VPMOVZXDQ : avx512_extend_DQ<0x35, "vpmovzxdq", X86vzext, zext_invec,
                                  "z", AVX512_EXTEND>;

// Sign-extending moves: X86vsext for the 256/512-bit forms, sext_invec for
// the 128-bit form, and "s" as the load-fragment prefix.
defm VPMOVSXBW : avx512_extend_BW<0x20, "vpmovsxbw", X86vsext, sext_invec,
                                  "s", AVX512_EXTEND>;
defm VPMOVSXBD : avx512_extend_BD<0x21, "vpmovsxbd", X86vsext, sext_invec,
                                  "s", AVX512_EXTEND>;
defm VPMOVSXBQ : avx512_extend_BQ<0x22, "vpmovsxbq", X86vsext, sext_invec,
                                  "s", AVX512_EXTEND>;
defm VPMOVSXWD : avx512_extend_WD<0x23, "vpmovsxwd", X86vsext, sext_invec,
                                  "s", AVX512_EXTEND>;
defm VPMOVSXWQ : avx512_extend_WQ<0x24, "vpmovsxwq", X86vsext, sext_invec,
                                  "s", AVX512_EXTEND>;
defm VPMOVSXDQ : avx512_extend_DQ<0x25, "vpmovsxdq", X86vsext, sext_invec,
                                  "s", AVX512_EXTEND>;
Elena Demikhovsky3948c592015-05-27 08:15:19 +00008553
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008554
// Extra load-folding patterns for the extension instructions.
//
// Each pattern matches an extension node applied to some spelling of a
// loaded source vector (scalar_to_vector of a scalar load, vzmovl, vzload,
// or a bitcast full-vector load) and selects the memory form of the
// instruction named OpcPrefix # <kind> # <Z128|Z256|Z> # rm.
//
//   OpcPrefix - instruction name prefix pasted in front of the suffix.
//   ExtOp     - node matched by the 256-bit and 512-bit patterns.
//   InVecOp   - node matched by the 128-bit patterns.
//   ExtLoad16 - load fragment used by the 128-bit byte->quadword
//               scalar_to_vector pattern.
multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
                                 SDNode InVecOp, PatFrag ExtLoad16> {
  //===--------------------------------------------------------------------===//
  // 128-bit patterns
  //===--------------------------------------------------------------------===//
  let Predicates = [HasVLX, HasBWI] in {
    // byte -> word
    def : Pat<(v8i16 (InVecOp (bc_v16i8
                (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
              (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
    def : Pat<(v8i16 (InVecOp (bc_v16i8
                (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
              (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
    def : Pat<(v8i16 (InVecOp (v16i8 (vzmovl_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
    def : Pat<(v8i16 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
    def : Pat<(v8i16 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
    // byte -> doubleword
    def : Pat<(v4i32 (InVecOp (bc_v16i8
                (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
              (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
    def : Pat<(v4i32 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
              (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
    def : Pat<(v4i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
    def : Pat<(v4i32 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;

    // byte -> quadword
    def : Pat<(v2i64 (InVecOp (bc_v16i8
                (v4i32 (scalar_to_vector (ExtLoad16 addr:$src)))))),
              (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
    def : Pat<(v2i64 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
              (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
    def : Pat<(v2i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
    def : Pat<(v2i64 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;

    // word -> doubleword
    def : Pat<(v4i32 (InVecOp (bc_v8i16
                (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
              (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
    def : Pat<(v4i32 (InVecOp (bc_v8i16
                (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
              (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
    def : Pat<(v4i32 (InVecOp (v8i16 (vzmovl_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
    def : Pat<(v4i32 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
    def : Pat<(v4i32 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;

    // word -> quadword
    def : Pat<(v2i64 (InVecOp (bc_v8i16
                (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
              (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
    def : Pat<(v2i64 (InVecOp (v8i16 (vzmovl_v4i32 addr:$src)))),
              (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
    def : Pat<(v2i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
    def : Pat<(v2i64 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;

    // doubleword -> quadword
    def : Pat<(v2i64 (InVecOp (bc_v4i32
                (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
              (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
    def : Pat<(v2i64 (InVecOp (bc_v4i32
                (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
              (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
    def : Pat<(v2i64 (InVecOp (v4i32 (vzmovl_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
    def : Pat<(v2i64 (InVecOp (v4i32 (vzload_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
    def : Pat<(v2i64 (InVecOp (bc_v4i32 (loadv2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  }

  //===--------------------------------------------------------------------===//
  // 256-bit patterns
  //===--------------------------------------------------------------------===//
  let Predicates = [HasVLX, HasBWI] in {
    // byte -> word
    def : Pat<(v16i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
    def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
    def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
    // byte -> doubleword
    def : Pat<(v8i32 (ExtOp (bc_v16i8
                (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
              (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
    def : Pat<(v8i32 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
    def : Pat<(v8i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
    def : Pat<(v8i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;

    // byte -> quadword
    def : Pat<(v4i64 (ExtOp (bc_v16i8
                (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
              (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
    def : Pat<(v4i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
              (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
    def : Pat<(v4i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
    def : Pat<(v4i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;

    // word -> doubleword
    def : Pat<(v8i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
    def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
    def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;

    // word -> quadword
    def : Pat<(v4i64 (ExtOp (bc_v8i16
                (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
              (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
    def : Pat<(v4i64 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
    def : Pat<(v4i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
    def : Pat<(v4i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;

    // doubleword -> quadword
    def : Pat<(v4i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
    def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
    def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
  }

  //===--------------------------------------------------------------------===//
  // 512-bit patterns
  //===--------------------------------------------------------------------===//
  let Predicates = [HasBWI] in {
    // byte -> word
    def : Pat<(v32i16 (ExtOp (bc_v32i8 (loadv4i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
  }
  let Predicates = [HasAVX512] in {
    // byte -> doubleword
    def : Pat<(v16i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;

    // byte -> quadword
    def : Pat<(v8i64 (ExtOp (bc_v16i8
                (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
              (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
    def : Pat<(v8i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;

    // word -> doubleword
    def : Pat<(v16i32 (ExtOp (bc_v16i16 (loadv4i64 addr:$src)))),
              (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;

    // word -> quadword
    def : Pat<(v8i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;

    // doubleword -> quadword
    def : Pat<(v8i64 (ExtOp (bc_v8i32 (loadv4i64 addr:$src)))),
              (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
  }
}
8695
// Instantiate the load-folding patterns once per extension flavour. The last
// argument is the load fragment used by the 128-bit byte->quadword
// scalar_to_vector pattern.
defm : AVX512_pmovx_patterns<"VPMOVSX", X86vsext, sext_invec,
                             extloadi32i16>;
defm : AVX512_pmovx_patterns<"VPMOVZX", X86vzext, zext_invec,
                             loadi16_anyext>;
Craig Topper64378f42016-10-09 23:08:39 +00008698
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008699//===----------------------------------------------------------------------===//
8700// GATHER - SCATTER Operations
8701
Simon Pilgrimb69dae42017-12-05 20:47:11 +00008702// FIXME: Improve scheduling of gather/scatter instructions.
// One masked gather instruction (memory-source form `rm`).
//
//   _          - vector type info of the gathered data; supplies the
//                register class, value type, mnemonic suffix, element size
//                and execution domain.
//   memop      - vector-index memory operand.
//   GatherNode - pattern fragment matched by the instruction.
//   MaskRC     - mask register class; defaults to the write-mask class of
//                `_`.
//
// The instruction has two results, the data register and a written-back
// mask. Per the constraint string, $dst is early-clobber and tied to the
// pass-through input $src1, and the mask output is tied to the mask input.
multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         X86MemOperand memop, PatFrag GatherNode,
                         RegisterClass MaskRC = _.KRCWM> {
  let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
      ExeDomain = _.ExeDomain in {
    def rm : AVX5128I<opc, MRMSrcMem,
                      (outs _.RC:$dst, MaskRC:$mask_wb),
                      (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
                      !strconcat(OpcodeStr#_.Suffix,
                        "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
                      [(set _.RC:$dst, MaskRC:$mask_wb,
                            (GatherNode (_.VT _.RC:$src1), MaskRC:$mask,
                                        vectoraddr:$src2))]>,
             EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteLoad]>;
  }
}
Cameron McInally45325962014-03-26 13:50:50 +00008717
// Gather variants whose data type comes from a 64-bit-element type family
// (instantiated in this file with avx512vl_f64_info and avx512vl_i64_info).
//
//   dopc / qopc - opcodes of the "d" and "q" index variants.
//   _           - type family; info512/info256/info128 pick the data type
//                 for each vector length.
//   OpcodeStr   - mnemonic stem; "d" or "q" is appended per variant.
//   SUFF        - record-name suffix pasted between the index letter and the
//                 length tag (NAME # D|Q # SUFF # Z|Z256|Z128).
//
// All forms carry VEX_W. The 256- and 128-bit forms additionally require
// HasVLX.
multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
                              AVX512VLVectorVTInfo _, string OpcodeStr,
                              string SUFF> {
  // 512-bit data.
  defm NAME##D##SUFF##Z : avx512_gather<dopc, OpcodeStr##"d", _.info512,
                                        vy512mem, mgatherv8i32>,
                          EVEX_V512, VEX_W;
  defm NAME##Q##SUFF##Z : avx512_gather<qopc, OpcodeStr##"q", _.info512,
                                        vz512mem, mgatherv8i64>,
                          EVEX_V512, VEX_W;

  let Predicates = [HasVLX] in {
    // 256-bit data.
    defm NAME##D##SUFF##Z256 : avx512_gather<dopc, OpcodeStr##"d", _.info256,
                                             vx256xmem, mgatherv4i32>,
                               EVEX_V256, VEX_W;
    defm NAME##Q##SUFF##Z256 : avx512_gather<qopc, OpcodeStr##"q", _.info256,
                                             vy256xmem, mgatherv4i64>,
                               EVEX_V256, VEX_W;

    // 128-bit data.
    defm NAME##D##SUFF##Z128 : avx512_gather<dopc, OpcodeStr##"d", _.info128,
                                             vx128xmem, mgatherv4i32>,
                               EVEX_V128, VEX_W;
    defm NAME##Q##SUFF##Z128 : avx512_gather<qopc, OpcodeStr##"q", _.info128,
                                             vx128xmem, mgatherv2i64>,
                               EVEX_V128, VEX_W;
  }
}
8735
// Gather variants whose data type comes from a 32-bit-element type family
// (instantiated in this file with avx512vl_f32_info and avx512vl_i32_info).
//
//   dopc / qopc - opcodes of the "d" and "q" index variants.
//   _           - type family; note that each "q" variant uses the data info
//                 one step narrower than its EVEX length (info256 under
//                 EVEX_V512, info128 under EVEX_V256 and EVEX_V128).
//   OpcodeStr   - mnemonic stem; "d" or "q" is appended per variant.
//   SUFF        - record-name suffix pasted between the index letter and the
//                 length tag (NAME # D|Q # SUFF # Z|Z256|Z128).
//
// The 256- and 128-bit forms require HasVLX. The 128-bit "q" form overrides
// the mask register class with VK2WM.
multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
                              AVX512VLVectorVTInfo _, string OpcodeStr,
                              string SUFF> {
  // EVEX.512 forms.
  defm NAME##D##SUFF##Z : avx512_gather<dopc, OpcodeStr##"d", _.info512,
                                        vz512mem, mgatherv16i32>,
                          EVEX_V512;
  defm NAME##Q##SUFF##Z : avx512_gather<qopc, OpcodeStr##"q", _.info256,
                                        vz256xmem, mgatherv8i64>,
                          EVEX_V512;

  let Predicates = [HasVLX] in {
    // EVEX.256 forms.
    defm NAME##D##SUFF##Z256 : avx512_gather<dopc, OpcodeStr##"d", _.info256,
                                             vy256xmem, mgatherv8i32>,
                               EVEX_V256;
    defm NAME##Q##SUFF##Z256 : avx512_gather<qopc, OpcodeStr##"q", _.info128,
                                             vy128xmem, mgatherv4i64>,
                               EVEX_V256;

    // EVEX.128 forms.
    defm NAME##D##SUFF##Z128 : avx512_gather<dopc, OpcodeStr##"d", _.info128,
                                             vx128xmem, mgatherv4i32>,
                               EVEX_V128;
    defm NAME##Q##SUFF##Z128 : avx512_gather<qopc, OpcodeStr##"q", _.info128,
                                             vx64xmem, mgatherv2i64, VK2WM>,
                               EVEX_V128;
  }
}
Michael Liao5bf95782014-12-04 05:20:33 +00008754
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008755
// Gather instruction families. Each defm combines the 64-bit-element
// multiclass (avx512_gather_q_pd) and the 32-bit-element multiclass
// (avx512_gather_d_ps) under one NAME prefix:
//   VGATHER  - opcodes 0x92/0x93, f64/f32 type infos, suffixes PD/PS.
//   VPGATHER - opcodes 0x90/0x91, i64/i32 type infos, suffixes Q/D.
Elena Demikhovsky6a1a3572015-06-28 10:53:29 +00008756defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
8757 avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;
8758
8759defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
8760 avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008761
// Defines a single masked scatter instruction, "mr".
//   opc         - opcode byte.
//   OpcodeStr   - mnemonic stem; _.Suffix is appended to form the asm string.
//   _           - vector type info of the data being stored ($src).
//   memop       - memory operand kind used for the destination ($dst).
//   ScatterNode - PatFrag matched by the selection pattern.
//   MaskRC      - register class of the write-mask; defaults to _.KRCWM.
// The mask is both an input ($mask) and an output ($mask_wb); the two are
// tied to the same register by the "$mask = $mask_wb" constraint.
Elena Demikhovskye1eda8a2015-04-30 08:38:48 +00008762multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
Craig Topper0b590342018-01-11 06:31:28 +00008763 X86MemOperand memop, PatFrag ScatterNode,
8764 RegisterClass MaskRC = _.KRCWM> {
Elena Demikhovskye1eda8a2015-04-30 08:38:48 +00008765
Elena Demikhovsky30bc4ca2015-06-29 12:14:24 +00008766let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain in
Elena Demikhovskye1eda8a2015-04-30 08:38:48 +00008767
Craig Topper0b590342018-01-11 06:31:28 +00008768 def mr : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
8769 (ins memop:$dst, MaskRC:$mask, _.RC:$src),
Elena Demikhovsky30bc4ca2015-06-29 12:14:24 +00008770 !strconcat(OpcodeStr#_.Suffix,
Elena Demikhovskye1eda8a2015-04-30 08:38:48 +00008771 "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
Craig Topper0b590342018-01-11 06:31:28 +00008772 [(set MaskRC:$mask_wb, (ScatterNode (_.VT _.RC:$src),
8773 MaskRC:$mask, vectoraddr:$dst))]>,
Simon Pilgrimb69dae42017-12-05 20:47:11 +00008774 EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
8775 Sched<[WriteStore]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008776}
8777
// Scatter instantiations for 64-bit elements (used below with
// avx512vl_f64_info and avx512vl_i64_info). For each vector width this emits
// a "d" form (opcode dopc) and a "q" form (opcode qopc), all marked VEX_W.
// The 256-bit and 128-bit forms are only available with HasVLX.
// Both forms at a given width use the same VT info; they differ in the memory
// operand kind and in the scatter PatFrag (mscatterv*i32 vs mscatterv*i64).
Elena Demikhovsky30bc4ca2015-06-29 12:14:24 +00008778multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
8779 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
8780 defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512,
Igor Breger45ef10f2016-02-25 13:30:17 +00008781 vy512mem, mscatterv8i32>, EVEX_V512, VEX_W;
Elena Demikhovsky30bc4ca2015-06-29 12:14:24 +00008782 defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info512,
Igor Breger45ef10f2016-02-25 13:30:17 +00008783 vz512mem, mscatterv8i64>, EVEX_V512, VEX_W;
Elena Demikhovsky30bc4ca2015-06-29 12:14:24 +00008784let Predicates = [HasVLX] in {
8785 defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
Igor Breger45ef10f2016-02-25 13:30:17 +00008786 vx256xmem, mscatterv4i32>, EVEX_V256, VEX_W;
Elena Demikhovsky30bc4ca2015-06-29 12:14:24 +00008787 defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info256,
Igor Breger45ef10f2016-02-25 13:30:17 +00008788 vy256xmem, mscatterv4i64>, EVEX_V256, VEX_W;
Elena Demikhovsky30bc4ca2015-06-29 12:14:24 +00008789 defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
Igor Breger45ef10f2016-02-25 13:30:17 +00008790 vx128xmem, mscatterv4i32>, EVEX_V128, VEX_W;
Elena Demikhovsky30bc4ca2015-06-29 12:14:24 +00008791 defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
Igor Breger45ef10f2016-02-25 13:30:17 +00008792 vx128xmem, mscatterv2i64>, EVEX_V128, VEX_W;
Elena Demikhovsky30bc4ca2015-06-29 12:14:24 +00008793}
Cameron McInally45325962014-03-26 13:50:50 +00008794}
8795
// Scatter instantiations for 32-bit elements (used below with
// avx512vl_f32_info and avx512vl_i32_info). For each vector width this emits
// a "d" form (opcode dopc) and a "q" form (opcode qopc); the 256-bit and
// 128-bit forms are only available with HasVLX.
// The "q" forms use a narrower VT info than the "d" form of the same width
// (info256 for Z, info128 for Z256) - presumably because qword indices address
// half as many 32-bit elements; confirm against the ISA reference.
// The Z128 "q" form keeps info128 but overrides MaskRC with VK2WM.
Elena Demikhovsky30bc4ca2015-06-29 12:14:24 +00008796multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
8797 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
Igor Breger45ef10f2016-02-25 13:30:17 +00008798 defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512, vz512mem,
Elena Demikhovsky30bc4ca2015-06-29 12:14:24 +00008799 mscatterv16i32>, EVEX_V512;
Craig Topper7dfd5832017-01-16 00:55:58 +00008800 defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info256, vz256xmem,
Elena Demikhovsky30bc4ca2015-06-29 12:14:24 +00008801 mscatterv8i64>, EVEX_V512;
8802let Predicates = [HasVLX] in {
8803 defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
Igor Breger45ef10f2016-02-25 13:30:17 +00008804 vy256xmem, mscatterv8i32>, EVEX_V256;
Elena Demikhovsky30bc4ca2015-06-29 12:14:24 +00008805 defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
Igor Breger45ef10f2016-02-25 13:30:17 +00008806 vy128xmem, mscatterv4i64>, EVEX_V256;
Elena Demikhovsky30bc4ca2015-06-29 12:14:24 +00008807 defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
Igor Breger45ef10f2016-02-25 13:30:17 +00008808 vx128xmem, mscatterv4i32>, EVEX_V128;
Elena Demikhovsky30bc4ca2015-06-29 12:14:24 +00008809 defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
Craig Topper0b590342018-01-11 06:31:28 +00008810 vx64xmem, mscatterv2i64, VK2WM>,
8811 EVEX_V128;
Elena Demikhovsky30bc4ca2015-06-29 12:14:24 +00008812}
Cameron McInally45325962014-03-26 13:50:50 +00008813}
8814
// Scatter instruction families. Each defm combines the 64-bit-element
// multiclass (avx512_scatter_q_pd) and the 32-bit-element multiclass
// (avx512_scatter_d_ps) under one NAME prefix:
//   VSCATTER  - opcodes 0xA2/0xA3, f64/f32 type infos, suffixes PD/PS.
//   VPSCATTER - opcodes 0xA0/0xA1, i64/i32 type infos, suffixes Q/D.
Elena Demikhovsky30bc4ca2015-06-29 12:14:24 +00008815defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
8816 avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008817
Elena Demikhovsky30bc4ca2015-06-29 12:14:24 +00008818defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
8819 avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008820
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00008821// prefetch
// Defines one gather/scatter prefetch instruction, "m", guarded by HasPFI.
//   opc       - opcode byte.
//   F         - instruction Format; the instantiations below pass
//               MRM1m/MRM2m/MRM5m/MRM6m.
//   OpcodeStr - full mnemonic (no suffix is appended here).
//   KRC       - register class of the mask operand.
//   memop     - vector memory operand kind.
// The instruction has no outputs and no selection pattern ([]), so it is
// explicitly marked hasSideEffects = 1. The same WriteLoad scheduling class
// is used for every instantiation of this multiclass.
8822multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
8823 RegisterClass KRC, X86MemOperand memop> {
8824 let Predicates = [HasPFI], hasSideEffects = 1 in
8825 def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
Craig Topperedb09112014-11-25 20:11:23 +00008826 !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"),
Simon Pilgrimb69dae42017-12-05 20:47:11 +00008827 [], IIC_SSE_PREFETCH>, EVEX, EVEX_K, Sched<[WriteLoad]>;
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00008828}
8829
// Gather/scatter prefetch instantiations (all EVEX_V512).
// Opcode selects the index width: 0xC6 for the dword-index forms (DPS/DPD),
// 0xC7 for the qword-index forms (QPS/QPD).
// The Format argument selects the operation:
//   MRM1m = vgatherpf0*,  MRM2m = vgatherpf1*,
//   MRM5m = vscatterpf0*, MRM6m = vscatterpf1*.
// The *PD forms additionally set VEX_W. EVEX_CD8 uses 32 for the dword-index
// forms and 64 for the qword-index forms. Only the DPS forms use a 16-bit
// mask (VK16WM); every other form uses VK8WM.
8830defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
Igor Breger45ef10f2016-02-25 13:30:17 +00008831 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00008832
8833defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
Craig Topper7dfd5832017-01-16 00:55:58 +00008834 VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00008835
8836defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
Igor Breger45ef10f2016-02-25 13:30:17 +00008837 VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00008838
8839defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
Igor Breger45ef10f2016-02-25 13:30:17 +00008840 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
Michael Liao5bf95782014-12-04 05:20:33 +00008841
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00008842defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
Igor Breger45ef10f2016-02-25 13:30:17 +00008843 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00008844
8845defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
Craig Topper7dfd5832017-01-16 00:55:58 +00008846 VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00008847
8848defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
Igor Breger45ef10f2016-02-25 13:30:17 +00008849 VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00008850
8851defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
Igor Breger45ef10f2016-02-25 13:30:17 +00008852 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00008853
8854defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
Igor Breger45ef10f2016-02-25 13:30:17 +00008855 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00008856
8857defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
Craig Topper7dfd5832017-01-16 00:55:58 +00008858 VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00008859
8860defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
Igor Breger45ef10f2016-02-25 13:30:17 +00008861 VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00008862
8863defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
Igor Breger45ef10f2016-02-25 13:30:17 +00008864 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00008865
8866defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
Igor Breger45ef10f2016-02-25 13:30:17 +00008867 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00008868
8869defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
Craig Topper7dfd5832017-01-16 00:55:58 +00008870 VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00008871
8872defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
Igor Breger45ef10f2016-02-25 13:30:17 +00008873 VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00008874
8875defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
Igor Breger45ef10f2016-02-25 13:30:17 +00008876 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008877
// Mask-to-vector conversion at a single vector width. Defines "rr", which
// reads a mask register (Vec.KRC) and writes a vector register (Vec.RC); it
// is selected for a sign-extension (sext) of the mask to Vec.VT.
// The asm mnemonic is OpcodeStr with Vec.Suffix appended.
Elena Demikhovsky44bf0632014-10-05 14:11:08 +00008878multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
Elena Demikhovsky0e6d6d52015-04-21 14:38:31 +00008879def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
Craig Topperedb09112014-11-25 20:11:23 +00008880 !strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
Craig Topper0321ebc2018-01-24 04:51:17 +00008881 [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))],
Simon Pilgrimbfe969c2017-12-06 11:59:05 +00008882 IIC_SSE_MOV_S_RR>, EVEX, Sched<[WriteMove]>;
Elena Demikhovsky44bf0632014-10-05 14:11:08 +00008883}
Michael Liao5bf95782014-12-04 05:20:33 +00008884
// Instantiates cvt_by_vec_width for all three vector widths of VTInfo.
// The 512-bit form (Z) requires only prd; the 256-bit and 128-bit forms
// (Z256, Z128) additionally require HasVLX.
Elena Demikhovsky44bf0632014-10-05 14:11:08 +00008885multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
8886 string OpcodeStr, Predicate prd> {
8887let Predicates = [prd] in
8888 defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;
8889
8890 let Predicates = [prd, HasVLX] in {
8891 defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
8892 defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
8893 }
8894}
8895
// Mask-to-vector moves. All four pass the stem "vpmovm2"; the element suffix
// comes from the type info (appended in cvt_by_vec_width).
// Byte/word forms use opcode 0x28 and require HasBWI; dword/qword forms use
// opcode 0x38 and require HasDQI. The W and Q forms additionally set VEX_W.
Michael Zuckerman85436ec2017-03-23 09:57:01 +00008896defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
8897defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , VEX_W;
8898defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
8899defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , VEX_W;
Elena Demikhovsky908dbf42014-12-11 15:02:24 +00008900
// Vector-to-mask conversion at a single vector width. Defines "rr", which
// reads a vector register (_.RC) and writes a mask register (_.KRC).
// It is selected for (X86pcmpgtm all-zeros, src), i.e. a "zero greater than
// element" compare of the source vector.
// Unlike cvt_by_vec_width, OpcodeStr is used as the full mnemonic (no suffix
// is appended).
Elena Demikhovsky0e6d6d52015-04-21 14:38:31 +00008901multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
Igor Bregerfca0a342016-01-28 13:19:25 +00008902 def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
8903 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
Craig Topperf090e8a2018-01-08 06:53:54 +00008904 [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))],
Simon Pilgrimbfe969c2017-12-06 11:59:05 +00008905 IIC_SSE_MOV_S_RR>, EVEX, Sched<[WriteMove]>;
Igor Bregerfca0a342016-01-28 13:19:25 +00008906}
8907
Simon Pilgrim18bcf932016-02-03 09:41:59 +00008908// Use 512bit version to implement 128/256 bit in case NoVLX.
// Pattern-only multiclass (defines no instruction):
//   ExtendInfo - type info of the wide vector the source is placed into.
//   _          - type info of the narrow source vector.
// The narrow source is inserted at _.SubRegIdx into an IMPLICIT_DEF register
// of type ExtendInfo.VT, the NAME#"Zrr" instruction is run on the wide
// register, and the result is copied into the narrow mask class _.KRC.
// The remaining elements of the wide register are undefined.
8909multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
Igor Bregerfca0a342016-01-28 13:19:25 +00008910 X86VectorVTInfo _> {
8911
Craig Topperf090e8a2018-01-08 06:53:54 +00008912 def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
Igor Bregerfca0a342016-01-28 13:19:25 +00008913 (_.KVT (COPY_TO_REGCLASS
8914 (!cast<Instruction>(NAME#"Zrr")
Simon Pilgrim18bcf932016-02-03 09:41:59 +00008915 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
Igor Bregerfca0a342016-01-28 13:19:25 +00008916 _.RC:$src, _.SubRegIdx)),
8917 _.KRC))>;
Elena Demikhovsky0e6d6d52015-04-21 14:38:31 +00008918}
8919
// Instantiates the vector-to-mask conversion for all three widths of VTInfo.
//   Z            - 512-bit instruction, requires prd.
//   Z256 / Z128  - 256/128-bit instructions, require prd and HasVLX.
//   Z256_Alt / Z128_Alt - patterns only, active with prd and NoVLX; they
//                  implement the narrow conversions via the 512-bit
//                  instruction (see convert_vector_to_mask_lowering).
8920multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
Igor Bregerfca0a342016-01-28 13:19:25 +00008921 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
8922 let Predicates = [prd] in
8923 defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
8924 EVEX_V512;
Elena Demikhovsky0e6d6d52015-04-21 14:38:31 +00008925
8926 let Predicates = [prd, HasVLX] in {
8927 defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
Igor Bregerfca0a342016-01-28 13:19:25 +00008928 EVEX_V256;
Elena Demikhovsky0e6d6d52015-04-21 14:38:31 +00008929 defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
Igor Bregerfca0a342016-01-28 13:19:25 +00008930 EVEX_V128;
8931 }
8932 let Predicates = [prd, NoVLX] in {
8933 defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256>;
8934 defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128>;
Elena Demikhovsky0e6d6d52015-04-21 14:38:31 +00008935 }
8936}
8937
// Vector-to-mask moves. Byte/word forms use opcode 0x29 and require HasBWI;
// dword/qword forms use opcode 0x39 and require HasDQI. The W and Q forms
// additionally set VEX_W. Each passes its complete mnemonic as OpcodeStr.
8938defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
8939 avx512vl_i8_info, HasBWI>;
8940defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
8941 avx512vl_i16_info, HasBWI>, VEX_W;
8942defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
8943 avx512vl_i32_info, HasDQI>;
8944defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
8945 avx512vl_i64_info, HasDQI>, VEX_W;
8946
Craig Topper0321ebc2018-01-24 04:51:17 +00008947// Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
8948// is available, but BWI is not. We can't handle this in lowering because
8949// a target independent DAG combine likes to combine sext and trunc.
// Both patterns first expand the v16i1 mask to v16i32 with VPMOVM2DZrr and
// then feed that to VPMOVDBZrr (v16i8 result) or VPMOVDWZrr (v16i16 result),
// presumably the dword-to-byte / dword-to-word truncating moves.
8950let Predicates = [HasDQI, NoBWI] in {
8951 def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
8952 (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
8953 def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
8954 (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
8955}
8956
Elena Demikhovsky908dbf42014-12-11 15:02:24 +00008957//===----------------------------------------------------------------------===//
8958// AVX-512 - COMPRESS and EXPAND
8959//
Elena Demikhovskyba5ab322015-06-22 11:16:30 +00008960
Simon Pilgrim904d1a82017-12-01 16:20:03 +00008961// FIXME: Is there a better scheduler itinerary for VPCOMPRESS/VPEXPAND?
// Itinerary bundles passed as the "itins" argument of the compress/expand
// multiclasses below. The two records are currently identical apart from
// their names: same rr/rm itinerary classes, and both get
// Sched = WriteShuffle256 from the enclosing let.
8962let Sched = WriteShuffle256 in {
8963def AVX512_COMPRESS : OpndItins<
8964 IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
8965>;
8966def AVX512_EXPAND : OpndItins<
8967 IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
8968>;
8969}
8970
// Compress instructions at a single vector width:
//   rr  - register-to-register form (maskable via AVX512_maskable), selected
//         for X86compress of the source register.
//   mr  - unmasked store-to-memory form; no selection pattern.
//   mrk - write-masked store-to-memory form; no selection pattern here (it is
//         referenced by compress_by_vec_width_lowering).
// Both memory forms use the folded variant of the itinerary's Sched class.
Ayman Musad7a5ed42016-09-26 06:22:08 +00008971multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
Simon Pilgrim904d1a82017-12-01 16:20:03 +00008972 string OpcodeStr, OpndItins itins> {
Elena Demikhovskyba5ab322015-06-22 11:16:30 +00008973 defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
Michael Liao66233b72015-08-06 09:06:20 +00008974 (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
Simon Pilgrim904d1a82017-12-01 16:20:03 +00008975 (_.VT (X86compress _.RC:$src1)), itins.rr>, AVX5128IBase,
8976 Sched<[itins.Sched]>;
Elena Demikhovsky908dbf42014-12-11 15:02:24 +00008977
Craig Toppere1cac152016-06-07 07:27:54 +00008978 let mayStore = 1, hasSideEffects = 0 in
Elena Demikhovskyba5ab322015-06-22 11:16:30 +00008979 def mr : AVX5128I<opc, MRMDestMem, (outs),
8980 (ins _.MemOp:$dst, _.RC:$src),
Craig Topper9feea572016-01-11 00:44:58 +00008981 OpcodeStr # "\t{$src, $dst|$dst, $src}",
Simon Pilgrim904d1a82017-12-01 16:20:03 +00008982 []>, EVEX_CD8<_.EltSize, CD8VT1>,
8983 Sched<[itins.Sched.Folded]>;
Elena Demikhovskyba5ab322015-06-22 11:16:30 +00008984
// NOTE(review): the brace-less "let ... in" above covers only "mr". "mrk"
// therefore has no explicit mayStore/hasSideEffects setting in this
// multiclass - confirm its flags are inferred as intended (e.g. from the
// compressing-store Pat that selects it).
Elena Demikhovsky908dbf42014-12-11 15:02:24 +00008985 def mrk : AVX5128I<opc, MRMDestMem, (outs),
8986 (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
Craig Topper9feea572016-01-11 00:44:58 +00008987 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
Ayman Musad7a5ed42016-09-26 06:22:08 +00008988 []>,
Simon Pilgrim904d1a82017-12-01 16:20:03 +00008989 EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
8990 Sched<[itins.Sched.Folded]>;
Elena Demikhovsky908dbf42014-12-11 15:02:24 +00008991}
8992
// Pattern-only multiclass: selects a masked compressing store
// (X86mCompressingStore) of a _.VT value to the write-masked memory form
// of the compress instruction, looked up by name as NAME + _.ZSuffix + "mrk".
Ayman Musad7a5ed42016-09-26 06:22:08 +00008993multiclass compress_by_vec_width_lowering<X86VectorVTInfo _ > {
Ayman Musad7a5ed42016-09-26 06:22:08 +00008994 def : Pat<(X86mCompressingStore addr:$dst, _.KRCWM:$mask,
8995 (_.VT _.RC:$src)),
8996 (!cast<Instruction>(NAME#_.ZSuffix##mrk)
8997 addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
8998}
8999
// Instantiates the compress instructions and their compressing-store
// patterns for all three vector widths of VTInfo.
//   Pred - base predicate; defaults to HasAVX512.
// The 512-bit forms (Z) require only Pred; the 256-bit and 128-bit forms
// (Z256, Z128) additionally require HasVLX.
Elena Demikhovsky908dbf42014-12-11 15:02:24 +00009000multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
Simon Pilgrim904d1a82017-12-01 16:20:03 +00009001 OpndItins itins,
Coby Tayree71e37cc2017-11-21 09:48:44 +00009002 AVX512VLVectorVTInfo VTInfo,
9003 Predicate Pred = HasAVX512> {
9004 let Predicates = [Pred] in
Simon Pilgrim904d1a82017-12-01 16:20:03 +00009005 defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, itins>,
Ayman Musad7a5ed42016-09-26 06:22:08 +00009006 compress_by_vec_width_lowering<VTInfo.info512>, EVEX_V512;
Elena Demikhovsky908dbf42014-12-11 15:02:24 +00009007
Coby Tayree71e37cc2017-11-21 09:48:44 +00009008 let Predicates = [Pred, HasVLX] in {
Simon Pilgrim904d1a82017-12-01 16:20:03 +00009009 defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, itins>,
Ayman Musad7a5ed42016-09-26 06:22:08 +00009010 compress_by_vec_width_lowering<VTInfo.info256>, EVEX_V256;
Simon Pilgrim904d1a82017-12-01 16:20:03 +00009011 defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, itins>,
Ayman Musad7a5ed42016-09-26 06:22:08 +00009012 compress_by_vec_width_lowering<VTInfo.info128>, EVEX_V128;
Elena Demikhovsky908dbf42014-12-11 15:02:24 +00009013 }
9014}
9015
// Compress instruction families. Integer forms use opcode 0x8B, floating-point
// forms use opcode 0x8A; the 64-bit-element forms additionally set VEX_W.
// All use the default predicate of compress_by_elt_width (HasAVX512).
Simon Pilgrim904d1a82017-12-01 16:20:03 +00009016defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", AVX512_COMPRESS,
9017 avx512vl_i32_info>, EVEX;
9018defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", AVX512_COMPRESS,
9019 avx512vl_i64_info>, EVEX, VEX_W;
9020defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", AVX512_COMPRESS,
9021 avx512vl_f32_info>, EVEX;
9022defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", AVX512_COMPRESS,
9023 avx512vl_f64_info>, EVEX, VEX_W;
Elena Demikhovsky908dbf42014-12-11 15:02:24 +00009024
Elena Demikhovsky72860c32014-12-15 10:03:52 +00009025// expand
// Expand instructions at a single vector width, both built with
// AVX512_maskable:
//   rr - register source, selected for X86expand of the source register.
//   rm - memory source, selected for X86expand of a (bitconverted) vector
//        load; uses the folded Sched class plus ReadAfterLd.
9026multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
Simon Pilgrim904d1a82017-12-01 16:20:03 +00009027 string OpcodeStr, OpndItins itins> {
Elena Demikhovskyba5ab322015-06-22 11:16:30 +00009028 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
Michael Liao66233b72015-08-06 09:06:20 +00009029 (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
Simon Pilgrim904d1a82017-12-01 16:20:03 +00009030 (_.VT (X86expand _.RC:$src1)), itins.rr>, AVX5128IBase,
9031 Sched<[itins.Sched]>;
Elena Demikhovsky75ede682015-06-01 07:17:23 +00009032
Elena Demikhovskyba5ab322015-06-22 11:16:30 +00009033 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9034 (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
9035 (_.VT (X86expand (_.VT (bitconvert
Simon Pilgrim904d1a82017-12-01 16:20:03 +00009036 (_.LdFrag addr:$src1))))), itins.rm>,
9037 AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
9038 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Elena Demikhovsky72860c32014-12-15 10:03:52 +00009039}
9040
// Pattern-only multiclass: selects masked expanding loads
// (X86mExpandingLoad) to the masked memory forms of the expand instruction,
// looked up by name from NAME + _.ZSuffix:
//   - pass-through operand is undef  -> "rmkz" form (mask, address);
//   - pass-through is a register     -> "rmk" form, with $src0 passed as the
//                                       first operand.
// NOTE(review): "rmk"/"rmkz" are presumably the merge-masking / zero-masking
// variants generated by AVX512_maskable - confirm against its definition.
Elena Demikhovsky5b10aa12016-10-09 10:48:52 +00009041multiclass expand_by_vec_width_lowering<X86VectorVTInfo _ > {
9042
9043 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
9044 (!cast<Instruction>(NAME#_.ZSuffix##rmkz)
9045 _.KRCWM:$mask, addr:$src)>;
9046
9047 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
9048 (_.VT _.RC:$src0))),
9049 (!cast<Instruction>(NAME#_.ZSuffix##rmk)
9050 _.RC:$src0, _.KRCWM:$mask, addr:$src)>;
9051}
9052
// Instantiates the expand instructions and their expanding-load patterns for
// all three vector widths of VTInfo.
//   Pred - base predicate; defaults to HasAVX512.
// The 512-bit forms (Z) require only Pred; the 256-bit and 128-bit forms
// (Z256, Z128) additionally require HasVLX.
Elena Demikhovsky72860c32014-12-15 10:03:52 +00009053multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
Simon Pilgrim904d1a82017-12-01 16:20:03 +00009054 OpndItins itins,
Coby Tayree71e37cc2017-11-21 09:48:44 +00009055 AVX512VLVectorVTInfo VTInfo,
9056 Predicate Pred = HasAVX512> {
9057 let Predicates = [Pred] in
Simon Pilgrim904d1a82017-12-01 16:20:03 +00009058 defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, itins>,
Elena Demikhovsky5b10aa12016-10-09 10:48:52 +00009059 expand_by_vec_width_lowering<VTInfo.info512>, EVEX_V512;
Elena Demikhovsky72860c32014-12-15 10:03:52 +00009060
Coby Tayree71e37cc2017-11-21 09:48:44 +00009061 let Predicates = [Pred, HasVLX] in {
Simon Pilgrim904d1a82017-12-01 16:20:03 +00009062 defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, itins>,
Elena Demikhovsky5b10aa12016-10-09 10:48:52 +00009063 expand_by_vec_width_lowering<VTInfo.info256>, EVEX_V256;
Simon Pilgrim904d1a82017-12-01 16:20:03 +00009064 defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, itins>,
Elena Demikhovsky5b10aa12016-10-09 10:48:52 +00009065 expand_by_vec_width_lowering<VTInfo.info128>, EVEX_V128;
Elena Demikhovsky72860c32014-12-15 10:03:52 +00009066 }
9067}
9068
// Expand instruction families. Integer forms use opcode 0x89, floating-point
// forms use opcode 0x88; the 64-bit-element forms additionally set VEX_W.
// All use the default predicate of expand_by_elt_width (HasAVX512).
Simon Pilgrim904d1a82017-12-01 16:20:03 +00009069defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", AVX512_EXPAND,
9070 avx512vl_i32_info>, EVEX;
9071defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", AVX512_EXPAND,
9072 avx512vl_i64_info>, EVEX, VEX_W;
9073defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", AVX512_EXPAND,
9074 avx512vl_f32_info>, EVEX;
9075defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", AVX512_EXPAND,
9076 avx512vl_f64_info>, EVEX, VEX_W;
Elena Demikhovsky42c96d92015-06-01 06:50:49 +00009077
Asaf Badouha5b2e5e2015-07-22 12:00:43 +00009078//handle instruction reg_vec1 = op(reg_vec,imm)
9079// op(mem_vec,imm)
9080// op(broadcast(eltVt),imm)
9081//all instruction created with FROUND_CURRENT
// Defines three maskable forms, each taking one vector source and an
// immediate (matched as i32):
//   rri  - register source.
//   rmi  - full-vector memory source (bitconverted load).
//   rmbi - scalar memory source broadcast to the vector (sets EVEX_B).
// The mnemonic is OpcodeStr with _.Suffix appended. The memory forms use the
// folded Sched class plus ReadAfterLd.
9082multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00009083 OpndItins itins, X86VectorVTInfo _> {
Craig Topper05948fb2016-08-02 05:11:15 +00009084 let ExeDomain = _.ExeDomain in {
Asaf Badouha5b2e5e2015-07-22 12:00:43 +00009085 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9086 (ins _.RC:$src1, i32u8imm:$src2),
Igor Breger252c2d92016-02-22 12:37:41 +00009087 OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
Asaf Badouha5b2e5e2015-07-22 12:00:43 +00009088 (OpNode (_.VT _.RC:$src1),
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00009089 (i32 imm:$src2)), itins.rr>, Sched<[itins.Sched]>;
Craig Toppere1cac152016-06-07 07:27:54 +00009090 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9091 (ins _.MemOp:$src1, i32u8imm:$src2),
9092 OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
9093 (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00009094 (i32 imm:$src2)), itins.rm>,
9095 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Craig Toppere1cac152016-06-07 07:27:54 +00009096 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9097 (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
9098 OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr,
9099 "${src1}"##_.BroadcastStr##", $src2",
9100 (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src1))),
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00009101 (i32 imm:$src2)), itins.rm>, EVEX_B,
9102 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Craig Topper05948fb2016-08-02 05:11:15 +00009103 }
Asaf Badouha5b2e5e2015-07-22 12:00:43 +00009104}
9105
9106//handle instruction reg_vec1 = op(reg_vec,imm),{sae}
// Defines "rrib": the register form of the unary packed FP operation with
// {sae} in the asm string. The pattern passes FROUND_NO_EXC as an extra i32
// operand to OpNode, and the instruction sets EVEX_B.
// The mnemonic is OpcodeStr with _.Suffix appended.
9107multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00009108 SDNode OpNode, OpndItins itins,
9109 X86VectorVTInfo _> {
Craig Topper05948fb2016-08-02 05:11:15 +00009110 let ExeDomain = _.ExeDomain in
Asaf Badouha5b2e5e2015-07-22 12:00:43 +00009111 defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9112 (ins _.RC:$src1, i32u8imm:$src2),
Craig Topperbfe13ff2016-01-11 00:44:52 +00009113 OpcodeStr##_.Suffix, "$src2, {sae}, $src1",
Asaf Badouha5b2e5e2015-07-22 12:00:43 +00009114 "$src1, {sae}, $src2",
9115 (OpNode (_.VT _.RC:$src1),
9116 (i32 imm:$src2),
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00009117 (i32 FROUND_NO_EXC)), itins.rr>,
9118 EVEX_B, Sched<[itins.Sched]>;
Asaf Badouha5b2e5e2015-07-22 12:00:43 +00009119}
9120
// Instantiates the unary packed FP-with-immediate instructions for all three
// vector widths of _.
//   OpNode    - node used for the regular (rri/rmi/rmbi) forms.
//   OpNodeRnd - node used for the {sae} form.
// Only the 512-bit instantiation (Z) also gets the {sae} form; Z128 and Z256
// get the regular forms only and additionally require HasVLX.
9121multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
Craig Topper0af48f12017-11-13 02:02:58 +00009122 AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00009123 SDNode OpNodeRnd, OpndItins itins, Predicate prd>{
Asaf Badouha5b2e5e2015-07-22 12:00:43 +00009124 let Predicates = [prd] in {
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00009125 defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, itins,
9126 _.info512>,
9127 avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd,
9128 itins, _.info512>, EVEX_V512;
Asaf Badouha5b2e5e2015-07-22 12:00:43 +00009129 }
9130 let Predicates = [prd, HasVLX] in {
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00009131 defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, itins,
9132 _.info128>, EVEX_V128;
9133 defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, itins,
9134 _.info256>, EVEX_V256;
Asaf Badouha5b2e5e2015-07-22 12:00:43 +00009135 }
9136}
9137
Elena Demikhovsky42c96d92015-06-01 06:50:49 +00009138//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
9139// op(reg_vec2,mem_vec,imm)
9140// op(reg_vec2,broadcast(eltVt),imm)
9141//all instruction created with FROUND_CURRENT
// Defines three maskable forms, each taking a register first source, a
// second source, and an immediate (matched as i32):
//   rri  - second source is a register.
//   rmi  - second source is a full-vector memory operand (bitconverted load).
//   rmbi - second source is a scalar memory operand broadcast to the vector
//          (sets EVEX_B).
// OpcodeStr is used as the mnemonic as-is (no _.Suffix is appended here).
9142multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00009143 OpndItins itins, X86VectorVTInfo _>{
Craig Topper05948fb2016-08-02 05:11:15 +00009144 let ExeDomain = _.ExeDomain in {
Elena Demikhovsky42c96d92015-06-01 06:50:49 +00009145 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
Asaf Badouha5b2e5e2015-07-22 12:00:43 +00009146 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
Elena Demikhovsky42c96d92015-06-01 06:50:49 +00009147 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
9148 (OpNode (_.VT _.RC:$src1),
9149 (_.VT _.RC:$src2),
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00009150 (i32 imm:$src3)), itins.rr>,
9151 Sched<[itins.Sched]>;
Craig Toppere1cac152016-06-07 07:27:54 +00009152 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9153 (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
9154 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
9155 (OpNode (_.VT _.RC:$src1),
9156 (_.VT (bitconvert (_.LdFrag addr:$src2))),
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00009157 (i32 imm:$src3)), itins.rm>,
9158 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Craig Toppere1cac152016-06-07 07:27:54 +00009159 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9160 (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
9161 OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
9162 "$src1, ${src2}"##_.BroadcastStr##", $src3",
9163 (OpNode (_.VT _.RC:$src1),
9164 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00009165 (i32 imm:$src3)), itins.rm>, EVEX_B,
9166 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Craig Topper05948fb2016-08-02 05:11:15 +00009167 }
Elena Demikhovsky42c96d92015-06-01 06:50:49 +00009168}
9169
Elena Demikhovsky9e380862015-06-03 10:56:40 +00009170//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
9171// op(reg_vec2,mem_vec,imm)
// Two-source operation with an 8-bit immediate (u8imm, matched as i8), where
// the destination and source vector types may differ:
//   DestInfo - type info of the result (also supplies ExeDomain and masking).
//   SrcInfo  - type info of both sources.
// Defines "rri" (register second source) and "rmi" (full-vector memory second
// source, bitconverted load). No broadcast form is defined here.
Igor Breger2ae0fe32015-08-31 11:14:02 +00009172multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim36be8522017-11-29 18:52:20 +00009173 OpndItins itins, X86VectorVTInfo DestInfo,
9174 X86VectorVTInfo SrcInfo>{
Craig Topper05948fb2016-08-02 05:11:15 +00009175 let ExeDomain = DestInfo.ExeDomain in {
Igor Breger2ae0fe32015-08-31 11:14:02 +00009176 defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
9177 (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
9178 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
9179 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
9180 (SrcInfo.VT SrcInfo.RC:$src2),
Simon Pilgrim36be8522017-11-29 18:52:20 +00009181 (i8 imm:$src3))), itins.rr>,
9182 Sched<[itins.Sched]>;
Craig Toppere1cac152016-06-07 07:27:54 +00009183 defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
9184 (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
9185 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
9186 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
9187 (SrcInfo.VT (bitconvert
9188 (SrcInfo.LdFrag addr:$src2))),
Simon Pilgrim36be8522017-11-29 18:52:20 +00009189 (i8 imm:$src3))), itins.rm>,
9190 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Craig Topper05948fb2016-08-02 05:11:15 +00009191 }
Igor Breger2ae0fe32015-08-31 11:14:02 +00009192}
9193
9194//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
9195// op(reg_vec2,mem_vec,imm)
Elena Demikhovsky9e380862015-06-03 10:56:40 +00009196// op(reg_vec2,broadcast(eltVt),imm)
// Inherits "rri" and "rmi" from avx512_3Op_rm_imm8, using the same type info
// _ for both destination and sources, and adds "rmbi": the form whose second
// source is a scalar memory operand broadcast to the vector (sets EVEX_B).
9197multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim36be8522017-11-29 18:52:20 +00009198 OpndItins itins, X86VectorVTInfo _>:
9199 avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, itins, _, _>{
Igor Breger2ae0fe32015-08-31 11:14:02 +00009200
Craig Topper05948fb2016-08-02 05:11:15 +00009201 let ExeDomain = _.ExeDomain in
Craig Toppere1cac152016-06-07 07:27:54 +00009202 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9203 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
9204 OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
9205 "$src1, ${src2}"##_.BroadcastStr##", $src3",
9206 (OpNode (_.VT _.RC:$src1),
9207 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
Simon Pilgrim36be8522017-11-29 18:52:20 +00009208 (i8 imm:$src3)), itins.rm>, EVEX_B,
9209 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Elena Demikhovsky9e380862015-06-03 10:56:40 +00009210}
9211
Elena Demikhovsky3425c932015-06-02 08:28:57 +00009212//handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
9213// op(reg_vec2,mem_scalar,imm)
// Scalar variant built with AVX512_maskable_scalar; the immediate is matched
// as i32.
//   rri - second source is a register.
//   rmi - second source is a scalar memory operand; the pattern loads the
//         scalar with _.ScalarLdFrag and wraps it in scalar_to_vector.
Elena Demikhovsky3425c932015-06-02 08:28:57 +00009214multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00009215 OpndItins itins, X86VectorVTInfo _> {
Craig Topper05948fb2016-08-02 05:11:15 +00009216 let ExeDomain = _.ExeDomain in {
Elena Demikhovsky3425c932015-06-02 08:28:57 +00009217 defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
Asaf Badouha5b2e5e2015-07-22 12:00:43 +00009218 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
Elena Demikhovsky3425c932015-06-02 08:28:57 +00009219 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
9220 (OpNode (_.VT _.RC:$src1),
9221 (_.VT _.RC:$src2),
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00009222 (i32 imm:$src3)), itins.rr>,
9223 Sched<[itins.Sched]>;
Craig Toppere1cac152016-06-07 07:27:54 +00009224 defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
Igor Bregere73ef852016-09-11 12:38:46 +00009225 (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
Craig Toppere1cac152016-06-07 07:27:54 +00009226 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
9227 (OpNode (_.VT _.RC:$src1),
9228 (_.VT (scalar_to_vector
9229 (_.ScalarLdFrag addr:$src2))),
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00009230 (i32 imm:$src3)), itins.rm>,
9231 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Craig Topper05948fb2016-08-02 05:11:15 +00009232 }
Elena Demikhovsky3425c932015-06-02 08:28:57 +00009233}
9234
// Packed FP instruction with an immediate and {sae}:
//   reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}
// Only the register form "rrib" exists. The pattern passes FROUND_NO_EXC as a
// fourth operand to OpNode and the asm strings print "{sae}".
multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode, OpndItins itins,
                                    X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm rrib : AVX512_maskable<
        opc, MRMSrcReg, _, (outs _.RC:$dst),
        (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
        OpcodeStr,
        "$src3, {sae}, $src2, $src1",
        "$src1, $src2, {sae}, $src3",
        (OpNode (_.VT _.RC:$src1),
                (_.VT _.RC:$src2),
                (i32 imm:$src3),
                (i32 FROUND_NO_EXC)),
        itins.rr>,
      EVEX_B, Sched<[itins.Sched]>;
  }
}
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00009250
// Scalar FP instruction with an immediate and {sae}:
//   reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}
// Scalar counterpart of avx512_fp_sae_packed_imm: a single register form built
// on AVX512_maskable_scalar with FROUND_NO_EXC as the extra OpNode operand.
// Note the record is named NAME#rrib explicitly.
multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode, OpndItins itins,
                                    X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm NAME#rrib : AVX512_maskable_scalar<
        opc, MRMSrcReg, _, (outs _.RC:$dst),
        (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
        OpcodeStr,
        "$src3, {sae}, $src2, $src1",
        "$src1, $src2, {sae}, $src3",
        (OpNode (_.VT _.RC:$src1),
                (_.VT _.RC:$src2),
                (i32 imm:$src3),
                (i32 FROUND_NO_EXC)),
        itins.rr>,
      EVEX_B, Sched<[itins.Sched]>;
  }
}
Elena Demikhovsky42c96d92015-06-01 06:50:49 +00009265
// Instantiates a packed FP imm instruction for all three vector lengths.
//   Z    (512-bit): regular forms via avx512_fp_packed_imm plus the {sae} form
//                   via avx512_fp_sae_packed_imm (using OpNodeRnd); needs prd.
//   Z128 / Z256   : regular forms only; need prd and HasVLX.
multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
                                           AVX512VLVectorVTInfo _,
                                           bits<8> opc, SDNode OpNode,
                                           SDNode OpNodeRnd, OpndItins itins,
                                           Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, itins, _.info512>,
           avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd, itins,
                                    _.info512>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, itins,
                                     _.info128>,
                EVEX_V128;
    defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, itins,
                                     _.info256>,
                EVEX_V256;
  }
}
9282
// Instantiates avx512_3Op_rm_imm8 for 512/128/256-bit vectors, with separate
// destination and source vector infos. The 512-bit variant requires Pred
// (HasBWI unless overridden); the 128/256-bit variants additionally require
// HasVLX. All variants are tagged AVX512AIi8Base and EVEX_4V.
multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
                                     OpndItins itins,
                                     AVX512VLVectorVTInfo DestInfo,
                                     AVX512VLVectorVTInfo SrcInfo,
                                     Predicate Pred = HasBWI> {
  let Predicates = [Pred] in
  defm Z : avx512_3Op_rm_imm8<opc, OpStr, OpNode, itins,
                              DestInfo.info512, SrcInfo.info512>,
           EVEX_V512, AVX512AIi8Base, EVEX_4V;

  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, itins,
                                   DestInfo.info128, SrcInfo.info128>,
                EVEX_V128, AVX512AIi8Base, EVEX_4V;
    defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, itins,
                                   DestInfo.info256, SrcInfo.info256>,
                EVEX_V256, AVX512AIi8Base, EVEX_4V;
  }
}
9297
// Instantiates avx512_3Op_imm8 (reg / mem / broadcast forms) for all three
// vector lengths. Z requires Pred (HasAVX512 unless overridden); Z128 and Z256
// additionally require HasVLX.
multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
                                  bits<8> opc, SDNode OpNode, OpndItins itins,
                                  Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in
  defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, itins, _.info512>,
           EVEX_V512;

  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, itins, _.info128>,
                EVEX_V128;
    defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, itins, _.info256>,
                EVEX_V256;
  }
}
9309
// Scalar FP imm instruction: combines the regular rri/rmi forms
// (avx512_fp_scalar_imm, using OpNode) with the {sae} register form
// (avx512_fp_sae_scalar_imm, using OpNodeRnd) under a single "Z128" prefix,
// guarded by prd.
multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
                                           X86VectorVTInfo _, bits<8> opc,
                                           SDNode OpNode, SDNode OpNodeRnd,
                                           OpndItins itins, Predicate prd> {
  let Predicates = [prd] in
  defm Z128 : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, itins, _>,
              avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeRnd, itins, _>;
}
9318
// Instantiates a unary packed FP imm instruction for both element widths:
//   PS : f32 vectors, opcode opcPs, itins.s, EVEX_CD8<32, CD8VF>
//   PD : f64 vectors, opcode opcPd, itins.d, EVEX_CD8<64, CD8VF>, VEX_W
multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
                                                     bits<8> opcPs,
                                                     bits<8> opcPd,
                                                     SDNode OpNode,
                                                     SDNode OpNodeRnd,
                                                     SizeItins itins,
                                                     Predicate prd> {
  defm PS : avx512_common_unary_fp_sae_packed_imm<
                OpcodeStr, avx512vl_f32_info, opcPs, OpNode, OpNodeRnd,
                itins.s, prd>,
            EVEX_CD8<32, CD8VF>;
  defm PD : avx512_common_unary_fp_sae_packed_imm<
                OpcodeStr, avx512vl_f64_info, opcPd, OpNode, OpNodeRnd,
                itins.d, prd>,
            EVEX_CD8<64, CD8VF>, VEX_W;
}
9329
// Unary packed FP instructions with an immediate (PS and PD variants each).
// VREDUCE is gated on HasDQI; VRNDSCALE and VGETMANT on HasAVX512.
defm VREDUCE : avx512_common_unary_fp_sae_packed_imm_all<
                   "vreduce", 0x56, 0x56, X86VReduce, X86VReduceRnd,
                   SSE_ALU_ITINS_P, HasDQI>,
               AVX512AIi8Base, EVEX;
defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<
                     "vrndscale", 0x08, 0x09, X86VRndScale, X86VRndScaleRnd,
                     SSE_ALU_ITINS_P, HasAVX512>,
                 AVX512AIi8Base, EVEX;
defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<
                    "vgetmant", 0x26, 0x26, X86VGetMant, X86VGetMantRnd,
                    SSE_ALU_ITINS_P, HasAVX512>,
                AVX512AIi8Base, EVEX;
Igor Breger1e58e8a2015-09-02 11:18:55 +00009339
// VRANGE, packed: opcode 0x50, gated on HasDQI. The PD variant adds VEX_W.
defm VRANGEPD : avx512_common_fp_sae_packed_imm<
                    "vrangepd", avx512vl_f64_info, 0x50, X86VRange,
                    X86VRangeRnd, SSE_ALU_F64P, HasDQI>,
                AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VRANGEPS : avx512_common_fp_sae_packed_imm<
                    "vrangeps", avx512vl_f32_info, 0x50, X86VRange,
                    X86VRangeRnd, SSE_ALU_F32P, HasDQI>,
                AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;

// VRANGE, scalar: opcode 0x51, gated on HasDQI. The SD variant adds VEX_W.
defm VRANGESD : avx512_common_fp_sae_scalar_imm<
                    "vrangesd", f64x_info, 0x51, X86Ranges, X86RangesRnd,
                    SSE_ALU_F64S, HasDQI>,
                AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VRANGESS : avx512_common_fp_sae_scalar_imm<
                    "vrangess", f32x_info, 0x51, X86Ranges, X86RangesRnd,
                    SSE_ALU_F32S, HasDQI>,
                AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
9355
// VREDUCE, scalar: opcode 0x57, gated on HasDQI. The SD variant adds VEX_W.
defm VREDUCESD : avx512_common_fp_sae_scalar_imm<
                     "vreducesd", f64x_info, 0x57, X86Reduces, X86ReducesRnd,
                     SSE_ALU_F64S, HasDQI>,
                 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VREDUCESS : avx512_common_fp_sae_scalar_imm<
                     "vreducess", f32x_info, 0x57, X86Reduces, X86ReducesRnd,
                     SSE_ALU_F32S, HasDQI>,
                 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
Elena Demikhovsky9e380862015-06-03 10:56:40 +00009362
// VGETMANT, scalar: opcode 0x27, gated on HasAVX512. The SD variant adds VEX_W.
defm VGETMANTSD : avx512_common_fp_sae_scalar_imm<
                      "vgetmantsd", f64x_info, 0x27, X86GetMants,
                      X86GetMantsRnd, SSE_ALU_F64S, HasAVX512>,
                  AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>,
                  VEX_W;
defm VGETMANTSS : avx512_common_fp_sae_scalar_imm<
                      "vgetmantss", f32x_info, 0x27, X86GetMants,
                      X86GetMantsRnd, SSE_ALU_F32S, HasAVX512>,
                  AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
9369
// Select the generic FP rounding nodes to the unmasked register form of
// VRNDSCALE, using a fixed immediate per node:
//   ffloor     -> 0x9
//   fnearbyint -> 0xC
//   fceil      -> 0xA
//   frint      -> 0x4
//   ftrunc     -> 0xB
// InstrStr is the instruction record name without its trailing "rri"
// (e.g. "VRNDSCALEPSZ128"); _ supplies the vector type and register class.
// NOTE(review): this relies on avx512vl_f32_info / avx512vl_f64_info exposing
// the v16f32/v8f32/v4f32 and v8f64/v4f64/v2f64 infos with VR512/VR256X/VR128X
// register classes -- confirm against their definitions earlier in the file.
multiclass avx512_rndscale_imm_lowering<string InstrStr, X86VectorVTInfo _> {
  def : Pat<(_.VT (ffloor _.RC:$src)),
            (!cast<Instruction>(InstrStr # "rri") _.RC:$src, (i32 0x9))>;
  def : Pat<(_.VT (fnearbyint _.RC:$src)),
            (!cast<Instruction>(InstrStr # "rri") _.RC:$src, (i32 0xC))>;
  def : Pat<(_.VT (fceil _.RC:$src)),
            (!cast<Instruction>(InstrStr # "rri") _.RC:$src, (i32 0xA))>;
  def : Pat<(_.VT (frint _.RC:$src)),
            (!cast<Instruction>(InstrStr # "rri") _.RC:$src, (i32 0x4))>;
  def : Pat<(_.VT (ftrunc _.RC:$src)),
            (!cast<Instruction>(InstrStr # "rri") _.RC:$src, (i32 0xB))>;
}

// 512-bit vectors only need AVX512.
let Predicates = [HasAVX512] in {
  defm : avx512_rndscale_imm_lowering<"VRNDSCALEPSZ",
                                      avx512vl_f32_info.info512>;
  defm : avx512_rndscale_imm_lowering<"VRNDSCALEPDZ",
                                      avx512vl_f64_info.info512>;
}

// 128-bit and 256-bit vectors use the VLX encodings.
let Predicates = [HasVLX] in {
  defm : avx512_rndscale_imm_lowering<"VRNDSCALEPSZ128",
                                      avx512vl_f32_info.info128>;
  defm : avx512_rndscale_imm_lowering<"VRNDSCALEPDZ128",
                                      avx512vl_f64_info.info128>;
  defm : avx512_rndscale_imm_lowering<"VRNDSCALEPSZ256",
                                      avx512vl_f32_info.info256>;
  defm : avx512_rndscale_imm_lowering<"VRNDSCALEPDZ256",
                                      avx512vl_f64_info.info256>;
}
9439
// One vector length of a 128-bit-lane shuffle (X86Shuf128) with an 8-bit
// immediate. The shuffle node is typed as CastInfo.VT and the result is
// bitconverted to _.VT. Three forms are produced:
//   rri  : register source
//   rmi  : full-vector memory source
//   rmbi : scalar memory source broadcast via X86VBroadcast (EVEX_B)
multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
                                          OpndItins itins,
                                          X86VectorVTInfo _,
                                          X86VectorVTInfo CastInfo> {
  let ExeDomain = _.ExeDomain in
  defm rri : AVX512_maskable<
      opc, MRMSrcReg, _, (outs _.RC:$dst),
      (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
      (_.VT (bitconvert
             (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
                                      (i8 imm:$src3))))),
      itins.rr>,
    Sched<[itins.Sched]>;

  let ExeDomain = _.ExeDomain in
  defm rmi : AVX512_maskable<
      opc, MRMSrcMem, _, (outs _.RC:$dst),
      (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
      (_.VT (bitconvert
             (CastInfo.VT (X86Shuf128 _.RC:$src1,
                                      (bitconvert (_.LdFrag addr:$src2)),
                                      (i8 imm:$src3))))),
      itins.rm>,
    Sched<[itins.Sched.Folded, ReadAfterLd]>;

  let ExeDomain = _.ExeDomain in
  defm rmbi : AVX512_maskable<
      opc, MRMSrcMem, _, (outs _.RC:$dst),
      (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
      OpcodeStr,
      "$src3, ${src2}"##_.BroadcastStr##", $src1",
      "$src1, ${src2}"##_.BroadcastStr##", $src3",
      (_.VT (bitconvert
             (CastInfo.VT
              (X86Shuf128 _.RC:$src1,
                          (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
                          (i8 imm:$src3))))),
      itins.rm>,
    EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
9473
// Instantiates the 128-bit-lane shuffle for 512-bit (Z, HasAVX512) and
// 256-bit (Z256, HasAVX512 + HasVLX) vectors. No 128-bit variant is defined.
multiclass avx512_shuff_packed_128<string OpcodeStr, OpndItins itins,
                                   AVX512VLVectorVTInfo _,
                                   AVX512VLVectorVTInfo CastInfo,
                                   bits<8> opc> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, itins,
                                            _.info512, CastInfo.info512>,
             EVEX_V512;
  }

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, itins,
                                               _.info256, CastInfo.info256>,
                EVEX_V256;
  }
}
9485
// 128-bit-lane shuffles. The FP variants use opcode 0x23, the integer variants
// 0x43. The 32-bit-element variants are cast through the matching 64-bit
// element info; the 64x2 variants add VEX_W.
defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", SSE_SHUFP,
                                          avx512vl_f32_info, avx512vl_f64_info,
                                          0x23>,
                  AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", SSE_SHUFP,
                                          avx512vl_f64_info, avx512vl_f64_info,
                                          0x23>,
                  AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", SSE_SHUFP,
                                          avx512vl_i32_info, avx512vl_i64_info,
                                          0x43>,
                  AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", SSE_SHUFP,
                                          avx512vl_i64_info, avx512vl_i64_info,
                                          0x43>,
                  AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
Igor Breger00d9f842015-06-08 14:03:17 +00009494
let Predicates = [HasAVX512] in {
  // Fallback for broadcasting a 128-bit register to 512 bits. The load-based
  // broadcast patterns cannot be selected when the load node has additional
  // users, so in that case the XMM value is inserted into the low lane of an
  // undefined ZMM register and shuffled with itself using immediate 0.

  // 64-bit elements.
  def : Pat<(v8f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
            (VSHUFF64X2Zrri
                (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                0)>;
  def : Pat<(v8i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
            (VSHUFI64X2Zrri
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                0)>;

  // 32-bit elements.
  def : Pat<(v16f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
            (VSHUFF32X4Zrri
                (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                0)>;
  def : Pat<(v16i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
            (VSHUFI32X4Zrri
                (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                0)>;

  // 16-bit and 8-bit elements reuse the 32x4 integer shuffle.
  def : Pat<(v32i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
            (VSHUFI32X4Zrri
                (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                0)>;
  def : Pat<(v64i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
            (VSHUFI32X4Zrri
                (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                0)>;
}
9527
// VALIGN: a thin wrapper around avx512_common_3Op_imm8 that fixes the opcode
// to 0x03 and the node to X86VAlign, and tags the result AVX512AIi8Base and
// EVEX_4V.
multiclass avx512_valign<string OpcodeStr, OpndItins itins,
                         AVX512VLVectorVTInfo VTInfo_I> {
  defm NAME : avx512_common_3Op_imm8<OpcodeStr, VTInfo_I, 0x03, X86VAlign,
                                     itins>,
              AVX512AIi8Base, EVEX_4V;
}
9533
// Element-granular alignment for 32-bit and 64-bit integer elements.
defm VALIGND : avx512_valign<"valignd", SSE_PALIGN, avx512vl_i32_info>,
               EVEX_CD8<32, CD8VF>;
defm VALIGNQ : avx512_valign<"valignq", SSE_PALIGN, avx512vl_i64_info>,
               EVEX_CD8<64, CD8VF>, VEX_W;

// Byte-element VPALIGNR (opcode 0x0F); the predicate defaults to HasBWI.
defm VPALIGNR : avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
                                          SSE_PALIGN, avx512vl_i8_info,
                                          avx512vl_i8_info>,
                EVEX_CD8<8, CD8VF>;
9542
// Immediate transforms used when an align node is re-expressed with a
// narrower element type (valignq -> masked valignd, or valignq/valignd ->
// vpalignr). Each one scales the original immediate and re-emits it as an i8.

// valignq immediate -> valignd immediate (x2).
def ValignqImm32XForm : SDNodeXForm<imm, [{
  return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
}]>;

// valignq immediate -> vpalignr immediate (x8).
def ValignqImm8XForm : SDNodeXForm<imm, [{
  return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
}]>;

// valignd immediate -> vpalignr immediate (x4).
def ValigndImm8XForm : SDNodeXForm<imm, [{
  return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
}]>;
9554
// Masked-select lowering for an align node whose element type (From) differs
// from the element type of the surrounding vselect (To). The node is matched
// through a bitconvert and selected to the To-typed instruction named by
// OpcodeStr, with the immediate rescaled by ImmXForm.
// Four patterns: {register, memory} source x {merge into $src0, zero}.
multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
                                        X86VectorVTInfo From,
                                        X86VectorVTInfo To,
                                        SDNodeXForm ImmXForm> {
  // Register source, merge-masking.
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                              imm:$src3))),
                            To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rrik")
                To.RC:$src0, To.KRCWM:$mask, To.RC:$src1, To.RC:$src2,
                (ImmXForm imm:$src3))>;

  // Register source, zero-masking.
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                              imm:$src3))),
                            To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rrikz")
                To.KRCWM:$mask, To.RC:$src1, To.RC:$src2,
                (ImmXForm imm:$src3))>;

  // Memory source, merge-masking.
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1,
                                      (bitconvert (To.LdFrag addr:$src2)),
                                      imm:$src3))),
                            To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rmik")
                To.RC:$src0, To.KRCWM:$mask, To.RC:$src1, addr:$src2,
                (ImmXForm imm:$src3))>;

  // Memory source, zero-masking.
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1,
                                      (bitconvert (To.LdFrag addr:$src2)),
                                      imm:$src3))),
                            To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rmikz")
                To.KRCWM:$mask, To.RC:$src1, addr:$src2,
                (ImmXForm imm:$src3))>;
}
9596
// Extends avx512_vpalign_mask_lowering with the embedded-broadcast forms:
// the second source is a To-typed X86VBroadcast of a scalar load, seen through
// a bitconvert. Adds an unmasked pattern (rmbi) plus merge-masked (rmbik) and
// zero-masked (rmbikz) patterns.
multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
                                           X86VectorVTInfo From,
                                           X86VectorVTInfo To,
                                           SDNodeXForm ImmXForm>
    : avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
  // Unmasked broadcast source.
  def : Pat<(From.VT (OpNode From.RC:$src1,
                             (bitconvert (To.VT (X86VBroadcast
                                                 (To.ScalarLdFrag addr:$src2)))),
                             imm:$src3)),
            (!cast<Instruction>(OpcodeStr#"rmbi")
                To.RC:$src1, addr:$src2, (ImmXForm imm:$src3))>;

  // Broadcast source, merge-masking.
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1,
                                      (bitconvert
                                       (To.VT (X86VBroadcast
                                               (To.ScalarLdFrag addr:$src2)))),
                                      imm:$src3))),
                            To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rmbik")
                To.RC:$src0, To.KRCWM:$mask, To.RC:$src1, addr:$src2,
                (ImmXForm imm:$src3))>;

  // Broadcast source, zero-masking.
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1,
                                      (bitconvert
                                       (To.VT (X86VBroadcast
                                               (To.ScalarLdFrag addr:$src2)))),
                                      imm:$src3))),
                            To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rmbikz")
                To.KRCWM:$mask, To.RC:$src1, addr:$src2,
                (ImmXForm imm:$src3))>;
}
9633
// 512-bit: lowering already picks the widest element type available, so the
// only conversion needed here is valignq (v8i64) -> valignd (v16i32).
let Predicates = [HasAVX512] in
defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
                                       v16i32_info, ValignqImm32XForm>;
9640
// 128-bit and 256-bit: as with 512-bit, lowering picks the widest element type
// available, so only the valignq -> valignd conversion has to be handled.
let Predicates = [HasVLX] in {
  // 128-bit: v2i64 -> v4i32.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign,
                                         v2i64x_info, v4i32x_info,
                                         ValignqImm32XForm>;
  // 256-bit: v4i64 -> v8i32.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign,
                                         v4i64x_info, v8i32x_info,
                                         ValignqImm32XForm>;
}
9651
// With VLX and BWI, a 128-bit VALIGNQ/VALIGND under a byte-element mask can be
// selected as VPALIGNR; the immediate is rescaled from elements to bytes.
// NOTE(review): the earlier comment here also mentioned 256-bit, but only the
// 128-bit (Z128) lowerings are instantiated in this block.
let Predicates = [HasVLX, HasBWI] in {
  // valignq (v2i64) -> vpalignr (v16i8).
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
                                      v16i8x_info, ValignqImm8XForm>;
  // valignd (v4i32) -> vpalignr (v16i8).
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
                                      v16i8x_info, ValigndImm8XForm>;
}
9659
// VDBPSADBW (opcode 0x42): destination uses the i16 vector infos, sources use
// the i8 vector infos. The predicate defaults to HasBWI.
defm VDBPSADBW : avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
                                           SSE_INTMUL_ITINS_P,
                                           avx512vl_i16_info,
                                           avx512vl_i8_info>,
                 EVEX_CD8<8, CD8VF>;
Igor Bregerf3ded812015-08-31 13:09:30 +00009663
// Unary vector operation, one vector length:
//   rr : register source
//   rm : full-vector memory source (load seen through a bitconvert)
// Both are maskable and tagged EVEX / AVX5128IBase.
multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rr : AVX512_maskable<
      opc, MRMSrcReg, _, (outs _.RC:$dst),
      (ins _.RC:$src1), OpcodeStr,
      "$src1", "$src1",
      (_.VT (OpNode _.RC:$src1)),
      itins.rr>,
    EVEX, AVX5128IBase, Sched<[itins.Sched]>;

  let ExeDomain = _.ExeDomain in
  defm rm : AVX512_maskable<
      opc, MRMSrcMem, _, (outs _.RC:$dst),
      (ins _.MemOp:$src1), OpcodeStr,
      "$src1", "$src1",
      (_.VT (OpNode (bitconvert (_.LdFrag addr:$src1)))),
      itins.rm>,
    EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
    Sched<[itins.Sched.Folded]>;
}
9681
// avx512_unary_rm plus the embedded-broadcast form "rmb", whose source is a
// scalar memory operand broadcast with X86VBroadcast (EVEX_B).
multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            OpndItins itins, X86VectorVTInfo _>
    : avx512_unary_rm<opc, OpcodeStr, OpNode, itins, _> {
  defm rmb : AVX512_maskable<
      opc, MRMSrcMem, _, (outs _.RC:$dst),
      (ins _.ScalarMemOp:$src1), OpcodeStr,
      "${src1}"##_.BroadcastStr,
      "${src1}"##_.BroadcastStr,
      (_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src1)))),
      itins.rm>,
    EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
    Sched<[itins.Sched.Folded]>;
}
9694
// Instantiates avx512_unary_rm (no broadcast form) for all vector lengths.
// Z requires prd; Z256 and Z128 additionally require HasVLX.
multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              OpndItins itins, AVX512VLVectorVTInfo VTInfo,
                              Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, itins, VTInfo.info512>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, itins,
                                VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, itins,
                                VTInfo.info128>,
                EVEX_V128;
  }
}
9709
// Instantiates avx512_unary_rmb (including the broadcast form) for all vector
// lengths. Z requires prd; Z256 and Z128 additionally require HasVLX.
multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               OpndItins itins, AVX512VLVectorVTInfo VTInfo,
                               Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info512>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, itins,
                                 VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, itins,
                                 VTInfo.info128>,
                EVEX_V128;
  }
}
9724
// Dword/qword variants of a unary op, both with broadcast forms:
//   Q : i64 elements, opcode opc_q, mnemonic suffix "q", VEX_W
//   D : i32 elements, opcode opc_d, mnemonic suffix "d"
multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q,
                                 string OpcodeStr, SDNode OpNode,
                                 OpndItins itins, Predicate prd> {
  defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, itins,
                               avx512vl_i64_info, prd>,
           VEX_W;
  defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, itins,
                               avx512vl_i32_info, prd>;
}
9732
// Byte/word variants of a unary op (no broadcast forms), both VEX_WIG:
//   W : i16 elements, opcode opc_w, mnemonic suffix "w"
//   B : i8 elements,  opcode opc_b, mnemonic suffix "b"
multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w,
                                 string OpcodeStr, SDNode OpNode,
                                 OpndItins itins, Predicate prd> {
  defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, itins,
                              avx512vl_i16_info, prd>,
           VEX_WIG;
  defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, itins,
                              avx512vl_i8_info, prd>,
           VEX_WIG;
}
9740
// All four element sizes of a unary op: dword/qword under HasAVX512 and
// byte/word under HasBWI.
multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  OpndItins itins> {
  defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, itins,
                                    HasAVX512>,
              avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, itins,
                                    HasBWI>;
}
9750
// VPABS for b/w/d/q elements (opcodes 0x1C/0x1D/0x1E/0x1F), matching `abs`.
defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
                                    SSE_PABS>;
Igor Bregerf2460112015-07-26 14:41:44 +00009752
// VPABS without VLX: implement the 256-bit and 128-bit qword forms with the
// 512-bit instruction by inserting the source into an undefined ZMM register
// and extracting the matching subregister from the result.
let Predicates = [HasAVX512, NoVLX] in {
  // 256-bit.
  def : Pat<(v4i64 (abs VR256X:$src)),
            (EXTRACT_SUBREG
                (VPABSQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src,
                                   sub_ymm)),
                sub_ymm)>;
  // 128-bit.
  def : Pat<(v2i64 (abs VR128X:$src)),
            (EXTRACT_SUBREG
                (VPABSQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src,
                                   sub_xmm)),
                sub_xmm)>;
}
9766
// Generic NoVLX lowering for a unary op: when prd holds but VLX is not
// available, the 256-bit and 128-bit forms are implemented with the 512-bit
// register instruction (InstrStr # "Zrr"). The source is inserted into an
// undefined 512-bit register and the result is extracted from the same
// subregister index.
multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
                                 AVX512VLVectorVTInfo _, Predicate prd> {
  let Predicates = [prd, NoVLX] in {
    // 256-bit.
    def : Pat<(_.info256.VT (OpNode _.info256.RC:$src1)),
              (EXTRACT_SUBREG
                  (!cast<Instruction>(InstrStr # "Zrr")
                      (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)),
                                     _.info256.RC:$src1,
                                     _.info256.SubRegIdx)),
                  _.info256.SubRegIdx)>;

    // 128-bit.
    def : Pat<(_.info128.VT (OpNode _.info128.RC:$src1)),
              (EXTRACT_SUBREG
                  (!cast<Instruction>(InstrStr # "Zrr")
                      (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)),
                                     _.info128.RC:$src1,
                                     _.info128.SubRegIdx)),
                  _.info128.SubRegIdx)>;
  }
}
9788
// VPLZCNT d/q (opcode 0x44), matching `ctlz`; gated on HasCDI.
// FIXME: Is there a better scheduler itinerary for VPLZCNT?
defm VPLZCNT : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
                                     SSE_INTALU_ITINS_P, HasCDI>;

// VPCONFLICT d/q (opcode 0xC4), matching X86Conflict; gated on HasCDI.
// FIXME: Is there a better scheduler itinerary for VPCONFLICT?
defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
                                        SSE_INTALU_ITINS_P, HasCDI>;

// VPLZCNT without VLX: use the 512-bit instruction for 128/256-bit vectors.
defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;
Simon Pilgrimc89aa0b2017-05-05 12:20:34 +00009800
Igor Breger24cab0f2015-11-16 07:22:00 +00009801//===---------------------------------------------------------------------===//
Oren Ben Simhon7bf27f02017-05-25 13:45:23 +00009802// Counts number of ones - VPOPCNTD and VPOPCNTQ
9803//===---------------------------------------------------------------------===//
9804
Simon Pilgrim756348c2017-11-29 13:49:51 +00009805// FIXME: Is there a better scheduler itinerary for VPOPCNTD/VPOPCNTQ?
// Matched from ctpop and gated on HasVPOPCNTDQ.
Craig Topperc0896052017-12-16 02:40:28 +00009806defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
9807 SSE_INTALU_ITINS_P, HasVPOPCNTDQ>;
Simon Pilgrim756348c2017-11-29 13:49:51 +00009808
// VPOPCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
Craig Topperc0896052017-12-16 02:40:28 +00009809defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
9810defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
Oren Ben Simhon7bf27f02017-05-25 13:45:23 +00009811
9812//===---------------------------------------------------------------------===//
Igor Breger24cab0f2015-11-16 07:22:00 +00009813// Replicate Single FP - MOVSHDUP and MOVSLDUP
9814//===---------------------------------------------------------------------===//
// Thin wrapper over avx512_unary_rm_vl that fixes the type family to
// avx512vl_f32_info, the predicate to HasAVX512 and the prefix to XS.
//   opc/OpcodeStr - opcode byte and mnemonic.
//   OpNode        - node matched by the generated instructions.
//   itins         - itinerary forwarded unchanged.
Simon Pilgrim756348c2017-11-29 13:49:51 +00009815multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
9816 OpndItins itins> {
9817 defm NAME: avx512_unary_rm_vl<opc, OpcodeStr, OpNode, itins,
9818 avx512vl_f32_info, HasAVX512>, XS;
Igor Breger24cab0f2015-11-16 07:22:00 +00009819}
9820
// Both instantiations reuse the SSE_MOVDDUP itinerary.
Simon Pilgrim756348c2017-11-29 13:49:51 +00009821defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup, SSE_MOVDDUP>;
9822defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup, SSE_MOVDDUP>;
Igor Breger1f782962015-11-19 08:26:56 +00009823
9824//===----------------------------------------------------------------------===//
9825// AVX-512 - MOVDDUP
9826//===----------------------------------------------------------------------===//
9827
// 128-bit MOVDDUP forms, built separately from the wider widths.
//   rr - register source; matches OpNode applied to the whole source vector.
//   rm - scalar memory source (_.ScalarMemOp); matches OpNode applied to a
//        scalar load wrapped in scalar_to_vector, and uses the CD8VH
//        displacement-compression tuple.
// Both forms go through AVX512_maskable, so masked variants are generated too.
9828multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim756348c2017-11-29 13:49:51 +00009829 OpndItins itins, X86VectorVTInfo _> {
Craig Toppere9e84c82017-01-31 05:18:24 +00009830 let ExeDomain = _.ExeDomain in {
Igor Breger1f782962015-11-19 08:26:56 +00009831 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9832 (ins _.RC:$src), OpcodeStr, "$src", "$src",
Simon Pilgrim756348c2017-11-29 13:49:51 +00009833 (_.VT (OpNode (_.VT _.RC:$src))), itins.rr>, EVEX,
9834 Sched<[itins.Sched]>;
Craig Toppere1cac152016-06-07 07:27:54 +00009835 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9836 (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
9837 (_.VT (OpNode (_.VT (scalar_to_vector
Simon Pilgrim756348c2017-11-29 13:49:51 +00009838 (_.ScalarLdFrag addr:$src))))),
9839 itins.rm>, EVEX, EVEX_CD8<_.EltSize, CD8VH>,
9840 Sched<[itins.Sched.Folded]>;
Craig Toppere9e84c82017-01-31 05:18:24 +00009841 }
Igor Breger1f782962015-11-19 08:26:56 +00009842}
9843
// Expands MOVDDUP for all three vector widths.
//   opc/OpcodeStr - opcode byte and mnemonic shared by every width.
//   OpNode        - selection node matched by the 512-bit and 256-bit forms.
//   itins         - itinerary forwarded to every generated instruction.
//   VTInfo        - 128/256/512-bit vector type descriptions.
// The 512-bit form carries no extra predicate here; the 256-bit and 128-bit
// forms additionally require HasVLX. The 128-bit form is built from
// avx512_movddup_128 and is keyed on X86VBroadcast rather than OpNode.
multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 OpndItins itins, AVX512VLVectorVTInfo VTInfo> {

  // Use the OpNode template argument instead of a hard-coded node so the
  // parameter is honoured by every instantiation.
  defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, itins, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, itins, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_movddup_128<opc, OpcodeStr, X86VBroadcast, itins,
                                   VTInfo.info128>, EVEX_V128;
  }
}
9856
// Wrapper that fixes the type family to avx512vl_f64_info and applies the XD
// prefix and VEX_W to everything avx512_movddup_common generates.
Simon Pilgrim756348c2017-11-29 13:49:51 +00009857multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode,
9858 OpndItins itins> {
9859 defm NAME: avx512_movddup_common<opc, OpcodeStr, OpNode, itins,
Igor Breger1f782962015-11-19 08:26:56 +00009860 avx512vl_f64_info>, XD, VEX_W;
Igor Breger1f782962015-11-19 08:26:56 +00009861}
9862
// Produces VMOVDDUPZ, VMOVDDUPZ256 and VMOVDDUPZ128 (Z/Z256/Z128 suffixes come
// from avx512_movddup_common).
Simon Pilgrim756348c2017-11-29 13:49:51 +00009863defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SSE_MOVDDUP>;
Igor Breger1f782962015-11-19 08:26:56 +00009864
// Extra selection patterns that map v2f64 X86VBroadcast onto VMOVDDUPZ128.
// Three source shapes are handled: a scalar f64 load, an f64 register (copied
// into VR128X first), and a full v2f64 load. Each shape is covered unmasked,
// merge-masked (vselect with $src0 as the false value -> *k form) and
// zero-masked (vselect with an all-zeros false value -> *kz form).
Craig Topper7eb0e7c2016-09-29 05:54:43 +00009865let Predicates = [HasVLX] in {
Igor Breger1f782962015-11-19 08:26:56 +00009866def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
Craig Topper7eb0e7c2016-09-29 05:54:43 +00009867 (VMOVDDUPZ128rm addr:$src)>;
9868def : Pat<(v2f64 (X86VBroadcast f64:$src)),
9869 (VMOVDDUPZ128rr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
Craig Topperf6c69562017-10-13 21:56:48 +00009870def : Pat<(v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
9871 (VMOVDDUPZ128rm addr:$src)>;
Craig Topperda84ff32017-01-07 22:20:23 +00009872
// Masked, register source.
9873def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
9874 (v2f64 VR128X:$src0)),
9875 (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
9876 (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
9877def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
9878 (bitconvert (v4i32 immAllZerosV))),
9879 (VMOVDDUPZ128rrkz VK2WM:$mask, (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
9880
// Masked, scalar f64 load source.
9881def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
9882 (v2f64 VR128X:$src0)),
9883 (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9884def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
9885 (bitconvert (v4i32 immAllZerosV))),
9886 (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
Craig Topperf6c69562017-10-13 21:56:48 +00009887
// Masked, full v2f64 load source.
9888def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
9889 (v2f64 VR128X:$src0)),
9890 (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9891def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
9892 (bitconvert (v4i32 immAllZerosV))),
9893 (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
Craig Topper7eb0e7c2016-09-29 05:54:43 +00009894}
Igor Breger1f782962015-11-19 08:26:56 +00009895
Igor Bregerf2460112015-07-26 14:41:44 +00009896//===----------------------------------------------------------------------===//
9897// AVX-512 - Unpack Instructions
9898//===----------------------------------------------------------------------===//
// Floating-point unpacks, matched from X86Unpckh / X86Unpckl.
Craig Topper9433f972016-08-02 06:16:53 +00009899defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512,
9900 SSE_ALU_ITINS_S>;
9901defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512,
9902 SSE_ALU_ITINS_S>;
Igor Bregerf2460112015-07-26 14:41:44 +00009903
// Integer unpacks on byte and word elements; these are gated on HasBWI.
9904defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
9905 SSE_INTALU_ITINS_P, HasBWI>;
9906defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
9907 SSE_INTALU_ITINS_P, HasBWI>;
9908defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
9909 SSE_INTALU_ITINS_P, HasBWI>;
9910defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
9911 SSE_INTALU_ITINS_P, HasBWI>;
9912
// Integer unpacks on dword and qword elements; these only need HasAVX512.
9913defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
9914 SSE_INTALU_ITINS_P, HasAVX512>;
9915defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
9916 SSE_INTALU_ITINS_P, HasAVX512>;
9917defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
9918 SSE_INTALU_ITINS_P, HasAVX512>;
9919defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
9920 SSE_INTALU_ITINS_P, HasAVX512>;
Igor Bregerdefab3c2015-10-08 12:55:01 +00009921
9922//===----------------------------------------------------------------------===//
9923// AVX-512 - Extract & Insert Integer Instructions
9924//===----------------------------------------------------------------------===//
9925
// Memory-destination form shared by the byte and word element extracts.
// The pattern stores the OpNode result, truncated to the element type
// (_.EltVT), to $dst. $src2 is the 8-bit immediate element index operand.
9926multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
9927 X86VectorVTInfo _> {
Craig Toppere1cac152016-06-07 07:27:54 +00009928 def mr : AVX512Ii8<opc, MRMDestMem, (outs),
9929 (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
9930 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
Simon Pilgrim1dcb9132017-10-23 16:00:57 +00009931 [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), imm:$src2))),
9932 addr:$dst)]>,
Craig Topper05af43f2018-01-24 17:58:57 +00009933 EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteShuffleLd, WriteRMW]>;
Igor Bregerdefab3c2015-10-08 12:55:01 +00009934}
9935
// Byte element extract, gated on HasBWI.
//   rr - opcode 0x14, MRMDestReg; result goes to a GR32orGR64 register and is
//        matched from X86pextrb.
//   mr - memory-destination form from avx512_extract_elt_bw_m, same opcode.
9936multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
9937 let Predicates = [HasBWI] in {
9938 def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
9939 (ins _.RC:$src1, u8imm:$src2),
9940 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
9941 [(set GR32orGR64:$dst,
9942 (X86pextrb (_.VT _.RC:$src1), imm:$src2))]>,
Simon Pilgrimd255a622017-12-06 18:46:06 +00009943 EVEX, TAPD, Sched<[WriteShuffle]>;
Igor Bregerdefab3c2015-10-08 12:55:01 +00009944
9945 defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
9946 }
9947}
9948
// Word element extract, gated on HasBWI.
//   rr     - opcode 0xC5, MRMSrcReg, PD map; matched from X86pextrw.
//   rr_REV - opcode 0x15, MRMDestReg, TAPD map; has no selection pattern and
//            prints with a ".s" mnemonic suffix. FoldGenData ties it to the rr
//            record. NOTE(review): presumably the alternate register encoding
//            kept for assembler/disassembler use - confirm.
//   mr     - memory-destination form from avx512_extract_elt_bw_m (0x15).
9949multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
9950 let Predicates = [HasBWI] in {
9951 def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
9952 (ins _.RC:$src1, u8imm:$src2),
9953 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
9954 [(set GR32orGR64:$dst,
Simon Pilgrimd255a622017-12-06 18:46:06 +00009955 (X86pextrw (_.VT _.RC:$src1), imm:$src2))],
9956 IIC_SSE_PEXTRW>, EVEX, PD, Sched<[WriteShuffle]>;
Igor Bregerdefab3c2015-10-08 12:55:01 +00009957
Craig Topper99f6b622016-05-01 01:03:56 +00009958 let hasSideEffects = 0 in
Igor Breger55747302015-11-18 08:46:16 +00009959 def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
9960 (ins _.RC:$src1, u8imm:$src2),
Simon Pilgrimd255a622017-12-06 18:46:06 +00009961 OpcodeStr#".s\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
9962 IIC_SSE_PEXTRW>, EVEX, TAPD, FoldGenData<NAME#rr>,
9963 Sched<[WriteShuffle]>;
Igor Breger55747302015-11-18 08:46:16 +00009964
Igor Bregerdefab3c2015-10-08 12:55:01 +00009965 defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
9966 }
9967}
9968
// Dword/qword element extract, gated on HasDQI. GRC is the general-purpose
// register class that receives the element in the rr form.
//   rr - opcode 0x16, MRMDestReg; matched from extractelt.
//   mr - opcode 0x16, MRMDestMem; matched from a store of extractelt.
// Unlike the byte/word forms, no trunc is needed because the extracted value
// is used at its own width.
9969multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
9970 RegisterClass GRC> {
9971 let Predicates = [HasDQI] in {
9972 def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
9973 (ins _.RC:$src1, u8imm:$src2),
9974 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
9975 [(set GRC:$dst,
9976 (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
Simon Pilgrimd255a622017-12-06 18:46:06 +00009977 EVEX, TAPD, Sched<[WriteShuffle]>;
Igor Bregerdefab3c2015-10-08 12:55:01 +00009978
Craig Toppere1cac152016-06-07 07:27:54 +00009979 def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
9980 (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
9981 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
9982 [(store (extractelt (_.VT _.RC:$src1),
9983 imm:$src2),addr:$dst)]>,
Simon Pilgrimd255a622017-12-06 18:46:06 +00009984 EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
Craig Topper05af43f2018-01-24 17:58:57 +00009985 Sched<[WriteShuffleLd, WriteRMW]>;
Igor Bregerdefab3c2015-10-08 12:55:01 +00009986 }
9987}
9988
// Element extracts from 128-bit vectors. The byte/word forms are VEX_WIG; the
// qword form sets VEX_W and the dword form leaves it unset.
Craig Toppera33846a2017-10-22 06:18:23 +00009989defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG;
9990defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG;
Igor Bregerdefab3c2015-10-08 12:55:01 +00009991defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
9992defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;
9993
// Memory-source form shared by all element inserts: inserts the value loaded
// through LdFrag from $src2 into vector $src1 at immediate index $src3.
// The caller supplies the opcode map prefix.
9994multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
9995 X86VectorVTInfo _, PatFrag LdFrag> {
9996 def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
9997 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
9998 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9999 [(set _.RC:$dst,
10000 (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), imm:$src3)))]>,
Simon Pilgrimd255a622017-12-06 18:46:06 +000010001 EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteShuffleLd, ReadAfterLd]>;
Igor Bregerdefab3c2015-10-08 12:55:01 +000010002}
10003
// Byte/word element insert, gated on HasBWI.
//   rr - inserts a GR32orGR64 register value, matched from OpNode.
//   rm - memory-source form from avx512_insert_elt_m using LdFrag.
// No opcode-map prefix is applied here; the instantiation adds it.
10004multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
10005 X86VectorVTInfo _, PatFrag LdFrag> {
10006 let Predicates = [HasBWI] in {
10007 def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
10008 (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
10009 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
10010 [(set _.RC:$dst,
Simon Pilgrimd255a622017-12-06 18:46:06 +000010011 (OpNode _.RC:$src1, GR32orGR64:$src2, imm:$src3))]>, EVEX_4V,
10012 Sched<[WriteShuffle]>;
Igor Bregerdefab3c2015-10-08 12:55:01 +000010013
10014 defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag>;
10015 }
10016}
10017
// Dword/qword element insert, gated on HasDQI. GRC is the general-purpose
// register class of the inserted value.
//   rr - matched from insertelt with a register operand.
//   rm - memory-source form from avx512_insert_elt_m, using the type's own
//        scalar load fragment (_.ScalarLdFrag).
// Both forms use the TAPD opcode map.
10018multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
10019 X86VectorVTInfo _, RegisterClass GRC> {
10020 let Predicates = [HasDQI] in {
10021 def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
10022 (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
10023 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
10024 [(set _.RC:$dst,
10025 (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
Simon Pilgrimd255a622017-12-06 18:46:06 +000010026 EVEX_4V, TAPD, Sched<[WriteShuffle]>;
Igor Bregerdefab3c2015-10-08 12:55:01 +000010027
10028 defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
10029 _.ScalarLdFrag>, TAPD;
10030 }
10031}
10032
// Element inserts into 128-bit vectors. The byte/word forms load through
// extending loads (extloadi8 / extloadi16) and pick their opcode map here
// (TAPD for bytes, PD for words); the dword/qword forms share opcode 0x22 and
// differ only in VEX_W.
10033defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
Craig Toppera33846a2017-10-22 06:18:23 +000010034 extloadi8>, TAPD, VEX_WIG;
Igor Bregerdefab3c2015-10-08 12:55:01 +000010035defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
Craig Toppera33846a2017-10-22 06:18:23 +000010036 extloadi16>, PD, VEX_WIG;
Igor Bregerdefab3c2015-10-08 12:55:01 +000010037defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
10038defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;
Simon Pilgrim36be8522017-11-29 18:52:20 +000010039
Igor Bregera6297c72015-09-02 10:50:58 +000010040//===----------------------------------------------------------------------===//
10041// VSHUFPS - VSHUFPD Operations
10042//===----------------------------------------------------------------------===//
Simon Pilgrim36be8522017-11-29 18:52:20 +000010043
// VSHUFPS/VSHUFPD: three-operand shuffle with an 8-bit immediate, opcode 0xC6,
// matched from X86Shufp on the floating-point type family.
// NOTE(review): VTInfo_I is accepted but not referenced in the body; only
// VTInfo_FP is used.
Igor Bregera6297c72015-09-02 10:50:58 +000010044multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
10045 AVX512VLVectorVTInfo VTInfo_FP>{
Simon Pilgrim36be8522017-11-29 18:52:20 +000010046 defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
10047 SSE_SHUFP>, EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
10048 AVX512AIi8Base, EVEX_4V;
Igor Bregera6297c72015-09-02 10:50:58 +000010049}
10050
10051defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS;
10052defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W;
Simon Pilgrim36be8522017-11-29 18:52:20 +000010053
Asaf Badouhd2c35992015-09-02 14:21:54 +000010054//===----------------------------------------------------------------------===//
10055// AVX-512 - Byte shift Left/Right
10056//===----------------------------------------------------------------------===//
10057
// Itinerary bundle for the byte-shift instructions: both itinerary slots use
// IIC_SSE_INTSHDQ_P_RI and the scheduling class is WriteVecShift.
Simon Pilgrim13d449d2017-12-05 20:16:22 +000010058let Sched = WriteVecShift in
10059def AVX512_BYTESHIFT : OpndItins<
10060 IIC_SSE_INTSHDQ_P_RI, IIC_SSE_INTSHDQ_P_RI
10061>;
10062
// One vector width of a whole-register byte shift by an 8-bit immediate.
//   MRMr/MRMm - instruction formats for the register and memory forms.
//   rr - register source $src1.
//   rm - full-vector memory source, loaded through _.LdFrag and bitconverted
//        to _.VT.
// No masked variants are generated (plain AVX512 defs, not AVX512_maskable).
Asaf Badouhd2c35992015-09-02 14:21:54 +000010063multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
Simon Pilgrim13d449d2017-12-05 20:16:22 +000010064 Format MRMm, string OpcodeStr,
10065 OpndItins itins, X86VectorVTInfo _>{
Asaf Badouhd2c35992015-09-02 14:21:54 +000010066 def rr : AVX512<opc, MRMr,
10067 (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
10068 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
Simon Pilgrim13d449d2017-12-05 20:16:22 +000010069 [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 imm:$src2))))],
10070 itins.rr>, Sched<[itins.Sched]>;
Craig Toppere1cac152016-06-07 07:27:54 +000010071 def rm : AVX512<opc, MRMm,
10072 (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
10073 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
10074 [(set _.RC:$dst,(_.VT (OpNode
Simon Pilgrim255fdd02016-06-11 12:54:37 +000010075 (_.VT (bitconvert (_.LdFrag addr:$src1))),
Simon Pilgrim13d449d2017-12-05 20:16:22 +000010076 (i8 imm:$src2))))], itins.rm>,
10077 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Asaf Badouhd2c35992015-09-02 14:21:54 +000010078}
10079
// Expands avx512_shift_packed over the three byte-vector widths:
// Z (v64i8) under [prd], Z256 (v32i8) and Z128 (v16i8) under [prd, HasVLX].
Simon Pilgrim18bcf932016-02-03 09:41:59 +000010080multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
Simon Pilgrim13d449d2017-12-05 20:16:22 +000010081 Format MRMm, string OpcodeStr,
10082 OpndItins itins, Predicate prd>{
Asaf Badouhd2c35992015-09-02 14:21:54 +000010083 let Predicates = [prd] in
Craig Topperaa904d52017-12-10 17:42:39 +000010084 defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm,
10085 OpcodeStr, itins, v64i8_info>, EVEX_V512;
Asaf Badouhd2c35992015-09-02 14:21:54 +000010086 let Predicates = [prd, HasVLX] in {
Simon Pilgrim18bcf932016-02-03 09:41:59 +000010087 defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm,
Simon Pilgrim13d449d2017-12-05 20:16:22 +000010088 OpcodeStr, itins, v32i8x_info>, EVEX_V256;
Simon Pilgrim18bcf932016-02-03 09:41:59 +000010089 defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm,
Simon Pilgrim13d449d2017-12-05 20:16:22 +000010090 OpcodeStr, itins, v16i8x_info>, EVEX_V128;
Asaf Badouhd2c35992015-09-02 14:21:54 +000010091 }
10092}
// Left and right byte shifts share opcode 0x73 and are distinguished by the
// ModRM format (MRM7 for vpslldq, MRM3 for vpsrldq).
Simon Pilgrim18bcf932016-02-03 09:41:59 +000010093defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
Simon Pilgrim13d449d2017-12-05 20:16:22 +000010094 AVX512_BYTESHIFT, HasBWI>, AVX512PDIi8Base,
10095 EVEX_4V, VEX_WIG;
Simon Pilgrim18bcf932016-02-03 09:41:59 +000010096defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
Simon Pilgrim13d449d2017-12-05 20:16:22 +000010097 AVX512_BYTESHIFT, HasBWI>, AVX512PDIi8Base,
10098 EVEX_4V, VEX_WIG;
Asaf Badouhd2c35992015-09-02 14:21:54 +000010099
10100
// One vector width of a two-source operation whose result type differs from
// its source type: sources use _src (register class, VT, load fragment) and
// the result uses _dst.
//   rr - both sources in registers.
//   rm - second source is a full-vector memory operand.
Simon Pilgrim18bcf932016-02-03 09:41:59 +000010101multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
Simon Pilgrim4d08aed2017-12-05 14:59:40 +000010102 string OpcodeStr, OpndItins itins,
10103 X86VectorVTInfo _dst, X86VectorVTInfo _src> {
Asaf Badouhd2c35992015-09-02 14:21:54 +000010104 def rr : AVX512BI<opc, MRMSrcReg,
Cong Houdb6220f2015-11-24 19:51:26 +000010105 (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
Asaf Badouhd2c35992015-09-02 14:21:54 +000010106 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
Cong Houdb6220f2015-11-24 19:51:26 +000010107 [(set _dst.RC:$dst,(_dst.VT
10108 (OpNode (_src.VT _src.RC:$src1),
Simon Pilgrim4d08aed2017-12-05 14:59:40 +000010109 (_src.VT _src.RC:$src2))))], itins.rr>,
10110 Sched<[itins.Sched]>;
Craig Toppere1cac152016-06-07 07:27:54 +000010111 def rm : AVX512BI<opc, MRMSrcMem,
10112 (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
10113 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
10114 [(set _dst.RC:$dst,(_dst.VT
10115 (OpNode (_src.VT _src.RC:$src1),
10116 (_src.VT (bitconvert
Simon Pilgrim4d08aed2017-12-05 14:59:40 +000010117 (_src.LdFrag addr:$src2))))))], itins.rm>,
10118 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Asaf Badouhd2c35992015-09-02 14:21:54 +000010119}
10120
// Expands avx512_psadbw_packed over three widths, pairing byte-vector sources
// with qword-vector results: v64i8 -> v8i64 under [prd], and v32i8 -> v4i64,
// v16i8 -> v2i64 under [prd, HasVLX].
Simon Pilgrim18bcf932016-02-03 09:41:59 +000010121multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
Simon Pilgrim4d08aed2017-12-05 14:59:40 +000010122 string OpcodeStr, OpndItins itins,
10123 Predicate prd> {
Asaf Badouhd2c35992015-09-02 14:21:54 +000010124 let Predicates = [prd] in
Craig Topperaa904d52017-12-10 17:42:39 +000010125 defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, itins, v8i64_info,
10126 v64i8_info>, EVEX_V512;
Asaf Badouhd2c35992015-09-02 14:21:54 +000010127 let Predicates = [prd, HasVLX] in {
Simon Pilgrim4d08aed2017-12-05 14:59:40 +000010128 defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, itins, v4i64x_info,
Cong Houdb6220f2015-11-24 19:51:26 +000010129 v32i8x_info>, EVEX_V256;
Simon Pilgrim4d08aed2017-12-05 14:59:40 +000010130 defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, itins, v2i64x_info,
Cong Houdb6220f2015-11-24 19:51:26 +000010131 v16i8x_info>, EVEX_V128;
Asaf Badouhd2c35992015-09-02 14:21:54 +000010132 }
10133}
10134
// VPSADBW, matched from X86psadbw and gated on HasBWI.
Simon Pilgrim18bcf932016-02-03 09:41:59 +000010135defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
Simon Pilgrim4d08aed2017-12-05 14:59:40 +000010136 SSE_MPSADBW_ITINS, HasBWI>, EVEX_4V, VEX_WIG;
Igor Bregerb4bb1902015-10-15 12:33:24 +000010137
// Transforms to swizzle an immediate to enable better matching when
// memory operand isn't in the right place.
def VPTERNLOG321_imm8 : SDNodeXForm<imm, [{
  // Rewrite a VPTERNLOG immediate for operand 0 and operand 2 trading places.
  // Immediate bits 1/4 and 3/6 are exchanged; bits 0, 2, 5 and 7 are kept.
  const uint8_t Old = N->getZExtValue();
  uint8_t Swizzled = Old & 0xa5;
  Swizzled |= (Old & 0x02) << 3; // bit 1 -> bit 4
  Swizzled |= (Old & 0x10) >> 3; // bit 4 -> bit 1
  Swizzled |= (Old & 0x08) << 3; // bit 3 -> bit 6
  Swizzled |= (Old & 0x40) >> 3; // bit 6 -> bit 3
  return getI8Imm(Swizzled, SDLoc(N));
}]>;
def VPTERNLOG213_imm8 : SDNodeXForm<imm, [{
  // Rewrite a VPTERNLOG immediate for operand 0 and operand 1 trading places.
  // Immediate bits 2/4 and 3/5 are exchanged; bits 0, 1, 6 and 7 are kept.
  const uint8_t Old = N->getZExtValue();
  uint8_t Swizzled = Old & 0xc3;
  Swizzled |= (Old & 0x04) << 2; // bit 2 -> bit 4
  Swizzled |= (Old & 0x10) >> 2; // bit 4 -> bit 2
  Swizzled |= (Old & 0x08) << 2; // bit 3 -> bit 5
  Swizzled |= (Old & 0x20) >> 2; // bit 5 -> bit 3
  return getI8Imm(Swizzled, SDLoc(N));
}]>;
def VPTERNLOG132_imm8 : SDNodeXForm<imm, [{
  // Rewrite a VPTERNLOG immediate for operand 1 and operand 2 trading places.
  // Immediate bits 1/2 and 5/6 are exchanged; bits 0, 3, 4 and 7 are kept.
  const uint8_t Old = N->getZExtValue();
  uint8_t Swizzled = Old & 0x99;
  Swizzled |= (Old & 0x02) << 1; // bit 1 -> bit 2
  Swizzled |= (Old & 0x04) >> 1; // bit 2 -> bit 1
  Swizzled |= (Old & 0x20) << 1; // bit 5 -> bit 6
  Swizzled |= (Old & 0x40) >> 1; // bit 6 -> bit 5
  return getI8Imm(Swizzled, SDLoc(N));
}]>;
def VPTERNLOG231_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate written for node operands in
  // (src2, src3, src1) order into the immediate for the instruction's
  // (src1, src2, src3) order, i.e. the node's last operand moves to the front.
  //
  // Immediate bit (a<<2 | b<<1 | c) holds the result for node operand bits
  // a, b, c. With a=src2, b=src3, c=src1 the same result must land at
  // instruction index (c<<2 | a<<1 | b).
  // Sanity check: 0xF0 ("result = node operand 0", which is src2 here) must
  // become 0xCC ("result = instruction operand 1").
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3. Bits 0 and 7 are fixed.
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG312_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate written for node operands in
  // (src3, src1, src2) order into the immediate for the instruction's
  // (src1, src2, src3) order, i.e. the node's first operand moves to the end.
  //
  // Immediate bit (a<<2 | b<<1 | c) holds the result for node operand bits
  // a, b, c. With a=src3, b=src1, c=src2 the same result must land at
  // instruction index (b<<2 | c<<1 | a).
  // Sanity check: 0xF0 ("result = node operand 0", which is src3 here) must
  // become 0xAA ("result = instruction operand 2").
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5. Bits 0 and 7 are fixed.
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x08;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
Craig Topper4e794c72017-02-19 19:36:58 +000010199
// One vector width of the three-source, immediate-controlled logic operation.
// With $src1 tied to $dst it defines:
//   rri  - all sources in registers,
//   rmi  - third source as a full-vector memory operand,
//   rmbi - third source as a broadcast scalar memory operand (EVEX_B).
// The anonymous patterns that follow accept the node with its operands in
// other orders (passthru, load or broadcast not in the instruction's natural
// position) and compensate by rewriting the immediate with the matching
// VPTERNLOG*_imm8 transform. In every output pattern the instruction operands
// are given as ($src1, [mask,] $src2, $src3/addr, imm).
Igor Bregerb4bb1902015-10-15 12:33:24 +000010200multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrimbb791b32017-11-30 13:18:06 +000010201 OpndItins itins, X86VectorVTInfo _>{
Craig Topper05948fb2016-08-02 05:11:15 +000010202 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
Igor Bregerb4bb1902015-10-15 12:33:24 +000010203 defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
10204 (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
Igor Breger252c2d92016-02-22 12:37:41 +000010205 OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
Igor Bregerb4bb1902015-10-15 12:33:24 +000010206 (OpNode (_.VT _.RC:$src1),
10207 (_.VT _.RC:$src2),
10208 (_.VT _.RC:$src3),
Simon Pilgrimbb791b32017-11-30 13:18:06 +000010209 (i8 imm:$src4)), itins.rr, 1, 1>,
10210 AVX512AIi8Base, EVEX_4V, Sched<[itins.Sched]>;
Craig Toppere1cac152016-06-07 07:27:54 +000010211 defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
10212 (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
10213 OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
10214 (OpNode (_.VT _.RC:$src1),
10215 (_.VT _.RC:$src2),
10216 (_.VT (bitconvert (_.LdFrag addr:$src3))),
Simon Pilgrimbb791b32017-11-30 13:18:06 +000010217 (i8 imm:$src4)), itins.rm, 1, 0>,
10218 AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
10219 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Craig Toppere1cac152016-06-07 07:27:54 +000010220 defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
10221 (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
10222 OpcodeStr, "$src4, ${src3}"##_.BroadcastStr##", $src2",
10223 "$src2, ${src3}"##_.BroadcastStr##", $src4",
10224 (OpNode (_.VT _.RC:$src1),
10225 (_.VT _.RC:$src2),
10226 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
Simon Pilgrimbb791b32017-11-30 13:18:06 +000010227 (i8 imm:$src4)), itins.rm, 1, 0>, EVEX_B,
10228 AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
10229 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Igor Bregerb4bb1902015-10-15 12:33:24 +000010230 }// Constraints = "$src1 = $dst"
Craig Topper4e794c72017-02-19 19:36:58 +000010231
10232 // Additional patterns for matching passthru operand in other positions.
Craig Topper4e794c72017-02-19 19:36:58 +000010233 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10234 (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
10235 _.RC:$src1)),
10236 (!cast<Instruction>(NAME#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
10237 _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
10238 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10239 (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 imm:$src4)),
10240 _.RC:$src1)),
10241 (!cast<Instruction>(NAME#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
10242 _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
Craig Topper48905772017-02-19 21:32:15 +000010243
10244 // Additional patterns for matching loads in other positions.
10245 def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)),
10246 _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
10247 (!cast<Instruction>(NAME#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
10248 addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
10249 def : Pat<(_.VT (OpNode _.RC:$src1,
10250 (bitconvert (_.LdFrag addr:$src3)),
10251 _.RC:$src2, (i8 imm:$src4))),
10252 (!cast<Instruction>(NAME#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
10253 addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
10254
10255 // Additional patterns for matching zero masking with loads in other
10256 // positions.
Craig Topper48905772017-02-19 21:32:15 +000010257 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10258 (OpNode (bitconvert (_.LdFrag addr:$src3)),
10259 _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
10260 _.ImmAllZerosV)),
10261 (!cast<Instruction>(NAME#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
10262 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
10263 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10264 (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
10265 _.RC:$src2, (i8 imm:$src4)),
10266 _.ImmAllZerosV)),
10267 (!cast<Instruction>(NAME#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
10268 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
Craig Topper48905772017-02-19 21:32:15 +000010269
10270 // Additional patterns for matching masked loads with different
10271 // operand orders.
Craig Topper48905772017-02-19 21:32:15 +000010272 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10273 (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
10274 _.RC:$src2, (i8 imm:$src4)),
10275 _.RC:$src1)),
10276 (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
10277 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
Craig Topperc6c68f52017-02-20 07:00:40 +000010278 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10279 (OpNode (bitconvert (_.LdFrag addr:$src3)),
10280 _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
10281 _.RC:$src1)),
10282 (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
10283 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
10284 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10285 (OpNode _.RC:$src2, _.RC:$src1,
10286 (bitconvert (_.LdFrag addr:$src3)), (i8 imm:$src4)),
10287 _.RC:$src1)),
10288 (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
10289 _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
10290 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10291 (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
10292 _.RC:$src1, (i8 imm:$src4)),
10293 _.RC:$src1)),
10294 (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
10295 _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
10296 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10297 (OpNode (bitconvert (_.LdFrag addr:$src3)),
10298 _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
10299 _.RC:$src1)),
10300 (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
10301 _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
Craig Topper5b4e36a2017-02-20 02:47:42 +000010302
10303 // Additional patterns for matching broadcasts in other positions.
10304 def : Pat<(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10305 _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
10306 (!cast<Instruction>(NAME#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
10307 addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
10308 def : Pat<(_.VT (OpNode _.RC:$src1,
10309 (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10310 _.RC:$src2, (i8 imm:$src4))),
10311 (!cast<Instruction>(NAME#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
10312 addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
10313
10314 // Additional patterns for matching zero masking with broadcasts in other
10315 // positions.
10316 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10317 (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10318 _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
10319 _.ImmAllZerosV)),
10320 (!cast<Instruction>(NAME#_.ZSuffix#rmbikz) _.RC:$src1,
10321 _.KRCWM:$mask, _.RC:$src2, addr:$src3,
10322 (VPTERNLOG321_imm8 imm:$src4))>;
10323 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10324 (OpNode _.RC:$src1,
10325 (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10326 _.RC:$src2, (i8 imm:$src4)),
10327 _.ImmAllZerosV)),
10328 (!cast<Instruction>(NAME#_.ZSuffix#rmbikz) _.RC:$src1,
10329 _.KRCWM:$mask, _.RC:$src2, addr:$src3,
10330 (VPTERNLOG132_imm8 imm:$src4))>;
10331
10332 // Additional patterns for matching masked broadcasts with different
10333 // operand orders.
10334 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10335 (OpNode _.RC:$src1,
10336 (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10337 _.RC:$src2, (i8 imm:$src4)),
10338 _.RC:$src1)),
10339 (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
10340 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
Craig Topper2012dda2017-02-20 17:44:09 +000010341 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10342 (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10343 _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
10344 _.RC:$src1)),
Cameron McInally9d641012017-10-06 22:31:29 +000010345 (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
Craig Topper2012dda2017-02-20 17:44:09 +000010346 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
10347 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10348 (OpNode _.RC:$src2, _.RC:$src1,
10349 (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10350 (i8 imm:$src4)), _.RC:$src1)),
Cameron McInally9d641012017-10-06 22:31:29 +000010351 (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
Craig Topper2012dda2017-02-20 17:44:09 +000010352 _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
10353 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10354 (OpNode _.RC:$src2,
10355 (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10356 _.RC:$src1, (i8 imm:$src4)),
10357 _.RC:$src1)),
Cameron McInally9d641012017-10-06 22:31:29 +000010358 (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
Craig Topper2012dda2017-02-20 17:44:09 +000010359 _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
10360 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10361 (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10362 _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
10363 _.RC:$src1)),
Cameron McInally9d641012017-10-06 22:31:29 +000010364 (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
Craig Topper2012dda2017-02-20 17:44:09 +000010365 _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
Igor Bregerb4bb1902015-10-15 12:33:24 +000010366}
10367
// Instantiates avx512_ternlog (opcode 0x25, node X86vpternlog) for each vector
// width described by the AVX512VLVectorVTInfo argument:
//   Z    - 512-bit form, requires HasAVX512.
//   Z128 - 128-bit form, requires HasAVX512 and HasVLX.
//   Z256 - 256-bit form, requires HasAVX512 and HasVLX.
multiclass avx512_common_ternlog<string OpcodeStr, OpndItins itins,
                                 AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
    defm Z    : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, itins, _.info512>,
                EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, itins, _.info128>,
                EVEX_V128;
    defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, itins, _.info256>,
                EVEX_V256;
  }
}
10377
// Doubleword and quadword ternary-logic instructions. The quadword variant
// additionally sets VEX_W.
defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SSE_INTALU_ITINS_P,
                                        avx512vl_i32_info>;
defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SSE_INTALU_ITINS_P,
                                        avx512vl_i64_info>, VEX_W;
Igor Bregerb4bb1902015-10-15 12:33:24 +000010382
Craig Topper8a444ee2018-01-26 22:17:40 +000010383
// Patterns to implement vnot using vpternlog instead of creating all ones
// using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
// so that the result is only dependent on src0. But we use the same source
// for all operands to prevent a false dependency.
// TODO: We should maybe have a more generalized algorithm for folding to
// vpternlog.

// 512-bit vnot: select the 512-bit instruction directly.
let Predicates = [HasAVX512] in {
  def : Pat<(v8i64 (xor VR512:$src, (bc_v8i64 (v16i32 immAllOnesV)))),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
}

// 128/256-bit vnot without VLX: widen the source into an otherwise undefined
// 512-bit register, run the 512-bit instruction, and extract the low
// subregister again.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v2i64 (xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV)))),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(v4i64 (xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV)))),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
}

// 128/256-bit vnot with VLX: select the native-width instructions.
let Predicates = [HasVLX] in {
  def : Pat<(v2i64 (xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV)))),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
  def : Pat<(v4i64 (xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV)))),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
}
10418
//===----------------------------------------------------------------------===//
// AVX-512 - FixupImm
//===----------------------------------------------------------------------===//
10422
// Packed VFIXUPIMM forms using the current rounding mode (FROUND_CURRENT).
// $src1 is tied to $dst. Three variants are defined:
//   rri  - $src3 is a register.
//   rmi  - $src3 is a full-width memory operand.
//   rmbi - $src3 is a scalar memory operand that is broadcast (EVEX_B).
// In every variant $src3 is typed as the integer vector type _.IntVT.
multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  OpndItins itins, X86VectorVTInfo _>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
    defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                        OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                        (OpNode (_.VT _.RC:$src1),
                                (_.VT _.RC:$src2),
                                (_.IntVT _.RC:$src3),
                                (i32 imm:$src4),
                                (i32 FROUND_CURRENT)), itins.rr>, Sched<[itins.Sched]>;
    defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.IntVT (bitconvert (_.LdFrag addr:$src3))),
                              (i32 imm:$src4),
                              (i32 FROUND_CURRENT)), itins.rm>,
                      Sched<[itins.Sched.Folded, ReadAfterLd]>;
    defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                    OpcodeStr##_.Suffix, "$src4, ${src3}"##_.BroadcastStr##", $src2",
                    "$src2, ${src3}"##_.BroadcastStr##", $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.IntVT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
                              (i32 imm:$src4),
                              (i32 FROUND_CURRENT)), itins.rm>,
                    EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  } // Constraints = "$src1 = $dst"
}
10455
// Packed VFIXUPIMM register form with {sae}: same operands as the rri form
// but passes FROUND_NO_EXC and sets EVEX_B. $src1 is tied to $dst.
multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
                                      SDNode OpNode, OpndItins itins,
                                      X86VectorVTInfo _>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
    defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                        OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
                        "$src2, $src3, {sae}, $src4",
                        (OpNode (_.VT _.RC:$src1),
                                (_.VT _.RC:$src2),
                                (_.IntVT _.RC:$src3),
                                (i32 imm:$src4),
                                (i32 FROUND_NO_EXC)), itins.rr>,
                        EVEX_B, Sched<[itins.Sched]>;
  }
}
10472
// Scalar VFIXUPIMM forms. $src1 is tied to $dst and all forms require
// HasAVX512. The third source operand is typed by the separate _src3VT info
// rather than by _. Three variants are defined:
//   rri  - register $src3, FROUND_CURRENT.
//   rrib - register $src3 with {sae} (FROUND_NO_EXC, EVEX_B).
//   rmi  - scalar memory $src3, loaded and wrapped with scalar_to_vector,
//          FROUND_CURRENT.
multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  OpndItins itins, X86VectorVTInfo _,
                                  X86VectorVTInfo _src3VT> {
  let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
      ExeDomain = _.ExeDomain in {
    defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_src3VT.VT _src3VT.RC:$src3),
                              (i32 imm:$src4),
                              (i32 FROUND_CURRENT)), itins.rr>, Sched<[itins.Sched]>;
    // rrib is a register-register (MRMSrcReg) form, so it takes the
    // register itinerary and the unfolded scheduling class, like rri above.
    defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
                      "$src2, $src3, {sae}, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_src3VT.VT _src3VT.RC:$src3),
                              (i32 imm:$src4),
                              (i32 FROUND_NO_EXC)), itins.rr>,
                      EVEX_B, Sched<[itins.Sched]>;
    defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                     (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                     OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                     (OpNode (_.VT _.RC:$src1),
                             (_.VT _.RC:$src2),
                             (_src3VT.VT (scalar_to_vector
                                       (_src3VT.ScalarLdFrag addr:$src3))),
                             (i32 imm:$src4),
                             (i32 FROUND_CURRENT)), itins.rm>,
                     Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
10508
// Instantiates the packed VFIXUPIMM forms (opcode 0x54, node X86VFixupimm)
// at every vector width:
//   Z    - 512-bit, HasAVX512; also pulls in the {sae} register form.
//   Z128 - 128-bit, HasAVX512 and HasVLX.
//   Z256 - 256-bit, HasAVX512 and HasVLX.
multiclass avx512_fixupimm_packed_all<OpndItins itins, AVX512VLVectorVTInfo _Vec> {
  let Predicates = [HasAVX512] in
    defm Z    : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, itins,
                                       _Vec.info512>,
                avx512_fixupimm_packed_sae<0x54, "vfixupimm", X86VFixupimm, itins,
                                           _Vec.info512>, AVX512AIi8Base, EVEX_4V, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, itins,
                                       _Vec.info128>, AVX512AIi8Base, EVEX_4V, EVEX_V128;
    defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, itins,
                                       _Vec.info256>, AVX512AIi8Base, EVEX_4V, EVEX_V256;
  }
}
10522
// Scalar (SS/SD, opcode 0x55) and packed (PS/PD) VFIXUPIMM instructions.
// The scalar forms pass an integer vector info (v4i32x_info / v2i64x_info)
// for the third source operand. The double-precision forms set VEX_W.
defm VFIXUPIMMSS : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar,
                                          SSE_ALU_F32S, f32x_info, v4i32x_info>,
                   AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VFIXUPIMMSD : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar,
                                          SSE_ALU_F64S, f64x_info, v2i64x_info>,
                   AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SSE_ALU_F32P, avx512vl_f32_info>,
                   EVEX_CD8<32, CD8VF>;
defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SSE_ALU_F64P, avx512vl_f64_info>,
                   EVEX_CD8<64, CD8VF>, VEX_W;
Craig Topper5625d242016-07-29 06:06:00 +000010533
10534
10535
// Patterns used to select SSE scalar fp arithmetic instructions from
// either:
//
// (1) a scalar fp operation followed by a blend
//
// The effect is that the backend no longer emits unnecessary vector
// insert instructions immediately after SSE scalar fp instructions
// like addss or mulss.
//
// For example, given the following code:
//   __m128 foo(__m128 A, __m128 B) {
//     A[0] += B[0];
//     return A;
//   }
//
// Previously we generated:
//   addss %xmm0, %xmm1
//   movss %xmm1, %xmm0
//
// We now generate:
//   addss %xmm1, %xmm0
//
// (2) a vector packed single/double fp operation followed by a vector insert
//
// The effect is that the backend converts the packed fp instruction
// followed by a vector insert into a single SSE scalar fp instruction.
//
// For example, given the following code:
//   __m128 foo(__m128 A, __m128 B) {
//     __m128 C = A + B;
//     return (__m128) {C[0], A[1], A[2], A[3]};
//   }
//
// Previously we generated:
//   addps %xmm0, %xmm1
//   movss %xmm1, %xmm0
//
// We now generate:
//   addss %xmm1, %xmm0
10575
// TODO: Some canonicalization in lowering would simplify the number of
// patterns we have to try to match.

// Selection patterns that map an f32 math node Op combined with an X86Movss
// insert onto the "V" # OpcPrefix # "SSZrr_Int" instruction, or onto its
// masked "..._Intk" form. All patterns require HasAVX512.
multiclass AVX512_scalar_math_f32_patterns<SDNode Op, string OpcPrefix> {
  let Predicates = [HasAVX512] in {
    // extracted scalar math op with insert via movss
    def : Pat<(v4f32 (X86Movss (v4f32 VR128X:$dst), (v4f32 (scalar_to_vector
          (Op (f32 (extractelt (v4f32 VR128X:$dst), (iPTR 0))),
          FR32X:$src))))),
      (!cast<I>("V"#OpcPrefix#SSZrr_Int) v4f32:$dst,
          (COPY_TO_REGCLASS FR32X:$src, VR128X))>;

    // vector math op with insert via movss
    def : Pat<(v4f32 (X86Movss (v4f32 VR128X:$dst),
          (Op (v4f32 VR128X:$dst), (v4f32 VR128X:$src)))),
      (!cast<I>("V"#OpcPrefix#SSZrr_Int) v4f32:$dst, v4f32:$src)>;

    // extracted masked scalar math op with insert via movss
    def : Pat<(X86Movss (v4f32 VR128X:$src1),
               (scalar_to_vector
                (X86selects VK1WM:$mask,
                            (Op (f32 (extractelt (v4f32 VR128X:$src1), (iPTR 0))),
                                FR32X:$src2),
                            FR32X:$src0))),
      (!cast<I>("V"#OpcPrefix#SSZrr_Intk) (COPY_TO_REGCLASS FR32X:$src0, VR128X),
          VK1WM:$mask, v4f32:$src1,
          (COPY_TO_REGCLASS FR32X:$src2, VR128X))>;
  }
}
10604
// f32 scalar-math selection patterns for add, sub, mul and div.
defm : AVX512_scalar_math_f32_patterns<fadd, "ADD">;
defm : AVX512_scalar_math_f32_patterns<fsub, "SUB">;
defm : AVX512_scalar_math_f32_patterns<fmul, "MUL">;
defm : AVX512_scalar_math_f32_patterns<fdiv, "DIV">;
10609
// Selection patterns that map an f64 math node Op combined with an X86Movsd
// insert onto the "V" # OpcPrefix # "SDZrr_Int" instruction, or onto its
// masked "..._Intk" form. All patterns require HasAVX512.
multiclass AVX512_scalar_math_f64_patterns<SDNode Op, string OpcPrefix> {
  let Predicates = [HasAVX512] in {
    // extracted scalar math op with insert via movsd
    def : Pat<(v2f64 (X86Movsd (v2f64 VR128X:$dst), (v2f64 (scalar_to_vector
          (Op (f64 (extractelt (v2f64 VR128X:$dst), (iPTR 0))),
          FR64X:$src))))),
      (!cast<I>("V"#OpcPrefix#SDZrr_Int) v2f64:$dst,
          (COPY_TO_REGCLASS FR64X:$src, VR128X))>;

    // vector math op with insert via movsd
    def : Pat<(v2f64 (X86Movsd (v2f64 VR128X:$dst),
          (Op (v2f64 VR128X:$dst), (v2f64 VR128X:$src)))),
      (!cast<I>("V"#OpcPrefix#SDZrr_Int) v2f64:$dst, v2f64:$src)>;

    // extracted masked scalar math op with insert via movsd
    def : Pat<(X86Movsd (v2f64 VR128X:$src1),
               (scalar_to_vector
                (X86selects VK1WM:$mask,
                            (Op (f64 (extractelt (v2f64 VR128X:$src1), (iPTR 0))),
                                FR64X:$src2),
                            FR64X:$src0))),
      (!cast<I>("V"#OpcPrefix#SDZrr_Intk) (COPY_TO_REGCLASS FR64X:$src0, VR128X),
          VK1WM:$mask, v2f64:$src1,
          (COPY_TO_REGCLASS FR64X:$src2, VR128X))>;
  }
}
10636
// f64 scalar-math selection patterns for add, sub, mul and div.
defm : AVX512_scalar_math_f64_patterns<fadd, "ADD">;
defm : AVX512_scalar_math_f64_patterns<fsub, "SUB">;
defm : AVX512_scalar_math_f64_patterns<fmul, "MUL">;
defm : AVX512_scalar_math_f64_patterns<fdiv, "DIV">;
Coby Tayree2a1c02f2017-11-21 09:11:41 +000010641
//===----------------------------------------------------------------------===//
// AES instructions
//===----------------------------------------------------------------------===//
Coby Tayree7ca5e5872017-11-21 09:30:33 +000010645
// EVEX-encoded AES instructions built on AESI_binop_rm_int.
//   Z128 / Z256 - require HasVLX and HasVAES; use the intrinsics named
//                 IntPrefix and IntPrefix # "_256".
//   Z           - requires HasAVX512 and HasVAES; uses IntPrefix # "_512".
multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
  let Predicates = [HasVLX, HasVAES] in {
    defm Z128 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix),
                                  loadv2i64, 0, VR128X, i128mem>,
                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
    defm Z256 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix##"_256"),
                                  loadv4i64, 0, VR256X, i256mem>,
                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512, HasVAES] in
    defm Z    : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix##"_512"),
                                  loadv8i64, 0, VR512, i512mem>,
                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
}
10663
// AES encrypt/decrypt round instructions, opcodes 0xDC-0xDF.
defm VAESENC      : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
defm VAESENCLAST  : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
defm VAESDEC      : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
defm VAESDECLAST  : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;
10668
//===----------------------------------------------------------------------===//
// PCLMUL instructions - Carry less multiplication
//===----------------------------------------------------------------------===//
10672
// EVEX-encoded carry-less multiply. The 512-bit form requires HasAVX512 and
// HasVPCLMULQDQ; the 128- and 256-bit forms require HasVLX and HasVPCLMULQDQ.
let Predicates = [HasAVX512, HasVPCLMULQDQ] in
defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
                              EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;

let Predicates = [HasVLX, HasVPCLMULQDQ] in {
defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
                              EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;

defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
                                int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
                                EVEX_CD8<64, CD8VF>, VEX_WIG;
}
10685
// Aliases
defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
10690
//===----------------------------------------------------------------------===//
// VBMI2
//===----------------------------------------------------------------------===//
10694
// Three-source VBMI2 variable shift with $src1 tied to $dst.
//   r - $src3 is a register.
//   m - $src3 is a full-width memory operand.
multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
                              OpndItins itins, X86VectorVTInfo VTI> {
  let Constraints = "$src1 = $dst",
      ExeDomain   = VTI.ExeDomain in {
    defm r:   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
                "$src3, $src2", "$src2, $src3",
                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3)),
                itins.rr>, AVX512FMA3Base, Sched<[itins.Sched]>;
    defm m:   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
                "$src3, $src2", "$src2, $src3",
                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                        (VTI.VT (bitconvert (VTI.LdFrag addr:$src3))))),
                itins.rm>, AVX512FMA3Base,
                Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
10713
// Extends VBMI2_shift_var_rm with an mb form whose $src3 is a scalar memory
// operand that is broadcast (EVEX_B).
multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                               OpndItins itins, X86VectorVTInfo VTI>
         : VBMI2_shift_var_rm<Op, OpStr, OpNode, itins, VTI> {
  let Constraints = "$src1 = $dst",
      ExeDomain   = VTI.ExeDomain in
  defm mb:  AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
              (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
              "${src3}"##VTI.BroadcastStr##", $src2",
              "$src2, ${src3}"##VTI.BroadcastStr,
              (OpNode VTI.RC:$src1, VTI.RC:$src2,
               (VTI.VT (X86VBroadcast (VTI.ScalarLdFrag addr:$src3)))),
              itins.rm>, AVX512FMA3Base, EVEX_B,
              Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
10728
// Instantiates the register/memory variable-shift forms at all vector widths:
// Z requires HasVBMI2; Z256 and Z128 additionally require HasVLX.
multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     OpndItins itins, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasVBMI2] in
  defm Z      : VBMI2_shift_var_rm<Op, OpStr, OpNode, itins, VTI.info512>, EVEX_V512;
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, itins, VTI.info256>, EVEX_V256;
    defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, itins, VTI.info128>, EVEX_V128;
  }
}
10738
// Instantiates the register/memory/broadcast variable-shift forms at all
// vector widths: Z requires HasVBMI2; Z256 and Z128 additionally require
// HasVLX.
multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
                                      OpndItins itins, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasVBMI2] in
  defm Z      : VBMI2_shift_var_rmb<Op, OpStr, OpNode, itins, VTI.info512>, EVEX_V512;
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, itins, VTI.info256>, EVEX_V256;
    defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, itins, VTI.info128>, EVEX_V128;
  }
}
// Word, doubleword and quadword variable-shift families. The word family uses
// opcode wOp and has no broadcast form; the doubleword and quadword families
// share opcode dqOp and include the broadcast form. W and Q set VEX_W.
multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, OpndItins itins> {
  defm W : VBMI2_shift_var_rm_common<wOp, Prefix##"w", OpNode, itins,
             avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
  defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix##"d", OpNode, itins,
             avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
  defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix##"q", OpNode, itins,
             avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
}
10757
// Word, doubleword and quadword immediate-shift families, all gated on
// HasVBMI2. The word family is built from avx512_common_3Op_rm_imm8 with
// opcode wOp; the doubleword and quadword families are built from
// avx512_common_3Op_imm8 with opcode dqOp. W and Q set VEX_W.
multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, OpndItins itins> {
  defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix##"w", itins,
             avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
             VEX_W, EVEX_CD8<16, CD8VF>;
  defm D : avx512_common_3Op_imm8<Prefix##"d", avx512vl_i32_info, dqOp,
             OpNode, itins, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
  defm Q : avx512_common_3Op_imm8<Prefix##"q", avx512vl_i64_info, dqOp, OpNode,
             itins, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
}
10768
// Concat & Shift
defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SSE_INTMUL_ITINS_P>;
defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SSE_INTMUL_ITINS_P>;
defm VPSHLD  : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SSE_INTMUL_ITINS_P>;
defm VPSHRD  : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SSE_INTMUL_ITINS_P>;
10774
// Compress
defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", AVX512_COMPRESS,
                                         avx512vl_i8_info, HasVBMI2>, EVEX;
defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", AVX512_COMPRESS,
                                          avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;
// Expand
defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", AVX512_EXPAND,
                                      avx512vl_i8_info, HasVBMI2>, EVEX;
defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", AVX512_EXPAND,
                                      avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;
Coby Tayree71e37cc2017-11-21 09:48:44 +000010785
//===----------------------------------------------------------------------===//
// VNNI
//===----------------------------------------------------------------------===//
10789
// Three-source VNNI instruction with $src1 tied to $dst.
//   r  - $src3 is a register.
//   m  - $src3 is a full-width memory operand.
//   mb - $src3 is a scalar memory operand that is broadcast (EVEX_B).
let Constraints = "$src1 = $dst" in
multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                    OpndItins itins, X86VectorVTInfo VTI> {
  defm r  :   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                                   (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
                                   "$src3, $src2", "$src2, $src3",
                                   (VTI.VT (OpNode VTI.RC:$src1,
                                            VTI.RC:$src2, VTI.RC:$src3)),
                                   itins.rr>, EVEX_4V, T8PD, Sched<[itins.Sched]>;
  defm m  :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                   (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
                                   "$src3, $src2", "$src2, $src3",
                                   (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                            (VTI.VT (bitconvert
                                                     (VTI.LdFrag addr:$src3))))),
                                   itins.rm>, EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
                                   Sched<[itins.Sched.Folded, ReadAfterLd]>;
  defm mb :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                   (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
                                   OpStr, "${src3}"##VTI.BroadcastStr##", $src2",
                                   "$src2, ${src3}"##VTI.BroadcastStr,
                                   (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                    (VTI.VT (X86VBroadcast
                                             (VTI.ScalarLdFrag addr:$src3)))),
                                   itins.rm>, EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
                                   T8PD, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
10817
// Instantiates VNNI_rmb on 32-bit integer vectors at all widths:
// Z requires HasVNNI; Z256 and Z128 additionally require HasVLX.
multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode, OpndItins itins> {
  let Predicates = [HasVNNI] in
  defm Z      :   VNNI_rmb<Op, OpStr, OpNode, itins, v16i32_info>, EVEX_V512;
  let Predicates = [HasVNNI, HasVLX] in {
    defm Z256 :   VNNI_rmb<Op, OpStr, OpNode, itins, v8i32x_info>, EVEX_V256;
    defm Z128 :   VNNI_rmb<Op, OpStr, OpNode, itins, v4i32x_info>, EVEX_V128;
  }
}
10826
// FIXME: Is there a better scheduler itinerary for VPDP?
defm VPDPBUSD   : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SSE_PMADD>;
defm VPDPBUSDS  : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SSE_PMADD>;
defm VPDPWSSD   : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SSE_PMADD>;
defm VPDPWSSDS  : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SSE_PMADD>;
Coby Tayree3880f2a2017-11-21 10:04:28 +000010832
//===----------------------------------------------------------------------===//
// Bit Algorithms
//===----------------------------------------------------------------------===//
10836
// FIXME: Is there a better scheduler itinerary for VPOPCNTB/VPOPCNTW?
// Byte and word population count, selected from ctpop and gated on HasBITALG.
defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SSE_INTALU_ITINS_P,
                                   avx512vl_i8_info, HasBITALG>;
defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SSE_INTALU_ITINS_P,
                                   avx512vl_i16_info, HasBITALG>, VEX_W;

defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
Coby Tayree5c7fe5d2017-11-21 10:32:42 +000010845
// vpshufbitqmb (opcode 0x8F): a maskable compare-style instruction whose
// destination is a mask register (VTI.KRC).
//   rr - $src2 is a register.
//   rm - $src2 is a full-width memory operand.
multiclass VPSHUFBITQMB_rm<OpndItins itins, X86VectorVTInfo VTI> {
  defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
                                (ins VTI.RC:$src1, VTI.RC:$src2),
                                "vpshufbitqmb",
                                "$src2, $src1", "$src1, $src2",
                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
                                (VTI.VT VTI.RC:$src2)), itins.rr>, EVEX_4V, T8PD,
                                Sched<[itins.Sched]>;
  defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
                                (ins VTI.RC:$src1, VTI.MemOp:$src2),
                                "vpshufbitqmb",
                                "$src2, $src1", "$src1, $src2",
                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
                                (VTI.VT (bitconvert (VTI.LdFrag addr:$src2)))),
                                itins.rm>, EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
                                Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
10863
// Instantiates VPSHUFBITQMB_rm at all vector widths: Z requires HasBITALG;
// Z256 and Z128 additionally require HasVLX.
multiclass VPSHUFBITQMB_common<OpndItins itins, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasBITALG] in
  defm Z      : VPSHUFBITQMB_rm<itins, VTI.info512>, EVEX_V512;
  let Predicates = [HasBITALG, HasVLX] in {
    defm Z256 : VPSHUFBITQMB_rm<itins, VTI.info256>, EVEX_V256;
    defm Z128 : VPSHUFBITQMB_rm<itins, VTI.info128>, EVEX_V128;
  }
}
10872
// FIXME: Is there a better scheduler itinerary for VPSHUFBITQMB?
defm VPSHUFBITQMB : VPSHUFBITQMB_common<SSE_INTMUL_ITINS_P, avx512vl_i8_info>;
Coby Tayreee8bdd382017-11-23 11:15:50 +000010875
//===----------------------------------------------------------------------===//
// GFNI
//===----------------------------------------------------------------------===//
10879
// Instantiates avx512_binop_rm on byte vectors at all widths.
//   Z           - requires HasGFNI, HasAVX512 and HasBWI.
//   Z256 / Z128 - require HasGFNI, HasVLX and HasBWI.
// NOTE(review): the trailing `1` argument is presumably the commutable flag
// of avx512_binop_rm; confirm against that multiclass.
multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode> {
  let Predicates = [HasGFNI, HasAVX512, HasBWI] in
  defm Z      : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info,
                                SSE_INTALU_ITINS_P, 1>, EVEX_V512;
  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
    defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info,
                                SSE_INTALU_ITINS_P, 1>, EVEX_V256;
    defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info,
                                SSE_INTALU_ITINS_P, 1>, EVEX_V128;
  }
}
10891
// VGF2P8MULB: opcode 0xCF, selected from the X86GF2P8mulb node.
defm VGF2P8MULB
    : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb>,
      EVEX_CD8<8, CD8VF>, T8PD;
Coby Tayreed8b17be2017-11-26 09:36:41 +000010894
// Immediate-form GF2P8AFFINE instructions for a single vector width.
// Inherits every form defined by avx512_3Op_rm_imm8 and adds `rmbi`, whose
// second source is a 64-bit value loaded from memory and broadcast (EVEX_B).
//   Op      - opcode byte.
//   OpStr   - assembly mnemonic.
//   OpNode  - SDNode used in the selection patterns.
//   itins   - itinerary bundle; `rmbi` uses its `rm` entry and folded sched.
//   VTI     - type info of the byte vector the instruction operates on.
//   BcstVTI - type info of the 64-bit-element vector used for the broadcast
//             (instantiations in this file pair e.g. v64i8_info with
//             v8i64_info); supplies the broadcast type, the memory operand
//             and the broadcast suffix string.
multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
                                      OpndItins itins, X86VectorVTInfo VTI,
                                      X86VectorVTInfo BcstVTI>
           : avx512_3Op_rm_imm8<Op, OpStr, OpNode, itins, VTI, VTI> {
  // The pattern loads 64 bits (loadi64) and broadcasts them as BcstVTI.VT, so
  // the memory operand is taken from BcstVTI as well. VTI.ScalarMemOp would be
  // the element-sized operand of the byte vector, which does not match the
  // size of the load.
  let ExeDomain = VTI.ExeDomain in
  defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src1, BcstVTI.ScalarMemOp:$src2, u8imm:$src3),
                OpStr, "$src3, ${src2}" # BcstVTI.BroadcastStr # ", $src1",
                "$src1, ${src2}" # BcstVTI.BroadcastStr # ", $src3",
                (OpNode (VTI.VT VTI.RC:$src1),
                 (bitconvert (BcstVTI.VT (X86VBroadcast (loadi64 addr:$src2)))),
                 (i8 imm:$src3)), itins.rm>, EVEX_B,
                 Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
10909
// Instantiates GF2P8AFFINE_avx512_rmb_imm at 512, 256 and 128 bits. Each
// width pairs the byte-vector type info with the i64-vector type info that
// is handed to the broadcast form.
//   Op     - opcode byte.
//   OpStr  - assembly mnemonic.
//   OpNode - SDNode used in the selection patterns.
//   itins  - itinerary bundle forwarded to every width.
// All widths require GFNI and BWI; the 512-bit form additionally lists
// AVX512 and the narrower forms list VLX.
multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     OpndItins itins> {
  // 512-bit variant: v64i8 data, v8i64 broadcast.
  let Predicates = [HasGFNI, HasAVX512, HasBWI] in {
    defm Z : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, itins,
                                        v64i8_info, v8i64_info>,
             EVEX_V512;
  }

  // 256-bit and 128-bit variants.
  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
    defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, itins,
                                           v32i8x_info, v4i64x_info>,
                EVEX_V256;
    defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, itins,
                                           v16i8x_info, v2i64x_info>,
                EVEX_V128;
  }
}
10922
// VGF2P8AFFINEINVQB: opcode 0xCF, selected from X86GF2P8affineinvqb.
defm VGF2P8AFFINEINVQB
    : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
                                X86GF2P8affineinvqb, SSE_INTMUL_ITINS_P>,
      EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;

// VGF2P8AFFINEQB: opcode 0xCE, selected from X86GF2P8affineqb.
defm VGF2P8AFFINEQB
    : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
                                X86GF2P8affineqb, SSE_INTMUL_ITINS_P>,
      EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
Coby Tayreed8b17be2017-11-26 09:36:41 +000010929