blob: 42da15eafd0b7980c778ed2dce66f7b01b8af7b1 [file] [log] [blame]
Eric Christopher06b32cd2015-02-20 00:36:53 +00001//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file describes the X86 AVX512 instruction set, defining the
11// instructions, and properties of the instructions which are needed for code
12// generation, machine code emission, and analysis.
13//
14//===----------------------------------------------------------------------===//
15
Adam Nemet5ed17da2014-08-21 19:50:07 +000016// Group template arguments that can be derived from the vector type (EltNum x
17// EltVT). These are things like the register class for the writemask, etc.
18// The idea is to pass one of these as the template argument rather than the
19// individual arguments.
Elena Demikhovskyfa4a6c12014-12-09 07:06:32 +000020// The template is also used for scalar types, in this case numelts is 1.
Robert Khasanov4204c1a2014-12-12 14:21:30 +000021class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
Adam Nemet5ed17da2014-08-21 19:50:07 +000022 string suffix = ""> {
23 RegisterClass RC = rc;
Robert Khasanov4204c1a2014-12-12 14:21:30 +000024 ValueType EltVT = eltvt;
Adam Nemet449b3f02014-10-15 23:42:09 +000025 int NumElts = numelts;
Adam Nemet5ed17da2014-08-21 19:50:07 +000026
27 // Corresponding mask register class.
28 RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);
29
30 // Corresponding write-mask register class.
31 RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");
32
Igor Bregerfca0a342016-01-28 13:19:25 +000033 // The mask VT.
Guy Blank548e22a2017-05-19 12:35:15 +000034 ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");
Simon Pilgrimb13961d2016-06-11 14:34:10 +000035
Adam Nemet5ed17da2014-08-21 19:50:07 +000036 // Suffix used in the instruction mnemonic.
37 string Suffix = suffix;
38
Elena Demikhovskyfa4a6c12014-12-09 07:06:32 +000039 // VTName is a string name for vector VT. For vector types it will be
40 // v # NumElts # EltVT, so for vector of 8 elements of i32 it will be v8i32
41 // It is a little bit complex for scalar types, where NumElts = 1.
42 // In this case we build v4f32 or v2f64
43 string VTName = "v" # !if (!eq (NumElts, 1),
44 !if (!eq (EltVT.Size, 32), 4,
45 !if (!eq (EltVT.Size, 64), 2, NumElts)), NumElts) # EltVT;
Robert Khasanov2ea081d2014-08-25 14:49:34 +000046
Adam Nemet5ed17da2014-08-21 19:50:07 +000047 // The vector VT.
Robert Khasanov2ea081d2014-08-25 14:49:34 +000048 ValueType VT = !cast<ValueType>(VTName);
Adam Nemet5ed17da2014-08-21 19:50:07 +000049
50 string EltTypeName = !cast<string>(EltVT);
51 // Size of the element type in bits, e.g. 32 for v16i32.
Robert Khasanov2ea081d2014-08-25 14:49:34 +000052 string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
53 int EltSize = EltVT.Size;
Adam Nemet5ed17da2014-08-21 19:50:07 +000054
55 // "i" for integer types and "f" for floating-point types
Robert Khasanov2ea081d2014-08-25 14:49:34 +000056 string TypeVariantName = !subst(EltSizeName, "", EltTypeName);
Adam Nemet5ed17da2014-08-21 19:50:07 +000057
58 // Size of RC in bits, e.g. 512 for VR512.
59 int Size = VT.Size;
60
61 // The corresponding memory operand, e.g. i512mem for VR512.
62 X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
Robert Khasanov2ea081d2014-08-25 14:49:34 +000063 X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
Ayman Musaf77219e2017-02-13 09:55:48 +000064 // FP scalar memory operand for intrinsics - ssmem/sdmem.
65 Operand IntScalarMemOp = !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
66 !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?));
Robert Khasanov2ea081d2014-08-25 14:49:34 +000067
68 // Load patterns
69 // Note: For 128/256-bit integer VT we choose loadv2i64/loadv4i64
70 // due to load promotion during legalization
71 PatFrag LdFrag = !cast<PatFrag>("load" #
72 !if (!eq (TypeVariantName, "i"),
73 !if (!eq (Size, 128), "v2i64",
74 !if (!eq (Size, 256), "v4i64",
Craig Toppera78b7682016-08-11 06:04:07 +000075 !if (!eq (Size, 512), "v8i64",
76 VTName))), VTName));
Elena Demikhovsky2689d782015-03-02 12:46:21 +000077
78 PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" #
Craig Toppera78b7682016-08-11 06:04:07 +000079 !if (!eq (TypeVariantName, "i"),
80 !if (!eq (Size, 128), "v2i64",
81 !if (!eq (Size, 256), "v4i64",
82 !if (!eq (Size, 512), "v8i64",
83 VTName))), VTName));
Elena Demikhovsky2689d782015-03-02 12:46:21 +000084
Robert Khasanov2ea081d2014-08-25 14:49:34 +000085 PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
Adam Nemet5ed17da2014-08-21 19:50:07 +000086
Craig Topperd9fe6642017-02-21 04:26:10 +000087 ComplexPattern ScalarIntMemCPat = !if (!eq (EltTypeName, "f32"),
88 !cast<ComplexPattern>("sse_load_f32"),
89 !if (!eq (EltTypeName, "f64"),
90 !cast<ComplexPattern>("sse_load_f64"),
91 ?));
92
Adam Nemet5ed17da2014-08-21 19:50:07 +000093 // The corresponding float type, e.g. v16f32 for v16i32
Robert Khasanov2ea081d2014-08-25 14:49:34 +000094 // Note: For EltSize < 32, FloatVT is illegal and TableGen
95 // fails to compile, so we choose FloatVT = VT
96 ValueType FloatVT = !cast<ValueType>(
97 !if (!eq (!srl(EltSize,5),0),
98 VTName,
99 !if (!eq(TypeVariantName, "i"),
100 "v" # NumElts # "f" # EltSize,
101 VTName)));
Adam Nemet5ed17da2014-08-21 19:50:07 +0000102
Asaf Badouhd4a0d9a2016-01-19 14:21:39 +0000103 ValueType IntVT = !cast<ValueType>(
104 !if (!eq (!srl(EltSize,5),0),
105 VTName,
106 !if (!eq(TypeVariantName, "f"),
107 "v" # NumElts # "i" # EltSize,
108 VTName)));
Adam Nemet5ed17da2014-08-21 19:50:07 +0000109 // The string to specify embedded broadcast in assembly.
110 string BroadcastStr = "{1to" # NumElts # "}";
Adam Nemet55536c62014-09-25 23:48:45 +0000111
Adam Nemet449b3f02014-10-15 23:42:09 +0000112 // 8-bit compressed displacement tuple/subvector format. This is only
113 // defined for NumElts <= 8.
114 CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
115 !cast<CD8VForm>("CD8VT" # NumElts), ?);
116
Adam Nemet55536c62014-09-25 23:48:45 +0000117 SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
118 !if (!eq (Size, 256), sub_ymm, ?));
119
120 Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
121 !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
122 SSEPackedInt));
Adam Nemet09377232014-10-08 23:25:31 +0000123
Elena Demikhovsky52e81bc2015-02-23 15:12:31 +0000124 RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);
125
Craig Topperabe80cc2016-08-28 06:06:28 +0000126 // A vector type of the same width with element type i64. This is used to
127 // create patterns for logic ops.
128 ValueType i64VT = !cast<ValueType>("v" # !srl(Size, 6) # "i64");
129
Adam Nemet09377232014-10-08 23:25:31 +0000130 // A vector type of the same width with element type i32. This is used to
131 // create the canonical constant zero node ImmAllZerosV.
132 ValueType i32VT = !cast<ValueType>("v" # !srl(Size, 5) # "i32");
133 dag ImmAllZerosV = (VT (bitconvert (i32VT immAllZerosV)));
Elena Demikhovskyd207f172015-03-03 15:03:35 +0000134
135 string ZSuffix = !if (!eq (Size, 128), "Z128",
136 !if (!eq (Size, 256), "Z256", "Z"));
Adam Nemet5ed17da2014-08-21 19:50:07 +0000137}
138
Robert Khasanov2ea081d2014-08-25 14:49:34 +0000139def v64i8_info : X86VectorVTInfo<64, i8, VR512, "b">;
140def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
Adam Nemet5ed17da2014-08-21 19:50:07 +0000141def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
142def v8i64_info : X86VectorVTInfo<8, i64, VR512, "q">;
Adam Nemet6bddb8c2014-09-29 22:54:41 +0000143def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
144def v8f64_info : X86VectorVTInfo<8, f64, VR512, "pd">;
Adam Nemet5ed17da2014-08-21 19:50:07 +0000145
Robert Khasanov2ea081d2014-08-25 14:49:34 +0000146// "x" in v32i8x_info means RC = VR256X
147def v32i8x_info : X86VectorVTInfo<32, i8, VR256X, "b">;
148def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
149def v8i32x_info : X86VectorVTInfo<8, i32, VR256X, "d">;
150def v4i64x_info : X86VectorVTInfo<4, i64, VR256X, "q">;
Robert Khasanov3e534c92014-10-28 16:37:13 +0000151def v8f32x_info : X86VectorVTInfo<8, f32, VR256X, "ps">;
152def v4f64x_info : X86VectorVTInfo<4, f64, VR256X, "pd">;
Robert Khasanov2ea081d2014-08-25 14:49:34 +0000153
154def v16i8x_info : X86VectorVTInfo<16, i8, VR128X, "b">;
155def v8i16x_info : X86VectorVTInfo<8, i16, VR128X, "w">;
156def v4i32x_info : X86VectorVTInfo<4, i32, VR128X, "d">;
157def v2i64x_info : X86VectorVTInfo<2, i64, VR128X, "q">;
Robert Khasanov3e534c92014-10-28 16:37:13 +0000158def v4f32x_info : X86VectorVTInfo<4, f32, VR128X, "ps">;
159def v2f64x_info : X86VectorVTInfo<2, f64, VR128X, "pd">;
Robert Khasanov2ea081d2014-08-25 14:49:34 +0000160
Elena Demikhovskyfa4a6c12014-12-09 07:06:32 +0000161// We map scalar types to the smallest (128-bit) vector type
162// with the appropriate element type. This allows us to use the same masking logic.
Asaf Badouh2744d212015-09-20 14:31:19 +0000163def i32x_info : X86VectorVTInfo<1, i32, GR32, "si">;
164def i64x_info : X86VectorVTInfo<1, i64, GR64, "sq">;
Elena Demikhovsky905a5a62014-11-26 10:46:49 +0000165def f32x_info : X86VectorVTInfo<1, f32, VR128X, "ss">;
166def f64x_info : X86VectorVTInfo<1, f64, VR128X, "sd">;
167
// Groups three X86VectorVTInfo records under the field names info512,
// info256 and info128. The avx512vl_*_info defs that follow instantiate it
// with the VR512, VR256X and VR128X records of a single element type.
Robert Khasanov2ea081d2014-08-25 14:49:34 +0000168class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
169 X86VectorVTInfo i128> {
170 X86VectorVTInfo info512 = i512;
171 X86VectorVTInfo info256 = i256;
172 X86VectorVTInfo info128 = i128;
173}
174
175def avx512vl_i8_info : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
176 v16i8x_info>;
177def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
178 v8i16x_info>;
179def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
180 v4i32x_info>;
181def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
182 v2i64x_info>;
Robert Khasanovaf318f72014-10-30 14:21:47 +0000183def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
184 v4f32x_info>;
185def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
186 v2f64x_info>;
Robert Khasanov2ea081d2014-08-25 14:49:34 +0000187
// Type information for a mask (vNi1) vector. It exposes the same KRC, KRCWM
// and KVT field names that X86VectorVTInfo has, but takes them directly as
// template arguments:
//   _krc   - mask register class (e.g. VK8)
//   _krcwm - corresponding write-mask register class (e.g. VK8WM)
//   _vt    - the mask value type (e.g. v8i1)
Ayman Musa721d97f2017-06-27 12:08:37 +0000188class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
189 ValueType _vt> {
190 RegisterClass KRC = _krc;
191 RegisterClass KRCWM = _krcwm;
192 ValueType KVT = _vt;
193}
194
Michael Zuckerman9e588312017-10-31 10:00:19 +0000195def v1i1_info : X86KVectorVTInfo<VK1, VK1WM, v1i1>;
Ayman Musa721d97f2017-06-27 12:08:37 +0000196def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>;
197def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>;
198def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>;
199def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
200def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
201def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;
202
Adam Nemet52bb6cf2014-10-08 23:25:23 +0000203// This multiclass generates the masking variants from the non-masking
204// variant. It only provides the assembly pieces for the masking variants.
205// It assumes custom ISel patterns for masking which can be provided as
206// template arguments.
Adam Nemet34801422014-10-08 23:25:39 +0000207multiclass AVX512_maskable_custom<bits<8> O, Format F,
208 dag Outs,
209 dag Ins, dag MaskingIns, dag ZeroMaskingIns,
210 string OpcodeStr,
211 string AttSrcAsm, string IntelSrcAsm,
212 list<dag> Pattern,
213 list<dag> MaskingPattern,
214 list<dag> ZeroMaskingPattern,
Simon Pilgrimd3e21c62017-12-09 16:20:54 +0000215 InstrItinClass itin,
Adam Nemet34801422014-10-08 23:25:39 +0000216 string MaskingConstraint = "",
Vyacheslav Klochkov6daefcf2016-08-11 22:07:33 +0000217 bit IsCommutable = 0,
218 bit IsKCommutable = 0> {
Adam Nemet52bb6cf2014-10-08 23:25:23 +0000219 let isCommutable = IsCommutable in
220 def NAME: AVX512<O, F, Outs, Ins,
Elena Demikhovsky0d7e9362015-05-11 06:05:05 +0000221 OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
Craig Topper9d2cab72016-01-11 01:03:40 +0000222 "$dst, "#IntelSrcAsm#"}",
Adam Nemet52bb6cf2014-10-08 23:25:23 +0000223 Pattern, itin>;
224
225 // Prefer over VMOV*rrk Pat<>
Craig Topper63801df2017-02-19 21:44:35 +0000226 let isCommutable = IsKCommutable in
Adam Nemet52bb6cf2014-10-08 23:25:23 +0000227 def NAME#k: AVX512<O, F, Outs, MaskingIns,
Elena Demikhovsky0d7e9362015-05-11 06:05:05 +0000228 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
229 "$dst {${mask}}, "#IntelSrcAsm#"}",
Adam Nemet52bb6cf2014-10-08 23:25:23 +0000230 MaskingPattern, itin>,
231 EVEX_K {
232 // In case of the 3src subclass this is overridden with a let.
233 string Constraints = MaskingConstraint;
Vyacheslav Klochkov6daefcf2016-08-11 22:07:33 +0000234 }
235
236 // Zero mask does not add any restrictions to commute operands transformation.
237 // So, it is Ok to use IsCommutable instead of IsKCommutable.
Craig Topper63801df2017-02-19 21:44:35 +0000238 let isCommutable = IsCommutable in // Prefer over VMOV*rrkz Pat<>
Adam Nemet52bb6cf2014-10-08 23:25:23 +0000239 def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
Elena Demikhovsky0d7e9362015-05-11 06:05:05 +0000240 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
241 "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
Adam Nemet52bb6cf2014-10-08 23:25:23 +0000242 ZeroMaskingPattern,
243 itin>,
244 EVEX_KZ;
245}
246
Robert Khasanov2ea081d2014-08-25 14:49:34 +0000247
Adam Nemet34801422014-10-08 23:25:39 +0000248// Common base class of AVX512_maskable and AVX512_maskable_3src.
// Turns the RHS / MaskingRHS dags into the three pattern lists expected by
// AVX512_maskable_custom:
//   unmasked     : (set _.RC:$dst, RHS)
//   masking      : (set _.RC:$dst, MaskingRHS)
//   zero-masking : (set _.RC:$dst, (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))
// All other arguments are forwarded unchanged. Select defaults to vselect.
249multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
250 dag Outs,
251 dag Ins, dag MaskingIns, dag ZeroMaskingIns,
252 string OpcodeStr,
253 string AttSrcAsm, string IntelSrcAsm,
254 dag RHS, dag MaskingRHS,
Simon Pilgrim07dc6d62017-12-06 13:14:44 +0000255 InstrItinClass itin,
Elena Demikhovsky0d7e9362015-05-11 06:05:05 +0000256 SDNode Select = vselect,
Adam Nemet34801422014-10-08 23:25:39 +0000257 string MaskingConstraint = "",
Vyacheslav Klochkov6daefcf2016-08-11 22:07:33 +0000258 bit IsCommutable = 0,
259 bit IsKCommutable = 0> :
Adam Nemet34801422014-10-08 23:25:39 +0000260 AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
261 AttSrcAsm, IntelSrcAsm,
262 [(set _.RC:$dst, RHS)],
263 [(set _.RC:$dst, MaskingRHS)],
264 [(set _.RC:$dst,
Elena Demikhovsky905a5a62014-11-26 10:46:49 +0000265 (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
Simon Pilgrimd3e21c62017-12-09 16:20:54 +0000266 itin, MaskingConstraint, IsCommutable,
Vyacheslav Klochkov6daefcf2016-08-11 22:07:33 +0000267 IsKCommutable>;
Adam Nemet2e2537f2014-08-07 17:53:55 +0000268
Adam Nemet2e91ee52014-08-14 17:13:19 +0000269// This multiclass generates the unconditional/non-masking, the masking and
Elena Demikhovsky905a5a62014-11-26 10:46:49 +0000270// the zero-masking variant of the vector instruction. In the masking case, the
Adam Nemet2e91ee52014-08-14 17:13:19 +0000271// preserved vector elements come from a new dummy input operand tied to $dst.
Craig Topper3a622a12017-08-17 15:40:25 +0000272// This version uses a separate dag for non-masking and masking.
273multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
274 dag Outs, dag Ins, string OpcodeStr,
275 string AttSrcAsm, string IntelSrcAsm,
276 dag RHS, dag MaskRHS,
Simon Pilgrimaa902be2017-12-06 15:48:40 +0000277 InstrItinClass itin,
Craig Topper3a622a12017-08-17 15:40:25 +0000278 bit IsCommutable = 0, bit IsKCommutable = 0,
279 SDNode Select = vselect> :
280 AVX512_maskable_custom<O, F, Outs, Ins,
281 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
282 !con((ins _.KRCWM:$mask), Ins),
283 OpcodeStr, AttSrcAsm, IntelSrcAsm,
284 [(set _.RC:$dst, RHS)],
285 [(set _.RC:$dst,
286 (Select _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
287 [(set _.RC:$dst,
288 (Select _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
Simon Pilgrimd3e21c62017-12-09 16:20:54 +0000289 itin, "$src0 = $dst", IsCommutable, IsKCommutable>;
Craig Topper3a622a12017-08-17 15:40:25 +0000290
291// This multiclass generates the unconditional/non-masking, the masking and
292// the zero-masking variant of the vector instruction. In the masking case, the
293// preserved vector elements come from a new dummy input operand tied to $dst.
Adam Nemet34801422014-10-08 23:25:39 +0000294multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
295 dag Outs, dag Ins, string OpcodeStr,
296 string AttSrcAsm, string IntelSrcAsm,
Elena Demikhovsky0d7e9362015-05-11 06:05:05 +0000297 dag RHS,
Simon Pilgrimaa902be2017-12-06 15:48:40 +0000298 InstrItinClass itin,
Vyacheslav Klochkov6daefcf2016-08-11 22:07:33 +0000299 bit IsCommutable = 0, bit IsKCommutable = 0,
300 SDNode Select = vselect> :
Adam Nemet34801422014-10-08 23:25:39 +0000301 AVX512_maskable_common<O, F, _, Outs, Ins,
302 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
303 !con((ins _.KRCWM:$mask), Ins),
304 OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
Simon Pilgrim07dc6d62017-12-06 13:14:44 +0000305 (Select _.KRCWM:$mask, RHS, _.RC:$src0), itin,
306 Select, "$src0 = $dst", IsCommutable, IsKCommutable>;
Elena Demikhovsky905a5a62014-11-26 10:46:49 +0000307
308// This multiclass generates the unconditional/non-masking, the masking and
309// the zero-masking variant of the scalar instruction.
310multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
311 dag Outs, dag Ins, string OpcodeStr,
312 string AttSrcAsm, string IntelSrcAsm,
Elena Demikhovsky0d7e9362015-05-11 06:05:05 +0000313 dag RHS,
Simon Pilgrim4a9b1e12017-12-05 16:10:57 +0000314 InstrItinClass itin,
Elena Demikhovsky905a5a62014-11-26 10:46:49 +0000315 bit IsCommutable = 0> :
Craig Topper1aa49ca2017-09-01 07:58:14 +0000316 AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
317 RHS, itin, IsCommutable, 0, X86selects>;
Adam Nemet2e91ee52014-08-14 17:13:19 +0000318
Adam Nemet34801422014-10-08 23:25:39 +0000319// Similar to AVX512_maskable but in this case one of the source operands
Adam Nemet2e91ee52014-08-14 17:13:19 +0000320// ($src1) is already tied to $dst so we just use that for the preserved
321// vector elements. NOTE that the NonTiedIns (the ins dag) should exclude
322// $src1.
// Operand lists handed to AVX512_maskable_common:
//   Ins            = ($src1) ++ NonTiedIns
//   MaskingIns     = ($src1, $mask) ++ NonTiedIns
//   ZeroMaskingIns = ($src1, $mask) ++ NonTiedIns
// The masking pattern is (Select $mask, RHS, $src1). MaskingConstraint is
// passed as "" here.
// When MaskOnly is set, the RHS passed on for the unmasked pattern is
// (null_frag) instead of RHS; the masking pattern still uses the real RHS.
// NOTE(review): AVX512_maskable_common also builds its zero-masking pattern
// from the RHS it receives, so with MaskOnly that pattern is built from
// (null_frag) as well -- confirm this is intended.
Adam Nemet34801422014-10-08 23:25:39 +0000323multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
324 dag Outs, dag NonTiedIns, string OpcodeStr,
325 string AttSrcAsm, string IntelSrcAsm,
Simon Pilgrim07dc6d62017-12-06 13:14:44 +0000326 dag RHS, InstrItinClass itin,
Simon Pilgrim6a009702017-11-29 17:21:15 +0000327 bit IsCommutable = 0,
Craig Topper1aa49ca2017-09-01 07:58:14 +0000328 bit IsKCommutable = 0,
Craig Topperb16598d2017-09-01 07:58:16 +0000329 SDNode Select = vselect,
330 bit MaskOnly = 0> :
Adam Nemet34801422014-10-08 23:25:39 +0000331 AVX512_maskable_common<O, F, _, Outs,
332 !con((ins _.RC:$src1), NonTiedIns),
333 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
334 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
Craig Topperb16598d2017-09-01 07:58:16 +0000335 OpcodeStr, AttSrcAsm, IntelSrcAsm,
336 !if(MaskOnly, (null_frag), RHS),
Simon Pilgrim07dc6d62017-12-06 13:14:44 +0000337 (Select _.KRCWM:$mask, RHS, _.RC:$src1), itin,
338 Select, "", IsCommutable, IsKCommutable>;
Adam Nemet2e91ee52014-08-14 17:13:19 +0000339
// Scalar flavor of AVX512_maskable_3src: forwards every argument unchanged
// and passes X86selects as the Select node in place of the default vselect.
Igor Breger15820b02015-07-01 13:24:28 +0000340multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
341 dag Outs, dag NonTiedIns, string OpcodeStr,
342 string AttSrcAsm, string IntelSrcAsm,
Simon Pilgrim4a9b1e12017-12-05 16:10:57 +0000343 dag RHS, InstrItinClass itin,
Simon Pilgrim6a009702017-11-29 17:21:15 +0000344 bit IsCommutable = 0,
Craig Topperb16598d2017-09-01 07:58:16 +0000345 bit IsKCommutable = 0,
346 bit MaskOnly = 0> :
Craig Topper1aa49ca2017-09-01 07:58:14 +0000347 AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
Simon Pilgrim6a009702017-11-29 17:21:15 +0000348 IntelSrcAsm, RHS, itin, IsCommutable, IsKCommutable,
Craig Topperb16598d2017-09-01 07:58:16 +0000349 X86selects, MaskOnly>;
Adam Nemet2b5cdbb2014-10-08 23:25:33 +0000350
// Generates the unmasked, masking and zero-masking defs via
// AVX512_maskable_custom, but only the unmasked def receives Pattern; the
// masking and zero-masking defs are given empty pattern lists ([]).
// The masking def gets ($src0, $mask) prepended to Ins with the constraint
// "$src0 = $dst"; the zero-masking def gets ($mask) prepended to Ins.
Adam Nemet34801422014-10-08 23:25:39 +0000351multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
352 dag Outs, dag Ins,
353 string OpcodeStr,
354 string AttSrcAsm, string IntelSrcAsm,
Simon Pilgrim569e53b2017-12-03 21:43:54 +0000355 list<dag> Pattern,
Simon Pilgrim4a9b1e12017-12-05 16:10:57 +0000356 InstrItinClass itin> :
Adam Nemet34801422014-10-08 23:25:39 +0000357 AVX512_maskable_custom<O, F, Outs, Ins,
358 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
359 !con((ins _.KRCWM:$mask), Ins),
Elena Demikhovsky0d7e9362015-05-11 06:05:05 +0000360 OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
Simon Pilgrimd3e21c62017-12-09 16:20:54 +0000361 itin, "$src0 = $dst">;
Adam Nemet2b5cdbb2014-10-08 23:25:33 +0000362
Elena Demikhovsky431b81e2015-04-21 13:13:46 +0000363
364// Instruction with mask that puts result in mask register,
365// like "compare" and "vptest"
// Emits two defs: NAME (unmasked; uses Ins and Pattern) and NAME#k (masked;
// uses MaskingIns and MaskingPattern, tagged EVEX_K). No zero-masking def is
// produced here.
// NOTE(review): the brace-less `let isCommutable = IsCommutable in` applies
// only to the NAME def that immediately follows it; NAME#k is outside that
// let -- confirm this is intentional.
366multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
367 dag Outs,
368 dag Ins, dag MaskingIns,
369 string OpcodeStr,
370 string AttSrcAsm, string IntelSrcAsm,
371 list<dag> Pattern,
Craig Topper225da2c2016-08-27 05:22:15 +0000372 list<dag> MaskingPattern,
Simon Pilgrim4a9b1e12017-12-05 16:10:57 +0000373 InstrItinClass itin,
Craig Topper225da2c2016-08-27 05:22:15 +0000374 bit IsCommutable = 0> {
375 let isCommutable = IsCommutable in
Elena Demikhovsky431b81e2015-04-21 13:13:46 +0000376 def NAME: AVX512<O, F, Outs, Ins,
Craig Topper156622a2016-01-11 00:44:56 +0000377 OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
378 "$dst, "#IntelSrcAsm#"}",
Simon Pilgrimbb791b32017-11-30 13:18:06 +0000379 Pattern, itin>;
Elena Demikhovsky431b81e2015-04-21 13:13:46 +0000380
381 def NAME#k: AVX512<O, F, Outs, MaskingIns,
Craig Topper156622a2016-01-11 00:44:56 +0000382 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
383 "$dst {${mask}}, "#IntelSrcAsm#"}",
Simon Pilgrimbb791b32017-11-30 13:18:06 +0000384 MaskingPattern, itin>, EVEX_K;
Elena Demikhovsky431b81e2015-04-21 13:13:46 +0000385}
386
// Wraps RHS and MaskingRHS in (set _.KRC:$dst, ...) patterns -- the result
// goes to the mask register class of the vector type `_` -- and forwards
// everything else to AVX512_maskable_custom_cmp.
387multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
388 dag Outs,
389 dag Ins, dag MaskingIns,
390 string OpcodeStr,
391 string AttSrcAsm, string IntelSrcAsm,
Craig Topper225da2c2016-08-27 05:22:15 +0000392 dag RHS, dag MaskingRHS,
Simon Pilgrim4a9b1e12017-12-05 16:10:57 +0000393 InstrItinClass itin,
Craig Topper225da2c2016-08-27 05:22:15 +0000394 bit IsCommutable = 0> :
Elena Demikhovsky431b81e2015-04-21 13:13:46 +0000395 AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
396 AttSrcAsm, IntelSrcAsm,
397 [(set _.KRC:$dst, RHS)],
Simon Pilgrimbb791b32017-11-30 13:18:06 +0000398 [(set _.KRC:$dst, MaskingRHS)], itin, IsCommutable>;
Elena Demikhovsky431b81e2015-04-21 13:13:46 +0000399
// Compare-style instruction with a mask-register result. The masked def
// takes an extra _.KRCWM:$mask input prepended to Ins, and its pattern is
// (and _.KRCWM:$mask, RHS).
400multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
401 dag Outs, dag Ins, string OpcodeStr,
402 string AttSrcAsm, string IntelSrcAsm,
Simon Pilgrim4a9b1e12017-12-05 16:10:57 +0000403 dag RHS, InstrItinClass itin,
Simon Pilgrimbb791b32017-11-30 13:18:06 +0000404 bit IsCommutable = 0> :
Elena Demikhovsky431b81e2015-04-21 13:13:46 +0000405 AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
406 !con((ins _.KRCWM:$mask), Ins),
407 OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
Simon Pilgrimbb791b32017-11-30 13:18:06 +0000408 (and _.KRCWM:$mask, RHS), itin, IsCommutable>;
Elena Demikhovsky431b81e2015-04-21 13:13:46 +0000409
// Same operand lists as AVX512_maskable_cmp (the masked def prepends
// _.KRCWM:$mask to Ins), but both the unmasked and masked pattern lists are
// empty. IsCommutable is not passed, so it stays at its default of 0.
Elena Demikhovsky29792e92015-05-07 11:24:42 +0000410multiclass AVX512_maskable_cmp_alt<bits<8> O, Format F, X86VectorVTInfo _,
411 dag Outs, dag Ins, string OpcodeStr,
Simon Pilgrimbb791b32017-11-30 13:18:06 +0000412 string AttSrcAsm, string IntelSrcAsm,
Simon Pilgrim4a9b1e12017-12-05 16:10:57 +0000413 InstrItinClass itin> :
Elena Demikhovsky29792e92015-05-07 11:24:42 +0000414 AVX512_maskable_custom_cmp<O, F, Outs,
415 Ins, !con((ins _.KRCWM:$mask),Ins), OpcodeStr,
Simon Pilgrimbb791b32017-11-30 13:18:06 +0000416 AttSrcAsm, IntelSrcAsm, [],[], itin>;
Elena Demikhovsky29792e92015-05-07 11:24:42 +0000417
Craig Topperabe80cc2016-08-28 06:06:28 +0000418// This multiclass generates the unconditional/non-masking, the masking and
419// the zero-masking variant of the vector instruction. In the masking case, the
420// preserved vector elements come from a new dummy input operand tied to $dst.
421multiclass AVX512_maskable_logic<bits<8> O, Format F, X86VectorVTInfo _,
422 dag Outs, dag Ins, string OpcodeStr,
423 string AttSrcAsm, string IntelSrcAsm,
424 dag RHS, dag MaskedRHS,
Simon Pilgrim4a9b1e12017-12-05 16:10:57 +0000425 InstrItinClass itin,
Craig Topperabe80cc2016-08-28 06:06:28 +0000426 bit IsCommutable = 0, SDNode Select = vselect> :
427 AVX512_maskable_custom<O, F, Outs, Ins,
428 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
429 !con((ins _.KRCWM:$mask), Ins),
430 OpcodeStr, AttSrcAsm, IntelSrcAsm,
431 [(set _.RC:$dst, RHS)],
432 [(set _.RC:$dst,
433 (Select _.KRCWM:$mask, MaskedRHS, _.RC:$src0))],
434 [(set _.RC:$dst,
435 (Select _.KRCWM:$mask, MaskedRHS,
436 _.ImmAllZerosV))],
Simon Pilgrimd3e21c62017-12-09 16:20:54 +0000437 itin, "$src0 = $dst", IsCommutable>;
Craig Topperabe80cc2016-08-28 06:06:28 +0000438
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000439
Craig Topper9d9251b2016-05-08 20:10:20 +0000440// Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
441// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
Marina Yatsina6fc2aaa2018-01-22 10:05:23 +0000442// swizzled by ExecutionDomainFix to pxor.
Craig Topper9d9251b2016-05-08 20:10:20 +0000443// We set canFoldAsLoad because this can be converted to a constant-pool
444// load of an all-zeros value if folding it would be beneficial.
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000445let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
Craig Topper86748492016-07-11 05:36:41 +0000446 isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000447def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
Craig Topper9d9251b2016-05-08 20:10:20 +0000448 [(set VR512:$dst, (v16i32 immAllZerosV))]>;
Craig Topper516e14c2016-07-11 05:36:48 +0000449def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
450 [(set VR512:$dst, (v16i32 immAllOnesV))]>;
Craig Topperfb1746b2014-01-30 06:03:19 +0000451}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000452
Craig Topper6393afc2017-01-09 02:44:34 +0000453// Alias instructions that allow VPTERNLOG to be used with a mask to create
454// a mix of all ones and all zeros elements. This is done this way to force
455// the same register to be used as input for all three sources.
Simon Pilgrim26f106f2017-12-08 15:17:32 +0000456let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
Craig Topper6393afc2017-01-09 02:44:34 +0000457def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
458 (ins VK16WM:$mask), "",
459 [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
460 (v16i32 immAllOnesV),
461 (v16i32 immAllZerosV)))]>;
462def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
463 (ins VK8WM:$mask), "",
464 [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
465 (bc_v8i64 (v16i32 immAllOnesV)),
466 (bc_v8i64 (v16i32 immAllZerosV))))]>;
467}
468
Craig Toppere5ce84a2016-05-08 21:33:53 +0000469let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
Craig Topper09b7e0f2017-01-14 07:29:24 +0000470 isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
Craig Toppere5ce84a2016-05-08 21:33:53 +0000471def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
472 [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
473def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
474 [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
475}
476
Craig Topperadd9cc62016-12-18 06:23:14 +0000477// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
478// This is expanded by ExpandPostRAPseudos.
479let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
Craig Topper09b7e0f2017-01-14 07:29:24 +0000480 isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
Craig Topperadd9cc62016-12-18 06:23:14 +0000481 def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
482 [(set FR32X:$dst, fp32imm0)]>;
483 def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
484 [(set FR64X:$dst, fpimm0)]>;
485}
486
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000487//===----------------------------------------------------------------------===//
488// AVX-512 - VECTOR INSERT
489//
Craig Topper3a622a12017-08-17 15:40:25 +0000490
491// Supports two different pattern operators for mask and unmasked ops. Allows
492// null_frag to be passed for one.
493multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
494 X86VectorVTInfo To,
495 SDPatternOperator vinsert_insert,
Simon Pilgrim031d8b72017-12-01 18:40:32 +0000496 SDPatternOperator vinsert_for_mask,
497 OpndItins itins> {
Craig Topperc228d792017-09-05 05:49:44 +0000498 let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
Craig Topper3a622a12017-08-17 15:40:25 +0000499 defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
Ayman Musaf77219e2017-02-13 09:55:48 +0000500 (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
Igor Breger0ede3cb2015-09-20 06:52:42 +0000501 "vinsert" # From.EltTypeName # "x" # From.NumElts,
502 "$src3, $src2, $src1", "$src1, $src2, $src3",
503 (vinsert_insert:$src3 (To.VT To.RC:$src1),
504 (From.VT From.RC:$src2),
Craig Topper3a622a12017-08-17 15:40:25 +0000505 (iPTR imm)),
506 (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
507 (From.VT From.RC:$src2),
Simon Pilgrim031d8b72017-12-01 18:40:32 +0000508 (iPTR imm)), itins.rr>,
509 AVX512AIi8Base, EVEX_4V, Sched<[itins.Sched]>;
Craig Topperc228d792017-09-05 05:49:44 +0000510 let mayLoad = 1 in
Craig Topper3a622a12017-08-17 15:40:25 +0000511 defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
Ayman Musaf77219e2017-02-13 09:55:48 +0000512 (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
Igor Breger0ede3cb2015-09-20 06:52:42 +0000513 "vinsert" # From.EltTypeName # "x" # From.NumElts,
514 "$src3, $src2, $src1", "$src1, $src2, $src3",
515 (vinsert_insert:$src3 (To.VT To.RC:$src1),
516 (From.VT (bitconvert (From.LdFrag addr:$src2))),
Craig Topper3a622a12017-08-17 15:40:25 +0000517 (iPTR imm)),
518 (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
519 (From.VT (bitconvert (From.LdFrag addr:$src2))),
Simon Pilgrim031d8b72017-12-01 18:40:32 +0000520 (iPTR imm)), itins.rm>, AVX512AIi8Base, EVEX_4V,
521 EVEX_CD8<From.EltSize, From.CD8TupleForm>,
522 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Adam Nemet4e2ef472014-10-02 23:18:28 +0000523 }
Adam Nemet4285c1f2014-10-15 23:42:17 +0000524}
Adam Nemet4e2ef472014-10-02 23:18:28 +0000525
Craig Topper3a622a12017-08-17 15:40:25 +0000526// Passes the same pattern operator for masked and unmasked ops.
527multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
528 X86VectorVTInfo To,
Simon Pilgrim031d8b72017-12-01 18:40:32 +0000529 SDPatternOperator vinsert_insert,
530 OpndItins itins> :
531 vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, itins>;
Craig Topper3a622a12017-08-17 15:40:25 +0000532
// Emits two selection patterns, guarded by the predicate list p, that map
// vinsert_insert on a (To.VT, From.VT) pair onto instructions looked up by
// name: InstrStr#"rr" for a register source and InstrStr#"rm" for a source
// loaded through From.LdFrag. The immediate operand of the selected
// instruction is produced by applying INSERT_get_vinsert_imm to the matched
// $ins node.
Igor Breger0ede3cb2015-09-20 06:52:42 +0000533multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
534 X86VectorVTInfo To, PatFrag vinsert_insert,
535 SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
536 let Predicates = p in {
Adam Nemet4285c1f2014-10-15 23:42:17 +0000537 def : Pat<(vinsert_insert:$ins
Igor Breger0ede3cb2015-09-20 06:52:42 +0000538 (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
539 (To.VT (!cast<Instruction>(InstrStr#"rr")
540 To.RC:$src1, From.RC:$src2,
541 (INSERT_get_vinsert_imm To.RC:$ins)))>;
542
543 def : Pat<(vinsert_insert:$ins
544 (To.VT To.RC:$src1),
545 (From.VT (bitconvert (From.LdFrag addr:$src2))),
546 (iPTR imm)),
547 (To.VT (!cast<Instruction>(InstrStr#"rm")
548 To.RC:$src1, addr:$src2,
549 (INSERT_get_vinsert_imm To.RC:$ins)))>;
550 }
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000551}
552
// Instantiates the subvector-insert instruction family for one element kind.
//   EltVT32 / EltVT64     - the 32-bit and 64-bit element value types.
//   Opcode128 / Opcode256 - opcode used when the inserted subvector is
//                           128 / 256 bits wide.
//   itins                 - itinerary/scheduling bundle forwarded to every
//                           instantiation.
multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
                            ValueType EltVT64, int Opcode256,
                            OpndItins itins> {

  // 4 x 32-bit elements inserted into an 8 x 32-bit vector.
  let Predicates = [HasVLX] in {
    defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
                               X86VectorVTInfo< 4, EltVT32, VR128X>,
                               X86VectorVTInfo< 8, EltVT32, VR256X>,
                               vinsert128_insert, itins>,
                             EVEX_V256;
  }

  // 4 x 32-bit elements inserted into a 16 x 32-bit vector.
  defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
                          X86VectorVTInfo< 4, EltVT32, VR128X>,
                          X86VectorVTInfo<16, EltVT32, VR512>,
                          vinsert128_insert, itins>,
                        EVEX_V512;

  // 4 x 64-bit elements inserted into an 8 x 64-bit vector.
  defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
                          X86VectorVTInfo< 4, EltVT64, VR256X>,
                          X86VectorVTInfo< 8, EltVT64, VR512>,
                          vinsert256_insert, itins>,
                        VEX_W, EVEX_V512;

  // Even with DQI we'd like to only use these instructions for masking, so
  // the unmasked pattern operator is null_frag.
  let Predicates = [HasVLX, HasDQI] in {
    defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
                               X86VectorVTInfo< 2, EltVT64, VR128X>,
                               X86VectorVTInfo< 4, EltVT64, VR256X>,
                               null_frag, vinsert128_insert, itins>,
                             VEX_W, EVEX_V256;
  }

  // Even with DQI we'd like to only use these instructions for masking, so
  // the unmasked pattern operator is null_frag.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
                            X86VectorVTInfo< 2, EltVT64, VR128X>,
                            X86VectorVTInfo< 8, EltVT64, VR512>,
                            null_frag, vinsert128_insert, itins>,
                          VEX_W, EVEX_V512;

    defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
                            X86VectorVTInfo< 8, EltVT32, VR256X>,
                            X86VectorVTInfo<16, EltVT32, VR512>,
                            null_frag, vinsert256_insert, itins>,
                          EVEX_V512;
  }
}
596
// Itinerary bundles for the subvector inserts, one for the FP flavour and one
// for the integer flavour.
// FIXME: Is there a better scheduler itinerary for VINSERTF/VINSERTI?
let Sched = WriteFShuffle256 in
def AVX512_VINSERTF : OpndItins<IIC_SSE_SHUFP, IIC_SSE_SHUFP>;

let Sched = WriteShuffle256 in
def AVX512_VINSERTI : OpndItins<IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI>;

// Opcodes: 128-bit subvector insert first, 256-bit subvector insert second.
defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, AVX512_VINSERTF>;
defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, AVX512_VINSERTI>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000609
// Codegen patterns with the alternative element types.
// Even with AVX512DQ we'll still use these for unmasked operations.

// 64-bit elements, VEC128 into VEC256.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256",
                                 v2f64x_info, v4f64x_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256",
                                 v2i64x_info, v4i64x_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasVLX]>;

// 64-bit elements, VEC128 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z",
                                 v2f64x_info, v8f64_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z",
                                 v2i64x_info, v8i64_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasAVX512]>;

// 32-bit elements, VEC256 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTF64x4Z",
                                 v8f32x_info, v16f32_info,
                                 vinsert256_insert, INSERT_get_vinsert256_imm,
                                 [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z",
                                 v8i32x_info, v16i32_info,
                                 vinsert256_insert, INSERT_get_vinsert256_imm,
                                 [HasAVX512]>;

// Codegen pattern with the alternative types insert VEC128 into VEC256
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256",
                                 v8i16x_info, v16i16x_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256",
                                 v16i8x_info, v32i8x_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasVLX]>;

// Codegen pattern with the alternative types insert VEC128 into VEC512
defm : vinsert_for_size_lowering<"VINSERTI32x4Z",
                                 v8i16x_info, v32i16_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z",
                                 v16i8x_info, v64i8_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasAVX512]>;

// Codegen pattern with the alternative types insert VEC256 into VEC512
defm : vinsert_for_size_lowering<"VINSERTI64x4Z",
                                 v16i16x_info, v32i16_info,
                                 vinsert256_insert, INSERT_get_vinsert256_imm,
                                 [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z",
                                 v32i8x_info, v64i8_info,
                                 vinsert256_insert, INSERT_get_vinsert256_imm,
                                 [HasAVX512]>;
642
Craig Topperf7a19db2017-10-08 01:33:40 +0000643
// Patterns for a masked subvector insert where a bitconvert sits between the
// vselect and the insert: the insert is performed on To.VT / From.VT while
// the select (mask, passthru and result) is performed on Cast.VT. The match
// is mapped onto the masked forms ("rrk", "rmk", "rrkz", "rmkz") of InstrStr.
multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                 X86VectorVTInfo To, X86VectorVTInfo Cast,
                                 PatFrag vinsert_insert,
                                 SDNodeXForm INSERT_get_vinsert_imm,
                                 list<Predicate> p> {
let Predicates = p in {
  // Merge-masking, register subvector.
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT From.RC:$src2),
                                            (iPTR imm))),
                      Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rrk")
               Cast.RC:$src0, Cast.KRCWM:$mask,
               To.RC:$src1, From.RC:$src2,
               (INSERT_get_vinsert_imm To.RC:$ins))>;

  // Merge-masking, subvector loaded from memory.
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins
                          (To.VT To.RC:$src1),
                          (From.VT (bitconvert (From.LdFrag addr:$src2))),
                          (iPTR imm))),
                      Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rmk")
               Cast.RC:$src0, Cast.KRCWM:$mask,
               To.RC:$src1, addr:$src2,
               (INSERT_get_vinsert_imm To.RC:$ins))>;

  // Zero-masking, register subvector.
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT From.RC:$src2),
                                            (iPTR imm))),
                      Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rrkz")
               Cast.KRCWM:$mask,
               To.RC:$src1, From.RC:$src2,
               (INSERT_get_vinsert_imm To.RC:$ins))>;

  // Zero-masking, subvector loaded from memory.
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins
                          (To.VT To.RC:$src1),
                          (From.VT (bitconvert (From.LdFrag addr:$src2))),
                          (iPTR imm))),
                      Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rmkz")
               Cast.KRCWM:$mask,
               To.RC:$src1, addr:$src2,
               (INSERT_get_vinsert_imm To.RC:$ins))>;
}
}
697
// Masked insert of VEC128 into VEC256 with a bitcast between the vselect and
// the insert. The instruction is chosen by the type of the select (Cast):
// 32-bit element masks use the 32x4 form, 64-bit element masks use the 64x2
// form (which additionally needs DQI).

// Floating point.
defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
                             v8f32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
                             v4f64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

// Integer, select on 32-bit elements.
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;

// Integer, select on 64-bit elements. These must name the integer (I)
// instruction, matching the 512-bit VINSERTI64x2Z patterns and the
// VEXTRACTI64x2Z256 patterns; the FP (F) form was used here by mistake.
defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v4i32x_info, v8i32x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v8i16x_info, v16i16x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v16i8x_info, v32i8x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
723
// Masked insert of VEC128 into VEC512 with a bitcast between the vselect and
// the insert.

// Floating point.
defm : vinsert_for_mask_cast<"VINSERTF32x4Z",
                             v2f64x_info, v8f64_info, v16f32_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z",
                             v4f32x_info, v16f32_info, v8f64_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasDQI]>;

// Integer, select on 32-bit elements.
defm : vinsert_for_mask_cast<"VINSERTI32x4Z",
                             v2i64x_info, v8i64_info, v16i32_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z",
                             v8i16x_info, v32i16_info, v16i32_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z",
                             v16i8x_info, v64i8_info, v16i32_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasAVX512]>;

// Integer, select on 64-bit elements.
defm : vinsert_for_mask_cast<"VINSERTI64x2Z",
                             v4i32x_info, v16i32_info, v8i64_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z",
                             v8i16x_info, v32i16_info, v8i64_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z",
                             v16i8x_info, v64i8_info, v8i64_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasDQI]>;
749
// Masked insert of VEC256 into VEC512 with a bitcast between the vselect and
// the insert.

// Floating point.
defm : vinsert_for_mask_cast<"VINSERTF32x8Z",
                             v4f64x_info, v8f64_info, v16f32_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTF64x4Z",
                             v8f32x_info, v16f32_info, v8f64_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasAVX512]>;

// Integer, select on 32-bit elements.
defm : vinsert_for_mask_cast<"VINSERTI32x8Z",
                             v4i64x_info, v8i64_info, v16i32_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z",
                             v16i16x_info, v32i16_info, v16i32_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z",
                             v32i8x_info, v64i8_info, v16i32_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasDQI]>;

// Integer, select on 64-bit elements.
defm : vinsert_for_mask_cast<"VINSERTI64x4Z",
                             v8i32x_info, v16i32_info, v8i64_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z",
                             v16i16x_info, v32i16_info, v8i64_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z",
                             v32i8x_info, v64i8_info, v8i64_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasAVX512]>;
775
// vinsertps - insert f32 to XMM
// EVEX-encoded register and memory forms; both select X86insertps.
let ExeDomain = SSEPackedSingle in {
  // Second source is a register.
  def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg,
        (outs VR128X:$dst),
        (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
        "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set VR128X:$dst,
              (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))],
        IIC_SSE_INSERTPS_RR>,
      EVEX_4V, Sched<[WriteFShuffle]>;

  // Second source is an f32 loaded from memory and placed in a vector via
  // scalar_to_vector.
  def VINSERTPSZrm : AVX512AIi8<0x21, MRMSrcMem,
        (outs VR128X:$dst),
        (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
        "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set VR128X:$dst,
              (X86insertps VR128X:$src1,
                           (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                           imm:$src3))],
        IIC_SSE_INSERTPS_RM>,
      EVEX_4V, EVEX_CD8<32, CD8VT1>,
      Sched<[WriteFShuffleLd, ReadAfterLd]>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000791
792//===----------------------------------------------------------------------===//
793// AVX-512 VECTOR EXTRACT
794//---
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000795
// Defines the subvector-extract instructions for one (From, To) type pair:
//   rr  - register destination (plus its masked variants, generated by
//         AVX512_maskable_split),
//   mr  - store to memory,
//   mrk - masked store to memory (no selection pattern).
// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
multiclass vextract_for_size_split<int Opcode,
                                   X86VectorVTInfo From, X86VectorVTInfo To,
                                   SDPatternOperator vextract_extract,
                                   SDPatternOperator vextract_for_mask,
                                   OpndItins itins> {

  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
                (ins From.RC:$src1, u8imm:$idx),
                "vextract" # To.EltTypeName # "x" # To.NumElts,
                "$idx, $src1", "$src1, $idx",
                (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
                (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm)),
                itins.rr>, AVX512AIi8Base, EVEX, Sched<[itins.Sched]>;

    // mayStore is stated explicitly rather than left to be inferred from the
    // store pattern: callers may pass null_frag for vextract_extract, in
    // which case the pattern is not available for inference and, with
    // hasSideEffects = 0 above, the store would otherwise be unmarked.
    // NOTE(review): the scheduling info below is the load-folded class even
    // though this is a store - confirm whether that is intended.
    let mayStore = 1 in
    def mr  : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
                    "vextract" # To.EltTypeName # "x" # To.NumElts #
                        "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
                    [(store (To.VT (vextract_extract:$idx
                                    (From.VT From.RC:$src1), (iPTR imm))),
                             addr:$dst)], itins.rm>, EVEX,
                    Sched<[itins.Sched.Folded, ReadAfterLd]>;

    // Masked store form; assembler/disassembler only (empty pattern list).
    let mayStore = 1, hasSideEffects = 0 in
    def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, To.KRCWM:$mask,
                                        From.RC:$src1, u8imm:$idx),
                     "vextract" # To.EltTypeName # "x" # To.NumElts #
                          "\t{$idx, $src1, $dst {${mask}}|"
                          "$dst {${mask}}, $src1, $idx}",
                    [], itins.rm>, EVEX_K, EVEX,
                    Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
833
// Convenience wrapper over vextract_for_size_split for the common case in
// which the masked and the unmasked forms select on one and the same pattern
// operator.
multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
                             X86VectorVTInfo To,
                             SDPatternOperator vextract_extract,
                             OpndItins itins>
  : vextract_for_size_split<Opcode, From, To,
                            vextract_extract, vextract_extract,
                            itins>;
Craig Topper3a622a12017-08-17 15:40:25 +0000840
// Codegen patterns for the alternative types: a vextract_extract of a To-typed
// subvector out of a From-typed vector is mapped onto the already existing
// InstrStr#"rr" instruction, and a store of such an extract onto
// InstrStr#"mr". EXTRACT_get_vextract_imm converts the matched extract node
// ($ext) into the instruction's immediate. Guarded by predicates p.
multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
           X86VectorVTInfo To, PatFrag vextract_extract,
           SDNodeXForm EXTRACT_get_vextract_imm,
           list<Predicate> p> {
  let Predicates = p in {
     // Extract into a register.
     def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
               (To.VT (!cast<Instruction>(InstrStr#"rr")
                         From.RC:$src1,
                         (EXTRACT_get_vextract_imm To.RC:$ext)))>;

     // Extract whose result is stored straight to memory.
     def : Pat<(store (To.VT (vextract_extract:$ext
                                (From.VT From.RC:$src1), (iPTR imm))),
                      addr:$dst),
               (!cast<Instruction>(InstrStr#"mr")
                  addr:$dst, From.RC:$src1,
                  (EXTRACT_get_vextract_imm To.RC:$ext))>;
  }
}
856
// Instantiates the subvector-extract instruction family for one element kind.
//   EltVT32 / EltVT64     - the 32-bit and 64-bit element value types.
//   Opcode128 / Opcode256 - opcode used when the extracted subvector is
//                           128 / 256 bits wide.
//   itins                 - itinerary/scheduling bundle forwarded to every
//                           instantiation.
multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
                             ValueType EltVT64, int Opcode256,
                             OpndItins itins> {
  let Predicates = [HasAVX512] in {
    // 4 x 32-bit elements out of a 16 x 32-bit vector.
    defm NAME # "32x4Z" : vextract_for_size<Opcode128,
                            X86VectorVTInfo<16, EltVT32, VR512>,
                            X86VectorVTInfo< 4, EltVT32, VR128X>,
                            vextract128_extract, itins>,
                          EVEX_V512, EVEX_CD8<32, CD8VT4>;

    // 4 x 64-bit elements out of an 8 x 64-bit vector.
    defm NAME # "64x4Z" : vextract_for_size<Opcode256,
                            X86VectorVTInfo< 8, EltVT64, VR512>,
                            X86VectorVTInfo< 4, EltVT64, VR256X>,
                            vextract256_extract, itins>,
                          VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
  }

  // 4 x 32-bit elements out of an 8 x 32-bit vector.
  let Predicates = [HasVLX] in {
    defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
                               X86VectorVTInfo< 8, EltVT32, VR256X>,
                               X86VectorVTInfo< 4, EltVT32, VR128X>,
                               vextract128_extract, itins>,
                             EVEX_V256, EVEX_CD8<32, CD8VT4>;
  }

  // Even with DQI we'd like to only use these instructions for masking, so
  // the unmasked pattern operator is null_frag.
  let Predicates = [HasVLX, HasDQI] in {
    defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
                               X86VectorVTInfo< 4, EltVT64, VR256X>,
                               X86VectorVTInfo< 2, EltVT64, VR128X>,
                               null_frag, vextract128_extract, itins>,
                             VEX_W, EVEX_V256, EVEX_CD8<64, CD8VT2>;
  }

  // Even with DQI we'd like to only use these instructions for masking, so
  // the unmasked pattern operator is null_frag.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
                            X86VectorVTInfo< 8, EltVT64, VR512>,
                            X86VectorVTInfo< 2, EltVT64, VR128X>,
                            null_frag, vextract128_extract, itins>,
                          VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;

    defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
                            X86VectorVTInfo<16, EltVT32, VR512>,
                            X86VectorVTInfo< 8, EltVT32, VR256X>,
                            null_frag, vextract256_extract, itins>,
                          EVEX_V512, EVEX_CD8<32, CD8VT8>;
  }
}
901
// Itinerary bundles for the subvector extracts, one for the FP flavour and
// one for the integer flavour.
// FIXME: Is there a better scheduler itinerary for VEXTRACTF/VEXTRACTI?
let Sched = WriteFShuffle256 in
def AVX512_VEXTRACTF : OpndItins<IIC_SSE_SHUFP, IIC_SSE_SHUFP>;

let Sched = WriteShuffle256 in
def AVX512_VEXTRACTI : OpndItins<IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI>;

// Opcodes: 128-bit subvector extract first, 256-bit subvector extract second.
defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, AVX512_VEXTRACTF>;
defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, AVX512_VEXTRACTI>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000914
// extract_subvector codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.

// 64-bit elements, VEC128 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z",
                                  v8f64_info, v2f64x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z",
                                  v8i64_info, v2i64x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasAVX512]>;

// 32-bit elements, VEC256 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTF64x4Z",
                                  v16f32_info, v8f32x_info,
                                  vextract256_extract,
                                  EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z",
                                  v16i32_info, v8i32x_info,
                                  vextract256_extract,
                                  EXTRACT_get_vextract256_imm, [HasAVX512]>;

// 64-bit elements, VEC128 from VEC256.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256",
                                  v4f64x_info, v2f64x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256",
                                  v4i64x_info, v2i64x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen pattern with the alternative types extract VEC128 from VEC256
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256",
                                  v16i16x_info, v8i16x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256",
                                  v32i8x_info, v16i8x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen pattern with the alternative types extract VEC128 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z",
                                  v32i16_info, v8i16x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z",
                                  v64i8_info, v16i8x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasAVX512]>;

// Codegen pattern with the alternative types extract VEC256 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z",
                                  v32i16_info, v16i16x_info,
                                  vextract256_extract,
                                  EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z",
                                  v64i8_info, v32i8x_info,
                                  vextract256_extract,
                                  EXTRACT_get_vextract256_imm, [HasAVX512]>;
948
Craig Topper5f3fef82016-05-22 07:40:58 +0000949
// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX. Without VLX the 256-bit lower half is
// taken with EXTRACT_SUBREG (sub_ymm) and its upper 128 bits are extracted
// with VEXTRACTF128rr / VEXTRACTI128rr, immediate 1.
let Predicates = [NoVLX] in {
  // 64-bit elements.
  def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
            (v2i64 (VEXTRACTI128rr
                      (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                      (iPTR 1)))>;
  def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
            (v2f64 (VEXTRACTF128rr
                      (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                      (iPTR 1)))>;

  // 32-bit elements.
  def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
            (v4i32 (VEXTRACTI128rr
                      (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                      (iPTR 1)))>;
  def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
            (v4f32 (VEXTRACTF128rr
                      (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                      (iPTR 1)))>;

  // 16-bit and 8-bit elements.
  def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
            (v8i16 (VEXTRACTI128rr
                      (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                      (iPTR 1)))>;
  def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
            (v16i8 (VEXTRACTI128rr
                      (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                      (iPTR 1)))>;
}
978
// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX. With VLX the 256-bit lower half is
// taken with EXTRACT_SUBREG (sub_ymm) and its upper 128 bits are extracted
// with VEXTRACTF32x4Z256rr / VEXTRACTI32x4Z256rr, immediate 1.
let Predicates = [HasVLX] in {
  // 64-bit elements.
  def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
            (v2i64 (VEXTRACTI32x4Z256rr
                      (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                      (iPTR 1)))>;
  def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
            (v2f64 (VEXTRACTF32x4Z256rr
                      (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                      (iPTR 1)))>;

  // 32-bit elements.
  def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
            (v4i32 (VEXTRACTI32x4Z256rr
                      (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                      (iPTR 1)))>;
  def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
            (v4f32 (VEXTRACTF32x4Z256rr
                      (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                      (iPTR 1)))>;

  // 16-bit and 8-bit elements.
  def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
            (v8i16 (VEXTRACTI32x4Z256rr
                      (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                      (iPTR 1)))>;
  def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
            (v16i8 (VEXTRACTI32x4Z256rr
                      (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                      (iPTR 1)))>;
}
1007
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001008
// Additional patterns for handling a bitcast between the vselect and the
// extract_subvector. The extract is performed on From.VT -> To.VT while the
// select (mask, passthru and result) is performed on Cast.VT; the match is
// mapped onto the masked register forms ("rrk", "rrkz") of InstrStr.
multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
                                  PatFrag vextract_extract,
                                  SDNodeXForm EXTRACT_get_vextract_imm,
                                  list<Predicate> p> {
let Predicates = p in {
  // Merge-masking. The passthru $src0 is an operand of the Cast.VT vselect,
  // so it is named with Cast.RC on both sides of the pattern (as in
  // vinsert_for_mask_cast).
  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (vextract_extract:$ext
                                       (From.VT From.RC:$src), (iPTR imm)))),
                              Cast.RC:$src0)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
                      Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;

  // Zero-masking.
  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (vextract_extract:$ext
                                       (From.VT From.RC:$src), (iPTR imm)))),
                              Cast.ImmAllZerosV)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
                      Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
}
}
1036
// Masked extract of VEC128 from VEC256 with a bitcast between the vselect and
// the extract.

// Floating point.
defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256",
                              v4f64x_info, v2f64x_info, v4f32x_info,
                              vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256",
                              v8f32x_info, v4f32x_info, v2f64x_info,
                              vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

// Integer, select on 32-bit elements.
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256",
                              v4i64x_info, v2i64x_info, v4i32x_info,
                              vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256",
                              v16i16x_info, v8i16x_info, v4i32x_info,
                              vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256",
                              v32i8x_info, v16i8x_info, v4i32x_info,
                              vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;

// Integer, select on 64-bit elements.
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256",
                              v8i32x_info, v4i32x_info, v2i64x_info,
                              vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256",
                              v16i16x_info, v8i16x_info, v2i64x_info,
                              vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256",
                              v32i8x_info, v16i8x_info, v2i64x_info,
                              vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1062
// Masked extract of VEC128 from VEC512 with a bitcast between the vselect and
// the extract.

// Floating point.
defm : vextract_for_mask_cast<"VEXTRACTF32x4Z",
                              v8f64_info, v2f64x_info, v4f32x_info,
                              vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z",
                              v16f32_info, v4f32x_info, v2f64x_info,
                              vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

// Integer, select on 32-bit elements.
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z",
                              v8i64_info, v2i64x_info, v4i32x_info,
                              vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z",
                              v32i16_info, v8i16x_info, v4i32x_info,
                              vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z",
                              v64i8_info, v16i8x_info, v4i32x_info,
                              vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;

// Integer, select on 64-bit elements.
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z",
                              v16i32_info, v4i32x_info, v2i64x_info,
                              vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z",
                              v32i16_info, v8i16x_info, v2i64x_info,
                              vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z",
                              v64i8_info, v16i8x_info, v2i64x_info,
                              vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
1088
// Masked extract of VEC256 from VEC512 with a bitcast between the vselect and
// the extract.

// Floating point.
defm : vextract_for_mask_cast<"VEXTRACTF32x8Z",
                              v8f64_info, v4f64x_info, v8f32x_info,
                              vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x4Z",
                              v16f32_info, v8f32x_info, v4f64x_info,
                              vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;

// Integer, select on 32-bit elements.
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z",
                              v8i64_info, v4i64x_info, v8i32x_info,
                              vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z",
                              v32i16_info, v16i16x_info, v8i32x_info,
                              vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z",
                              v64i8_info, v32i8x_info, v8i32x_info,
                              vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;

// Integer, select on 64-bit elements.
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z",
                              v16i32_info, v8i32x_info, v4i64x_info,
                              vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z",
                              v32i16_info, v16i16x_info, v4i64x_info,
                              vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z",
                              v64i8_info, v32i8x_info, v4i64x_info,
                              vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
1114
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001115// vextractps - extract 32 bits from XMM
Craig Topper03b849e2016-05-21 22:50:11 +00001116def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
Craig Topperfc946a02015-01-25 02:21:13 +00001117 (ins VR128X:$src1, u8imm:$src2),
Elena Demikhovskycf088092013-12-11 14:31:04 +00001118 "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
Simon Pilgrimd255a622017-12-06 18:46:06 +00001119 [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))],
1120 IIC_SSE_EXTRACTPS_RR>, EVEX, VEX_WIG, Sched<[WriteFShuffle]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001121
// vextractps, memory destination: same element extraction as the register
// form, but the selected 32-bit element is stored to $dst.
def VEXTRACTPSZmr
    : AVX512AIi8<0x17, MRMDestMem,
                 (outs),
                 (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
                 "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                 [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)),
                                     imm:$src2),
                         addr:$dst)],
                 IIC_SSE_EXTRACTPS_RM>,
      EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteFShuffleLd]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001128
1129//===---------------------------------------------------------------------===//
1130// AVX-512 BROADCAST
1131//---
// Patterns for broadcasting a scalar that lives in SrcInfo.FRC.
//
// The scalar is copied into SrcInfo.RC and fed to the register form of the
// broadcast instruction named NAME#DestInfo.ZSuffix (r / rk / rkz).  opc and
// OpcodeStr are accepted but not referenced by the body.
multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
                                   X86VectorVTInfo DestInfo,
                                   X86VectorVTInfo SrcInfo> {
  // Unmasked broadcast.
  def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
            (!cast<Instruction>(NAME#DestInfo.ZSuffix#r)
               (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;

  // Masked broadcast, unselected lanes taken from $src0.
  def : Pat<(DestInfo.VT
               (vselect DestInfo.KRCWM:$mask,
                        (X86VBroadcast SrcInfo.FRC:$src),
                        DestInfo.RC:$src0)),
            (!cast<Instruction>(NAME#DestInfo.ZSuffix#rk)
               DestInfo.RC:$src0,
               DestInfo.KRCWM:$mask,
               (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;

  // Masked broadcast, unselected lanes zeroed.
  def : Pat<(DestInfo.VT
               (vselect DestInfo.KRCWM:$mask,
                        (X86VBroadcast SrcInfo.FRC:$src),
                        DestInfo.ImmAllZerosV)),
            (!cast<Instruction>(NAME#DestInfo.ZSuffix#rkz)
               DestInfo.KRCWM:$mask,
               (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;
}
Robert Khasanovaf318f72014-10-30 14:21:47 +00001150
// Split version to allow the mask type and the broadcast node type to differ;
// the two are tied together with a bitconvert.  This helps support the 32x2
// broadcasts.
//
//   SchedRR / SchedRM - scheduling classes attached to the register-source and
//                       memory-source forms respectively.
//   MaskInfo          - type the instructions are defined on; supplies the
//                       destination register class, the write-mask class and
//                       the ZSuffix used to name the selected instructions.
//   DestInfo          - type produced by the broadcast node.
//   SrcInfo           - type of the source register / scalar memory operand.
//   UnmaskedOp        - node matched by the unmasked patterns.  Callers may
//                       pass null_frag; the masked patterns always match
//                       X86VBroadcast.
multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
                                     SchedWrite SchedRR, SchedWrite SchedRM,
                                     X86VectorVTInfo MaskInfo,
                                     X86VectorVTInfo DestInfo,
                                     X86VectorVTInfo SrcInfo,
                                     SDPatternOperator UnmaskedOp = X86VBroadcast> {
  let ExeDomain = DestInfo.ExeDomain, hasSideEffects = 0 in {
  // Register source.
  defm r : AVX512_maskable_split<opc, MRMSrcReg, MaskInfo,
                   (outs MaskInfo.RC:$dst),
                   (ins SrcInfo.RC:$src), OpcodeStr, "$src", "$src",
                   (MaskInfo.VT
                    (bitconvert
                     (DestInfo.VT
                      (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))),
                   (MaskInfo.VT
                    (bitconvert
                     (DestInfo.VT
                      (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
                   NoItinerary>, T8PD, EVEX, Sched<[SchedRR]>;
  // Scalar memory source.
  let mayLoad = 1 in
  defm m : AVX512_maskable_split<opc, MRMSrcMem, MaskInfo,
                   (outs MaskInfo.RC:$dst),
                   (ins SrcInfo.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
                   (MaskInfo.VT
                    (bitconvert
                     (DestInfo.VT (UnmaskedOp
                                   (SrcInfo.ScalarLdFrag addr:$src))))),
                   (MaskInfo.VT
                    (bitconvert
                     (DestInfo.VT (X86VBroadcast
                                   (SrcInfo.ScalarLdFrag addr:$src))))),
                   NoItinerary>, T8PD, EVEX, EVEX_CD8<SrcInfo.EltSize, CD8VT1>,
                   Sched<[SchedRM]>;
  }

  // The scalar load may also reach the broadcast wrapped in scalar_to_vector;
  // fold that shape into the memory forms as well.
  def : Pat<(MaskInfo.VT
             (bitconvert
              (DestInfo.VT (UnmaskedOp
                            (SrcInfo.VT (scalar_to_vector
                                         (SrcInfo.ScalarLdFrag addr:$src))))))),
            (!cast<Instruction>(NAME#MaskInfo.ZSuffix#m) addr:$src)>;
  // The instruction name is derived from MaskInfo here, matching the m and mkz
  // patterns and the type the instructions above are defined on.
  def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask,
                          (bitconvert
                           (DestInfo.VT
                            (X86VBroadcast
                             (SrcInfo.VT (scalar_to_vector
                                          (SrcInfo.ScalarLdFrag addr:$src)))))),
                          MaskInfo.RC:$src0)),
            (!cast<Instruction>(NAME#MaskInfo.ZSuffix#mk)
             MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask, addr:$src)>;
  def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask,
                          (bitconvert
                           (DestInfo.VT
                            (X86VBroadcast
                             (SrcInfo.VT (scalar_to_vector
                                          (SrcInfo.ScalarLdFrag addr:$src)))))),
                          MaskInfo.ImmAllZerosV)),
            (!cast<Instruction>(NAME#MaskInfo.ZSuffix#mkz)
             MaskInfo.KRCWM:$mask, addr:$src)>;
}
Robert Khasanovaf318f72014-10-30 14:21:47 +00001213
// Convenience wrapper over avx512_broadcast_rm_split for the common case in
// which the mask type and the broadcast result type coincide: DestInfo is
// forwarded for both.
multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
                               SchedWrite SchedRR, SchedWrite SchedRM,
                               X86VectorVTInfo DestInfo,
                               X86VectorVTInfo SrcInfo>
    : avx512_broadcast_rm_split<opc, OpcodeStr,
                                SchedRR, SchedRM,
                                /*MaskInfo=*/DestInfo,
                                /*DestInfo=*/DestInfo,
                                SrcInfo>;
Craig Topper17854ec2017-08-30 07:48:39 +00001221
// Double-precision scalar broadcast: defines the 512-bit (Z) and 256-bit
// (Z256) variants, each with its instructions plus the scalar-register
// patterns.  The source is always the 128-bit info record.  No Z128 variant is
// defined here.
multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z
      : avx512_broadcast_rm<opc, OpcodeStr,
                            WriteFShuffle256, WriteFShuffle256Ld,
                            _.info512, _.info128>,
        avx512_broadcast_scalar<opc, OpcodeStr, _.info512, _.info128>,
        EVEX_V512;

  let Predicates = [HasVLX] in
  defm Z256
      : avx512_broadcast_rm<opc, OpcodeStr,
                            WriteFShuffle256, WriteFShuffle256Ld,
                            _.info256, _.info128>,
        avx512_broadcast_scalar<opc, OpcodeStr, _.info256, _.info128>,
        EVEX_V256;
}
1238
// Single-precision scalar broadcast: like avx512_fp_broadcast_sd but with an
// additional 128-bit (Z128) variant.  Every variant pairs the instruction
// definitions with the scalar-register patterns and reads from the 128-bit
// info record.
multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z
      : avx512_broadcast_rm<opc, OpcodeStr,
                            WriteFShuffle256, WriteFShuffle256Ld,
                            _.info512, _.info128>,
        avx512_broadcast_scalar<opc, OpcodeStr, _.info512, _.info128>,
        EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z256
        : avx512_broadcast_rm<opc, OpcodeStr,
                              WriteFShuffle256, WriteFShuffle256Ld,
                              _.info256, _.info128>,
          avx512_broadcast_scalar<opc, OpcodeStr, _.info256, _.info128>,
          EVEX_V256;
    defm Z128
        : avx512_broadcast_rm<opc, OpcodeStr,
                              WriteFShuffle256, WriteFShuffle256Ld,
                              _.info128, _.info128>,
          avx512_broadcast_scalar<opc, OpcodeStr, _.info128, _.info128>,
          EVEX_V128;
  }
}
// Scalar floating-point broadcasts.  The double-precision form additionally
// carries VEX_W.
defm VBROADCASTSS
    : avx512_fp_broadcast_ss<0x18, "vbroadcastss", avx512vl_f32_info>;
defm VBROADCASTSD
    : avx512_fp_broadcast_sd<0x19, "vbroadcastsd", avx512vl_f64_info>,
      VEX_W;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001263
// The 512-bit scalar-broadcast intrinsics select the memory forms directly.
// The patterns carry the same HasAVX512 predicate as the Z instructions they
// select, so they are not considered on subtargets where those instructions
// are unavailable.
let Predicates = [HasAVX512] in {
def : Pat<(int_x86_avx512_vbroadcast_ss_512 addr:$src),
          (VBROADCASTSSZm addr:$src)>;
def : Pat<(int_x86_avx512_vbroadcast_sd_512 addr:$src),
          (VBROADCASTSDZm addr:$src)>;
}
Quentin Colombet4bf1c282013-10-25 17:47:18 +00001268
// Integer broadcast from a general-purpose register of class SrcRC.  Defines
// the maskable "r" instruction whose mnemonic is "vpbroadcast" followed by the
// element suffix of the vector type, with the selection pattern attached to
// the instruction itself.
multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
                                    X86VectorVTInfo _,
                                    SDPatternOperator OpNode,
                                    RegisterClass SrcRC> {
  let ExeDomain = _.ExeDomain in
  defm r : AVX512_maskable<opc, MRMSrcReg, _,
                           (outs _.RC:$dst), (ins SrcRC:$src),
                           "vpbroadcast"##_.Suffix, "$src", "$src",
                           (_.VT (OpNode SrcRC:$src)),
                           NoItinerary>,
           T8PD, EVEX, Sched<[SchedRR]>;
}
1279
// Byte/word broadcast from a general-purpose register.
//
// The instruction operand is always GR32 and is defined without patterns.  The
// separate patterns below take the narrower SrcRC value and place it into the
// Subreg sub-register of an otherwise undefined i32 before handing it to the
// instruction.  Name is the full instruction-name prefix to which r / rk / rkz
// is appended.
multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name,
                                      SchedWrite SchedRR,
                                      X86VectorVTInfo _,
                                      SDPatternOperator OpNode,
                                      RegisterClass SrcRC,
                                      SubRegIndex Subreg> {
  let hasSideEffects = 0, ExeDomain = _.ExeDomain in
  defm r : AVX512_maskable_custom<opc, MRMSrcReg,
                                  (outs _.RC:$dst),
                                  // Unmasked inputs.
                                  (ins GR32:$src),
                                  // Merge-masked inputs.
                                  (ins _.RC:$src0, _.KRCWM:$mask, GR32:$src),
                                  // Zero-masked inputs.
                                  (ins _.KRCWM:$mask, GR32:$src),
                                  "vpbroadcast"##_.Suffix, "$src", "$src",
                                  [], [], [],
                                  NoItinerary, "$src0 = $dst">,
           T8PD, EVEX, Sched<[SchedRR]>;

  // Unmasked.
  def : Pat <(_.VT (OpNode SrcRC:$src)),
             (!cast<Instruction>(Name#r)
                (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
                                    SrcRC:$src, Subreg)))>;

  // Masked, unselected lanes taken from $src0.
  def : Pat <(vselect _.KRCWM:$mask,
                      (_.VT (OpNode SrcRC:$src)),
                      _.RC:$src0),
             (!cast<Instruction>(Name#rk)
                _.RC:$src0, _.KRCWM:$mask,
                (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
                                    SrcRC:$src, Subreg)))>;

  // Masked, unselected lanes zeroed.
  def : Pat <(vselect _.KRCWM:$mask,
                      (_.VT (OpNode SrcRC:$src)),
                      _.ImmAllZerosV),
             (!cast<Instruction>(Name#rkz)
                _.KRCWM:$mask,
                (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
                                    SrcRC:$src, Subreg)))>;
}
1303
// Expands avx512_int_broadcastbw_reg over the three vector lengths.  The
// 512-bit variant needs only prd; the 256- and 128-bit variants additionally
// need HasVLX.  The length suffix is appended to Name for each variant.
multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
                                         AVX512VLVectorVTInfo _,
                                         SDPatternOperator OpNode,
                                         RegisterClass SrcRC,
                                         SubRegIndex Subreg,
                                         Predicate prd> {
  let Predicates = [prd] in
  defm Z
      : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256,
                                   _.info512, OpNode, SrcRC, Subreg>,
        EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256
        : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
                                     _.info256, OpNode, SrcRC, Subreg>,
          EVEX_V256;
    defm Z128
        : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
                                     _.info128, OpNode, SrcRC, Subreg>,
          EVEX_V128;
  }
}
1317
// Expands avx512_int_broadcast_reg over the three vector lengths.  The 512-bit
// variant needs only prd; the 256- and 128-bit variants additionally need
// HasVLX.
multiclass avx512_int_broadcast_reg_vl<bits<8> opc,
                                       AVX512VLVectorVTInfo _,
                                       SDPatternOperator OpNode,
                                       RegisterClass SrcRC,
                                       Predicate prd> {
  let Predicates = [prd] in
  defm Z
      : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512,
                                 OpNode, SrcRC>,
        EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256
        : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256,
                                   OpNode, SrcRC>,
          EVEX_V256;
    defm Z128
        : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128,
                                   OpNode, SrcRC>,
          EVEX_V128;
  }
}
1331
// Integer broadcasts from general-purpose registers.  The byte and word forms
// require BWI and go through the sub-register-inserting multiclass; the dword
// and qword forms share opcode 0x7C, with the qword form marked VEX_W.
defm VPBROADCASTBr
    : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr", avx512vl_i8_info,
                                    X86VBroadcast, GR8, sub_8bit, HasBWI>;
defm VPBROADCASTWr
    : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr", avx512vl_i16_info,
                                    X86VBroadcast, GR16, sub_16bit, HasBWI>;
defm VPBROADCASTDr
    : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
                                  X86VBroadcast, GR32, HasAVX512>;
defm VPBROADCASTQr
    : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
                                  X86VBroadcast, GR64, HasAVX512>,
      VEX_W;
Michael Liao5bf95782014-12-04 05:20:33 +00001341
// Lets a broadcast take its source from a wider vector register: the pattern
// extracts the low XMM sub-register of the source and feeds it to the "r" form
// of the broadcast instruction.
multiclass avx512_int_broadcast_rm_lowering<X86VectorVTInfo DestInfo,
                                            X86VectorVTInfo SrcInfo> {
  def : Pat<(DestInfo.VT
               (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))),
            (!cast<Instruction>(NAME#DestInfo.ZSuffix#"r")
               (EXTRACT_SUBREG (SrcInfo.VT SrcInfo.RC:$src), sub_xmm))>;
}
1350
// Integer element broadcast from an XMM register or scalar memory, for all
// three vector lengths.  The 512- and 256-bit variants also get lowering
// patterns that accept a wider source register; the 128-bit variant gets none.
multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
                                      AVX512VLVectorVTInfo _,
                                      Predicate prd> {
  let Predicates = [prd] in {
    defm Z
        : avx512_broadcast_rm<opc, OpcodeStr,
                              WriteShuffle256, WriteShuffle256Ld,
                              _.info512, _.info128>,
          avx512_int_broadcast_rm_lowering<_.info512, _.info256>,
          EVEX_V512;
    // The 512-bit-source lowering is a separate defm to avoid a redefinition.
    defm Z_Alt : avx512_int_broadcast_rm_lowering<_.info512, _.info512>;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256
        : avx512_broadcast_rm<opc, OpcodeStr,
                              WriteShuffle256, WriteShuffle256Ld,
                              _.info256, _.info128>,
          avx512_int_broadcast_rm_lowering<_.info256, _.info256>,
          EVEX_V256;
    defm Z128
        : avx512_broadcast_rm<opc, OpcodeStr,
                              WriteShuffle, WriteShuffleLd,
                              _.info128, _.info128>,
          EVEX_V128;
  }
}
1371
// Integer element broadcasts from XMM / memory.  The byte and word forms
// require BWI; the qword form is marked VEX_W.
defm VPBROADCASTB
    : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
                                 avx512vl_i8_info, HasBWI>;
defm VPBROADCASTW
    : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
                                 avx512vl_i16_info, HasBWI>;
defm VPBROADCASTD
    : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
                                 avx512vl_i32_info, HasAVX512>;
defm VPBROADCASTQ
    : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
                                 avx512vl_i64_info, HasAVX512>,
      VEX_W;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001380
// Subvector broadcast from memory: loads a _Src-typed vector and replicates it
// across a _Dst-typed result.  Only the memory form ("rm") is defined; the
// X86SubVBroadcast pattern is attached to the instruction itself.
multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
                                      X86VectorVTInfo _Dst,
                                      X86VectorVTInfo _Src> {
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst,
                            (outs _Dst.RC:$dst), (ins _Src.MemOp:$src),
                            OpcodeStr, "$src", "$src",
                            (_Dst.VT
                              (X86SubVBroadcast
                                (_Src.VT (bitconvert (_Src.LdFrag addr:$src))))),
                            NoItinerary>,
            AVX5128IBase, EVEX, Sched<[WriteShuffleLd]>;
}
1390
// Variant of the subvector broadcast for the AVX512DQ instructions.  The
// unmasked pattern is null_frag, so these instructions are only selected when
// masking is requested.  Since the unmasked form has no pattern, mayLoad and
// hasSideEffects are set explicitly.
multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
                                         X86VectorVTInfo _Dst,
                                         X86VectorVTInfo _Src> {
  let hasSideEffects = 0, mayLoad = 1 in
  defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst,
                                  (outs _Dst.RC:$dst), (ins _Src.MemOp:$src),
                                  OpcodeStr, "$src", "$src",
                                  // Unmasked: deliberately no pattern.
                                  (null_frag),
                                  // Masked.
                                  (_Dst.VT
                                    (X86SubVBroadcast
                                      (_Src.VT
                                        (bitconvert (_Src.LdFrag addr:$src))))),
                                  NoItinerary>,
            AVX5128IBase, EVEX, Sched<[WriteShuffleLd]>;
}
1405
let Predicates = [HasAVX512] in {
  // A 32-bit target cannot load an i64 directly but can use ZEXT_LOAD, so
  // also fold a broadcast of a zero-extending vector load.
  def : Pat<(v8i64 (X86VBroadcast (v8i64 (X86vzload addr:$src)))),
            (VPBROADCASTQZm addr:$src)>;
}
1411
let Predicates = [HasVLX] in {
  // A 32-bit target cannot load an i64 directly but can use ZEXT_LOAD, so
  // also fold a broadcast of a zero-extending vector load (128- and 256-bit
  // results).
  def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))),
            (VPBROADCASTQZ128m addr:$src)>;
  def : Pat<(v4i64 (X86VBroadcast (v4i64 (X86vzload addr:$src)))),
            (VPBROADCASTQZ256m addr:$src)>;
}
let Predicates = [HasVLX, HasBWI] in {
  // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
  // As a result the word broadcast shows up fed by a truncated i32 load;
  // match both the plain and the zero-extending 16-bit load shapes here.

  // Plain i32 load truncated to i16.
  def : Pat<(v8i16
               (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
            (VPBROADCASTWZ128m addr:$src)>;
  def : Pat<(v16i16
               (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
            (VPBROADCASTWZ256m addr:$src)>;

  // Zero-extending 16-bit load truncated back to i16.
  def : Pat<(v8i16
               (X86VBroadcast (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
            (VPBROADCASTWZ128m addr:$src)>;
  def : Pat<(v16i16
               (X86VBroadcast (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
            (VPBROADCASTWZ256m addr:$src)>;
}
1433
Simon Pilgrimea0d4f92016-07-22 13:58:44 +00001434//===----------------------------------------------------------------------===//
1435// AVX-512 BROADCAST SUBVECTORS
1436//
1437
// 512-bit subvector broadcasts from memory: 128-bit (x4 of 32-bit elements)
// and 256-bit (x4 of 64-bit elements) sources.  The 64-bit-element forms carry
// VEX_W.
defm VBROADCASTI32X4
    : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                                 v16i32_info, v4i32x_info>,
      EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4
    : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                                 v16f32_info, v4f32x_info>,
      EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTI64X4
    : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
                                 v8i64_info, v4i64x_info>,
      VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
defm VBROADCASTF64X4
    : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
                                 v8f64_info, v4f64x_info>,
      VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
1450
let Predicates = [HasAVX512] in {
  // 256-bit loads of other element types reuse the 64x4 instructions.
  def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))),
            (VBROADCASTF64X4rm addr:$src)>;
  def : Pat<(v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src)))),
            (VBROADCASTI64X4rm addr:$src)>;
  def : Pat<(v32i16 (X86SubVBroadcast (bc_v16i16 (loadv4i64 addr:$src)))),
            (VBROADCASTI64X4rm addr:$src)>;
  def : Pat<(v64i8 (X86SubVBroadcast (bc_v32i8 (loadv4i64 addr:$src)))),
            (VBROADCASTI64X4rm addr:$src)>;

  // Register fallback: if the load feeding the patterns above has additional
  // users it cannot be folded, so build the broadcast by inserting the 256-bit
  // source into both halves of a 512-bit register.
  def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))),
            (VINSERTF64x4Zrr
               (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
               (v4f64 VR256X:$src), 1)>;
  def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
            (VINSERTF64x4Zrr
               (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
               (v8f32 VR256X:$src), 1)>;
  def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))),
            (VINSERTI64x4Zrr
               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
               (v4i64 VR256X:$src), 1)>;
  def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
            (VINSERTI64x4Zrr
               (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
               (v8i32 VR256X:$src), 1)>;
  def : Pat<(v32i16 (X86SubVBroadcast (v16i16 VR256X:$src))),
            (VINSERTI64x4Zrr
               (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
               (v16i16 VR256X:$src), 1)>;
  def : Pat<(v64i8 (X86SubVBroadcast (v32i8 VR256X:$src))),
            (VINSERTI64x4Zrr
               (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
               (v32i8 VR256X:$src), 1)>;

  // 128-bit loads of other element types reuse the 32x4 instructions.
  def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
            (VBROADCASTF32X4rm addr:$src)>;
  def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
            (VBROADCASTI32X4rm addr:$src)>;
  def : Pat<(v32i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
            (VBROADCASTI32X4rm addr:$src)>;
  def : Pat<(v64i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
            (VBROADCASTI32X4rm addr:$src)>;

  // Selects of bitcasted broadcasts: 16-lane mask over a 128-bit broadcast
  // that was produced with 64-bit elements.
  def : Pat<(vselect VK16WM:$mask,
               (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
               (bc_v16f32 (v16i32 immAllZerosV))),
            (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
  def : Pat<(vselect VK16WM:$mask,
               (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
               VR512:$src0),
            (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
  def : Pat<(vselect VK16WM:$mask,
               (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
               (v16i32 immAllZerosV)),
            (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
  def : Pat<(vselect VK16WM:$mask,
               (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
               VR512:$src0),
            (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

  // Selects of bitcasted broadcasts: 8-lane mask over a 256-bit broadcast
  // that was produced with 32-bit elements.
  def : Pat<(vselect VK8WM:$mask,
               (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
               (bc_v8f64 (v16i32 immAllZerosV))),
            (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
  def : Pat<(vselect VK8WM:$mask,
               (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
               VR512:$src0),
            (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
  def : Pat<(vselect VK8WM:$mask,
               (bc_v8i64
                 (v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src))))),
               (bc_v8i64 (v16i32 immAllZerosV))),
            (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
  def : Pat<(vselect VK8WM:$mask,
               (bc_v8i64
                 (v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src))))),
               VR512:$src0),
            (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
1526
let Predicates = [HasVLX] in {
  // 256-bit results from a 128-bit memory source.
  defm VBROADCASTI32X4Z256
      : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                                   v8i32x_info, v4i32x_info>,
        EVEX_V256, EVEX_CD8<32, CD8VT4>;
  defm VBROADCASTF32X4Z256
      : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                                   v8f32x_info, v4f32x_info>,
        EVEX_V256, EVEX_CD8<32, CD8VT4>;

  // 128-bit loads of other element types reuse the 32x4 instructions.
  def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
            (VBROADCASTF32X4Z256rm addr:$src)>;
  def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
            (VBROADCASTI32X4Z256rm addr:$src)>;
  def : Pat<(v16i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
            (VBROADCASTI32X4Z256rm addr:$src)>;
  def : Pat<(v32i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
            (VBROADCASTI32X4Z256rm addr:$src)>;

  // Selects of bitcasted broadcasts: 8-lane mask over a broadcast that was
  // produced with 64-bit elements.
  def : Pat<(vselect VK8WM:$mask,
               (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
               (bc_v8f32 (v8i32 immAllZerosV))),
            (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
  def : Pat<(vselect VK8WM:$mask,
               (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
               VR256X:$src0),
            (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
  def : Pat<(vselect VK8WM:$mask,
               (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
               (v8i32 immAllZerosV)),
            (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
  def : Pat<(vselect VK8WM:$mask,
               (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
               VR256X:$src0),
            (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;

  // Register fallback: if the load feeding the patterns above has additional
  // users it cannot be folded, so build the broadcast by inserting the 128-bit
  // source into both halves of a 256-bit register.
  def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
            (VINSERTF32x4Z256rr
               (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
               (v2f64 VR128X:$src), 1)>;
  def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
            (VINSERTF32x4Z256rr
               (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
               (v4f32 VR128X:$src), 1)>;
  def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
            (VINSERTI32x4Z256rr
               (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
               (v2i64 VR128X:$src), 1)>;
  def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
            (VINSERTI32x4Z256rr
               (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
               (v4i32 VR128X:$src), 1)>;
  def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
            (VINSERTI32x4Z256rr
               (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
               (v8i16 VR128X:$src), 1)>;
  def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
            (VINSERTI32x4Z256rr
               (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
               (v16i8 VR128X:$src), 1)>;
}
Simon Pilgrimea0d4f92016-07-22 13:58:44 +00001584
let Predicates = [HasVLX, HasDQI] in {
  // 64x2 broadcasts producing 256-bit results (note EVEX_V256 despite the
  // Z128 record names).  Defined through the _dq multiclass, so they are only
  // selected for masked operations.
  defm VBROADCASTI64X2Z128
      : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                                      v4i64x_info, v2i64x_info>,
        VEX_W, EVEX_V256, EVEX_CD8<64, CD8VT2>;
  defm VBROADCASTF64X2Z128
      : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                                      v4f64x_info, v2f64x_info>,
        VEX_W, EVEX_V256, EVEX_CD8<64, CD8VT2>;

  // Selects of bitcasted broadcasts: 4-lane mask over a broadcast that was
  // produced with 32-bit elements.
  def : Pat<(vselect VK4WM:$mask,
               (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
               (bc_v4f64 (v8i32 immAllZerosV))),
            (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
  def : Pat<(vselect VK4WM:$mask,
               (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
               VR256X:$src0),
            (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(vselect VK4WM:$mask,
               (bc_v4i64
                 (v8i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
               (bc_v4i64 (v8i32 immAllZerosV))),
            (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
  def : Pat<(vselect VK4WM:$mask,
               (bc_v4i64
                 (v8i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
               VR256X:$src0),
            (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
}
1611
let Predicates = [HasDQI] in {
  // 512-bit DQ subvector broadcasts.  Defined through the _dq multiclass, so
  // they are only selected for masked operations.
  defm VBROADCASTI64X2
      : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                                      v8i64_info, v2i64x_info>,
        VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
  defm VBROADCASTI32X8
      : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
                                      v16i32_info, v8i32x_info>,
        EVEX_V512, EVEX_CD8<32, CD8VT8>;
  defm VBROADCASTF64X2
      : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                                      v8f64_info, v2f64x_info>,
        VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
  defm VBROADCASTF32X8
      : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
                                      v16f32_info, v8f32x_info>,
        EVEX_V512, EVEX_CD8<32, CD8VT8>;

  // Selects of bitcasted broadcasts: 16-lane mask over a 256-bit broadcast
  // that was produced with 64-bit elements.
  def : Pat<(vselect VK16WM:$mask,
               (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
               (bc_v16f32 (v16i32 immAllZerosV))),
            (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
  def : Pat<(vselect VK16WM:$mask,
               (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
               VR512:$src0),
            (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
  def : Pat<(vselect VK16WM:$mask,
               (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
               (v16i32 immAllZerosV)),
            (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
  def : Pat<(vselect VK16WM:$mask,
               (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
               VR512:$src0),
            (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

  // Selects of bitcasted broadcasts: 8-lane mask over a 128-bit broadcast
  // that was produced with 32-bit elements.
  def : Pat<(vselect VK8WM:$mask,
               (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
               (bc_v8f64 (v16i32 immAllZerosV))),
            (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
  def : Pat<(vselect VK8WM:$mask,
               (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
               VR512:$src0),
            (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
  def : Pat<(vselect VK8WM:$mask,
               (bc_v8i64
                 (v16i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
               (bc_v8i64 (v16i32 immAllZerosV))),
            (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
  def : Pat<(vselect VK8WM:$mask,
               (bc_v8i64
                 (v16i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
               VR512:$src0),
            (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
Adam Nemet73f72e12014-06-27 00:43:38 +00001661
// avx512_common_broadcast_32x2 - 512-bit and 256-bit variants of a 32x2
// broadcast, both built from avx512_broadcast_rm_split.
//   opc       - opcode byte forwarded to every variant.
//   OpcodeStr - assembly mnemonic.
//   _Dst      - vector-length family supplying the destination type info.
//   _Src      - vector-length family supplying the source type info; note
//               that _Src.info128 is passed as the last info argument for
//               both vector lengths.
// Variants:
//   Z    - EVEX_V512, requires HasDQI.
//   Z256 - EVEX_V256, requires HasDQI and HasVLX.
// Both use WriteShuffle256 / WriteShuffle256Ld and pass null_frag as the
// final argument. No 128-bit variant is defined here; see
// avx512_common_broadcast_i32x2 for the wrapper that adds one.
Igor Bregerfa798a92015-11-02 07:39:36 +00001662multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
Igor Breger52bd1d52016-05-31 07:43:39 +00001663 AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> {
Igor Bregerfa798a92015-11-02 07:39:36 +00001664 let Predicates = [HasDQI] in
Simon Pilgrimaa902be2017-12-06 15:48:40 +00001665 defm Z : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
1666 WriteShuffle256Ld, _Dst.info512,
Craig Topperbf0de9d2017-10-13 06:07:10 +00001667 _Src.info512, _Src.info128, null_frag>,
Craig Topper17854ec2017-08-30 07:48:39 +00001668 EVEX_V512;
Igor Bregerfa798a92015-11-02 07:39:36 +00001669 let Predicates = [HasDQI, HasVLX] in
Simon Pilgrimaa902be2017-12-06 15:48:40 +00001670 defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
1671 WriteShuffle256Ld, _Dst.info256,
Craig Topperbf0de9d2017-10-13 06:07:10 +00001672 _Src.info256, _Src.info128, null_frag>,
Craig Topper17854ec2017-08-30 07:48:39 +00001673 EVEX_V256;
Igor Bregerfa798a92015-11-02 07:39:36 +00001674}
1675
// avx512_common_broadcast_i32x2 - extends avx512_common_broadcast_32x2 (which
// provides Z and Z256) with a 128-bit variant.
//   Z128 - EVEX_V128, requires HasDQI and HasVLX; uses WriteShuffle /
//          WriteShuffleLd rather than the 256-bit scheduling classes, and
//          _Src.info128 for both source info arguments.
// Parameters are forwarded unchanged to the parent multiclass.
1676multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
Igor Breger52bd1d52016-05-31 07:43:39 +00001677 AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> :
1678 avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {
Igor Bregerfa798a92015-11-02 07:39:36 +00001679
1680 let Predicates = [HasDQI, HasVLX] in
Simon Pilgrimaa902be2017-12-06 15:48:40 +00001681 defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle,
1682 WriteShuffleLd, _Dst.info128,
Craig Topperbf0de9d2017-10-13 06:07:10 +00001683 _Src.info128, _Src.info128, null_frag>,
Craig Topper17854ec2017-08-30 07:48:39 +00001684 EVEX_V128;
Igor Bregerfa798a92015-11-02 07:39:36 +00001685}
1686
// vbroadcasti32x2 (opcode 0x59) goes through the i32x2 wrapper and therefore
// gets Z, Z256 and Z128 variants. vbroadcastf32x2 (opcode 0x19) uses the base
// multiclass directly and gets only Z and Z256.
// In both cases the destination info is the 32-bit element family and the
// source info is the 64-bit element family.
Craig Topper51e052f2016-10-15 16:26:02 +00001687defm VBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
1688 avx512vl_i32_info, avx512vl_i64_info>;
1689defm VBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
1690 avx512vl_f32_info, avx512vl_f64_info>;
Igor Bregerfa798a92015-11-02 07:39:36 +00001691
// With VLX: an X86VBroadcast whose source is itself a 256-bit register is
// selected as the 256-bit register-form scalar broadcast, fed with the xmm
// subregister (sub_xmm) of the source obtained via EXTRACT_SUBREG.
Craig Topper52317e82017-01-15 05:47:45 +00001692let Predicates = [HasVLX] in {
1693def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256X:$src))),
1694 (VBROADCASTSSZ256r (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm))>;
1695def : Pat<(v4f64 (X86VBroadcast (v4f64 VR256X:$src))),
1696 (VBROADCASTSDZ256r (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm))>;
1697}
1698
// 512-bit results: an X86VBroadcast whose source is a 512-bit or 256-bit
// register is selected as VBROADCASTSSZr / VBROADCASTSDZr on the source's
// xmm subregister (sub_xmm).
// NOTE(review): no `let Predicates` wraps these four patterns at this level -
// confirm that is intended.
Robert Khasanovdd09a8f2014-10-28 12:28:51 +00001699def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))),
Robert Khasanovaf318f72014-10-30 14:21:47 +00001700 (VBROADCASTSSZr (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
Elena Demikhovsky08ce53c2015-05-18 07:06:23 +00001701def : Pat<(v16f32 (X86VBroadcast (v8f32 VR256X:$src))),
1702 (VBROADCASTSSZr (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm))>;
1703
Robert Khasanovdd09a8f2014-10-28 12:28:51 +00001704def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))),
Robert Khasanovaf318f72014-10-30 14:21:47 +00001705 (VBROADCASTSDZr (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>;
Elena Demikhovsky08ce53c2015-05-18 07:06:23 +00001706def : Pat<(v8f64 (X86VBroadcast (v4f64 VR256X:$src))),
1707 (VBROADCASTSDZr (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm))>;
Robert Khasanovdd09a8f2014-10-28 12:28:51 +00001708
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001709//===----------------------------------------------------------------------===//
1710// AVX-512 BROADCAST MASK TO VECTOR REGISTER
1711//---
// avx512_mask_broadcastm - single register-form instruction that broadcasts
// a mask register into a vector register.
//   opc       - opcode byte.
//   OpcodeStr - assembly mnemonic.
//   _         - vector type info; supplies the destination register class
//               (_.RC) and result type (_.VT).
//   KRC       - register class of the mask source operand.
// Defines `rr`: $dst = (_.VT (X86VBroadcastm KRC:$src)), EVEX-encoded,
// scheduled as WriteShuffle.
Asaf Badouh0d957b82015-11-18 09:42:45 +00001712multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
1713 X86VectorVTInfo _, RegisterClass KRC> {
1714 def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
Craig Topperedb09112014-11-25 20:11:23 +00001715 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
Simon Pilgrimaa902be2017-12-06 15:48:40 +00001716 [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))],
1717 IIC_SSE_PSHUF_RI>, EVEX, Sched<[WriteShuffle]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001718}
1719
// avx512_mask_broadcast - instantiates avx512_mask_broadcastm at all three
// vector lengths of VTInfo, using the same mask register class KRC for each.
//   Z          - EVEX_V512, requires HasCDI.
//   Z256, Z128 - EVEX_V256 / EVEX_V128, require HasCDI and HasVLX.
Simon Pilgrimb13961d2016-06-11 14:34:10 +00001720multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
Asaf Badouh0d957b82015-11-18 09:42:45 +00001721 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
1722 let Predicates = [HasCDI] in
1723 defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
1724 let Predicates = [HasCDI, HasVLX] in {
1725 defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
1726 defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
1727 }
1728}
1729
// vpbroadcastmw2d: opcode 0x3A, 32-bit element vectors, mask source in VK16.
// vpbroadcastmb2q: opcode 0x2A, 64-bit element vectors, mask source in VK8,
//                  with VEX_W set.
Elena Demikhovsky4b01b732014-10-26 09:52:24 +00001730defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
Asaf Badouh0d957b82015-11-18 09:42:45 +00001731 avx512vl_i32_info, VK16>;
Elena Demikhovsky4b01b732014-10-26 09:52:24 +00001732defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
Asaf Badouh0d957b82015-11-18 09:42:45 +00001733 avx512vl_i64_info, VK8>, VEX_W;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001734
1735//===----------------------------------------------------------------------===//
Craig Topperaad5f112015-11-30 00:13:24 +00001736// -- VPERMI2 - 3 source operands form --
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001737
// Itinerary/scheduling bundles shared by the VPERMI2*/VPERMT2* definitions.
//   AVX512_PERM2_F - Sched = WriteFShuffle256; IIC_SSE_SHUFP for both
//                    OpndItins arguments.
//   AVX512_PERM2_I - Sched = WriteShuffle256; IIC_SSE_PSHUF_RI and
//                    IIC_SSE_PSHUF_MI.
// The multiclasses below read these through itins.rr, itins.rm and
// itins.Sched.
Simon Pilgrim8d5e4692017-12-01 17:24:15 +00001738let Sched = WriteFShuffle256 in
1739def AVX512_PERM2_F : OpndItins<
1740 IIC_SSE_SHUFP, IIC_SSE_SHUFP
1741>;
1742
1743let Sched = WriteShuffle256 in
1744def AVX512_PERM2_I : OpndItins<
1745 IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI
1746>;
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001747
// avx512_perm_i - register (rr) and full-vector memory (rm) forms of a
// VPERMI2-style three-source permute, built on AVX512_maskable_3src.
//   opc       - opcode byte.
//   OpcodeStr - assembly mnemonic.
//   itins     - supplies itins.rr / itins.rm and the scheduling class.
//   _         - vector type info for all three operands and the result.
// $src1 is tied to $dst and is passed as the first operand of X86VPermi2X;
// $src2 and $src3 are the explicit inputs. The rm form loads $src3 through
// _.LdFrag and bitconverts it to _.VT.
1748multiclass avx512_perm_i<bits<8> opc, string OpcodeStr, OpndItins itins,
1749 X86VectorVTInfo _> {
Craig Topper4729fe82016-10-16 04:54:31 +00001750let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
Craig Topper4fa3b502016-09-06 06:56:59 +00001751 // The index operand in the pattern should really be an integer type. However,
1752 // if we do that and it happens to come from a bitcast, then it becomes
1753 // difficult to find the bitcast needed to convert the index to the
1754 // destination type for the passthru since it will be folded with the bitcast
1755 // of the index operand.
1756 defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
Elena Demikhovskyd3057e52015-06-18 08:56:19 +00001757 (ins _.RC:$src2, _.RC:$src3),
1758 OpcodeStr, "$src3, $src2", "$src2, $src3",
Simon Pilgrim6a009702017-11-29 17:21:15 +00001759 (_.VT (X86VPermi2X _.RC:$src1, _.RC:$src2, _.RC:$src3)),
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001760 itins.rr, 1>, EVEX_4V, AVX5128IBase, Sched<[itins.Sched]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001761
Craig Topper4fa3b502016-09-06 06:56:59 +00001762 defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
Elena Demikhovskyd3057e52015-06-18 08:56:19 +00001763 (ins _.RC:$src2, _.MemOp:$src3),
1764 OpcodeStr, "$src3, $src2", "$src2, $src3",
Craig Topper4fa3b502016-09-06 06:56:59 +00001765 (_.VT (X86VPermi2X _.RC:$src1, _.RC:$src2,
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001766 (_.VT (bitconvert (_.LdFrag addr:$src3))))), itins.rm, 1>,
1767 EVEX_4V, AVX5128IBase, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001768 }
1769}
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001770
// avx512_perm_i_mb - embedded-broadcast memory form (rmb) of a VPERMI2-style
// permute. Parameters match avx512_perm_i.
// $src3 is a scalar memory operand (_.ScalarMemOp) matched as an
// X86VBroadcast of _.ScalarLdFrag; the assembly string appends
// _.BroadcastStr to the operand and the encoding sets EVEX_B.
// As in avx512_perm_i, $src1 is tied to $dst.
1771multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr, OpndItins itins,
Craig Topper4fa3b502016-09-06 06:56:59 +00001772 X86VectorVTInfo _> {
Craig Topper4729fe82016-10-16 04:54:31 +00001773 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
Craig Topper4fa3b502016-09-06 06:56:59 +00001774 defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
Elena Demikhovskyd3057e52015-06-18 08:56:19 +00001775 (ins _.RC:$src2, _.ScalarMemOp:$src3),
1776 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
1777 !strconcat("$src2, ${src3}", _.BroadcastStr ),
Craig Topper4fa3b502016-09-06 06:56:59 +00001778 (_.VT (X86VPermi2X _.RC:$src1,
Craig Toppercada9f22016-11-22 04:57:34 +00001779 _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))),
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001780 itins.rm, 1>, AVX5128IBase, EVEX_4V, EVEX_B,
1781 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Adam Nemetefe9c982014-07-02 21:25:58 +00001782}
1783
// avx512_perm_i_sizes - instantiates avx512_perm_i together with
// avx512_perm_i_mb (i.e. rr, rm and rmb forms) at each vector length.
//   NAME      - 512-bit, EVEX_V512; this multiclass adds no predicate of its
//               own for it.
//   NAME#128,
//   NAME#256  - EVEX_V128 / EVEX_V256, require HasVLX.
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001784multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr, OpndItins itins,
Craig Topper4fa3b502016-09-06 06:56:59 +00001785 AVX512VLVectorVTInfo VTInfo> {
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001786 defm NAME: avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info512>,
1787 avx512_perm_i_mb<opc, OpcodeStr, itins, VTInfo.info512>, EVEX_V512;
Elena Demikhovskyd3057e52015-06-18 08:56:19 +00001788 let Predicates = [HasVLX] in {
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001789 defm NAME#128: avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info128>,
1790 avx512_perm_i_mb<opc, OpcodeStr, itins, VTInfo.info128>, EVEX_V128;
1791 defm NAME#256: avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info256>,
1792 avx512_perm_i_mb<opc, OpcodeStr, itins, VTInfo.info256>, EVEX_V256;
Elena Demikhovskyd3057e52015-06-18 08:56:19 +00001793 }
1794}
Elena Demikhovskyf07df9f2015-11-25 08:17:56 +00001795
// avx512_perm_i_sizes_bw - like avx512_perm_i_sizes but without the
// embedded-broadcast (rmb) form: only avx512_perm_i is instantiated.
//   Prd       - feature predicate required by every variant.
//   NAME      - 512-bit, EVEX_V512, requires Prd.
//   NAME#128,
//   NAME#256  - EVEX_V128 / EVEX_V256, require Prd and HasVLX.
Michael Zuckerman4582bda2016-01-19 18:47:02 +00001796multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001797 OpndItins itins,
1798 AVX512VLVectorVTInfo VTInfo,
1799 Predicate Prd> {
Michael Zuckerman4582bda2016-01-19 18:47:02 +00001800 let Predicates = [Prd] in
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001801 defm NAME: avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info512>, EVEX_V512;
Michael Zuckerman4582bda2016-01-19 18:47:02 +00001802 let Predicates = [Prd, HasVLX] in {
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001803 defm NAME#128: avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info128>, EVEX_V128;
1804 defm NAME#256: avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info256>, EVEX_V256;
Elena Demikhovskyd3057e52015-06-18 08:56:19 +00001805 }
1806}
Elena Demikhovskyd3057e52015-06-18 08:56:19 +00001807
// VPERMI2 instantiations.
//   D / Q   - opcode 0x76, integer itineraries; Q sets VEX_W.
//   W / B   - opcode 0x75, no broadcast form; W requires HasBWI and sets
//             VEX_W, B requires HasVBMI.
//   PS / PD - opcode 0x77, floating-point itineraries; PD sets VEX_W.
// Each uses EVEX_CD8 with its element width and CD8VF.
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001808defm VPERMI2D : avx512_perm_i_sizes<0x76, "vpermi2d", AVX512_PERM2_I,
Craig Topper4fa3b502016-09-06 06:56:59 +00001809 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001810defm VPERMI2Q : avx512_perm_i_sizes<0x76, "vpermi2q", AVX512_PERM2_I,
Craig Topper4fa3b502016-09-06 06:56:59 +00001811 avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001812defm VPERMI2W : avx512_perm_i_sizes_bw<0x75, "vpermi2w", AVX512_PERM2_I,
Craig Topper4fa3b502016-09-06 06:56:59 +00001813 avx512vl_i16_info, HasBWI>,
Michael Zuckerman4582bda2016-01-19 18:47:02 +00001814 VEX_W, EVEX_CD8<16, CD8VF>;
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001815defm VPERMI2B : avx512_perm_i_sizes_bw<0x75, "vpermi2b", AVX512_PERM2_I,
Craig Topper4fa3b502016-09-06 06:56:59 +00001816 avx512vl_i8_info, HasVBMI>,
Michael Zuckerman4582bda2016-01-19 18:47:02 +00001817 EVEX_CD8<8, CD8VF>;
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001818defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", AVX512_PERM2_F,
Craig Topper4fa3b502016-09-06 06:56:59 +00001819 avx512vl_f32_info>, EVEX_CD8<32, CD8VF>;
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001820defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", AVX512_PERM2_F,
Craig Topper4fa3b502016-09-06 06:56:59 +00001821 avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyd3057e52015-06-18 08:56:19 +00001822
Craig Topperaad5f112015-11-30 00:13:24 +00001823// VPERMT2
// avx512_perm_t - register (rr) and full-vector memory (rm) forms of a
// VPERMT2-style three-source permute, built on AVX512_maskable_3src.
//   opc       - opcode byte.
//   OpcodeStr - assembly mnemonic.
//   itins     - supplies itins.rr / itins.rm and the scheduling class.
//   _         - vector type info for $src1/$dst, $src3 and the result.
//   IdxVT     - vector type info whose register class is used for $src2.
// $src1 is tied to $dst. Unlike avx512_perm_i, the $src2 operand takes its
// register class from IdxVT rather than from _.
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001824multiclass avx512_perm_t<bits<8> opc, string OpcodeStr, OpndItins itins,
Craig Toppera47576f2015-11-26 20:21:29 +00001825 X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
Craig Topper4729fe82016-10-16 04:54:31 +00001826let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
Elena Demikhovskyf07df9f2015-11-25 08:17:56 +00001827 defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
1828 (ins IdxVT.RC:$src2, _.RC:$src3),
1829 OpcodeStr, "$src3, $src2", "$src2, $src3",
Simon Pilgrim6a009702017-11-29 17:21:15 +00001830 (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)),
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001831 itins.rr, 1>, EVEX_4V, AVX5128IBase, Sched<[itins.Sched]>;
Elena Demikhovskyf07df9f2015-11-25 08:17:56 +00001832
Elena Demikhovskyf07df9f2015-11-25 08:17:56 +00001833 defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1834 (ins IdxVT.RC:$src2, _.MemOp:$src3),
1835 OpcodeStr, "$src3, $src2", "$src2, $src3",
Craig Toppera47576f2015-11-26 20:21:29 +00001836 (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001837 (bitconvert (_.LdFrag addr:$src3)))), itins.rm, 1>,
1838 EVEX_4V, AVX5128IBase, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Elena Demikhovskyf07df9f2015-11-25 08:17:56 +00001839 }
1840}
// avx512_perm_t_mb - embedded-broadcast memory form (rmb) of a VPERMT2-style
// permute. Parameters match avx512_perm_t.
// $src3 is a scalar memory operand matched as an X86VBroadcast of
// _.ScalarLdFrag; the assembly string appends _.BroadcastStr and the
// encoding sets EVEX_B. $src1 is tied to $dst and $src2 uses IdxVT.RC.
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001841multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr, OpndItins itins,
Craig Toppera47576f2015-11-26 20:21:29 +00001842 X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
Craig Topper4729fe82016-10-16 04:54:31 +00001843 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
Elena Demikhovskyf07df9f2015-11-25 08:17:56 +00001844 defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1845 (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
1846 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
1847 !strconcat("$src2, ${src3}", _.BroadcastStr ),
Craig Toppera47576f2015-11-26 20:21:29 +00001848 (_.VT (X86VPermt2 _.RC:$src1,
Craig Toppercada9f22016-11-22 04:57:34 +00001849 IdxVT.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))),
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001850 itins.rm, 1>, AVX5128IBase, EVEX_4V, EVEX_B,
1851 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Elena Demikhovskyf07df9f2015-11-25 08:17:56 +00001852}
1853
// avx512_perm_t_sizes - instantiates avx512_perm_t together with
// avx512_perm_t_mb (rr, rm and rmb forms) at each vector length.
//   VTInfo      - data vector type family.
//   ShuffleMask - type family passed as IdxVT (the $src2 operand).
//   NAME        - 512-bit, EVEX_V512; no predicate is added here.
//   NAME#128,
//   NAME#256    - EVEX_V128 / EVEX_V256, require HasVLX.
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001854multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr, OpndItins itins,
Craig Toppera47576f2015-11-26 20:21:29 +00001855 AVX512VLVectorVTInfo VTInfo,
1856 AVX512VLVectorVTInfo ShuffleMask> {
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001857 defm NAME: avx512_perm_t<opc, OpcodeStr, itins, VTInfo.info512,
Elena Demikhovskyf07df9f2015-11-25 08:17:56 +00001858 ShuffleMask.info512>,
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001859 avx512_perm_t_mb<opc, OpcodeStr, itins, VTInfo.info512,
Elena Demikhovskyf07df9f2015-11-25 08:17:56 +00001860 ShuffleMask.info512>, EVEX_V512;
1861 let Predicates = [HasVLX] in {
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001862 defm NAME#128: avx512_perm_t<opc, OpcodeStr, itins, VTInfo.info128,
Elena Demikhovskyf07df9f2015-11-25 08:17:56 +00001863 ShuffleMask.info128>,
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001864 avx512_perm_t_mb<opc, OpcodeStr, itins, VTInfo.info128,
Elena Demikhovskyf07df9f2015-11-25 08:17:56 +00001865 ShuffleMask.info128>, EVEX_V128;
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001866 defm NAME#256: avx512_perm_t<opc, OpcodeStr, itins, VTInfo.info256,
Elena Demikhovskyf07df9f2015-11-25 08:17:56 +00001867 ShuffleMask.info256>,
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001868 avx512_perm_t_mb<opc, OpcodeStr, itins, VTInfo.info256,
Craig Toppera47576f2015-11-26 20:21:29 +00001869 ShuffleMask.info256>, EVEX_V256;
Elena Demikhovskyf07df9f2015-11-25 08:17:56 +00001870 }
1871}
1872
// avx512_perm_t_sizes_bw - like avx512_perm_t_sizes but without the
// embedded-broadcast (rmb) form: only avx512_perm_t is instantiated.
//   Idx       - type family passed as IdxVT (the $src2 operand).
//   Prd       - feature predicate required by every variant.
//   NAME      - 512-bit, EVEX_V512, requires Prd.
//   NAME#128,
//   NAME#256  - EVEX_V128 / EVEX_V256, require Prd and HasVLX.
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001873multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr, OpndItins itins,
Craig Toppera47576f2015-11-26 20:21:29 +00001874 AVX512VLVectorVTInfo VTInfo,
Michael Zuckerman4582bda2016-01-19 18:47:02 +00001875 AVX512VLVectorVTInfo Idx,
1876 Predicate Prd> {
1877 let Predicates = [Prd] in
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001878 defm NAME: avx512_perm_t<opc, OpcodeStr, itins, VTInfo.info512,
Craig Toppera47576f2015-11-26 20:21:29 +00001879 Idx.info512>, EVEX_V512;
Michael Zuckerman4582bda2016-01-19 18:47:02 +00001880 let Predicates = [Prd, HasVLX] in {
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001881 defm NAME#128: avx512_perm_t<opc, OpcodeStr, itins, VTInfo.info128,
Craig Toppera47576f2015-11-26 20:21:29 +00001882 Idx.info128>, EVEX_V128;
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001883 defm NAME#256: avx512_perm_t<opc, OpcodeStr, itins, VTInfo.info256,
Craig Toppera47576f2015-11-26 20:21:29 +00001884 Idx.info256>, EVEX_V256;
Elena Demikhovskyf07df9f2015-11-25 08:17:56 +00001885 }
1886}
Simon Pilgrim8d5e4692017-12-01 17:24:15 +00001887
// VPERMT2 instantiations.
//   D / Q   - opcode 0x7E, integer itineraries; Q sets VEX_W.
//   W / B   - opcode 0x7D, no broadcast form; W requires HasBWI and sets
//             VEX_W, B requires HasVBMI.
//   PS / PD - opcode 0x7F, floating-point itineraries; the index operand
//             uses the integer family of the same element width
//             (avx512vl_i32_info / avx512vl_i64_info). PD sets VEX_W.
// Each uses EVEX_CD8 with its element width and CD8VF.
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001888defm VPERMT2D : avx512_perm_t_sizes<0x7E, "vpermt2d", AVX512_PERM2_I,
Elena Demikhovskyf07df9f2015-11-25 08:17:56 +00001889 avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001890defm VPERMT2Q : avx512_perm_t_sizes<0x7E, "vpermt2q", AVX512_PERM2_I,
Elena Demikhovskyf07df9f2015-11-25 08:17:56 +00001891 avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001892defm VPERMT2W : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", AVX512_PERM2_I,
Michael Zuckerman4582bda2016-01-19 18:47:02 +00001893 avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1894 VEX_W, EVEX_CD8<16, CD8VF>;
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001895defm VPERMT2B : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", AVX512_PERM2_I,
Michael Zuckerman4582bda2016-01-19 18:47:02 +00001896 avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1897 EVEX_CD8<8, CD8VF>;
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001898defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", AVX512_PERM2_F,
Elena Demikhovskyf07df9f2015-11-25 08:17:56 +00001899 avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001900defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", AVX512_PERM2_F,
Elena Demikhovskyf07df9f2015-11-25 08:17:56 +00001901 avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovsky299cf5112014-04-29 09:09:15 +00001902
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001903//===----------------------------------------------------------------------===//
1904// AVX-512 - BLEND using mask
1905//
Simon Pilgrimd4953012017-12-05 21:05:25 +00001906
// Itinerary/scheduling bundles shared by the mask-blend definitions.
//   AVX512_BLENDM  - Sched = WriteFVarBlend; IIC_SSE_ALU_F32P_RR / _RM.
//   AVX512_PBLENDM - Sched = WriteVarBlend;  IIC_SSE_INTALU_P_RR / _RM.
Simon Pilgrim75673942017-12-06 11:23:13 +00001907let Sched = WriteFVarBlend in
1908def AVX512_BLENDM : OpndItins<
1909 IIC_SSE_ALU_F32P_RR, IIC_SSE_ALU_F32P_RM
Simon Pilgrimd4953012017-12-05 21:05:25 +00001910>;
1911
Simon Pilgrim75673942017-12-06 11:23:13 +00001912let Sched = WriteVarBlend in
1913def AVX512_PBLENDM : OpndItins<
1914 IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
Simon Pilgrimd4953012017-12-05 21:05:25 +00001915>;
1916
// avx512_blendmask - register and full-vector memory forms of a mask blend.
//   opc       - opcode byte.
//   OpcodeStr - assembly mnemonic.
//   itins     - supplies itins.rr / itins.rm and the scheduling class.
//   _         - vector type info (register class, write-mask class, memory
//               operand, element size, execution domain).
// Defines six records, all with an empty selection pattern ([]), so none of
// them is selected from a pattern attached here:
//   rr,  rrk,  rrkz - register source; unmasked, merge-masked (EVEX_K),
//                     zero-masked (EVEX_KZ).
//   rm,  rmk,  rmkz - memory source (mayLoad = 1) with the same three
//                     masking flavours and EVEX_CD8<_.EltSize, CD8VF>.
// Every record is marked hasSideEffects = 0 and takes its execution domain
// from _.ExeDomain.
1917multiclass avx512_blendmask<bits<8> opc, string OpcodeStr, OpndItins itins,
1918 X86VectorVTInfo _> {
Craig Toppera74e3082017-01-07 22:20:34 +00001919 let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
Elena Demikhovsky949b0d42014-12-22 13:52:48 +00001920 def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1921 (ins _.RC:$src1, _.RC:$src2),
1922 !strconcat(OpcodeStr,
Craig Topper9feea572016-01-11 00:44:58 +00001923 "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
Simon Pilgrimd4953012017-12-05 21:05:25 +00001924 [], itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
Elena Demikhovsky949b0d42014-12-22 13:52:48 +00001925 def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1926 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
Elena Demikhovsky172a27c2014-01-08 10:54:22 +00001927 !strconcat(OpcodeStr,
Craig Topperedb09112014-11-25 20:11:23 +00001928 "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
Simon Pilgrimd4953012017-12-05 21:05:25 +00001929 [], itins.rr>, EVEX_4V, EVEX_K, Sched<[itins.Sched]>;
Elena Demikhovsky949b0d42014-12-22 13:52:48 +00001930 def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1931 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1932 !strconcat(OpcodeStr,
1933 "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
Simon Pilgrimd4953012017-12-05 21:05:25 +00001934 [], itins.rr>, EVEX_4V, EVEX_KZ, Sched<[itins.Sched]>;
Craig Toppera74e3082017-01-07 22:20:34 +00001935 let mayLoad = 1 in {
Elena Demikhovsky949b0d42014-12-22 13:52:48 +00001936 def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1937 (ins _.RC:$src1, _.MemOp:$src2),
1938 !strconcat(OpcodeStr,
Craig Topper9feea572016-01-11 00:44:58 +00001939 "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
Simon Pilgrimd4953012017-12-05 21:05:25 +00001940 [], itins.rm>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
1941 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Elena Demikhovsky949b0d42014-12-22 13:52:48 +00001942 def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1943 (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
Elena Demikhovsky172a27c2014-01-08 10:54:22 +00001944 !strconcat(OpcodeStr,
Craig Topperedb09112014-11-25 20:11:23 +00001945 "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
Simon Pilgrimd4953012017-12-05 21:05:25 +00001946 [], itins.rm>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
1947 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Elena Demikhovsky949b0d42014-12-22 13:52:48 +00001948 def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1949 (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1950 !strconcat(OpcodeStr,
1951 "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
Simon Pilgrimd4953012017-12-05 21:05:25 +00001952 [], itins.rm>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
1953 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Elena Demikhovsky949b0d42014-12-22 13:52:48 +00001954 }
Craig Toppera74e3082017-01-07 22:20:34 +00001955 }
Elena Demikhovsky949b0d42014-12-22 13:52:48 +00001956}
// avx512_blendmask_rmb - embedded-broadcast memory forms of a mask blend.
//   opc       - opcode byte.
//   OpcodeStr - assembly mnemonic.
//   itins     - supplies itins.rm and the scheduling class.
//   _         - vector type info (register class, write-mask class, scalar
//               memory operand, broadcast string, element size, domain).
// Defines three records, all with an empty selection pattern ([]), EVEX_B set
// and _.BroadcastStr appended to the memory operand in the assembly string:
//   rmbk  - merge-masked (EVEX_K)
//   rmbkz - zero-masked (EVEX_KZ)
//   rmb   - unmasked
// ExeDomain is taken from _.ExeDomain so the broadcast forms are classified
// in the same execution domain as the rr/rm forms in avx512_blendmask, which
// sets it the same way; previously it was left unset here.
Simon Pilgrimd4953012017-12-05 21:05:25 +00001957multiclass avx512_blendmask_rmb<bits<8> opc, string OpcodeStr, OpndItins itins,
1958 X86VectorVTInfo _> {
Craig Topper81f20aa2017-01-07 22:20:26 +00001959 let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in {
Elena Demikhovsky949b0d42014-12-22 13:52:48 +00001960 def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1961 (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
1962 !strconcat(OpcodeStr,
1963 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
1964 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
Simon Pilgrimd4953012017-12-05 21:05:25 +00001965 [], itins.rm>, EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1966 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Elena Demikhovsky949b0d42014-12-22 13:52:48 +00001967
Craig Topper16b20242018-02-23 20:48:44 +00001968 def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1969 (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
1970 !strconcat(OpcodeStr,
1971 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
1972 "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"),
1973 [], itins.rm>, EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1974 Sched<[itins.Sched.Folded, ReadAfterLd]>;
1975
Elena Demikhovsky949b0d42014-12-22 13:52:48 +00001976 def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1977 (ins _.RC:$src1, _.ScalarMemOp:$src2),
1978 !strconcat(OpcodeStr,
1979 "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
1980 "$dst, $src1, ${src2}", _.BroadcastStr, "}"),
Simon Pilgrimd4953012017-12-05 21:05:25 +00001981 [], itins.rm>, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1982 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Craig Topper81f20aa2017-01-07 22:20:26 +00001983 }
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001984}
1985
// blendmask_dq - instantiates avx512_blendmask together with
// avx512_blendmask_rmb (register, memory and broadcast forms) at each
// vector length of VTInfo.
//   Z          - EVEX_V512; no predicate is added here.
//   Z256, Z128 - EVEX_V256 / EVEX_V128, require HasVLX.
Simon Pilgrimd4953012017-12-05 21:05:25 +00001986multiclass blendmask_dq <bits<8> opc, string OpcodeStr, OpndItins itins,
Elena Demikhovsky949b0d42014-12-22 13:52:48 +00001987 AVX512VLVectorVTInfo VTInfo> {
Simon Pilgrimd4953012017-12-05 21:05:25 +00001988 defm Z : avx512_blendmask <opc, OpcodeStr, itins, VTInfo.info512>,
1989 avx512_blendmask_rmb <opc, OpcodeStr, itins, VTInfo.info512>, EVEX_V512;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001990
Elena Demikhovsky949b0d42014-12-22 13:52:48 +00001991 let Predicates = [HasVLX] in {
Simon Pilgrimd4953012017-12-05 21:05:25 +00001992 defm Z256 : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info256>,
1993 avx512_blendmask_rmb<opc, OpcodeStr, itins, VTInfo.info256>, EVEX_V256;
1994 defm Z128 : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info128>,
1995 avx512_blendmask_rmb<opc, OpcodeStr, itins, VTInfo.info128>, EVEX_V128;
Elena Demikhovsky949b0d42014-12-22 13:52:48 +00001996 }
1997}
Elena Demikhovsky172a27c2014-01-08 10:54:22 +00001998
// blendmask_bw - instantiates avx512_blendmask only (no broadcast forms) at
// each vector length of VTInfo.
//   Z          - EVEX_V512, requires HasBWI.
//   Z256, Z128 - EVEX_V256 / EVEX_V128, require HasBWI and HasVLX.
Simon Pilgrimd4953012017-12-05 21:05:25 +00001999multiclass blendmask_bw <bits<8> opc, string OpcodeStr, OpndItins itins,
Elena Demikhovsky949b0d42014-12-22 13:52:48 +00002000 AVX512VLVectorVTInfo VTInfo> {
2001 let Predicates = [HasBWI] in
Simon Pilgrimd4953012017-12-05 21:05:25 +00002002 defm Z : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info512>, EVEX_V512;
Elena Demikhovsky172a27c2014-01-08 10:54:22 +00002003
Elena Demikhovsky949b0d42014-12-22 13:52:48 +00002004 let Predicates = [HasBWI, HasVLX] in {
Simon Pilgrimd4953012017-12-05 21:05:25 +00002005 defm Z256 : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info256>, EVEX_V256;
2006 defm Z128 : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info128>, EVEX_V128;
Elena Demikhovsky949b0d42014-12-22 13:52:48 +00002007 }
2008}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002009
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002010
// Mask-blend instantiations.
//   vblendmps / vblendmpd - opcode 0x65, AVX512_BLENDM; PD sets VEX_W.
//   vpblendmd / vpblendmq - opcode 0x64, AVX512_PBLENDM; Q sets VEX_W.
//   vpblendmb / vpblendmw - opcode 0x66, AVX512_PBLENDM via blendmask_bw
//                           (no broadcast forms); W sets VEX_W.
Simon Pilgrimd4953012017-12-05 21:05:25 +00002011defm VBLENDMPS : blendmask_dq <0x65, "vblendmps", AVX512_BLENDM, avx512vl_f32_info>;
2012defm VBLENDMPD : blendmask_dq <0x65, "vblendmpd", AVX512_BLENDM, avx512vl_f64_info>, VEX_W;
2013defm VPBLENDMD : blendmask_dq <0x64, "vpblendmd", AVX512_PBLENDM, avx512vl_i32_info>;
2014defm VPBLENDMQ : blendmask_dq <0x64, "vpblendmq", AVX512_PBLENDM, avx512vl_i64_info>, VEX_W;
2015defm VPBLENDMB : blendmask_bw <0x66, "vpblendmb", AVX512_PBLENDM, avx512vl_i8_info>;
2016defm VPBLENDMW : blendmask_bw <0x66, "vpblendmw", AVX512_PBLENDM, avx512vl_i16_info>, VEX_W;
Elena Demikhovsky172a27c2014-01-08 10:54:22 +00002017
Elena Demikhovsky172a27c2014-01-08 10:54:22 +00002018
Elena Demikhovsky47fc44e2013-12-16 13:52:35 +00002019//===----------------------------------------------------------------------===//
2020// Compare Instructions
2021//===----------------------------------------------------------------------===//
2022
2023// avx512_cmp_scalar - AVX512 CMPSS and CMPSD
Igor Bregerb7e1f9d2015-09-20 15:15:10 +00002024
// avx512_cmp_scalar - scalar floating-point compare into a mask register
// (opcode 0xC2 throughout).
//   _         - scalar type info (register classes, mask class, suffix,
//               memory operands, load fragment, element size).
//   OpNode    - compare node used by the non-rounding forms.
//   OpNodeRnd - compare node used by the {sae} form; it takes an extra
//               (i32 FROUND_NO_EXC) operand.
//   itins     - supplies itins.rr / itins.rm and the scheduling class.
// Records defined:
//   rr_Int, rm_Int - maskable forms on _.RC with the predicate encoded in
//                    the mnemonic ("vcmp${cc}"); rm_Int is mayLoad.
//   rrb_Int        - maskable {sae} form, EVEX_B.
//   rri_alt, rmi_alt, rrb_alt
//                  - assembler-only (isAsmParserOnly) forms that take the
//                    predicate as an explicit u8imm operand.
//   rr, rm         - codegen-only (isCodeGenOnly) forms on _.FRC with
//                    explicit selection patterns.
Simon Pilgrim71660c62017-12-05 14:34:42 +00002025multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd,
2026 OpndItins itins> {
Igor Bregerb7e1f9d2015-09-20 15:15:10 +00002027 defm rr_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2028 (outs _.KRC:$dst),
2029 (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
2030 "vcmp${cc}"#_.Suffix,
2031 "$src2, $src1", "$src1, $src2",
2032 (OpNode (_.VT _.RC:$src1),
2033 (_.VT _.RC:$src2),
Simon Pilgrim71660c62017-12-05 14:34:42 +00002034 imm:$cc), itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
Ayman Musa62d1c712017-04-13 10:03:45 +00002035 let mayLoad = 1 in
Craig Toppere1cac152016-06-07 07:27:54 +00002036 defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2037 (outs _.KRC:$dst),
Craig Topperd9fe6642017-02-21 04:26:10 +00002038 (ins _.RC:$src1, _.IntScalarMemOp:$src2, AVXCC:$cc),
Craig Toppere1cac152016-06-07 07:27:54 +00002039 "vcmp${cc}"#_.Suffix,
2040 "$src2, $src1", "$src1, $src2",
Craig Topperd9fe6642017-02-21 04:26:10 +00002041 (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
Simon Pilgrim71660c62017-12-05 14:34:42 +00002042 imm:$cc), itins.rm>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
2043 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Igor Bregerb7e1f9d2015-09-20 15:15:10 +00002044
2045 defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2046 (outs _.KRC:$dst),
2047 (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
2048 "vcmp${cc}"#_.Suffix,
Craig Topperbfe13ff2016-01-11 00:44:52 +00002049 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
Igor Bregerb7e1f9d2015-09-20 15:15:10 +00002050 (OpNodeRnd (_.VT _.RC:$src1),
2051 (_.VT _.RC:$src2),
2052 imm:$cc,
Simon Pilgrim71660c62017-12-05 14:34:42 +00002053 (i32 FROUND_NO_EXC)), itins.rr>,
2054 EVEX_4V, EVEX_B, Sched<[itins.Sched]>;
Igor Bregerb7e1f9d2015-09-20 15:15:10 +00002055 // Accept explicit immediate argument form instead of comparison code.
Craig Topper0550ce72014-01-05 04:55:55 +00002056 let isAsmParserOnly = 1, hasSideEffects = 0 in {
// NOTE(review): rri_alt names its output class VK1 directly while every
// sibling form uses _.KRC. KRC is built as "VK" # NumElts, so the two are
// presumably identical for the single-element infos used with this
// multiclass - confirm.
Igor Bregerb7e1f9d2015-09-20 15:15:10 +00002057 defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
2058 (outs VK1:$dst),
2059 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2060 "vcmp"#_.Suffix,
Simon Pilgrim71660c62017-12-05 14:34:42 +00002061 "$cc, $src2, $src1", "$src1, $src2, $cc", itins.rr>, EVEX_4V,
2062 Sched<[itins.Sched]>;
Ayman Musa62d1c712017-04-13 10:03:45 +00002063 let mayLoad = 1 in
Igor Bregerb7e1f9d2015-09-20 15:15:10 +00002064 defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
2065 (outs _.KRC:$dst),
Igor Breger4511e762016-02-22 11:48:27 +00002066 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
Igor Bregerb7e1f9d2015-09-20 15:15:10 +00002067 "vcmp"#_.Suffix,
Simon Pilgrim71660c62017-12-05 14:34:42 +00002068 "$cc, $src2, $src1", "$src1, $src2, $cc", itins.rm>,
2069 EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
2070 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Igor Bregerb7e1f9d2015-09-20 15:15:10 +00002071
2072 defm rrb_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
2073 (outs _.KRC:$dst),
2074 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2075 "vcmp"#_.Suffix,
Simon Pilgrim71660c62017-12-05 14:34:42 +00002076 "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc", itins.rr>,
2077 EVEX_4V, EVEX_B, Sched<[itins.Sched]>;
Igor Bregerb7e1f9d2015-09-20 15:15:10 +00002078 }// let isAsmParserOnly = 1, hasSideEffects = 0
2079
2080 let isCodeGenOnly = 1 in {
// The brace-less `let isCommutable = 1 in` below applies to `rr` only; the
// memory form `rm` is not marked commutable.
Craig Topper225da2c2016-08-27 05:22:15 +00002081 let isCommutable = 1 in
Igor Bregerb7e1f9d2015-09-20 15:15:10 +00002082 def rr : AVX512Ii8<0xC2, MRMSrcReg,
2083 (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, AVXCC:$cc),
2084 !strconcat("vcmp${cc}", _.Suffix,
2085 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2086 [(set _.KRC:$dst, (OpNode _.FRC:$src1,
2087 _.FRC:$src2,
2088 imm:$cc))],
Simon Pilgrim71660c62017-12-05 14:34:42 +00002089 itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
Craig Toppere1cac152016-06-07 07:27:54 +00002090 def rm : AVX512Ii8<0xC2, MRMSrcMem,
2091 (outs _.KRC:$dst),
2092 (ins _.FRC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
2093 !strconcat("vcmp${cc}", _.Suffix,
2094 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2095 [(set _.KRC:$dst, (OpNode _.FRC:$src1,
2096 (_.ScalarLdFrag addr:$src2),
2097 imm:$cc))],
Simon Pilgrim71660c62017-12-05 14:34:42 +00002098 itins.rm>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
2099 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Elena Demikhovsky47fc44e2013-12-16 13:52:35 +00002100 }
2101}
2102
// Scalar compare instantiations, both requiring HasAVX512 and both using
// X86cmpms / X86cmpmsRnd as the compare nodes.
//   VCMPSSZ - f32x_info, SSE_ALU_F32S, SSEPackedSingle domain,
//             AVX512XSIi8Base.
//   VCMPSDZ - f64x_info, SSE_ALU_F64S, SSEPackedDouble domain,
//             AVX512XDIi8Base, VEX_W.
2103let Predicates = [HasAVX512] in {
Craig Topperd890db62017-02-21 04:26:04 +00002104 let ExeDomain = SSEPackedSingle in
Simon Pilgrim71660c62017-12-05 14:34:42 +00002105 defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsRnd,
2106 SSE_ALU_F32S>, AVX512XSIi8Base;
Craig Topperd890db62017-02-21 04:26:04 +00002107 let ExeDomain = SSEPackedDouble in
Simon Pilgrim71660c62017-12-05 14:34:42 +00002108 defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsRnd,
2109 SSE_ALU_F64S>, AVX512XDIi8Base, VEX_W;
Elena Demikhovsky47fc44e2013-12-16 13:52:35 +00002110}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002111
// Packed integer compare that writes a mask register (_.KRC).
//   opc          - opcode byte passed to AVX512BI.
//   OpcodeStr    - mnemonic used in the asm strings.
//   OpNode       - two-operand PatFrag used in the selection patterns.
//   itins        - supplies the rr/rm itineraries and the Sched class.
//   _            - vector type info (RC, KRC, KRCWM, VT, MemOp, LdFrag).
//   IsCommutable - applied via `let isCommutable` to the reg-reg forms only.
// Records defined:
//   rr / rm   - reg,reg and reg,full-vector-memory forms.
//   rrk / rmk - same with a _.KRCWM write-mask operand; the pattern ANDs the
//               mask with the compare result and the record carries EVEX_K.
Craig Topper513d3fa2018-01-27 20:19:02 +00002112multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, PatFrag OpNode,
Simon Pilgrima2b58622017-12-05 12:02:22 +00002113 OpndItins itins, X86VectorVTInfo _, bit IsCommutable> {
Craig Topper392cd032016-09-03 16:28:03 +00002114 let isCommutable = IsCommutable in
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002115 def rr : AVX512BI<opc, MRMSrcReg,
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002116 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
2117 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2118 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))],
Simon Pilgrima2b58622017-12-05 12:02:22 +00002119 itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
  // Memory form: $src2 is loaded with _.LdFrag and bitconverted to _.VT.
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002120 def rm : AVX512BI<opc, MRMSrcMem,
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002121 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
2122 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2123 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
2124 (_.VT (bitconvert (_.LdFrag addr:$src2)))))],
Simon Pilgrima2b58622017-12-05 12:02:22 +00002125 itins.rm>, EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  // Write-masked register form.
Craig Toppere1d81032017-06-13 07:13:47 +00002126 let isCommutable = IsCommutable in
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002127 def rrk : AVX512BI<opc, MRMSrcReg,
2128 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
2129 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2130 "$dst {${mask}}, $src1, $src2}"),
2131 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2132 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))))],
Simon Pilgrima2b58622017-12-05 12:02:22 +00002133 itins.rr>, EVEX_4V, EVEX_K, Sched<[itins.Sched]>;
  // Write-masked memory form.
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002134 def rmk : AVX512BI<opc, MRMSrcMem,
2135 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2136 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2137 "$dst {${mask}}, $src1, $src2}"),
2138 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2139 (OpNode (_.VT _.RC:$src1),
2140 (_.VT (bitconvert
2141 (_.LdFrag addr:$src2))))))],
Simon Pilgrima2b58622017-12-05 12:02:22 +00002142 itins.rm>, EVEX_4V, EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002143}
2144
// Extends avx512_icmp_packed (same parameters, forwarded unchanged) with two
// broadcast-memory forms. In both, $src2 is a _.ScalarMemOp whose load is
// matched through X86VBroadcast, the asm string appends _.BroadcastStr to the
// memory operand, and the record carries EVEX_B.
//   rmb  - unmasked.
//   rmbk - takes a _.KRCWM write-mask that is ANDed with the compare result.
Craig Topper513d3fa2018-01-27 20:19:02 +00002145multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
Simon Pilgrima2b58622017-12-05 12:02:22 +00002146 OpndItins itins, X86VectorVTInfo _, bit IsCommutable> :
2147 avx512_icmp_packed<opc, OpcodeStr, OpNode, itins, _, IsCommutable> {
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002148 def rmb : AVX512BI<opc, MRMSrcMem,
2149 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
2150 !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
2151 "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
2152 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
2153 (X86VBroadcast (_.ScalarLdFrag addr:$src2))))],
Simon Pilgrima2b58622017-12-05 12:02:22 +00002154 itins.rm>, EVEX_4V, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002155 def rmbk : AVX512BI<opc, MRMSrcMem,
2156 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2157 _.ScalarMemOp:$src2),
2158 !strconcat(OpcodeStr,
2159 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2160 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
2161 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2162 (OpNode (_.VT _.RC:$src1),
2163 (X86VBroadcast
2164 (_.ScalarLdFrag addr:$src2)))))],
Simon Pilgrima2b58622017-12-05 12:02:22 +00002165 itins.rm>, EVEX_4V, EVEX_K, EVEX_B,
2166 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002167}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002168
// Instantiates avx512_icmp_packed at all three vector lengths of VTInfo:
//   Z    - info512, EVEX_V512, guarded by [prd].
//   Z256 - info256, EVEX_V256, guarded by [prd, HasVLX].
//   Z128 - info128, EVEX_V128, guarded by [prd, HasVLX].
// IsCommutable defaults to 0 and is forwarded to every instantiation.
Craig Topper513d3fa2018-01-27 20:19:02 +00002169multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, PatFrag OpNode,
Simon Pilgrima2b58622017-12-05 12:02:22 +00002170 OpndItins itins, AVX512VLVectorVTInfo VTInfo,
2171 Predicate prd, bit IsCommutable = 0> {
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002172 let Predicates = [prd] in
Simon Pilgrima2b58622017-12-05 12:02:22 +00002173 defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, itins, VTInfo.info512,
Craig Topper392cd032016-09-03 16:28:03 +00002174 IsCommutable>, EVEX_V512;
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002175
2176 let Predicates = [prd, HasVLX] in {
Simon Pilgrima2b58622017-12-05 12:02:22 +00002177 defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, itins, VTInfo.info256,
Craig Topper392cd032016-09-03 16:28:03 +00002178 IsCommutable>, EVEX_V256;
Simon Pilgrima2b58622017-12-05 12:02:22 +00002179 defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, itins, VTInfo.info128,
Craig Topper392cd032016-09-03 16:28:03 +00002180 IsCommutable>, EVEX_V128;
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002181 }
2182}
2183
// Same three-length instantiation scheme as avx512_icmp_packed_vl, but built
// on avx512_icmp_packed_rmb so each length also gets the broadcast-memory
// (rmb/rmbk) forms. Z is guarded by [prd]; Z256/Z128 by [prd, HasVLX].
2184 multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
Craig Topper513d3fa2018-01-27 20:19:02 +00002185 PatFrag OpNode, OpndItins itins,
Simon Pilgrima2b58622017-12-05 12:02:22 +00002186 AVX512VLVectorVTInfo VTInfo,
2187 Predicate prd, bit IsCommutable = 0> {
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002188 let Predicates = [prd] in
Simon Pilgrima2b58622017-12-05 12:02:22 +00002189 defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info512,
Craig Topper392cd032016-09-03 16:28:03 +00002190 IsCommutable>, EVEX_V512;
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002191
2192 let Predicates = [prd, HasVLX] in {
Simon Pilgrima2b58622017-12-05 12:02:22 +00002193 defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info256,
Craig Topper392cd032016-09-03 16:28:03 +00002194 IsCommutable>, EVEX_V256;
Simon Pilgrima2b58622017-12-05 12:02:22 +00002195 defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info128,
Craig Topper392cd032016-09-03 16:28:03 +00002196 IsCommutable>, EVEX_V128;
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002197 }
2198}
2199
Craig Topper9471a7c2018-02-19 19:23:31 +00002200// This fragment treats X86cmpm as commutable to help match loads in both
2201// operands for PCMPEQ.
// It expands to X86cmpm_c with the condition immediate fixed to (i8 0).
2202def X86pcmpeqm_c : PatFrag<(ops node:$src1, node:$src2),
2203 (X86cmpm_c node:$src1, node:$src2, (i8 0))>;
// Two-operand wrapper around the non-commutable X86cmpm with the condition
// immediate fixed to (i8 6).
// NOTE(review): 6 is presumably the greater-than predicate implied by the
// fragment name - confirm against the VPCMP immediate encoding.
Craig Topper513d3fa2018-01-27 20:19:02 +00002204def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
2205 (X86cmpm node:$src1, node:$src2, (i8 6))>;
2206
// Fixed-predicate packed integer compares (VPCMPEQ* / VPCMPGT*).
//   - B and W element sizes use avx512_icmp_packed_vl (no broadcast forms),
//     are guarded by HasBWI and carry VEX_WIG.
//   - D and Q element sizes use avx512_icmp_packed_rmb_vl (adds broadcast
//     forms) and are guarded by HasAVX512; the Q variants add T8PD and VEX_W.
//   - The EQ variants use X86pcmpeqm_c and pass IsCommutable = 1; the GT
//     variants use X86pcmpgtm and leave IsCommutable at its default of 0.
Simon Pilgrima2b58622017-12-05 12:02:22 +00002207// FIXME: Is there a better scheduler itinerary for VPCMP?
Craig Topper9471a7c2018-02-19 19:23:31 +00002208defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm_c,
Simon Pilgrima2b58622017-12-05 12:02:22 +00002209 SSE_ALU_F32P, avx512vl_i8_info, HasBWI, 1>,
Craig Toppera33846a2017-10-22 06:18:23 +00002210 EVEX_CD8<8, CD8VF>, VEX_WIG;
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002211
Craig Topper9471a7c2018-02-19 19:23:31 +00002212defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm_c,
Simon Pilgrima2b58622017-12-05 12:02:22 +00002213 SSE_ALU_F32P, avx512vl_i16_info, HasBWI, 1>,
Craig Toppera33846a2017-10-22 06:18:23 +00002214 EVEX_CD8<16, CD8VF>, VEX_WIG;
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002215
Craig Topper9471a7c2018-02-19 19:23:31 +00002216defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm_c,
Simon Pilgrima2b58622017-12-05 12:02:22 +00002217 SSE_ALU_F32P, avx512vl_i32_info, HasAVX512, 1>,
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002218 EVEX_CD8<32, CD8VF>;
2219
Craig Topper9471a7c2018-02-19 19:23:31 +00002220defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm_c,
Simon Pilgrima2b58622017-12-05 12:02:22 +00002221 SSE_ALU_F32P, avx512vl_i64_info, HasAVX512, 1>,
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002222 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
2223
2224defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm,
Simon Pilgrima2b58622017-12-05 12:02:22 +00002225 SSE_ALU_F32P, avx512vl_i8_info, HasBWI>,
Craig Toppera33846a2017-10-22 06:18:23 +00002226 EVEX_CD8<8, CD8VF>, VEX_WIG;
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002227
2228defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm,
Simon Pilgrima2b58622017-12-05 12:02:22 +00002229 SSE_ALU_F32P, avx512vl_i16_info, HasBWI>,
Craig Toppera33846a2017-10-22 06:18:23 +00002230 EVEX_CD8<16, CD8VF>, VEX_WIG;
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002231
Robert Khasanovf70f7982014-09-18 14:06:55 +00002232defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm,
Simon Pilgrima2b58622017-12-05 12:02:22 +00002233 SSE_ALU_F32P, avx512vl_i32_info, HasAVX512>,
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002234 EVEX_CD8<32, CD8VF>;
2235
Robert Khasanovf70f7982014-09-18 14:06:55 +00002236defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm,
Simon Pilgrima2b58622017-12-05 12:02:22 +00002237 SSE_ALU_F32P, avx512vl_i64_info, HasAVX512>,
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002238 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002239
Craig Toppera88306e2017-10-10 06:36:46 +00002240// Transforms to swizzle an immediate to help matching memory operand in first
2241// operand.
// The transform keeps only the low three bits of the immediate, maps that
// value through X86::getSwappedVPCMPImm, and returns the result as an i8
// immediate. It is used by the commuted-load patterns of the VPCMP
// multiclasses in this file.
2242def CommutePCMPCC : SDNodeXForm<imm, [{
2243 uint8_t Imm = N->getZExtValue() & 0x7;
Craig Topper9b64bf52018-02-20 03:58:11 +00002244 Imm = X86::getSwappedVPCMPImm(Imm);
Craig Toppera88306e2017-10-10 06:36:46 +00002245 return getI8Imm(Imm, SDLoc(N));
2246}]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002247
// Packed integer compare with a condition-code immediate, producing a mask
// register (_.KRC).
//   opc    - opcode byte passed to AVX512AIi8.
//   Suffix - element-type suffix appended to "vpcmp${cc}" / "vpcmp".
//   OpNode - three-operand SDNode (src1, src2, imm) used in the patterns.
//   itins  - supplies the rr/rm itineraries and the Sched class.
//   _      - vector type info.
// Records defined:
//   rri / rmi   - reg,reg and reg,mem forms taking an AVX512ICC:$cc operand.
//   rrik / rmik - same with a _.KRCWM write-mask ANDed into the result.
//   *_alt       - assembler-only forms (isAsmParserOnly, empty patterns) that
//                 take the condition as a plain u8imm and print it explicitly.
// The two anonymous Pats at the end handle a load in the FIRST compare
// operand by selecting rmi/rmik with the operands swapped and the immediate
// rewritten by CommutePCMPCC.
Robert Khasanov29e3b962014-08-27 09:34:37 +00002248multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode,
Simon Pilgrimaa911552017-12-05 12:14:36 +00002249 OpndItins itins, X86VectorVTInfo _> {
Craig Topper149e6bd2016-09-09 01:36:10 +00002250 let isCommutable = 1 in
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002251 def rri : AVX512AIi8<opc, MRMSrcReg,
Craig Topper7d3c6d32015-01-28 10:09:56 +00002252 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, AVX512ICC:$cc),
Adam Nemet1efcb902014-07-01 18:03:43 +00002253 !strconcat("vpcmp${cc}", Suffix,
2254 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
Robert Khasanov29e3b962014-08-27 09:34:37 +00002255 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2256 imm:$cc))],
Simon Pilgrimaa911552017-12-05 12:14:36 +00002257 itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002258 def rmi : AVX512AIi8<opc, MRMSrcMem,
Craig Topper7d3c6d32015-01-28 10:09:56 +00002259 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, AVX512ICC:$cc),
Adam Nemet1efcb902014-07-01 18:03:43 +00002260 !strconcat("vpcmp${cc}", Suffix,
2261 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
Robert Khasanov29e3b962014-08-27 09:34:37 +00002262 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
2263 (_.VT (bitconvert (_.LdFrag addr:$src2))),
Craig Topper6e3a5822014-12-27 20:08:45 +00002264 imm:$cc))],
Simon Pilgrimaa911552017-12-05 12:14:36 +00002265 itins.rm>, EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Craig Topper8b876762017-06-13 07:13:50 +00002266 let isCommutable = 1 in
Robert Khasanov29e3b962014-08-27 09:34:37 +00002267 def rrik : AVX512AIi8<opc, MRMSrcReg,
2268 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
Craig Topper7d3c6d32015-01-28 10:09:56 +00002269 AVX512ICC:$cc),
Robert Khasanov29e3b962014-08-27 09:34:37 +00002270 !strconcat("vpcmp${cc}", Suffix,
2271 "\t{$src2, $src1, $dst {${mask}}|",
2272 "$dst {${mask}}, $src1, $src2}"),
2273 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2274 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
Craig Topper6e3a5822014-12-27 20:08:45 +00002275 imm:$cc)))],
Simon Pilgrimaa911552017-12-05 12:14:36 +00002276 itins.rr>, EVEX_4V, EVEX_K, Sched<[itins.Sched]>;
Robert Khasanov29e3b962014-08-27 09:34:37 +00002277 def rmik : AVX512AIi8<opc, MRMSrcMem,
2278 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
Craig Topper7d3c6d32015-01-28 10:09:56 +00002279 AVX512ICC:$cc),
Robert Khasanov29e3b962014-08-27 09:34:37 +00002280 !strconcat("vpcmp${cc}", Suffix,
2281 "\t{$src2, $src1, $dst {${mask}}|",
2282 "$dst {${mask}}, $src1, $src2}"),
2283 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2284 (OpNode (_.VT _.RC:$src1),
2285 (_.VT (bitconvert (_.LdFrag addr:$src2))),
Craig Topper6e3a5822014-12-27 20:08:45 +00002286 imm:$cc)))],
Simon Pilgrimaa911552017-12-05 12:14:36 +00002287 itins.rm>, EVEX_4V, EVEX_K,
2288 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Robert Khasanov29e3b962014-08-27 09:34:37 +00002289
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002290 // Accept explicit immediate argument form instead of comparison code.
  // These records have no selection pattern, so hasSideEffects is cleared
  // here and mayLoad is set explicitly on the two memory forms.
Craig Topper0550ce72014-01-05 04:55:55 +00002291 let isAsmParserOnly = 1, hasSideEffects = 0 in {
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002292 def rri_alt : AVX512AIi8<opc, MRMSrcReg,
Craig Topper7ff6ab32015-01-21 08:43:49 +00002293 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
Robert Khasanov29e3b962014-08-27 09:34:37 +00002294 !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
2295 "$dst, $src1, $src2, $cc}"),
Simon Pilgrimaa911552017-12-05 12:14:36 +00002296 [], itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
Craig Topper9f4d4852015-01-20 12:15:30 +00002297 let mayLoad = 1 in
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002298 def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
Craig Topper7ff6ab32015-01-21 08:43:49 +00002299 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
Robert Khasanov29e3b962014-08-27 09:34:37 +00002300 !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
2301 "$dst, $src1, $src2, $cc}"),
Simon Pilgrimaa911552017-12-05 12:14:36 +00002302 [], itins.rm>, EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Robert Khasanov29e3b962014-08-27 09:34:37 +00002303 def rrik_alt : AVX512AIi8<opc, MRMSrcReg,
2304 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
Craig Topper7ff6ab32015-01-21 08:43:49 +00002305 u8imm:$cc),
Adam Nemet16de2482014-07-01 18:03:45 +00002306 !strconcat("vpcmp", Suffix,
Robert Khasanov29e3b962014-08-27 09:34:37 +00002307 "\t{$cc, $src2, $src1, $dst {${mask}}|",
2308 "$dst {${mask}}, $src1, $src2, $cc}"),
Simon Pilgrimaa911552017-12-05 12:14:36 +00002309 [], itins.rr>, EVEX_4V, EVEX_K, Sched<[itins.Sched]>;
Craig Topper9f4d4852015-01-20 12:15:30 +00002310 let mayLoad = 1 in
Robert Khasanov29e3b962014-08-27 09:34:37 +00002311 def rmik_alt : AVX512AIi8<opc, MRMSrcMem,
2312 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
Craig Topper7ff6ab32015-01-21 08:43:49 +00002313 u8imm:$cc),
Robert Khasanov29e3b962014-08-27 09:34:37 +00002314 !strconcat("vpcmp", Suffix,
2315 "\t{$cc, $src2, $src1, $dst {${mask}}|",
2316 "$dst {${mask}}, $src1, $src2, $cc}"),
Simon Pilgrimaa911552017-12-05 12:14:36 +00002317 [], itins.rm>, EVEX_4V, EVEX_K,
2318 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002319 }
Craig Toppera88306e2017-10-10 06:36:46 +00002320
  // Load in the first operand: select the memory form with the operands
  // swapped and the condition immediate rewritten by CommutePCMPCC.
2321 def : Pat<(OpNode (bitconvert (_.LdFrag addr:$src2)),
2322 (_.VT _.RC:$src1), imm:$cc),
2323 (!cast<Instruction>(NAME#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2324 (CommutePCMPCC imm:$cc))>;
2325
  // Same as above for the write-masked form.
2326 def : Pat<(and _.KRCWM:$mask, (OpNode (bitconvert (_.LdFrag addr:$src2)),
2327 (_.VT _.RC:$src1), imm:$cc)),
2328 (!cast<Instruction>(NAME#_.ZSuffix#"rmik") _.KRCWM:$mask,
2329 _.RC:$src1, addr:$src2,
2330 (CommutePCMPCC imm:$cc))>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002331}
2332
// Extends avx512_icmp_cc (same parameters, forwarded unchanged) with
// broadcast-memory forms. $src2 is a _.ScalarMemOp matched through
// X86VBroadcast, and every record defined here carries EVEX_B.
//   rmib / rmibk         - selectable forms taking AVX512ICC:$cc; rmibk ANDs
//                          a _.KRCWM write-mask into the result.
//   rmib_alt / rmibk_alt - assembler-only forms taking an explicit u8imm.
// The two anonymous Pats at the end handle a broadcast load in the FIRST
// compare operand by selecting rmib/rmibk with the operands swapped and the
// immediate rewritten by CommutePCMPCC.
Robert Khasanov29e3b962014-08-27 09:34:37 +00002333multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, SDNode OpNode,
Simon Pilgrimaa911552017-12-05 12:14:36 +00002334 OpndItins itins, X86VectorVTInfo _> :
2335 avx512_icmp_cc<opc, Suffix, OpNode, itins, _> {
Robert Khasanov29e3b962014-08-27 09:34:37 +00002336 def rmib : AVX512AIi8<opc, MRMSrcMem,
2337 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
Craig Topper7d3c6d32015-01-28 10:09:56 +00002338 AVX512ICC:$cc),
Robert Khasanov29e3b962014-08-27 09:34:37 +00002339 !strconcat("vpcmp${cc}", Suffix,
2340 "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
2341 "$dst, $src1, ${src2}", _.BroadcastStr, "}"),
2342 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
2343 (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
Craig Topper6e3a5822014-12-27 20:08:45 +00002344 imm:$cc))],
Simon Pilgrimaa911552017-12-05 12:14:36 +00002345 itins.rm>, EVEX_4V, EVEX_B,
2346 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Robert Khasanov29e3b962014-08-27 09:34:37 +00002347 def rmibk : AVX512AIi8<opc, MRMSrcMem,
2348 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
Craig Topper7d3c6d32015-01-28 10:09:56 +00002349 _.ScalarMemOp:$src2, AVX512ICC:$cc),
Robert Khasanov29e3b962014-08-27 09:34:37 +00002350 !strconcat("vpcmp${cc}", Suffix,
2351 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2352 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
2353 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2354 (OpNode (_.VT _.RC:$src1),
2355 (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
Craig Topper6e3a5822014-12-27 20:08:45 +00002356 imm:$cc)))],
Simon Pilgrimaa911552017-12-05 12:14:36 +00002357 itins.rm>, EVEX_4V, EVEX_K, EVEX_B,
2358 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002359
Robert Khasanov29e3b962014-08-27 09:34:37 +00002360 // Accept explicit immediate argument form instead of comparison code.
  // Both records below read memory, so mayLoad is set for the whole group.
Craig Topper9f4d4852015-01-20 12:15:30 +00002361 let isAsmParserOnly = 1, hasSideEffects = 0, mayLoad = 1 in {
Robert Khasanov29e3b962014-08-27 09:34:37 +00002362 def rmib_alt : AVX512AIi8<opc, MRMSrcMem,
2363 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
Craig Topper7ff6ab32015-01-21 08:43:49 +00002364 u8imm:$cc),
Robert Khasanov29e3b962014-08-27 09:34:37 +00002365 !strconcat("vpcmp", Suffix,
2366 "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
2367 "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
Simon Pilgrimaa911552017-12-05 12:14:36 +00002368 [], itins.rm>, EVEX_4V, EVEX_B,
2369 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Robert Khasanov29e3b962014-08-27 09:34:37 +00002370 def rmibk_alt : AVX512AIi8<opc, MRMSrcMem,
2371 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
Craig Topper7ff6ab32015-01-21 08:43:49 +00002372 _.ScalarMemOp:$src2, u8imm:$cc),
Robert Khasanov29e3b962014-08-27 09:34:37 +00002373 !strconcat("vpcmp", Suffix,
2374 "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2375 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
Simon Pilgrimaa911552017-12-05 12:14:36 +00002376 [], itins.rm>, EVEX_4V, EVEX_K, EVEX_B,
2377 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Robert Khasanov29e3b962014-08-27 09:34:37 +00002378 }
Craig Toppera88306e2017-10-10 06:36:46 +00002379
  // Broadcast load in the first operand: select rmib with the operands
  // swapped and the condition immediate rewritten by CommutePCMPCC.
2380 def : Pat<(OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
2381 (_.VT _.RC:$src1), imm:$cc),
2382 (!cast<Instruction>(NAME#_.ZSuffix#"rmib") _.RC:$src1, addr:$src2,
2383 (CommutePCMPCC imm:$cc))>;
2384
  // Same as above for the write-masked form.
2385 def : Pat<(and _.KRCWM:$mask, (OpNode (X86VBroadcast
2386 (_.ScalarLdFrag addr:$src2)),
2387 (_.VT _.RC:$src1), imm:$cc)),
2388 (!cast<Instruction>(NAME#_.ZSuffix#"rmibk") _.KRCWM:$mask,
2389 _.RC:$src1, addr:$src2,
2390 (CommutePCMPCC imm:$cc))>;
Robert Khasanov29e3b962014-08-27 09:34:37 +00002391}
2392
// Instantiates avx512_icmp_cc at all three vector lengths of VTInfo:
//   Z    - info512, EVEX_V512, guarded by [prd].
//   Z256 - info256, EVEX_V256, guarded by [prd, HasVLX].
//   Z128 - info128, EVEX_V128, guarded by [prd, HasVLX].
2393 multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, SDNode OpNode,
Simon Pilgrimaa911552017-12-05 12:14:36 +00002394 OpndItins itins, AVX512VLVectorVTInfo VTInfo,
2395 Predicate prd> {
Robert Khasanov29e3b962014-08-27 09:34:37 +00002396 let Predicates = [prd] in
Simon Pilgrimaa911552017-12-05 12:14:36 +00002397 defm Z : avx512_icmp_cc<opc, Suffix, OpNode, itins, VTInfo.info512>,
2398 EVEX_V512;
Robert Khasanov29e3b962014-08-27 09:34:37 +00002399
2400 let Predicates = [prd, HasVLX] in {
Simon Pilgrimaa911552017-12-05 12:14:36 +00002401 defm Z256 : avx512_icmp_cc<opc, Suffix, OpNode, itins, VTInfo.info256>,
2402 EVEX_V256;
2403 defm Z128 : avx512_icmp_cc<opc, Suffix, OpNode, itins, VTInfo.info128>,
2404 EVEX_V128;
Robert Khasanov29e3b962014-08-27 09:34:37 +00002405 }
2406}
2407
// Same three-length instantiation scheme as avx512_icmp_cc_vl, but built on
// avx512_icmp_cc_rmb so each length also gets the broadcast-memory forms.
// Z is guarded by [prd]; Z256/Z128 by [prd, HasVLX].
2408 multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, SDNode OpNode,
Simon Pilgrimaa911552017-12-05 12:14:36 +00002409 OpndItins itins, AVX512VLVectorVTInfo VTInfo,
2410 Predicate prd> {
Robert Khasanov29e3b962014-08-27 09:34:37 +00002411 let Predicates = [prd] in
Simon Pilgrimaa911552017-12-05 12:14:36 +00002412 defm Z : avx512_icmp_cc_rmb<opc, Suffix, OpNode, itins, VTInfo.info512>,
Robert Khasanov29e3b962014-08-27 09:34:37 +00002413 EVEX_V512;
2414
2415 let Predicates = [prd, HasVLX] in {
Simon Pilgrimaa911552017-12-05 12:14:36 +00002416 defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, itins, VTInfo.info256>,
Robert Khasanov29e3b962014-08-27 09:34:37 +00002417 EVEX_V256;
Simon Pilgrimaa911552017-12-05 12:14:36 +00002418 defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, itins, VTInfo.info128>,
Robert Khasanov29e3b962014-08-27 09:34:37 +00002419 EVEX_V128;
2420 }
2421}
2422
// Condition-code packed integer compares (VPCMP* / VPCMPU*).
//   - The non-U variants are built on X86cmpm, the U variants on X86cmpmu.
//   - B and W element sizes use avx512_icmp_cc_vl (no broadcast forms) and
//     are guarded by HasBWI; D and Q use avx512_icmp_cc_rmb_vl and are
//     guarded by HasAVX512.
//   - The W and Q variants add VEX_W; the B and D variants do not.
Simon Pilgrimaa911552017-12-05 12:14:36 +00002423// FIXME: Is there a better scheduler itinerary for VPCMP/VPCMPU?
2424defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86cmpm, SSE_ALU_F32P,
2425 avx512vl_i8_info, HasBWI>, EVEX_CD8<8, CD8VF>;
2426defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86cmpmu, SSE_ALU_F32P,
2427 avx512vl_i8_info, HasBWI>, EVEX_CD8<8, CD8VF>;
Robert Khasanov29e3b962014-08-27 09:34:37 +00002428
Simon Pilgrimaa911552017-12-05 12:14:36 +00002429defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86cmpm, SSE_ALU_F32P,
2430 avx512vl_i16_info, HasBWI>,
2431 VEX_W, EVEX_CD8<16, CD8VF>;
2432defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86cmpmu, SSE_ALU_F32P,
2433 avx512vl_i16_info, HasBWI>,
2434 VEX_W, EVEX_CD8<16, CD8VF>;
Robert Khasanov29e3b962014-08-27 09:34:37 +00002435
Simon Pilgrimaa911552017-12-05 12:14:36 +00002436defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86cmpm, SSE_ALU_F32P,
2437 avx512vl_i32_info, HasAVX512>,
2438 EVEX_CD8<32, CD8VF>;
2439defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86cmpmu, SSE_ALU_F32P,
2440 avx512vl_i32_info, HasAVX512>,
2441 EVEX_CD8<32, CD8VF>;
Robert Khasanov29e3b962014-08-27 09:34:37 +00002442
Simon Pilgrimaa911552017-12-05 12:14:36 +00002443defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86cmpm, SSE_ALU_F32P,
2444 avx512vl_i64_info, HasAVX512>,
2445 VEX_W, EVEX_CD8<64, CD8VF>;
2446defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86cmpmu, SSE_ALU_F32P,
2447 avx512vl_i64_info, HasAVX512>,
2448 VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002449
Ayman Musa721d97f2017-06-27 12:08:37 +00002450
// Packed FP compare-to-mask forms shared by every vector length, all using
// opcode 0xC2 and the X86cmpm node.
//   rri / rmi / rmbi - reg,reg / reg,mem / reg,broadcast-mem forms built with
//                      AVX512_maskable_cmp and taking an AVXCC:$cc operand.
//   *_alt            - assembler-only counterparts built with
//                      AVX512_maskable_cmp_alt that take an explicit u8imm.
// The four anonymous Pats at the end select the memory forms when the load
// (plain or broadcast) is the FIRST compare operand. They only match
// immediates accepted by CommutableCMPCC and pass the immediate through
// unchanged.
Simon Pilgrimbb791b32017-11-30 13:18:06 +00002451multiclass avx512_vcmp_common<OpndItins itins, X86VectorVTInfo _> {
  // NOTE(review): the trailing `1` is an argument of AVX512_maskable_cmp,
  // which is defined outside this view - presumably a commutable flag; confirm.
Elena Demikhovsky29792e92015-05-07 11:24:42 +00002452 defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2453 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,AVXCC:$cc),
2454 "vcmp${cc}"#_.Suffix,
2455 "$src2, $src1", "$src1, $src2",
2456 (X86cmpm (_.VT _.RC:$src1),
2457 (_.VT _.RC:$src2),
Simon Pilgrimbb791b32017-11-30 13:18:06 +00002458 imm:$cc), itins.rr, 1>,
2459 Sched<[itins.Sched]>;
Elena Demikhovsky29792e92015-05-07 11:24:42 +00002460
Craig Toppere1cac152016-06-07 07:27:54 +00002461 defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2462 (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc),
2463 "vcmp${cc}"#_.Suffix,
2464 "$src2, $src1", "$src1, $src2",
2465 (X86cmpm (_.VT _.RC:$src1),
2466 (_.VT (bitconvert (_.LdFrag addr:$src2))),
Simon Pilgrimbb791b32017-11-30 13:18:06 +00002467 imm:$cc), itins.rm>,
2468 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Elena Demikhovsky29792e92015-05-07 11:24:42 +00002469
Craig Toppere1cac152016-06-07 07:27:54 +00002470 defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2471 (outs _.KRC:$dst),
2472 (ins _.RC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
2473 "vcmp${cc}"#_.Suffix,
2474 "${src2}"##_.BroadcastStr##", $src1",
2475 "$src1, ${src2}"##_.BroadcastStr,
2476 (X86cmpm (_.VT _.RC:$src1),
2477 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
Simon Pilgrimbb791b32017-11-30 13:18:06 +00002478 imm:$cc), itins.rm>,
2479 EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002480 // Accept explicit immediate argument form instead of comparison code.
Craig Topper0550ce72014-01-05 04:55:55 +00002481 let isAsmParserOnly = 1, hasSideEffects = 0 in {
Elena Demikhovsky29792e92015-05-07 11:24:42 +00002482 defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
2483 (outs _.KRC:$dst),
2484 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2485 "vcmp"#_.Suffix,
Simon Pilgrimbb791b32017-11-30 13:18:06 +00002486 "$cc, $src2, $src1", "$src1, $src2, $cc", itins.rr>,
2487 Sched<[itins.Sched]>;
Elena Demikhovsky29792e92015-05-07 11:24:42 +00002488
2489 let mayLoad = 1 in {
2490 defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
2491 (outs _.KRC:$dst),
2492 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2493 "vcmp"#_.Suffix,
Simon Pilgrimbb791b32017-11-30 13:18:06 +00002494 "$cc, $src2, $src1", "$src1, $src2, $cc", itins.rm>,
2495 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Elena Demikhovsky29792e92015-05-07 11:24:42 +00002496
2497 defm rmbi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
2498 (outs _.KRC:$dst),
2499 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2500 "vcmp"#_.Suffix,
2501 "$cc, ${src2}"##_.BroadcastStr##", $src1",
Simon Pilgrimbb791b32017-11-30 13:18:06 +00002502 "$src1, ${src2}"##_.BroadcastStr##", $cc", itins.rm>,
2503 EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Elena Demikhovsky29792e92015-05-07 11:24:42 +00002504 }
Craig Topper61956982017-09-30 17:02:39 +00002505 }
2506
2507 // Patterns for selecting with loads in other operand.
2508 def : Pat<(X86cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
2509 CommutableCMPCC:$cc),
2510 (!cast<Instruction>(NAME#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2511 imm:$cc)>;
2512
2513 def : Pat<(and _.KRCWM:$mask, (X86cmpm (_.LdFrag addr:$src2),
2514 (_.VT _.RC:$src1),
2515 CommutableCMPCC:$cc)),
2516 (!cast<Instruction>(NAME#_.ZSuffix#"rmik") _.KRCWM:$mask,
2517 _.RC:$src1, addr:$src2,
2518 imm:$cc)>;
2519
2520 def : Pat<(X86cmpm (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
2521 (_.VT _.RC:$src1), CommutableCMPCC:$cc),
2522 (!cast<Instruction>(NAME#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2523 imm:$cc)>;
2524
2525 def : Pat<(and _.KRCWM:$mask, (X86cmpm (X86VBroadcast
2526 (_.ScalarLdFrag addr:$src2)),
2527 (_.VT _.RC:$src1),
2528 CommutableCMPCC:$cc)),
2529 (!cast<Instruction>(NAME#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2530 _.RC:$src1, addr:$src2,
2531 imm:$cc)>;
Elena Demikhovsky29792e92015-05-07 11:24:42 +00002532}
2533
// Register-register packed FP compare with the {sae} modifier.
//   rrib     - selectable form; matches X86cmpmRnd with (i32 FROUND_NO_EXC)
//              as the extra operand, prints "{sae}" and carries EVEX_B.
//   rrib_alt - assembler-only form taking an explicit u8imm condition.
Simon Pilgrimbb791b32017-11-30 13:18:06 +00002534multiclass avx512_vcmp_sae<OpndItins itins, X86VectorVTInfo _> {
Elena Demikhovsky29792e92015-05-07 11:24:42 +00002535 // comparison code form (VCMP[EQ/LT/LE/...])
2536 defm rrib : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2537 (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
2538 "vcmp${cc}"#_.Suffix,
Craig Topperbfe13ff2016-01-11 00:44:52 +00002539 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
Elena Demikhovsky29792e92015-05-07 11:24:42 +00002540 (X86cmpmRnd (_.VT _.RC:$src1),
2541 (_.VT _.RC:$src2),
2542 imm:$cc,
Simon Pilgrimbb791b32017-11-30 13:18:06 +00002543 (i32 FROUND_NO_EXC)), itins.rr>,
2544 EVEX_B, Sched<[itins.Sched]>;
Elena Demikhovsky29792e92015-05-07 11:24:42 +00002545
  // Explicit-immediate form, accepted by the assembler only.
2546 let isAsmParserOnly = 1, hasSideEffects = 0 in {
2547 defm rrib_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
2548 (outs _.KRC:$dst),
2549 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2550 "vcmp"#_.Suffix,
Craig Topperbfe13ff2016-01-11 00:44:52 +00002551 "$cc, {sae}, $src2, $src1",
Simon Pilgrimbb791b32017-11-30 13:18:06 +00002552 "$src1, $src2, {sae}, $cc", itins.rr>,
2553 EVEX_B, Sched<[itins.Sched]>;
Elena Demikhovsky29792e92015-05-07 11:24:42 +00002554 }
2555}
2556
// Top-level packed FP compare multiclass.
//   Z           - 512-bit; combines avx512_vcmp_common and avx512_vcmp_sae,
//                 guarded by [HasAVX512].
//   Z128 / Z256 - avx512_vcmp_common only (no {sae} forms), guarded by
//                 [HasAVX512, HasVLX].
Simon Pilgrimbb791b32017-11-30 13:18:06 +00002557multiclass avx512_vcmp<OpndItins itins, AVX512VLVectorVTInfo _> {
Elena Demikhovsky29792e92015-05-07 11:24:42 +00002558 let Predicates = [HasAVX512] in {
Simon Pilgrimbb791b32017-11-30 13:18:06 +00002559 defm Z : avx512_vcmp_common<itins, _.info512>,
2560 avx512_vcmp_sae<itins, _.info512>, EVEX_V512;
Elena Demikhovsky29792e92015-05-07 11:24:42 +00002561
2562 }
2563 let Predicates = [HasAVX512,HasVLX] in {
Simon Pilgrimbb791b32017-11-30 13:18:06 +00002564 defm Z128 : avx512_vcmp_common<itins, _.info128>, EVEX_V128;
2565 defm Z256 : avx512_vcmp_common<itins, _.info256>, EVEX_V256;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002566 }
2567}
2568
// Packed FP compare instantiations of avx512_vcmp. VCMPPD uses the f64 type
// info with AVX512PDIi8Base, a 64-bit EVEX_CD8 element size and VEX_W; VCMPPS
// uses the f32 type info with AVX512PSIi8Base and a 32-bit element size.
Simon Pilgrimbb791b32017-11-30 13:18:06 +00002569defm VCMPPD : avx512_vcmp<SSE_ALU_F64P, avx512vl_f64_info>,
Elena Demikhovsky29792e92015-05-07 11:24:42 +00002570 AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
Simon Pilgrimbb791b32017-11-30 13:18:06 +00002571defm VCMPPS : avx512_vcmp<SSE_ALU_F32P, avx512vl_f32_info>,
Elena Demikhovsky29792e92015-05-07 11:24:42 +00002572 AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002573
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00002574
Craig Topper61956982017-09-30 17:02:39 +00002575// Patterns to select fp compares with load as first operand.
// Each pattern matches X86cmpms with the scalar load in operand 0 and the
// register in operand 1, and selects VCMPSDZrm / VCMPSSZrm with the register
// as $src1 and the address as $src2. Only immediates accepted by
// CommutableCMPCC match, and the immediate is passed through unchanged.
2576let Predicates = [HasAVX512] in {
2577 def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1,
2578 CommutableCMPCC:$cc)),
2579 (VCMPSDZrm FR64X:$src1, addr:$src2, imm:$cc)>;
2580
2581 def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1,
2582 CommutableCMPCC:$cc)),
2583 (VCMPSSZrm FR32X:$src1, addr:$src2, imm:$cc)>;
2584}
2585
Asaf Badouh572bbce2015-09-20 08:46:07 +00002586// ----------------------------------------------------------------
2587// FPClass
Asaf Badouh696e8e02015-10-18 11:04:38 +00002588//handle fpclass instruction mask = op(reg_scalar,imm)
2589// op(mem_scalar,imm)
// Scalar fpclass forms; each produces a mask register (_.KRC) from
// OpNode(source, (i32 imm)).
//   opc       - opcode byte passed to AVX512.
//   OpcodeStr - mnemonic stem; _.Suffix is appended to it.
//   itins     - supplies the rr/rm itineraries and the Sched class.
//   prd       - predicate applied to all four records.
// Records defined:
//   rr / rrk - register source (_.RC).
//   rm / rmk - scalar memory source (_.IntScalarMemOp operand, matched with
//              _.ScalarIntMemCPat).
// The k forms take a _.KRCWM write-mask that is ANDed with the result and
// carry EVEX_K. All records use ExeDomain = _.ExeDomain.
2590 multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim54c60832017-12-01 16:51:48 +00002591 OpndItins itins, X86VectorVTInfo _,
2592 Predicate prd> {
Craig Topper4a638432017-11-11 06:57:44 +00002593 let Predicates = [prd], ExeDomain = _.ExeDomain in {
Craig Topper702097d2017-08-20 18:30:24 +00002594 def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
Asaf Badouh696e8e02015-10-18 11:04:38 +00002595 (ins _.RC:$src1, i32u8imm:$src2),
Craig Topper048e7002016-01-08 06:09:20 +00002596 OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
Asaf Badouh696e8e02015-10-18 11:04:38 +00002597 [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
Simon Pilgrim54c60832017-12-01 16:51:48 +00002598 (i32 imm:$src2)))], itins.rr>,
2599 Sched<[itins.Sched]>;
Asaf Badouh696e8e02015-10-18 11:04:38 +00002600 def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2601 (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2602 OpcodeStr##_.Suffix#
Craig Topper048e7002016-01-08 06:09:20 +00002603 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
Craig Topperac799b02018-02-28 06:19:55 +00002604 [(set _.KRC:$dst,(and _.KRCWM:$mask,
Asaf Badouh696e8e02015-10-18 11:04:38 +00002605 (OpNode (_.VT _.RC:$src1),
Simon Pilgrim54c60832017-12-01 16:51:48 +00002606 (i32 imm:$src2))))], itins.rr>,
2607 EVEX_K, Sched<[itins.Sched]>;
Craig Topper63801df2017-02-19 21:44:35 +00002608 def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
Craig Topperca8abed2017-11-13 06:46:48 +00002609 (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
Craig Topper63801df2017-02-19 21:44:35 +00002610 OpcodeStr##_.Suffix##
2611 "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2612 [(set _.KRC:$dst,
Craig Topperca8abed2017-11-13 06:46:48 +00002613 (OpNode _.ScalarIntMemCPat:$src1,
Simon Pilgrim54c60832017-12-01 16:51:48 +00002614 (i32 imm:$src2)))], itins.rm>,
2615 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Craig Topper63801df2017-02-19 21:44:35 +00002616 def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
Craig Topperca8abed2017-11-13 06:46:48 +00002617 (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
Craig Topper63801df2017-02-19 21:44:35 +00002618 OpcodeStr##_.Suffix##
2619 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
Craig Topperac799b02018-02-28 06:19:55 +00002620 [(set _.KRC:$dst,(and _.KRCWM:$mask,
Craig Topperca8abed2017-11-13 06:46:48 +00002621 (OpNode _.ScalarIntMemCPat:$src1,
Simon Pilgrim54c60832017-12-01 16:51:48 +00002622 (i32 imm:$src2))))], itins.rm>,
2623 EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Asaf Badouh696e8e02015-10-18 11:04:38 +00002624 }
2625}
2626
// Handle the vector fpclass instruction: mask = fpclass(reg_vec, imm)
//                                               fpclass(mem_vec, imm)
//                                               fpclass(broadcast(eltVt), imm)
// Packed fpclass forms producing a mask register (_.KRC):
//   rr  / rrk  - vector register source
//   rm  / rmk  - full vector memory source; 'mem' is appended to the mnemonic
//   rmb / rmbk - broadcast scalar memory source; 'broadcast' is appended to
//                the mnemonic
// The 'k' forms take an extra _.KRCWM:$mask operand and match the
// classification result ANDed with that mask.
multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 OpndItins itins, X86VectorVTInfo _,
                                 string mem, string broadcast> {
  let ExeDomain = _.ExeDomain in {
    // Register source.
    def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                    (ins _.RC:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix#
                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,
                          (OpNode (_.VT _.RC:$src1), (i32 imm:$src2)))],
                    itins.rr>,
             Sched<[itins.Sched]>;

    // Register source, masked.
    def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                     OpcodeStr#_.Suffix#
                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst,
                           (and _.KRCWM:$mask,
                                (OpNode (_.VT _.RC:$src1), (i32 imm:$src2))))],
                     itins.rr>,
              EVEX_K, Sched<[itins.Sched]>;

    // Full-vector memory source.
    def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix#mem#
                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,
                          (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                  (i32 imm:$src2)))],
                    itins.rm>,
             Sched<[itins.Sched.Folded, ReadAfterLd]>;

    // Full-vector memory source, masked.
    def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
                     OpcodeStr#_.Suffix#mem#
                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst,
                           (and _.KRCWM:$mask,
                                (OpNode
                                  (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                  (i32 imm:$src2))))],
                     itins.rm>,
              EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>;

    // Broadcast scalar memory source.
    def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                     (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                     OpcodeStr#_.Suffix#broadcast#
                     "\t{$src2, ${src1}"#_.BroadcastStr#
                     ", $dst|$dst, ${src1}"#_.BroadcastStr#", $src2}",
                     [(set _.KRC:$dst,
                           (OpNode
                             (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src1))),
                             (i32 imm:$src2)))],
                     itins.rm>,
              EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;

    // Broadcast scalar memory source, masked.
    def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                      (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
                      OpcodeStr#_.Suffix#broadcast#
                      "\t{$src2, ${src1}"#_.BroadcastStr#
                      ", $dst {${mask}}|$dst {${mask}}, ${src1}"#
                      _.BroadcastStr#", $src2}",
                      [(set _.KRC:$dst,
                            (and _.KRCWM:$mask,
                                 (OpNode
                                   (_.VT (X86VBroadcast
                                           (_.ScalarLdFrag addr:$src1))),
                                   (i32 imm:$src2))))],
                      itins.rm>,
               EVEX_B, EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
2686
// Instantiates avx512_vector_fpclass for the 512-, 128- and 256-bit members
// of the vector-info bundle '_'. The 512-bit form only needs 'prd'; the
// 128/256-bit forms additionally require HasVLX. The "{z}"/"{x}"/"{y}"
// strings are passed through as the 'mem' mnemonic suffix.
multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
                                     bits<8> opc, SDNode OpNode,
                                     OpndItins itins, Predicate prd,
                                     string broadcast> {
  let Predicates = [prd] in
    defm Z : avx512_vector_fpclass<opc, OpcodeStr, OpNode, itins,
                                   _.info512, "{z}", broadcast>,
             EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, itins,
                                      _.info128, "{x}", broadcast>,
                EVEX_V128;
    defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, itins,
                                      _.info256, "{y}", broadcast>,
                EVEX_V256;
  }
}
2702
// FIXME: Is there a better scheduler itinerary for VFPCLASS?
//
// Top-level fpclass expansion: packed single/double (PS/PD) use opcVec and
// VecOpNode, scalar single/double (SS/SD) use opcScalar and ScalarOpNode.
// The 64-bit element variants additionally carry VEX_W.
multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
                                 bits<8> opcScalar, SDNode VecOpNode,
                                 SDNode ScalarOpNode, Predicate prd> {
  // Packed forms.
  defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec,
                                      VecOpNode, SSE_ALU_F32P, prd, "{l}">,
            EVEX_CD8<32, CD8VF>;
  defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec,
                                      VecOpNode, SSE_ALU_F64P, prd, "{q}">,
            EVEX_CD8<64, CD8VF>, VEX_W;

  // Scalar forms.
  defm SS : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
                                  SSE_ALU_F32S, f32x_info, prd>,
            EVEX_CD8<32, CD8VT1>;
  defm SD : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
                                  SSE_ALU_F64S, f64x_info, prd>,
            EVEX_CD8<64, CD8VT1>, VEX_W;
}
2719
// VFPCLASS: vector opcode 0x66, scalar opcode 0x67, gated on HasDQI.
defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67,
                                      X86Vfpclass, X86Vfpclasss, HasDQI>,
                AVX512AIi8Base, EVEX;
Asaf Badouh572bbce2015-09-20 08:46:07 +00002722
Elena Demikhovsky29792e92015-05-07 11:24:42 +00002723//-----------------------------------------------------------------
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002724// Mask register copy, including
2725// - copy between mask registers
2726// - load/store mask registers
2727// - copy from GPR to mask register and vice versa
2728//
// Mask-register moves for one mask width:
//   kk - mask register to mask register (no selection pattern)
//   km - load a mask register from memory
//   mk - store a mask register to memory
multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
                           string OpcodeStr, RegisterClass KRC,
                           ValueType vvt, X86MemOperand x86memop> {
  let hasSideEffects = 0, SchedRW = [WriteMove] in {
    def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
               OpcodeStr#"\t{$src, $dst|$dst, $src}", [],
               IIC_SSE_MOVDQ>,
             Sched<[WriteMove]>;
  }

  def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
             OpcodeStr#"\t{$src, $dst|$dst, $src}",
             [(set KRC:$dst, (vvt (load addr:$src)))],
             IIC_SSE_MOVDQ>,
           Sched<[WriteLoad]>;

  def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
             OpcodeStr#"\t{$src, $dst|$dst, $src}",
             [(store KRC:$src, addr:$dst)],
             IIC_SSE_MOVDQ>,
           Sched<[WriteStore]>;
}
2745
// Moves between a general-purpose register class (GRC) and a mask register
// class (KRC). Neither direction carries a selection pattern.
//   kr - GPR  -> mask
//   rk - mask -> GPR
multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
                               string OpcodeStr,
                               RegisterClass KRC, RegisterClass GRC> {
  let hasSideEffects = 0 in {
    def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
               OpcodeStr#"\t{$src, $dst|$dst, $src}", [],
               IIC_SSE_MOVD_ToGP>,
             Sched<[WriteMove]>;
    def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
               OpcodeStr#"\t{$src, $dst|$dst, $src}", [],
               IIC_SSE_MOVD_ToGP>,
             Sched<[WriteMove]>;
  }
}
2758
// KMOVB: 8-bit masks, requires DQI.
let Predicates = [HasDQI] in {
  defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
               VEX, PD;
}

// KMOVW: 16-bit masks, available with base AVX512.
let Predicates = [HasAVX512] in {
  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
               VEX, PS;
}

// KMOVD / KMOVQ: 32- and 64-bit masks, require BWI. The mask/memory forms
// and the GPR forms use different prefix encodings, so they are
// instantiated separately.
let Predicates = [HasBWI] in {
  defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1, i32mem>,
               VEX, PD, VEX_W;
  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
               VEX, XD;
  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
               VEX, PS, VEX_W;
  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
               VEX, XD, VEX_W;
}
2779
2780// GR from/to mask register
// 16-bit: go through a 32-bit GPR and use the 16-bit sub-register.
def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
          (COPY_TO_REGCLASS
            (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)),
            VK16)>;
def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
          (EXTRACT_SUBREG
            (i32 (COPY_TO_REGCLASS VK16:$src, GR32)),
            sub_16bit)>;

// 8-bit: same scheme using the 8-bit sub-register.
def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
          (COPY_TO_REGCLASS
            (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)),
            VK8)>;
def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
          (EXTRACT_SUBREG
            (i32 (COPY_TO_REGCLASS VK8:$src, GR32)),
            sub_8bit)>;

// Extending a 16-bit mask to i32: zext uses KMOVW, anyext is a plain copy.
def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
          (KMOVWrk VK16:$src)>;
def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
          (COPY_TO_REGCLASS VK16:$src, GR32)>;

// Extending an 8-bit mask to i32: zext uses KMOVB (DQI only), anyext is a
// plain copy.
let Predicates = [HasDQI] in
def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
          (KMOVBrk VK8:$src)>;
def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
          (COPY_TO_REGCLASS VK8:$src, GR32)>;

// 32- and 64-bit masks map directly onto GR32 / GR64.
def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
          (COPY_TO_REGCLASS GR32:$src, VK32)>;
def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
          (COPY_TO_REGCLASS VK32:$src, GR32)>;
def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
          (COPY_TO_REGCLASS GR64:$src, VK64)>;
def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
          (COPY_TO_REGCLASS VK64:$src, GR64)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002809
Robert Khasanov74acbb72014-07-23 14:49:42 +00002810// Load/store kreg
// With DQI, masks narrower than 8 bits are stored and loaded with KMOVB.
let Predicates = [HasDQI] in {
  // Store: widen the register class to VK8 and use the byte store.
  def : Pat<(store VK1:$src, addr:$dst),
            (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;

  // Loads: byte load, then copy into the narrower mask class.
  def : Pat<(v1i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
  def : Pat<(v2i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
  def : Pat<(v4i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
}
Elena Demikhovsky5e426f72016-04-03 08:41:12 +00002822
// A v8i1 bitconverted from an i8 load is selected as a zero-extending byte
// load into a GPR followed by a copy into VK8.
let Predicates = [HasAVX512] in
  def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
Elena Demikhovskyc5f67262013-12-17 08:33:15 +00002827
let Predicates = [HasAVX512] in {
  // scalar_to_vector from a GPR into a mask type: a GR32 source is copied
  // straight into the mask class; a GR8 source is first placed in the low
  // byte of an undefined 32-bit register.
  multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC,
                                              ValueType maskVT> {
    def : Pat<(maskVT (scalar_to_vector GR32:$src)),
              (COPY_TO_REGCLASS GR32:$src, maskRC)>;

    def : Pat<(maskVT (scalar_to_vector GR8:$src)),
              (COPY_TO_REGCLASS
                (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
                maskRC)>;
  }

  defm : operation_gpr_mask_copy_lowering<VK1,  v1i1>;
  defm : operation_gpr_mask_copy_lowering<VK2,  v2i1>;
  defm : operation_gpr_mask_copy_lowering<VK4,  v4i1>;
  defm : operation_gpr_mask_copy_lowering<VK8,  v8i1>;
  defm : operation_gpr_mask_copy_lowering<VK16, v16i1>;
  defm : operation_gpr_mask_copy_lowering<VK32, v32i1>;
  defm : operation_gpr_mask_copy_lowering<VK64, v64i1>;

  // Inserting a single bit into an all-zero v16i1: AND the source with 1 in
  // a GPR, then move it into a mask register.
  def : Pat<(insert_subvector (v16i1 immAllZerosV),
                              (v1i1 (scalar_to_vector GR8:$src)),
                              (iPTR 0)),
            (COPY_TO_REGCLASS
              (KMOVWkr
                (AND32ri8
                  (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
                  (i32 1))),
              VK16)>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002852
2853// Mask unary operation
2854// - KNOT
// One register-to-register unary mask instruction on class KRC, selected
// for 'OpNode' and gated on predicate 'prd'.
multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
                            RegisterClass KRC, SDPatternOperator OpNode,
                            OpndItins itins, Predicate prd> {
  let Predicates = [prd] in {
    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
               OpcodeStr#"\t{$src, $dst|$dst, $src}",
               [(set KRC:$dst, (OpNode KRC:$src))],
               itins.rr>,
             Sched<[itins.Sched]>;
  }
}
2864
// Expands a unary mask operation to all four mask widths:
//   B (VK8,  HasDQI)    W (VK16, HasAVX512)
//   D (VK32, HasBWI)    Q (VK64, HasBWI)
multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNode, OpndItins itins> {
  defm B : avx512_mask_unop<opc, OpcodeStr#"b", VK8,  OpNode, itins, HasDQI>,
           VEX, PD;
  defm W : avx512_mask_unop<opc, OpcodeStr#"w", VK16, OpNode, itins, HasAVX512>,
           VEX, PS;
  defm D : avx512_mask_unop<opc, OpcodeStr#"d", VK32, OpNode, itins, HasBWI>,
           VEX, PD, VEX_W;
  defm Q : avx512_mask_unop<opc, OpcodeStr#"q", VK64, OpNode, itins, HasBWI>,
           VEX, PS, VEX_W;
}
2876
defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SSE_BIT_ITINS_P>;

// Without DQI there is no 8-bit KNOT (e.g. KNL); the 8-bit mask is promoted
// to 16 bits and KNOTW is used instead.
let Predicates = [HasAVX512, NoDQI] in {
  def : Pat<(vnot VK8:$src),
            (COPY_TO_REGCLASS
              (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
}

// 4- and 2-bit masks are always widened to 16 bits for KNOTW.
def : Pat<(vnot VK4:$src),
          (COPY_TO_REGCLASS
            (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
def : Pat<(vnot VK2:$src),
          (COPY_TO_REGCLASS
            (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002888
2889// Mask binary operation
// - KAND, KANDN, KOR, KXNOR, KXOR, KADD
// One register-register binary mask instruction on class KRC, selected for
// 'OpNode', gated on 'prd'. 'IsCommutable' is forwarded to isCommutable.
multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
                             RegisterClass KRC, SDPatternOperator OpNode,
                             OpndItins itins, Predicate prd,
                             bit IsCommutable> {
  let Predicates = [prd], isCommutable = IsCommutable in {
    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))],
               itins.rr>,
             Sched<[itins.Sched]>;
  }
}
2901
// Expands a binary mask operation to all four mask widths. The B form
// requires HasDQI, D and Q require HasBWI, and the W form uses 'prdW'
// (HasAVX512 unless overridden).
multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
                                 SDPatternOperator OpNode, OpndItins itins,
                                 bit IsCommutable,
                                 Predicate prdW = HasAVX512> {
  defm B : avx512_mask_binop<opc, OpcodeStr#"b", VK8,  OpNode,
                             itins, HasDQI, IsCommutable>,
           VEX_4V, VEX_L, PD;
  defm W : avx512_mask_binop<opc, OpcodeStr#"w", VK16, OpNode,
                             itins, prdW, IsCommutable>,
           VEX_4V, VEX_L, PS;
  defm D : avx512_mask_binop<opc, OpcodeStr#"d", VK32, OpNode,
                             itins, HasBWI, IsCommutable>,
           VEX_4V, VEX_L, VEX_W, PD;
  defm Q : avx512_mask_binop<opc, OpcodeStr#"q", VK64, OpNode,
                             itins, HasBWI, IsCommutable>,
           VEX_4V, VEX_L, VEX_W, PS;
}
2914
// andn(a, b) = (~a) & b      xnor(a, b) = ~(a ^ b)
def andn  : PatFrag<(ops node:$i0, node:$i1),
                    (and (not node:$i0), node:$i1)>;
def xnor  : PatFrag<(ops node:$i0, node:$i1),
                    (not (xor node:$i0, node:$i1))>;
// These nodes use 'vnot' instead of 'not' to support vectors.
def vandn : PatFrag<(ops node:$i0, node:$i1),
                    (and (vnot node:$i0), node:$i1)>;
def vxnor : PatFrag<(ops node:$i0, node:$i1),
                    (vnot (xor node:$i0, node:$i1))>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002920
// Binary mask instructions. The last required argument is the commutable
// flag; KANDN is the only non-commutable one. KADD overrides the W-form
// predicate with HasDQI.
defm KAND  : avx512_mask_binop_all<0x41, "kand",  and,     SSE_BIT_ITINS_P, 1>;
defm KOR   : avx512_mask_binop_all<0x45, "kor",   or,      SSE_BIT_ITINS_P, 1>;
defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor,   SSE_BIT_ITINS_P, 1>;
defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor,     SSE_BIT_ITINS_P, 1>;
defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn,   SSE_BIT_ITINS_P, 0>;
defm KADD  : avx512_mask_binop_all<0x4A, "kadd",  X86kadd, SSE_BIT_ITINS_P, 1,
                                   HasDQI>;
Elena Demikhovskyb64d7e82013-12-25 10:06:40 +00002927
// Lowers binary mask operations on mask types that have no native
// instruction by widening both operands to VK16, applying the 16-bit
// instruction 'Inst', and copying the result back to the original class.
//   VOpNode - operator matched for the vector mask classes (VK2/VK4/VK8)
//   OpNode  - operator matched for VK1
//   Inst    - the 16-bit (W) mask instruction to use
multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode,
                            Instruction Inst> {
  // With AVX512F, 8-bit mask is promoted to 16-bit mask,
  // for the DQI set, this type is legal and KxxxB instruction is used
  let Predicates = [NoDQI] in
  def : Pat<(VOpNode VK8:$src1, VK8:$src2),
            (COPY_TO_REGCLASS
              (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
                    (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;

  // All types smaller than 8 bits require conversion anyway
  def : Pat<(OpNode VK1:$src1, VK1:$src2),
            (COPY_TO_REGCLASS
              (Inst (COPY_TO_REGCLASS VK1:$src1, VK16),
                    (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
  // The result must be copied back into the class of the matched value
  // (VK2 / VK4), not VK1.
  def : Pat<(VOpNode VK2:$src1, VK2:$src2),
            (COPY_TO_REGCLASS
              (Inst (COPY_TO_REGCLASS VK2:$src1, VK16),
                    (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
  def : Pat<(VOpNode VK4:$src1, VK4:$src2),
            (COPY_TO_REGCLASS
              (Inst (COPY_TO_REGCLASS VK4:$src1, VK16),
                    (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
}
2952
// Narrow-mask lowering for each binary mask operation, using the 16-bit
// instruction. Arguments: <vector operator, VK1 operator, instruction>.
defm : avx512_binop_pat<and,   and,  KANDWrr>;
defm : avx512_binop_pat<vandn, andn, KANDNWrr>;
defm : avx512_binop_pat<or,    or,   KORWrr>;
defm : avx512_binop_pat<vxnor, xnor, KXNORWrr>;
defm : avx512_binop_pat<xor,   xor,  KXORWrr>;
Elena Demikhovskyd1084c52015-04-27 12:57:59 +00002958
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002959// Mask unpacking
// KUNPCK<Suffix>: the instruction itself has no pattern; a separate Pat
// selects it for concat_vectors of two KRCSrc values. Note that the
// instruction operands are swapped relative to the concat_vectors operands.
multiclass avx512_mask_unpck<string Suffix, RegisterClass KRC, ValueType VT,
                             RegisterClass KRCSrc, OpndItins itins,
                             Predicate prd> {
  let Predicates = [prd] in {
    let hasSideEffects = 0 in {
      def rr : I<0x4b, MRMSrcReg, (outs KRC:$dst),
                 (ins KRC:$src1, KRC:$src2),
                 "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                 [], itins.rr>,
               VEX_4V, VEX_L, Sched<[itins.Sched]>;
    }

    def : Pat<(VT (concat_vectors KRCSrc:$src1, KRCSrc:$src2)),
              (!cast<Instruction>(NAME##rr)
                (COPY_TO_REGCLASS KRCSrc:$src2, KRC),
                (COPY_TO_REGCLASS KRCSrc:$src1, KRC))>;
  }
}
2975
// Mask unpack: 8+8 -> 16 (AVX512), 16+16 -> 32 and 32+32 -> 64 (BWI).
defm KUNPCKBW : avx512_mask_unpck<"bw", VK16, v16i1, VK8,  SSE_UNPCK,
                                  HasAVX512>, PD;
defm KUNPCKWD : avx512_mask_unpck<"wd", VK32, v32i1, VK16, SSE_UNPCK,
                                  HasBWI>, PS;
defm KUNPCKDQ : avx512_mask_unpck<"dq", VK64, v64i1, VK32, SSE_UNPCK,
                                  HasBWI>, PS, VEX_W;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002979
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002980// Mask bit testing
// A mask test instruction: reads two KRC registers, has no register output,
// and defines EFLAGS from 'OpNode'.
multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                              SDNode OpNode, OpndItins itins, Predicate prd> {
  let Predicates = [prd], Defs = [EFLAGS] in {
    def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
               OpcodeStr#"\t{$src2, $src1|$src1, $src2}",
               [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))],
               itins.rr>,
             Sched<[itins.Sched]>;
  }
}
2989
// Expands a mask test operation to all four widths. B requires HasDQI,
// Q and D require HasBWI, and W uses 'prdW' (HasAVX512 unless overridden).
multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                OpndItins itins,
                                Predicate prdW = HasAVX512> {
  defm B : avx512_mask_testop<opc, !strconcat(OpcodeStr, "b"), VK8,  OpNode,
                              itins, HasDQI>,
           VEX, PD;
  defm W : avx512_mask_testop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                              itins, prdW>,
           VEX, PS;
  defm Q : avx512_mask_testop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                              itins, HasBWI>,
           VEX, PS, VEX_W;
  defm D : avx512_mask_testop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                              itins, HasBWI>,
           VEX, PD, VEX_W;
}
3001
// KORTEST uses the default W predicate; KTEST overrides it with HasDQI.
defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SSE_PTEST>;
defm KTEST   : avx512_mask_testop_w<0x99, "ktest",   X86ktest,   SSE_PTEST,
                                    HasDQI>;
Elena Demikhovsky47fc44e2013-12-16 13:52:35 +00003004
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003005// Mask shift
// A mask shift by an 8-bit immediate on register class KRC.
multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                               SDNode OpNode, OpndItins itins> {
  let Predicates = [HasAVX512] in {
    def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
                 OpcodeStr#"\t{$imm, $src, $dst|$dst, $src, $imm}",
                 [(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))],
                 itins.rr>,
             Sched<[itins.Sched]>;
  }
}
3015
// Expands a mask shift to all four widths. W and B share opc1 (W carries
// VEX_W); Q and D share opc2 (Q carries VEX_W). B is wrapped in a HasDQI
// predicate, Q and D in HasBWI.
multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
                                 SDNode OpNode, OpndItins itins> {
  defm W : avx512_mask_shiftop<opc1, OpcodeStr#"w", VK16, OpNode, itins>,
           VEX, TAPD, VEX_W;

  let Predicates = [HasDQI] in {
    defm B : avx512_mask_shiftop<opc1, OpcodeStr#"b", VK8, OpNode, itins>,
             VEX, TAPD;
  }

  let Predicates = [HasBWI] in {
    defm Q : avx512_mask_shiftop<opc2, OpcodeStr#"q", VK64, OpNode, itins>,
             VEX, TAPD, VEX_W;
    defm D : avx512_mask_shiftop<opc2, OpcodeStr#"d", VK32, OpNode, itins>,
             VEX, TAPD;
  }
}
3030
// Mask shifts left / right. First opcode is for the W/B forms, second for
// the Q/D forms.
defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl,
                                     SSE_PSHUF>;
defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr,
                                     SSE_PSHUF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003033
// Lowers a packed integer compare on a Narrow vector type using the Wide
// (Z-suffixed) instruction: each narrow source is inserted into an undefined
// wide register, the wide compare is emitted, and the resulting mask is
// copied to the narrow mask class. The second pattern handles the compare
// ANDed with a mask by using the masked ("Zrrk") form.
multiclass axv512_icmp_packed_no_vlx_lowering<PatFrag Frag, string InstStr,
                                              X86VectorVTInfo Narrow,
                                              X86VectorVTInfo Wide> {
  // Unmasked compare.
  def : Pat<(Narrow.KVT (Frag (Narrow.VT Narrow.RC:$src1),
                              (Narrow.VT Narrow.RC:$src2))),
            (COPY_TO_REGCLASS
              (!cast<Instruction>(InstStr#"Zrr")
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF),
                                        Narrow.RC:$src1, Narrow.SubRegIdx)),
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF),
                                        Narrow.RC:$src2, Narrow.SubRegIdx))),
              Narrow.KRC)>;

  // Compare ANDed with $mask.
  def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                             (Frag (Narrow.VT Narrow.RC:$src1),
                                   (Narrow.VT Narrow.RC:$src2)))),
            (COPY_TO_REGCLASS
              (!cast<Instruction>(InstStr#"Zrrk")
                (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF),
                                        Narrow.RC:$src1, Narrow.SubRegIdx)),
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF),
                                        Narrow.RC:$src2, Narrow.SubRegIdx))),
              Narrow.KRC)>;
}
3055
// Lowers a condition-code compare (OpNode with an immediate $cc) on a Narrow
// vector type using the Wide (Z-suffixed) instruction: each narrow source is
// inserted into an undefined wide register, the wide compare is emitted with
// the same $cc, and the resulting mask is copied to the narrow mask class.
// The second pattern handles the compare ANDed with a mask by using the
// masked ("Zrrik") form.
//
// Instruction names are built with quoted suffixes, matching
// axv512_icmp_packed_no_vlx_lowering, rather than pasting bare identifiers.
multiclass axv512_icmp_packed_cc_no_vlx_lowering<SDNode OpNode, string InstStr,
                                                 X86VectorVTInfo Narrow,
                                                 X86VectorVTInfo Wide> {
  // Unmasked compare.
  def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1),
                                (Narrow.VT Narrow.RC:$src2), imm:$cc)),
            (COPY_TO_REGCLASS
              (!cast<Instruction>(InstStr#"Zrri")
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF),
                                        Narrow.RC:$src1, Narrow.SubRegIdx)),
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF),
                                        Narrow.RC:$src2, Narrow.SubRegIdx)),
                imm:$cc),
              Narrow.KRC)>;

  // Compare ANDed with $mask.
  def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                             (OpNode (Narrow.VT Narrow.RC:$src1),
                                     (Narrow.VT Narrow.RC:$src2), imm:$cc))),
            (COPY_TO_REGCLASS
              (!cast<Instruction>(InstStr#"Zrrik")
                (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF),
                                        Narrow.RC:$src1, Narrow.SubRegIdx)),
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF),
                                        Narrow.RC:$src2, Narrow.SubRegIdx)),
                imm:$cc),
              Narrow.KRC)>;
}
3076
// Without VLX, 128/256-bit dword/qword and float/double compares are
// performed with the 512-bit instructions.
let Predicates = [HasAVX512, NoVLX] in {
  // Greater-than / equal, 32-bit elements.
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm,   "VPCMPGTD",
                                            v8i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQD",
                                            v8i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm,   "VPCMPGTD",
                                            v4i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQD",
                                            v4i32x_info, v16i32_info>;

  // Greater-than / equal, 64-bit elements.
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm,   "VPCMPGTQ",
                                            v4i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQQ",
                                            v4i64x_info, v8i64_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm,   "VPCMPGTQ",
                                            v2i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQQ",
                                            v2i64x_info, v8i64_info>;

  // Condition-code compares, 32-bit elements.
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm,  "VCMPPS",
                                               v8f32x_info, v16f32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm,  "VPCMPD",
                                               v8i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUD",
                                               v8i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm,  "VCMPPS",
                                               v4f32x_info, v16f32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm,  "VPCMPD",
                                               v4i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUD",
                                               v4i32x_info, v16i32_info>;

  // Condition-code compares, 64-bit elements.
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm,  "VCMPPD",
                                               v4f64x_info, v8f64_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm,  "VPCMPQ",
                                               v4i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUQ",
                                               v4i64x_info, v8i64_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm,  "VCMPPD",
                                               v2f64x_info, v8f64_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm,  "VPCMPQ",
                                               v2i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUQ",
                                               v2i64x_info, v8i64_info>;
}
3106
// Byte and word element compare lowerings for targets that have BWI but not
// VLX. Same scheme as the HasAVX512/NoVLX group: each defm pairs a narrow
// i8/i16 vector type info with the v64i8/v32i16 wide type info and names the
// instruction prefix as a string.
Craig Toppera2018e792018-01-08 06:53:52 +00003107let Predicates = [HasBWI, NoVLX] in {
3108 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTB", v32i8x_info, v64i8_info>;
Craig Topper9471a7c2018-02-19 19:23:31 +00003109 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQB", v32i8x_info, v64i8_info>;
Craig Toppera2018e792018-01-08 06:53:52 +00003110
3111 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTB", v16i8x_info, v64i8_info>;
Craig Topper9471a7c2018-02-19 19:23:31 +00003112 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQB", v16i8x_info, v64i8_info>;
Craig Toppera2018e792018-01-08 06:53:52 +00003113
3114 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTW", v16i16x_info, v32i16_info>;
Craig Topper9471a7c2018-02-19 19:23:31 +00003115 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQW", v16i16x_info, v32i16_info>;
Craig Toppera2018e792018-01-08 06:53:52 +00003116
3117 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTW", v8i16x_info, v32i16_info>;
Craig Topper9471a7c2018-02-19 19:23:31 +00003118 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQW", v8i16x_info, v32i16_info>;
Craig Toppera2018e792018-01-08 06:53:52 +00003119
 // Condition-code compares: X86cmpm / X86cmpmu with an immediate predicate.
3120 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPB", v32i8x_info, v64i8_info>;
3121 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUB", v32i8x_info, v64i8_info>;
3122
3123 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPB", v16i8x_info, v64i8_info>;
3124 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUB", v16i8x_info, v64i8_info>;
3125
3126 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPW", v16i16x_info, v32i16_info>;
3127 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUW", v16i16x_info, v32i16_info>;
3128
3129 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPW", v8i16x_info, v32i16_info>;
3130 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUW", v8i16x_info, v32i16_info>;
3131}
3132
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003133// Mask setting all 0s or 1s
// Defines one pseudo instruction, named by the instantiating defm, that
// produces the constant Val (typed VT) in a mask register of class KRC.
//   KRC - register class of the result mask.
//   VT  - value type of the mask constant.
//   Val - pattern fragment for the constant to materialize.
// The pseudo has no inputs and an empty asm string. It is flagged as
// rematerializable and as cheap as a move, and is scheduled as WriteZero.
3134multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
3135 let Predicates = [HasAVX512] in
Simon Pilgrim9afbe772017-12-06 19:36:00 +00003136 let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
3137 SchedRW = [WriteZero] in
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003138 def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
3139 [(set KRC:$dst, (VT Val))]>;
3140}
3141
// Instantiates avx512_mask_setop for the three wide mask register classes,
// using suffix W for VK16/v16i1, D for VK32/v32i1 and Q for VK64/v64i1.
//   Val - constant pattern fragment forwarded to each instantiation.
3142multiclass avx512_mask_setop_w<PatFrag Val> {
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003143 defm W : avx512_mask_setop<VK16, v16i1, Val>;
Elena Demikhovskyd1084c52015-04-27 12:57:59 +00003144 defm D : avx512_mask_setop<VK32, v32i1, Val>;
3145 defm Q : avx512_mask_setop<VK64, v64i1, Val>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003146}
3147
// All-zeros and all-ones mask pseudos. Combined with the W/D/Q suffixes of
// avx512_mask_setop_w these yield names such as KSET0W and KSET1W, which the
// narrow-mask patterns further down refer to.
3148defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
3149defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
3150
3151// With AVX-512 only, 8-bit and narrower masks (v8i1, v4i1, v2i1, v1i1) are promoted to 16-bit mask.
// All-zeros / all-ones constants for v8i1, v4i1, v2i1 and v1i1 are produced
// with the 16-bit KSET0W / KSET1W pseudo and then moved into the narrower mask
// register class (VK8, VK4, VK2, VK1) with COPY_TO_REGCLASS.
3152let Predicates = [HasAVX512] in {
3153 def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
Igor Breger86724082016-08-14 05:25:07 +00003154 def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
3155 def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
Guy Blank548e22a2017-05-19 12:35:15 +00003156 def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003157 def : Pat<(v8i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK8)>;
Elena Demikhovskyd1084c52015-04-27 12:57:59 +00003158 def : Pat<(v4i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK4)>;
3159 def : Pat<(v2i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK2)>;
Guy Blank548e22a2017-05-19 12:35:15 +00003160 def : Pat<(v1i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK1)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003161}
Igor Bregerf1bd7612016-03-06 07:46:03 +00003162
3163// Patterns for kmask insert_subvector/extract_subvector to/from index=0
//   subRC / subVT - register class and value type of the narrower mask.
//   RC / VT       - register class and value type of the wider mask.
// Both directions are selected as a COPY_TO_REGCLASS between the two mask
// register classes; no dedicated instruction appears in the result patterns.
3164multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
3165 RegisterClass RC, ValueType VT> {
 // Extract the subvector at index 0 of the wider mask.
3166 def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
3167 (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;
Simon Pilgrimb13961d2016-06-11 14:34:10 +00003168
 // Insert the narrower mask at index 0 of an undef wider mask.
Igor Bregerf1bd7612016-03-06 07:46:03 +00003169 def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
Simon Pilgrimb13961d2016-06-11 14:34:10 +00003170 (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
Igor Bregerf1bd7612016-03-06 07:46:03 +00003171}
// Instantiate the index-0 insert/extract patterns for every (narrower, wider)
// pair of mask types among v1i1, v2i1, v4i1, v8i1, v16i1, v32i1 and v64i1.
Guy Blank548e22a2017-05-19 12:35:15 +00003172defm : operation_subvector_mask_lowering<VK1, v1i1, VK2, v2i1>;
3173defm : operation_subvector_mask_lowering<VK1, v1i1, VK4, v4i1>;
3174defm : operation_subvector_mask_lowering<VK1, v1i1, VK8, v8i1>;
3175defm : operation_subvector_mask_lowering<VK1, v1i1, VK16, v16i1>;
3176defm : operation_subvector_mask_lowering<VK1, v1i1, VK32, v32i1>;
3177defm : operation_subvector_mask_lowering<VK1, v1i1, VK64, v64i1>;
Igor Bregerf1bd7612016-03-06 07:46:03 +00003178
3179defm : operation_subvector_mask_lowering<VK2, v2i1, VK4, v4i1>;
3180defm : operation_subvector_mask_lowering<VK2, v2i1, VK8, v8i1>;
3181defm : operation_subvector_mask_lowering<VK2, v2i1, VK16, v16i1>;
3182defm : operation_subvector_mask_lowering<VK2, v2i1, VK32, v32i1>;
3183defm : operation_subvector_mask_lowering<VK2, v2i1, VK64, v64i1>;
3184
3185defm : operation_subvector_mask_lowering<VK4, v4i1, VK8, v8i1>;
3186defm : operation_subvector_mask_lowering<VK4, v4i1, VK16, v16i1>;
3187defm : operation_subvector_mask_lowering<VK4, v4i1, VK32, v32i1>;
3188defm : operation_subvector_mask_lowering<VK4, v4i1, VK64, v64i1>;
3189
3190defm : operation_subvector_mask_lowering<VK8, v8i1, VK16, v16i1>;
3191defm : operation_subvector_mask_lowering<VK8, v8i1, VK32, v32i1>;
3192defm : operation_subvector_mask_lowering<VK8, v8i1, VK64, v64i1>;
3193
3194defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
3195defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;
3196
3197defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003198
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003199//===----------------------------------------------------------------------===//
3200// AVX-512 - Aligned and unaligned load and store
3201//
3202
Elena Demikhovsky2689d782015-03-02 12:46:21 +00003203
// Defines the register-move and load forms of one vector move opcode for a
// single vector type, plus the patterns that select masked loads.
//   opc         - opcode byte shared by all forms.
//   OpcodeStr   - assembly mnemonic.
//   itins       - itinerary bundle; .rr is used for register forms and .rm for
//                 memory forms.
//   _           - vector type info (register class, mask class, memory
//                 operand, VT, execution domain, ...).
//   ld_frag     - load fragment used in the rm / rmk / rmkz patterns.
//   mload       - masked-load fragment matched by the trailing Pat records.
//   NoRMPattern - when set, the unmasked rm form gets an empty pattern list.
//   SelectOprr  - select operator used in the rrkz and rrk patterns (default
//                 vselect). The rmk / rmkz patterns always use vselect.
// Forms defined: rr, rrkz, rm, rrk, rmk, rmkz.
Simon Pilgrimdf052512017-12-06 17:59:26 +00003204multiclass avx512_load<bits<8> opc, string OpcodeStr, MoveLoadStoreItins itins,
3205 X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
3206 bit NoRMPattern = 0,
3207 SDPatternOperator SelectOprr = vselect> {
Elena Demikhovsky2689d782015-03-02 12:46:21 +00003208 let hasSideEffects = 0 in {
 // Plain register-to-register move; no selection pattern.
3209 def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
Robert Khasanov7ca7df02014-08-04 14:35:15 +00003210 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
Simon Pilgrimdf052512017-12-06 17:59:26 +00003211 _.ExeDomain, itins.rr>, EVEX, Sched<[WriteMove]>;
 // Zero-masked register move: selects $src where the mask is set, zero
 // elsewhere.
Elena Demikhovsky2689d782015-03-02 12:46:21 +00003212 def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3213 (ins _.KRCWM:$mask, _.RC:$src),
Robert Khasanov7ca7df02014-08-04 14:35:15 +00003214 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
Simon Pilgrim18bcf932016-02-03 09:41:59 +00003215 "${dst} {${mask}} {z}, $src}"),
Craig Topper5c46c752017-01-08 05:46:21 +00003216 [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
Igor Breger7a000f52016-01-21 14:18:11 +00003217 (_.VT _.RC:$src),
Simon Pilgrimdf052512017-12-06 17:59:26 +00003218 _.ImmAllZerosV)))], _.ExeDomain,
3219 itins.rr>, EVEX, EVEX_KZ, Sched<[WriteMove]>;
Elena Demikhovsky2689d782015-03-02 12:46:21 +00003220
 // Unmasked load. The let below has no braces, so it applies to rm only.
Simon Pilgrimdf052512017-12-06 17:59:26 +00003221 let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
Elena Demikhovsky2689d782015-03-02 12:46:21 +00003222 def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
Robert Khasanov7ca7df02014-08-04 14:35:15 +00003223 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
Craig Toppercb0e7492017-07-31 17:35:44 +00003224 !if(NoRMPattern, [],
3225 [(set _.RC:$dst,
3226 (_.VT (bitconvert (ld_frag addr:$src))))]),
Simon Pilgrimdf052512017-12-06 17:59:26 +00003227 _.ExeDomain, itins.rm>, EVEX, Sched<[WriteLoad]>;
Robert Khasanov7ca7df02014-08-04 14:35:15 +00003228
 // Merge-masked forms: $src0 is tied to $dst and supplies the elements kept
 // where the mask is clear.
Craig Topper63e2cd62017-01-14 07:50:52 +00003229 let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
Simon Pilgrimdf052512017-12-06 17:59:26 +00003230 def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3231 (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
3232 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3233 "${dst} {${mask}}, $src1}"),
3234 [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3235 (_.VT _.RC:$src1),
3236 (_.VT _.RC:$src0))))], _.ExeDomain,
3237 itins.rr>, EVEX, EVEX_K, Sched<[WriteMove]>;
Elena Demikhovsky2689d782015-03-02 12:46:21 +00003238 def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3239 (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
Robert Khasanov7ca7df02014-08-04 14:35:15 +00003240 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3241 "${dst} {${mask}}, $src1}"),
Elena Demikhovsky2689d782015-03-02 12:46:21 +00003242 [(set _.RC:$dst, (_.VT
3243 (vselect _.KRCWM:$mask,
3244 (_.VT (bitconvert (ld_frag addr:$src1))),
Simon Pilgrimdf052512017-12-06 17:59:26 +00003245 (_.VT _.RC:$src0))))], _.ExeDomain, itins.rm>,
3246 EVEX, EVEX_K, Sched<[WriteLoad]>;
Elena Demikhovskyfd056672014-03-13 12:05:52 +00003247 }
 // Zero-masked load.
Elena Demikhovsky2689d782015-03-02 12:46:21 +00003248 def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3249 (ins _.KRCWM:$mask, _.MemOp:$src),
3250 OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
3251 "${dst} {${mask}} {z}, $src}",
3252 [(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask,
3253 (_.VT (bitconvert (ld_frag addr:$src))), _.ImmAllZerosV)))],
Simon Pilgrimdf052512017-12-06 17:59:26 +00003254 _.ExeDomain, itins.rm>, EVEX, EVEX_KZ, Sched<[WriteLoad]>;
Robert Khasanov7ca7df02014-08-04 14:35:15 +00003255 }
 // Masked load with an undef pass-through selects the zero-masked load.
Elena Demikhovskyd207f172015-03-03 15:03:35 +00003256 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
3257 (!cast<Instruction>(NAME#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;
3258
 // Masked load with an all-zeros pass-through also selects rmkz.
3259 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
3260 (!cast<Instruction>(NAME#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;
3261
 // Masked load with a register pass-through selects the merge-masked load.
3262 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
3263 (!cast<Instruction>(NAME#_.ZSuffix##rmk) _.RC:$src0,
3264 _.KRCWM:$mask, addr:$ptr)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003265}
3266
// Instantiates avx512_load for the aligned form of a move at all three vector
// lengths, using the SSE_MOVA itineraries.
//   opc / OpcodeStr - opcode byte and mnemonic forwarded to avx512_load.
//   _               - bundle of type infos (info512 / info256 / info128).
//   prd             - predicate required for every form.
//   NoRMPattern     - forwarded to avx512_load.
// Z is guarded by prd alone; Z256 and Z128 additionally require HasVLX. Each
// length uses its own AlignedLdFrag and masked_load_aligned{512,256,128}.
Elena Demikhovsky2689d782015-03-02 12:46:21 +00003267multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
3268 AVX512VLVectorVTInfo _,
Craig Topper21c8a8f2018-01-18 07:44:06 +00003269 Predicate prd,
3270 bit NoRMPattern = 0> {
Robert Khasanov7ca7df02014-08-04 14:35:15 +00003271 let Predicates = [prd] in
Simon Pilgrimdf052512017-12-06 17:59:26 +00003272 defm Z : avx512_load<opc, OpcodeStr, SSE_MOVA, _.info512,
Craig Topper21c8a8f2018-01-18 07:44:06 +00003273 _.info512.AlignedLdFrag, masked_load_aligned512,
3274 NoRMPattern>, EVEX_V512;
Robert Khasanov7ca7df02014-08-04 14:35:15 +00003275
3276 let Predicates = [prd, HasVLX] in {
Simon Pilgrimdf052512017-12-06 17:59:26 +00003277 defm Z256 : avx512_load<opc, OpcodeStr, SSE_MOVA, _.info256,
Craig Topper21c8a8f2018-01-18 07:44:06 +00003278 _.info256.AlignedLdFrag, masked_load_aligned256,
3279 NoRMPattern>, EVEX_V256;
Simon Pilgrimdf052512017-12-06 17:59:26 +00003280 defm Z128 : avx512_load<opc, OpcodeStr, SSE_MOVA, _.info128,
Craig Topper21c8a8f2018-01-18 07:44:06 +00003281 _.info128.AlignedLdFrag, masked_load_aligned128,
3282 NoRMPattern>, EVEX_V128;
Robert Khasanov7ca7df02014-08-04 14:35:15 +00003283 }
3284}
3285
// Instantiates avx512_load for the unaligned form of a move at all three
// vector lengths, using the SSE_MOVU itineraries, each type's LdFrag and
// masked_load_unaligned.
//   opc / OpcodeStr - opcode byte and mnemonic forwarded to avx512_load.
//   _               - bundle of type infos (info512 / info256 / info128).
//   prd             - predicate required for every form.
//   NoRMPattern     - forwarded to avx512_load.
//   SelectOprr      - forwarded to avx512_load (default vselect).
// Z is guarded by prd alone; Z256 and Z128 additionally require HasVLX.
Elena Demikhovsky2689d782015-03-02 12:46:21 +00003286multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
3287 AVX512VLVectorVTInfo _,
3288 Predicate prd,
Craig Toppercb0e7492017-07-31 17:35:44 +00003289 bit NoRMPattern = 0,
Craig Topperc9293492016-02-26 06:50:29 +00003290 SDPatternOperator SelectOprr = vselect> {
Elena Demikhovsky2689d782015-03-02 12:46:21 +00003291 let Predicates = [prd] in
Simon Pilgrimdf052512017-12-06 17:59:26 +00003292 defm Z : avx512_load<opc, OpcodeStr, SSE_MOVU, _.info512, _.info512.LdFrag,
Craig Toppercb0e7492017-07-31 17:35:44 +00003293 masked_load_unaligned, NoRMPattern,
3294 SelectOprr>, EVEX_V512;
Robert Khasanov7ca7df02014-08-04 14:35:15 +00003295
Elena Demikhovsky2689d782015-03-02 12:46:21 +00003296 let Predicates = [prd, HasVLX] in {
Simon Pilgrimdf052512017-12-06 17:59:26 +00003297 defm Z256 : avx512_load<opc, OpcodeStr, SSE_MOVU, _.info256, _.info256.LdFrag,
Craig Toppercb0e7492017-07-31 17:35:44 +00003298 masked_load_unaligned, NoRMPattern,
3299 SelectOprr>, EVEX_V256;
Simon Pilgrimdf052512017-12-06 17:59:26 +00003300 defm Z128 : avx512_load<opc, OpcodeStr, SSE_MOVU, _.info128, _.info128.LdFrag,
Craig Toppercb0e7492017-07-31 17:35:44 +00003301 masked_load_unaligned, NoRMPattern,
3302 SelectOprr>, EVEX_V128;
Elena Demikhovsky2689d782015-03-02 12:46:21 +00003303 }
3304}
3305
// Defines the store-direction forms of one vector move opcode for a single
// vector type, plus the pattern that selects masked stores.
//   opc         - opcode byte shared by all forms.
//   OpcodeStr   - assembly mnemonic.
//   itins       - itinerary bundle; .rr for register forms, .mr for stores.
//   _           - vector type info.
//   st_frag     - store fragment used in the mr pattern.
//   mstore      - masked-store fragment matched by the trailing Pat.
//   Name        - instruction-name prefix used to build the FoldGenData
//                 references (Name#rr, Name#rrk, Name#rrkz).
//   NoMRPattern - when set, the unmasked mr form gets an empty pattern list.
// Forms defined: rr_REV, rrk_REV, rrkz_REV, mr, mrk.
Simon Pilgrimdf052512017-12-06 17:59:26 +00003306multiclass avx512_store<bits<8> opc, string OpcodeStr, MoveLoadStoreItins itins,
3307 X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
3308 string Name, bit NoMRPattern = 0> {
 // Register-to-register forms using the MRMDestReg encoding. They carry no
 // selection patterns and print with a ".s" mnemonic suffix.
Craig Topper99f6b622016-05-01 01:03:56 +00003309 let hasSideEffects = 0 in {
Igor Breger81b79de2015-11-19 07:43:43 +00003310 def rr_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
3311 OpcodeStr # ".s\t{$src, $dst|$dst, $src}",
Simon Pilgrimdf052512017-12-06 17:59:26 +00003312 [], _.ExeDomain, itins.rr>, EVEX, FoldGenData<Name#rr>,
3313 Sched<[WriteMove]>;
Igor Breger81b79de2015-11-19 07:43:43 +00003314 def rrk_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
3315 (ins _.KRCWM:$mask, _.RC:$src),
3316 OpcodeStr # ".s\t{$src, ${dst} {${mask}}|"#
3317 "${dst} {${mask}}, $src}",
Simon Pilgrimdf052512017-12-06 17:59:26 +00003318 [], _.ExeDomain, itins.rr>, EVEX, EVEX_K,
3319 FoldGenData<Name#rrk>, Sched<[WriteMove]>;
Igor Breger81b79de2015-11-19 07:43:43 +00003320 def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
Elena Demikhovsky2689d782015-03-02 12:46:21 +00003321 (ins _.KRCWM:$mask, _.RC:$src),
Igor Breger81b79de2015-11-19 07:43:43 +00003322 OpcodeStr # ".s\t{$src, ${dst} {${mask}} {z}|" #
Elena Demikhovsky2689d782015-03-02 12:46:21 +00003323 "${dst} {${mask}} {z}, $src}",
Simon Pilgrimdf052512017-12-06 17:59:26 +00003324 [], _.ExeDomain, itins.rr>, EVEX, EVEX_KZ,
3325 FoldGenData<Name#rrkz>, Sched<[WriteMove]>;
Craig Topper99f6b622016-05-01 01:03:56 +00003326 }
Igor Breger81b79de2015-11-19 07:43:43 +00003327
 // Unmasked store. The let below has no braces, so it applies to mr only.
Craig Topper2462a712017-08-01 15:31:24 +00003328 let hasSideEffects = 0, mayStore = 1 in
Elena Demikhovsky2689d782015-03-02 12:46:21 +00003329 def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
Robert Khasanov7ca7df02014-08-04 14:35:15 +00003330 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
Craig Topper2462a712017-08-01 15:31:24 +00003331 !if(NoMRPattern, [],
3332 [(st_frag (_.VT _.RC:$src), addr:$dst)]),
Simon Pilgrimdf052512017-12-06 17:59:26 +00003333 _.ExeDomain, itins.mr>, EVEX, Sched<[WriteStore]>;
 // Masked store; has no inline pattern and is selected by the Pat below.
 // NOTE(review): mrk is outside the let above, so it sets neither mayStore
 // nor hasSideEffects explicitly - confirm the inferred flags are intended.
Elena Demikhovskyfd056672014-03-13 12:05:52 +00003334 def mrk : AVX512PI<opc, MRMDestMem, (outs),
Elena Demikhovsky2689d782015-03-02 12:46:21 +00003335 (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
3336 OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
Simon Pilgrimdf052512017-12-06 17:59:26 +00003337 [], _.ExeDomain, itins.mr>, EVEX, EVEX_K, Sched<[WriteStore]>;
Elena Demikhovskyd207f172015-03-03 15:03:35 +00003338
 // Masked store of a register value selects the mrk form.
3339 def: Pat<(mstore addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src)),
3340 (!cast<Instruction>(NAME#_.ZSuffix##mrk) addr:$ptr,
3341 _.KRCWM:$mask, _.RC:$src)>;
Elena Demikhovskyfd056672014-03-13 12:05:52 +00003342}
3343
Robert Khasanov7ca7df02014-08-04 14:35:15 +00003344
// Instantiates avx512_store for the unaligned form of a move at all three
// vector lengths, using the SSE_MOVU itineraries, the generic store fragment
// and masked_store_unaligned.
//   opc / OpcodeStr - opcode byte and mnemonic forwarded to avx512_store.
//   _               - bundle of type infos (info512 / info256 / info128).
//   prd             - predicate required for every form.
//   Name            - name prefix; suffixed with Z / Z256 / Z128 before being
//                     forwarded.
//   NoMRPattern     - forwarded to avx512_store.
// Z is guarded by prd alone; Z256 and Z128 additionally require HasVLX.
Elena Demikhovsky2689d782015-03-02 12:46:21 +00003345multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
Ayman Musa0b4f97d2017-05-28 12:39:37 +00003346 AVX512VLVectorVTInfo _, Predicate prd,
Craig Topper2462a712017-08-01 15:31:24 +00003347 string Name, bit NoMRPattern = 0> {
Robert Khasanov7ca7df02014-08-04 14:35:15 +00003348 let Predicates = [prd] in
Simon Pilgrimdf052512017-12-06 17:59:26 +00003349 defm Z : avx512_store<opc, OpcodeStr, SSE_MOVU, _.info512, store,
Craig Topper2462a712017-08-01 15:31:24 +00003350 masked_store_unaligned, Name#Z, NoMRPattern>, EVEX_V512;
Robert Khasanov7ca7df02014-08-04 14:35:15 +00003351
3352 let Predicates = [prd, HasVLX] in {
Simon Pilgrimdf052512017-12-06 17:59:26 +00003353 defm Z256 : avx512_store<opc, OpcodeStr, SSE_MOVU, _.info256, store,
Craig Topper2462a712017-08-01 15:31:24 +00003354 masked_store_unaligned, Name#Z256,
3355 NoMRPattern>, EVEX_V256;
Simon Pilgrimdf052512017-12-06 17:59:26 +00003356 defm Z128 : avx512_store<opc, OpcodeStr, SSE_MOVU, _.info128, store,
Craig Topper2462a712017-08-01 15:31:24 +00003357 masked_store_unaligned, Name#Z128,
3358 NoMRPattern>, EVEX_V128;
Robert Khasanov7ca7df02014-08-04 14:35:15 +00003359 }
3360}
3361
// Instantiates avx512_store for the aligned form of a move at all three
// vector lengths, using the SSE_MOVA itineraries, the alignedstore fragment
// and masked_store_aligned{512,256,128}.
//   opc / OpcodeStr - opcode byte and mnemonic forwarded to avx512_store.
//   _               - bundle of type infos (info512 / info256 / info128).
//   prd             - predicate required for every form.
//   Name            - name prefix; suffixed with Z / Z256 / Z128 before being
//                     forwarded.
//   NoMRPattern     - forwarded to avx512_store.
// Z is guarded by prd alone; Z256 and Z128 additionally require HasVLX.
Elena Demikhovsky2689d782015-03-02 12:46:21 +00003362multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
Ayman Musa0b4f97d2017-05-28 12:39:37 +00003363 AVX512VLVectorVTInfo _, Predicate prd,
Craig Topper83b0a982018-01-18 07:44:09 +00003364 string Name, bit NoMRPattern = 0> {
Elena Demikhovsky2689d782015-03-02 12:46:21 +00003365 let Predicates = [prd] in
Simon Pilgrimdf052512017-12-06 17:59:26 +00003366 defm Z : avx512_store<opc, OpcodeStr, SSE_MOVA, _.info512, alignedstore,
Craig Topper571231a2018-01-29 23:27:23 +00003367 masked_store_aligned512, Name#Z,
3368 NoMRPattern>, EVEX_V512;
Elena Demikhovsky2689d782015-03-02 12:46:21 +00003369
3370 let Predicates = [prd, HasVLX] in {
Simon Pilgrimdf052512017-12-06 17:59:26 +00003371 defm Z256 : avx512_store<opc, OpcodeStr, SSE_MOVA, _.info256, alignedstore,
Craig Topper571231a2018-01-29 23:27:23 +00003372 masked_store_aligned256, Name#Z256,
3373 NoMRPattern>, EVEX_V256;
Simon Pilgrimdf052512017-12-06 17:59:26 +00003374 defm Z128 : avx512_store<opc, OpcodeStr, SSE_MOVA, _.info128, alignedstore,
Craig Topper571231a2018-01-29 23:27:23 +00003375 masked_store_aligned128, Name#Z128,
3376 NoMRPattern>, EVEX_V128;
Elena Demikhovsky2689d782015-03-02 12:46:21 +00003377 }
3378}
3379
// Floating-point vector moves. Each defm combines a load multiclass (opcode
// 0x28 aligned / 0x10 unaligned) with the matching store multiclass (0x29 /
// 0x11) under HasAVX512. The PD forms add VEX_W and use 64-bit elements in
// EVEX_CD8.
// The unaligned forms pass null_frag as SelectOprr, so the rrk / rrkz select
// patterns of avx512_load are built on null_frag rather than vselect
// (presumably to leave register selects to the aligned forms - confirm).
3380defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
3381 HasAVX512>,
3382 avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
Ayman Musa0b4f97d2017-05-28 12:39:37 +00003383 HasAVX512, "VMOVAPS">,
3384 PS, EVEX_CD8<32, CD8VF>;
Elena Demikhovsky2689d782015-03-02 12:46:21 +00003385
3386defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
3387 HasAVX512>,
3388 avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
Ayman Musa0b4f97d2017-05-28 12:39:37 +00003389 HasAVX512, "VMOVAPD">,
3390 PD, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovsky2689d782015-03-02 12:46:21 +00003391
Craig Topperc9293492016-02-26 06:50:29 +00003392defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
Craig Toppercb0e7492017-07-31 17:35:44 +00003393 0, null_frag>,
Ayman Musa0b4f97d2017-05-28 12:39:37 +00003394 avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
3395 "VMOVUPS">,
Robert Khasanov7ca7df02014-08-04 14:35:15 +00003396 PS, EVEX_CD8<32, CD8VF>;
3397
Craig Topper4e7b8882016-10-03 02:00:29 +00003398defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
Craig Toppercb0e7492017-07-31 17:35:44 +00003399 0, null_frag>,
Ayman Musa0b4f97d2017-05-28 12:39:37 +00003400 avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
3401 "VMOVUPD">,
Elena Demikhovsky2689d782015-03-02 12:46:21 +00003402 PD, VEX_W, EVEX_CD8<64, CD8VF>;
Robert Khasanov7ca7df02014-08-04 14:35:15 +00003403
// Integer vector moves: loads use opcode 0x6F and stores 0x7F. The 8- and
// 16-bit element forms require HasBWI; the rest require HasAVX512.
// VMOVDQA32, VMOVDQU8, VMOVDQU16 and VMOVDQU32 pass 1 for NoRMPattern and
// NoMRPattern, so their unmasked rm / mr forms carry no selection pattern.
// VMOVDQA64 and VMOVDQU64 keep the defaults (0), so theirs do. Separate Pat
// records further down in the file map the other integer element types'
// unmasked stores onto the 64-bit-element forms.
// VMOVDQU32 and VMOVDQU64 also pass null_frag as SelectOprr.
Elena Demikhovsky2689d782015-03-02 12:46:21 +00003404defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
Craig Topper21c8a8f2018-01-18 07:44:06 +00003405 HasAVX512, 1>,
Elena Demikhovsky2689d782015-03-02 12:46:21 +00003406 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
Craig Topper83b0a982018-01-18 07:44:09 +00003407 HasAVX512, "VMOVDQA32", 1>,
Ayman Musa0b4f97d2017-05-28 12:39:37 +00003408 PD, EVEX_CD8<32, CD8VF>;
Robert Khasanov7ca7df02014-08-04 14:35:15 +00003409
Elena Demikhovsky2689d782015-03-02 12:46:21 +00003410defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
3411 HasAVX512>,
3412 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
Simon Pilgrim64fff142017-07-16 18:37:23 +00003413 HasAVX512, "VMOVDQA64">,
Ayman Musa0b4f97d2017-05-28 12:39:37 +00003414 PD, VEX_W, EVEX_CD8<64, CD8VF>;
Robert Khasanov7ca7df02014-08-04 14:35:15 +00003415
Craig Toppercb0e7492017-07-31 17:35:44 +00003416defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI, 1>,
Ayman Musa0b4f97d2017-05-28 12:39:37 +00003417 avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info,
Craig Topper2462a712017-08-01 15:31:24 +00003418 HasBWI, "VMOVDQU8", 1>,
Ayman Musa0b4f97d2017-05-28 12:39:37 +00003419 XD, EVEX_CD8<8, CD8VF>;
Robert Khasanov7ca7df02014-08-04 14:35:15 +00003420
Craig Toppercb0e7492017-07-31 17:35:44 +00003421defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI, 1>,
Elena Demikhovsky2689d782015-03-02 12:46:21 +00003422 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info,
Craig Topper2462a712017-08-01 15:31:24 +00003423 HasBWI, "VMOVDQU16", 1>,
Ayman Musa0b4f97d2017-05-28 12:39:37 +00003424 XD, VEX_W, EVEX_CD8<16, CD8VF>;
Robert Khasanov7ca7df02014-08-04 14:35:15 +00003425
Craig Topperc9293492016-02-26 06:50:29 +00003426defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
Craig Topper21c8a8f2018-01-18 07:44:06 +00003427 1, null_frag>,
Elena Demikhovsky2689d782015-03-02 12:46:21 +00003428 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info,
Craig Topper83b0a982018-01-18 07:44:09 +00003429 HasAVX512, "VMOVDQU32", 1>,
Ayman Musa0b4f97d2017-05-28 12:39:37 +00003430 XS, EVEX_CD8<32, CD8VF>;
Robert Khasanov7ca7df02014-08-04 14:35:15 +00003431
Craig Topperc9293492016-02-26 06:50:29 +00003432defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
Craig Toppercb0e7492017-07-31 17:35:44 +00003433 0, null_frag>,
Elena Demikhovsky2689d782015-03-02 12:46:21 +00003434 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info,
Simon Pilgrim64fff142017-07-16 18:37:23 +00003435 HasAVX512, "VMOVDQU64">,
Ayman Musa0b4f97d2017-05-28 12:39:37 +00003436 XS, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovsky1f3ed412013-10-22 09:19:28 +00003437
Craig Topperd875d6b2016-09-29 06:07:09 +00003438// Special instructions to help with spilling when we don't have VLX. We need
3439// to load or store from a ZMM register instead. These are converted in
3440// expandPostRAPseudos.
// Load pseudos for VR128X / VR256X destinations. They have no asm string and
// no selection pattern, and are flagged mayLoad, foldable as a load and
// rematerializable.
// NOTE(review): the VMOVUPS* pseudos use the same IIC_SSE_MOVA_P_RM itinerary
// as the VMOVAPS* ones - confirm this is intended.
Craig Toppereab23d32016-10-03 02:22:33 +00003441let isReMaterializable = 1, canFoldAsLoad = 1,
Craig Topperd875d6b2016-09-29 06:07:09 +00003442 isPseudo = 1, SchedRW = [WriteLoad], mayLoad = 1, hasSideEffects = 0 in {
3443def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
Simon Pilgrimdf052512017-12-06 17:59:26 +00003444 "", [], IIC_SSE_MOVA_P_RM>;
Craig Topperd875d6b2016-09-29 06:07:09 +00003445def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
Simon Pilgrimdf052512017-12-06 17:59:26 +00003446 "", [], IIC_SSE_MOVA_P_RM>;
Craig Topperd875d6b2016-09-29 06:07:09 +00003447def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
Simon Pilgrimdf052512017-12-06 17:59:26 +00003448 "", [], IIC_SSE_MOVA_P_RM>;
Craig Topperd875d6b2016-09-29 06:07:09 +00003449def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
Simon Pilgrimdf052512017-12-06 17:59:26 +00003450 "", [], IIC_SSE_MOVA_P_RM>;
Craig Topperd875d6b2016-09-29 06:07:09 +00003451}
3452
// Store pseudos for VR128X / VR256X sources, with the same _NOVLX naming as
// the load pseudos. They have no asm string and no selection pattern, and are
// flagged mayStore.
// NOTE(review): the VMOVUPS* pseudos use the same IIC_SSE_MOVA_P_MR itinerary
// as the VMOVAPS* ones - confirm this is intended.
Simon Pilgrimdf052512017-12-06 17:59:26 +00003453let isPseudo = 1, SchedRW = [WriteStore], mayStore = 1, hasSideEffects = 0 in {
Craig Topperf3e671e2016-09-30 05:35:47 +00003454def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
Simon Pilgrimdf052512017-12-06 17:59:26 +00003455 "", [], IIC_SSE_MOVA_P_MR>;
Craig Topperf3e671e2016-09-30 05:35:47 +00003456def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
Simon Pilgrimdf052512017-12-06 17:59:26 +00003457 "", [], IIC_SSE_MOVA_P_MR>;
Craig Topperf3e671e2016-09-30 05:35:47 +00003458def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
Simon Pilgrimdf052512017-12-06 17:59:26 +00003459 "", [], IIC_SSE_MOVA_P_MR>;
Craig Topperf3e671e2016-09-30 05:35:47 +00003460def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
Simon Pilgrimdf052512017-12-06 17:59:26 +00003461 "", [], IIC_SSE_MOVA_P_MR>;
Craig Topperd875d6b2016-09-29 06:07:09 +00003462}
3463
// Selects that yield zero where the mask is set and $src elsewhere. They are
// implemented as a zero-masking move (rrkz) under the inverted mask, with
// KNOTWrr performing the inversion.
// For v8i64 the 8-bit mask is first copied to VK16 so that the 16-bit KNOTW
// can be used, then copied back to VK8.
Elena Demikhovskya30e4372014-02-05 07:05:03 +00003464def : Pat<(v8i64 (vselect VK8WM:$mask, (bc_v8i64 (v16i32 immAllZerosV)),
Robert Khasanov7ca7df02014-08-04 14:35:15 +00003465 (v8i64 VR512:$src))),
Igor Breger7a000f52016-01-21 14:18:11 +00003466 (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
Elena Demikhovskya30e4372014-02-05 07:05:03 +00003467 VK8), VR512:$src)>;
3468
Elena Demikhovskya30e4372014-02-05 07:05:03 +00003469def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
Robert Khasanov7ca7df02014-08-04 14:35:15 +00003470 (v16i32 VR512:$src))),
Igor Breger7a000f52016-01-21 14:18:11 +00003471 (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
Elena Demikhovskyf1de34b2014-12-04 09:40:44 +00003472
Craig Topper33c550c2016-05-22 00:39:30 +00003473// These patterns exist to prevent the above patterns from introducing a second
3474// mask inversion when one already exists.
// When the select condition is already (xor mask, all-ones), the original
// un-inverted mask is fed straight to the zero-masking move, so no KNOTW is
// emitted.
// NOTE(review): the v16i32 pattern binds $mask as VK16 in the source but
// names it VK16WM in the result, unlike the v8i64 pattern which uses VK8 in
// both places - confirm the mismatch is harmless.
3475def : Pat<(v8i64 (vselect (xor VK8:$mask, (v8i1 immAllOnesV)),
3476 (bc_v8i64 (v16i32 immAllZerosV)),
3477 (v8i64 VR512:$src))),
3478 (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
3479def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
3480 (v16i32 immAllZerosV),
3481 (v16i32 VR512:$src))),
3482 (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
3483
// Lowers a masked select on a narrow vector type by performing the masked
// move on the wide type and extracting the narrow subregister.
//   InstrStr - name prefix of the wide instruction; "rrk" / "rrkz" is
//              appended to pick the merge- or zero-masked form.
//   Narrow   - type info of the vector being selected.
//   Wide     - type info of the vector the operation is performed on.
// Each narrow operand is placed into an IMPLICIT_DEF wide register with
// INSERT_SUBREG at Narrow.SubRegIdx, and the mask is copied into the wide
// write-mask register class.
Craig Topperfc3ce492018-01-01 01:11:29 +00003484multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
3485 X86VectorVTInfo Wide> {
 // vselect mask, src1, src0 -> merge-masked move with src0 as pass-through.
3486 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3487 Narrow.RC:$src1, Narrow.RC:$src0)),
3488 (EXTRACT_SUBREG
3489 (Wide.VT
3490 (!cast<Instruction>(InstrStr#"rrk")
3491 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
3492 (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3493 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3494 Narrow.SubRegIdx)>;
3495
 // vselect mask, src1, zero -> zero-masked move.
3496 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3497 Narrow.RC:$src1, Narrow.ImmAllZerosV)),
3498 (EXTRACT_SUBREG
3499 (Wide.VT
3500 (!cast<Instruction>(InstrStr#"rrkz")
3501 (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3502 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3503 Narrow.SubRegIdx)>;
3504}
3505
Craig Topper96ab6fd2017-01-09 04:19:34 +00003506// Patterns for handling masked selects of 128-bit and 256-bit vectors when VLX
3507// isn't available. Use a 512-bit operation and extract.
// 32- and 64-bit element selects without VLX: each narrow type is paired with
// the 512-bit type of the same element type, using the aligned move
// (VMOVAPS / VMOVAPD / VMOVDQA32 / VMOVDQA64) "Z" instruction prefix.
3508let Predicates = [HasAVX512, NoVLX] in {
Craig Topperd58c1652018-01-07 18:20:37 +00003509 defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
3510 defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
Craig Topperfc3ce492018-01-01 01:11:29 +00003511 defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
3512 defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;
Craig Topperd58c1652018-01-07 18:20:37 +00003513
3514 defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
3515 defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
3516 defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
3517 defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
Craig Topper96ab6fd2017-01-09 04:19:34 +00003518}
3519
// 8- and 16-bit element selects with BWI but without VLX: each narrow type is
// paired with v64i8 / v32i16, using the VMOVDQU8Z / VMOVDQU16Z instruction
// prefix.
Craig Toppere9fc0cd2018-01-14 02:05:51 +00003520let Predicates = [HasBWI, NoVLX] in {
3521 defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
3522 defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;
3523
3524 defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
3525 defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
3526}
3527
// Unmasked 512-bit integer stores of v16i32, v32i16 and v64i8 all select the
// 64-bit-element move: VMOVDQA64Zmr for alignedstore and VMOVDQU64Zmr for
// store.
Craig Topper2462a712017-08-01 15:31:24 +00003528let Predicates = [HasAVX512] in {
3529 // 512-bit store.
Craig Topper571231a2018-01-29 23:27:23 +00003530 def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
3531 (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
Craig Topperafa69ee2017-08-19 23:21:21 +00003532 def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
Craig Topper83b0a982018-01-18 07:44:09 +00003533 (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
Craig Topperafa69ee2017-08-19 23:21:21 +00003534 def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
Craig Topper83b0a982018-01-18 07:44:09 +00003535 (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3536 def : Pat<(store (v16i32 VR512:$src), addr:$dst),
3537 (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
Craig Topper2462a712017-08-01 15:31:24 +00003538 def : Pat<(store (v32i16 VR512:$src), addr:$dst),
Craig Topper83b0a982018-01-18 07:44:09 +00003539 (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
Craig Topper2462a712017-08-01 15:31:24 +00003540 def : Pat<(store (v64i8 VR512:$src), addr:$dst),
Craig Topper83b0a982018-01-18 07:44:09 +00003541 (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
Craig Topper2462a712017-08-01 15:31:24 +00003542}
3543
// With VLX, unmasked 128-bit and 256-bit integer stores of 32-, 16- and 8-bit
// element vectors select the 64-bit-element move of the matching width:
// VMOVDQA64Z128mr / VMOVDQA64Z256mr for alignedstore and VMOVDQU64Z128mr /
// VMOVDQU64Z256mr for store.
3544let Predicates = [HasVLX] in {
3545 // 128-bit store.
Craig Topper571231a2018-01-29 23:27:23 +00003546 def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
3547 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
Craig Topper5ef13ba2016-12-26 07:26:07 +00003548 def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
Craig Topper83b0a982018-01-18 07:44:09 +00003549 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
Craig Topper5ef13ba2016-12-26 07:26:07 +00003550 def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
Craig Topper83b0a982018-01-18 07:44:09 +00003551 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3552 def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
3553 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
Craig Topper5ef13ba2016-12-26 07:26:07 +00003554 def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
Craig Topper83b0a982018-01-18 07:44:09 +00003555 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
Craig Topper5ef13ba2016-12-26 07:26:07 +00003556 def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
Craig Topper83b0a982018-01-18 07:44:09 +00003557 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
Craig Topper14aa2662016-08-11 06:04:04 +00003558
Craig Topper2462a712017-08-01 15:31:24 +00003559 // 256-bit store.
Craig Topper571231a2018-01-29 23:27:23 +00003560 def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
3561 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
Craig Topperafa69ee2017-08-19 23:21:21 +00003562 def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
Craig Topper83b0a982018-01-18 07:44:09 +00003563 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
Craig Topperafa69ee2017-08-19 23:21:21 +00003564 def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
Craig Topper83b0a982018-01-18 07:44:09 +00003565 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3566 def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
3567 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
Craig Topper5ef13ba2016-12-26 07:26:07 +00003568 def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
Craig Topper83b0a982018-01-18 07:44:09 +00003569 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
Craig Topper5ef13ba2016-12-26 07:26:07 +00003570 def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
Craig Topper83b0a982018-01-18 07:44:09 +00003571 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
Craig Topper95bdabd2016-05-22 23:44:33 +00003572}
3573
// Select a masked select whose "true" value is the low subvector of a wider
// register (extract_subvector at index 0, bitcast to Cast.VT) to a masked
// register-to-register move that reads the matching sub-register.
//
//   InstrStr - name prefix of the move instruction; "rrk" (merge-masking) or
//              "rrkz" (zero-masking) is appended to it.
//   From     - type info of the wide source vector.
//   To       - type info of the extracted subvector before the bitcast; also
//              supplies the sub-register index.
//   Cast     - type info of the select result; supplies the result type,
//              the write-mask register class and the zero vector.
multiclass masked_move_for_extract<string InstrStr, X86VectorVTInfo From,
                                   X86VectorVTInfo To, X86VectorVTInfo Cast> {
  // Merge-masking form.  The pass-through value has type Cast.VT, so it is
  // bound with Cast.RC on both sides of the pattern.
  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (extract_subvector
                                       (From.VT From.RC:$src), (iPTR 0)))),
                              Cast.RC:$src0)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
                      Cast.RC:$src0, Cast.KRCWM:$mask,
                      (EXTRACT_SUBREG From.RC:$src, To.SubRegIdx)))>;

  // Zero-masking form: the "false" value of the select is all zeros.
  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (extract_subvector
                                       (From.VT From.RC:$src), (iPTR 0)))),
                              Cast.ImmAllZerosV)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
                      Cast.KRCWM:$mask,
                      (EXTRACT_SUBREG From.RC:$src, To.SubRegIdx)))>;
}
3594
3595
// Instantiations of masked_move_for_extract.  The template arguments are
// <instruction prefix, wide source type, extracted type, select type>.
let Predicates = [HasVLX] in {
  // A masked extract of the low 128 bits of a 256-bit vector can be
  // implemented with a masked move.
  defm : masked_move_for_extract<"VMOVDQA64Z128",
                                 v4i64x_info, v2i64x_info, v2i64x_info>;
  defm : masked_move_for_extract<"VMOVDQA64Z128",
                                 v8i32x_info, v4i32x_info, v2i64x_info>;
  defm : masked_move_for_extract<"VMOVDQA64Z128",
                                 v16i16x_info, v8i16x_info, v2i64x_info>;
  defm : masked_move_for_extract<"VMOVDQA64Z128",
                                 v32i8x_info, v16i8x_info, v2i64x_info>;
  defm : masked_move_for_extract<"VMOVDQA32Z128",
                                 v4i64x_info, v2i64x_info, v4i32x_info>;
  defm : masked_move_for_extract<"VMOVDQA32Z128",
                                 v8i32x_info, v4i32x_info, v4i32x_info>;
  defm : masked_move_for_extract<"VMOVDQA32Z128",
                                 v16i16x_info, v8i16x_info, v4i32x_info>;
  defm : masked_move_for_extract<"VMOVDQA32Z128",
                                 v32i8x_info, v16i8x_info, v4i32x_info>;
  defm : masked_move_for_extract<"VMOVAPDZ128",
                                 v4f64x_info, v2f64x_info, v2f64x_info>;
  defm : masked_move_for_extract<"VMOVAPDZ128",
                                 v8f32x_info, v4f32x_info, v2f64x_info>;
  defm : masked_move_for_extract<"VMOVAPSZ128",
                                 v4f64x_info, v2f64x_info, v4f32x_info>;
  defm : masked_move_for_extract<"VMOVAPSZ128",
                                 v8f32x_info, v4f32x_info, v4f32x_info>;

  // A masked extract of the low 128 bits of a 512-bit vector can be
  // implemented with a masked move.
  defm : masked_move_for_extract<"VMOVDQA64Z128",
                                 v8i64_info, v2i64x_info, v2i64x_info>;
  defm : masked_move_for_extract<"VMOVDQA64Z128",
                                 v16i32_info, v4i32x_info, v2i64x_info>;
  defm : masked_move_for_extract<"VMOVDQA64Z128",
                                 v32i16_info, v8i16x_info, v2i64x_info>;
  defm : masked_move_for_extract<"VMOVDQA64Z128",
                                 v64i8_info, v16i8x_info, v2i64x_info>;
  defm : masked_move_for_extract<"VMOVDQA32Z128",
                                 v8i64_info, v2i64x_info, v4i32x_info>;
  defm : masked_move_for_extract<"VMOVDQA32Z128",
                                 v16i32_info, v4i32x_info, v4i32x_info>;
  defm : masked_move_for_extract<"VMOVDQA32Z128",
                                 v32i16_info, v8i16x_info, v4i32x_info>;
  defm : masked_move_for_extract<"VMOVDQA32Z128",
                                 v64i8_info, v16i8x_info, v4i32x_info>;
  defm : masked_move_for_extract<"VMOVAPDZ128",
                                 v8f64_info, v2f64x_info, v2f64x_info>;
  defm : masked_move_for_extract<"VMOVAPDZ128",
                                 v16f32_info, v4f32x_info, v2f64x_info>;
  defm : masked_move_for_extract<"VMOVAPSZ128",
                                 v8f64_info, v2f64x_info, v4f32x_info>;
  defm : masked_move_for_extract<"VMOVAPSZ128",
                                 v16f32_info, v4f32x_info, v4f32x_info>;

  // A masked extract of the low 256 bits of a 512-bit vector can be
  // implemented with a masked move.
  defm : masked_move_for_extract<"VMOVDQA64Z256",
                                 v8i64_info, v4i64x_info, v4i64x_info>;
  defm : masked_move_for_extract<"VMOVDQA64Z256",
                                 v16i32_info, v8i32x_info, v4i64x_info>;
  defm : masked_move_for_extract<"VMOVDQA64Z256",
                                 v32i16_info, v16i16x_info, v4i64x_info>;
  defm : masked_move_for_extract<"VMOVDQA64Z256",
                                 v64i8_info, v32i8x_info, v4i64x_info>;
  defm : masked_move_for_extract<"VMOVDQA32Z256",
                                 v8i64_info, v4i64x_info, v8i32x_info>;
  defm : masked_move_for_extract<"VMOVDQA32Z256",
                                 v16i32_info, v8i32x_info, v8i32x_info>;
  defm : masked_move_for_extract<"VMOVDQA32Z256",
                                 v32i16_info, v16i16x_info, v8i32x_info>;
  defm : masked_move_for_extract<"VMOVDQA32Z256",
                                 v64i8_info, v32i8x_info, v8i32x_info>;
  defm : masked_move_for_extract<"VMOVAPDZ256",
                                 v8f64_info, v4f64x_info, v4f64x_info>;
  defm : masked_move_for_extract<"VMOVAPDZ256",
                                 v16f32_info, v8f32x_info, v4f64x_info>;
  defm : masked_move_for_extract<"VMOVAPSZ256",
                                 v8f64_info, v4f64x_info, v8f32x_info>;
  defm : masked_move_for_extract<"VMOVAPSZ256",
                                 v16f32_info, v8f32x_info, v8f32x_info>;
}
Simon Pilgrimb2a80952017-01-08 16:45:39 +00003642
// Move Int Doubleword / Quadword to Packed Int, plus the codegen-only
// GR64 <-> FR64X bit-cast moves that share the vmovq mnemonic.
//
let ExeDomain = SSEPackedInt in {
// vmovd: GR32 -> low element of a v4i32.
def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg,
                             (outs VR128X:$dst), (ins GR32:$src),
                             "vmovd\t{$src, $dst|$dst, $src}",
                             [(set VR128X:$dst,
                               (v4i32 (scalar_to_vector GR32:$src)))],
                             IIC_SSE_MOVDQ>,
                    EVEX, Sched<[WriteMove]>;

// vmovd: 32-bit load -> low element of a v4i32.
def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem,
                             (outs VR128X:$dst), (ins i32mem:$src),
                             "vmovd\t{$src, $dst|$dst, $src}",
                             [(set VR128X:$dst,
                               (v4i32 (scalar_to_vector
                                       (loadi32 addr:$src))))],
                             IIC_SSE_MOVDQ>,
                    EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteLoad]>;

// vmovq: GR64 -> low element of a v2i64.
def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg,
                              (outs VR128X:$dst), (ins GR64:$src),
                              "vmovq\t{$src, $dst|$dst, $src}",
                              [(set VR128X:$dst,
                                (v2i64 (scalar_to_vector GR64:$src)))],
                              IIC_SSE_MOVDQ>,
                     EVEX, VEX_W, Sched<[WriteMove]>;

// Memory form of the above.  It carries no selection pattern; it is marked
// codegen-only but is still forced into the disassembler tables.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
    mayLoad = 1 in
def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem,
                              (outs VR128X:$dst), (ins i64mem:$src),
                              "vmovq\t{$src, $dst|$dst, $src}",
                              [], IIC_SSE_MOVDQ>,
                     EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteLoad]>;

// Bit-casts between a 64-bit GPR (or memory) and a scalar f64 register.
let isCodeGenOnly = 1 in {
def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg,
                             (outs FR64X:$dst), (ins GR64:$src),
                             "vmovq\t{$src, $dst|$dst, $src}",
                             [(set FR64X:$dst, (bitconvert GR64:$src))],
                             IIC_SSE_MOVDQ>,
                    EVEX, VEX_W, Sched<[WriteMove]>;

def VMOV64toSDZrm : AVX512XSI<0x7E, MRMSrcMem,
                              (outs FR64X:$dst), (ins i64mem:$src),
                              "vmovq\t{$src, $dst|$dst, $src}",
                              [(set FR64X:$dst,
                                (bitconvert (loadi64 addr:$src)))]>,
                    EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteLoad]>;

def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg,
                             (outs GR64:$dst), (ins FR64X:$src),
                             "vmovq\t{$src, $dst|$dst, $src}",
                             [(set GR64:$dst, (bitconvert FR64X:$src))],
                             IIC_SSE_MOVDQ>,
                    EVEX, VEX_W, Sched<[WriteMove]>;

def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem,
                             (outs), (ins i64mem:$dst, FR64X:$src),
                             "vmovq\t{$src, $dst|$dst, $src}",
                             [(store (i64 (bitconvert FR64X:$src)),
                                     addr:$dst)],
                             IIC_SSE_MOVDQ>,
                    EVEX, VEX_W, Sched<[WriteStore]>,
                    EVEX_CD8<64, CD8VT1>;
} // isCodeGenOnly = 1
} // ExeDomain = SSEPackedInt
3686
// Move Int Doubleword to Single Scalar
//
// Codegen-only bit-casts of a 32-bit integer (register or memory) into a
// scalar f32 register.
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg,
                            (outs FR32X:$dst), (ins GR32:$src),
                            "vmovd\t{$src, $dst|$dst, $src}",
                            [(set FR32X:$dst, (bitconvert GR32:$src))],
                            IIC_SSE_MOVDQ>,
                   EVEX, Sched<[WriteMove]>;

def VMOVDI2SSZrm : AVX512BI<0x6E, MRMSrcMem,
                            (outs FR32X:$dst), (ins i32mem:$src),
                            "vmovd\t{$src, $dst|$dst, $src}",
                            [(set FR32X:$dst,
                              (bitconvert (loadi32 addr:$src)))],
                            IIC_SSE_MOVDQ>,
                   EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteLoad]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3700
// Move doubleword from xmm register to r/m32
//
// Both forms read element 0 of a v4i32.
let ExeDomain = SSEPackedInt in {
def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg,
                             (outs GR32:$dst), (ins VR128X:$src),
                             "vmovd\t{$src, $dst|$dst, $src}",
                             [(set GR32:$dst,
                               (extractelt (v4i32 VR128X:$src), (iPTR 0)))],
                             IIC_SSE_MOVD_ToGP>,
                    EVEX, Sched<[WriteMove]>;

def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem,
                             (outs), (ins i32mem:$dst, VR128X:$src),
                             "vmovd\t{$src, $dst|$dst, $src}",
                             [(store (i32 (extractelt (v4i32 VR128X:$src),
                                                      (iPTR 0))),
                                     addr:$dst)],
                             IIC_SSE_MOVDQ>,
                    EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteStore]>;
} // ExeDomain = SSEPackedInt
3716
3717// Move quadword from xmm1 register to r/m64
3718//
3719let ExeDomain = SSEPackedInt in {
// vmovq: element 0 of a v2i64 -> GR64.  Restricted to 64-bit mode.
def VMOVPQIto64Zrr : I<0x7E, MRMDestReg,
                       (outs GR64:$dst), (ins VR128X:$src),
                       "vmovq\t{$src, $dst|$dst, $src}",
                       [(set GR64:$dst,
                         (extractelt (v2i64 VR128X:$src), (iPTR 0)))],
                       IIC_SSE_MOVD_ToGP>,
                     PD, EVEX, VEX_W, Sched<[WriteMove]>,
                     Requires<[HasAVX512, In64BitMode]>;
3726
// Memory-destination form of opcode 0x7E.  It has no selection pattern; it
// is marked codegen-only but is still forced into the disassembler tables.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
    mayStore = 1 in
def VMOVPQIto64Zmr : I<0x7E, MRMDestMem,
                       (outs), (ins i64mem:$dst, VR128X:$src),
                       "vmovq\t{$src, $dst|$dst, $src}",
                       [], IIC_SSE_MOVD_ToGP>,
                     PD, EVEX, VEX_W, Sched<[WriteStore]>,
                     Requires<[HasAVX512, In64BitMode]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003732
// vmovq xmm -> m64 (opcode 0xD6): stores element 0 of a v2i64 to memory.
//
// This form has only an XMM source and a memory destination; no 64-bit
// general-purpose register is involved, so it is not restricted to 64-bit
// mode.  Only AVX-512 is required.
def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
                      (ins i64mem:$dst, VR128X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
                              addr:$dst)], IIC_SSE_MOVDQ>,
                      EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
                      Sched<[WriteStore]>, Requires<[HasAVX512]>;
3740
// Register-to-register form of opcode 0xD6, printed as "vmovq.s".  It has
// no selection pattern.
let hasSideEffects = 0 in
def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg,
                             (outs VR128X:$dst), (ins VR128X:$src),
                             "vmovq.s\t{$src, $dst|$dst, $src}",
                             [], IIC_SSE_MOVDQ>,
                    EVEX, VEX_W, Sched<[WriteMove]>;
Simon Pilgrimb2a80952017-01-08 16:45:39 +00003746} // ExeDomain = SSEPackedInt
3747
// Move Scalar Single to Double Int
//
// Codegen-only bit-casts of a scalar f32 register to a 32-bit integer
// (register or memory).
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg,
                            (outs GR32:$dst), (ins FR32X:$src),
                            "vmovd\t{$src, $dst|$dst, $src}",
                            [(set GR32:$dst, (bitconvert FR32X:$src))],
                            IIC_SSE_MOVD_ToGP>,
                   EVEX, Sched<[WriteMove]>;

def VMOVSS2DIZmr : AVX512BI<0x7E, MRMDestMem,
                            (outs), (ins i32mem:$dst, FR32X:$src),
                            "vmovd\t{$src, $dst|$dst, $src}",
                            [(store (i32 (bitconvert FR32X:$src)),
                                    addr:$dst)],
                            IIC_SSE_MOVDQ>,
                   EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteStore]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3762
// Move Quadword Int to Packed Quadword Int
//
// vmovq: 64-bit load -> low element of a v2i64.
let ExeDomain = SSEPackedInt in {
def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem,
                              (outs VR128X:$dst), (ins i64mem:$src),
                              "vmovq\t{$src, $dst|$dst, $src}",
                              [(set VR128X:$dst,
                                (v2i64 (scalar_to_vector
                                        (loadi64 addr:$src))))]>,
                    EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteLoad]>;
} // ExeDomain = SSEPackedInt
3773
// Accept the "vmovd" spelling for the GR64 <-> XMM moves.  The trailing 0
// keeps the aliases parse-only, so the instructions still print as "vmovq".
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOV64toPQIZrr VR128X:$dst, GR64:$src),
                0>;
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOVPQIto64Zrr GR64:$dst, VR128X:$src),
                0>;
3779
Simon Pilgrimb2a80952017-01-08 16:45:39 +00003780//===----------------------------------------------------------------------===//
3781// AVX-512 MOVSS, MOVSD
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003782//===----------------------------------------------------------------------===//
3783
// Defines the full family of scalar move instructions for one element type.
//
//   asm    - mnemonic; the operand string is pasted after it.
//   OpNode - SDNode matched by the register-register forms.
//   _      - scalar type info (register classes, memory operand, load
//            fragment, execution domain, mask class).
//
// Records produced: rr, rrkz, rrk (register forms, opcode 0x10),
// rm, rmk, rmkz (load forms, opcode 0x10) and mr, mrk (store forms,
// opcode 0x11).  The masked load/store forms carry no selection pattern.
multiclass avx512_move_scalar<string asm, SDNode OpNode,
                              X86VectorVTInfo _> {
  // Unmasked register-register move.
  def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.RC:$src2),
                    asm # "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.RC:$dst,
                      (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
                    _.ExeDomain, IIC_SSE_MOV_S_RR>,
           EVEX_4V, Sched<[WriteMove]>;

  // Zero-masked register-register move.
  def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
                      (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
                      asm # "\t{$src2, $src1, $dst {${mask}} {z}|"
                          # "$dst {${mask}} {z}, $src1, $src2}",
                      [(set _.RC:$dst,
                        (_.VT (X86selects _.KRCWM:$mask,
                               (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                               _.ImmAllZerosV)))],
                      _.ExeDomain, IIC_SSE_MOV_S_RR>,
             EVEX_4V, EVEX_KZ, Sched<[WriteMove]>;

  // Merge-masked register-register move; $src0 is tied to $dst.
  let Constraints = "$src0 = $dst" in
  def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
                     (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1,
                          _.RC:$src2),
                     asm # "\t{$src2, $src1, $dst {${mask}}|"
                         # "$dst {${mask}}, $src1, $src2}",
                     [(set _.RC:$dst,
                       (_.VT (X86selects _.KRCWM:$mask,
                              (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                              (_.VT _.RC:$src0))))],
                     _.ExeDomain, IIC_SSE_MOV_S_RR>,
            EVEX_4V, EVEX_K, Sched<[WriteMove]>;

  // Unmasked scalar load.
  let canFoldAsLoad = 1, isReMaterializable = 1 in
  def rm : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst),
                    (ins _.ScalarMemOp:$src),
                    asm # "\t{$src, $dst|$dst, $src}",
                    [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
                    _.ExeDomain, IIC_SSE_MOV_S_RM>,
           EVEX, Sched<[WriteLoad]>;

  // Masked scalar loads (no patterns).
  let mayLoad = 1, hasSideEffects = 0 in {
    let Constraints = "$src0 = $dst" in
    def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
                       (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
                       asm # "\t{$src, $dst {${mask}}|"
                           # "$dst {${mask}}, $src}",
                       [], _.ExeDomain, IIC_SSE_MOV_S_RM>,
              EVEX, EVEX_K, Sched<[WriteLoad]>;

    def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
                        (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
                        asm # "\t{$src, $dst {${mask}} {z}|"
                            # "$dst {${mask}} {z}, $src}",
                        [], _.ExeDomain, IIC_SSE_MOV_S_RM>,
               EVEX, EVEX_KZ, Sched<[WriteLoad]>;
  }

  // Unmasked scalar store.
  def mr : AVX512PI<0x11, MRMDestMem, (outs),
                    (ins _.ScalarMemOp:$dst, _.FRC:$src),
                    asm # "\t{$src, $dst|$dst, $src}",
                    [(store _.FRC:$src, addr:$dst)],
                    _.ExeDomain, IIC_SSE_MOV_S_MR>,
           EVEX, Sched<[WriteStore]>;

  // Masked scalar store (no pattern).
  let mayStore = 1, hasSideEffects = 0 in
  def mrk : AVX512PI<0x11, MRMDestMem, (outs),
                     (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.FRC:$src),
                     asm # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                     [], _.ExeDomain, IIC_SSE_MOV_S_MR>,
            EVEX, EVEX_K, Sched<[WriteStore]>;
}
3836
// Single-precision scalar moves (XS prefix, 32-bit compressed displacement).
defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, f32x_info>,
               VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;

// Double-precision scalar moves (XD prefix, VEX.W, 64-bit compressed
// displacement).
defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, f64x_info>,
               VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003842
Ayman Musa46af8f92016-11-13 14:29:32 +00003843
// Fold a scalar select feeding a scalar move into the masked move itself.
//
//   InstrStr - name prefix of the move ("rrk"/"rrkz" is appended).
//   OpNode   - the scalar move SDNode being matched.
//   ZeroFP   - PatLeaf matching the floating-point zero of the element type.
//   _        - 128-bit vector type info for the element type.
//
// The instruction suffixes are pasted as quoted string literals, matching
// masked_move_for_extract, instead of relying on TableGen treating an
// otherwise undefined identifier as a string.
multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
                                       PatLeaf ZeroFP, X86VectorVTInfo _> {

// select(mask, src1, src2) inserted into src0 -> merge-masked move with
// src2 as the pass-through value.
def : Pat<(_.VT (OpNode _.RC:$src0,
                        (_.VT (scalar_to_vector
                               (_.EltVT (X86selects VK1WM:$mask,
                                         (_.EltVT _.FRC:$src1),
                                         (_.EltVT _.FRC:$src2))))))),
          (!cast<Instruction>(InstrStr#"rrk")
                        (COPY_TO_REGCLASS _.FRC:$src2, _.RC),
                        VK1WM:$mask,
                        (_.VT _.RC:$src0),
                        (COPY_TO_REGCLASS _.FRC:$src1, _.RC))>;

// select(mask, src1, 0.0) inserted into src0 -> zero-masked move.
def : Pat<(_.VT (OpNode _.RC:$src0,
                        (_.VT (scalar_to_vector
                               (_.EltVT (X86selects VK1WM:$mask,
                                         (_.EltVT _.FRC:$src1),
                                         (_.EltVT ZeroFP))))))),
          (!cast<Instruction>(InstrStr#"rrkz")
                        VK1WM:$mask,
                        (_.VT _.RC:$src0),
                        (COPY_TO_REGCLASS _.FRC:$src1, _.RC))>;
}
3868
// Select a 512-bit masked_store whose data is a 128-bit value widened with
// undef upper parts to the masked scalar store instruction.
//
//   InstrStr - name prefix of the scalar move ("mrk" is appended).
//   _        - 128/256/512-bit type infos for the element type.
//   Mask     - dag matched as the store mask; it must bind $mask.
//   MaskRC   - register class of $mask, copied directly to VK1WM.
//
// The "mrk" suffix is pasted as a quoted string literal, matching
// masked_move_for_extract, instead of relying on TableGen treating an
// otherwise undefined identifier as a string.
multiclass avx512_store_scalar_lowering<string InstrStr,
                                        AVX512VLVectorVTInfo _,
                                        dag Mask, RegisterClass MaskRC> {

def : Pat<(masked_store addr:$dst, Mask,
             (_.info512.VT (insert_subvector undef,
                               (_.info256.VT (insert_subvector undef,
                                                 (_.info128.VT _.info128.RC:$src),
                                                 (iPTR 0))),
                               (iPTR 0)))),
          (!cast<Instruction>(InstrStr#"mrk") addr:$dst,
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;

}
3883
// Same as avx512_store_scalar_lowering, but for a mask held in a register
// class narrower than 32 bits: $mask is first inserted into an undefined
// i32 at sub-register index `subreg` and the result is copied to VK1WM.
//
//   InstrStr - name prefix of the scalar move ("mrk" is appended).
//   _        - 128/256/512-bit type infos for the element type.
//   Mask     - dag matched as the store mask; it must bind $mask.
//   MaskRC   - register class of $mask.
//   subreg   - sub-register index used for the INSERT_SUBREG.
//
// The "mrk" suffix is pasted as a quoted string literal, matching
// masked_move_for_extract, instead of relying on TableGen treating an
// otherwise undefined identifier as a string.
multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
                                               AVX512VLVectorVTInfo _,
                                               dag Mask, RegisterClass MaskRC,
                                               SubRegIndex subreg> {

def : Pat<(masked_store addr:$dst, Mask,
             (_.info512.VT (insert_subvector undef,
                               (_.info256.VT (insert_subvector undef,
                                                 (_.info128.VT _.info128.RC:$src),
                                                 (iPTR 0))),
                               (iPTR 0)))),
          (!cast<Instruction>(InstrStr#"mrk") addr:$dst,
                      (COPY_TO_REGCLASS
                        (i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                            MaskRC:$mask, subreg)),
                        VK1WM),
                      (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;

}
3900
// Select the low 128 bits of a 512-bit masked_load to the masked scalar
// load instruction.
//
//   InstrStr - name prefix of the scalar move ("rmk"/"rmkz" is appended).
//   _        - 128/256/512-bit type infos for the element type.
//   Mask     - dag matched as the load mask; it must bind $mask.
//   MaskRC   - register class of $mask, copied directly to VK1WM.
//
// The instruction suffixes are pasted as quoted string literals, matching
// masked_move_for_extract, instead of relying on TableGen treating an
// otherwise undefined identifier as a string.
multiclass avx512_load_scalar_lowering<string InstrStr,
                                       AVX512VLVectorVTInfo _,
                                       dag Mask, RegisterClass MaskRC> {

// Pass-through is all zeros -> zero-masked load.
def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
                                        (_.info512.VT (bitconvert
                                                       (v16i32 immAllZerosV))))),
                           (iPTR 0))),
          (!cast<Instruction>(InstrStr#"rmkz")
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      addr:$srcAddr)>;

// Pass-through is a widened X86vzmovl of $src -> merge-masked load with
// $src as the pass-through register.
def : Pat<(_.info128.VT (extract_subvector
                (_.info512.VT (masked_load addr:$srcAddr, Mask,
                      (_.info512.VT (insert_subvector undef,
                            (_.info256.VT (insert_subvector undef,
                                  (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                                  (iPTR 0))),
                            (iPTR 0))))),
                (iPTR 0))),
          (!cast<Instruction>(InstrStr#"rmk") _.info128.RC:$src,
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      addr:$srcAddr)>;

}
3926
// Same as avx512_load_scalar_lowering, but for a mask held in a register
// class narrower than 32 bits: $mask is first inserted into an undefined
// i32 at sub-register index `subreg` and the result is copied to VK1WM.
//
//   InstrStr - name prefix of the scalar move ("rmk"/"rmkz" is appended).
//   _        - 128/256/512-bit type infos for the element type.
//   Mask     - dag matched as the load mask; it must bind $mask.
//   MaskRC   - register class of $mask.
//   subreg   - sub-register index used for the INSERT_SUBREG.
//
// The instruction suffixes are pasted as quoted string literals, matching
// masked_move_for_extract, instead of relying on TableGen treating an
// otherwise undefined identifier as a string.
multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
                                              AVX512VLVectorVTInfo _,
                                              dag Mask, RegisterClass MaskRC,
                                              SubRegIndex subreg> {

// Pass-through is all zeros -> zero-masked load.
def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
                                        (_.info512.VT (bitconvert
                                                       (v16i32 immAllZerosV))))),
                           (iPTR 0))),
          (!cast<Instruction>(InstrStr#"rmkz")
                      (COPY_TO_REGCLASS
                        (i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                            MaskRC:$mask, subreg)),
                        VK1WM),
                      addr:$srcAddr)>;

// Pass-through is a widened X86vzmovl of $src -> merge-masked load with
// $src as the pass-through register.
def : Pat<(_.info128.VT (extract_subvector
                (_.info512.VT (masked_load addr:$srcAddr, Mask,
                      (_.info512.VT (insert_subvector undef,
                            (_.info256.VT (insert_subvector undef,
                                  (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                                  (iPTR 0))),
                            (iPTR 0))))),
                (iPTR 0))),
          (!cast<Instruction>(InstrStr#"rmk") _.info128.RC:$src,
                      (COPY_TO_REGCLASS
                        (i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                            MaskRC:$mask, subreg)),
                        VK1WM),
                      addr:$srcAddr)>;

}
3954
// Select-folding patterns for the f32 and f64 scalar moves.
defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss,
                                   fp32imm0, v4f32x_info>;
defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd,
                                   fp64imm0, v2f64x_info>;
3957
// Masked scalar store lowering.  In each case the matched mask is a GPR
// ANDed with 1 and bitcast to a vector of i1.
//   f32, mask in GR32 (truncated to i16).
defm : avx512_store_scalar_lowering<
           "VMOVSSZ", avx512vl_f32_info,
           (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))),
           GR32>;
//   f32, mask in GR16.
defm : avx512_store_scalar_lowering_subreg<
           "VMOVSSZ", avx512vl_f32_info,
           (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))),
           GR16, sub_16bit>;
//   f64, mask in GR8.
defm : avx512_store_scalar_lowering_subreg<
           "VMOVSDZ", avx512vl_f64_info,
           (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
           GR8, sub_8bit>;
Ayman Musa46af8f92016-11-13 14:29:32 +00003964
// Masked scalar load lowering.  The mask dags mirror the ones used for the
// store lowering.
//   f32, mask in GR32 (truncated to i16).
defm : avx512_load_scalar_lowering<
           "VMOVSSZ", avx512vl_f32_info,
           (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))),
           GR32>;
//   f32, mask in GR16.
defm : avx512_load_scalar_lowering_subreg<
           "VMOVSSZ", avx512vl_f32_info,
           (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))),
           GR16, sub_16bit>;
//   f64, mask in GR8.
defm : avx512_load_scalar_lowering_subreg<
           "VMOVSDZ", avx512vl_f64_info,
           (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
           GR8, sub_8bit>;
Ayman Musa46af8f92016-11-13 14:29:32 +00003971
// Scalar selects are implemented with the merge-masked scalar move: $src2
// is the tied pass-through operand, the first vector source is undefined,
// and $src1 is the value moved when the mask bit is set.  The result is
// copied back to the scalar register class.

// f32 select, mask supplied as scalar_to_vector of a GR8.
def : Pat<(f32 (X86selects (scalar_to_vector GR8:$mask),
                           (f32 FR32X:$src1), (f32 FR32X:$src2))),
          (COPY_TO_REGCLASS
            (VMOVSSZrrk
              (COPY_TO_REGCLASS FR32X:$src2, VR128X),
              (COPY_TO_REGCLASS
                (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$mask, sub_8bit)),
                VK1WM),
              (v4f32 (IMPLICIT_DEF)),
              (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
            FR32X)>;

// f32 select, mask already in a VK1WM register.
def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1),
                           (f32 FR32X:$src2))),
          (COPY_TO_REGCLASS
            (VMOVSSZrrk
              (COPY_TO_REGCLASS FR32X:$src2, VR128X),
              VK1WM:$mask,
              (v4f32 (IMPLICIT_DEF)),
              (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
            FR32X)>;

// f64 select, mask supplied as scalar_to_vector of a GR8.
def : Pat<(f64 (X86selects (scalar_to_vector GR8:$mask),
                           (f64 FR64X:$src1), (f64 FR64X:$src2))),
          (COPY_TO_REGCLASS
            (VMOVSDZrrk
              (COPY_TO_REGCLASS FR64X:$src2, VR128X),
              (COPY_TO_REGCLASS
                (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$mask, sub_8bit)),
                VK1WM),
              (v2f64 (IMPLICIT_DEF)),
              (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
            FR64X)>;

// f64 select, mask already in a VK1WM register.
def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1),
                           (f64 FR64X:$src2))),
          (COPY_TO_REGCLASS
            (VMOVSDZrrk
              (COPY_TO_REGCLASS FR64X:$src2, VR128X),
              VK1WM:$mask,
              (v2f64 (IMPLICIT_DEF)),
              (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
            FR64X)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003999
// The mask_store_ss intrinsic maps to the masked scalar store.  The GR8 mask
// is inserted into an undefined i32 and copied to VK1WM; the vector source
// is copied to the scalar f32 register class.
def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask),
          (VMOVSSZmrk
            addr:$dst,
            (COPY_TO_REGCLASS
              (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$mask, sub_8bit)),
              VK1WM),
            (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
4003
// Alternate (opcode 0x11, MRMDestReg) encodings of the register-register
// scalar moves, printed with a ".s" mnemonic suffix.  They carry no
// selection patterns; FoldGenData names the corresponding regular record.
let hasSideEffects = 0 in {
  def VMOVSSZrr_REV : AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                             (ins VR128X:$src1, VR128X:$src2),
                             "vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                             [], IIC_SSE_MOV_S_RR>,
                      XS, EVEX_4V, VEX_LIG,
                      FoldGenData<"VMOVSSZrr">, Sched<[WriteMove]>;

  let Constraints = "$src0 = $dst" in
  def VMOVSSZrrk_REV : AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                              (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
                                   VR128X:$src1, VR128X:$src2),
                              !strconcat(
                                "vmovss.s\t{$src2, $src1, $dst {${mask}}|",
                                "$dst {${mask}}, $src1, $src2}"),
                              [], IIC_SSE_MOV_S_RR>,
                       EVEX_K, XS, EVEX_4V, VEX_LIG,
                       FoldGenData<"VMOVSSZrrk">, Sched<[WriteMove]>;

  def VMOVSSZrrkz_REV : AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                               (ins f32x_info.KRCWM:$mask, VR128X:$src1,
                                    VR128X:$src2),
                               !strconcat(
                                 "vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|",
                                 "$dst {${mask}} {z}, $src1, $src2}"),
                               [], IIC_SSE_MOV_S_RR>,
                        EVEX_KZ, XS, EVEX_4V, VEX_LIG,
                        FoldGenData<"VMOVSSZrrkz">, Sched<[WriteMove]>;

  def VMOVSDZrr_REV : AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                             (ins VR128X:$src1, VR128X:$src2),
                             "vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                             [], IIC_SSE_MOV_S_RR>,
                      XD, EVEX_4V, VEX_LIG, VEX_W,
                      FoldGenData<"VMOVSDZrr">, Sched<[WriteMove]>;

  let Constraints = "$src0 = $dst" in
  def VMOVSDZrrk_REV : AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                              (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
                                   VR128X:$src1, VR128X:$src2),
                              !strconcat(
                                "vmovsd.s\t{$src2, $src1, $dst {${mask}}|",
                                "$dst {${mask}}, $src1, $src2}"),
                              [], IIC_SSE_MOV_S_RR>,
                       EVEX_K, XD, EVEX_4V, VEX_LIG,
                       VEX_W, FoldGenData<"VMOVSDZrrk">, Sched<[WriteMove]>;

  def VMOVSDZrrkz_REV : AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                               (ins f64x_info.KRCWM:$mask, VR128X:$src1,
                                    VR128X:$src2),
                               !strconcat(
                                 "vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|",
                                 "$dst {${mask}} {z}, $src1, $src2}"),
                               [], IIC_SSE_MOV_S_RR>,
                        EVEX_KZ, XD, EVEX_4V, VEX_LIG,
                        VEX_W, FoldGenData<"VMOVSDZrrkz">, Sched<[WriteMove]>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004050
// Selection patterns (no new instructions are defined here) that map
// X86vzmovl, scalar loads, X86vzload, extract-and-store, and the
// X86Movss/X86Movsd/X86Movlpd/X86Movlps shuffles onto the EVEX-encoded scalar
// move instructions (VMOVSSZ*, VMOVSDZ*, VMOVDI2PDIZrm, VMOVQI2PQIZrm).
// Every pattern in this scope is guarded by HasAVX512.
let Predicates = [HasAVX512] in {
  // Register forms of "keep the low element, zero the rest": merge the source
  // into an all-zero 128-bit vector (AVX512_128_SET0).
  let AddedComplexity = 15 in {
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
            (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
            (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;
  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64X:$src)))),
            (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
                       (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
  }

  // Move low f32 and clear high bits.
  // For 256/512-bit types only the low xmm sub-register is processed; the
  // result is widened again with SUBREG_TO_REG.
  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
              (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)), sub_xmm)>;
  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
              (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)), sub_xmm)>;
  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
              (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
              (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)), sub_xmm)>;

  let AddedComplexity = 20 in {
  // VMOVSSZrm zeros the high parts of the register, so the zero-extending
  // forms below need no extra instruction; the 128-bit results are only
  // re-classed to VR128X with COPY_TO_REGCLASS.
  // The AVX versions also write: DST[255:128] <- 0
  def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
  def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
  def : Pat<(v4f32 (X86vzload addr:$src)),
            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;

  // VMOVSDZrm zeros the high parts of the register; as above, the 128-bit
  // results are only re-classed to VR128X with COPY_TO_REGCLASS.
  // The AVX versions also write: DST[255:128] <- 0
  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
  def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
  def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
  def : Pat<(v2f64 (X86vzload addr:$src)),
            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;

  // Represent the same patterns above but in the form they appear for
  // 256-bit types. Here the wide result is expressed with SUBREG_TO_REG.
  def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
                   (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
  def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
                   (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v8f32 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
                   (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
  def : Pat<(v4f64 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;

  // Represent the same patterns above but in the form they appear for
  // 512-bit types
  def : Pat<(v16i32 (X86vzmovl (insert_subvector undef,
                   (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
  def : Pat<(v16f32 (X86vzmovl (insert_subvector undef,
                   (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v16f32 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v8f64 (X86vzmovl (insert_subvector undef,
                   (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
  def : Pat<(v8f64 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
  }
  // Note: this v4i64 load pattern sits outside the AddedComplexity scope
  // above, so it keeps the default pattern complexity.
  def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
                   (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;

  // Move low f64 and clear high bits.
  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
              (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)), sub_xmm)>;
  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
              (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)), sub_xmm)>;

  // Same as above for 64-bit integer element types.
  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
             (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)), sub_xmm)>;
  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
             (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)), sub_xmm)>;

  // Extract and store.
  // Storing element 0 of a v4f32 is a plain scalar store of the same register
  // viewed as FR32X.
  def : Pat<(store (f32 (extractelt (v4f32 VR128X:$src), (iPTR 0))),
                   addr:$dst),
            (VMOVSSZmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>;

  // Shuffle with VMOVSS
  def : Pat<(v4i32 (X86Movss VR128X:$src1, VR128X:$src2)),
            (VMOVSSZrr (v4i32 VR128X:$src1), VR128X:$src2)>;

  def : Pat<(v4f32 (X86Movss VR128X:$src1, (scalar_to_vector FR32X:$src2))),
            (VMOVSSZrr VR128X:$src1,
                       (COPY_TO_REGCLASS FR32X:$src2, VR128X))>;

  // Shuffle with VMOVSD
  def : Pat<(v2i64 (X86Movsd VR128X:$src1, VR128X:$src2)),
            (VMOVSDZrr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v2f64 (X86Movsd VR128X:$src1, (scalar_to_vector FR64X:$src2))),
            (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS FR64X:$src2, VR128X))>;

  // X86Movlpd / X86Movlps on two registers are also selected to VMOVSDZrr.
  def : Pat<(v2f64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
            (VMOVSDZrr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v4f32 (X86Movlps VR128X:$src1, VR128X:$src2)),
            (VMOVSDZrr VR128X:$src1, VR128X:$src2)>;
}

// VMOVZPQILo2PQIZrr: EVEX register-to-register "vmovq" (opcode 0x7E, XS
// prefix class, VEX_W). Its selection pattern is X86vzmovl on v2i64, i.e. the
// register form of "keep the low element and zero the rest".
// Executes in the SSEPackedInt domain and is scheduled as WriteVecLogic.
let ExeDomain = SSEPackedInt, SchedRW = [WriteVecLogic] in {
let AddedComplexity = 15 in
def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
                                  (ins VR128X:$src),
                                  "vmovq\t{$src, $dst|$dst, $src}",
                                  [(set VR128X:$dst, (v2i64 (X86vzmovl
                                                     (v2i64 VR128X:$src))))],
                                  IIC_SSE_MOVQ_RR>, EVEX, VEX_W;
}

// Selection patterns that zero-extend a scalar coming from a general-purpose
// register or from memory into a vector, using the EVEX movd/movq forms
// (VMOVDI2PDIZ*, VMOV64toPQIZrr, VMOVQI2PQIZrm, VMOVZPQILo2PQIZrr).
// Every pattern in this scope is guarded by HasAVX512.
let Predicates = [HasAVX512] in {
  // GPR -> vector with the upper elements zeroed.
  let AddedComplexity = 15 in {
    def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
              (VMOVDI2PDIZrr GR32:$src)>;

    def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
              (VMOV64toPQIZrr GR64:$src)>;

    def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
                                 (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
              (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;

    def : Pat<(v8i64 (X86vzmovl (insert_subvector undef,
                                 (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
              (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
  }
  // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
  let AddedComplexity = 20 in {
    def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
              (VMOVDI2PDIZrm addr:$src)>;
    def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
              (VMOVDI2PDIZrm addr:$src)>;
    def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
              (VMOVDI2PDIZrm addr:$src)>;
    def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
              (VMOVDI2PDIZrm addr:$src)>;
    def : Pat<(v4i32 (X86vzload addr:$src)),
              (VMOVDI2PDIZrm addr:$src)>;
    def : Pat<(v8i32 (X86vzload addr:$src)),
              (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
    def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
              (VMOVQI2PQIZrm addr:$src)>;
    def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
              (VMOVZPQILo2PQIZrr VR128X:$src)>;
    def : Pat<(v2i64 (X86vzload addr:$src)),
              (VMOVQI2PQIZrm addr:$src)>;
    def : Pat<(v4i64 (X86vzload addr:$src)),
              (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
  }

  // Use regular 128-bit instructions to match 256-bit and 512-bit
  // scalar_to_vec+zext.
  def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
                               (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (insert_subvector undef,
                                (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>;

  // Use regular 128-bit instructions to match 512-bit zero-extending loads
  // (X86vzload).
  def : Pat<(v16i32 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
  def : Pat<(v8i64 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
}
//===----------------------------------------------------------------------===//
// AVX-512 - Non-temporals
//===----------------------------------------------------------------------===//

// Load forms of "vmovntdqa" (opcode 0x2A, T8PD map) for 512-, 256- and
// 128-bit vectors. The definitions carry no selection pattern of their own
// (empty pattern list). The 256/128-bit forms additionally require HasVLX.
// All three are scheduled as WriteLoad.
let SchedRW = [WriteLoad] in {
  def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
                        (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
                        [], SSEPackedInt>, EVEX, T8PD, EVEX_V512,
                        EVEX_CD8<64, CD8VF>;

  let Predicates = [HasVLX] in {
    def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
                             (ins i256mem:$src),
                             "vmovntdqa\t{$src, $dst|$dst, $src}",
                             [], SSEPackedInt>, EVEX, T8PD, EVEX_V256,
                             EVEX_CD8<64, CD8VF>;

    def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
                             (ins i128mem:$src),
                             "vmovntdqa\t{$src, $dst|$dst, $src}",
                             [], SSEPackedInt>, EVEX, T8PD, EVEX_V128,
                             EVEX_CD8<64, CD8VF>;
  }
}

// avx512_movnt - Defines the memory-destination form `mr` of a non-temporal
// vector store.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic; the operand string "\t{$src, $dst|$dst, $src}" is
//               appended to it.
//   _         - vector type info supplying MemOp, RC, VT, ExeDomain, EltSize.
//   st_frag   - store fragment used in the selection pattern (defaults to
//               alignednontemporalstore).
//   itin      - itinerary class (defaults to IIC_SSE_MOVNT).
// The instruction is scheduled as WriteStore and its pattern gets
// AddedComplexity = 400.
multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                        PatFrag st_frag = alignednontemporalstore,
                        InstrItinClass itin = IIC_SSE_MOVNT> {
  let SchedRW = [WriteStore], AddedComplexity = 400 in
  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(st_frag (_.VT _.RC:$src), addr:$dst)],
                    _.ExeDomain, itin>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
}

// avx512_movnt_vl - Instantiates avx512_movnt for all three vector lengths
// described by VTInfo:
//   Z    - 512-bit, requires HasAVX512.
//   Z256 - 256-bit, requires HasAVX512 and HasVLX.
//   Z128 - 128-bit, requires HasAVX512 and HasVLX.
multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
                           AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256>, EVEX_V256;
    defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128>, EVEX_V128;
  }
}

// Non-temporal store instructions, each expanded to Z/Z256/Z128 `mr` forms by
// avx512_movnt_vl. VMOVNTPD and VMOVNTPS share opcode 0x2B and differ in
// prefix class (PD vs PS) and VEX_W.
defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info>, PD;
defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info>, PD, VEX_W;
defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info>, PS;

// 512-bit non-temporal patterns (HasAVX512, AddedComplexity = 400).
// Stores of the remaining integer element types (i32/i16/i8) reuse
// VMOVNTDQZmr; the i64 case is presumably covered by the pattern generated
// from `defm VMOVNTDQ` with avx512vl_i64_info.
// Aligned non-temporal loads of v8f64/v16f32/v8i64 select VMOVNTDQAZrm.
let Predicates = [HasAVX512], AddedComplexity = 400 in {
  def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;

  def : Pat<(v8f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v16f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v8i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
}

// 256-bit and 128-bit non-temporal patterns (HasVLX, AddedComplexity = 400).
// They mirror the 512-bit group: i32/i16/i8 vector stores reuse the
// VMOVNTDQ store of the matching width, and aligned non-temporal loads of the
// f64/f32/i64 vector types select the VMOVNTDQA load of the matching width.
let Predicates = [HasVLX], AddedComplexity = 400 in {
  // 256-bit stores.
  def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;

  // 256-bit loads.
  def : Pat<(v4f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v8f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v4i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;

  // 128-bit stores.
  def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;

  // 128-bit loads.
  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - Integer arithmetic
//

// avx512_binop_rm - Two-operand integer operation on one vector type `_`.
// Defines, through AVX512_maskable (declared elsewhere in this file;
// presumably it also emits the write-masked variants):
//   rr - both sources in registers.
//   rm - second source loaded from memory via _.LdFrag and bitconverted to
//        _.VT.
// IsCommutable is forwarded to the rr form only. Scheduling comes from
// itins.Sched (rr) and itins.Sched.Folded + ReadAfterLd (rm).
multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86VectorVTInfo _, OpndItins itins,
                           bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                    "$src2, $src1", "$src1, $src2",
                    (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                    itins.rr, IsCommutable>, AVX512BIBase, EVEX_4V,
                    Sched<[itins.Sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1,
                                (bitconvert (_.LdFrag addr:$src2)))),
                  itins.rm>, AVX512BIBase, EVEX_4V,
                  Sched<[itins.Sched.Folded, ReadAfterLd]>;
}

// avx512_binop_rmb - Everything avx512_binop_rm defines (rr, rm) plus:
//   rmb - second source is a scalar loaded with _.ScalarLdFrag and broadcast
//         with X86VBroadcast; the assembly operand gets _.BroadcastStr
//         appended and the encoding sets EVEX_B.
multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _, OpndItins itins,
                            bit IsCommutable = 0> :
           avx512_binop_rm<opc, OpcodeStr, OpNode, _, itins, IsCommutable> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                  "${src2}"##_.BroadcastStr##", $src1",
                  "$src1, ${src2}"##_.BroadcastStr,
                  (_.VT (OpNode _.RC:$src1,
                                (X86VBroadcast
                                    (_.ScalarLdFrag addr:$src2)))),
                  itins.rm>, AVX512BIBase, EVEX_4V, EVEX_B,
                  Sched<[itins.Sched.Folded, ReadAfterLd]>;
}

// avx512_binop_rm_vl - Instantiates avx512_binop_rm (rr/rm, no broadcast
// form) for the three vector lengths in VTInfo:
//   Z    - 512-bit, requires `prd`.
//   Z256 - 256-bit, requires `prd` and HasVLX.
//   Z128 - 128-bit, requires `prd` and HasVLX.
multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              AVX512VLVectorVTInfo VTInfo, OpndItins itins,
                              Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, itins,
                             IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256, itins,
                                IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128, itins,
                                IsCommutable>, EVEX_V128;
  }
}

// avx512_binop_rmb_vl - Same vector-length expansion as avx512_binop_rm_vl,
// but built on avx512_binop_rmb, so each length also gets the broadcast
// (rmb) form:
//   Z    - 512-bit, requires `prd`.
//   Z256 - 256-bit, requires `prd` and HasVLX.
//   Z128 - 128-bit, requires `prd` and HasVLX.
multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               AVX512VLVectorVTInfo VTInfo, OpndItins itins,
                               Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, itins,
                              IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256, itins,
                                 IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128, itins,
                                 IsCommutable>, EVEX_V128;
  }
}

// avx512_binop_rm_vl_q - 64-bit-element variant: avx512_binop_rmb_vl over
// avx512vl_i64_info, encoded with VEX_W and EVEX_CD8<64, CD8VF>.
multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                OpndItins itins, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
                                  itins, prd, IsCommutable>,
                                  VEX_W, EVEX_CD8<64, CD8VF>;
}

// avx512_binop_rm_vl_d - 32-bit-element variant: avx512_binop_rmb_vl over
// avx512vl_i32_info, encoded with EVEX_CD8<32, CD8VF>.
multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                OpndItins itins, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
                                  itins, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
}

// avx512_binop_rm_vl_w - 16-bit-element variant: avx512_binop_rm_vl (no
// broadcast form) over avx512vl_i16_info, encoded with EVEX_CD8<16, CD8VF>
// and VEX_WIG.
multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                OpndItins itins, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
                                 itins, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
                                 VEX_WIG;
}

// avx512_binop_rm_vl_b - 8-bit-element variant: avx512_binop_rm_vl (no
// broadcast form) over avx512vl_i8_info, encoded with EVEX_CD8<8, CD8VF>
// and VEX_WIG.
multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                OpndItins itins, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
                                 itins, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
                                 VEX_WIG;
}

// avx512_binop_rm_vl_dq - Defines both the 64-bit (suffix Q, opcode opc_q,
// mnemonic OpcodeStr + "q") and the 32-bit (suffix D, opcode opc_d, mnemonic
// OpcodeStr + "d") element variants of one operation.
multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, OpndItins itins, Predicate prd,
                                 bit IsCommutable = 0> {
  defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, itins, prd,
                                IsCommutable>;

  defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, itins, prd,
                                IsCommutable>;
}

// avx512_binop_rm_vl_bw - Defines both the 16-bit (suffix W, opcode opc_w,
// mnemonic OpcodeStr + "w") and the 8-bit (suffix B, opcode opc_b, mnemonic
// OpcodeStr + "b") element variants of one operation.
multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, OpndItins itins, Predicate prd,
                                 bit IsCommutable = 0> {
  defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, itins, prd,
                                IsCommutable>;

  defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, itins, prd,
                                IsCommutable>;
}

// avx512_binop_rm_vl_all - Defines all four element-size variants of one
// operation: D and Q under HasAVX512, B and W under HasBWI.
// Opcode arguments are given in the order byte, word, dword, qword.
multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  OpndItins itins, bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
                                    itins, HasAVX512, IsCommutable>,
              avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
                                    itins, HasBWI, IsCommutable>;
}

// avx512_binop_rm2 - Two-operand operation whose source and destination
// vector types may differ.
//   _Src   - type info of both source operands.
//   _Dst   - type info of the result (also passed to AVX512_maskable).
//   _Brdct - type info used for the broadcast memory form (scalar memory
//            operand, scalar load fragment, broadcast string, and the type
//            the broadcast is built in before being bitconverted).
// Defines three forms through AVX512_maskable:
//   rr  - both sources in registers (IsCommutable is forwarded here only).
//   rm  - second source loaded via _Src.LdFrag and bitconverted.
//   rmb - second source is a broadcast scalar load; sets EVEX_B.
multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, OpndItins itins,
                            SDNode OpNode,X86VectorVTInfo _Src,
                            X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
                            bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1","$src1, $src2",
                            (_Dst.VT (OpNode
                                         (_Src.VT _Src.RC:$src1),
                                         (_Src.VT _Src.RC:$src2))),
                            itins.rr, IsCommutable>,
                            AVX512BIBase, EVEX_4V, Sched<[itins.Sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                        "$src2, $src1", "$src1, $src2",
                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                      (bitconvert (_Src.LdFrag addr:$src2)))),
                        itins.rm>, AVX512BIBase, EVEX_4V,
                        Sched<[itins.Sched.Folded, ReadAfterLd]>;

  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                    (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
                    OpcodeStr,
                    "${src2}"##_Brdct.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_Brdct.BroadcastStr,
                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                                 (_Brdct.VT (X86VBroadcast
                                          (_Brdct.ScalarLdFrag addr:$src2)))))),
                    itins.rm>, AVX512BIBase, EVEX_4V, EVEX_B,
                    Sched<[itins.Sched.Folded, ReadAfterLd]>;
}

// Integer add/sub/multiply/average instructions built from the binop
// multiclasses above. The trailing 0/1 template argument is IsCommutable.
// For avx512_binop_rm_vl_all the opcodes are listed as byte, word, dword,
// qword; for avx512_binop_rm_vl_bw as byte, word.
defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
                                    SSE_INTALU_ITINS_P, 1>;
defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
                                    SSE_INTALU_ITINS_P, 0>;
defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", X86adds,
                                    SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", X86subs,
                                    SSE_INTALU_ITINS_P, HasBWI, 0>;
defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", X86addus,
                                     SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", X86subus,
                                     SSE_INTALU_ITINS_P, HasBWI, 0>;
defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
                                    SSE_INTMUL_ITINS_P, HasAVX512, 1>, T8PD;
defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
                                    SSE_INTMUL_ITINS_P, HasBWI, 1>;
defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
                                    SSE_INTMUL_ITINS_P, HasDQI, 1>, T8PD;
defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SSE_INTMUL_ITINS_P,
                                    HasBWI, 1>;
defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SSE_INTMUL_ITINS_P,
                                     HasBWI, 1>;
defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs, SSE_INTMUL_ITINS_P,
                                      HasBWI, 1>, T8PD;
defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
                                   SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
                                    SSE_INTMUL_ITINS_P, HasAVX512, 1>, T8PD;
defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
                                     SSE_INTMUL_ITINS_P, HasAVX512, 1>;

// avx512_binop_all - Instantiates avx512_binop_rm2 for all three vector
// lengths, with independent source and destination type families and a
// 64-bit-element broadcast type (v8i64_info / v4i64x_info / v2i64x_info):
//   NAME#Z    - 512-bit, requires `prd`.
//   NAME#Z256 - 256-bit, requires HasVLX and `prd`.
//   NAME#Z128 - 128-bit, requires HasVLX and `prd`.
// All forms are encoded with EVEX_CD8<64, CD8VF> and VEX_W.
multiclass avx512_binop_all<bits<8> opc, string OpcodeStr, OpndItins itins,
                            AVX512VLVectorVTInfo _SrcVTInfo, AVX512VLVectorVTInfo _DstVTInfo,
                            SDNode OpNode, Predicate prd,  bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode,
                                   _SrcVTInfo.info512, _DstVTInfo.info512,
                                   v8i64_info, IsCommutable>,
                                   EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
  let Predicates = [HasVLX, prd] in {
    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode,
                                      _SrcVTInfo.info256, _DstVTInfo.info256,
                                      v4i64x_info, IsCommutable>,
                                      EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode,
                                      _SrcVTInfo.info128, _DstVTInfo.info128,
                                      v2i64x_info, IsCommutable>,
                                      EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
  }
}

// VPMULTISHIFTQB: byte-element source and destination, selected from
// X86multishift, gated on HasVBMI, not commutable.
defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb",
                                       SSE_INTALU_ITINS_P,
                                       avx512vl_i8_info, avx512vl_i8_info,
                                       X86multishift, HasVBMI, 0>, T8PD;
Elena Demikhovsky172a27c2014-01-08 10:54:22 +00004557
// Broadcast-memory ("rmb") form of a binary op whose source vector type
// (_Src) differs from its result type (_Dst).  $src2 is a scalar memory
// operand: it is loaded with _Src.ScalarLdFrag, expanded with X86VBroadcast
// to _Src.VT, and bitconverted before being passed to OpNode.
multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
                            OpndItins itins> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst,
                             (outs _Dst.RC:$dst),
                             (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
                             OpcodeStr,
                             "${src2}"##_Src.BroadcastStr##", $src1",
                             "$src1, ${src2}"##_Src.BroadcastStr,
                             (_Dst.VT (OpNode
                                (_Src.VT _Src.RC:$src1),
                                (bitconvert
                                  (_Src.VT (X86VBroadcast
                                    (_Src.ScalarLdFrag addr:$src2)))))),
                             itins.rm>,
             EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
             Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
4572
// Register-register ("rr") and register-memory ("rm") forms of a binary op
// that reads two _Src vectors and produces a _Dst vector.  Only the rr form
// receives IsCommutable; the rm form loads $src2 with _Src.LdFrag and
// bitconverts it.
multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, X86VectorVTInfo _Src,
                           X86VectorVTInfo _Dst, OpndItins itins,
                           bit IsCommutable = 0> {
  // Both sources in registers.
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst,
                            (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2),
                            OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                             (_Src.VT _Src.RC:$src2))),
                            itins.rr, IsCommutable>,
            EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[itins.Sched]>;

  // Second source is a full-vector memory operand.
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst,
                            (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.MemOp:$src2),
                            OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                             (bitconvert
                                               (_Src.LdFrag addr:$src2)))),
                            itins.rm>,
            EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
            Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
4593
// i32 -> i16 pack instantiations at 512/256/128 bits.  Each width combines
// avx512_packs_rm (rr, rm) with avx512_packs_rmb (rmb).  All widths require
// HasBWI; the 256/128-bit ones additionally require HasVLX.
multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode> {
  let Predicates = [HasBWI] in {
    defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
                                  v32i16_info, SSE_PACK>,
                  avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
                                   v32i16_info, SSE_PACK>,
                  EVEX_V512;
  }

  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
                                     v16i16x_info, SSE_PACK>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
                                      v16i16x_info, SSE_PACK>,
                     EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
                                     v8i16x_info, SSE_PACK>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
                                      v8i16x_info, SSE_PACK>,
                     EVEX_V128;
  }
}
// i16 -> i8 pack instantiations at 512/256/128 bits.  Unlike the i32 -> i16
// variant, only avx512_packs_rm is used (no rmb form is defined here), and
// every width is tagged VEX_WIG.
multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
                                   SDNode OpNode> {
  let Predicates = [HasBWI] in {
    defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info,
                                  v64i8_info, SSE_PACK>,
                  EVEX_V512, VEX_WIG;
  }

  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
                                     v32i8x_info, SSE_PACK>,
                     EVEX_V256, VEX_WIG;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
                                     v16i8x_info, SSE_PACK>,
                     EVEX_V128, VEX_WIG;
  }
}
Igor Bregerf7fd5472015-07-21 07:11:28 +00004624
// Multiply-add instantiations: reuses avx512_packs_rm (rr + rm forms) with
// the SSE_PMADD itinerary for each vector width of the given _Src/_Dst type
// families.  HasBWI gates all widths; 256/128-bit also need HasVLX.
multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
                         SDNode OpNode, AVX512VLVectorVTInfo _Src,
                         AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
  let Predicates = [HasBWI] in {
    defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
                                  _Dst.info512, SSE_PMADD, IsCommutable>,
                  EVEX_V512;
  }

  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
                                     _Dst.info256, SSE_PMADD, IsCommutable>,
                     EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
                                     _Dst.info128, SSE_PMADD, IsCommutable>,
                     EVEX_V128;
  }
}
4638
// Pack instructions, selected from X86Packss / X86Packus.
// VPACKUSDW is the only one here that uses AVX5128IBase rather than
// AVX512BIBase.
defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>,
                 AVX512BIBase;
defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>,
                 AVX5128IBase;
defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>,
                 AVX512BIBase;
defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>,
                 AVX512BIBase;

// Multiply-add instructions.  VPMADDUBSW leaves IsCommutable at its default
// (0); VPMADDWD passes 1.
defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
                                avx512vl_i8_info, avx512vl_i16_info>,
                  AVX512BIBase, T8PD, VEX_WIG;
defm VPMADDWD   : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
                                avx512vl_i16_info, avx512vl_i32_info, 1>,
                  AVX512BIBase, VEX_WIG;
Elena Demikhovsky2557a222015-05-04 09:14:02 +00004648
// Packed integer min/max.  Byte and word forms are gated on HasBWI; the
// combined dword/qword forms (avx512_binop_rm_vl_dq) on HasAVX512.

// Signed maximum.
defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
                                    SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
                                    SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPMAXS  : avx512_binop_rm_vl_dq<0x3D, 0x3D, "vpmaxs", smax,
                                     SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;

// Unsigned maximum.
defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
                                    SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
                                    SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
defm VPMAXU  : avx512_binop_rm_vl_dq<0x3F, 0x3F, "vpmaxu", umax,
                                     SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;

// Signed minimum.
defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
                                    SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
                                    SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPMINS  : avx512_binop_rm_vl_dq<0x39, 0x39, "vpmins", smin,
                                     SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;

// Unsigned minimum.
defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
                                    SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
                                    SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
defm VPMINU  : avx512_binop_rm_vl_dq<0x3B, 0x3B, "vpminu", umin,
                                     SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
Craig Topperabe80cc2016-08-28 06:06:28 +00004676
// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX.
//
// Each narrow operand is inserted into an otherwise undefined (IMPLICIT_DEF)
// v8i64 register, the 512-bit VPMULLQZrr is applied, and the original-width
// sub-register of the result is extracted.
//
// This block previously appeared twice, back to back, with identical
// predicates and identical patterns; the redundant second copy has been
// folded away so each pattern is defined exactly once.
let Predicates = [HasDQI, NoVLX] in {
  // 256-bit multiply via the low ymm half of a zmm register.
  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;

  // 128-bit multiply via the low xmm part of a zmm register.
  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
}
4710
// Selection patterns that implement a v4i64 / v2i64 OpNode with the given
// 512-bit instruction: both operands are inserted into undefined
// (IMPLICIT_DEF) v8i64 registers, Instr is applied, and the sub-register of
// the original width is extracted from the result.
multiclass avx512_min_max_lowering<Instruction Instr, SDNode OpNode> {
  // 256-bit operands (sub_ymm).
  def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
            (EXTRACT_SUBREG
                (Instr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;

  // 128-bit operands (sub_xmm).
  def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
            (EXTRACT_SUBREG
                (Instr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
}
4726
// With AVX512 but without VLX, select 128/256-bit 64-bit-element min/max
// through the 512-bit register-register instructions.
let Predicates = [HasAVX512, NoVLX] in {
  defm : avx512_min_max_lowering<VPMAXUQZrr, umax>;
  defm : avx512_min_max_lowering<VPMINUQZrr, umin>;
  defm : avx512_min_max_lowering<VPMAXSQZrr, smax>;
  defm : avx512_min_max_lowering<VPMINSQZrr, smin>;
}
4733
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004734//===----------------------------------------------------------------------===//
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004735// AVX-512 Logical Instructions
4736//===----------------------------------------------------------------------===//
4737
// Register-register and register-memory forms of a bitwise logic op.
//
// Two patterns are handed to AVX512_maskable_logic for each form:
//   * one built from OpNode, with both operands bitconverted and the result
//     typed as _.i64VT;
//   * one built from OpNodeMsk, computed as _.i64VT and bitconverted back to
//     _.VT.
// OpNodeMsk is the OpNode to use when element size is important. OpNode will
// be set to null_frag for 32-bit elements.
multiclass avx512_logic_rm<bits<8> opc, string OpcodeStr,
                           SDPatternOperator OpNode,
                           SDNode OpNodeMsk, OpndItins itins,
                           X86VectorVTInfo _,
                           bit IsCommutable = 0> {
  // Both sources in registers.
  let hasSideEffects = 0 in {
    defm rr : AVX512_maskable_logic<opc, MRMSrcReg, _,
                  (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2),
                  OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
                                   (bitconvert (_.VT _.RC:$src2)))),
                  (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
                                                        _.RC:$src2)))),
                  itins.rr, IsCommutable>,
              AVX512BIBase, EVEX_4V, Sched<[itins.Sched]>;
  }

  // Second source loaded from memory with _.LdFrag.
  let hasSideEffects = 0, mayLoad = 1 in {
    defm rm : AVX512_maskable_logic<opc, MRMSrcMem, _,
                  (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2),
                  OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
                                   (bitconvert (_.LdFrag addr:$src2)))),
                  (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
                                       (bitconvert (_.LdFrag addr:$src2)))))),
                  itins.rm>,
              AVX512BIBase, EVEX_4V,
              Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
4766
// Extends avx512_logic_rm (rr, rm) with the broadcast-memory "rmb" form.
// OpNodeMsk is the OpNode to use where element size is important. So use
// for all of the broadcast patterns.
multiclass avx512_logic_rmb<bits<8> opc, string OpcodeStr,
                            SDPatternOperator OpNode,
                            SDNode OpNodeMsk, OpndItins itins,
                            X86VectorVTInfo _,
                            bit IsCommutable = 0> :
           avx512_logic_rm<opc, OpcodeStr, OpNode, OpNodeMsk, itins, _,
                           IsCommutable> {
  // $src2 is a scalar load expanded with X86VBroadcast; both patterns are
  // built from OpNodeMsk.
  defm rmb : AVX512_maskable_logic<opc, MRMSrcMem, _,
                 (outs _.RC:$dst),
                 (ins _.RC:$src1, _.ScalarMemOp:$src2),
                 OpcodeStr,
                 "${src2}"##_.BroadcastStr##", $src1",
                 "$src1, ${src2}"##_.BroadcastStr,
                 (_.i64VT (OpNodeMsk _.RC:$src1,
                             (bitconvert
                               (_.VT (X86VBroadcast
                                       (_.ScalarLdFrag addr:$src2)))))),
                 (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
                             (bitconvert
                               (_.VT (X86VBroadcast
                                       (_.ScalarLdFrag addr:$src2)))))))),
                 itins.rm>,
             AVX512BIBase, EVEX_4V, EVEX_B,
             Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
4790
// Instantiates avx512_logic_rmb for each vector width in VTInfo.  The
// 512-bit form requires HasAVX512; the 256/128-bit forms also need HasVLX.
multiclass avx512_logic_rmb_vl<bits<8> opc, string OpcodeStr,
                               SDPatternOperator OpNode,
                               SDNode OpNodeMsk, OpndItins itins,
                               AVX512VLVectorVTInfo VTInfo,
                               bit IsCommutable = 0> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, itins,
                              VTInfo.info512, IsCommutable>,
             EVEX_V512;
  }

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, itins,
                                 VTInfo.info256, IsCommutable>,
                EVEX_V256;
    defm Z128 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, itins,
                                 VTInfo.info128, IsCommutable>,
                EVEX_V128;
  }
}
4807
// Defines the qword ("q" suffix) and dword ("d" suffix) flavours of a logic
// instruction.  The Q flavour passes OpNode for both OpNode and OpNodeMsk;
// the D flavour passes null_frag as OpNode and only supplies OpNodeMsk.
multiclass avx512_logic_rm_vl_dq<bits<8> opc_d, bits<8> opc_q,
                                 string OpcodeStr,
                                 SDNode OpNode, OpndItins itins,
                                 bit IsCommutable = 0> {
  defm Q : avx512_logic_rmb_vl<opc_q, OpcodeStr#"q", OpNode, OpNode, itins,
                               avx512vl_i64_info, IsCommutable>,
           VEX_W, EVEX_CD8<64, CD8VF>;
  defm D : avx512_logic_rmb_vl<opc_d, OpcodeStr#"d", null_frag, OpNode, itins,
                               avx512vl_i32_info, IsCommutable>,
           EVEX_CD8<32, CD8VF>;
}
4818
// Bitwise logic instructions.  AND/OR/XOR are marked commutable; VPANDN
// leaves IsCommutable at its default of 0.
defm VPAND  : avx512_logic_rm_vl_dq<0xDB, 0xDB, "vpand", and,
                                    SSE_BIT_ITINS_P, 1>;
defm VPOR   : avx512_logic_rm_vl_dq<0xEB, 0xEB, "vpor", or,
                                    SSE_BIT_ITINS_P, 1>;
defm VPXOR  : avx512_logic_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
                                    SSE_BIT_ITINS_P, 1>;
defm VPANDN : avx512_logic_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
                                    SSE_BIT_ITINS_P>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004823
4824//===----------------------------------------------------------------------===//
4825// AVX-512 FP arithmetic
4826//===----------------------------------------------------------------------===//
// Scalar FP binary operation.  Produces four records:
//   rr_Int / rm_Int - maskable forms on the vector register class (_.RC),
//                     selected from VecNode with an FROUND_CURRENT operand;
//   rr / rm         - isCodeGenOnly forms on the scalar register class
//                     (_.FRC), selected from OpNode, guarded by HasAVX512.
// IsCommutable is applied only to the FRC "rr" form.
multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                            SDNode OpNode, SDNode VecNode, OpndItins itins,
                            bit IsCommutable> {
  let ExeDomain = _.ExeDomain in {
    defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _,
                      (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2),
                      OpcodeStr,
                      "$src2, $src1", "$src1, $src2",
                      (_.VT (VecNode _.RC:$src1, _.RC:$src2,
                                     (i32 FROUND_CURRENT))),
                      itins.rr>,
                  Sched<[itins.Sched]>;

    defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _,
                      (outs _.RC:$dst),
                      (ins _.RC:$src1, _.IntScalarMemOp:$src2),
                      OpcodeStr,
                      "$src2, $src1", "$src1, $src2",
                      (_.VT (VecNode _.RC:$src1,
                                     _.ScalarIntMemCPat:$src2,
                                     (i32 FROUND_CURRENT))),
                      itins.rm>,
                  Sched<[itins.Sched.Folded, ReadAfterLd]>;

    let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
      def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
                 (ins _.FRC:$src1, _.FRC:$src2),
                 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
                 itins.rr>,
               Sched<[itins.Sched]> {
        let isCommutable = IsCommutable;
      }

      def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
                 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                 [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                           (_.ScalarLdFrag addr:$src2)))],
                 itins.rm>,
               Sched<[itins.Sched.Folded, ReadAfterLd]>;
    }
  }
}
4862
// Scalar FP register form that takes an extra AVX512RC:$rc operand, which is
// forwarded to VecNode as an i32 immediate.  Tagged EVEX_B and EVEX_RC.
multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,
                                  X86VectorVTInfo _,
                                  SDNode VecNode, OpndItins itins,
                                  bit IsCommutable = 0> {
  let ExeDomain = _.ExeDomain in {
    defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _,
                       (outs _.RC:$dst),
                       (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc),
                       OpcodeStr,
                       "$rc, $src2, $src1", "$src1, $src2, $rc",
                       (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                (i32 imm:$rc)),
                       itins.rr, IsCommutable>,
                   EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
  }
}
// Scalar FP binary operation with a "{sae}" variant.  Produces:
//   rr_Int / rm_Int - maskable forms on _.RC selected from VecNode (no
//                     rounding operand);
//   rr / rm         - isCodeGenOnly forms on _.FRC selected from OpNode,
//                     guarded by HasAVX512; IsCommutable applies to rr only;
//   rrb_Int         - maskable register form printed with "{sae}", selected
//                     from SaeNode with FROUND_NO_EXC, tagged EVEX_B.
multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,
                                X86VectorVTInfo _,
                                SDNode OpNode, SDNode VecNode, SDNode SaeNode,
                                OpndItins itins, bit IsCommutable> {
  let ExeDomain = _.ExeDomain in {
    defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _,
                      (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2),
                      OpcodeStr,
                      "$src2, $src1", "$src1, $src2",
                      (_.VT (VecNode _.RC:$src1, _.RC:$src2)),
                      itins.rr>,
                  Sched<[itins.Sched]>;

    defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _,
                      (outs _.RC:$dst),
                      (ins _.RC:$src1, _.IntScalarMemOp:$src2),
                      OpcodeStr,
                      "$src2, $src1", "$src1, $src2",
                      (_.VT (VecNode _.RC:$src1,
                                     _.ScalarIntMemCPat:$src2)),
                      itins.rm>,
                  Sched<[itins.Sched.Folded, ReadAfterLd]>;

    let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
      def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
                 (ins _.FRC:$src1, _.FRC:$src2),
                 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
                 itins.rr>,
               Sched<[itins.Sched]> {
        let isCommutable = IsCommutable;
      }

      def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
                 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                 [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                           (_.ScalarLdFrag addr:$src2)))],
                 itins.rm>,
               Sched<[itins.Sched.Folded, ReadAfterLd]>;
    }

    defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _,
                       (outs _.RC:$dst),
                       (ins _.RC:$src1, _.RC:$src2),
                       OpcodeStr,
                       "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                       (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                (i32 FROUND_NO_EXC)),
                       itins.rr>,
                   EVEX_B, Sched<[itins.Sched]>;
  }
}
4914
// Single- ("ss") and double-precision ("sd") scalar instantiations that
// combine avx512_fp_scalar with avx512_fp_scalar_round.  The SD flavour
// additionally carries VEX_W and uses the 64-bit CD8 tuple.
multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                SDNode VecNode,
                                SizeItins itins, bit IsCommutable> {
  defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
                              itins.s, IsCommutable>,
             avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, VecNode,
                                    itins.s, IsCommutable>,
             XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;

  defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
                              itins.d, IsCommutable>,
             avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, VecNode,
                                    itins.d, IsCommutable>,
             XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
4929
// Single- ("ss") and double-precision ("sd") scalar instantiations of
// avx512_fp_scalar_sae.  The SD flavour additionally carries VEX_W and uses
// the 64-bit CD8 tuple.
multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode VecNode, SDNode SaeNode,
                              SizeItins itins, bit IsCommutable> {
  defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
                                  VecNode, SaeNode, itins.s, IsCommutable>,
             XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;

  defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
                                  VecNode, SaeNode, itins.d, IsCommutable>,
             XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
// Scalar FP arithmetic.  ADD and MUL pass IsCommutable = 1; SUB and DIV
// pass 0.
defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnds,
                                 SSE_ALU_ITINS_S, 1>;
defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnds,
                                 SSE_MUL_ITINS_S, 1>;
defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnds,
                                 SSE_ALU_ITINS_S, 0>;
defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnds,
                                 SSE_DIV_ITINS_S, 0>;

// Scalar FP min/max use the {sae} multiclass and are not commutable here.
defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminRnds,
                               SSE_ALU_ITINS_S, 0>;
defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxRnds,
                               SSE_ALU_ITINS_S, 0>;
Elena Demikhovskyd84f3372016-07-11 06:08:06 +00004948
// MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
// X86fminc and X86fmaxc instead of X86fmin and X86fmax
//
// Defines isCodeGenOnly "rr" and "rm" forms on the scalar register class
// (_.FRC); the rr form is unconditionally marked commutable.
multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
                                    X86VectorVTInfo _, SDNode OpNode,
                                    OpndItins itins> {
  let isCodeGenOnly = 1, Predicates = [HasAVX512],
      ExeDomain = _.ExeDomain in {
    def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _.FRC:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
               itins.rr>,
             Sched<[itins.Sched]> {
      let isCommutable = 1;
    }

    def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _.ScalarMemOp:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                         (_.ScalarLdFrag addr:$src2)))],
               itins.rm>,
             Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
// Commutable scalar min/max (X86fminc / X86fmaxc).  They reuse the opcodes
// and mnemonics of the VMIN/VMAX scalar instructions (0x5D / 0x5F).
defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
                                         SSE_ALU_ITINS_S.s>,
                XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;

defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
                                         SSE_ALU_ITINS_S.d>,
                XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;

defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
                                         SSE_ALU_ITINS_S.s>,
                XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;

defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
                                         SSE_ALU_ITINS_S.d>,
                XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
Elena Demikhovsky02ffd262015-03-01 07:44:04 +00004984
// Packed FP binary operation.  Defines three maskable forms whose mnemonic
// is OpcodeStr followed by _.Suffix:
//   rr  - both sources in registers (receives IsCommutable);
//   rm  - second source loaded with _.LdFrag;
//   rmb - second source is a scalar load expanded with X86VBroadcast,
//         tagged EVEX_B.
multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr,
                            SDPatternOperator OpNode,
                            X86VectorVTInfo _, OpndItins itins,
                            bit IsCommutable> {
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
    defm rr : AVX512_maskable<opc, MRMSrcReg, _,
                  (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2),
                  OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                  itins.rr, IsCommutable>,
              EVEX_4V, Sched<[itins.Sched]>;

    let mayLoad = 1 in {
      defm rm : AVX512_maskable<opc, MRMSrcMem, _,
                    (outs _.RC:$dst),
                    (ins _.RC:$src1, _.MemOp:$src2),
                    OpcodeStr##_.Suffix,
                    "$src2, $src1", "$src1, $src2",
                    (OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
                    itins.rm>,
                EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;

      defm rmb : AVX512_maskable<opc, MRMSrcMem, _,
                     (outs _.RC:$dst),
                     (ins _.RC:$src1, _.ScalarMemOp:$src2),
                     OpcodeStr##_.Suffix,
                     "${src2}"##_.BroadcastStr##", $src1",
                     "$src1, ${src2}"##_.BroadcastStr,
                     (OpNode _.RC:$src1,
                             (_.VT (X86VBroadcast
                                     (_.ScalarLdFrag addr:$src2)))),
                     itins.rm>,
                 EVEX_4V, EVEX_B,
                 Sched<[itins.Sched.Folded, ReadAfterLd]>;
    }
  }
}
Elena Demikhovskyf7c1b162014-03-06 08:45:30 +00005011
// Packed FP register form with an explicit AVX512RC:$rc operand, forwarded
// to OpNodeRnd as an i32 immediate.  Tagged EVEX_B and EVEX_RC.
multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
                                  SDPatternOperator OpNodeRnd,
                                  OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm rrb : AVX512_maskable<opc, MRMSrcReg, _,
                   (outs _.RC:$dst),
                   (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc),
                   OpcodeStr##_.Suffix,
                   "$rc, $src2, $src1", "$src1, $src2, $rc",
                   (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 imm:$rc))),
                   itins.rr>,
               EVEX_4V, EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
  }
}
5021
// Packed FP binary operation in its "{sae}" form.  Produces one
// register-register form, `rrb`; the pattern always passes FROUND_NO_EXC as
// the third operand of OpNodeRnd, and the assembly strings carry a literal
// "{sae}" token instead of a rounding operand.
multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNodeRnd,
                                OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm rrb : AVX512_maskable<opc, MRMSrcReg, _,
                               (outs _.RC:$dst),
                               (ins _.RC:$src1, _.RC:$src2),
                               OpcodeStr##_.Suffix,
                               "{sae}, $src2, $src1",
                               "$src1, $src2, {sae}",
                               (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2,
                                                (i32 FROUND_NO_EXC))),
                               itins.rr>,
               EVEX_4V, EVEX_B, Sched<[itins.Sched]>;
  }
}
5031
// Instantiates avx512_fp_packed once per packed single/double vector width.
//   PSZ / PDZ                 : 512-bit, guarded by `prd`.
//   PSZ128/256, PDZ128/256    : 128/256-bit, guarded by `prd` and HasVLX.
// Single-precision forms use itins.s with the PS prefix and a 32-bit CD8
// element; double-precision forms use itins.d with PD, VEX_W and a 64-bit
// CD8 element.
multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr,
                             SDPatternOperator OpNode, Predicate prd,
                             SizeItins itins, bit IsCommutable = 0> {
  // 512-bit forms.
  let Predicates = [prd] in {
    defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info, itins.s,
                                IsCommutable>,
               EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
    defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info, itins.d,
                                IsCommutable>,
               EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
  }

  // 128- and 256-bit forms: define only if AVX512VL feature is present.
  let Predicates = [prd, HasVLX] in {
    defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info,
                                   itins.s, IsCommutable>,
                  EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info,
                                   itins.s, IsCommutable>,
                  EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info,
                                   itins.d, IsCommutable>,
                  EVEX_V128, PD, VEX_W, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info,
                                   itins.d, IsCommutable>,
                  EVEX_V256, PD, VEX_W, EVEX_CD8<64, CD8VF>;
  }
}
5060
// 512-bit single (PSZ) and double (PDZ) instantiations of
// avx512_fp_round_packed.  Only the 512-bit vector infos are instantiated
// here.
multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr,
                                   SDNode OpNodeRnd, SizeItins itins> {
  defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, itins.s,
                                    v16f32_info>,
             EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, itins.d,
                                    v8f64_info>,
             EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
}
5068
// 512-bit single (PSZ) and double (PDZ) instantiations of
// avx512_fp_sae_packed.  Only the 512-bit vector infos are instantiated here.
multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr,
                                 SDNode OpNodeRnd, SizeItins itins> {
  defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, itins.s,
                                  v16f32_info>,
             EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, itins.d,
                                  v8f64_info>,
             EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
}
5076
// Packed FP arithmetic.  ADD/MUL/SUB/DIV pair the plain packed forms with the
// explicit-rounding forms; ADD and MUL are instantiated as commutable.
defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512,
                              SSE_ALU_ITINS_P, 1>,
            avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd,
                                    SSE_ALU_ITINS_P>;
defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512,
                              SSE_MUL_ITINS_P, 1>,
            avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd,
                                    SSE_MUL_ITINS_P>;
defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512,
                              SSE_ALU_ITINS_P>,
            avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd,
                                    SSE_ALU_ITINS_P>;
defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512,
                              SSE_DIV_ITINS_P>,
            avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd,
                                    SSE_DIV_ITINS_P>;

// MIN/MAX pair the plain packed forms (not commutable) with the {sae} forms.
defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512,
                              SSE_ALU_ITINS_P, 0>,
            avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd,
                                  SSE_ALU_ITINS_P>;
defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512,
                              SSE_ALU_ITINS_P, 0>,
            avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd,
                                  SSE_ALU_ITINS_P>;

// Code-gen-only variants that reuse the vmin/vmax opcodes and mnemonics but
// match X86fminc/X86fmaxc and are instantiated as commutable.
let isCodeGenOnly = 1 in {
  defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512,
                                 SSE_ALU_ITINS_P, 1>;
  defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512,
                                 SSE_ALU_ITINS_P, 1>;
}

// FP-domain logical instructions.  They are guarded by HasDQI and carry no
// selection pattern of their own (null_frag).
defm VAND  : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI,
                               SSE_ALU_ITINS_P, 1>;
defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI,
                               SSE_ALU_ITINS_P, 0>;
defm VOR   : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
                               SSE_ALU_ITINS_P, 1>;
defm VXOR  : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
                               SSE_ALU_ITINS_P, 1>;
Elena Demikhovsky52e4a0e2014-01-05 10:46:09 +00005107
// Patterns catch floating point selects with bitcasted integer logic ops.
//
// For the FP vector type described by `_`, these map an integer logic node
// (OpNode, computed on _.i64VT and bitconverted back to _.VT) onto the
// instruction family named by InstrStr:
//   vselect(mask, op, $src0)        -> rrk  / rmk  / rmbk
//   vselect(mask, op, all-zeros)    -> rrkz / rmkz / rmbkz
//   unmasked op with broadcast load -> rmb
// Every pattern is guarded by `prd`.
multiclass avx512_fp_logical_lowering<string InstrStr, SDNode OpNode,
                                      X86VectorVTInfo _, Predicate prd> {
  let Predicates = [prd] in {
    // Masked register-register logical operations.
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                             (bitconvert
                              (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
                             _.RC:$src0)),
              (!cast<Instruction>(InstrStr#rrk)
                 _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2)>;
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                             (bitconvert
                              (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
                             _.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr#rrkz)
                 _.KRCWM:$mask, _.RC:$src1, _.RC:$src2)>;

    // Masked register-memory logical operations.
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                             (bitconvert
                              (_.i64VT (OpNode _.RC:$src1,
                                               (load addr:$src2)))),
                             _.RC:$src0)),
              (!cast<Instruction>(InstrStr#rmk)
                 _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, addr:$src2)>;
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                             (bitconvert
                              (_.i64VT (OpNode _.RC:$src1,
                                               (load addr:$src2)))),
                             _.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr#rmkz)
                 _.KRCWM:$mask, _.RC:$src1, addr:$src2)>;

    // Register-broadcast logical operations.
    def : Pat<(_.i64VT (OpNode _.RC:$src1,
                               (bitconvert
                                (_.VT (X86VBroadcast
                                       (_.ScalarLdFrag addr:$src2)))))),
              (!cast<Instruction>(InstrStr#rmb) _.RC:$src1, addr:$src2)>;
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                             (bitconvert
                              (_.i64VT
                               (OpNode _.RC:$src1,
                                       (bitconvert
                                        (_.VT (X86VBroadcast
                                               (_.ScalarLdFrag
                                                addr:$src2))))))),
                             _.RC:$src0)),
              (!cast<Instruction>(InstrStr#rmbk)
                 _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, addr:$src2)>;
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                             (bitconvert
                              (_.i64VT
                               (OpNode _.RC:$src1,
                                       (bitconvert
                                        (_.VT (X86VBroadcast
                                               (_.ScalarLdFrag
                                                addr:$src2))))))),
                             _.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr#rmbkz)
                 _.KRCWM:$mask, _.RC:$src1, addr:$src2)>;
  }
}
5160
// Applies avx512_fp_logical_lowering to every FP vector width.  f32 vectors
// map onto the "D" instruction names and f64 vectors onto the "Q" names built
// from InstrStr; 128/256-bit forms require HasVLX, 512-bit forms HasAVX512.
multiclass avx512_fp_logical_lowering_sizes<string InstrStr, SDNode OpNode> {
  // 128-bit.
  defm : avx512_fp_logical_lowering<InstrStr#DZ128, OpNode, v4f32x_info,
                                    HasVLX>;
  defm : avx512_fp_logical_lowering<InstrStr#QZ128, OpNode, v2f64x_info,
                                    HasVLX>;
  // 256-bit.
  defm : avx512_fp_logical_lowering<InstrStr#DZ256, OpNode, v8f32x_info,
                                    HasVLX>;
  defm : avx512_fp_logical_lowering<InstrStr#QZ256, OpNode, v4f64x_info,
                                    HasVLX>;
  // 512-bit.
  defm : avx512_fp_logical_lowering<InstrStr#DZ, OpNode, v16f32_info,
                                    HasAVX512>;
  defm : avx512_fp_logical_lowering<InstrStr#QZ, OpNode, v8f64_info,
                                    HasAVX512>;
}
5169
// Route FP-typed and/or/xor/andn selects onto the integer VPAND/VPOR/VPXOR/
// VPANDN instruction families.
defm : avx512_fp_logical_lowering_sizes<"VPAND",  and>;
defm : avx512_fp_logical_lowering_sizes<"VPOR",   or>;
defm : avx512_fp_logical_lowering_sizes<"VPXOR",  xor>;
defm : avx512_fp_logical_lowering_sizes<"VPANDN", X86andnp>;
5174
// Use packed logical operations for scalar ops.
//
// Each scalar X86fand/X86for/X86fxor/X86fandn on FR64X/FR32X is selected to
// the 128-bit packed instruction: both sources are copied into VR128X, the
// packed op is applied, and the result is copied back to the scalar class.
let Predicates = [HasVLX,HasDQI] in {
  // f64 -> *PDZ128rr
  def : Pat<(f64 (X86fand FR64X:$src1, FR64X:$src2)),
            (COPY_TO_REGCLASS
               (VANDPDZ128rr (COPY_TO_REGCLASS FR64X:$src1, VR128X),
                             (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
               FR64X)>;
  def : Pat<(f64 (X86for FR64X:$src1, FR64X:$src2)),
            (COPY_TO_REGCLASS
               (VORPDZ128rr (COPY_TO_REGCLASS FR64X:$src1, VR128X),
                            (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
               FR64X)>;
  def : Pat<(f64 (X86fxor FR64X:$src1, FR64X:$src2)),
            (COPY_TO_REGCLASS
               (VXORPDZ128rr (COPY_TO_REGCLASS FR64X:$src1, VR128X),
                             (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
               FR64X)>;
  def : Pat<(f64 (X86fandn FR64X:$src1, FR64X:$src2)),
            (COPY_TO_REGCLASS
               (VANDNPDZ128rr (COPY_TO_REGCLASS FR64X:$src1, VR128X),
                              (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
               FR64X)>;

  // f32 -> *PSZ128rr
  def : Pat<(f32 (X86fand FR32X:$src1, FR32X:$src2)),
            (COPY_TO_REGCLASS
               (VANDPSZ128rr (COPY_TO_REGCLASS FR32X:$src1, VR128X),
                             (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
               FR32X)>;
  def : Pat<(f32 (X86for FR32X:$src1, FR32X:$src2)),
            (COPY_TO_REGCLASS
               (VORPSZ128rr (COPY_TO_REGCLASS FR32X:$src1, VR128X),
                            (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
               FR32X)>;
  def : Pat<(f32 (X86fxor FR32X:$src1, FR32X:$src2)),
            (COPY_TO_REGCLASS
               (VXORPSZ128rr (COPY_TO_REGCLASS FR32X:$src1, VR128X),
                             (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
               FR32X)>;
  def : Pat<(f32 (X86fandn FR32X:$src1, FR32X:$src2)),
            (COPY_TO_REGCLASS
               (VANDNPSZ128rr (COPY_TO_REGCLASS FR32X:$src1, VR128X),
                              (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
               FR32X)>;
}
5211
// Packed scalef-style operation using the current rounding mode: every
// pattern passes FROUND_CURRENT as OpNode's third operand.  Three forms are
// produced:
//   rr  - register source
//   rm  - full-vector memory source (_.LdFrag)
//   rmb - scalar memory source broadcast via X86VBroadcast (EVEX_B)
multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm rr : AVX512_maskable<opc, MRMSrcReg, _,
                              (outs _.RC:$dst),
                              (ins _.RC:$src1, _.RC:$src2),
                              OpcodeStr##_.Suffix,
                              "$src2, $src1", "$src1, $src2",
                              (_.VT (OpNode _.RC:$src1, _.RC:$src2,
                                            (i32 FROUND_CURRENT))),
                              itins.rr>,
              EVEX_4V, Sched<[itins.Sched]>;

    defm rm : AVX512_maskable<opc, MRMSrcMem, _,
                              (outs _.RC:$dst),
                              (ins _.RC:$src1, _.MemOp:$src2),
                              OpcodeStr##_.Suffix,
                              "$src2, $src1", "$src1, $src2",
                              (OpNode _.RC:$src1, (_.LdFrag addr:$src2),
                                      (i32 FROUND_CURRENT)),
                              itins.rm>,
              EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;

    defm rmb : AVX512_maskable<opc, MRMSrcMem, _,
                               (outs _.RC:$dst),
                               (ins _.RC:$src1, _.ScalarMemOp:$src2),
                               OpcodeStr##_.Suffix,
                               "${src2}"##_.BroadcastStr##", $src1",
                               "$src1, ${src2}"##_.BroadcastStr,
                               (OpNode _.RC:$src1,
                                       (_.VT (X86VBroadcast
                                              (_.ScalarLdFrag addr:$src2))),
                                       (i32 FROUND_CURRENT)),
                               itins.rm>,
               EVEX_4V, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
5235
// Scalar scalef-style operation using the current rounding mode
// (FROUND_CURRENT is passed as OpNode's third operand).  Produces a register
// form `rr` and a memory form `rm`; the memory operand is matched through
// _.ScalarIntMemCPat.
multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr,
                                   SDNode OpNode, OpndItins itins,
                                   X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _,
                                     (outs _.RC:$dst),
                                     (ins _.RC:$src1, _.RC:$src2),
                                     OpcodeStr##_.Suffix,
                                     "$src2, $src1", "$src1, $src2",
                                     (_.VT (OpNode _.RC:$src1, _.RC:$src2,
                                                   (i32 FROUND_CURRENT))),
                                     itins.rr>,
              Sched<[itins.Sched]>;

    defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _,
                                     (outs _.RC:$dst),
                                     (ins _.RC:$src1, _.IntScalarMemOp:$src2),
                                     OpcodeStr##_.Suffix,
                                     "$src2, $src1", "$src1, $src2",
                                     (OpNode _.RC:$src1,
                                             _.ScalarIntMemCPat:$src2,
                                             (i32 FROUND_CURRENT)),
                                     itins.rm>,
              Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
5252
// All scalef forms.
//   PSZ / PDZ       : 512-bit packed, current-rounding forms plus the
//                     explicit-rounding `rrb` form (opcode `opc`).
//   SSZ128 / SDZ128 : scalar forms plus their scalar rounding forms
//                     (opcode `opcScaler`).
//   PSZ128/256, PDZ128/256 : narrower packed forms, HasVLX only, without an
//                     explicit-rounding variant.
multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler,
                                string OpcodeStr, SDNode OpNode,
                                SDNode OpNodeScal> {
  // 512-bit packed.
  defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F32P,
                                v16f32_info>,
             avx512_fp_round_packed<opc, OpcodeStr, OpNode, SSE_ALU_F32P,
                                    v16f32_info>,
             EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F64P,
                                v8f64_info>,
             avx512_fp_round_packed<opc, OpcodeStr, OpNode, SSE_ALU_F64P,
                                    v8f64_info>,
             EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

  // Scalar.
  defm SSZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal,
                                        SSE_ALU_F32S, f32x_info>,
                avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info,
                                       OpNodeScal, SSE_ALU_ITINS_S.s>,
                EVEX_4V, EVEX_CD8<32, CD8VT1>;
  defm SDZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal,
                                        SSE_ALU_F64S, f64x_info>,
                avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info,
                                       OpNodeScal, SSE_ALU_ITINS_S.d>,
                EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;

  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F32P,
                                     v4f32x_info>,
                  EVEX_V128, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F32P,
                                     v8f32x_info>,
                  EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F64P,
                                     v2f64x_info>,
                  EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F64P,
                                     v4f64x_info>,
                  EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
  }
}
// vscalef: packed forms use opcode 0x2C, scalar forms 0x2D.
defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef",
                                    X86scalef, X86scalefs>,
               T8PD;
Asaf Badouh7ec4b7a2015-06-28 14:30:39 +00005280
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00005281//===----------------------------------------------------------------------===//
5282// AVX-512 VPTESTM instructions
5283//===----------------------------------------------------------------------===//
5284
// Register (`rr`) and full-vector memory (`rm`) forms of a vptest-style
// instruction.  The selection pattern is OpNode applied to
// (and src1, src2), computed on _.i64VT and bitconverted, compared against
// the all-zeros vector; the result lives in the mask class _.KRC.
// `Suffix` is only used to rebuild the instruction name for the extra
// patterns at the end.
multiclass avx512_vptest<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                         OpndItins itins, X86VectorVTInfo _, string Suffix> {
  let ExeDomain = _.ExeDomain in {
    let isCommutable = 1 in {
      defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _,
                                    (outs _.KRC:$dst),
                                    (ins _.RC:$src1, _.RC:$src2),
                                    OpcodeStr,
                                    "$src2, $src1", "$src1, $src2",
                                    (OpNode
                                       (bitconvert
                                        (_.i64VT (and _.RC:$src1,
                                                      _.RC:$src2))),
                                       _.ImmAllZerosV),
                                    itins.rr>,
                EVEX_4V, Sched<[itins.Sched]>;
    }

    defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _,
                                  (outs _.KRC:$dst),
                                  (ins _.RC:$src1, _.MemOp:$src2),
                                  OpcodeStr,
                                  "$src2, $src1", "$src1, $src2",
                                  (OpNode
                                     (bitconvert
                                      (_.i64VT
                                       (and _.RC:$src1,
                                            (bitconvert
                                             (_.LdFrag addr:$src2))))),
                                     _.ImmAllZerosV),
                                  itins.rm>,
              EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
              Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }

  // Patterns for compare with 0 that just use the same source twice.
  def : Pat<(_.KVT (OpNode _.RC:$src, _.ImmAllZerosV)),
            (_.KVT (!cast<Instruction>(NAME # Suffix # _.ZSuffix # "rr")
                      _.RC:$src, _.RC:$src))>;

  // Same, with the compare result and-ed with $mask: use the `rrk` form.
  def : Pat<(_.KVT (and _.KRC:$mask, (OpNode _.RC:$src, _.ImmAllZerosV))),
            (_.KVT (!cast<Instruction>(NAME # Suffix # _.ZSuffix # "rrk")
                      _.KRC:$mask, _.RC:$src, _.RC:$src))>;
}
5315
// Broadcast-memory (`rmb`) form of a vptest-style instruction: src2 is a
// scalar load broadcast with X86VBroadcast, and-ed with src1 and compared
// against the all-zeros vector by OpNode.  Tagged EVEX_B.
multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                            OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _,
                                   (outs _.KRC:$dst),
                                   (ins _.RC:$src1, _.ScalarMemOp:$src2),
                                   OpcodeStr,
                                   "${src2}"##_.BroadcastStr##", $src1",
                                   "$src1, ${src2}"##_.BroadcastStr,
                                   (OpNode
                                      (and _.RC:$src1,
                                           (X86VBroadcast
                                            (_.ScalarLdFrag addr:$src2))),
                                      _.ImmAllZerosV),
                                   itins.rm>,
               EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
               Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
Igor Bregerfca0a342016-01-28 13:19:25 +00005330
// Use 512bit version to implement 128/256 bit in case NoVLX.
//
// `_` describes the narrow vector type and `ExtendInfo` the 512-bit type.
// Each narrow source is placed into an IMPLICIT_DEF wide register with
// INSERT_SUBREG, the "Z" (512-bit) rr/rrk instruction is used, and the mask
// result is copied back to the narrow mask class _.KRC.  For the masked
// forms the incoming mask is first copied to ExtendInfo.KRC.
multiclass avx512_vptest_lowering<PatFrag OpNode, X86VectorVTInfo ExtendInfo,
                                  X86VectorVTInfo _, string Suffix> {
  // test(src1 & src2), unmasked.
  def : Pat<(_.KVT (OpNode (bitconvert
                            (_.i64VT (and _.RC:$src1, _.RC:$src2))),
                           _.ImmAllZerosV)),
            (_.KVT (COPY_TO_REGCLASS
                      (!cast<Instruction>(NAME # Suffix # "Zrr")
                         (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                        _.RC:$src1, _.SubRegIdx),
                         (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                        _.RC:$src2, _.SubRegIdx)),
                      _.KRC))>;

  // test(src1 & src2), and-ed with $mask.
  def : Pat<(_.KVT (and _.KRC:$mask,
                        (OpNode (bitconvert
                                 (_.i64VT (and _.RC:$src1, _.RC:$src2))),
                                _.ImmAllZerosV))),
            (COPY_TO_REGCLASS
               (!cast<Instruction>(NAME # Suffix # "Zrrk")
                  (COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC),
                  (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                 _.RC:$src1, _.SubRegIdx),
                  (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                 _.RC:$src2, _.SubRegIdx)),
               _.KRC)>;

  // Compare of a single source with zero: feed the same source twice.
  def : Pat<(_.KVT (OpNode _.RC:$src, _.ImmAllZerosV)),
            (_.KVT (COPY_TO_REGCLASS
                      (!cast<Instruction>(NAME # Suffix # "Zrr")
                         (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                        _.RC:$src, _.SubRegIdx),
                         (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                        _.RC:$src, _.SubRegIdx)),
                      _.KRC))>;

  // Single-source compare with zero, and-ed with $mask.
  def : Pat<(_.KVT (and _.KRC:$mask, (OpNode _.RC:$src, _.ImmAllZerosV))),
            (COPY_TO_REGCLASS
               (!cast<Instruction>(NAME # Suffix # "Zrrk")
                  (COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC),
                  (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                 _.RC:$src, _.SubRegIdx),
                  (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                 _.RC:$src, _.SubRegIdx)),
               _.KRC)>;
}
5375
// Dword/qword vptest forms for every vector width.
//   Z           : 512-bit, HasAVX512.
//   Z256 / Z128 : HasAVX512 + HasVLX.
//   Z*_Alt      : pattern-only lowering through the 512-bit instruction when
//                 VLX is not available (HasAVX512 + NoVLX).
// Each real width gets the rr/rm forms plus the broadcast rmb form.
multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
                                  PatFrag OpNode, OpndItins itins,
                                  AVX512VLVectorVTInfo _, string Suffix> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info512, Suffix>,
             avx512_vptest_mb<opc, OpcodeStr, OpNode, itins, _.info512>,
             EVEX_V512;
  }

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info256,
                              Suffix>,
                avx512_vptest_mb<opc, OpcodeStr, OpNode, itins, _.info256>,
                EVEX_V256;
    defm Z128 : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info128,
                              Suffix>,
                avx512_vptest_mb<opc, OpcodeStr, OpNode, itins, _.info128>,
                EVEX_V128;
  }

  let Predicates = [HasAVX512, NoVLX] in {
    defm Z256_Alt : avx512_vptest_lowering<OpNode, _.info512, _.info256,
                                           Suffix>;
    defm Z128_Alt : avx512_vptest_lowering<OpNode, _.info512, _.info128,
                                           Suffix>;
  }
}
5394
// Dword ("d") and qword ("q") element variants of a vptest-style
// instruction; the qword variant additionally sets VEX_W.
multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                            OpndItins itins> {
  defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", OpNode, itins,
                                  avx512vl_i32_info, "D">;
  defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", OpNode, itins,
                                  avx512vl_i64_info, "Q">,
           VEX_W;
}
5402
// Word ("w") and byte ("b") element variants of a vptest-style instruction.
//   WZ / BZ                 : 512-bit, HasBWI.
//   WZ256/128, BZ256/128    : HasVLX + HasBWI.
//   *_Alt                   : pattern-only lowering of the 128/256-bit types
//                             through the 512-bit instruction when VLX is
//                             not available.
// The word variants additionally set VEX_W.  No broadcast (rmb) form is
// instantiated for byte/word elements.
multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
                            PatFrag OpNode, OpndItins itins> {
  let Predicates = [HasBWI] in {
    defm WZ : avx512_vptest<opc, OpcodeStr#"w", OpNode, itins, v32i16_info,
                            "W">,
              EVEX_V512, VEX_W;
    defm BZ : avx512_vptest<opc, OpcodeStr#"b", OpNode, itins, v64i8_info,
                            "B">,
              EVEX_V512;
  }

  let Predicates = [HasVLX, HasBWI] in {
    defm WZ256 : avx512_vptest<opc, OpcodeStr#"w", OpNode, itins,
                               v16i16x_info, "W">,
                 EVEX_V256, VEX_W;
    defm WZ128 : avx512_vptest<opc, OpcodeStr#"w", OpNode, itins,
                               v8i16x_info, "W">,
                 EVEX_V128, VEX_W;
    defm BZ256 : avx512_vptest<opc, OpcodeStr#"b", OpNode, itins,
                               v32i8x_info, "B">,
                 EVEX_V256;
    defm BZ128 : avx512_vptest<opc, OpcodeStr#"b", OpNode, itins,
                               v16i8x_info, "B">,
                 EVEX_V128;
  }

  // These patterns select the 512-bit byte/word instructions (WZ/BZ above),
  // which only exist under HasBWI, so the guard must require HasBWI as well;
  // HasAVX512 alone does not guarantee the selected instruction is available.
  let Predicates = [HasBWI, NoVLX] in {
    defm BZ256_Alt : avx512_vptest_lowering<OpNode, v64i8_info, v32i8x_info,
                                            "B">;
    defm BZ128_Alt : avx512_vptest_lowering<OpNode, v64i8_info, v16i8x_info,
                                            "B">;
    defm WZ256_Alt : avx512_vptest_lowering<OpNode, v32i16_info, v16i16x_info,
                                            "W">;
    defm WZ128_Alt : avx512_vptest_lowering<OpNode, v32i16_info, v8i16x_info,
                                            "W">;
  }
}
5430
// These patterns are used to match vptestm/vptestnm. We don't treat pcmpeqm
// as commutable here because we already canonicalized all zeros vectors to the
// RHS during lowering.
//
// Both fragments are X86cmpm with a fixed i8 condition operand:
// 0 for X86pcmpeqm and 4 for X86pcmpnem.
def X86pcmpeqm : PatFrag<(ops node:$src1, node:$src2),
                         (X86cmpm node:$src1, node:$src2, (i8 0))>;
def X86pcmpnem : PatFrag<(ops node:$src1, node:$src2),
                         (X86cmpm node:$src1, node:$src2, (i8 4))>;
5438
// Every element size of a vptest-style instruction: byte/word forms use
// opc_wb, dword/qword forms use opc_dq.
multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq,
                                   string OpcodeStr, PatFrag OpNode,
                                   OpndItins itins>
    : avx512_vptest_wb<opc_wb, OpcodeStr, OpNode, itins>,
      avx512_vptest_dq<opc_dq, OpcodeStr, OpNode, itins>;
Elena Demikhovsky431b81e2015-04-21 13:13:46 +00005443
// vptestm matches the "not equal to zero" fragment and vptestnm the "equal to
// zero" fragment.  Both use opcodes 0x26 (byte/word) and 0x27 (dword/qword)
// and differ in the prefix class: T8PD vs. T8XS.
defm VPTESTM  : avx512_vptest_all_forms<0x26, 0x27, "vptestm", X86pcmpnem,
                                        SSE_BIT_ITINS_P>,
                T8PD;
defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86pcmpeqm,
                                        SSE_BIT_ITINS_P>,
                T8XS;
Elena Demikhovskya30e4372014-02-05 07:05:03 +00005448
Cameron McInally9b7c15a2014-11-25 20:41:51 +00005449
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00005450//===----------------------------------------------------------------------===//
5451// AVX-512 Shift instructions
5452//===----------------------------------------------------------------------===//
// Shift by an 8-bit immediate.  `ri` shifts a register source (format
// ImmFormR); `mi` shifts a full-vector memory source loaded through
// _.LdFrag (format ImmFormM).
multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
                            string OpcodeStr, SDNode OpNode, OpndItins itins,
                            X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm ri : AVX512_maskable<opc, ImmFormR, _,
                              (outs _.RC:$dst),
                              (ins _.RC:$src1, u8imm:$src2),
                              OpcodeStr,
                              "$src2, $src1", "$src1, $src2",
                              (_.VT (OpNode _.RC:$src1, (i8 imm:$src2))),
                              itins.rr>,
              Sched<[itins.Sched]>;

    defm mi : AVX512_maskable<opc, ImmFormM, _,
                              (outs _.RC:$dst),
                              (ins _.MemOp:$src1, u8imm:$src2),
                              OpcodeStr,
                              "$src2, $src1", "$src1, $src2",
                              (_.VT (OpNode
                                       (_.VT (bitconvert
                                              (_.LdFrag addr:$src1))),
                                       (i8 imm:$src2))),
                              itins.rm>,
              Sched<[itins.Sched.Folded]>;
  }
}
5470
// Shift by an 8-bit immediate where the shifted value is a scalar memory
// operand broadcast with X86VBroadcast (`mbi`, tagged EVEX_B).
// NOTE(review): this form lists ReadAfterLd in its Sched list, whereas the
// `mi` form in avx512_shift_rmi (also memory + immediate only) does not --
// confirm which is intended.
multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
                             string OpcodeStr, SDNode OpNode, OpndItins itins,
                             X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm mbi : AVX512_maskable<opc, ImmFormM, _,
                               (outs _.RC:$dst),
                               (ins _.ScalarMemOp:$src1, u8imm:$src2),
                               OpcodeStr,
                               "$src2, ${src1}"##_.BroadcastStr,
                               "${src1}"##_.BroadcastStr##", $src2",
                               (_.VT (OpNode
                                        (X86VBroadcast
                                         (_.ScalarLdFrag addr:$src1)),
                                        (i8 imm:$src2))),
                               itins.rm>,
               EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
5481
// Shift where the count comes from a vector operand rather than an
// immediate.  `rr` takes the count in a VR128X register typed as SrcVT;
// `rm` loads it from an i128mem operand via loadv2i64 wrapped in bc_frag.
multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            OpndItins itins, ValueType SrcVT, PatFrag bc_frag,
                            X86VectorVTInfo _> {
  // src2 is always 128-bit
  let ExeDomain = _.ExeDomain in {
    defm rr : AVX512_maskable<opc, MRMSrcReg, _,
                              (outs _.RC:$dst),
                              (ins _.RC:$src1, VR128X:$src2),
                              OpcodeStr,
                              "$src2, $src1", "$src1, $src2",
                              (_.VT (OpNode _.RC:$src1,
                                            (SrcVT VR128X:$src2))),
                              itins.rr>,
              AVX512BIBase, EVEX_4V, Sched<[itins.Sched]>;

    defm rm : AVX512_maskable<opc, MRMSrcMem, _,
                              (outs _.RC:$dst),
                              (ins _.RC:$src1, i128mem:$src2),
                              OpcodeStr,
                              "$src2, $src1", "$src1, $src2",
                              (_.VT (OpNode _.RC:$src1,
                                            (bc_frag
                                             (loadv2i64 addr:$src2)))),
                              itins.rm>,
              AVX512BIBase, EVEX_4V,
              Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
5500
// Instantiates avx512_shift_rrm for the 512-, 256- and 128-bit members of
// VTInfo.  The 512-bit form is guarded by `prd` alone; the narrower forms
// additionally require HasVLX.  Each width gets its own EVEX_CD8 tuple
// (CD8VQ, CD8VH, CD8VF respectively) built from that width's element size.
multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              OpndItins itins, ValueType SrcVT, PatFrag bc_frag,
                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, itins, SrcVT, bc_frag,
                              VTInfo.info512>,
             EVEX_V512, EVEX_CD8<VTInfo.info512.EltSize, CD8VQ>;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, itins, SrcVT, bc_frag,
                                 VTInfo.info256>,
                EVEX_V256, EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;

    defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, itins, SrcVT, bc_frag,
                                 VTInfo.info128>,
                EVEX_V128, EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
  }
}
5517
// Expands one shift mnemonic into its dword (D), qword (Q) and word (W)
// families, each with its own opcode.  The element suffix is appended to
// OpcodeStr here.  D and Q are predicated on HasAVX512, W on HasBWI; only Q
// carries VEX_W.
multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
                              string OpcodeStr, SDNode OpNode,
                              OpndItins itins> {
  defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, itins,
                              v4i32, bc_v4i32, avx512vl_i32_info, HasAVX512>;

  defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, itins,
                              v2i64, bc_v2i64, avx512vl_i64_info, HasAVX512>,
           VEX_W;

  defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, itins,
                              v8i16, bc_v2i64, avx512vl_i16_info, HasBWI>;
}
5528
// Immediate-operand forms for every vector width of VTInfo.  Each width
// combines avx512_shift_rmi (ImmFormR / ImmFormM encodings) with the
// broadcast-memory variant avx512_shift_rmbi (ImmFormM encoding).  The
// 512-bit form needs HasAVX512; the 256- and 128-bit forms also need HasVLX.
multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
                                  string OpcodeStr, SDNode OpNode,
                                  OpndItins itins,
                                  AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              itins, VTInfo.info512>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                               itins, VTInfo.info512>,
             EVEX_V512;
  }

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                                 itins, VTInfo.info256>,
                avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                                  itins, VTInfo.info256>,
                EVEX_V256;

    defm Z128 : avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                                 itins, VTInfo.info128>,
                avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                                  itins, VTInfo.info128>,
                EVEX_V128;
  }
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00005548
// Immediate-operand forms for 16-bit element vectors.  Only avx512_shift_rmi
// is instantiated (no avx512_shift_rmbi broadcast variant here).  All three
// widths are tagged VEX_WIG; the 512-bit form requires HasBWI, the narrower
// ones HasVLX and HasBWI.
multiclass avx512_shift_rmi_w<bits<8> opcw,
                              Format ImmFormR, Format ImmFormM,
                              string OpcodeStr, SDNode OpNode,
                              OpndItins itins> {
  let Predicates = [HasBWI] in {
    defm WZ : avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                               itins, v32i16_info>,
              EVEX_V512, VEX_WIG;
  }

  let Predicates = [HasVLX, HasBWI] in {
    defm WZ256 : avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                                  itins, v16i16x_info>,
                 EVEX_V256, VEX_WIG;

    defm WZ128 : avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                                  itins, v8i16x_info>,
                 EVEX_V128, VEX_WIG;
  }
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00005563
// Immediate-operand forms for dword (D) and qword (Q) element vectors.  The
// element suffix is appended to OpcodeStr, each family gets a full-vector
// EVEX_CD8 tuple for its element size, and Q additionally carries VEX_W.
multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
                               Format ImmFormR, Format ImmFormM,
                               string OpcodeStr, SDNode OpNode,
                               OpndItins itins> {
  defm D : avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d",
                                  OpNode, itins, avx512vl_i32_info>,
           EVEX_CD8<32, CD8VF>;

  defm Q : avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q",
                                  OpNode, itins, avx512vl_i64_info>,
           EVEX_CD8<64, CD8VF>, VEX_W;
}
Cameron McInally9b7c15a2014-11-25 20:41:51 +00005572
// Shifts with an immediate operand: D/Q forms plus the W forms, selected by
// the ModRM /r extension (MRMnr / MRMnm).
defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl",
                                 X86vsrli, SSE_INTSHIFT_P>,
             avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw",
                                X86vsrli, SSE_INTSHIFT_P>,
             AVX512BIi8Base, EVEX_4V;

defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll",
                                 X86vshli, SSE_INTSHIFT_P>,
             avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw",
                                X86vshli, SSE_INTSHIFT_P>,
             AVX512BIi8Base, EVEX_4V;

defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra",
                                 X86vsrai, SSE_INTSHIFT_P>,
             avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw",
                                X86vsrai, SSE_INTSHIFT_P>,
             AVX512BIi8Base, EVEX_4V;

// Rotates with an immediate operand: D/Q forms only.
defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror",
                                 X86vrotri, SSE_INTSHIFT_P>,
             AVX512BIi8Base, EVEX_4V;
defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol",
                                 X86vrotli, SSE_INTSHIFT_P>,
             AVX512BIi8Base, EVEX_4V;

// Shifts whose second source is a 128-bit register or memory operand
// (D, Q and W opcodes, in that order).
defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
                                SSE_INTSHIFT_P>;
defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
                                SSE_INTSHIFT_P>;
defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
                                SSE_INTSHIFT_P>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00005596
// Without VLX, v2i64/v4i64 X86vsra and X86vsrai are selected to the 512-bit
// VPSRAQ instructions: the first source is inserted into an undefined v8i64,
// the 512-bit instruction is applied, and the original-width subregister is
// extracted from the result.
let Predicates = [HasAVX512, NoVLX] in {
  // Second source in a 128-bit register.
  def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG
              (v8i64 (VPSRAQZrr
                       (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                             VR256X:$src1, sub_ymm)),
                       VR128X:$src2)),
              sub_ymm)>;

  def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG
              (v8i64 (VPSRAQZrr
                       (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                             VR128X:$src1, sub_xmm)),
                       VR128X:$src2)),
              sub_xmm)>;

  // Second source is an 8-bit immediate.
  def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG
              (v8i64 (VPSRAQZri
                       (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                             VR256X:$src1, sub_ymm)),
                       imm:$src2)),
              sub_ymm)>;

  def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG
              (v8i64 (VPSRAQZri
                       (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                             VR128X:$src1, sub_xmm)),
                       imm:$src2)),
              sub_xmm)>;
}
5623
//===-------------------------------------------------------------------===//
// Variable Bit Shifts
//===-------------------------------------------------------------------===//

// Two-source maskable operation in which both sources have the vector type
// described by `_`:
//   rr - second source in a register of class _.RC.
//   rm - second source loaded through _.LdFrag and bitconverted to _.VT; this
//        form also carries a full-vector EVEX_CD8 tuple.
multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm rr : AVX512_maskable<opc, MRMSrcReg, _,
                              (outs _.RC:$dst),
                              (ins _.RC:$src1, _.RC:$src2),
                              OpcodeStr,
                              "$src2, $src1", "$src1, $src2",
                              (_.VT (OpNode _.RC:$src1,
                                            (_.VT _.RC:$src2))),
                              itins.rr>,
              AVX5128IBase, EVEX_4V, Sched<[itins.Sched]>;

    defm rm : AVX512_maskable<opc, MRMSrcMem, _,
                              (outs _.RC:$dst),
                              (ins _.RC:$src1, _.MemOp:$src2),
                              OpcodeStr,
                              "$src2, $src1", "$src1, $src2",
                              (_.VT (OpNode
                                      _.RC:$src1,
                                      (_.VT (bitconvert
                                              (_.LdFrag addr:$src2))))),
                              itins.rm>,
              AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
              Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
5646
// Broadcast-memory companion of avx512_var_shift: the second source is a
// scalar memory operand loaded with _.ScalarLdFrag and wrapped in
// X86VBroadcast.  The assembly strings append _.BroadcastStr to the memory
// operand and the record is tagged EVEX_B.
multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm rmb : AVX512_maskable<opc, MRMSrcMem, _,
                               (outs _.RC:$dst),
                               (ins _.RC:$src1, _.ScalarMemOp:$src2),
                               OpcodeStr,
                               "${src2}"##_.BroadcastStr##", $src1",
                               "$src1, ${src2}"##_.BroadcastStr,
                               (_.VT (OpNode
                                       _.RC:$src1,
                                       (_.VT (X86VBroadcast
                                               (_.ScalarLdFrag addr:$src2))))),
                               itins.rm>,
               AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
               Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
Simon Pilgrim7f2a6d52017-01-13 13:16:19 +00005660
// Register, memory and broadcast-memory forms for all three vector widths of
// `_`.  The 512-bit form requires HasAVX512; 256- and 128-bit forms also
// require HasVLX.
multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  OpndItins itins, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info512>,
             avx512_var_shift_mb<opc, OpcodeStr, OpNode, itins, _.info512>,
             EVEX_V512;
  }

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info256>,
                avx512_var_shift_mb<opc, OpcodeStr, OpNode, itins, _.info256>,
                EVEX_V256;

    defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info128>,
                avx512_var_shift_mb<opc, OpcodeStr, OpNode, itins, _.info128>,
                EVEX_V128;
  }
}
5674
// Dword (D) and qword (Q) families of a variable shift/rotate.  Both share
// the same opcode; Q is distinguished by VEX_W.
multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode, OpndItins itins> {
  defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, itins,
                                  avx512vl_i32_info>;

  defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, itins,
                                  avx512vl_i64_info>,
           VEX_W;
}
5682
// Selects 128- and 256-bit OpNode to the 512-bit "<OpcodeStr>Zrr"
// instruction (intended for the NoVLX case; the predicate list is supplied
// by the caller in `p`).  Both sources are inserted into undefined 512-bit
// values and the original-width subregister of the result is extracted.
multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
                                     SDNode OpNode, list<Predicate> p> {
  let Predicates = p in {
    // 256-bit operands via sub_ymm.
    def : Pat<(_.info256.VT
                (OpNode (_.info256.VT _.info256.RC:$src1),
                        (_.info256.VT _.info256.RC:$src2))),
              (EXTRACT_SUBREG
                (!cast<Instruction>(OpcodeStr#"Zrr")
                  (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)),
                                 VR256X:$src1, sub_ymm),
                  (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)),
                                 VR256X:$src2, sub_ymm)),
                sub_ymm)>;

    // 128-bit operands via sub_xmm.
    def : Pat<(_.info128.VT
                (OpNode (_.info128.VT _.info128.RC:$src1),
                        (_.info128.VT _.info128.RC:$src2))),
              (EXTRACT_SUBREG
                (!cast<Instruction>(OpcodeStr#"Zrr")
                  (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)),
                                 VR128X:$src1, sub_xmm),
                  (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)),
                                 VR128X:$src2, sub_xmm)),
                sub_xmm)>;
  }
}
// Word-element variable shifts.  Only the rr/rm forms of avx512_var_shift
// are instantiated (no broadcast variant), every width carries VEX_W, and
// HasBWI is required throughout (plus HasVLX below 512 bits).
multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
                              SDNode OpNode, OpndItins itins> {
  let Predicates = [HasBWI] in {
    defm WZ : avx512_var_shift<opc, OpcodeStr, OpNode, itins, v32i16_info>,
              EVEX_V512, VEX_W;
  }

  let Predicates = [HasVLX, HasBWI] in {
    defm WZ256 : avx512_var_shift<opc, OpcodeStr, OpNode, itins,
                                  v16i16x_info>,
                 EVEX_V256, VEX_W;

    defm WZ128 : avx512_var_shift<opc, OpcodeStr, OpNode, itins,
                                  v8i16x_info>,
                 EVEX_V128, VEX_W;
  }
}
5717
// Per-element variable shifts: D/Q forms plus W forms.
defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl, SSE_INTSHIFT_P>,
              avx512_var_shift_w<0x12, "vpsllvw", shl, SSE_INTSHIFT_P>;

defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra, SSE_INTSHIFT_P>,
              avx512_var_shift_w<0x11, "vpsravw", sra, SSE_INTSHIFT_P>;

defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl, SSE_INTSHIFT_P>,
              avx512_var_shift_w<0x10, "vpsrlvw", srl, SSE_INTSHIFT_P>;

// Per-element variable rotates: D/Q forms only.
defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SSE_INTSHIFT_P>;
defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SSE_INTSHIFT_P>;

// 128/256-bit selection through the 512-bit instructions when VLX is absent.
defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", sra,
                                 [HasAVX512, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", shl,
                                 [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", sra,
                                 [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", srl,
                                 [HasBWI, NoVLX]>;
5734
// Special handling for the VPSRAV intrinsics: maps the X86vsrav node onto the
// instruction records named InstrStr # _.ZSuffix # <form>.  Register and
// full-vector-load sources are covered, each in unmasked, merge-masked
// (vselect against $src0) and zero-masked (vselect against _.ImmAllZerosV)
// variants.  Patterns are active under the predicate list `p`.
multiclass avx512_var_shift_int_lowering<string InstrStr, X86VectorVTInfo _,
                                         list<Predicate> p> {
  let Predicates = p in {
    // Unmasked.
    def : Pat<(_.VT (X86vsrav _.RC:$src1, _.RC:$src2)),
              (!cast<Instruction>(InstrStr#_.ZSuffix#rr)
                _.RC:$src1, _.RC:$src2)>;
    def : Pat<(_.VT (X86vsrav _.RC:$src1,
                              (bitconvert (_.LdFrag addr:$src2)))),
              (!cast<Instruction>(InstrStr#_.ZSuffix##rm)
                _.RC:$src1, addr:$src2)>;

    // Merge-masked.
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                             (X86vsrav _.RC:$src1, _.RC:$src2),
                             _.RC:$src0)),
              (!cast<Instruction>(InstrStr#_.ZSuffix#rrk)
                _.RC:$src0, _.KRC:$mask, _.RC:$src1, _.RC:$src2)>;
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                             (X86vsrav _.RC:$src1,
                                       (bitconvert (_.LdFrag addr:$src2))),
                             _.RC:$src0)),
              (!cast<Instruction>(InstrStr#_.ZSuffix##rmk)
                _.RC:$src0, _.KRC:$mask, _.RC:$src1, addr:$src2)>;

    // Zero-masked.
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                             (X86vsrav _.RC:$src1, _.RC:$src2),
                             _.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr#_.ZSuffix#rrkz)
                _.KRC:$mask, _.RC:$src1, _.RC:$src2)>;
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                             (X86vsrav _.RC:$src1,
                                       (bitconvert (_.LdFrag addr:$src2))),
                             _.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr#_.ZSuffix##rmkz)
                _.KRC:$mask, _.RC:$src1, addr:$src2)>;
  }
}
5765
// Extends avx512_var_shift_int_lowering with patterns whose second source is
// a broadcast scalar load (X86VBroadcast of _.ScalarLdFrag), selecting the
// rmb / rmbk / rmbkz instruction records.
multiclass avx512_var_shift_int_lowering_mb<string InstrStr,
                                            X86VectorVTInfo _,
                                            list<Predicate> p> :
           avx512_var_shift_int_lowering<InstrStr, _, p> {
  let Predicates = p in {
    // Unmasked.
    def : Pat<(_.VT (X86vsrav
                      _.RC:$src1,
                      (X86VBroadcast (_.ScalarLdFrag addr:$src2)))),
              (!cast<Instruction>(InstrStr#_.ZSuffix##rmb)
                _.RC:$src1, addr:$src2)>;

    // Merge-masked.
    def : Pat<(_.VT (vselect
                      _.KRCWM:$mask,
                      (X86vsrav _.RC:$src1,
                                (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
                      _.RC:$src0)),
              (!cast<Instruction>(InstrStr#_.ZSuffix##rmbk)
                _.RC:$src0, _.KRC:$mask, _.RC:$src1, addr:$src2)>;

    // Zero-masked.
    def : Pat<(_.VT (vselect
                      _.KRCWM:$mask,
                      (X86vsrav _.RC:$src1,
                                (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
                      _.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr#_.ZSuffix##rmbkz)
                _.KRC:$mask, _.RC:$src1, addr:$src2)>;
  }
}
5788
// X86vsrav selection patterns.  The word forms use the variant without
// broadcast patterns; the dword and qword forms use the _mb variant.
defm : avx512_var_shift_int_lowering<"VPSRAVW", v8i16x_info,
                                     [HasVLX, HasBWI]>;
defm : avx512_var_shift_int_lowering<"VPSRAVW", v16i16x_info,
                                     [HasVLX, HasBWI]>;
defm : avx512_var_shift_int_lowering<"VPSRAVW", v32i16_info, [HasBWI]>;

defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v4i32x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v8i32x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v16i32_info, [HasAVX512]>;

defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v2i64x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v4i64x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v8i64_info, [HasAVX512]>;
5798
Simon Pilgrim1cbe8c22017-07-17 14:11:30 +00005799
// Without VLX, 128/256-bit rotl (variable) and X86vrotli (immediate) on
// v2i64/v4i64 and v4i32/v8i32 are selected to the 512-bit VPROLV/VPROL
// instructions.  Vector sources are inserted into undefined 512-bit values
// and the original-width subregister of the result is extracted.
let Predicates = [HasAVX512, NoVLX] in {
  // Variable rotate, qword elements.
  def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG
              (v8i64 (VPROLVQZrr
                       (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                             VR128X:$src1, sub_xmm)),
                       (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                             VR128X:$src2, sub_xmm)))),
              sub_xmm)>;
  def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG
              (v8i64 (VPROLVQZrr
                       (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                             VR256X:$src1, sub_ymm)),
                       (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                             VR256X:$src2, sub_ymm)))),
              sub_ymm)>;

  // Variable rotate, dword elements.
  def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
            (EXTRACT_SUBREG
              (v16i32 (VPROLVDZrr
                        (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                               VR128X:$src1, sub_xmm)),
                        (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                               VR128X:$src2, sub_xmm)))),
              sub_xmm)>;
  def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG
              (v16i32 (VPROLVDZrr
                        (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                               VR256X:$src1, sub_ymm)),
                        (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                               VR256X:$src2, sub_ymm)))),
              sub_ymm)>;

  // Immediate rotate, qword elements.
  def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG
              (v8i64 (VPROLQZri
                       (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                             VR128X:$src1, sub_xmm)),
                       imm:$src2)),
              sub_xmm)>;
  def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG
              (v8i64 (VPROLQZri
                       (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                             VR256X:$src1, sub_ymm)),
                       imm:$src2)),
              sub_ymm)>;

  // Immediate rotate, dword elements.
  def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG
              (v16i32 (VPROLDZri
                        (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                               VR128X:$src1, sub_xmm)),
                        imm:$src2)),
              sub_xmm)>;
  def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG
              (v16i32 (VPROLDZri
                        (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                               VR256X:$src1, sub_ymm)),
                        imm:$src2)),
              sub_ymm)>;
}
5850
// Without VLX, 128/256-bit rotr (variable) and X86vrotri (immediate) on
// v2i64/v4i64 and v4i32/v8i32 are selected to the 512-bit VPRORV/VPROR
// instructions.  Vector sources are inserted into undefined 512-bit values
// and the original-width subregister of the result is extracted.
let Predicates = [HasAVX512, NoVLX] in {
  // Variable rotate, qword elements.
  def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG
              (v8i64 (VPRORVQZrr
                       (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                             VR128X:$src1, sub_xmm)),
                       (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                             VR128X:$src2, sub_xmm)))),
              sub_xmm)>;
  def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG
              (v8i64 (VPRORVQZrr
                       (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                             VR256X:$src1, sub_ymm)),
                       (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                             VR256X:$src2, sub_ymm)))),
              sub_ymm)>;

  // Variable rotate, dword elements.
  def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
            (EXTRACT_SUBREG
              (v16i32 (VPRORVDZrr
                        (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                               VR128X:$src1, sub_xmm)),
                        (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                               VR128X:$src2, sub_xmm)))),
              sub_xmm)>;
  def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG
              (v16i32 (VPRORVDZrr
                        (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                               VR256X:$src1, sub_ymm)),
                        (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                               VR256X:$src2, sub_ymm)))),
              sub_ymm)>;

  // Immediate rotate, qword elements.
  def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG
              (v8i64 (VPRORQZri
                       (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                             VR128X:$src1, sub_xmm)),
                       imm:$src2)),
              sub_xmm)>;
  def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG
              (v8i64 (VPRORQZri
                       (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                             VR256X:$src1, sub_ymm)),
                       imm:$src2)),
              sub_ymm)>;

  // Immediate rotate, dword elements.
  def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG
              (v16i32 (VPRORDZri
                        (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                               VR128X:$src1, sub_xmm)),
                        imm:$src2)),
              sub_xmm)>;
  def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG
              (v16i32 (VPRORDZri
                        (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                               VR256X:$src1, sub_ymm)),
                        imm:$src2)),
              sub_ymm)>;
}
5901
//===-------------------------------------------------------------------===//
// 1-src variable permutation VPERMW/D/Q
//===-------------------------------------------------------------------===//

// Reuses the avx512_var_shift / avx512_var_shift_mb templates to build the
// 512- and 256-bit forms only; no 128-bit form is instantiated here.
multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 OpndItins itins, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info512>,
             avx512_var_shift_mb<opc, OpcodeStr, OpNode, itins, _.info512>,
             EVEX_V512;
  }

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info256>,
                avx512_var_shift_mb<opc, OpcodeStr, OpNode, itins, _.info256>,
                EVEX_V256;
  }
}
5915
// Immediate-controlled permute built from the avx512_shift_rmi /
// avx512_shift_rmbi templates, for the 512- and 256-bit widths only.
multiclass avx512_vpermi_dq_sizes<bits<8> opc,
                                  Format ImmFormR, Format ImmFormM,
                                  string OpcodeStr, SDNode OpNode,
                                  OpndItins itins,
                                  AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              itins, VTInfo.info512>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                               itins, VTInfo.info512>,
             EVEX_V512;
  }

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                                 itins, VTInfo.info256>,
                avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                                  itins, VTInfo.info256>,
                EVEX_V256;
  }
}
5930
// Variable permute for the vector types in `_`, in register and memory forms
// only (avx512_var_shift; no broadcast variant).  `prd` gates the 512-bit
// form; the 256- and 128-bit forms additionally need HasVLX.
multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
                           Predicate prd, SDNode OpNode,
                           OpndItins itins, AVX512VLVectorVTInfo _> {
  let Predicates = [prd] in {
    defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info512>,
             EVEX_V512;
  }

  let Predicates = [HasVLX, prd] in {
    defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info256>,
                EVEX_V256;

    defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info128>,
                EVEX_V128;
  }
}
Elena Demikhovsky4078c752015-06-04 07:07:13 +00005944
// Byte/word variable permutes (same opcode; VPERMW carries VEX_W and needs
// HasBWI, VPERMB needs HasVBMI).
defm VPERMW : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
                              AVX2_PERMV_I, avx512vl_i16_info>,
              VEX_W;
defm VPERMB : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
                              AVX2_PERMV_I, avx512vl_i8_info>;

// Dword/qword and single/double variable permutes (X86VPermv).
defm VPERMD  : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
                                     AVX2_PERMV_I, avx512vl_i32_info>;
defm VPERMQ  : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
                                     AVX2_PERMV_I, avx512vl_i64_info>,
               VEX_W;
defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
                                     AVX2_PERMV_F, avx512vl_f32_info>;
defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
                                     AVX2_PERMV_F, avx512vl_f64_info>,
               VEX_W;

// Immediate-controlled 64-bit element permutes (X86VPermi).
defm VPERMQ  : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
                                      X86VPermi, AVX2_PERMV_I,
                                      avx512vl_i64_info>,
               EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
                                      X86VPermi, AVX2_PERMV_F,
                                      avx512vl_f64_info>,
               EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
//===----------------------------------------------------------------------===//
// AVX-512 - VPERMIL
//===----------------------------------------------------------------------===//

// Permute with a vector control operand.  `_` describes the data vector and
// `Ctrl` the control vector.  Three maskable forms are produced:
//   rr  - control in a register of class Ctrl.RC.
//   rm  - control loaded through Ctrl.LdFrag and bitconverted to Ctrl.VT.
//   rmb - control is a broadcast scalar load (Ctrl.ScalarLdFrag); the memory
//         operand class and broadcast string come from `_`.
multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
                             OpndItins itins, X86VectorVTInfo _,
                             X86VectorVTInfo Ctrl> {
  defm rr : AVX512_maskable<OpcVar, MRMSrcReg, _,
                            (outs _.RC:$dst),
                            (ins _.RC:$src1, Ctrl.RC:$src2),
                            OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_.VT (OpNode _.RC:$src1,
                                          (Ctrl.VT Ctrl.RC:$src2))),
                            itins.rr>,
            T8PD, EVEX_4V, Sched<[itins.Sched]>;

  defm rm : AVX512_maskable<OpcVar, MRMSrcMem, _,
                            (outs _.RC:$dst),
                            (ins _.RC:$src1, Ctrl.MemOp:$src2),
                            OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_.VT (OpNode
                                    _.RC:$src1,
                                    (Ctrl.VT (bitconvert
                                               (Ctrl.LdFrag addr:$src2))))),
                            itins.rm>,
            T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
            Sched<[itins.Sched.Folded, ReadAfterLd]>;

  defm rmb : AVX512_maskable<OpcVar, MRMSrcMem, _,
                             (outs _.RC:$dst),
                             (ins _.RC:$src1, _.ScalarMemOp:$src2),
                             OpcodeStr,
                             "${src2}"##_.BroadcastStr##", $src1",
                             "$src1, ${src2}"##_.BroadcastStr,
                             (_.VT (OpNode
                                     _.RC:$src1,
                                     (Ctrl.VT (X86VBroadcast
                                                (Ctrl.ScalarLdFrag
                                                  addr:$src2))))),
                             itins.rm>,
             T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
5997
// Instantiates avx512_permil_vec with the X86VPermilpv node for all three
// vector widths, pairing each data type in `_` with the same-width control
// type in `Ctrl`.  The 512-bit form needs HasAVX512; the others add HasVLX.
multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
                                    OpndItins itins, AVX512VLVectorVTInfo _,
                                    AVX512VLVectorVTInfo Ctrl> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, itins,
                               _.info512, Ctrl.info512>,
             EVEX_V512;
  }

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, itins,
                                  _.info128, Ctrl.info128>,
                EVEX_V128;

    defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, itins,
                                  _.info256, Ctrl.info256>,
                EVEX_V256;
  }
}
6012
// Defines both flavours of a permil instruction under the same NAME:
//  - the variable-control form (opcode OpcVar) via avx512_permil_vec_common;
//  - the immediate form (opcode OpcImm) via avx512_shift_rmi_sizes, selecting
//    X86VPermilpi.
// Both use the AVX_VPERMILV itineraries. The immediate form takes its EVEX_CD8
// element size from _.info128.EltSize.
6013multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
6014 AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
Simon Pilgrim1401a752017-11-29 14:58:34 +00006015 defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, AVX_VPERMILV, _, Ctrl>;
Igor Breger78741a12015-10-04 07:20:41 +00006016 defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
Simon Pilgrim2dc4ff12017-12-01 13:25:54 +00006017 X86VPermilpi, AVX_VPERMILV, _>,
Igor Breger78741a12015-10-04 07:20:41 +00006018 EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
Igor Breger78741a12015-10-04 07:20:41 +00006019}
6020
// VPERMILPS: f32 data with i32 control vectors; immediate opcode 0x04,
// variable opcode 0x0C; single-precision execution domain.
Craig Topper05948fb2016-08-02 05:11:15 +00006021let ExeDomain = SSEPackedSingle in
Igor Breger78741a12015-10-04 07:20:41 +00006022defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
6023 avx512vl_i32_info>;
// VPERMILPD: f64 data with i64 control vectors; immediate opcode 0x05,
// variable opcode 0x0D; double-precision execution domain, VEX_W set.
Craig Topper05948fb2016-08-02 05:11:15 +00006024let ExeDomain = SSEPackedDouble in
Igor Breger78741a12015-10-04 07:20:41 +00006025defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
6026 avx512vl_i64_info>, VEX_W;
Simon Pilgrim1401a752017-11-29 14:58:34 +00006027
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006028//===----------------------------------------------------------------------===//
Elena Demikhovsky75ede682015-06-01 07:17:23 +00006029// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
6030//===----------------------------------------------------------------------===//
6031
// All three shuffles share opcode 0x70 and the SSE_PSHUF itineraries; they
// differ in the prefix base class (AVX512BIi8Base / AVX512XSIi8Base /
// AVX512XDIi8Base) and in the selected node.
// VPSHUFD is instantiated over avx512vl_i32_info with 32-bit CD8 scaling.
6032defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
Simon Pilgrim2dc4ff12017-12-01 13:25:54 +00006033 X86PShufd, SSE_PSHUF, avx512vl_i32_info>,
Elena Demikhovsky75ede682015-06-01 07:17:23 +00006034 EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
// Note: the record prefixes are VPSHUFH / VPSHUFL while the assembly mnemonics
// are "vpshufhw" / "vpshuflw".
6035defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
Simon Pilgrim2dc4ff12017-12-01 13:25:54 +00006036 X86PShufhw, SSE_PSHUF>, EVEX, AVX512XSIi8Base;
Elena Demikhovsky75ede682015-06-01 07:17:23 +00006037defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
Simon Pilgrim2dc4ff12017-12-01 13:25:54 +00006038 X86PShuflw, SSE_PSHUF>, EVEX, AVX512XDIi8Base;
Michael Liao66233b72015-08-06 09:06:20 +00006039
// Instantiates avx512_var_shift for byte vectors at all three widths:
// Z (v64i8_info) requires HasBWI; Z256 (v32i8x_info) and Z128 (v16i8x_info)
// additionally require HasVLX.
Simon Pilgrim2dc4ff12017-12-01 13:25:54 +00006040multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6041 OpndItins itins> {
Elena Demikhovsky55a99742015-06-22 13:00:42 +00006042 let Predicates = [HasBWI] in
Simon Pilgrim2dc4ff12017-12-01 13:25:54 +00006043 defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, itins, v64i8_info>, EVEX_V512;
Elena Demikhovsky55a99742015-06-22 13:00:42 +00006044
6045 let Predicates = [HasVLX, HasBWI] in {
Simon Pilgrim2dc4ff12017-12-01 13:25:54 +00006046 defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, itins, v32i8x_info>, EVEX_V256;
6047 defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, itins, v16i8x_info>, EVEX_V128;
Elena Demikhovsky55a99742015-06-22 13:00:42 +00006048 }
6049}
6050
// VPSHUFB: opcode 0x00, selects X86pshufb, SSE_PSHUFB itineraries, VEX_WIG.
Simon Pilgrim2dc4ff12017-12-01 13:25:54 +00006051defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb, SSE_PSHUFB>, VEX_WIG;
Elena Demikhovsky55a99742015-06-22 13:00:42 +00006052
Elena Demikhovsky75ede682015-06-01 07:17:23 +00006053//===----------------------------------------------------------------------===//
Elena Demikhovsky0a74b7d2013-11-14 11:29:27 +00006054// Move Low to High and High to Low packed FP Instructions
6055//===----------------------------------------------------------------------===//
// Register-register VMOVLHPS (opcode 0x16) on VR128X; selects the v4f32
// X86Movlhps node of $src1 and $src2.
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006056def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
6057 (ins VR128X:$src1, VR128X:$src2),
Elena Demikhovskycf088092013-12-11 14:31:04 +00006058 "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006059 [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))],
Simon Pilgrim369e59d2018-02-12 16:18:36 +00006060 IIC_SSE_MOV_LH>, Sched<[WriteFShuffle]>, EVEX_4V;
// Register-register VMOVHLPS (opcode 0x12) on VR128X; selects the v4f32
// X86Movhlps node of $src1 and $src2.
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006061def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
6062 (ins VR128X:$src1, VR128X:$src2),
Elena Demikhovskycf088092013-12-11 14:31:04 +00006063 "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006064 [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))],
Simon Pilgrim369e59d2018-02-12 16:18:36 +00006065 IIC_SSE_MOV_LH>, Sched<[WriteFShuffle]>, EVEX_4V;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006066
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006067//===----------------------------------------------------------------------===//
Igor Bregerb6b27af2015-11-10 07:09:07 +00006068// VMOVHPS/PD VMOVLPS Instructions
6069 // All patterns were taken from the SSE implementation.
6070//===----------------------------------------------------------------------===//
// Defines the `rm` form of a half-vector move: $src1 is a vector register of
// class _.RC and $src2 is an f64mem operand. The selection pattern loads an
// f64, wraps it in a v2f64 with scalar_to_vector, bitconverts that to _.VT and
// applies OpNode to ($src1, converted value).
6071multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
6072 X86VectorVTInfo _> {
Craig Toppere70231b2017-02-26 06:45:54 +00006073 let ExeDomain = _.ExeDomain in
Craig Toppere1cac152016-06-07 07:27:54 +00006074 def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
6075 (ins _.RC:$src1, f64mem:$src2),
6076 !strconcat(OpcodeStr,
6077 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6078 [(set _.RC:$dst,
6079 (OpNode _.RC:$src1,
6080 (_.VT (bitconvert
6081 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))],
Simon Pilgrim369e59d2018-02-12 16:18:36 +00006082 IIC_SSE_MOV_LH>, Sched<[WriteFShuffleLd, ReadAfterLd]>, EVEX_4V;
Igor Bregerb6b27af2015-11-10 07:09:07 +00006083}
6084
// 128-bit load forms. Opcode 0x16 is used for the "high" moves and 0x12 for
// the "low" moves. The PS variants use v4f32x_info with EVEX_CD8<32, CD8VT2>;
// the PD variants use v2f64x_info with EVEX_CD8<64, CD8VT1> and VEX_W.
6085defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", X86Movlhps,
6086 v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
Craig Topper3b11fca2017-09-18 00:20:53 +00006087defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
Igor Bregerb6b27af2015-11-10 07:09:07 +00006088 v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
6089defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", X86Movlps,
6090 v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6091defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movlpd,
6092 v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
6093
// Additional HasAVX512 selection patterns that fold a memory operand into the
// VMOV{H,L}P{S,D}Z128rm instructions for DAG shapes not covered by the
// patterns embedded in the instruction definitions.
6094let Predicates = [HasAVX512] in {
6095 // VMOVHPS patterns
// The loaded value may arrive as an integer i64 load (via scalar_to_vector or
// X86vzload) bitcast to v4f32.
6096 def : Pat<(X86Movlhps VR128X:$src1,
6097 (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
6098 (VMOVHPSZ128rm VR128X:$src1, addr:$src2)>;
6099 def : Pat<(X86Movlhps VR128X:$src1,
Craig Topper0a197df2017-09-17 18:59:32 +00006100 (bc_v4f32 (v2i64 (X86vzload addr:$src2)))),
Igor Bregerb6b27af2015-11-10 07:09:07 +00006101 (VMOVHPSZ128rm VR128X:$src1, addr:$src2)>;
6102 // VMOVHPD patterns
6103 def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
Igor Bregerb6b27af2015-11-10 07:09:07 +00006104 (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
6105 (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
6106 // VMOVLPS patterns
6107 def : Pat<(v4f32 (X86Movlps VR128X:$src1, (load addr:$src2))),
6108 (VMOVLPSZ128rm VR128X:$src1, addr:$src2)>;
Igor Bregerb6b27af2015-11-10 07:09:07 +00006109 // VMOVLPD patterns
6110 def : Pat<(v2f64 (X86Movlpd VR128X:$src1, (load addr:$src2))),
6111 (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
// X86Movsd with a scalar f64 load is also selected to VMOVLPD.
Igor Bregerb6b27af2015-11-10 07:09:07 +00006112 def : Pat<(v2f64 (X86Movsd VR128X:$src1,
6113 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
6114 (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
6115}
6116
// Store forms (MRMDestMem) of VMOV{H,L}P{S,D}: each writes one f64 extracted
// from a VR128X register to an f64mem destination. All four are scheduled as
// WriteStore.
Simon Pilgrim369e59d2018-02-12 16:18:36 +00006117let SchedRW = [WriteStore] in {
// VMOVHPS store: element 0 of X86Unpckh($src, $src), with $src viewed as v2f64.
Igor Bregerb6b27af2015-11-10 07:09:07 +00006118def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
6119 (ins f64mem:$dst, VR128X:$src),
6120 "vmovhps\t{$src, $dst|$dst, $src}",
Craig Topperc9b19232016-05-01 04:59:44 +00006121 [(store (f64 (extractelt
Igor Bregerb6b27af2015-11-10 07:09:07 +00006122 (X86Unpckh (bc_v2f64 (v4f32 VR128X:$src)),
6123 (bc_v2f64 (v4f32 VR128X:$src))),
6124 (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>,
6125 EVEX, EVEX_CD8<32, CD8VT2>;
// VMOVHPD store: element 0 of the v2f64 X86Unpckh($src, $src).
6126def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
6127 (ins f64mem:$dst, VR128X:$src),
6128 "vmovhpd\t{$src, $dst|$dst, $src}",
Craig Topperc9b19232016-05-01 04:59:44 +00006129 [(store (f64 (extractelt
Igor Bregerb6b27af2015-11-10 07:09:07 +00006130 (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
6131 (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>,
6132 EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
// VMOVLPS store: element 0 of $src viewed as v2f64.
6133def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
6134 (ins f64mem:$dst, VR128X:$src),
6135 "vmovlps\t{$src, $dst|$dst, $src}",
Craig Topperc9b19232016-05-01 04:59:44 +00006136 [(store (f64 (extractelt (bc_v2f64 (v4f32 VR128X:$src)),
Igor Bregerb6b27af2015-11-10 07:09:07 +00006137 (iPTR 0))), addr:$dst)],
6138 IIC_SSE_MOV_LH>,
6139 EVEX, EVEX_CD8<32, CD8VT2>;
// VMOVLPD store: element 0 of the v2f64 $src.
6140def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
6141 (ins f64mem:$dst, VR128X:$src),
6142 "vmovlpd\t{$src, $dst|$dst, $src}",
Craig Topperc9b19232016-05-01 04:59:44 +00006143 [(store (f64 (extractelt (v2f64 VR128X:$src),
Igor Bregerb6b27af2015-11-10 07:09:07 +00006144 (iPTR 0))), addr:$dst)],
6145 IIC_SSE_MOV_LH>,
6146 EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
Simon Pilgrim369e59d2018-02-12 16:18:36 +00006147} // SchedRW
Craig Toppere1cac152016-06-07 07:27:54 +00006148
// Additional HasAVX512 selection patterns that map other DAG shapes onto the
// VMOV{H,L}P{S,D}Z128mr store instructions.
Igor Bregerb6b27af2015-11-10 07:09:07 +00006149let Predicates = [HasAVX512] in {
6150 // VMOVHPD patterns
// Element 0 of X86VPermilpi($src, 1) stored as f64 is selected to VMOVHPD.
Craig Topperc9b19232016-05-01 04:59:44 +00006151 def : Pat<(store (f64 (extractelt
Igor Bregerb6b27af2015-11-10 07:09:07 +00006152 (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
6153 (iPTR 0))), addr:$dst),
6154 (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
6155 // VMOVLPS patterns
// Load / X86Movlps / store back to the same address ($src1) becomes a single
// VMOVLPS store.
6156 def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128X:$src2)),
6157 addr:$src1),
6158 (VMOVLPSZ128mr addr:$src1, VR128X:$src2)>;
Igor Bregerb6b27af2015-11-10 07:09:07 +00006159 // VMOVLPD patterns
// Same read-modify-write shape for X86Movlpd on v2f64.
6160 def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128X:$src2)),
6161 addr:$src1),
6162 (VMOVLPDZ128mr addr:$src1, VR128X:$src2)>;
Igor Bregerb6b27af2015-11-10 07:09:07 +00006163}
6164//===----------------------------------------------------------------------===//
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006165// FMA - Fused Multiply Operations
6166//
Adam Nemet26371ce2014-10-24 00:02:55 +00006167
// Packed FMA, "213" operand order: every pattern is
// OpNode($src2, $src1, $src3), with $src1 tied to $dst. Defines three forms:
//   r  - $src3 is a register;
//   m  - $src3 is a full-vector load (_.LdFrag);
//   mb - $src3 is a broadcast scalar load (X86VBroadcast), encoded with EVEX_B.
// The Suff parameter is not referenced in this body.
// NOTE(review): the trailing `1, 1` / `1, 0` arguments are presumably the
// commutability flags of AVX512_maskable_3src - confirm against its definition.
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006168multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
Craig Topper318e40b2016-07-25 07:20:31 +00006169 X86VectorVTInfo _, string Suff> {
Craig Topperb16598d2017-09-01 07:58:16 +00006170 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
Adam Nemet34801422014-10-08 23:25:39 +00006171 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
Adam Nemet6bddb8c2014-09-29 22:54:41 +00006172 (ins _.RC:$src2, _.RC:$src3),
Adam Nemet2e91ee52014-08-14 17:13:19 +00006173 OpcodeStr, "$src3, $src2", "$src2, $src3",
Simon Pilgrim6a009702017-11-29 17:21:15 +00006174 (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), NoItinerary, 1, 1>,
Simon Pilgrim97160be2017-11-27 10:41:32 +00006175 AVX512FMA3Base, Sched<[WriteFMA]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006176
Craig Toppere1cac152016-06-07 07:27:54 +00006177 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6178 (ins _.RC:$src2, _.MemOp:$src3),
6179 OpcodeStr, "$src3, $src2", "$src2, $src3",
Simon Pilgrim6a009702017-11-29 17:21:15 +00006180 (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))),
Craig Topper468a8132017-12-12 07:06:35 +00006181 NoItinerary, 1, 0>, AVX512FMA3Base, Sched<[WriteFMALd, ReadAfterLd]>;
Elena Demikhovskyfcea06a2014-12-23 10:30:39 +00006182
Craig Toppere1cac152016-06-07 07:27:54 +00006183 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6184 (ins _.RC:$src2, _.ScalarMemOp:$src3),
6185 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
6186 !strconcat("$src2, ${src3}", _.BroadcastStr ),
Craig Topper6bcbf532016-07-25 07:20:28 +00006187 (OpNode _.RC:$src2,
Simon Pilgrim6a009702017-11-29 17:21:15 +00006188 _.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))),
6189 NoItinerary, 1, 0>, AVX512FMA3Base, EVEX_B,
Craig Topper468a8132017-12-12 07:06:35 +00006190 Sched<[WriteFMALd, ReadAfterLd]>;
Craig Topper5ec33a92016-07-22 05:00:42 +00006191 }
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006192}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006193
// Packed FMA, "213" order, register form with an explicit rounding-control
// operand: OpNode($src2, $src1, $src3, $rc) where $rc is an AVX512RC operand
// matched as an i32 immediate. Encoded with EVEX_B and EVEX_RC; $src1 is tied
// to $dst. The Suff parameter is not referenced in this body.
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006194multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
Craig Topper318e40b2016-07-25 07:20:31 +00006195 X86VectorVTInfo _, string Suff> {
Craig Topperb16598d2017-09-01 07:58:16 +00006196 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006197 defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
Elena Demikhovsky7b0dd392015-01-28 10:21:27 +00006198 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6199 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
Simon Pilgrim6a009702017-11-29 17:21:15 +00006200 (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 imm:$rc))),
6201 NoItinerary, 1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[WriteFMA]>;
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006202}
Elena Demikhovsky7b0dd392015-01-28 10:21:27 +00006203
// Instantiates the 213 packed FMA forms per vector width. The 512-bit Z
// variant (HasAVX512) combines the r/m/mb forms with the rounding form, which
// uses OpNodeRnd. The Z256/Z128 variants (HasVLX + HasAVX512) get only
// r/m/mb. EVEX_CD8 scaling uses each width's element size.
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006204multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
Craig Topper318e40b2016-07-25 07:20:31 +00006205 SDNode OpNodeRnd, AVX512VLVectorVTInfo _,
6206 string Suff> {
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006207 let Predicates = [HasAVX512] in {
Craig Topper318e40b2016-07-25 07:20:31 +00006208 defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info512, Suff>,
6209 avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, _.info512,
6210 Suff>, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
Elena Demikhovskyfcea06a2014-12-23 10:30:39 +00006211 }
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006212 let Predicates = [HasVLX, HasAVX512] in {
Craig Topper318e40b2016-07-25 07:20:31 +00006213 defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info256, Suff>,
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006214 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
Craig Topper318e40b2016-07-25 07:20:31 +00006215 defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info128, Suff>,
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006216 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
Elena Demikhovskyfcea06a2014-12-23 10:30:39 +00006217 }
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006218}
6219
// Creates the single- and double-precision 213 packed FMA families: PS over
// avx512vl_f32_info (mnemonic suffix "ps") and PD over avx512vl_f64_info
// (mnemonic suffix "pd", VEX_W set).
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006220multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
Craig Topper318e40b2016-07-25 07:20:31 +00006221 SDNode OpNodeRnd > {
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006222 defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
Craig Topper318e40b2016-07-25 07:20:31 +00006223 avx512vl_f32_info, "PS">;
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006224 defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
Craig Topper318e40b2016-07-25 07:20:31 +00006225 avx512vl_f64_info, "PD">, VEX_W;
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006226}
6227
// 213-form packed FMA instructions. Each entry supplies the opcode, the
// mnemonic stem, the plain node and the rounding-mode node.
Craig Topperaf0b9922017-09-04 06:59:50 +00006228defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", X86Fmadd, X86FmaddRnd>;
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006229defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>;
6230defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>;
6231defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>;
6232defm VFNMADD213 : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86Fnmadd, X86FnmaddRnd>;
6233defm VFNMSUB213 : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>;
6234
Elena Demikhovskyfcea06a2014-12-23 10:30:39 +00006235
// Packed FMA, "231" operand order: every pattern is
// OpNode($src2, $src3, $src1), with $src1 tied to $dst. Defines the r
// (register), m (full-vector load) and mb (broadcast scalar load, EVEX_B)
// forms. The Suff parameter is not referenced in this body.
// NOTE(review): only the `r` form passes the extra `vselect, 1` arguments;
// presumably these are the select node and a mask-only flag of
// AVX512_maskable_3src - confirm against its definition.
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006236multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
Craig Topper318e40b2016-07-25 07:20:31 +00006237 X86VectorVTInfo _, string Suff> {
Craig Topperb16598d2017-09-01 07:58:16 +00006238 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006239 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6240 (ins _.RC:$src2, _.RC:$src3),
6241 OpcodeStr, "$src3, $src2", "$src2, $src3",
Simon Pilgrim6a009702017-11-29 17:21:15 +00006242 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), NoItinerary, 1, 1,
6243 vselect, 1>, AVX512FMA3Base, Sched<[WriteFMA]>;
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006244
Craig Toppere1cac152016-06-07 07:27:54 +00006245 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6246 (ins _.RC:$src2, _.MemOp:$src3),
6247 OpcodeStr, "$src3, $src2", "$src2, $src3",
Simon Pilgrim6a009702017-11-29 17:21:15 +00006248 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
Craig Topper468a8132017-12-12 07:06:35 +00006249 NoItinerary, 1, 0>, AVX512FMA3Base, Sched<[WriteFMALd, ReadAfterLd]>;
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006250
Craig Toppere1cac152016-06-07 07:27:54 +00006251 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6252 (ins _.RC:$src2, _.ScalarMemOp:$src3),
6253 OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
6254 "$src2, ${src3}"##_.BroadcastStr,
6255 (_.VT (OpNode _.RC:$src2,
6256 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
Simon Pilgrim6a009702017-11-29 17:21:15 +00006257 _.RC:$src1)), NoItinerary, 1, 0>, AVX512FMA3Base, EVEX_B,
Craig Topper468a8132017-12-12 07:06:35 +00006258 Sched<[WriteFMALd, ReadAfterLd]>;
Craig Topper5ec33a92016-07-22 05:00:42 +00006259 }
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006260}
6261
// Packed FMA, "231" order, register form with an explicit rounding-control
// operand: OpNode($src2, $src3, $src1, $rc). Encoded with EVEX_B and EVEX_RC;
// $src1 is tied to $dst. The Suff parameter is not referenced in this body.
6262multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
Craig Topper318e40b2016-07-25 07:20:31 +00006263 X86VectorVTInfo _, string Suff> {
Craig Topperb16598d2017-09-01 07:58:16 +00006264 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006265 defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6266 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6267 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
Simon Pilgrim6a009702017-11-29 17:21:15 +00006268 (_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 imm:$rc))),
6269 NoItinerary, 1, 1, vselect, 1>,
Simon Pilgrim97160be2017-11-27 10:41:32 +00006270 AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[WriteFMA]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006271}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006272
// Instantiates the 231 packed FMA forms per vector width. The 512-bit Z
// variant (HasAVX512) combines the r/m/mb forms with the rounding form, which
// uses OpNodeRnd. The Z256/Z128 variants (HasVLX + HasAVX512) get only
// r/m/mb. EVEX_CD8 scaling uses each width's element size.
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006273multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
Craig Topper318e40b2016-07-25 07:20:31 +00006274 SDNode OpNodeRnd, AVX512VLVectorVTInfo _,
6275 string Suff> {
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006276 let Predicates = [HasAVX512] in {
Craig Topper318e40b2016-07-25 07:20:31 +00006277 defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info512, Suff>,
6278 avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, _.info512,
6279 Suff>, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
Elena Demikhovskyfcea06a2014-12-23 10:30:39 +00006280 }
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006281 let Predicates = [HasVLX, HasAVX512] in {
Craig Topper318e40b2016-07-25 07:20:31 +00006282 defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info256, Suff>,
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006283 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
Craig Topper318e40b2016-07-25 07:20:31 +00006284 defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info128, Suff>,
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006285 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
Elena Demikhovskyfcea06a2014-12-23 10:30:39 +00006286 }
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006287}
6288
// Creates the single- and double-precision 231 packed FMA families: PS over
// avx512vl_f32_info (mnemonic suffix "ps") and PD over avx512vl_f64_info
// (mnemonic suffix "pd", VEX_W set).
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006289multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
Craig Topper318e40b2016-07-25 07:20:31 +00006290 SDNode OpNodeRnd > {
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006291 defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
Craig Topper318e40b2016-07-25 07:20:31 +00006292 avx512vl_f32_info, "PS">;
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006293 defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
Craig Topper318e40b2016-07-25 07:20:31 +00006294 avx512vl_f64_info, "PD">, VEX_W;
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006295}
6296
// 231-form packed FMA instructions. Each entry supplies the opcode, the
// mnemonic stem, the plain node and the rounding-mode node.
Craig Topperaf0b9922017-09-04 06:59:50 +00006297defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", X86Fmadd, X86FmaddRnd>;
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006298defm VFMSUB231 : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>;
6299defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>;
6300defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>;
6301defm VFNMADD231 : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>;
6302defm VFNMSUB231 : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>;
6303
// Packed FMA, "132" operand order, with $src1 tied to $dst. The register form
// matches OpNode($src1, $src3, $src2); the two memory forms are written with
// the memory operand first, OpNode(load($src3), $src1, $src2), as explained in
// the comments below. The Suff parameter is not referenced in this body.
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006304multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
Craig Topper318e40b2016-07-25 07:20:31 +00006305 X86VectorVTInfo _, string Suff> {
Craig Topperb16598d2017-09-01 07:58:16 +00006306 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006307 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
Craig Topper6bcbf532016-07-25 07:20:28 +00006308 (ins _.RC:$src2, _.RC:$src3),
6309 OpcodeStr, "$src3, $src2", "$src2, $src3",
Simon Pilgrim6a009702017-11-29 17:21:15 +00006310 (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), NoItinerary,
6311 1, 1, vselect, 1>, AVX512FMA3Base, Sched<[WriteFMA]>;
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006312
Craig Topper69e22782017-09-04 07:35:05 +00006313 // The pattern is in 312 order so that the load is in a different place from
6314 // the 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
Craig Toppere1cac152016-06-07 07:27:54 +00006315 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
Craig Topper6bcbf532016-07-25 07:20:28 +00006316 (ins _.RC:$src2, _.MemOp:$src3),
6317 OpcodeStr, "$src3, $src2", "$src2, $src3",
Simon Pilgrim6a009702017-11-29 17:21:15 +00006318 (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)),
Craig Topper468a8132017-12-12 07:06:35 +00006319 NoItinerary, 1, 0>, AVX512FMA3Base, Sched<[WriteFMALd, ReadAfterLd]>;
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006320
Craig Topper69e22782017-09-04 07:35:05 +00006321 // The pattern is in 312 order so that the load is in a different place from
6322 // the 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
Craig Toppere1cac152016-06-07 07:27:54 +00006323 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
Craig Topper6bcbf532016-07-25 07:20:28 +00006324 (ins _.RC:$src2, _.ScalarMemOp:$src3),
6325 OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
6326 "$src2, ${src3}"##_.BroadcastStr,
Craig Topper69e22782017-09-04 07:35:05 +00006327 (_.VT (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
Simon Pilgrim6a009702017-11-29 17:21:15 +00006328 _.RC:$src1, _.RC:$src2)), NoItinerary, 1, 0>,
Craig Topper468a8132017-12-12 07:06:35 +00006329 AVX512FMA3Base, EVEX_B, Sched<[WriteFMALd, ReadAfterLd]>;
Craig Topper5ec33a92016-07-22 05:00:42 +00006330 }
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006331}
6332
// Packed FMA, "132" order, register form with an explicit rounding-control
// operand: OpNode($src1, $src3, $src2, $rc). Encoded with EVEX_B and EVEX_RC;
// $src1 is tied to $dst. The Suff parameter is not referenced in this body.
6333multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
Craig Topper318e40b2016-07-25 07:20:31 +00006334 X86VectorVTInfo _, string Suff> {
Craig Topperb16598d2017-09-01 07:58:16 +00006335 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006336 defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
Craig Topper6bcbf532016-07-25 07:20:28 +00006337 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6338 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
Simon Pilgrim6a009702017-11-29 17:21:15 +00006339 (_.VT ( OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 imm:$rc))),
6340 NoItinerary, 1, 1, vselect, 1>,
Simon Pilgrim97160be2017-11-27 10:41:32 +00006341 AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[WriteFMA]>;
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006342}
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006343
// Instantiates the 132 packed FMA forms per vector width. The 512-bit Z
// variant (HasAVX512) combines the r/m/mb forms with the rounding form, which
// uses OpNodeRnd. The Z256/Z128 variants (HasVLX + HasAVX512) get only
// r/m/mb. EVEX_CD8 scaling uses each width's element size.
6344multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
Craig Topper318e40b2016-07-25 07:20:31 +00006345 SDNode OpNodeRnd, AVX512VLVectorVTInfo _,
6346 string Suff> {
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006347 let Predicates = [HasAVX512] in {
Craig Topper318e40b2016-07-25 07:20:31 +00006348 defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info512, Suff>,
6349 avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, _.info512,
6350 Suff>, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006351 }
6352 let Predicates = [HasVLX, HasAVX512] in {
Craig Topper318e40b2016-07-25 07:20:31 +00006353 defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info256, Suff>,
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006354 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
Craig Topper318e40b2016-07-25 07:20:31 +00006355 defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info128, Suff>,
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006356 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6357 }
6358}
6359
// Creates the single- and double-precision 132 packed FMA families: PS over
// avx512vl_f32_info (mnemonic suffix "ps") and PD over avx512vl_f64_info
// (mnemonic suffix "pd", VEX_W set).
6360multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
Craig Topper318e40b2016-07-25 07:20:31 +00006361 SDNode OpNodeRnd > {
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006362 defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
Craig Topper318e40b2016-07-25 07:20:31 +00006363 avx512vl_f32_info, "PS">;
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006364 defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
Craig Topper318e40b2016-07-25 07:20:31 +00006365 avx512vl_f64_info, "PD">, VEX_W;
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006366}
6367
// 132-form packed FMA instructions. Each entry supplies the opcode, the
// mnemonic stem, the plain node and the rounding-mode node.
Craig Topperaf0b9922017-09-04 06:59:50 +00006368defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", X86Fmadd, X86FmaddRnd>;
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006369defm VFMSUB132 : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>;
6370defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>;
6371defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>;
6372defm VFNMADD132 : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86Fnmadd, X86FnmaddRnd>;
6373defm VFNMSUB132 : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86Fnmsub, X86FnmsubRnd>;
Elena Demikhovskyfcea06a2014-12-23 10:30:39 +00006374
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006375// Scalar FMA
// Shared body for one scalar FMA operand ordering. The caller supplies the
// selection DAGs:
//   RHS_VEC_r / RHS_VEC_m / RHS_VEC_rb - patterns for the maskable r_Int,
//     m_Int and rb_Int (rounding-control) forms, which operate on _.RC;
//   RHS_r / RHS_m - patterns for the isCodeGenOnly r and m forms, which
//     operate on the scalar register class _.FRC.
// When MaskOnlyReg is set, the `r` form is emitted with an empty pattern list.
// $src1 is tied to $dst for every form.
Igor Breger15820b02015-07-01 13:24:28 +00006376multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6377 dag RHS_VEC_r, dag RHS_VEC_m, dag RHS_VEC_rb,
Craig Topper69e22782017-09-04 07:35:05 +00006378 dag RHS_r, dag RHS_m, bit MaskOnlyReg> {
Craig Topperb16598d2017-09-01 07:58:16 +00006379let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
Igor Breger15820b02015-07-01 13:24:28 +00006380 defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6381 (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
Simon Pilgrim6a009702017-11-29 17:21:15 +00006382 "$src3, $src2", "$src2, $src3", RHS_VEC_r, NoItinerary, 1, 1>,
6383 AVX512FMA3Base, Sched<[WriteFMA]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006384
Craig Toppere1cac152016-06-07 07:27:54 +00006385 defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
Craig Topperd9fe6642017-02-21 04:26:10 +00006386 (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
Simon Pilgrim6a009702017-11-29 17:21:15 +00006387 "$src3, $src2", "$src2, $src3", RHS_VEC_m, NoItinerary, 1, 1>,
Craig Topper468a8132017-12-12 07:06:35 +00006388 AVX512FMA3Base, Sched<[WriteFMALd, ReadAfterLd]>;
Igor Breger15820b02015-07-01 13:24:28 +00006389
6390 defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6391 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
Simon Pilgrim6a009702017-11-29 17:21:15 +00006392 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", RHS_VEC_rb,
6393 NoItinerary, 1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC,
Craig Toppera2f55282017-12-10 03:16:36 +00006394 Sched<[WriteFMA]>;
Igor Breger15820b02015-07-01 13:24:28 +00006395
Craig Toppereafdbec2016-08-13 06:48:41 +00006396 let isCodeGenOnly = 1, isCommutable = 1 in {
Craig Topper5bfa5ff2017-11-09 08:26:26 +00006397 def r : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
Igor Breger15820b02015-07-01 13:24:28 +00006398 (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
6399 !strconcat(OpcodeStr,
6400 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
Simon Pilgrim97160be2017-11-27 10:41:32 +00006401 !if(MaskOnlyReg, [], [RHS_r])>, Sched<[WriteFMA]>;
Craig Topper5bfa5ff2017-11-09 08:26:26 +00006402 def m : AVX512FMA3S<opc, MRMSrcMem, (outs _.FRC:$dst),
Craig Toppere1cac152016-06-07 07:27:54 +00006403 (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
6404 !strconcat(OpcodeStr,
6405 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
Craig Topper468a8132017-12-12 07:06:35 +00006406 [RHS_m]>, Sched<[WriteFMALd, ReadAfterLd]>;
Igor Breger15820b02015-07-01 13:24:28 +00006407 }// isCodeGenOnly = 1, isCommutable = 1
Igor Breger15820b02015-07-01 13:24:28 +00006408}// Constraints = "$src1 = $dst"
Craig Topperb16598d2017-09-01 07:58:16 +00006409}
Igor Breger15820b02015-07-01 13:24:28 +00006410
// Emits the 213, 231 and 132 scalar FMA variants for one element type by
// instantiating avx512_fma3s_common three times. Record names are
// NAME#<order>#SUFF#Z and mnemonics are OpcodeStr#<order>#_.Suffix.
//   OpNode                - node used for the _.FRC (scalar register) patterns;
//   OpNodes1/OpNodeRnds1  - nodes used for the _.RC patterns of 213 and 132;
//   OpNodes3/OpNodeRnds3  - nodes used for the _.RC patterns of 231.
// The final argument (MaskOnlyReg) is 0 for 213 and 1 for 231 and 132.
6411multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
Craig Topper07dac552017-11-06 05:48:25 +00006412 string OpcodeStr, SDNode OpNode, SDNode OpNodes1,
6413 SDNode OpNodeRnds1, SDNode OpNodes3,
6414 SDNode OpNodeRnds3, X86VectorVTInfo _,
6415 string SUFF> {
Craig Topper2caa97c2017-02-25 19:36:28 +00006416 let ExeDomain = _.ExeDomain in {
Craig Topperb16598d2017-09-01 07:58:16 +00006417 defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
Craig Toppera55b4832016-12-09 06:42:28 +00006418 // Operands for intrinsic are in 123 order to preserve passthru
6419 // semantics.
Craig Topper07dac552017-11-06 05:48:25 +00006420 (_.VT (OpNodes1 _.RC:$src1, _.RC:$src2, _.RC:$src3)),
6421 (_.VT (OpNodes1 _.RC:$src1, _.RC:$src2,
6422 _.ScalarIntMemCPat:$src3)),
Craig Toppera55b4832016-12-09 06:42:28 +00006423 (_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src2, _.RC:$src3,
Igor Breger15820b02015-07-01 13:24:28 +00006424 (i32 imm:$rc))),
6425 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6426 _.FRC:$src3))),
6427 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
Craig Topper69e22782017-09-04 07:35:05 +00006428 (_.ScalarLdFrag addr:$src3)))), 0>;
Igor Breger15820b02015-07-01 13:24:28 +00006429
Craig Topperb16598d2017-09-01 07:58:16 +00006430 defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
Craig Topper07dac552017-11-06 05:48:25 +00006431 (_.VT (OpNodes3 _.RC:$src2, _.RC:$src3, _.RC:$src1)),
6432 (_.VT (OpNodes3 _.RC:$src2, _.ScalarIntMemCPat:$src3,
6433 _.RC:$src1)),
Craig Toppera55b4832016-12-09 06:42:28 +00006434 (_.VT ( OpNodeRnds3 _.RC:$src2, _.RC:$src3, _.RC:$src1,
Igor Breger15820b02015-07-01 13:24:28 +00006435 (i32 imm:$rc))),
6436 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
6437 _.FRC:$src1))),
6438 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
Craig Topper69e22782017-09-04 07:35:05 +00006439 (_.ScalarLdFrag addr:$src3), _.FRC:$src1))), 1>;
Igor Breger15820b02015-07-01 13:24:28 +00006440
Craig Toppereec768b2017-09-06 03:35:58 +00006441 // One pattern is in 312 order so that the load is in a different place from
6442 // the 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
// The register and rounding _.RC patterns are null_frag for the 132 variant.
Craig Topperb16598d2017-09-01 07:58:16 +00006443 defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
Craig Topper69e22782017-09-04 07:35:05 +00006444 (null_frag),
Craig Topper07dac552017-11-06 05:48:25 +00006445 (_.VT (OpNodes1 _.RC:$src1, _.ScalarIntMemCPat:$src3,
6446 _.RC:$src2)),
Craig Topper69e22782017-09-04 07:35:05 +00006447 (null_frag),
Igor Breger15820b02015-07-01 13:24:28 +00006448 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
6449 _.FRC:$src2))),
Craig Toppereec768b2017-09-06 03:35:58 +00006450 (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
6451 _.FRC:$src1, _.FRC:$src2))), 1>;
Craig Topper2caa97c2017-02-25 19:36:28 +00006452 }
Igor Breger15820b02015-07-01 13:24:28 +00006453}
6454
// Instantiates avx512_fma3s_all twice under the HasAVX512 predicate:
//   * once with f32x_info and the "SS" suffix (EVEX_CD8<32, CD8VT1>), and
//   * once with f64x_info and the "SD" suffix (EVEX_CD8<64, CD8VT1>, VEX_W).
// Both instantiations are VEX_LIG. All opcode and SDNode arguments are
// forwarded unchanged; opc213/opc231/opc132 are the opcodes of the three
// operand-order forms.
6455multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
Craig Topper07dac552017-11-06 05:48:25 +00006456 string OpcodeStr, SDNode OpNode, SDNode OpNodes1,
6457 SDNode OpNodeRnds1, SDNode OpNodes3,
Craig Toppera55b4832016-12-09 06:42:28 +00006458 SDNode OpNodeRnds3> {
Igor Breger15820b02015-07-01 13:24:28 +00006459 let Predicates = [HasAVX512] in {
// Single-precision scalar forms.
6460 defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
Craig Topper07dac552017-11-06 05:48:25 +00006461 OpNodes1, OpNodeRnds1, OpNodes3, OpNodeRnds3,
6462 f32x_info, "SS">,
Craig Toppera55b4832016-12-09 06:42:28 +00006463 EVEX_CD8<32, CD8VT1>, VEX_LIG;
// Double-precision scalar forms (additionally VEX_W).
Igor Breger15820b02015-07-01 13:24:28 +00006464 defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
Craig Topper07dac552017-11-06 05:48:25 +00006465 OpNodes1, OpNodeRnds1, OpNodes3, OpNodeRnds3,
6466 f64x_info, "SD">,
Craig Toppera55b4832016-12-09 06:42:28 +00006467 EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
Igor Breger15820b02015-07-01 13:24:28 +00006468 }
6469}
6470
// Scalar FMA3 instruction families. The three opcode arguments are given in
// avx512_fma3s parameter order (opc213, opc231, opc132); the SDNode arguments
// are the generic node followed by the *s1 / *Rnds1 / *s3 / *Rnds3 nodes.
Craig Topper07dac552017-11-06 05:48:25 +00006471defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86Fmadds1,
6472 X86FmaddRnds1, X86Fmadds3, X86FmaddRnds3>;
6473defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86Fmsubs1,
6474 X86FmsubRnds1, X86Fmsubs3, X86FmsubRnds3>;
6475defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86Fnmadds1,
6476 X86FnmaddRnds1, X86Fnmadds3, X86FnmaddRnds3>;
6477defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86Fnmsubs1,
6478 X86FnmsubRnds1, X86Fnmsubs3, X86FnmsubRnds3>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006479
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006479
6480//===----------------------------------------------------------------------===//
Asaf Badouh655822a2016-01-25 11:14:24 +00006481// AVX-512 Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
6482//===----------------------------------------------------------------------===//
// Defines the three operand forms of a 52-bit multiply-add instruction for a
// single vector type `_`:
//   r  - register source ($src3 is a register),
//   m  - full-vector memory source ($src3 loaded with _.LdFrag),
//   mb - broadcast memory source ($src3 is a scalar load fed through
//        X86VBroadcast; the instruction is marked EVEX_B).
// $src1 is the accumulator and is tied to $dst by the enclosing Constraints.
6483let Constraints = "$src1 = $dst" in {
6484multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim4d08aed2017-12-05 14:59:40 +00006485 OpndItins itins, X86VectorVTInfo _> {
Craig Topper47e14ea2017-09-24 19:30:55 +00006486 // NOTE: The SDNodes have the multiply operands first with the add last.
6487 // This enables commuted load patterns to be autogenerated by tablegen.
Craig Topper6bf9b802017-02-26 06:45:45 +00006488 let ExeDomain = _.ExeDomain in {
Asaf Badouh655822a2016-01-25 11:14:24 +00006489 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6490 (ins _.RC:$src2, _.RC:$src3),
6491 OpcodeStr, "$src3, $src2", "$src2, $src3",
Simon Pilgrim4d08aed2017-12-05 14:59:40 +00006492 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), itins.rr, 1, 1>,
6493 AVX512FMA3Base, Sched<[itins.Sched]>;
Asaf Badouh655822a2016-01-25 11:14:24 +00006494
Craig Toppere1cac152016-06-07 07:27:54 +00006495 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6496 (ins _.RC:$src2, _.MemOp:$src3),
6497 OpcodeStr, "$src3, $src2", "$src2, $src3",
Simon Pilgrim4d08aed2017-12-05 14:59:40 +00006498 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
6499 itins.rm>, AVX512FMA3Base, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Asaf Badouh655822a2016-01-25 11:14:24 +00006500
Craig Toppere1cac152016-06-07 07:27:54 +00006501 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6502 (ins _.RC:$src2, _.ScalarMemOp:$src3),
6503 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
6504 !strconcat("$src2, ${src3}", _.BroadcastStr ),
Craig Topper47e14ea2017-09-24 19:30:55 +00006505 (OpNode _.RC:$src2,
6506 (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
Simon Pilgrim4d08aed2017-12-05 14:59:40 +00006507 _.RC:$src1), itins.rm>,
6508 AVX512FMA3Base, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Craig Topper6bf9b802017-02-26 06:45:45 +00006509 }
Asaf Badouh655822a2016-01-25 11:14:24 +00006510}
6511} // Constraints = "$src1 = $dst"
6512
// Instantiates avx512_pmadd52_rm for every vector width described by `_`:
//   Z     - 512-bit, guarded by HasIFMA,
//   Z256  - 256-bit, guarded by HasVLX and HasIFMA,
//   Z128  - 128-bit, guarded by HasVLX and HasIFMA.
// The EVEX_CD8 element size is taken from the matching info record.
6513multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim4d08aed2017-12-05 14:59:40 +00006514 OpndItins itins, AVX512VLVectorVTInfo _> {
Asaf Badouh655822a2016-01-25 11:14:24 +00006515 let Predicates = [HasIFMA] in {
Simon Pilgrim4d08aed2017-12-05 14:59:40 +00006516 defm Z : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, itins, _.info512>,
Asaf Badouh655822a2016-01-25 11:14:24 +00006517 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6518 }
6519 let Predicates = [HasVLX, HasIFMA] in {
Simon Pilgrim4d08aed2017-12-05 14:59:40 +00006520 defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, itins, _.info256>,
Asaf Badouh655822a2016-01-25 11:14:24 +00006521 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
Simon Pilgrim4d08aed2017-12-05 14:59:40 +00006522 defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, itins, _.info128>,
Asaf Badouh655822a2016-01-25 11:14:24 +00006523 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6524 }
6525}
6526
// The two IFMA instructions: opcode 0xb4 selects x86vpmadd52l and 0xb5 selects
// x86vpmadd52h. Both use the 64-bit integer vector infos and are VEX_W.
6527defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
Simon Pilgrim4d08aed2017-12-05 14:59:40 +00006528 SSE_PMADD, avx512vl_i64_info>, VEX_W;
Asaf Badouh655822a2016-01-25 11:14:24 +00006529defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
Simon Pilgrim4d08aed2017-12-05 14:59:40 +00006530 SSE_PMADD, avx512vl_i64_info>, VEX_W;
Asaf Badouh655822a2016-01-25 11:14:24 +00006531
6532//===----------------------------------------------------------------------===//
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006533// AVX-512 Scalar convert from signed integer to float/double
6534//===----------------------------------------------------------------------===//
6535
// Defines four forms of a scalar integer-to-FP conversion:
//   rr / rm         - operate on the scalar FP class DstVT.FRC. They carry an
//                     empty pattern list and are marked hasSideEffects = 0
//                     (rm additionally mayLoad = 1).
//   rr_Int / rm_Int - operate on the vector class DstVT.RC, are isCodeGenOnly,
//                     and match OpNode with the FROUND_CURRENT rounding
//                     operand. rm_Int loads its integer source with ld_frag.
// $src1 supplies the first source operand in every form (EVEX_4V).
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006536multiclass avx512_vcvtsi<bits<8> opc, SDNode OpNode, OpndItins itins,
6537 RegisterClass SrcRC, X86VectorVTInfo DstVT,
6538 X86MemOperand x86memop, PatFrag ld_frag, string asm> {
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006539 let hasSideEffects = 0 in {
6540 def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
6541 (ins DstVT.FRC:$src1, SrcRC:$src),
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006542 !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), [],
6543 itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006544 let mayLoad = 1 in
6545 def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
6546 (ins DstVT.FRC:$src1, x86memop:$src),
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006547 !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), [],
6548 itins.rm>, EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006549 } // hasSideEffects = 0
6550 let isCodeGenOnly = 1 in {
6551 def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
6552 (ins DstVT.RC:$src1, SrcRC:$src2),
6553 !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6554 [(set DstVT.RC:$dst,
6555 (OpNode (DstVT.VT DstVT.RC:$src1),
6556 SrcRC:$src2,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006557 (i32 FROUND_CURRENT)))], itins.rr>,
6558 EVEX_4V, Sched<[itins.Sched]>;
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006559
6560 def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
6561 (ins DstVT.RC:$src1, x86memop:$src2),
6562 !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6563 [(set DstVT.RC:$dst,
6564 (OpNode (DstVT.VT DstVT.RC:$src1),
6565 (ld_frag addr:$src2),
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006566 (i32 FROUND_CURRENT)))], itins.rm>,
6567 EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006568 }//isCodeGenOnly = 1
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006569}
Elena Demikhovskyd8fda622015-03-30 09:29:28 +00006570
// Defines rrb_Int, the register form that takes an explicit rounding-control
// operand (AVX512RC:$rc). The operand is passed to OpNode as an i32 immediate
// and the instruction is marked EVEX_B and EVEX_RC. No memory form is defined
// here.
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006571multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode, OpndItins itins,
6572 RegisterClass SrcRC, X86VectorVTInfo DstVT, string asm> {
Igor Bregerabe4a792015-06-14 12:44:55 +00006573 def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
6574 (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006575 !strconcat(asm,
6576 "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
Igor Bregerabe4a792015-06-14 12:44:55 +00006577 [(set DstVT.RC:$dst,
6578 (OpNode (DstVT.VT DstVT.RC:$src1),
6579 SrcRC:$src2,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006580 (i32 imm:$rc)))], itins.rr>,
6581 EVEX_4V, EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
Igor Bregerabe4a792015-06-14 12:44:55 +00006582}
6583
// Combines avx512_vcvtsi_round (rrb_Int) and avx512_vcvtsi (rr, rm, rr_Int,
// rm_Int) under a single NAME and marks the result VEX_LIG.
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006584multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, OpndItins itins,
6585 RegisterClass SrcRC, X86VectorVTInfo DstVT,
6586 X86MemOperand x86memop, PatFrag ld_frag, string asm> {
6587 defm NAME : avx512_vcvtsi_round<opc, OpNode, itins, SrcRC, DstVT, asm>,
6588 avx512_vcvtsi<opc, OpNode, itins, SrcRC, DstVT, x86memop,
6589 ld_frag, asm>, VEX_LIG;
Igor Bregerabe4a792015-06-14 12:44:55 +00006590}
6591
// Scalar integer -> FP conversion instructions, their unsuffixed assembler
// aliases, and the sint_to_fp / uint_to_fp selection patterns. Everything in
// this block is guarded by HasAVX512.
Andrew Trick15a47742013-10-09 05:11:10 +00006592let Predicates = [HasAVX512] in {
// Signed conversions (opcode 0x2A). The "642" names take a 64-bit integer
// source (GR64 / i64mem) and are VEX_W.
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006593defm VCVTSI2SSZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, SSE_CVT_SI2SS, GR32,
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006594 v4f32x_info, i32mem, loadi32, "cvtsi2ss{l}">,
6595 XS, EVEX_CD8<32, CD8VT1>;
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006596defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, SSE_CVT_SI2SS, GR64,
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006597 v4f32x_info, i64mem, loadi64, "cvtsi2ss{q}">,
6598 XS, VEX_W, EVEX_CD8<64, CD8VT1>;
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006599defm VCVTSI2SDZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, SSE_CVT_SI2SD, GR32,
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006600 v2f64x_info, i32mem, loadi32, "cvtsi2sd{l}">,
6601 XD, EVEX_CD8<32, CD8VT1>;
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006602defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, SSE_CVT_SI2SD, GR64,
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006603 v2f64x_info, i64mem, loadi64, "cvtsi2sd{q}">,
6604 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006605
// Accept the mnemonic without a size suffix for the 32-bit memory form.
// NOTE(review): the "ss" alias names FR64X for $dst/$src1 although the
// instruction was instantiated with v4f32x_info - confirm this is intended.
Craig Topper8f85ad12016-11-14 02:46:58 +00006606def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
6607 (VCVTSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0>;
6608def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
6609 (VCVTSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0>;
6610
// sint_to_fp with a folded load; the first instruction operand is
// IMPLICIT_DEF.
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006611def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
6612 (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
6613def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006614 (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006615def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
6616 (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
6617def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006618 (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006619
// sint_to_fp from a general-purpose register.
6620def : Pat<(f32 (sint_to_fp GR32:$src)),
6621 (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
6622def : Pat<(f32 (sint_to_fp GR64:$src)),
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006623 (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006624def : Pat<(f64 (sint_to_fp GR32:$src)),
6625 (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
6626def : Pat<(f64 (sint_to_fp GR64:$src)),
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006627 (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
6628
// Unsigned conversions (opcode 0x7B).
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006629defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, SSE_CVT_SI2SS, GR32,
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006630 v4f32x_info, i32mem, loadi32,
6631 "cvtusi2ss{l}">, XS, EVEX_CD8<32, CD8VT1>;
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006632defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, SSE_CVT_SI2SS, GR64,
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006633 v4f32x_info, i64mem, loadi64, "cvtusi2ss{q}">,
6634 XS, VEX_W, EVEX_CD8<64, CD8VT1>;
// NOTE: this one uses avx512_vcvtsi directly (with an explicit VEX_LIG), so
// it gets no rrb_Int rounding-control form. Presumably this is because every
// 32-bit unsigned value is exactly representable in f64 - TODO confirm.
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006635defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, X86UintToFpRnd, SSE_CVT_SI2SD, GR32, v2f64x_info,
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006636 i32mem, loadi32, "cvtusi2sd{l}">,
6637 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006638defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, SSE_CVT_SI2SD, GR64,
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006639 v2f64x_info, i64mem, loadi64, "cvtusi2sd{q}">,
6640 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006641
// Unsuffixed aliases for the unsigned 32-bit memory forms.
Craig Topper8f85ad12016-11-14 02:46:58 +00006642def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
6643 (VCVTUSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0>;
6644def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
6645 (VCVTUSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0>;
6646
// uint_to_fp with a folded load.
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006647def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
6648 (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
6649def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))),
6650 (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
6651def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))),
6652 (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
6653def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))),
6654 (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
6655
// uint_to_fp from a general-purpose register.
6656def : Pat<(f32 (uint_to_fp GR32:$src)),
6657 (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
6658def : Pat<(f32 (uint_to_fp GR64:$src)),
6659 (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
6660def : Pat<(f64 (uint_to_fp GR32:$src)),
6661 (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
6662def : Pat<(f64 (uint_to_fp GR64:$src)),
6663 (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
Andrew Trick15a47742013-10-09 05:11:10 +00006664}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006665
6666//===----------------------------------------------------------------------===//
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006667// AVX-512 Scalar convert from float/double to integer
6668//===----------------------------------------------------------------------===//
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006669
// Scalar FP -> integer conversion on the vector register class. Defines,
// under HasAVX512:
//   rr_Int  - register source, OpNode with FROUND_CURRENT,
//   rrb_Int - register source plus explicit AVX512RC:$rc rounding control
//             (EVEX_B, EVEX_RC),
//   rm_Int  - memory source matched via SrcVT.ScalarIntMemCPat; its
//             isCodeGenOnly and ForceDisassemble bits are both set from the
//             CodeGenOnly parameter (default 1),
// plus InstAliases for the two register forms that accept the mnemonic
// "v" # asm # aliasStr.
6670multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
6671 X86VectorVTInfo DstVT, SDNode OpNode,
Craig Toppera49c3542018-01-06 19:20:33 +00006672 OpndItins itins, string asm,
6673 string aliasStr,
6674 bit CodeGenOnly = 1> {
Craig Toppere1cac152016-06-07 07:27:54 +00006675 let Predicates = [HasAVX512] in {
Craig Toppera0be5a02017-12-10 19:47:56 +00006676 def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
Asaf Badouh2744d212015-09-20 14:31:19 +00006677 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006678 [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 FROUND_CURRENT)))],
6679 itins.rr>, EVEX, VEX_LIG, Sched<[itins.Sched]>;
Craig Toppera0be5a02017-12-10 19:47:56 +00006680 def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
Craig Topper1de942b2017-12-10 17:42:44 +00006681 !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
6682 [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 imm:$rc)))],
6683 itins.rr>, EVEX, VEX_LIG, EVEX_B, EVEX_RC,
6684 Sched<[itins.Sched]>;
Craig Toppera49c3542018-01-06 19:20:33 +00006685 let isCodeGenOnly = CodeGenOnly, ForceDisassemble = CodeGenOnly in
Craig Toppera0be5a02017-12-10 19:47:56 +00006686 def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
Asaf Badouhad5c3fc2016-02-07 14:59:13 +00006687 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
Simon Pilgrimb13961d2016-06-11 14:34:10 +00006688 [(set DstVT.RC:$dst, (OpNode
Craig Topper5a63ca22017-03-13 03:59:06 +00006689 (SrcVT.VT SrcVT.ScalarIntMemCPat:$src),
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006690 (i32 FROUND_CURRENT)))], itins.rm>,
6691 EVEX, VEX_LIG, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Craig Toppere2659d82018-01-05 23:13:54 +00006692
6693 def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
6694 (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0>;
6695 def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
6696 (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0>;
Craig Toppera49c3542018-01-06 19:20:33 +00006697 } // Predicates = [HasAVX512]
6698}
6699
// Same as avx512_cvt_s_int_round but with CodeGenOnly = 0 for rm_Int, and
// with an additional suffixed InstAlias ("v" # asm # aliasStr) for the rm_Int
// memory form.
6700multiclass avx512_cvt_s_int_round_aliases<bits<8> opc, X86VectorVTInfo SrcVT,
6701 X86VectorVTInfo DstVT, SDNode OpNode,
6702 OpndItins itins, string asm,
6703 string aliasStr> :
6704 avx512_cvt_s_int_round<opc, SrcVT, DstVT, OpNode, itins, asm, aliasStr, 0> {
6705 let Predicates = [HasAVX512] in {
Craig Toppere2659d82018-01-05 23:13:54 +00006706 def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
6707 (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
6708 SrcVT.IntScalarMemOp:$src), 0>;
Simon Pilgrimb13961d2016-06-11 14:34:10 +00006709 } // Predicates = [HasAVX512]
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006710}
Asaf Badouh2744d212015-09-20 14:31:19 +00006711
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006712// Convert float/double to signed/unsigned int 32/64
// The signed conversions (opcode 0x2D, X86cvts2si) use avx512_cvt_s_int_round;
// the unsigned conversions (opcode 0x79, X86cvts2usi) use the *_aliases
// variant, which also emits a suffixed alias for the memory form. The "64"
// names use i64x_info as the destination and are VEX_W.
Simon Pilgrimb13961d2016-06-11 14:34:10 +00006713defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,
Craig Toppere2659d82018-01-05 23:13:54 +00006714 X86cvts2si, SSE_CVT_SS2SI_32, "cvtss2si", "{l}">,
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006715 XS, EVEX_CD8<32, CD8VT1>;
Simon Pilgrimb13961d2016-06-11 14:34:10 +00006716defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info,
Craig Toppere2659d82018-01-05 23:13:54 +00006717 X86cvts2si, SSE_CVT_SS2SI_64, "cvtss2si", "{q}">,
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006718 XS, VEX_W, EVEX_CD8<32, CD8VT1>;
Craig Toppera49c3542018-01-06 19:20:33 +00006719defm VCVTSS2USIZ: avx512_cvt_s_int_round_aliases<0x79, f32x_info, i32x_info,
Craig Toppere2659d82018-01-05 23:13:54 +00006720 X86cvts2usi, SSE_CVT_SS2SI_32, "cvtss2usi", "{l}">,
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006721 XS, EVEX_CD8<32, CD8VT1>;
Craig Toppera49c3542018-01-06 19:20:33 +00006722defm VCVTSS2USI64Z: avx512_cvt_s_int_round_aliases<0x79, f32x_info, i64x_info,
Craig Toppere2659d82018-01-05 23:13:54 +00006723 X86cvts2usi, SSE_CVT_SS2SI_64, "cvtss2usi", "{q}">,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006724 XS, VEX_W, EVEX_CD8<32, CD8VT1>;
Simon Pilgrimb13961d2016-06-11 14:34:10 +00006725defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info,
Craig Toppere2659d82018-01-05 23:13:54 +00006726 X86cvts2si, SSE_CVT_SD2SI, "cvtsd2si", "{l}">,
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006727 XD, EVEX_CD8<64, CD8VT1>;
Asaf Badouhad5c3fc2016-02-07 14:59:13 +00006728defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info,
Craig Toppere2659d82018-01-05 23:13:54 +00006729 X86cvts2si, SSE_CVT_SD2SI, "cvtsd2si", "{q}">,
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006730 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
Craig Toppera49c3542018-01-06 19:20:33 +00006731defm VCVTSD2USIZ: avx512_cvt_s_int_round_aliases<0x79, f64x_info, i32x_info,
Craig Toppere2659d82018-01-05 23:13:54 +00006732 X86cvts2usi, SSE_CVT_SD2SI, "cvtsd2usi", "{l}">,
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006733 XD, EVEX_CD8<64, CD8VT1>;
Craig Toppera49c3542018-01-06 19:20:33 +00006734defm VCVTSD2USI64Z: avx512_cvt_s_int_round_aliases<0x79, f64x_info, i64x_info,
Craig Toppere2659d82018-01-05 23:13:54 +00006735 X86cvts2usi, SSE_CVT_SD2SI, "cvtsd2usi", "{q}">,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006736 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006737
Asaf Badouhad5c3fc2016-02-07 14:59:13 +00006738// The SSE versions of these instructions are disabled for AVX512.
6739// Therefore, the SSE intrinsics are mapped to the AVX512 instructions.
// Select the SSE/SSE2 cvtss2si / cvtsd2si intrinsics (32- and 64-bit results,
// register and folded-load sources) to the EVEX-encoded *_Int instructions.
6740let Predicates = [HasAVX512] in {
6741 def : Pat<(i32 (int_x86_sse_cvtss2si (v4f32 VR128X:$src))),
Craig Toppera0be5a02017-12-10 19:47:56 +00006742 (VCVTSS2SIZrr_Int VR128X:$src)>;
Craig Topper5a63ca22017-03-13 03:59:06 +00006743 def : Pat<(i32 (int_x86_sse_cvtss2si sse_load_f32:$src)),
Craig Toppera0be5a02017-12-10 19:47:56 +00006744 (VCVTSS2SIZrm_Int sse_load_f32:$src)>;
Asaf Badouhad5c3fc2016-02-07 14:59:13 +00006745 def : Pat<(i64 (int_x86_sse_cvtss2si64 (v4f32 VR128X:$src))),
Craig Toppera0be5a02017-12-10 19:47:56 +00006746 (VCVTSS2SI64Zrr_Int VR128X:$src)>;
Craig Topper5a63ca22017-03-13 03:59:06 +00006747 def : Pat<(i64 (int_x86_sse_cvtss2si64 sse_load_f32:$src)),
Craig Toppera0be5a02017-12-10 19:47:56 +00006748 (VCVTSS2SI64Zrm_Int sse_load_f32:$src)>;
Asaf Badouhad5c3fc2016-02-07 14:59:13 +00006749 def : Pat<(i32 (int_x86_sse2_cvtsd2si (v2f64 VR128X:$src))),
Craig Toppera0be5a02017-12-10 19:47:56 +00006750 (VCVTSD2SIZrr_Int VR128X:$src)>;
Craig Topper5a63ca22017-03-13 03:59:06 +00006751 def : Pat<(i32 (int_x86_sse2_cvtsd2si sse_load_f64:$src)),
Craig Toppera0be5a02017-12-10 19:47:56 +00006752 (VCVTSD2SIZrm_Int sse_load_f64:$src)>;
Asaf Badouhad5c3fc2016-02-07 14:59:13 +00006753 def : Pat<(i64 (int_x86_sse2_cvtsd2si64 (v2f64 VR128X:$src))),
Craig Toppera0be5a02017-12-10 19:47:56 +00006754 (VCVTSD2SI64Zrr_Int VR128X:$src)>;
Craig Topper5a63ca22017-03-13 03:59:06 +00006755 def : Pat<(i64 (int_x86_sse2_cvtsd2si64 sse_load_f64:$src)),
Craig Toppera0be5a02017-12-10 19:47:56 +00006756 (VCVTSD2SI64Zrm_Int sse_load_f64:$src)>;
Asaf Badouhad5c3fc2016-02-07 14:59:13 +00006757} // HasAVX512
6758
// Select the integer -> FP conversion intrinsics (SSE cvtsi2ss / cvtsi642ss,
// SSE2 cvtsi2sd / cvtsi642sd, and AVX512 cvtusi2sd) to the *_Int instruction
// forms, for both register and folded-load integer sources.
Craig Topperac941b92016-09-25 16:33:53 +00006759let Predicates = [HasAVX512] in {
6760 def : Pat<(int_x86_sse_cvtsi2ss VR128X:$src1, GR32:$src2),
6761 (VCVTSI2SSZrr_Int VR128X:$src1, GR32:$src2)>;
6762 def : Pat<(int_x86_sse_cvtsi2ss VR128X:$src1, (loadi32 addr:$src2)),
6763 (VCVTSI2SSZrm_Int VR128X:$src1, addr:$src2)>;
6764 def : Pat<(int_x86_sse_cvtsi642ss VR128X:$src1, GR64:$src2),
6765 (VCVTSI642SSZrr_Int VR128X:$src1, GR64:$src2)>;
6766 def : Pat<(int_x86_sse_cvtsi642ss VR128X:$src1, (loadi64 addr:$src2)),
6767 (VCVTSI642SSZrm_Int VR128X:$src1, addr:$src2)>;
6768 def : Pat<(int_x86_sse2_cvtsi2sd VR128X:$src1, GR32:$src2),
6769 (VCVTSI2SDZrr_Int VR128X:$src1, GR32:$src2)>;
6770 def : Pat<(int_x86_sse2_cvtsi2sd VR128X:$src1, (loadi32 addr:$src2)),
6771 (VCVTSI2SDZrm_Int VR128X:$src1, addr:$src2)>;
6772 def : Pat<(int_x86_sse2_cvtsi642sd VR128X:$src1, GR64:$src2),
6773 (VCVTSI642SDZrr_Int VR128X:$src1, GR64:$src2)>;
6774 def : Pat<(int_x86_sse2_cvtsi642sd VR128X:$src1, (loadi64 addr:$src2)),
6775 (VCVTSI642SDZrm_Int VR128X:$src1, addr:$src2)>;
6776 def : Pat<(int_x86_avx512_cvtusi2sd VR128X:$src1, GR32:$src2),
6777 (VCVTUSI2SDZrr_Int VR128X:$src1, GR32:$src2)>;
6778 def : Pat<(int_x86_avx512_cvtusi2sd VR128X:$src1, (loadi32 addr:$src2)),
6779 (VCVTUSI2SDZrm_Int VR128X:$src1, addr:$src2)>;
6780} // Predicates = [HasAVX512]
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006781
Elad Cohen0c260102017-01-11 09:11:48 +00006782// Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
6783// which produce unnecessary vmovs{s,d} instructions
// Each pattern folds an X86Movss / X86Movsd whose second operand is
// scalar_to_vector(sint_to_fp GPR) into a single rr_Int conversion that takes
// the first X86Movs* operand ($dst) as its vector source. Only signed,
// register-source conversions are covered here.
6784let Predicates = [HasAVX512] in {
6785def : Pat<(v4f32 (X86Movss
6786 (v4f32 VR128X:$dst),
6787 (v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
6788 (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
6789
6790def : Pat<(v4f32 (X86Movss
6791 (v4f32 VR128X:$dst),
6792 (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
6793 (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
6794
6795def : Pat<(v2f64 (X86Movsd
6796 (v2f64 VR128X:$dst),
6797 (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
6798 (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
6799
6800def : Pat<(v2f64 (X86Movsd
6801 (v2f64 VR128X:$dst),
6802 (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
6803 (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
6804} // Predicates = [HasAVX512]
6805
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006806// Convert float/double to signed/unsigned int 32/64 with truncation
// Truncating scalar FP -> integer conversion. Defines, under HasAVX512:
//   rr / rm - isCodeGenOnly forms on the scalar FP class (_SrcRC.FRC) and a
//             scalar load, matching OpNode directly,
//   rr_Int  - vector-register source, OpNodeRnd with FROUND_CURRENT,
//   rrb_Int - vector-register source printed with {sae}, OpNodeRnd with
//             FROUND_NO_EXC (EVEX_B),
//   rm_Int  - memory source via _SrcRC.ScalarIntMemCPat; its isCodeGenOnly and
//             ForceDisassemble bits are both set from CodeGenOnly (default 1),
// plus InstAliases for rr_Int and rrb_Int using the mnemonic asm # aliasStr.
Simon Pilgrim18bcf932016-02-03 09:41:59 +00006807multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
6808 X86VectorVTInfo _DstRC, SDNode OpNode,
Craig Topper61d8a602018-01-06 21:27:25 +00006809 SDNode OpNodeRnd, OpndItins itins, string aliasStr,
6810 bit CodeGenOnly = 1>{
Asaf Badouh2744d212015-09-20 14:31:19 +00006811let Predicates = [HasAVX512] in {
Craig Topper90353a92018-01-06 21:02:22 +00006812 let isCodeGenOnly = 1 in {
Igor Bregerc59b3a22016-08-03 10:58:05 +00006813 def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
Asaf Badouh2744d212015-09-20 14:31:19 +00006814 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006815 [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))], itins.rr>,
6816 EVEX, Sched<[itins.Sched]>;
Igor Bregerc59b3a22016-08-03 10:58:05 +00006817 def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
Asaf Badouh2744d212015-09-20 14:31:19 +00006818 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006819 [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))],
6820 itins.rm>, EVEX, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Craig Topper90353a92018-01-06 21:02:22 +00006821 }
6822
6823 def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
6824 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
6825 [(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src),
6826 (i32 FROUND_CURRENT)))], itins.rr>,
6827 EVEX, VEX_LIG, Sched<[itins.Sched]>;
6828 def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
6829 !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
6830 [(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src),
6831 (i32 FROUND_NO_EXC)))], itins.rr>,
6832 EVEX,VEX_LIG , EVEX_B, Sched<[itins.Sched]>;
Craig Topper61d8a602018-01-06 21:27:25 +00006833 let isCodeGenOnly = CodeGenOnly, ForceDisassemble = CodeGenOnly in
Craig Topper0f4ccb72018-01-06 21:02:26 +00006834 def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
6835 (ins _SrcRC.IntScalarMemOp:$src),
6836 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
6837 [(set _DstRC.RC:$dst, (OpNodeRnd
6838 (_SrcRC.VT _SrcRC.ScalarIntMemCPat:$src),
6839 (i32 FROUND_CURRENT)))], itins.rm>,
6840 EVEX, VEX_LIG, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Simon Pilgrim916485c2016-08-18 11:22:22 +00006841
Igor Bregerc59b3a22016-08-03 10:58:05 +00006842 def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
Craig Topper90353a92018-01-06 21:02:22 +00006843 (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0>;
Craig Toppere2659d82018-01-05 23:13:54 +00006844 def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
Craig Topper90353a92018-01-06 21:02:22 +00006845 (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0>;
Asaf Badouh2744d212015-09-20 14:31:19 +00006846} //HasAVX512
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006847}
6848
// Same as avx512_cvt_s_all but with CodeGenOnly = 0 for rm_Int, and with an
// additional suffixed InstAlias (asm # aliasStr) for the rm_Int memory form.
Craig Topper61d8a602018-01-06 21:27:25 +00006849multiclass avx512_cvt_s_all_unsigned<bits<8> opc, string asm,
6850 X86VectorVTInfo _SrcRC,
6851 X86VectorVTInfo _DstRC, SDNode OpNode,
6852 SDNode OpNodeRnd, OpndItins itins,
6853 string aliasStr> :
6854 avx512_cvt_s_all<opc, asm, _SrcRC, _DstRC, OpNode, OpNodeRnd, itins,
6855 aliasStr, 0> {
6856let Predicates = [HasAVX512] in {
6857 def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
6858 (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
6859 _SrcRC.IntScalarMemOp:$src), 0>;
6860}
6861}
Asaf Badouh2744d212015-09-20 14:31:19 +00006862
// Truncating conversions. The signed forms (opcode 0x2C, fp_to_sint /
// X86cvtts2IntRnd) use avx512_cvt_s_all; the unsigned forms (opcode 0x78,
// fp_to_uint / X86cvtts2UIntRnd) use avx512_cvt_s_all_unsigned. The "64"
// names use i64x_info as the destination and are VEX_W.
Igor Bregerc59b3a22016-08-03 10:58:05 +00006863defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006864 fp_to_sint, X86cvtts2IntRnd, SSE_CVT_SS2SI_32, "{l}">,
Asaf Badouh2744d212015-09-20 14:31:19 +00006865 XS, EVEX_CD8<32, CD8VT1>;
Igor Bregerc59b3a22016-08-03 10:58:05 +00006866defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006867 fp_to_sint, X86cvtts2IntRnd, SSE_CVT_SS2SI_64, "{q}">,
Asaf Badouh2744d212015-09-20 14:31:19 +00006868 VEX_W, XS, EVEX_CD8<32, CD8VT1>;
Igor Bregerc59b3a22016-08-03 10:58:05 +00006869defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006870 fp_to_sint, X86cvtts2IntRnd, SSE_CVT_SD2SI, "{l}">,
Asaf Badouh2744d212015-09-20 14:31:19 +00006871 XD, EVEX_CD8<64, CD8VT1>;
Igor Bregerc59b3a22016-08-03 10:58:05 +00006872defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006873 fp_to_sint, X86cvtts2IntRnd, SSE_CVT_SD2SI, "{q}">,
Asaf Badouh2744d212015-09-20 14:31:19 +00006874 VEX_W, XD, EVEX_CD8<64, CD8VT1>;
6875
Craig Topper61d8a602018-01-06 21:27:25 +00006876defm VCVTTSS2USIZ: avx512_cvt_s_all_unsigned<0x78, "vcvttss2usi", f32x_info, i32x_info,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006877 fp_to_uint, X86cvtts2UIntRnd, SSE_CVT_SS2SI_32, "{l}">,
Asaf Badouh2744d212015-09-20 14:31:19 +00006878 XS, EVEX_CD8<32, CD8VT1>;
Craig Topper61d8a602018-01-06 21:27:25 +00006879defm VCVTTSS2USI64Z: avx512_cvt_s_all_unsigned<0x78, "vcvttss2usi", f32x_info, i64x_info,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006880 fp_to_uint, X86cvtts2UIntRnd, SSE_CVT_SS2SI_64, "{q}">,
Asaf Badouh2744d212015-09-20 14:31:19 +00006881 XS,VEX_W, EVEX_CD8<32, CD8VT1>;
Craig Topper61d8a602018-01-06 21:27:25 +00006882defm VCVTTSD2USIZ: avx512_cvt_s_all_unsigned<0x78, "vcvttsd2usi", f64x_info, i32x_info,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006883 fp_to_uint, X86cvtts2UIntRnd, SSE_CVT_SD2SI, "{l}">,
Asaf Badouh2744d212015-09-20 14:31:19 +00006884 XD, EVEX_CD8<64, CD8VT1>;
Craig Topper61d8a602018-01-06 21:27:25 +00006885defm VCVTTSD2USI64Z: avx512_cvt_s_all_unsigned<0x78, "vcvttsd2usi", f64x_info, i64x_info,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006886 fp_to_uint, X86cvtts2UIntRnd, SSE_CVT_SD2SI, "{q}">,
Asaf Badouh2744d212015-09-20 14:31:19 +00006887 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
Craig Topper61d8a602018-01-06 21:27:25 +00006888
// Select the SSE/SSE2 truncating cvttss2si / cvttsd2si intrinsics (32- and
// 64-bit results, register and folded-load sources) to the EVEX-encoded
// *_Int instructions.
// NOTE(review): the memory patterns name the output operand ssmem/sdmem,
// whereas the non-truncating cvtss2si/cvtsd2si patterns reuse
// sse_load_f32/sse_load_f64 on the output side - confirm whether the
// difference is intentional.
Asaf Badouh2744d212015-09-20 14:31:19 +00006889let Predicates = [HasAVX512] in {
6890 def : Pat<(i32 (int_x86_sse_cvttss2si (v4f32 VR128X:$src))),
Craig Topper8c252bc2016-09-18 18:59:33 +00006891 (VCVTTSS2SIZrr_Int VR128X:$src)>;
Ayman Musaf77219e2017-02-13 09:55:48 +00006892 def : Pat<(i32 (int_x86_sse_cvttss2si sse_load_f32:$src)),
6893 (VCVTTSS2SIZrm_Int ssmem:$src)>;
Asaf Badouh2744d212015-09-20 14:31:19 +00006894 def : Pat<(i64 (int_x86_sse_cvttss2si64 (v4f32 VR128X:$src))),
Craig Topper8c252bc2016-09-18 18:59:33 +00006895 (VCVTTSS2SI64Zrr_Int VR128X:$src)>;
Ayman Musaf77219e2017-02-13 09:55:48 +00006896 def : Pat<(i64 (int_x86_sse_cvttss2si64 sse_load_f32:$src)),
6897 (VCVTTSS2SI64Zrm_Int ssmem:$src)>;
Asaf Badouh2744d212015-09-20 14:31:19 +00006898 def : Pat<(i32 (int_x86_sse2_cvttsd2si (v2f64 VR128X:$src))),
Craig Topper8c252bc2016-09-18 18:59:33 +00006899 (VCVTTSD2SIZrr_Int VR128X:$src)>;
Ayman Musaf77219e2017-02-13 09:55:48 +00006900 def : Pat<(i32 (int_x86_sse2_cvttsd2si sse_load_f64:$src)),
6901 (VCVTTSD2SIZrm_Int sdmem:$src)>;
Asaf Badouh2744d212015-09-20 14:31:19 +00006902 def : Pat<(i64 (int_x86_sse2_cvttsd2si64 (v2f64 VR128X:$src))),
Craig Topper8c252bc2016-09-18 18:59:33 +00006903 (VCVTTSD2SI64Zrr_Int VR128X:$src)>;
Ayman Musaf77219e2017-02-13 09:55:48 +00006904 def : Pat<(i64 (int_x86_sse2_cvttsd2si64 sse_load_f64:$src)),
6905 (VCVTTSD2SI64Zrm_Int sdmem:$src)>;
Elena Demikhovskycf088092013-12-11 14:31:04 +00006906} // HasAVX512
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006907
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006908//===----------------------------------------------------------------------===//
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006909// AVX-512 Convert from float to double and back
6910//===----------------------------------------------------------------------===//
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006911
// Scalar FP <-> FP conversion, current-rounding-mode forms.
//   opc       - opcode byte shared by all forms.
//   OpcodeStr - mnemonic.
//   _         - type info of the destination (and of the pass-through $src1).
//   _Src      - type info of the converted operand ($src2).
//   OpNode    - SDNode matched by the *_Int forms; it is given
//               (src1, src2, FROUND_CURRENT).
//   itins     - itineraries / scheduling info (rr, rm, Sched are used).
// Emits four records:
//   rr_Int / rm_Int - maskable forms (AVX512_maskable_scalar) operating on the
//                     vector register class _.RC.
//   rr / rm         - isCodeGenOnly forms on the scalar FRC register classes
//                     with an empty pattern list, so they carry no selection
//                     pattern of their own.
Asaf Badouh2744d212015-09-20 14:31:19 +00006912multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006913 X86VectorVTInfo _Src, SDNode OpNode, OpndItins itins> {
Ayman Musa6e670cf2017-02-23 07:24:21 +00006914 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
Craig Toppera58abd12016-05-09 05:34:12 +00006915 (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
Asaf Badouh2744d212015-09-20 14:31:19 +00006916 "$src2, $src1", "$src1, $src2",
Craig Toppera58abd12016-05-09 05:34:12 +00006917 (_.VT (OpNode (_.VT _.RC:$src1),
Craig Toppera02e3942016-09-23 06:24:43 +00006918 (_Src.VT _Src.RC:$src2),
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006919 (i32 FROUND_CURRENT))), itins.rr>,
6920 EVEX_4V, VEX_LIG, Sched<[itins.Sched]>;
Ayman Musa6e670cf2017-02-23 07:24:21 +00006921 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
Craig Topper08b413a2017-03-13 05:14:44 +00006922 (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
Asaf Badouh2744d212015-09-20 14:31:19 +00006923 "$src2, $src1", "$src1, $src2",
Craig Toppera58abd12016-05-09 05:34:12 +00006924 (_.VT (OpNode (_.VT _.RC:$src1),
Craig Topper08b413a2017-03-13 05:14:44 +00006925 (_Src.VT _Src.ScalarIntMemCPat:$src2),
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006926 (i32 FROUND_CURRENT))), itins.rm>,
6927 EVEX_4V, VEX_LIG,
6928 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Ayman Musa6e670cf2017-02-23 07:24:21 +00006929
  // Pattern-less FRC forms; the memory form is explicitly marked mayLoad
  // because there is no pattern to infer it from.
Craig Topperd2011e32017-02-25 18:43:42 +00006930 let isCodeGenOnly = 1, hasSideEffects = 0 in {
6931 def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
6932 (ins _.FRC:$src1, _Src.FRC:$src2),
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006933 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
6934 itins.rr>, EVEX_4V, VEX_LIG, Sched<[itins.Sched]>;
Craig Topperd2011e32017-02-25 18:43:42 +00006935 let mayLoad = 1 in
6936 def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
6937 (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006938 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
6939 itins.rm>, EVEX_4V, VEX_LIG,
6940 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Craig Topperd2011e32017-02-25 18:43:42 +00006941 }
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006942}
6943
Asaf Badouh2744d212015-09-20 14:31:19 +00006944// Scalar Conversion with SAE - suppress all exceptions
// Emits a single maskable register form, rrb_Int, whose assembly string
// carries "{sae}" and whose pattern passes FROUND_NO_EXC as the last operand
// of OpNodeRnd. The record is tagged EVEX_B. `_` is the destination type
// info and `_Src` the type info of the converted operand ($src2).
6945multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006946 X86VectorVTInfo _Src, SDNode OpNodeRnd, OpndItins itins> {
Ayman Musa6e670cf2017-02-23 07:24:21 +00006947 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
Craig Toppera58abd12016-05-09 05:34:12 +00006948 (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
Asaf Badouh2744d212015-09-20 14:31:19 +00006949 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
Craig Toppera58abd12016-05-09 05:34:12 +00006950 (_.VT (OpNodeRnd (_.VT _.RC:$src1),
Asaf Badouh2744d212015-09-20 14:31:19 +00006951 (_Src.VT _Src.RC:$src2),
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006952 (i32 FROUND_NO_EXC))), itins.rr>,
6953 EVEX_4V, VEX_LIG, EVEX_B, Sched<[itins.Sched]>;
Asaf Badouh2744d212015-09-20 14:31:19 +00006954}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006955
Asaf Badouh2744d212015-09-20 14:31:19 +00006956// Scalar Conversion with rounding control (RC)
// Emits a single maskable register form, rrb_Int, that takes an extra
// AVX512RC:$rc operand; the pattern forwards it to OpNodeRnd as
// (i32 imm:$rc). The record is tagged EVEX_B and EVEX_RC. `_` is the
// destination type info and `_Src` the type info of the converted operand.
6957multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006958 X86VectorVTInfo _Src, SDNode OpNodeRnd, OpndItins itins> {
Ayman Musa6e670cf2017-02-23 07:24:21 +00006959 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
Craig Toppera58abd12016-05-09 05:34:12 +00006960 (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
Asaf Badouh2744d212015-09-20 14:31:19 +00006961 "$rc, $src2, $src1", "$src1, $src2, $rc",
Craig Toppera58abd12016-05-09 05:34:12 +00006962 (_.VT (OpNodeRnd (_.VT _.RC:$src1),
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006963 (_Src.VT _Src.RC:$src2), (i32 imm:$rc))),
Craig Toppera2f55282017-12-10 03:16:36 +00006964 itins.rr>,
6965 EVEX_4V, VEX_LIG, Sched<[itins.Sched]>,
Asaf Badouh2744d212015-09-20 14:31:19 +00006966 EVEX_B, EVEX_RC;
6967}
// Scalar double -> single wrapper. Under HasAVX512 it defines the "Z" family
// by combining avx512_cvt_fp_scalar (current-rounding forms) with
// avx512_cvt_fp_rc_scalar (explicit rounding-control form), and applies
// VEX_W, EVEX_CD8<64, CD8VT1> and XD. The same OpNodeRnd node is passed to
// both multiclasses. Note the argument order: (_src, _dst) here, but the
// inner multiclasses take (destination, source).
Craig Toppera02e3942016-09-23 06:24:43 +00006968multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006969 SDNode OpNodeRnd, OpndItins itins,
6970 X86VectorVTInfo _src, X86VectorVTInfo _dst> {
Asaf Badouh2744d212015-09-20 14:31:19 +00006971 let Predicates = [HasAVX512] in {
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006972 defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, itins>,
Asaf Badouh2744d212015-09-20 14:31:19 +00006973 avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006974 OpNodeRnd, itins>, VEX_W, EVEX_CD8<64, CD8VT1>, XD;
Asaf Badouh2744d212015-09-20 14:31:19 +00006975 }
6976}
6977
// Scalar single -> double wrapper. Under HasAVX512 it defines the "Z" family
// by combining avx512_cvt_fp_scalar (current-rounding forms) with
// avx512_cvt_fp_sae_scalar ({sae} form), and applies EVEX_CD8<32, CD8VT1>
// and XS. Unlike the sd2ss wrapper there is no rounding-control form and no
// VEX_W. The same OpNodeRnd node is passed to both multiclasses.
Craig Toppera02e3942016-09-23 06:24:43 +00006978multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006979 SDNode OpNodeRnd, OpndItins itins,
6980 X86VectorVTInfo _src, X86VectorVTInfo _dst> {
Asaf Badouh2744d212015-09-20 14:31:19 +00006981 let Predicates = [HasAVX512] in {
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006982 defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, itins>,
6983 avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, itins>,
Michael Zuckerman4b88a772016-12-18 14:29:00 +00006984 EVEX_CD8<32, CD8VT1>, XS;
Asaf Badouh2744d212015-09-20 14:31:19 +00006985 }
6986}
// "vcvtsd2ss" (opcode 0x5A): f64x_info source, f32x_info destination, matched
// through X86froundRnd. Tagged NotMemoryFoldable.
Craig Toppera02e3942016-09-23 06:24:43 +00006987defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss",
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006988 X86froundRnd, SSE_CVT_SD2SS, f64x_info,
6989 f32x_info>, NotMemoryFoldable;
// "vcvtss2sd" (opcode 0x5A): f32x_info source, f64x_info destination, matched
// through X86fpextRnd. Tagged NotMemoryFoldable.
Craig Toppera02e3942016-09-23 06:24:43 +00006990defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd",
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006991 X86fpextRnd, SSE_CVT_SS2SD, f32x_info,
6992 f64x_info>, NotMemoryFoldable;
Asaf Badouh2744d212015-09-20 14:31:19 +00006993
// Generic scalar f32 -> f64 extension: select the pattern-less FRC forms of
// VCVTSS2SDZ. The first (pass-through) operand is IMPLICIT_DEF. The second
// pattern folds a plain f32 load into the memory form.
Michael Kuperstein2bc3d4d2016-08-18 20:08:15 +00006994def : Pat<(f64 (fpextend FR32X:$src)),
Craig Topperafc3c822017-11-07 04:44:22 +00006995 (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
Asaf Badouh2744d212015-09-20 14:31:19 +00006996 Requires<[HasAVX512]>;
Michael Kuperstein2bc3d4d2016-08-18 20:08:15 +00006997def : Pat<(f64 (fpextend (loadf32 addr:$src))),
Ayman Musa6e670cf2017-02-23 07:24:21 +00006998 (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
Asaf Badouh2744d212015-09-20 14:31:19 +00006999 Requires<[HasAVX512]>;
7000
// Extending f32 load when optimizing for size: use the single memory-form
// instruction with the load folded in.
7001def : Pat<(f64 (extloadf32 addr:$src)),
Ayman Musa6e670cf2017-02-23 07:24:21 +00007002 (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007003 Requires<[HasAVX512, OptForSize]>;
7004
// Extending f32 load when optimizing for speed: load with VMOVSSZrm first and
// convert with the register form instead of folding the load.
// NOTE(review): presumably this avoids a dependency on the IMPLICIT_DEF
// pass-through register in the folded form - confirm.
Asaf Badouh2744d212015-09-20 14:31:19 +00007005def : Pat<(f64 (extloadf32 addr:$src)),
Ayman Musa6e670cf2017-02-23 07:24:21 +00007006 (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>,
Asaf Badouh2744d212015-09-20 14:31:19 +00007007 Requires<[HasAVX512, OptForSpeed]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007008
// Generic scalar f64 -> f32 rounding: select the pattern-less FRC register
// form of VCVTSD2SSZ with an IMPLICIT_DEF pass-through operand.
Michael Kuperstein2bc3d4d2016-08-18 20:08:15 +00007009def : Pat<(f32 (fpround FR64X:$src)),
Craig Topperafc3c822017-11-07 04:44:22 +00007010 (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007011 Requires<[HasAVX512]>;
Elad Cohen0c260102017-01-11 09:11:48 +00007012
// Match "extract element 0 of $src as f64, round to f32, and merge it into
// $dst via X86Movss" as a single VCVTSD2SSZrr_Int, which takes $dst as its
// pass-through operand.
7013def : Pat<(v4f32 (X86Movss
7014 (v4f32 VR128X:$dst),
7015 (v4f32 (scalar_to_vector
7016 (f32 (fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
Ayman Musa6e670cf2017-02-23 07:24:21 +00007017 (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
Elad Cohen0c260102017-01-11 09:11:48 +00007018 Requires<[HasAVX512]>;
7019
// Match "extract element 0 of $src as f32, extend to f64, and merge it into
// $dst via X86Movsd" as a single VCVTSS2SDZrr_Int, which takes $dst as its
// pass-through operand.
7020def : Pat<(v2f64 (X86Movsd
7021 (v2f64 VR128X:$dst),
7022 (v2f64 (scalar_to_vector
7023 (f64 (fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
Ayman Musa6e670cf2017-02-23 07:24:21 +00007024 (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
Elad Cohen0c260102017-01-11 09:11:48 +00007025 Requires<[HasAVX512]>;
7026
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007027//===----------------------------------------------------------------------===//
7028// AVX-512 Vector convert from signed/unsigned integer to float/double
7029// and from float/double to signed/unsigned integer
7030//===----------------------------------------------------------------------===//
7031
// Packed conversion building block.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   _         - destination vector type info.
//   _Src      - source vector type info.
//   OpNode    - SDNode applied to the source value.
//   itins     - itineraries / scheduling info.
//   Broadcast - broadcast decoration appended to "${src}" in the rmb form;
//               defaults to _.BroadcastStr.
//   Alias     - suffix appended to the mnemonic of the rm form only.
//   MemOp     - memory operand of the rm form; defaults to _Src.MemOp.
// Emits three maskable records: rr (register source), rm (full-width load
// through _Src.LdFrag) and rmb (scalar load broadcast with X86VBroadcast,
// tagged EVEX_B).
7032multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007033 X86VectorVTInfo _Src, SDNode OpNode, OpndItins itins,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007034 string Broadcast = _.BroadcastStr,
Coby Tayree97e9cf62016-11-20 17:09:56 +00007035 string Alias = "", X86MemOperand MemOp = _Src.MemOp> {
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007036
7037 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7038 (ins _Src.RC:$src), OpcodeStr, "$src", "$src",
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007039 (_.VT (OpNode (_Src.VT _Src.RC:$src))), itins.rr>,
7040 EVEX, Sched<[itins.Sched]>;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007041
7042 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
Coby Tayree97e9cf62016-11-20 17:09:56 +00007043 (ins MemOp:$src), OpcodeStr#Alias, "$src", "$src",
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007044 (_.VT (OpNode (_Src.VT
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007045 (bitconvert (_Src.LdFrag addr:$src))))), itins.rm>,
7046 EVEX, Sched<[itins.Sched.Folded]>;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007047
7048 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
Igor Breger4511e762016-02-22 11:48:27 +00007049 (ins _Src.ScalarMemOp:$src), OpcodeStr,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007050 "${src}"##Broadcast, "${src}"##Broadcast,
7051 (_.VT (OpNode (_Src.VT
7052 (X86VBroadcast (_Src.ScalarLdFrag addr:$src)))
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007053 )), itins.rm>, EVEX, EVEX_B,
7054 Sched<[itins.Sched.Folded]>;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007055}
7056// Conversion with SAE - suppress all exceptions
// Emits one maskable register form, rrb, whose assembly string carries
// "{sae}" and whose pattern passes FROUND_NO_EXC to OpNodeRnd. Tagged EVEX_B.
7057multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007058 X86VectorVTInfo _Src, SDNode OpNodeRnd,
7059 OpndItins itins> {
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007060 defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7061 (ins _Src.RC:$src), OpcodeStr,
7062 "{sae}, $src", "$src, {sae}",
7063 (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src),
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007064 (i32 FROUND_NO_EXC))), itins.rr>,
7065 EVEX, EVEX_B, Sched<[itins.Sched]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007066}
7067
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007068// Conversion with rounding control (RC)
// Emits one maskable register form, rrb, that takes an extra AVX512RC:$rc
// operand and forwards it to OpNodeRnd as (i32 imm:$rc). Tagged EVEX_B and
// EVEX_RC.
7069multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007070 X86VectorVTInfo _Src, SDNode OpNodeRnd,
7071 OpndItins itins> {
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007072 defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7073 (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
7074 "$rc, $src", "$src, $rc",
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007075 (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 imm:$rc))),
7076 itins.rr>, EVEX, EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00007077}
7078
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007079// Extend Float to Double
//   Z    (HasAVX512): v8f32 -> v8f64 via fpextend, plus an {sae} form matched
//                     through X86vfpextRnd.
//   Z128 (HasVLX):    v4f32x_info source, v2f64 result via X86vfpext; uses an
//                     explicit "{1to2}" broadcast string and an f64mem memory
//                     operand (presumably because only two source elements
//                     are consumed - confirm).
//   Z256 (HasVLX):    v4f32 -> v4f64 via fpextend.
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007080multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
7081 OpndItins itins> {
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007082 let Predicates = [HasAVX512] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007083 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8f32x_info,
7084 fpextend, itins>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007085 avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007086 X86vfpextRnd, itins>, EVEX_V512;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007087 }
7088 let Predicates = [HasVLX] in {
7089 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4f32x_info,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007090 X86vfpext, itins, "{1to2}", "", f64mem>, EVEX_V128;
7091 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4f32x_info, fpextend,
7092 itins>, EVEX_V256;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007093 }
7094}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007095
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007096// Truncate Double to Float
//   Z    (HasAVX512): v8f64 -> v8f32 via fpround, plus a rounding-control
//                     form matched through X86vfproundRnd.
//   Z128 (HasVLX):    v2f64 source, v4f32x_info result via X86vfpround;
//                     "{1to2}" broadcast string, "{x}" memory-form suffix.
//   Z256 (HasVLX):    v4f64 source, v4f32x_info result via fpround;
//                     "{1to4}" broadcast string, "{y}" memory-form suffix.
// Z128 and Z256 share the same destination type info, so their memory forms
// carry the "{x}"/"{y}" mnemonic suffixes and explicit broadcast strings.
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007097multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, OpndItins itins> {
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007098 let Predicates = [HasAVX512] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007099 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, fpround, itins>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007100 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007101 X86vfproundRnd, itins>, EVEX_V512;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007102 }
7103 let Predicates = [HasVLX] in {
7104 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007105 X86vfpround, itins, "{1to2}", "{x}">, EVEX_V128;
Michael Kuperstein2bc3d4d2016-08-18 20:08:15 +00007106 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, fpround,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007107 itins, "{1to4}", "{y}">, EVEX_V256;
Craig Topperb8596e42016-11-14 01:53:29 +00007108
    // Map the explicitly x/y-suffixed mnemonics onto the Z128/Z256 register
    // and memory forms.
7109 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7110 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
7111 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7112 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, f128mem:$src), 0>;
7113 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7114 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
7115 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7116 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0>;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007117 }
7118}
7119
// "vcvtpd2ps" (opcode 0x5A): packed double -> float; VEX_W, PD prefix,
// EVEX_CD8<64, CD8VF>.
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007120defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SSE_CVT_PD2PS>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007121 VEX_W, PD, EVEX_CD8<64, CD8VF>;
// "vcvtps2pd" (opcode 0x5A): packed float -> double; PS prefix,
// EVEX_CD8<32, CD8VH>.
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007122defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SSE_CVT_PS2PD>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007123 PS, EVEX_CD8<32, CD8VH>;
7124
// Fold an extending v8f32 load into the 512-bit memory form of VCVTPS2PD.
// Guarded by HasAVX512, matching the scalar fpextend/extload patterns in this
// file, so the pattern is never offered when the instruction is unavailable.
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007125def : Pat<(v8f64 (extloadv8f32 addr:$src)),
7126 (VCVTPS2PDZrm addr:$src)>,
 Requires<[HasAVX512]>;
Michael Liao5bf95782014-12-04 05:20:33 +00007127
// VLX-only selection patterns for the 128/256-bit packed FP<->FP conversions.
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007128let Predicates = [HasVLX] in {
  // An X86vzmovl wrapped around the (bitcast) result of a 128-bit
  // X86vfpround is matched directly to VCVTPD2PSZ128; AddedComplexity raises
  // the priority of these two patterns.
Craig Topperee277e12017-10-14 05:55:42 +00007129 let AddedComplexity = 15 in {
7130 def : Pat<(X86vzmovl (v2f64 (bitconvert
7131 (v4f32 (X86vfpround (v2f64 VR128X:$src)))))),
7132 (VCVTPD2PSZ128rr VR128X:$src)>;
7133 def : Pat<(X86vzmovl (v2f64 (bitconvert
7134 (v4f32 (X86vfpround (loadv2f64 addr:$src)))))),
7135 (VCVTPD2PSZ128rm addr:$src)>;
7136 }
  // Extending vector loads fold into the memory forms of VCVTPS2PD.
Craig Topper5471fc22016-11-06 04:12:52 +00007137 def : Pat<(v2f64 (extloadv2f32 addr:$src)),
7138 (VCVTPS2PDZ128rm addr:$src)>;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007139 def : Pat<(v4f64 (extloadv4f32 addr:$src)),
7140 (VCVTPS2PDZ256rm addr:$src)>;
7141}
Elena Demikhovsky3629b4a2014-01-06 08:45:54 +00007142
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007143// Convert Signed/Unsigned Doubleword to Double
//   OpNode    - node used for the Z (512-bit) and Z256 forms.
//   OpNode128 - node used for the Z128 form.
// Z is defined under HasAVX512; Z128/Z256 under HasVLX. The Z128 form has a
// v4i32x_info source and v2f64 result, and passes an explicit "{1to2}"
// broadcast string and an i64mem memory operand.
7144multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007145 SDNode OpNode128, OpndItins itins> {
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007146 // No rounding in this op
7147 let Predicates = [HasAVX512] in
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007148 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
7149 itins>, EVEX_V512;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007150
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007151 let Predicates = [HasVLX] in {
7152 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007153 OpNode128, itins, "{1to2}", "", i64mem>, EVEX_V128;
7154 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
7155 itins>, EVEX_V256;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007156 }
7157}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007158
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007159// Convert Signed/Unsigned Doubleword to Float
//   OpNode    - node for the plain rr/rm/rmb forms at every vector width.
//   OpNodeRnd - node for the 512-bit rounding-control form.
// Z (HasAVX512) is v16i32 -> v16f32 with an extra rounding-control form;
// Z128/Z256 (HasVLX) are v4i32 -> v4f32 and v8i32 -> v8f32.
7160multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007161 SDNode OpNodeRnd, OpndItins itins> {
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007162 let Predicates = [HasAVX512] in
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007163 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
7164 itins>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007165 avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007166 OpNodeRnd, itins>, EVEX_V512;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007167
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007168 let Predicates = [HasVLX] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007169 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
7170 itins>, EVEX_V128;
7171 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
7172 itins>, EVEX_V256;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007173 }
7174}
7175
7176// Convert Float to Signed/Unsigned Doubleword with truncation
//   OpNode    - node for the plain rr/rm/rmb forms at every vector width.
//   OpNodeRnd - node for the 512-bit {sae} form.
// Z (HasAVX512) is v16f32 -> v16i32 with an extra {sae} form; Z128/Z256
// (HasVLX) are v4f32 -> v4i32 and v8f32 -> v8i32.
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007177multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7178 SDNode OpNodeRnd, OpndItins itins> {
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007179 let Predicates = [HasAVX512] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007180 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
7181 itins>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007182 avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007183 OpNodeRnd, itins>, EVEX_V512;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007184 }
7185 let Predicates = [HasVLX] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007186 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
7187 itins>, EVEX_V128;
7188 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
7189 itins>, EVEX_V256;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007190 }
7191}
7192
7193// Convert Float to Signed/Unsigned Doubleword
//   OpNode    - node for the plain rr/rm/rmb forms at every vector width.
//   OpNodeRnd - node for the 512-bit rounding-control form.
// Same shape as the truncating variant, except that the extra 512-bit form
// takes a rounding-control operand instead of {sae}.
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007194multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7195 SDNode OpNodeRnd, OpndItins itins> {
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007196 let Predicates = [HasAVX512] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007197 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
7198 itins>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007199 avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007200 OpNodeRnd, itins>, EVEX_V512;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007201 }
7202 let Predicates = [HasVLX] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007203 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
7204 itins>, EVEX_V128;
7205 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
7206 itins>, EVEX_V256;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007207 }
7208}
7209
7210// Convert Double to Signed/Unsigned Doubleword with truncation
//   OpNode     - node for the Z (512-bit) and Z256 forms.
//   OpNode128  - node for the Z128 form.
//   OpNodeRnd  - node for the 512-bit {sae} form.
// Z is v8f64 -> v8i32 under HasAVX512; Z128/Z256 are defined under HasVLX and
// both produce v4i32x_info.
Craig Topper731bf9c2016-11-09 07:31:32 +00007211multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007212 SDNode OpNode128, SDNode OpNodeRnd,
7213 OpndItins itins> {
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007214 let Predicates = [HasAVX512] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007215 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
7216 itins>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007217 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007218 OpNodeRnd, itins>, EVEX_V512;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007219 }
7220 let Predicates = [HasVLX] in {
7221 // we need "x"/"y" suffixes in order to distinguish between 128 and 256
Craig Topper731bf9c2016-11-09 07:31:32 +00007222 // memory forms of these instructions in Asm Parser. They have the same
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007223 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
7224 // due to the same reason.
Craig Topper731bf9c2016-11-09 07:31:32 +00007225 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007226 OpNode128, itins, "{1to2}", "{x}">, EVEX_V128;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007227 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007228 itins, "{1to4}", "{y}">, EVEX_V256;
Craig Topperb8596e42016-11-14 01:53:29 +00007229
    // Map the explicitly x/y-suffixed mnemonics onto the Z128/Z256 register
    // and memory forms.
7230 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7231 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
7232 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7233 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, i128mem:$src), 0>;
7234 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7235 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
7236 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7237 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, i256mem:$src), 0>;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007238 }
7239}
7240
7241// Convert Double to Signed/Unsigned Doubleword
//   OpNode    - node for the plain rr/rm/rmb forms at every vector width.
//   OpNodeRnd - node for the 512-bit rounding-control form.
// Z is v8f64 -> v8i32 under HasAVX512; Z128/Z256 are defined under HasVLX and
// both produce v4i32x_info.
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007242multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7243 SDNode OpNodeRnd, OpndItins itins> {
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007244 let Predicates = [HasAVX512] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007245 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
7246 itins>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007247 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007248 OpNodeRnd, itins>, EVEX_V512;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007249 }
7250 let Predicates = [HasVLX] in {
7251 // we need "x"/"y" suffixes in order to distinguish between 128 and 256
7252 // memory forms of these instructions in Asm Parser. They have the same
7253 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
7254 // due to the same reason.
7255 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info, OpNode,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007256 itins, "{1to2}", "{x}">, EVEX_V128;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007257 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007258 itins, "{1to4}", "{y}">, EVEX_V256;
Craig Topperb8596e42016-11-14 01:53:29 +00007259
    // Map the explicitly x/y-suffixed mnemonics onto the Z128/Z256 register
    // and memory forms.
7260 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7261 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
7262 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7263 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, f128mem:$src), 0>;
7264 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7265 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
7266 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7267 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0>;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007268 }
7269}
7270
7271// Convert Double to Signed/Unsigned Quadword
//   OpNode    - node for the plain rr/rm/rmb forms at every vector width.
//   OpNodeRnd - node for the 512-bit rounding-control form.
// Z (v8f64 -> v8i64) requires HasDQI; Z128/Z256 (v2f64 -> v2i64 and
// v4f64 -> v4i64) require HasDQI and HasVLX.
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007272multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7273 SDNode OpNodeRnd, OpndItins itins> {
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007274 let Predicates = [HasDQI] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007275 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
7276 itins>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007277 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007278 OpNodeRnd,itins>, EVEX_V512;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007279 }
7280 let Predicates = [HasDQI, HasVLX] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007281 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
7282 itins>, EVEX_V128;
7283 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
7284 itins>, EVEX_V256;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007285 }
7286}
7287
7288// Convert Double to Signed/Unsigned Quadword with truncation
//   OpNode    - node for the plain rr/rm/rmb forms at every vector width.
//   OpNodeRnd - node for the 512-bit {sae} form.
// Z (v8f64 -> v8i64) requires HasDQI; Z128/Z256 require HasDQI and HasVLX.
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007289multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7290 SDNode OpNodeRnd, OpndItins itins> {
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007291 let Predicates = [HasDQI] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007292 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
7293 itins>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007294 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007295 OpNodeRnd, itins>, EVEX_V512;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007296 }
7297 let Predicates = [HasDQI, HasVLX] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007298 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
7299 itins>, EVEX_V128;
7300 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
7301 itins>, EVEX_V256;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007302 }
7303}
7304
7305// Convert Signed/Unsigned Quadword to Double
//   OpNode    - node for the plain rr/rm/rmb forms at every vector width.
//   OpNodeRnd - node for the 512-bit rounding-control form.
// Z (v8i64 -> v8f64) requires HasDQI; Z128/Z256 require HasDQI and HasVLX.
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007306multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
7307 SDNode OpNodeRnd, OpndItins itins> {
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007308 let Predicates = [HasDQI] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007309 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
7310 itins>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007311 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007312 OpNodeRnd, itins>, EVEX_V512;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007313 }
7314 let Predicates = [HasDQI, HasVLX] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007315 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
7316 itins>, EVEX_V128;
7317 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
7318 itins>, EVEX_V256;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007319 }
7320}
7321
7322// Convert Float to Signed/Unsigned Quadword
//   OpNode    - node for the plain rr/rm/rmb forms at every vector width.
//   OpNodeRnd - node for the 512-bit rounding-control form.
// Z (v8f32 -> v8i64) requires HasDQI; Z128/Z256 require HasDQI and HasVLX.
// The Z128 form also overrides the memory operand with f64mem.
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007323multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7324 SDNode OpNodeRnd, OpndItins itins> {
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007325 let Predicates = [HasDQI] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007326 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
7327 itins>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007328 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007329 OpNodeRnd, itins>, EVEX_V512;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007330 }
7331 let Predicates = [HasDQI, HasVLX] in {
7332 // Explicitly specified broadcast string, since we take only 2 elements
7333 // from v4f32x_info source
7334 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007335 itins, "{1to2}", "", f64mem>, EVEX_V128;
7336 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
7337 itins>, EVEX_V256;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007338 }
7339}
7340
7341// Convert Float to Signed/Unsigned Quadword with truncation
//   OpNode    - node for the Z (512-bit) and Z256 forms.
//   OpNode128 - node for the Z128 form.
//   OpNodeRnd - node for the 512-bit {sae} form.
// Z (v8f32 -> v8i64) requires HasDQI; Z128/Z256 require HasDQI and HasVLX.
// The Z128 form also overrides the memory operand with f64mem.
Craig Toppera39b6502016-12-10 06:02:48 +00007342multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007343 SDNode OpNode128, SDNode OpNodeRnd, OpndItins itins> {
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007344 let Predicates = [HasDQI] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007345 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
7346 itins>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007347 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007348 OpNodeRnd, itins>, EVEX_V512;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007349 }
7350 let Predicates = [HasDQI, HasVLX] in {
7351 // Explicitly specified broadcast string, since we take only 2 elements
7352 // from v4f32x_info source
Craig Toppera39b6502016-12-10 06:02:48 +00007353 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode128,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007354 itins, "{1to2}", "", f64mem>, EVEX_V128;
7355 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
7356 itins>, EVEX_V256;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007357 }
7358}
7359
7360// Convert Signed/Unsigned Quadword to Float
//   OpNode    - node for the Z (512-bit) and Z256 forms.
//   OpNode128 - node for the Z128 form.
//   OpNodeRnd - node for the 512-bit rounding-control form.
// Z (v8i64 -> v8f32) requires HasDQI; Z128/Z256 require HasDQI and HasVLX and
// both produce v4f32x_info.
Simon Pilgrima3af7962016-11-24 12:13:46 +00007361multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007362 SDNode OpNode128, SDNode OpNodeRnd, OpndItins itins> {
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007363 let Predicates = [HasDQI] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007364 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode,
7365 itins>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007366 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007367 OpNodeRnd, itins>, EVEX_V512;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007368 }
7369 let Predicates = [HasDQI, HasVLX] in {
7370 // we need "x"/"y" suffixes in order to distinguish between 128 and 256
7371 // memory forms of these instructions in Asm Parser. They have the same
7372 // dest type - 'v4f32x_info'. We also specify the broadcast string explicitly
7373 // due to the same reason.
Simon Pilgrima3af7962016-11-24 12:13:46 +00007374 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, OpNode128,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007375 itins, "{1to2}", "{x}">, EVEX_V128;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007376 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007377 itins, "{1to4}", "{y}">, EVEX_V256;
Craig Topperb8596e42016-11-14 01:53:29 +00007378
    // Map the explicitly x/y-suffixed mnemonics onto the Z128/Z256 register
    // and memory forms.
7379 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7380 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
7381 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7382 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, i128mem:$src), 0>;
7383 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7384 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
7385 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7386 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, i256mem:$src), 0>;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007387 }
7388}
7389
// Doubleword <-> floating-point conversions, plus the truncating
// floating-point -> doubleword conversions.

defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", sint_to_fp, X86VSintToFP,
                                 SSE_CVT_I2PD>,
                 XS, EVEX_CD8<32, CD8VH>;

defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", sint_to_fp,
                                 X86VSintToFpRnd, SSE_CVT_I2PS>,
                 PS, EVEX_CD8<32, CD8VF>;

defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", fp_to_sint,
                                   X86cvttp2siRnd, SSE_CVT_PS2I>,
                  XS, EVEX_CD8<32, CD8VF>;

defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", fp_to_sint,
                                   X86cvttp2si, X86cvttp2siRnd, SSE_CVT_PD2I>,
                  PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", fp_to_uint,
                                    X86cvttp2uiRnd, SSE_CVT_PS2I>,
                   PS, EVEX_CD8<32, CD8VF>;

defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", fp_to_uint,
                                    X86cvttp2ui, X86cvttp2uiRnd, SSE_CVT_PD2I>,
                   PS, VEX_W, EVEX_CD8<64, CD8VF>;

defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp,
                                  X86VUintToFP, SSE_CVT_I2PD>,
                  XS, EVEX_CD8<32, CD8VH>;

defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", uint_to_fp,
                                  X86VUintToFpRnd, SSE_CVT_I2PS>,
                  XD, EVEX_CD8<32, CD8VF>;
7420
// Non-truncating floating-point -> signed/unsigned doubleword conversions.

defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int,
                                 X86cvtp2IntRnd, SSE_CVT_PS2I>,
                 PD, EVEX_CD8<32, CD8VF>;

defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int,
                                 X86cvtp2IntRnd, SSE_CVT_PD2I>,
                 XD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt,
                                  X86cvtp2UIntRnd, SSE_CVT_PS2I>,
                  PS, EVEX_CD8<32, CD8VF>;

defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt,
                                  X86cvtp2UIntRnd, SSE_CVT_PD2I>,
                  VEX_W, PS, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00007436
// Floating-point <-> quadword conversions (rounding and truncating forms),
// followed by the quadword -> floating-point conversions.

defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int,
                                 X86cvtp2IntRnd, SSE_CVT_PD2I>,
                 VEX_W, PD, EVEX_CD8<64, CD8VF>;

defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int,
                                 X86cvtp2IntRnd, SSE_CVT_PS2I>,
                 PD, EVEX_CD8<32, CD8VH>;

defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt,
                                  X86cvtp2UIntRnd, SSE_CVT_PD2I>,
                  VEX_W, PD, EVEX_CD8<64, CD8VF>;

defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt,
                                  X86cvtp2UIntRnd, SSE_CVT_PS2I>,
                  PD, EVEX_CD8<32, CD8VH>;

defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", fp_to_sint,
                                   X86cvttp2siRnd, SSE_CVT_PD2I>,
                  VEX_W, PD, EVEX_CD8<64, CD8VF>;

defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", fp_to_sint,
                                   X86cvttp2si, X86cvttp2siRnd, SSE_CVT_PS2I>,
                  PD, EVEX_CD8<32, CD8VH>;

defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", fp_to_uint,
                                    X86cvttp2uiRnd, SSE_CVT_PD2I>,
                   VEX_W, PD, EVEX_CD8<64, CD8VF>;

defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", fp_to_uint,
                                    X86cvttp2ui, X86cvttp2uiRnd, SSE_CVT_PS2I>,
                   PD, EVEX_CD8<32, CD8VH>;

defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", sint_to_fp,
                                 X86VSintToFpRnd, SSE_CVT_I2PD>,
                 VEX_W, XS, EVEX_CD8<64, CD8VF>;

defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", uint_to_fp,
                                  X86VUintToFpRnd, SSE_CVT_I2PD>,
                  VEX_W, XS, EVEX_CD8<64, CD8VF>;

defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp, X86VSintToFP,
                                 X86VSintToFpRnd, SSE_CVT_I2PS>,
                 VEX_W, PS, EVEX_CD8<64, CD8VF>;

defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp, X86VUintToFP,
                                  X86VUintToFpRnd, SSE_CVT_I2PS>,
                  VEX_W, XD, EVEX_CD8<64, CD8VF>;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007484
// Without VLX only the 512-bit instruction forms are used here. Each pattern
// places the narrow source into a 512-bit register whose remaining lanes are
// IMPLICIT_DEF, runs the 512-bit conversion, and extracts the low subregister
// holding the result.
let Predicates = [HasAVX512, NoVLX] in {
  // fp -> unsigned doubleword (truncating).
  def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
            (EXTRACT_SUBREG
              (v16i32 (VCVTTPS2UDQZrr
                (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
                                       VR256X:$src1, sub_ymm)))),
              sub_ymm)>;

  def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))),
            (EXTRACT_SUBREG
              (v16i32 (VCVTTPS2UDQZrr
                (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
                                       VR128X:$src1, sub_xmm)))),
              sub_xmm)>;

  def : Pat<(v4i32 (fp_to_uint (v4f64 VR256X:$src1))),
            (EXTRACT_SUBREG
              (v8i32 (VCVTTPD2UDQZrr
                (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                      VR256X:$src1, sub_ymm)))),
              sub_xmm)>;

  // unsigned doubleword -> fp.
  def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
            (EXTRACT_SUBREG
              (v16f32 (VCVTUDQ2PSZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                       VR256X:$src1, sub_ymm)))),
              sub_ymm)>;

  def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
            (EXTRACT_SUBREG
              (v16f32 (VCVTUDQ2PSZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                       VR128X:$src1, sub_xmm)))),
              sub_xmm)>;

  def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
            (EXTRACT_SUBREG
              (v8f64 (VCVTUDQ2PDZrr
                (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                      VR128X:$src1, sub_xmm)))),
              sub_ymm)>;

  def : Pat<(v2f64 (X86VUintToFP (v4i32 VR128X:$src1))),
            (EXTRACT_SUBREG
              (v8f64 (VCVTUDQ2PDZrr
                (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                      VR128X:$src1, sub_xmm)))),
              sub_xmm)>;
}
7521
let Predicates = [HasAVX512, HasVLX] in {
  // A v2f64 -> v4i32 conversion wrapped in X86vzmovl is matched directly to
  // the 128-bit instruction; AddedComplexity raises the priority of these
  // patterns.
  let AddedComplexity = 15 in {
    def : Pat<(X86vzmovl
                (v2i64 (bitconvert
                  (v4i32 (X86cvtp2Int (v2f64 VR128X:$src)))))),
              (VCVTPD2DQZ128rr VR128X:$src)>;
    def : Pat<(X86vzmovl
                (v2i64 (bitconvert
                  (v4i32 (X86cvtp2Int (loadv2f64 addr:$src)))))),
              (VCVTPD2DQZ128rm addr:$src)>;
    def : Pat<(X86vzmovl
                (v2i64 (bitconvert
                  (v4i32 (X86cvtp2UInt (v2f64 VR128X:$src)))))),
              (VCVTPD2UDQZ128rr VR128X:$src)>;
    def : Pat<(X86vzmovl
                (v2i64 (bitconvert
                  (v4i32 (X86cvttp2si (v2f64 VR128X:$src)))))),
              (VCVTTPD2DQZ128rr VR128X:$src)>;
    def : Pat<(X86vzmovl
                (v2i64 (bitconvert
                  (v4i32 (X86cvttp2si (loadv2f64 addr:$src)))))),
              (VCVTTPD2DQZ128rm addr:$src)>;
    def : Pat<(X86vzmovl
                (v2i64 (bitconvert
                  (v4i32 (X86cvttp2ui (v2f64 VR128X:$src)))))),
              (VCVTTPD2UDQZ128rr VR128X:$src)>;
  }

  // Fold a 64-bit scalar load feeding a doubleword -> v2f64 conversion into
  // the memory form of the instruction.
  def : Pat<(v2f64 (X86VSintToFP
              (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (VCVTDQ2PDZ128rm addr:$src)>;
  def : Pat<(v2f64 (X86VSintToFP
              (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
            (VCVTDQ2PDZ128rm addr:$src)>;

  def : Pat<(v2f64 (X86VUintToFP
              (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (VCVTUDQ2PDZ128rm addr:$src)>;
  def : Pat<(v2f64 (X86VUintToFP
              (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
            (VCVTUDQ2PDZ128rm addr:$src)>;
}
7554
// Fold a full-width load into the 512-bit pd<->ps conversions.
let Predicates = [HasAVX512] in {
  def : Pat<(v8f32 (fpround (loadv8f64 addr:$src))),
            (VCVTPD2PSZrm addr:$src)>;

  def : Pat<(v8f64 (extloadv8f32 addr:$src)),
            (VCVTPS2PDZrm addr:$src)>;
}
7561
// A v2i64 -> v4f32 conversion wrapped in X86vzmovl is matched directly to the
// 128-bit quadword -> ps instruction.
let Predicates = [HasDQI, HasVLX], AddedComplexity = 15 in {
  def : Pat<(X86vzmovl
              (v2f64 (bitconvert
                (v4f32 (X86VSintToFP (v2i64 VR128X:$src)))))),
            (VCVTQQ2PSZ128rr VR128X:$src)>;
  def : Pat<(X86vzmovl
              (v2f64 (bitconvert
                (v4f32 (X86VUintToFP (v2i64 VR128X:$src)))))),
            (VCVTUQQ2PSZ128rr VR128X:$src)>;
}
7572
// Quadword conversions when DQI is available without VLX. Each pattern places
// the narrow source into a wider register whose remaining lanes are
// IMPLICIT_DEF, uses the 512-bit instruction form, and extracts the
// subregister that holds the result.
let Predicates = [HasDQI, NoVLX] in {
  // fp -> signed quadword (truncating).
  def : Pat<(v2i64 (fp_to_sint (v2f64 VR128X:$src1))),
            (EXTRACT_SUBREG
              (v8i64 (VCVTTPD2QQZrr
                (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                      VR128X:$src1, sub_xmm)))),
              sub_xmm)>;

  def : Pat<(v4i64 (fp_to_sint (v4f32 VR128X:$src1))),
            (EXTRACT_SUBREG
              (v8i64 (VCVTTPS2QQZrr
                (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
                                      VR128X:$src1, sub_xmm)))),
              sub_ymm)>;

  def : Pat<(v4i64 (fp_to_sint (v4f64 VR256X:$src1))),
            (EXTRACT_SUBREG
              (v8i64 (VCVTTPD2QQZrr
                (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                      VR256X:$src1, sub_ymm)))),
              sub_ymm)>;

  // fp -> unsigned quadword (truncating).
  def : Pat<(v2i64 (fp_to_uint (v2f64 VR128X:$src1))),
            (EXTRACT_SUBREG
              (v8i64 (VCVTTPD2UQQZrr
                (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                      VR128X:$src1, sub_xmm)))),
              sub_xmm)>;

  def : Pat<(v4i64 (fp_to_uint (v4f32 VR128X:$src1))),
            (EXTRACT_SUBREG
              (v8i64 (VCVTTPS2UQQZrr
                (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
                                      VR128X:$src1, sub_xmm)))),
              sub_ymm)>;

  def : Pat<(v4i64 (fp_to_uint (v4f64 VR256X:$src1))),
            (EXTRACT_SUBREG
              (v8i64 (VCVTTPD2UQQZrr
                (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                      VR256X:$src1, sub_ymm)))),
              sub_ymm)>;

  // signed quadword -> fp.
  def : Pat<(v4f32 (sint_to_fp (v4i64 VR256X:$src1))),
            (EXTRACT_SUBREG
              (v8f32 (VCVTQQ2PSZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                      VR256X:$src1, sub_ymm)))),
              sub_xmm)>;

  def : Pat<(v2f64 (sint_to_fp (v2i64 VR128X:$src1))),
            (EXTRACT_SUBREG
              (v8f64 (VCVTQQ2PDZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                      VR128X:$src1, sub_xmm)))),
              sub_xmm)>;

  def : Pat<(v4f64 (sint_to_fp (v4i64 VR256X:$src1))),
            (EXTRACT_SUBREG
              (v8f64 (VCVTQQ2PDZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                      VR256X:$src1, sub_ymm)))),
              sub_ymm)>;

  // unsigned quadword -> fp.
  def : Pat<(v4f32 (uint_to_fp (v4i64 VR256X:$src1))),
            (EXTRACT_SUBREG
              (v8f32 (VCVTUQQ2PSZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                      VR256X:$src1, sub_ymm)))),
              sub_xmm)>;

  def : Pat<(v2f64 (uint_to_fp (v2i64 VR128X:$src1))),
            (EXTRACT_SUBREG
              (v8f64 (VCVTUQQ2PDZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                      VR128X:$src1, sub_xmm)))),
              sub_xmm)>;

  def : Pat<(v4f64 (uint_to_fp (v4i64 VR256X:$src1))),
            (EXTRACT_SUBREG
              (v8f64 (VCVTUQQ2PDZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                      VR256X:$src1, sub_ymm)))),
              sub_ymm)>;
}
7634
Elena Demikhovskydd0794e2013-10-24 07:16:35 +00007635//===----------------------------------------------------------------------===//
7636// Half precision conversion instructions
7637//===----------------------------------------------------------------------===//
Simon Pilgrim569e53b2017-12-03 21:43:54 +00007638
// vcvtph2ps (opcode 0x13): register-source and memory-source maskable forms.
//   _dest / _src - vector type info of the result and of the source.
//   x86memop     - memory operand used by the rm form.
//   ld_frag      - load fragment; its result is bitconverted to _src.VT.
//   itins        - itineraries and scheduling class for rr / rm.
multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                           X86MemOperand x86memop, PatFrag ld_frag,
                           OpndItins itins> {
  defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
                            (ins _src.RC:$src),
                            "vcvtph2ps", "$src", "$src",
                            (X86cvtph2ps (_src.VT _src.RC:$src)),
                            itins.rr>,
            T8PD, Sched<[itins.Sched]>;

  defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
                            (ins x86memop:$src),
                            "vcvtph2ps", "$src", "$src",
                            (X86cvtph2ps
                              (_src.VT (bitconvert (ld_frag addr:$src)))),
                            itins.rm>,
            T8PD, Sched<[itins.Sched.Folded]>;
}
7653
// vcvtph2ps register form with the {sae} operand: matches X86cvtph2psRnd with
// FROUND_NO_EXC and is encoded with EVEX_B.
multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                               OpndItins itins> {
  defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
                             (ins _src.RC:$src),
                             "vcvtph2ps", "{sae}, $src", "$src, {sae}",
                             (X86cvtph2psRnd (_src.VT _src.RC:$src),
                                             (i32 FROUND_NO_EXC)),
                             itins.rr>,
             T8PD, EVEX_B, Sched<[itins.Sched]>;
}
7663
// 512-bit vcvtph2ps, including the {sae} register form.
let Predicates = [HasAVX512] in {
  defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem,
                                    loadv4i64, SSE_CVT_PH2PS>,
                    avx512_cvtph2ps_sae<v16f32_info, v16i16x_info,
                                        SSE_CVT_PH2PS>,
                    EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
}
Craig Toppere7fb3002017-11-07 07:13:07 +00007669
// 256-bit and 128-bit vcvtph2ps, available with VLX.
let Predicates = [HasVLX] in {
  defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
                                       loadv2i64, SSE_CVT_PH2PS>,
                       EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
  defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
                                       loadv2i64, SSE_CVT_PH2PS>,
                       EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;

  // Pattern match vcvtph2ps of a scalar i64 load: the three DAG shapes below
  // all select the memory form of the 128-bit instruction.
  def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzmovl_v2i64 addr:$src)))),
            (VCVTPH2PSZ128rm addr:$src)>;
  def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzload_v2i64 addr:$src)))),
            (VCVTPH2PSZ128rm addr:$src)>;
  def : Pat<(v4f32 (X86cvtph2ps
              (v8i16 (bitconvert
                (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
            (VCVTPH2PSZ128rm addr:$src)>;
}
7687
// vcvtps2ph (opcode 0x1D): a maskable register-destination form plus plain and
// write-masked store forms.
//   _dest / _src - vector type info of the result and of the source.
//   x86memop     - memory operand used by the store forms.
//   itins        - itineraries and scheduling class.
multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                           X86MemOperand x86memop, OpndItins itins> {
  defm rr : AVX512_maskable<0x1D, MRMDestReg, _dest, (outs _dest.RC:$dst),
                            (ins _src.RC:$src1, i32u8imm:$src2),
                            "vcvtps2ph", "$src2, $src1", "$src1, $src2",
                            (X86cvtps2ph (_src.VT _src.RC:$src1),
                                         (i32 imm:$src2)),
                            itins.rr, 0, 0>,
            AVX512AIi8Base, Sched<[itins.Sched]>;

  // The store forms carry no selection pattern (empty pattern list), so
  // mayStore and hasSideEffects are stated explicitly.
  let hasSideEffects = 0, mayStore = 1 in {
    def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
                        (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
                        "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                        [], itins.rm>,
             Sched<[itins.Sched.Folded, ReadAfterLd]>;

    def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
                         (ins x86memop:$dst, _dest.KRCWM:$mask,
                              _src.RC:$src1, i32u8imm:$src2),
                         "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                         [], itins.rm>,
              EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
Simon Pilgrim569e53b2017-12-03 21:43:54 +00007707
// vcvtps2ph register form with the {sae} operand. It is defined through
// AVX512_maskable_in_asm with an empty pattern list, so hasSideEffects is
// stated explicitly.
multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                               OpndItins itins> {
  let hasSideEffects = 0 in {
    defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
                                      (outs _dest.RC:$dst),
                                      (ins _src.RC:$src1, i32u8imm:$src2),
                                      "vcvtps2ph",
                                      "$src2, {sae}, $src1",
                                      "$src1, {sae}, $src2",
                                      [], itins.rr>,
               EVEX_B, AVX512AIi8Base, Sched<[itins.Sched]>;
  }
}
Simon Pilgrim569e53b2017-12-03 21:43:54 +00007717
// 512-bit vcvtps2ph (including the {sae} register form) and the pattern that
// folds a full-width store of its result into the memory-destination form.
let Predicates = [HasAVX512] in {
  defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
                                    SSE_CVT_PS2PH>,
                    avx512_cvtps2ph_sae<v16i16x_info, v16f32_info,
                                        SSE_CVT_PS2PH>,
                    EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;

  def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, i32:$src2)), addr:$dst),
            (VCVTPS2PHZmr addr:$dst, VR512:$src1, imm:$src2)>;
}

// 256-bit and 128-bit vcvtps2ph. The store-folding patterns below select
// VCVTPS2PHZ128mr / VCVTPS2PHZ256mr, which are only defined under HasVLX, so
// the patterns must carry the same predicate; guarding them with HasAVX512
// alone would let them be selected on targets that lack VLX.
let Predicates = [HasVLX] in {
  defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
                                       SSE_CVT_PS2PH>,
                       EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
  defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
                                       SSE_CVT_PS2PH>,
                       EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;

  // Store of the low 64 bits of the 128-bit result, viewed as f64 or as i64.
  def : Pat<(store (f64 (extractelt
                         (bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
  def : Pat<(store (i64 (extractelt
                         (bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;

  // Store of the full v8i16 result of the 256-bit source form.
  def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, i32:$src2)), addr:$dst),
            (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, imm:$src2)>;
}
Asaf Badouh2489f352015-12-02 08:17:51 +00007746
// Patterns for matching conversions from float to half-float and vice versa.
let Predicates = [HasVLX] in {
  // The immediate 4 (0b100) selects MXCSR.RC for rounding rather than an
  // explicitly encoded rounding mode. Explicitly encoding the default
  // (Nearest-Even, 0) would be equivalent in the configurations we support,
  // but deferring to MXCSR is more consistent with other instructions, which
  // are always controlled by it.
  def : Pat<(fp_to_f16 FR32X:$src),
            (i16 (EXTRACT_SUBREG
                   (VMOVPDI2DIZrr
                     (VCVTPS2PHZ128rr (COPY_TO_REGCLASS FR32X:$src, VR128X),
                                      4)),
                   sub_16bit))>;

  def : Pat<(f16_to_fp GR16:$src),
            (f32 (COPY_TO_REGCLASS
                   (VCVTPH2PSZ128rr
                     (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128X)),
                   FR32X))>;

  // Round trip through half precision, kept entirely in vector registers.
  def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32X:$src))),
            (f32 (COPY_TO_REGCLASS
                   (VCVTPH2PSZ128rr
                     (VCVTPS2PHZ128rr (COPY_TO_REGCLASS FR32X:$src, VR128X),
                                      4)),
                   FR32X))>;
}
7766
// Unordered/Ordered scalar fp compare with SAE and set EFLAGS
//
// Register-register form only; it has no selection pattern, so hasSideEffects
// is stated explicitly.
multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
                              string OpcodeStr, OpndItins itins> {
  let hasSideEffects = 0 in {
    def rrb : AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
                     OpcodeStr # "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}",
                     [], itins.rr>,
              EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[itins.Sched]>;
  }
}
7776
// {sae} forms of the scalar compares; all of them define EFLAGS.
let Defs = [EFLAGS] in
let Predicates = [HasAVX512] in {
  defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss",
                                      SSE_COMIS>,
                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd",
                                      SSE_COMIS>,
                   AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
  defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss",
                                     SSE_COMIS>,
                  AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
  defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd",
                                     SSE_COMIS>,
                  AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
}
7787
// EVEX-encoded scalar compares; all of them define EFLAGS.
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  // Scalar-register forms matched from X86cmp.
  defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
                                 "ucomiss", SSE_COMIS>,
                   PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64,
                                 "ucomisd", SSE_COMIS>,
                   PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;

  // The ordered compares are defined with an empty pattern list.
  let Pattern = []<dag> in {
    defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, undef, f32, f32mem, loadf32,
                                  "comiss", SSE_COMIS>,
                    PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
    defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, undef, f64, f64mem, loadf64,
                                  "comisd", SSE_COMIS>,
                    PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  }

  // Vector-register forms matched from X86ucomi / X86comi; marked
  // isCodeGenOnly.
  let isCodeGenOnly = 1 in {
    defm VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
                                       sse_load_f32, "ucomiss", SSE_COMIS>,
                     PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
    defm VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
                                       sse_load_f64, "ucomisd", SSE_COMIS>,
                     PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;

    defm VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
                                      sse_load_f32, "comiss", SSE_COMIS>,
                    PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
    defm VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
                                      sse_load_f64, "comisd", SSE_COMIS>,
                    PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  }
}
Michael Liao5bf95782014-12-04 05:20:33 +00007819
/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
///
/// Two-source scalar forms: register-register (rr) and register-memory (rm),
/// both built on AVX512_maskable_scalar and guarded by HasAVX512.
multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         OpndItins itins, X86VectorVTInfo _> {
  let Predicates = [HasAVX512] in
  let ExeDomain = _.ExeDomain in {
    defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                     (ins _.RC:$src1, _.RC:$src2),
                                     OpcodeStr,
                                     "$src2, $src1", "$src1, $src2",
                                     (OpNode (_.VT _.RC:$src1),
                                             (_.VT _.RC:$src2)),
                                     itins.rr>,
              EVEX_4V, Sched<[itins.Sched]>;

    defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                     (ins _.RC:$src1, _.IntScalarMemOp:$src2),
                                     OpcodeStr,
                                     "$src2, $src1", "$src1, $src2",
                                     (OpNode (_.VT _.RC:$src1),
                                             _.ScalarIntMemCPat:$src2),
                                     itins.rm>,
              EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
7837
// Scalar rcp14 / rsqrt14 instantiations (single and double precision).
defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SSE_RCPS,
                              f32x_info>,
                EVEX_CD8<32, CD8VT1>, T8PD, NotMemoryFoldable;
defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SSE_RCPS,
                              f64x_info>,
                VEX_W, EVEX_CD8<64, CD8VT1>, T8PD, NotMemoryFoldable;
defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s, SSE_RSQRTSS,
                                f32x_info>,
                  EVEX_CD8<32, CD8VT1>, T8PD, NotMemoryFoldable;
defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s, SSE_RSQRTSS,
                                f64x_info>,
                  VEX_W, EVEX_CD8<64, CD8VT1>, T8PD, NotMemoryFoldable;
Elena Demikhovskya3a71402013-10-09 08:16:14 +00007846
/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
///
/// Single-source packed forms: register (r), full-vector memory (m) and
/// scalar-broadcast memory (mb, encoded with EVEX_B).
multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                             (ins _.RC:$src),
                             OpcodeStr, "$src", "$src",
                             (_.FloatVT (OpNode _.RC:$src)),
                             itins.rr>,
             EVEX, T8PD, Sched<[itins.Sched]>;

    defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                             (ins _.MemOp:$src),
                             OpcodeStr, "$src", "$src",
                             (OpNode
                               (_.FloatVT (bitconvert (_.LdFrag addr:$src)))),
                             itins.rm>,
             EVEX, T8PD, Sched<[itins.Sched.Folded, ReadAfterLd]>;

    defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                              (ins _.ScalarMemOp:$src),
                              OpcodeStr,
                              "${src}" # _.BroadcastStr,
                              "${src}" # _.BroadcastStr,
                              (OpNode
                                (_.FloatVT
                                  (X86VBroadcast
                                    (_.ScalarLdFrag addr:$src)))),
                              itins.rm>,
              EVEX, T8PD, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
Robert Khasanov3e534c92014-10-28 16:37:13 +00007868
// Instantiates avx512_fp14_p for every vector length: the 512-bit ps/pd forms
// unconditionally, and the 128/256-bit forms under HasVLX. The "ps"/"pd"
// suffix is appended to OpcodeStr; itins.s / itins.d supply the single- and
// double-precision itineraries.
multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                SizeItins itins> {
  defm PSZ : avx512_fp14_p<opc, OpcodeStr # "ps", OpNode, itins.s,
                           v16f32_info>,
             EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp14_p<opc, OpcodeStr # "pd", OpNode, itins.d,
                           v8f64_info>,
             EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp14_p<opc, OpcodeStr # "ps", OpNode, itins.s,
                                v4f32x_info>,
                  EVEX_V128, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp14_p<opc, OpcodeStr # "ps", OpNode, itins.s,
                                v8f32x_info>,
                  EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp14_p<opc, OpcodeStr # "pd", OpNode, itins.d,
                                v2f64x_info>,
                  EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp14_p<opc, OpcodeStr # "pd", OpNode, itins.d,
                                v4f64x_info>,
                  EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
  }
}
7892
// Packed 14-bit approximations: vrsqrt14{ps,pd} (0x4E) and vrcp14{ps,pd}
// (0x4C), each expanded over all vector lengths by avx512_fp14_p_vl_all.
defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14,
                                     SSE_RSQRT_P>;
defm VRCP14   : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14,
                                     SSE_RCP_P>;
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00007895
/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
///
/// Scalar two-source FP operation whose node carries an explicit rounding
/// operand. Variants emitted:
///   r  - both sources in registers, FROUND_CURRENT
///   rb - both sources in registers with {sae} (FROUND_NO_EXC, EVEX_B)
///   m  - second source folded from memory, FROUND_CURRENT
multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         SDNode OpNode, OpndItins itins> {
  let ExeDomain = _.ExeDomain in {
  defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                           (i32 FROUND_CURRENT)), itins.rr>,
                           Sched<[itins.Sched]>;

  // The {sae} form reads both sources from registers (MRMSrcReg), so it uses
  // the register-register itinerary, consistent with its unfolded
  // Sched<[itins.Sched]> and with the other register {sae}/rounding forms in
  // this file.
  defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                            (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                            (i32 FROUND_NO_EXC)), itins.rr>, EVEX_B,
                            Sched<[itins.Sched]>;

  defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
                         (i32 FROUND_CURRENT)), itins.rm>,
                         Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
7922
// Scalar wrapper around avx512_fp28_s: defines the "ss" variant on f32x_info
// (itins.s) and the "sd" variant on f64x_info (itins.d, VEX_W).
multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                        SizeItins itins> {
  defm SS : avx512_fp28_s<opc, !strconcat(OpcodeStr, "ss"), f32x_info,
                          OpNode, itins.s>,
            EVEX_CD8<32, CD8VT1>;
  defm SD : avx512_fp28_s<opc, !strconcat(OpcodeStr, "sd"), f64x_info,
                          OpNode, itins.d>,
            EVEX_CD8<64, CD8VT1>, VEX_W;
}
Elena Demikhovskya3a71402013-10-09 08:16:14 +00007930
// Scalar 28-bit approximations; only defined when HasERI holds.
let Predicates = [HasERI] in {
  defm VRCP28   : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, SSE_RCP_S>,
                  T8PD, EVEX_4V;
  defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, SSE_RSQRT_S>,
                  T8PD, EVEX_4V;
}

// Scalar vgetexp reuses the same multiclass but sits outside the HasERI
// predicate scope.
defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds,
                            SSE_ALU_ITINS_S>,
               T8PD, EVEX_4V;
/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
///
/// Packed single-source FP operation whose node takes a rounding operand;
/// every variant here passes FROUND_CURRENT. Variants emitted:
///   r  - register source
///   m  - full-vector memory source
///   mb - broadcast scalar memory source (EVEX_B)
multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         SDNode OpNode, OpndItins itins> {
  let ExeDomain = _.ExeDomain in {
    defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                             (ins _.RC:$src), OpcodeStr, "$src", "$src",
                             (OpNode (_.VT _.RC:$src), (i32 FROUND_CURRENT)),
                             itins.rr>,
             Sched<[itins.Sched]>;

    defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                             (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                             (OpNode (_.FloatVT
                                       (bitconvert (_.LdFrag addr:$src))),
                                     (i32 FROUND_CURRENT)),
                             itins.rm>,
             Sched<[itins.Sched.Folded, ReadAfterLd]>;

    defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                              (ins _.ScalarMemOp:$src), OpcodeStr,
                              "${src}"#_.BroadcastStr,
                              "${src}"#_.BroadcastStr,
                              (OpNode (_.FloatVT
                                        (X86VBroadcast
                                          (_.ScalarLdFrag addr:$src))),
                                      (i32 FROUND_CURRENT)),
                              itins.rm>,
              EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
// Adds the register-only {sae} variant (rb) of a packed fp28 operation: the
// node is invoked with FROUND_NO_EXC and the instruction is tagged EVEX_B.
multiclass avx512_fp28_p_round<bits<8> opc, string OpcodeStr,
                               X86VectorVTInfo _, SDNode OpNode,
                               OpndItins itins> {
  let ExeDomain = _.ExeDomain in {
    defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                              (ins _.RC:$src), OpcodeStr,
                              "{sae}, $src", "$src, {sae}",
                              (OpNode (_.VT _.RC:$src), (i32 FROUND_NO_EXC)),
                              itins.rr>,
              EVEX_B, Sched<[itins.Sched]>;
  }
}
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00007975
// 512-bit packed wrapper: each of PS and PD combines the plain forms
// (avx512_fp28_p) with the {sae} form (avx512_fp28_p_round).
multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
                      SizeItins itins> {
  defm PS : avx512_fp28_p<opc, !strconcat(OpcodeStr, "ps"), v16f32_info,
                          OpNode, itins.s>,
            avx512_fp28_p_round<opc, !strconcat(OpcodeStr, "ps"), v16f32_info,
                                OpNode, itins.s>,
            T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PD : avx512_fp28_p<opc, !strconcat(OpcodeStr, "pd"), v8f64_info,
                          OpNode, itins.d>,
            avx512_fp28_p_round<opc, !strconcat(OpcodeStr, "pd"), v8f64_info,
                                OpNode, itins.d>,
            T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00007985
// 128/256-bit packed instantiations of avx512_fp28_p. No {sae} variants are
// created here; only the r/m/mb forms from avx512_fp28_p.
multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode, SizeItins itins> {
  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp28_p<opc, !strconcat(OpcodeStr, "ps"), v4f32x_info,
                                OpNode, itins.s>,
                  EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp28_p<opc, !strconcat(OpcodeStr, "ps"), v8f32x_info,
                                OpNode, itins.s>,
                  EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp28_p<opc, !strconcat(OpcodeStr, "pd"), v2f64x_info,
                                OpNode, itins.d>,
                  EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp28_p<opc, !strconcat(OpcodeStr, "pd"), v4f64x_info,
                                OpNode, itins.d>,
                  EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
  }
}
// Packed 512-bit instructions that are only defined when HasERI holds.
let Predicates = [HasERI] in {
  defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, SSE_RSQRT_P>, EVEX;
  defm VRCP28   : avx512_eri<0xCA, "vrcp28", X86rcp28, SSE_RCP_P>, EVEX;
  defm VEXP2    : avx512_eri<0xC8, "vexp2", X86exp2, SSE_ALU_ITINS_P>, EVEX;
}

// Packed vgetexp is outside the HasERI scope and additionally gets the
// HasVLX-guarded 128/256-bit forms from avx512_fp_unaryop_packed.
defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd, SSE_ALU_ITINS_P>,
               avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd,
                                        SSE_ALU_ITINS_P>,
               EVEX;
Asaf Badouh402ebb32015-06-03 13:41:48 +00008009
// Packed sqrt with an explicit rounding-control operand ($rc): a single
// register form matched through X86fsqrtRnd and tagged EVEX_B + EVEX_RC.
multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
                                    OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                              (ins _.RC:$src, AVX512RC:$rc), OpcodeStr,
                              "$rc, $src", "$src, $rc",
                              (_.VT (X86fsqrtRnd _.RC:$src, (i32 imm:$rc))),
                              itins.rr>,
              EVEX, EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
  }
}
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00008018
// Packed sqrt matched on the generic fsqrt node. Variants emitted:
//   r  - register source
//   m  - full-vector memory source
//   mb - broadcast scalar memory source (EVEX_B)
multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr, OpndItins itins,
                              X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                             (ins _.RC:$src), OpcodeStr, "$src", "$src",
                             (_.FloatVT (fsqrt _.RC:$src)), itins.rr>,
             EVEX, Sched<[itins.Sched]>;

    defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                             (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                             (fsqrt (_.FloatVT
                                      (bitconvert (_.LdFrag addr:$src)))),
                             itins.rm>,
             EVEX, Sched<[itins.Sched.Folded, ReadAfterLd]>;

    defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                              (ins _.ScalarMemOp:$src), OpcodeStr,
                              "${src}"#_.BroadcastStr,
                              "${src}"#_.BroadcastStr,
                              (fsqrt (_.FloatVT
                                       (X86VBroadcast
                                         (_.ScalarLdFrag addr:$src)))),
                              itins.rm>,
              EVEX, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
8039
// Instantiates avx512_sqrt_packed for all packed widths. PS variants use
// SSE_SQRTPS, PD variants use SSE_SQRTPD (plus VEX_W).
multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr> {
  defm PSZ : avx512_sqrt_packed<opc, OpcodeStr#"ps", SSE_SQRTPS, v16f32_info>,
             EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_sqrt_packed<opc, OpcodeStr#"pd", SSE_SQRTPD, v8f64_info>,
             EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;

  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_sqrt_packed<opc, OpcodeStr#"ps", SSE_SQRTPS,
                                     v4f32x_info>,
                  EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_sqrt_packed<opc, OpcodeStr#"ps", SSE_SQRTPS,
                                     v8f32x_info>,
                  EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_sqrt_packed<opc, OpcodeStr#"pd", SSE_SQRTPD,
                                     v2f64x_info>,
                  EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_sqrt_packed<opc, OpcodeStr#"pd", SSE_SQRTPD,
                                     v4f64x_info>,
                  EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
  }
}
8061
// Rounding-control sqrt forms; only the 512-bit PS and PD variants are
// instantiated.
multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr> {
  defm PSZ : avx512_sqrt_packed_round<opc, OpcodeStr#"ps", SSE_SQRTPS,
                                      v16f32_info>,
             EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_sqrt_packed_round<opc, OpcodeStr#"pd", SSE_SQRTPD,
                                      v8f64_info>,
             EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
}
8068
// Scalar sqrt. Emits:
//   r_Int / m_Int / rb_Int - maskable forms on the vector register class,
//                            matched through X86fsqrtRnds (rb_Int takes an
//                            explicit $rc rounding operand)
//   r / m                  - pattern-less, isCodeGenOnly forms on the scalar
//                            FP register class (_.FRC)
// followed by selection patterns that map fsqrt and the intrinsic Intr onto
// those instructions. SUFF is pasted into the instruction names the patterns
// look up (NAME#SUFF#Z...).
multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, OpndItins itins,
                              X86VectorVTInfo _, string SUFF,
                              Intrinsic Intr> {
  let ExeDomain = _.ExeDomain in {
    defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                        (ins _.RC:$src1, _.RC:$src2),
                                        OpcodeStr,
                                        "$src2, $src1", "$src1, $src2",
                                        (X86fsqrtRnds (_.VT _.RC:$src1),
                                                      (_.VT _.RC:$src2),
                                                      (i32 FROUND_CURRENT)),
                                        itins.rr>,
                 Sched<[itins.Sched]>;

    defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                        (ins _.RC:$src1,
                                             _.IntScalarMemOp:$src2),
                                        OpcodeStr,
                                        "$src2, $src1", "$src1, $src2",
                                        (X86fsqrtRnds (_.VT _.RC:$src1),
                                                      _.ScalarIntMemCPat:$src2,
                                                      (i32 FROUND_CURRENT)),
                                        itins.rm>,
                 Sched<[itins.Sched.Folded, ReadAfterLd]>;

    defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                         (ins _.RC:$src1, _.RC:$src2,
                                              AVX512RC:$rc),
                                         OpcodeStr,
                                         "$rc, $src2, $src1",
                                         "$src1, $src2, $rc",
                                         (X86fsqrtRnds (_.VT _.RC:$src1),
                                                       (_.VT _.RC:$src2),
                                                       (i32 imm:$rc)),
                                         itins.rr>,
                  EVEX_B, EVEX_RC, Sched<[itins.Sched]>;

    // FRC-based forms carry no patterns of their own; the Pat records below
    // select them.
    let hasSideEffects = 0, isCodeGenOnly = 1, Predicates = [HasAVX512] in {
      def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
                (ins _.FRC:$src1, _.FRC:$src2),
                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
                itins.rr>,
              Sched<[itins.Sched]>;

      let mayLoad = 1 in {
        def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
                  (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
                  itins.rm>,
                Sched<[itins.Sched.Folded, ReadAfterLd]>;
      }
    }
  }

  let Predicates = [HasAVX512] in {
    // Scalar fsqrt: the first source operand is left undefined.
    def : Pat<(_.EltVT (fsqrt _.FRC:$src)),
              (!cast<Instruction>(NAME#SUFF#Zr)
                  (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;

    // Intrinsic on a register: the same register feeds both sources.
    def : Pat<(Intr VR128X:$src),
              (!cast<Instruction>(NAME#SUFF#Zr_Int) VR128X:$src,
                                                   VR128X:$src)>;
  }

  // Load-folding patterns are only enabled together with OptForSize.
  let Predicates = [HasAVX512, OptForSize] in {
    def : Pat<(_.EltVT (fsqrt (load addr:$src))),
              (!cast<Instruction>(NAME#SUFF#Zm)
                  (_.EltVT (IMPLICIT_DEF)), addr:$src)>;

    def : Pat<(Intr _.ScalarIntMemCPat:$src2),
              (!cast<Instruction>(NAME#SUFF#Zm_Int)
                  (_.VT (IMPLICIT_DEF)), addr:$src2)>;
  }
}
Igor Breger4c4cd782015-09-20 09:13:41 +00008127
// Scalar sqrt for both element types: SSZ (f32x_info, "SS", XS) and
// SDZ (f64x_info, "SD", XD + VEX_W), each bound to its SSE/SSE2 intrinsic.
// NOTE(review): the scalar forms are given the packed itineraries
// SSE_SQRTPS / SSE_SQRTPD -- confirm whether scalar-specific itineraries
// were intended here.
multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr> {
  defm SSZ : avx512_sqrt_scalar<opc, !strconcat(OpcodeStr, "ss"), SSE_SQRTPS,
                                f32x_info, "SS", int_x86_sse_sqrt_ss>,
             EVEX_CD8<32, CD8VT1>, EVEX_4V, XS, NotMemoryFoldable;
  defm SDZ : avx512_sqrt_scalar<opc, !strconcat(OpcodeStr, "sd"), SSE_SQRTPD,
                                f64x_info, "SD", int_x86_sse2_sqrt_sd>,
             EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W, NotMemoryFoldable;
}
8137
// Packed vsqrt (opcode 0x51): plain forms for all widths plus the 512-bit
// rounding-control forms.
defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt">,
             avx512_sqrt_packed_all_round<0x51, "vsqrt">;

// Scalar vsqrt shares the prefix and opcode and is tagged VEX_LIG.
defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt">, VEX_LIG;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008142
// Scalar rndscale. Emits:
//   r_Int / rb_Int / m_Int - maskable forms on the vector register class;
//                            rb_Int is the {sae} form (FROUND_NO_EXC, EVEX_B)
//   r / m                  - pattern-less, isCodeGenOnly forms on _.FRC
// and then maps ffloor / fceil / ftrunc / frint / fnearbyint onto the r and m
// forms with the immediates 0x9 / 0xa / 0xb / 0x4 / 0xc respectively.
multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
                                  OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                     (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
                     "$src3, $src2, $src1", "$src1, $src2, $src3",
                     (_.VT (X86RndScales (_.VT _.RC:$src1),
                                         (_.VT _.RC:$src2),
                                         (i32 imm:$src3))),
                     itins.rr>,
                 Sched<[itins.Sched]>;

    defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
                      "$src3, {sae}, $src2, $src1",
                      "$src1, $src2, {sae}, $src3",
                      (_.VT (X86RndScalesRnd (_.VT _.RC:$src1),
                                             (_.VT _.RC:$src2),
                                             (i32 imm:$src3),
                                             (i32 FROUND_NO_EXC))),
                      itins.rr>,
                  EVEX_B, Sched<[itins.Sched]>;

    defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                     (ins _.RC:$src1, _.IntScalarMemOp:$src2,
                          i32u8imm:$src3),
                     OpcodeStr,
                     "$src3, $src2, $src1", "$src1, $src2, $src3",
                     (_.VT (X86RndScales _.RC:$src1,
                                         _.ScalarIntMemCPat:$src2,
                                         (i32 imm:$src3))),
                     itins.rm>,
                 Sched<[itins.Sched.Folded, ReadAfterLd]>;

    // FRC-based forms carry no patterns; the Pat records below select them.
    let hasSideEffects = 0, isCodeGenOnly = 1, Predicates = [HasAVX512] in {
      def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
                (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
                OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                [], itins.rr>,
              Sched<[itins.Sched]>;

      let mayLoad = 1 in {
        def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
                  (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                  OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                  [], itins.rm>,
                Sched<[itins.Sched.Folded, ReadAfterLd]>;
      }
    }
  }

  // Register-source rounding patterns; the first source is left undefined.
  let Predicates = [HasAVX512] in {
    def : Pat<(ffloor _.FRC:$src),
              (_.EltVT (!cast<Instruction>(NAME#r) (_.EltVT (IMPLICIT_DEF)),
                                                   _.FRC:$src, (i32 0x9)))>;
    def : Pat<(fceil _.FRC:$src),
              (_.EltVT (!cast<Instruction>(NAME#r) (_.EltVT (IMPLICIT_DEF)),
                                                   _.FRC:$src, (i32 0xa)))>;
    def : Pat<(ftrunc _.FRC:$src),
              (_.EltVT (!cast<Instruction>(NAME#r) (_.EltVT (IMPLICIT_DEF)),
                                                   _.FRC:$src, (i32 0xb)))>;
    def : Pat<(frint _.FRC:$src),
              (_.EltVT (!cast<Instruction>(NAME#r) (_.EltVT (IMPLICIT_DEF)),
                                                   _.FRC:$src, (i32 0x4)))>;
    def : Pat<(fnearbyint _.FRC:$src),
              (_.EltVT (!cast<Instruction>(NAME#r) (_.EltVT (IMPLICIT_DEF)),
                                                   _.FRC:$src, (i32 0xc)))>;
  }

  // Load-folding variants of the same patterns, enabled only with OptForSize.
  let Predicates = [HasAVX512, OptForSize] in {
    def : Pat<(ffloor (_.ScalarLdFrag addr:$src)),
              (_.EltVT (!cast<Instruction>(NAME#m) (_.EltVT (IMPLICIT_DEF)),
                                                   addr:$src, (i32 0x9)))>;
    def : Pat<(fceil (_.ScalarLdFrag addr:$src)),
              (_.EltVT (!cast<Instruction>(NAME#m) (_.EltVT (IMPLICIT_DEF)),
                                                   addr:$src, (i32 0xa)))>;
    def : Pat<(ftrunc (_.ScalarLdFrag addr:$src)),
              (_.EltVT (!cast<Instruction>(NAME#m) (_.EltVT (IMPLICIT_DEF)),
                                                   addr:$src, (i32 0xb)))>;
    def : Pat<(frint (_.ScalarLdFrag addr:$src)),
              (_.EltVT (!cast<Instruction>(NAME#m) (_.EltVT (IMPLICIT_DEF)),
                                                   addr:$src, (i32 0x4)))>;
    def : Pat<(fnearbyint (_.ScalarLdFrag addr:$src)),
              (_.EltVT (!cast<Instruction>(NAME#m) (_.EltVT (IMPLICIT_DEF)),
                                                   addr:$src, (i32 0xc)))>;
  }
}
8218
// Scalar rndscale instantiations: single precision (0x0A, f32x_info) and
// double precision (0x0B, f64x_info, VEX_W).
defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", SSE_ALU_F32S,
                                          f32x_info>,
                   AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VT1>;

defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", SSE_ALU_F64S,
                                          f64x_info>,
                   VEX_W, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VT1>;
Eric Christopher0d94fa92015-02-20 00:45:28 +00008225
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008226//-------------------------------------------------
8227// Integer truncate and extend operations
8228//-------------------------------------------------
8229
// Itinerary bundles for the extend and truncate instruction families. Both
// use the PSHUF reg/mem itinerary classes and the WriteShuffle256 scheduling
// class.
let Sched = WriteShuffle256 in {
  def AVX512_EXTEND   : OpndItins<IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI>;
  def AVX512_TRUNCATE : OpndItins<IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI>;
}
8239
// Common body of one truncate instruction at a single vector length.
//   rr  - maskable register-to-register form matched on OpNode
//   mr  - store form without patterns
//   mrk - write-masked store form without patterns
// The store forms get their selection patterns from avx512_trunc_mr_lowering.
multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               OpndItins itins, X86VectorVTInfo SrcInfo,
                               X86VectorVTInfo DestInfo,
                               X86MemOperand x86memop> {
  let ExeDomain = DestInfo.ExeDomain in {
    defm rr : AVX512_maskable<opc, MRMDestReg, DestInfo,
                              (outs DestInfo.RC:$dst),
                              (ins SrcInfo.RC:$src1), OpcodeStr,
                              "$src1", "$src1",
                              (DestInfo.VT
                                (OpNode (SrcInfo.VT SrcInfo.RC:$src1))),
                              itins.rr>,
              EVEX, T8XS, Sched<[itins.Sched]>;
  }

  // NOTE(review): mayLoad = 1 is set on forms that only write memory --
  // confirm whether that is intentional.
  let ExeDomain = DestInfo.ExeDomain, hasSideEffects = 0,
      mayLoad = 1, mayStore = 1 in {
    def mr : AVX512XS8I<opc, MRMDestMem, (outs),
                        (ins x86memop:$dst, SrcInfo.RC:$src),
                        OpcodeStr # "\t{$src, $dst|$dst, $src}",
                        [], itins.rm>,
             EVEX, Sched<[itins.Sched.Folded]>;

    def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
                         (ins x86memop:$dst, SrcInfo.KRCWM:$mask,
                              SrcInfo.RC:$src),
                         OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                         [], itins.rm>,
              EVEX, EVEX_K, Sched<[itins.Sched.Folded]>;
  } // mayStore = 1, mayLoad = 1, hasSideEffects = 0
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008262
// Selection patterns for the truncating store forms: truncFrag selects the
// unmasked "mr" instruction, mtruncFrag the write-masked "mrk" instruction.
// Instruction names are built from NAME and SrcInfo.ZSuffix.
multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
                                    X86VectorVTInfo DestInfo,
                                    PatFrag truncFrag, PatFrag mtruncFrag> {
  def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
            (!cast<Instruction>(NAME#SrcInfo.ZSuffix#mr)
                addr:$dst, SrcInfo.RC:$src)>;

  def : Pat<(mtruncFrag addr:$dst, SrcInfo.KRCWM:$mask,
                        (SrcInfo.VT SrcInfo.RC:$src)),
            (!cast<Instruction>(NAME#SrcInfo.ZSuffix#mrk)
                addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
}
8276
// Expands one truncate instruction over the three vector lengths. Each length
// has its own node (OpNode128/256/512), destination type info and memory
// operand. Z128 and Z256 require HasVLX in addition to prd; Z requires only
// prd (HasAVX512 by default).
multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
                        SDNode OpNode256, SDNode OpNode512, OpndItins itins,
                        AVX512VLVectorVTInfo VTSrcInfo,
                        X86VectorVTInfo DestInfoZ128,
                        X86VectorVTInfo DestInfoZ256,
                        X86VectorVTInfo DestInfoZ,
                        X86MemOperand x86memopZ128,
                        X86MemOperand x86memopZ256,
                        X86MemOperand x86memopZ, PatFrag truncFrag,
                        PatFrag mtruncFrag, Predicate prd = HasAVX512> {
  let Predicates = [HasVLX, prd] in {
    defm Z128 : avx512_trunc_common<opc, OpcodeStr, OpNode128, itins,
                                    VTSrcInfo.info128, DestInfoZ128,
                                    x86memopZ128>,
                avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
                                         truncFrag, mtruncFrag>,
                EVEX_V128;

    defm Z256 : avx512_trunc_common<opc, OpcodeStr, OpNode256, itins,
                                    VTSrcInfo.info256, DestInfoZ256,
                                    x86memopZ256>,
                avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
                                         truncFrag, mtruncFrag>,
                EVEX_V256;
  }

  let Predicates = [prd] in {
    defm Z : avx512_trunc_common<opc, OpcodeStr, OpNode512, itins,
                                 VTSrcInfo.info512, DestInfoZ, x86memopZ>,
             avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
                                      truncFrag, mtruncFrag>,
             EVEX_V512;
  }
}
8303
// i64 -> i8 truncate. All three vector lengths use InVecNode and produce a
// v16i8x_info destination; OpNode only supplies InVecNode's default.
multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           OpndItins itins, PatFrag StoreNode,
                           PatFrag MaskedStoreNode,
                           SDNode InVecNode = OpNode> {
  defm NAME : avx512_trunc<opc, OpcodeStr,
                           InVecNode, InVecNode, InVecNode, itins,
                           avx512vl_i64_info,
                           v16i8x_info, v16i8x_info, v16i8x_info,
                           i16mem, i32mem, i64mem,
                           StoreNode, MaskedStoreNode>,
              EVEX_CD8<8, CD8VO>;
}
8312
// i64 -> i16 truncate. The 128- and 256-bit sources use InVecNode, the
// 512-bit source uses OpNode; every destination is v8i16x_info.
multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           OpndItins itins, PatFrag StoreNode,
                           PatFrag MaskedStoreNode,
                           SDNode InVecNode = OpNode> {
  defm NAME : avx512_trunc<opc, OpcodeStr,
                           InVecNode, InVecNode, OpNode, itins,
                           avx512vl_i64_info,
                           v8i16x_info, v8i16x_info, v8i16x_info,
                           i32mem, i64mem, i128mem,
                           StoreNode, MaskedStoreNode>,
              EVEX_CD8<16, CD8VQ>;
}
8321
// i64 -> i32 truncate. Only the 128-bit source uses InVecNode; the 256- and
// 512-bit sources use OpNode. Destinations are v4i32x_info (Z128, Z256) and
// v8i32x_info (Z).
multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           OpndItins itins, PatFrag StoreNode,
                           PatFrag MaskedStoreNode,
                           SDNode InVecNode = OpNode> {
  defm NAME : avx512_trunc<opc, OpcodeStr,
                           InVecNode, OpNode, OpNode, itins,
                           avx512vl_i64_info,
                           v4i32x_info, v4i32x_info, v8i32x_info,
                           i64mem, i128mem, i256mem,
                           StoreNode, MaskedStoreNode>,
              EVEX_CD8<32, CD8VH>;
}
8330
// i32 -> i8 truncate. The 128- and 256-bit sources use InVecNode, the
// 512-bit source uses OpNode; every destination is v16i8x_info.
multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           OpndItins itins, PatFrag StoreNode,
                           PatFrag MaskedStoreNode,
                           SDNode InVecNode = OpNode> {
  defm NAME : avx512_trunc<opc, OpcodeStr,
                           InVecNode, InVecNode, OpNode, itins,
                           avx512vl_i32_info,
                           v16i8x_info, v16i8x_info, v16i8x_info,
                           i32mem, i64mem, i128mem,
                           StoreNode, MaskedStoreNode>,
              EVEX_CD8<8, CD8VQ>;
}
8339
// i32 -> i16 truncate. Only the 128-bit source uses InVecNode; the 256- and
// 512-bit sources use OpNode. Destinations are v8i16x_info (Z128, Z256) and
// v16i16x_info (Z).
multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           OpndItins itins, PatFrag StoreNode,
                           PatFrag MaskedStoreNode,
                           SDNode InVecNode = OpNode> {
  defm NAME : avx512_trunc<opc, OpcodeStr,
                           InVecNode, OpNode, OpNode, itins,
                           avx512vl_i32_info,
                           v8i16x_info, v8i16x_info, v16i16x_info,
                           i64mem, i128mem, i256mem,
                           StoreNode, MaskedStoreNode>,
              EVEX_CD8<16, CD8VH>;
}
8348
// Truncating moves whose source types come from avx512vl_i16_info and whose
// destinations are i8 vectors (v16i8x_info, v16i8x_info, v32i8x_info).
// Unlike the other avx512_trunc_* wrappers visible here, this one passes an
// extra trailing HasBWI argument to avx512_trunc.
//
//   opc, OpcodeStr   - opcode byte and mnemonic forwarded to avx512_trunc.
//   OpNode           - node forwarded in the second and third node slots.
//   itins            - itinerary/scheduling bundle.
//   StoreNode        - PatFrag for the truncating store form.
//   MaskedStoreNode  - PatFrag for the masked truncating store form.
//   InVecNode        - node forwarded in the first node slot only; defaults
//                      to OpNode when the caller omits it.
//
// NOTE(review): avx512_trunc is defined outside this chunk; the trailing
// HasBWI is presumably its base predicate parameter -- confirm.
multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           OpndItins itins, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
                          itins, avx512vl_i16_info, v16i8x_info, v16i8x_info,
                          v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
}
8357
// Instantiate the truncating-move families. Each source/destination width
// pair gets three variants:
//   * plain      - OpNode = trunc, explicit InVecNode = X86vtrunc;
//   * "s"  form  - OpNode = X86vtruncs,  InVecNode left at its default;
//   * "us" form  - OpNode = X86vtruncus, InVecNode left at its default.
// The store / masked-store PatFrags follow the same plain / _s_ / _us_ naming
// and are selected by destination element width (vi8, vi16, vi32).
// NOTE(review): the s/us variants are presumably the signed/unsigned
// saturating truncations; the nodes themselves are defined elsewhere.

// i64 -> i8
defm VPMOVQB   : avx512_trunc_qb<0x32, "vpmovqb", trunc, AVX512_TRUNCATE,
                                 truncstorevi8, masked_truncstorevi8, X86vtrunc>;
defm VPMOVSQB  : avx512_trunc_qb<0x22, "vpmovsqb", X86vtruncs, AVX512_TRUNCATE,
                                 truncstore_s_vi8, masked_truncstore_s_vi8>;
defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus, AVX512_TRUNCATE,
                                 truncstore_us_vi8, masked_truncstore_us_vi8>;

// i64 -> i16
defm VPMOVQW   : avx512_trunc_qw<0x34, "vpmovqw", trunc, AVX512_TRUNCATE,
                                 truncstorevi16, masked_truncstorevi16, X86vtrunc>;
defm VPMOVSQW  : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, AVX512_TRUNCATE,
                                 truncstore_s_vi16, masked_truncstore_s_vi16>;
defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus, AVX512_TRUNCATE,
                                 truncstore_us_vi16, masked_truncstore_us_vi16>;

// i64 -> i32
defm VPMOVQD   : avx512_trunc_qd<0x35, "vpmovqd", trunc, AVX512_TRUNCATE,
                                 truncstorevi32, masked_truncstorevi32, X86vtrunc>;
defm VPMOVSQD  : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, AVX512_TRUNCATE,
                                 truncstore_s_vi32, masked_truncstore_s_vi32>;
defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus, AVX512_TRUNCATE,
                                 truncstore_us_vi32, masked_truncstore_us_vi32>;

// i32 -> i8
defm VPMOVDB   : avx512_trunc_db<0x31, "vpmovdb", trunc, AVX512_TRUNCATE,
                                 truncstorevi8, masked_truncstorevi8, X86vtrunc>;
defm VPMOVSDB  : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, AVX512_TRUNCATE,
                                 truncstore_s_vi8, masked_truncstore_s_vi8>;
defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus, AVX512_TRUNCATE,
                                 truncstore_us_vi8, masked_truncstore_us_vi8>;

// i32 -> i16
defm VPMOVDW   : avx512_trunc_dw<0x33, "vpmovdw", trunc, AVX512_TRUNCATE,
                                 truncstorevi16, masked_truncstorevi16, X86vtrunc>;
defm VPMOVSDW  : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, AVX512_TRUNCATE,
                                 truncstore_s_vi16, masked_truncstore_s_vi16>;
defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus, AVX512_TRUNCATE,
                                 truncstore_us_vi16, masked_truncstore_us_vi16>;

// i16 -> i8
defm VPMOVWB   : avx512_trunc_wb<0x30, "vpmovwb", trunc, AVX512_TRUNCATE,
                                 truncstorevi8, masked_truncstorevi8, X86vtrunc>;
defm VPMOVSWB  : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, AVX512_TRUNCATE,
                                 truncstore_s_vi8, masked_truncstore_s_vi8>;
defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus, AVX512_TRUNCATE,
                                 truncstore_us_vi8, masked_truncstore_us_vi8>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008399
// Without VLX, select a 256-bit -> 128-bit truncation through the 512-bit
// instruction: place the YMM source in the low half of an otherwise undefined
// (IMPLICIT_DEF) ZMM register via sub_ymm, run the 512-bit register-register
// truncate, and take the low XMM (sub_xmm) of its result.
let Predicates = [HasAVX512, NoVLX] in {
// v8i32 -> v8i16 using VPMOVDWZrr.
def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
         (v8i16 (EXTRACT_SUBREG
                 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                              VR256X:$src, sub_ymm)))), sub_xmm))>;
// v4i64 -> v4i32 using VPMOVQDZrr.
def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
         (v4i32 (EXTRACT_SUBREG
                 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                            VR256X:$src, sub_ymm)))), sub_xmm))>;
}
8410
// With BWI but without VLX, select v16i16 -> v16i8 truncation through the
// 512-bit VPMOVWBZrr: the YMM source is inserted at sub_ymm into an
// IMPLICIT_DEF v32i16 value and the low XMM (sub_xmm) of the result is taken.
let Predicates = [HasBWI, NoVLX] in {
def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
         (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
                                            VR256X:$src, sub_ymm))), sub_xmm))>;
}
8416
// Common body for one vector length of an extending move. Emits two maskable
// instruction families under the execution domain of DestInfo:
//   rr - register source (SrcInfo.RC); pattern is OpNode applied to the
//        source register, typed DestInfo.VT. Scheduled with itins.Sched.
//   rm - memory source (x86memop); pattern is LdFrag applied to the address,
//        typed DestInfo.VT (OpNode is not used here). Scheduled with
//        itins.Sched.Folded.
//
//   opc, OpcodeStr - opcode byte and mnemonic.
//   itins          - itinerary/scheduling bundle (rr, rm, Sched are read).
//   DestInfo       - destination vector type info.
//   SrcInfo        - source vector type info for the rr form.
//   x86memop       - memory operand for the rm form.
//   LdFrag         - load PatFrag matched by the rm form.
//   OpNode         - extension node matched by the rr form.
multiclass avx512_extend_common<bits<8> opc, string OpcodeStr, OpndItins itins,
              X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
              X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
  let ExeDomain = DestInfo.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                    (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
                    (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))), itins.rr>,
                  EVEX, Sched<[itins.Sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                  (ins x86memop:$src), OpcodeStr ,"$src", "$src",
                  (DestInfo.VT (LdFrag addr:$src)), itins.rm>,
                EVEX, Sched<[itins.Sched.Folded]>;
  }
}
8432
// Byte -> word extending moves at three vector lengths.
//   Z128 : v16i8x_info -> v8i16x_info,  i64mem,  matches InVecNode.
//   Z256 : v16i8x_info -> v16i16x_info, i128mem, matches OpNode.
//   Z    : v32i8x_info -> v32i16_info,  i256mem, matches OpNode.
// Z128/Z256 require [HasVLX, HasBWI]; Z requires [HasBWI].
//
//   OpNode    - extension node for the 256- and 512-bit forms.
//   InVecNode - extension node for the 128-bit form, whose destination has
//               fewer elements (8) than its 128-bit source (16).
//   ExtTy     - prefix used to build the default load fragment name.
//   LdFrag    - defaults to the PatFrag named ExtTy#"extloadvi8".
multiclass avx512_extend_BW<bits<8> opc, string OpcodeStr,
          SDNode OpNode, SDNode InVecNode, string ExtTy,
          OpndItins itins, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasBWI] in {
    defm Z128:  avx512_extend_common<opc, OpcodeStr, itins, v8i16x_info,
                    v16i8x_info, i64mem, LdFrag, InVecNode>,
                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256:  avx512_extend_common<opc, OpcodeStr, itins, v16i16x_info,
                    v16i8x_info, i128mem, LdFrag, OpNode>,
                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasBWI] in {
    defm Z   :  avx512_extend_common<opc, OpcodeStr, itins, v32i16_info,
                    v32i8x_info, i256mem, LdFrag, OpNode>,
                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
  }
}
8451
// Byte -> dword extending moves at three vector lengths. The source info is
// v16i8x_info for every length; only the destination and memory operand vary.
//   Z128 : -> v4i32x_info, i32mem,  matches InVecNode.
//   Z256 : -> v8i32x_info, i64mem,  matches OpNode.
//   Z    : -> v16i32_info, i128mem, matches OpNode.
// Z128/Z256 require [HasVLX, HasAVX512]; Z requires [HasAVX512].
//
//   OpNode    - extension node for the 256- and 512-bit forms.
//   InVecNode - extension node for the 128-bit form.
//   ExtTy     - prefix used to build the default load fragment name.
//   LdFrag    - defaults to the PatFrag named ExtTy#"extloadvi8".
multiclass avx512_extend_BD<bits<8> opc, string OpcodeStr,
          SDNode OpNode, SDNode InVecNode, string ExtTy,
          OpndItins itins, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128:  avx512_extend_common<opc, OpcodeStr, itins, v4i32x_info,
                   v16i8x_info, i32mem, LdFrag, InVecNode>,
                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256:  avx512_extend_common<opc, OpcodeStr, itins, v8i32x_info,
                   v16i8x_info, i64mem, LdFrag, OpNode>,
                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   :  avx512_extend_common<opc, OpcodeStr, itins, v16i32_info,
                   v16i8x_info, i128mem, LdFrag, OpNode>,
                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
  }
}
8470
// Byte -> qword extending moves at three vector lengths. The source info is
// v16i8x_info for every length; only the destination and memory operand vary.
//   Z128 : -> v2i64x_info, i16mem, matches InVecNode.
//   Z256 : -> v4i64x_info, i32mem, matches OpNode.
//   Z    : -> v8i64_info,  i64mem, matches OpNode.
// Z128/Z256 require [HasVLX, HasAVX512]; Z requires [HasAVX512].
//
//   OpNode    - extension node for the 256- and 512-bit forms.
//   InVecNode - extension node for the 128-bit form.
//   ExtTy     - prefix used to build the default load fragment name.
//   LdFrag    - defaults to the PatFrag named ExtTy#"extloadvi8".
multiclass avx512_extend_BQ<bits<8> opc, string OpcodeStr,
          SDNode OpNode, SDNode InVecNode, string ExtTy,
          OpndItins itins, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128:  avx512_extend_common<opc, OpcodeStr, itins, v2i64x_info,
                   v16i8x_info, i16mem, LdFrag, InVecNode>,
                     EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256:  avx512_extend_common<opc, OpcodeStr, itins, v4i64x_info,
                   v16i8x_info, i32mem, LdFrag, OpNode>,
                     EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   :  avx512_extend_common<opc, OpcodeStr, itins, v8i64_info,
                   v16i8x_info, i64mem, LdFrag, OpNode>,
                     EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
  }
}
8489
// Word -> dword extending moves at three vector lengths.
//   Z128 : v8i16x_info  -> v4i32x_info, i64mem,  matches InVecNode.
//   Z256 : v8i16x_info  -> v8i32x_info, i128mem, matches OpNode.
//   Z    : v16i16x_info -> v16i32_info, i256mem, matches OpNode.
// Z128/Z256 require [HasVLX, HasAVX512]; Z requires [HasAVX512].
//
//   OpNode    - extension node for the 256- and 512-bit forms.
//   InVecNode - extension node for the 128-bit form.
//   ExtTy     - prefix used to build the default load fragment name.
//   LdFrag    - defaults to the PatFrag named ExtTy#"extloadvi16".
multiclass avx512_extend_WD<bits<8> opc, string OpcodeStr,
         SDNode OpNode, SDNode InVecNode, string ExtTy,
         OpndItins itins, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128:  avx512_extend_common<opc, OpcodeStr, itins, v4i32x_info,
                   v8i16x_info, i64mem, LdFrag, InVecNode>,
                     EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256:  avx512_extend_common<opc, OpcodeStr, itins, v8i32x_info,
                   v8i16x_info, i128mem, LdFrag, OpNode>,
                     EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   :  avx512_extend_common<opc, OpcodeStr, itins, v16i32_info,
                   v16i16x_info, i256mem, LdFrag, OpNode>,
                     EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
  }
}
8508
// Word -> qword extending moves at three vector lengths. The source info is
// v8i16x_info for every length; only the destination and memory operand vary.
//   Z128 : -> v2i64x_info, i32mem,  matches InVecNode.
//   Z256 : -> v4i64x_info, i64mem,  matches OpNode.
//   Z    : -> v8i64_info,  i128mem, matches OpNode.
// Z128/Z256 require [HasVLX, HasAVX512]; Z requires [HasAVX512].
//
//   OpNode    - extension node for the 256- and 512-bit forms.
//   InVecNode - extension node for the 128-bit form.
//   ExtTy     - prefix used to build the default load fragment name.
//   LdFrag    - defaults to the PatFrag named ExtTy#"extloadvi16".
multiclass avx512_extend_WQ<bits<8> opc, string OpcodeStr,
         SDNode OpNode, SDNode InVecNode, string ExtTy,
         OpndItins itins, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128:  avx512_extend_common<opc, OpcodeStr, itins, v2i64x_info,
                   v8i16x_info, i32mem, LdFrag, InVecNode>,
                     EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256:  avx512_extend_common<opc, OpcodeStr, itins, v4i64x_info,
                   v8i16x_info, i64mem, LdFrag, OpNode>,
                     EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   :  avx512_extend_common<opc, OpcodeStr, itins, v8i64_info,
                   v8i16x_info, i128mem, LdFrag, OpNode>,
                     EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
  }
}
8527
// Dword -> qword extending moves at three vector lengths.
//   Z128 : v4i32x_info -> v2i64x_info, i64mem,  matches InVecNode.
//   Z256 : v4i32x_info -> v4i64x_info, i128mem, matches OpNode.
//   Z    : v8i32x_info -> v8i64_info,  i256mem, matches OpNode.
// Z128/Z256 require [HasVLX, HasAVX512]; Z requires [HasAVX512].
// Unlike the byte/word-source extend multiclasses, no VEX_WIG is attached.
//
//   OpNode    - extension node for the 256- and 512-bit forms.
//   InVecNode - extension node for the 128-bit form.
//   ExtTy     - prefix used to build the default load fragment name.
//   LdFrag    - defaults to the PatFrag named ExtTy#"extloadvi32".
multiclass avx512_extend_DQ<bits<8> opc, string OpcodeStr,
         SDNode OpNode, SDNode InVecNode, string ExtTy,
         OpndItins itins, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {

  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128:  avx512_extend_common<opc, OpcodeStr, itins, v2i64x_info,
                   v4i32x_info, i64mem, LdFrag, InVecNode>,
                     EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;

    defm Z256:  avx512_extend_common<opc, OpcodeStr, itins, v4i64x_info,
                   v4i32x_info, i128mem, LdFrag, OpNode>,
                     EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
  }
  let Predicates = [HasAVX512] in {
    defm Z   :  avx512_extend_common<opc, OpcodeStr, itins, v8i64_info,
                   v8i32x_info, i256mem, LdFrag, OpNode>,
                     EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
  }
}
8547
// Zero-extending moves: OpNode = X86vzext, InVecNode = zext_invec, and
// ExtTy = "z" so the default load fragments are zextloadvi8/16/32.
defm VPMOVZXBW : avx512_extend_BW<0x30, "vpmovzxbw", X86vzext, zext_invec, "z", AVX512_EXTEND>;
defm VPMOVZXBD : avx512_extend_BD<0x31, "vpmovzxbd", X86vzext, zext_invec, "z", AVX512_EXTEND>;
defm VPMOVZXBQ : avx512_extend_BQ<0x32, "vpmovzxbq", X86vzext, zext_invec, "z", AVX512_EXTEND>;
defm VPMOVZXWD : avx512_extend_WD<0x33, "vpmovzxwd", X86vzext, zext_invec, "z", AVX512_EXTEND>;
defm VPMOVZXWQ : avx512_extend_WQ<0x34, "vpmovzxwq", X86vzext, zext_invec, "z", AVX512_EXTEND>;
defm VPMOVZXDQ : avx512_extend_DQ<0x35, "vpmovzxdq", X86vzext, zext_invec, "z", AVX512_EXTEND>;

// Sign-extending moves: OpNode = X86vsext, InVecNode = sext_invec, and
// ExtTy = "s" so the default load fragments are sextloadvi8/16/32.
defm VPMOVSXBW: avx512_extend_BW<0x20, "vpmovsxbw", X86vsext, sext_invec, "s", AVX512_EXTEND>;
defm VPMOVSXBD: avx512_extend_BD<0x21, "vpmovsxbd", X86vsext, sext_invec, "s", AVX512_EXTEND>;
defm VPMOVSXBQ: avx512_extend_BQ<0x22, "vpmovsxbq", X86vsext, sext_invec, "s", AVX512_EXTEND>;
defm VPMOVSXWD: avx512_extend_WD<0x23, "vpmovsxwd", X86vsext, sext_invec, "s", AVX512_EXTEND>;
defm VPMOVSXWQ: avx512_extend_WQ<0x24, "vpmovsxwq", X86vsext, sext_invec, "s", AVX512_EXTEND>;
defm VPMOVSXDQ: avx512_extend_DQ<0x25, "vpmovsxdq", X86vsext, sext_invec, "s", AVX512_EXTEND>;
Elena Demikhovsky3948c592015-05-27 08:15:19 +00008561
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008562
// Additional load-folding patterns for the extending moves. Each pattern
// matches an extension node applied to some spelling of a loaded vector
// (bitcast full-vector load, scalar_to_vector of a scalar load, vzmovl /
// vzload fragments) and selects the memory form of the corresponding
// instruction, looked up by name as OpcPrefix # <BW|BD|BQ|WD|WQ|DQ> #
// <Z128|Z256|Z> # rm.
//
//   OpcPrefix - instruction name prefix (e.g. the part before "BWZ128rm").
//   ExtOp     - extension node matched by the 256- and 512-bit patterns.
//   InVecOp   - extension node matched by the 128-bit patterns.
//   ExtLoad16 - scalar load fragment used only by the 128-bit BQ
//               scalar_to_vector pattern.
multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
                                 SDNode InVecOp, PatFrag ExtLoad16> {
  // 128-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
  // byte -> word
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (v16i8 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
  // byte -> dword
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;

  // byte -> qword
  def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (ExtLoad16 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;

  // word -> dword
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (v8i16 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;

  // word -> qword
  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (v8i16 (vzmovl_v4i32 addr:$src)))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;

  // dword -> qword
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (v4i32 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (v4i32 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  }
  // 256-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
  // byte -> word
  def : Pat<(v16i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
  def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
  def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
  // byte -> dword
  def : Pat<(v8i32 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;

  // byte -> qword
  def : Pat<(v4i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;

  // word -> dword
  def : Pat<(v8i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;

  // word -> qword
  def : Pat<(v4i64 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;

  // dword -> qword
  def : Pat<(v4i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
  }
  // 512-bit patterns
  let Predicates = [HasBWI] in {
  // byte -> word
  def : Pat<(v32i16 (ExtOp (bc_v32i8 (loadv4i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
  }
  let Predicates = [HasAVX512] in {
  // byte -> dword
  def : Pat<(v16i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;

  // byte -> qword
  def : Pat<(v8i64 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  def : Pat<(v8i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;

  // word -> dword
  def : Pat<(v16i32 (ExtOp (bc_v16i16 (loadv4i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;

  // word -> qword
  def : Pat<(v8i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;

  // dword -> qword
  def : Pat<(v8i64 (ExtOp (bc_v8i32 (loadv4i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
  }
}
8703
// Instantiate the load-folding patterns for the sign- and zero-extending
// families. The last argument is the scalar 16-bit load fragment used by the
// 128-bit byte -> qword pattern: extloadi32i16 for VPMOVSX, loadi16_anyext
// for VPMOVZX.
defm : AVX512_pmovx_patterns<"VPMOVSX", X86vsext, sext_invec, extloadi32i16>;
defm : AVX512_pmovx_patterns<"VPMOVZX", X86vzext, zext_invec, loadi16_anyext>;
Craig Topper64378f42016-10-09 23:08:39 +00008706
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008707//===----------------------------------------------------------------------===//
8708// GATHER - SCATTER Operations
8709
Simon Pilgrimb69dae42017-12-05 20:47:11 +00008710// FIXME: Improve scheduling of gather/scatter instructions.
// One masked gather instruction ("rm" form).
//
//   opc, OpcodeStr - opcode byte and mnemonic; _.Suffix is appended to the
//                    mnemonic in the asm string.
//   _              - vector type info for the destination/pass-through.
//   memop          - vector-indexed memory operand.
//   GatherNode     - PatFrag matched by the selection pattern.
//   MaskRC         - mask register class; defaults to _.KRCWM.
//
// Operand constraints:
//   * $dst is early-clobber;
//   * $src1 (pass-through value) is tied to $dst;
//   * $mask is tied to the $mask_wb output, i.e. the instruction also
//     produces an updated mask, and the pattern sets both results.
// The instruction is scheduled as a plain WriteLoad.
multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         X86MemOperand memop, PatFrag GatherNode,
                         RegisterClass MaskRC = _.KRCWM> {
  let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
      ExeDomain = _.ExeDomain in
  def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
            (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
            !strconcat(OpcodeStr#_.Suffix,
            "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
            [(set _.RC:$dst, MaskRC:$mask_wb,
              (GatherNode  (_.VT _.RC:$src1), MaskRC:$mask,
                     vectoraddr:$src2))]>, EVEX, EVEX_K,
             EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteLoad]>;
}
Cameron McInally45325962014-03-26 13:50:50 +00008725
// Gather instructions for a type-info set passed as `_` (instantiated below
// with the f64 and i64 sets). For each vector length two instructions are
// defined, named NAME#D#SUFF#Z* and NAME#Q#SUFF#Z*, with "d" / "q" appended
// to the mnemonic. Every variant carries VEX_W, and every variant uses the
// full-width info (_.info512 / _.info256 / _.info128) for its length.
//
//   dopc, qopc - opcode bytes for the D and Q variants respectively.
//   _          - AVX512VL type-info set (info512 / info256 / info128).
//   OpcodeStr  - mnemonic stem.
//   SUFF       - name suffix spliced into the record names.
//
// The 512-bit forms are unconditional here; 256/128-bit forms need HasVLX.
// NOTE(review): the mgatherv<N>i<W> fragment names presumably describe the
// index vector type -- confirm against their definitions.
multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512,
                                      vy512mem,  mgatherv8i32>, EVEX_V512, VEX_W;
  defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info512,
                                      vz512mem,  mgatherv8i64>, EVEX_V512, VEX_W;
let Predicates = [HasVLX] in {
  defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
                              vx256xmem, mgatherv4i32>, EVEX_V256, VEX_W;
  defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info256,
                              vy256xmem, mgatherv4i64>, EVEX_V256, VEX_W;
  defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
                              vx128xmem, mgatherv4i32>, EVEX_V128, VEX_W;
  defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
                              vx128xmem, mgatherv2i64>, EVEX_V128, VEX_W;
}
}
8743
// Gather instructions for a type-info set passed as `_` (instantiated below
// with the f32 and i32 sets). For each vector length two instructions are
// defined, named NAME#D#SUFF#Z* and NAME#Q#SUFF#Z*, with "d" / "q" appended
// to the mnemonic. No VEX_W is attached.
//
// The D variants use the full-width info for their length. The Q variants
// use a narrower info than their EVEX length: _.info256 for Z, _.info128 for
// both Z256 and Z128. The Z128 Q variant additionally overrides the mask
// register class with VK2WM instead of the default _.KRCWM.
//
//   dopc, qopc - opcode bytes for the D and Q variants respectively.
//   _          - AVX512VL type-info set (info512 / info256 / info128).
//   OpcodeStr  - mnemonic stem.
//   SUFF       - name suffix spliced into the record names.
//
// The 512-bit forms are unconditional here; 256/128-bit forms need HasVLX.
// NOTE(review): the narrower Q infos presumably reflect that 64-bit indices
// address half as many 32-bit elements per vector -- confirm.
multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
                       AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512, vz512mem,
                                       mgatherv16i32>, EVEX_V512;
  defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info256, vz256xmem,
                                       mgatherv8i64>, EVEX_V512;
let Predicates = [HasVLX] in {
  defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
                                          vy256xmem, mgatherv8i32>, EVEX_V256;
  defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info128,
                                          vy128xmem, mgatherv4i64>, EVEX_V256;
  defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
                                          vx128xmem, mgatherv4i32>, EVEX_V128;
  defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
                                          vx64xmem, mgatherv2i64, VK2WM>,
                                          EVEX_V128;
}
}
Michael Liao5bf95782014-12-04 05:20:33 +00008762
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008763
// Floating-point gathers: opcode 0x92 for dword indices, 0x93 for qword
// indices, instantiated for both the f64 ("PD") and f32 ("PS") type families.
defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
               avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;

// Integer gathers: opcode 0x90 for dword indices, 0x91 for qword indices,
// instantiated for both the i64 ("Q") and i32 ("D") type families.
defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
                avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008769
// A single masked scatter instruction ("mr": memory destination, register
// source).
//   opc         - instruction opcode.
//   OpcodeStr   - mnemonic stem; _.Suffix is appended.
//   _           - VT info of the data vector being stored.
//   memop       - vector-address memory operand.
//   ScatterNode - PatFrag matched by the selection pattern.
//   MaskRC      - write-mask register class; defaults to _.KRCWM.
// The mask is both an input ($mask) and an output ($mask_wb); the two are tied
// together through the "$mask = $mask_wb" constraint.
multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86MemOperand memop, PatFrag ScatterNode,
                          RegisterClass MaskRC = _.KRCWM> {

let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain in

  def mr  : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
            (ins memop:$dst, MaskRC:$mask, _.RC:$src),
            !strconcat(OpcodeStr#_.Suffix,
            "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
            [(set MaskRC:$mask_wb, (ScatterNode (_.VT _.RC:$src),
                                    MaskRC:$mask,  vectoraddr:$dst))]>,
            EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
            Sched<[WriteStore]>;
}
8785
// Scatter forms for the type families whose instructions carry VEX_W.
//   dopc / qopc - opcodes for the dword-indexed / qword-indexed forms.
//   _           - per-width VT info bundle (info512 / info256 / info128).
//   OpcodeStr   - mnemonic stem; "d" or "q" is appended per index width.
//   SUFF        - suffix pasted into the generated record names.
// The 256-bit and 128-bit variants are only available with HasVLX.
multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512,
                                      vy512mem, mscatterv8i32>, EVEX_V512, VEX_W;
  defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info512,
                                      vz512mem, mscatterv8i64>, EVEX_V512, VEX_W;
let Predicates = [HasVLX] in {
  defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
                              vx256xmem, mscatterv4i32>, EVEX_V256, VEX_W;
  defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info256,
                              vy256xmem, mscatterv4i64>, EVEX_V256, VEX_W;
  defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
                              vx128xmem, mscatterv4i32>, EVEX_V128, VEX_W;
  defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
                              vx128xmem, mscatterv2i64>, EVEX_V128, VEX_W;
}
}
8803
// Scatter forms for the type families whose instructions do not carry VEX_W.
//   dopc / qopc - opcodes for the dword-indexed / qword-indexed forms.
//   _           - per-width VT info bundle (info512 / info256 / info128).
//   OpcodeStr   - mnemonic stem; "d" or "q" is appended per index width.
//   SUFF        - suffix pasted into the generated record names.
// As in avx512_gather_d_ps, the Q forms pass the next-narrower VT info than
// the D forms at the same EVEX vector length, and the 128-bit Q form overrides
// the mask register class with VK2WM.
multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
                       AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512, vz512mem,
                                       mscatterv16i32>, EVEX_V512;
  defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info256, vz256xmem,
                                       mscatterv8i64>, EVEX_V512;
let Predicates = [HasVLX] in {
  defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
                                          vy256xmem, mscatterv8i32>, EVEX_V256;
  defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
                                          vy128xmem, mscatterv4i64>, EVEX_V256;
  defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
                                          vx128xmem, mscatterv4i32>, EVEX_V128;
  defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
                                          vx64xmem, mscatterv2i64, VK2WM>,
                                          EVEX_V128;
}
}
8822
// Floating-point scatters: opcode 0xA2 for dword indices, 0xA3 for qword
// indices, instantiated for both the f64 ("PD") and f32 ("PS") type families.
defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
                avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;

// Integer scatters: opcode 0xA0 for dword indices, 0xA1 for qword indices,
// instantiated for both the i64 ("Q") and i32 ("D") type families.
defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
                 avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008828
// Gather/scatter prefetch.
//   opc       - instruction opcode.
//   F         - instruction Format (the MRMNm form selects the operation).
//   OpcodeStr - full mnemonic.
//   KRC       - write-mask register class.
//   memop     - vector-address memory operand.
// The instruction has no outputs and no selection pattern, so it is explicitly
// marked hasSideEffects = 1. Requires HasPFI.
multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
                       RegisterClass KRC, X86MemOperand memop> {
  let Predicates = [HasPFI], hasSideEffects = 1 in
  def m  : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
            !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"),
            [], IIC_SSE_PREFETCH>, EVEX, EVEX_K, Sched<[WriteLoad]>;
}
8837
// Prefetch instantiations. The Format selects the operation:
//   MRM1m = gather pf0, MRM2m = gather pf1,
//   MRM5m = scatter pf0, MRM6m = scatter pf1.
// Opcode 0xC6 is used for the dword-indexed forms and 0xC7 for the
// qword-indexed forms; the *PD forms additionally carry VEX_W.
defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
                     VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
                     VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
                     VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
                     VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
                     VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
                     VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
                     VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
                     VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008885
// Mask-to-vector conversion for one vector width: the register-register form
// is selected for a sign-extension (sext) of a Vec.KRC mask to Vec.VT.
//   opc       - instruction opcode.
//   Vec       - VT info of the destination vector.
//   OpcodeStr - mnemonic stem; Vec.Suffix is appended.
multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
                  !strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
                  [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))],
                  IIC_SSE_MOV_S_RR>, EVEX, Sched<[WriteMove]>;
}
Michael Liao5bf95782014-12-04 05:20:33 +00008892
// Instantiates cvt_by_vec_width for all three vector widths of VTInfo.
// The 512-bit form requires `prd`; the 256/128-bit forms also require HasVLX.
multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
                                 string OpcodeStr, Predicate prd> {
let Predicates = [prd] in
  defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
    defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
  }
}
8903
// Mask-to-vector moves. Byte/word element forms use opcode 0x28 and require
// HasBWI; dword/qword element forms use opcode 0x38 and require HasDQI. The
// word and qword forms additionally carry VEX_W.
defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , VEX_W;
defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , VEX_W;
Elena Demikhovsky908dbf42014-12-11 15:02:24 +00008908
// Vector-to-mask conversion for one vector width. The register-register form
// is selected for (X86pcmpgtm all-zeros, src), i.e. a "0 > src" compare that
// produces a _.KRC mask.
//   opc       - instruction opcode.
//   _         - VT info of the source vector.
//   OpcodeStr - full mnemonic.
multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
  def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                      [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))],
                      IIC_SSE_MOV_S_RR>, EVEX, Sched<[WriteMove]>;
}
8915
// Use the 512-bit version to implement the 128/256-bit cases when VLX is not
// available (NoVLX).
//   ExtendInfo - VT info of the wide vector the source is inserted into.
//   _          - VT info of the narrow source vector.
// The narrow source is inserted into an IMPLICIT_DEF of ExtendInfo.VT at
// _.SubRegIdx, the NAME#"Zrr" instruction is applied to it, and the result is
// copied into the narrow mask register class _.KRC.
multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
                                                            X86VectorVTInfo _> {

  def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
            (_.KVT (COPY_TO_REGCLASS
                     (!cast<Instruction>(NAME#"Zrr")
                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                      _.RC:$src, _.SubRegIdx)),
                   _.KRC))>;
}
8927
// Vector-to-mask conversion for all three vector widths of VTInfo.
//   prd          - feature predicate required by every form.
//   prd + HasVLX - native 256-bit and 128-bit instructions.
//   prd + NoVLX  - 256/128-bit patterns lowered through the 512-bit
//                  instruction (see convert_vector_to_mask_lowering).
multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
                                   AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
    defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
                                            EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
                                              EVEX_V256;
    defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
                                               EVEX_V128;
  }
  let Predicates = [prd, NoVLX] in {
    defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256>;
    defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128>;
  }
}
8945
// Vector-to-mask moves. Byte/word element forms use opcode 0x29 and require
// HasBWI; dword/qword element forms use opcode 0x39 and require HasDQI. The
// word and qword forms additionally carry VEX_W.
defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
                                              avx512vl_i8_info, HasBWI>;
defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
                                              avx512vl_i16_info, HasBWI>, VEX_W;
defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
                                              avx512vl_i32_info, HasDQI>;
defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
                                              avx512vl_i64_info, HasDQI>, VEX_W;
8954
// Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
// is available, but BWI is not. We can't handle this in lowering because
// a target independent DAG combine likes to combine sext and trunc.
// Both patterns first expand the mask to v16i32 with VPMOVM2DZrr and then
// narrow the result with VPMOVDBZrr (to v16i8) or VPMOVDWZrr (to v16i16).
let Predicates = [HasDQI, NoBWI] in {
  def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
            (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
  def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
            (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
}
8964
Elena Demikhovsky908dbf42014-12-11 15:02:24 +00008965//===----------------------------------------------------------------------===//
8966// AVX-512 - COMPRESS and EXPAND
8967//
Elena Demikhovskyba5ab322015-06-22 11:16:30 +00008968
// FIXME: Is there a better scheduler itinerary for VPCOMPRESS/VPEXPAND?
// Both itinerary records currently share the integer-ALU packed itineraries
// and the WriteShuffle256 scheduling class.
let Sched = WriteShuffle256 in {
def AVX512_COMPRESS : OpndItins<
  IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
>;
def AVX512_EXPAND : OpndItins<
  IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
>;
}
8978
// Compress instructions for one vector width.
//   rr  - register form, maskable, selected for X86compress.
//   mr  - unmasked store form; it has no pattern, so mayStore is set and
//         hasSideEffects is cleared explicitly.
//   mrk - masked store form; it has no pattern here (a pattern targeting it is
//         provided by compress_by_vec_width_lowering).
//   opc       - instruction opcode.
//   _         - VT info of the vector being compressed.
//   OpcodeStr - full mnemonic.
//   itins     - itinerary / scheduling information.
multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
                                 string OpcodeStr, OpndItins itins> {
  defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
              (_.VT (X86compress _.RC:$src1)), itins.rr>, AVX5128IBase,
              Sched<[itins.Sched]>;

  let mayStore = 1, hasSideEffects = 0 in
  def mr : AVX5128I<opc, MRMDestMem, (outs),
              (ins _.MemOp:$dst, _.RC:$src),
              OpcodeStr # "\t{$src, $dst|$dst, $src}",
              []>, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[itins.Sched.Folded]>;

  def mrk : AVX5128I<opc, MRMDestMem, (outs),
              (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
              OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
              []>,
              EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[itins.Sched.Folded]>;
}
9000
// Selects the masked compressing-store node (X86mCompressingStore) to the
// "mrk" instruction whose name is built from NAME and _.ZSuffix.
multiclass compress_by_vec_width_lowering<X86VectorVTInfo _ > {
    def : Pat<(X86mCompressingStore addr:$dst, _.KRCWM:$mask,
                                               (_.VT _.RC:$src)),
              (!cast<Instruction>(NAME#_.ZSuffix##mrk)
                            addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
}
9007
// Compress instructions and their store-lowering patterns for all three
// vector widths of VTInfo.
//   Pred - feature predicate; defaults to HasAVX512. The 256/128-bit forms
//          additionally require HasVLX.
multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
                                 OpndItins itins,
                                 AVX512VLVectorVTInfo VTInfo,
                                 Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in
  defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, itins>,
           compress_by_vec_width_lowering<VTInfo.info512>, EVEX_V512;

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, itins>,
                compress_by_vec_width_lowering<VTInfo.info256>, EVEX_V256;
    defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, itins>,
                compress_by_vec_width_lowering<VTInfo.info128>, EVEX_V128;
  }
}
9023
// Compress instantiations: integer forms use opcode 0x8B, floating-point forms
// use opcode 0x8A. The 64-bit element forms additionally carry VEX_W.
defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", AVX512_COMPRESS,
                                          avx512vl_i32_info>, EVEX;
defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", AVX512_COMPRESS,
                                          avx512vl_i64_info>, EVEX, VEX_W;
defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", AVX512_COMPRESS,
                                          avx512vl_f32_info>, EVEX;
defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", AVX512_COMPRESS,
                                          avx512vl_f64_info>, EVEX, VEX_W;
Elena Demikhovsky908dbf42014-12-11 15:02:24 +00009032
// Expand instructions for one vector width.
//   rr - register source, maskable, selected for X86expand.
//   rm - memory source, maskable, selected for X86expand of a loaded vector.
//   opc       - instruction opcode.
//   _         - VT info of the vector being expanded.
//   OpcodeStr - full mnemonic.
//   itins     - itinerary / scheduling information.
multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
                                 string OpcodeStr, OpndItins itins> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
              (_.VT (X86expand _.RC:$src1)), itins.rr>, AVX5128IBase,
              Sched<[itins.Sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
              (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
              (_.VT (X86expand (_.VT (bitconvert
                                      (_.LdFrag addr:$src1))))), itins.rm>,
            AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
            Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
9048
// Selects the masked expanding-load node (X86mExpandingLoad) to the memory
// forms built from NAME and _.ZSuffix:
//   - an undef pass-through operand selects the zero-masking form (rmkz);
//   - a register pass-through operand ($src0) selects the merge-masking
//     form (rmk).
multiclass expand_by_vec_width_lowering<X86VectorVTInfo _ > {

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(NAME#_.ZSuffix##rmkz)
                                        _.KRCWM:$mask, addr:$src)>;

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
                                               (_.VT _.RC:$src0))),
            (!cast<Instruction>(NAME#_.ZSuffix##rmk)
                            _.RC:$src0, _.KRCWM:$mask, addr:$src)>;
}
9060
// Expand instructions and their load-lowering patterns for all three vector
// widths of VTInfo.
//   Pred - feature predicate; defaults to HasAVX512. The 256/128-bit forms
//          additionally require HasVLX.
multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
                               OpndItins itins,
                               AVX512VLVectorVTInfo VTInfo,
                               Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in
  defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, itins>,
           expand_by_vec_width_lowering<VTInfo.info512>, EVEX_V512;

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, itins>,
                expand_by_vec_width_lowering<VTInfo.info256>, EVEX_V256;
    defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, itins>,
                expand_by_vec_width_lowering<VTInfo.info128>, EVEX_V128;
  }
}
9076
// Expand instantiations: integer forms use opcode 0x89, floating-point forms
// use opcode 0x88. The 64-bit element forms additionally carry VEX_W.
defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", AVX512_EXPAND,
                                      avx512vl_i32_info>, EVEX;
defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", AVX512_EXPAND,
                                      avx512vl_i64_info>, EVEX, VEX_W;
defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", AVX512_EXPAND,
                                      avx512vl_f32_info>, EVEX;
defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", AVX512_EXPAND,
                                      avx512vl_f64_info>, EVEX, VEX_W;
Elena Demikhovsky42c96d92015-06-01 06:50:49 +00009085
// Handles instructions of the form
//   reg_vec1 = op(reg_vec, imm)
//              op(mem_vec, imm)
//              op(broadcast(eltVt), imm)
// None of these forms takes an explicit rounding/SAE operand; all
// instructions are created with FROUND_CURRENT.
//   rri  - register source.
//   rmi  - full-vector memory source.
//   rmbi - broadcast scalar memory source (EVEX_B).
multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                      OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
                      (OpNode (_.VT _.RC:$src1),
                              (i32 imm:$src2)), itins.rr>, Sched<[itins.Sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
                    (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                            (i32 imm:$src2)), itins.rm>,
                    Sched<[itins.Sched.Folded, ReadAfterLd]>;
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr,
                    "${src1}"##_.BroadcastStr##", $src2",
                    (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src1))),
                            (i32 imm:$src2)), itins.rm>, EVEX_B,
                    Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
9113
// Handles instructions of the form reg_vec1 = op(reg_vec2, imm), {sae}.
// This is the unary register form with suppress-all-exceptions: the pattern
// passes FROUND_NO_EXC as an extra operand and the instruction sets EVEX_B.
multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                          SDNode OpNode, OpndItins itins,
                                          X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix, "$src2, {sae}, $src1",
                      "$src1, {sae}, $src2",
                      (OpNode (_.VT _.RC:$src1),
                              (i32 imm:$src2),
                              (i32 FROUND_NO_EXC)), itins.rr>,
                      EVEX_B, Sched<[itins.Sched]>;
}
9128
// Unary packed FP instruction with immediate, for all three vector widths.
//   OpNode    - node used by the regular (no SAE) forms.
//   OpNodeRnd - node used by the {sae} form.
//   prd       - feature predicate; the 128/256-bit forms also need HasVLX.
// Only the 512-bit instantiation includes the {sae} variant.
multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
            AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
            SDNode OpNodeRnd, OpndItins itins, Predicate prd>{
  let Predicates = [prd] in {
    defm Z    : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, itins,
                                           _.info512>,
                avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd,
                                               itins, _.info512>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, itins,
                                           _.info128>, EVEX_V128;
    defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, itins,
                                           _.info256>, EVEX_V256;
  }
}
9145
// Handles instructions of the form
//   reg_vec1 = op(reg_vec2, reg_vec3, imm)
//              op(reg_vec2, mem_vec, imm)
//              op(reg_vec2, broadcast(eltVt), imm)
// None of these forms takes an explicit rounding/SAE operand; all
// instructions are created with FROUND_CURRENT.
//   rri  - register second source.
//   rmi  - full-vector memory second source.
//   rmbi - broadcast scalar memory second source (EVEX_B).
multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                OpndItins itins, X86VectorVTInfo _>{
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 imm:$src3)), itins.rr>,
                      Sched<[itins.Sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (bitconvert (_.LdFrag addr:$src2))),
                            (i32 imm:$src3)), itins.rm>,
                    Sched<[itins.Sched.Folded, ReadAfterLd]>;
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr##", $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
                            (i32 imm:$src3)), itins.rm>, EVEX_B,
                    Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
9177
// Handles instructions of the form
//   reg_vec1 = op(reg_vec2, reg_vec3, imm)
//              op(reg_vec2, mem_vec, imm)
// The destination and source vector types may differ: DestInfo describes the
// result (and the masking), SrcInfo describes both source operands. The
// immediate is an 8-bit operand (u8imm / i8).
multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              OpndItins itins, X86VectorVTInfo DestInfo,
                              X86VectorVTInfo SrcInfo>{
  let ExeDomain = DestInfo.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                  (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                               (SrcInfo.VT SrcInfo.RC:$src2),
                               (i8 imm:$src3))), itins.rr>,
                  Sched<[itins.Sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                             (SrcInfo.VT (bitconvert
                                                (SrcInfo.LdFrag addr:$src2))),
                             (i8 imm:$src3))), itins.rm>,
                Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
9201
// Handles instructions of the form
//   reg_vec1 = op(reg_vec2, reg_vec3, imm)
//              op(reg_vec2, mem_vec, imm)
//              op(reg_vec2, broadcast(eltVt), imm)
// The register and full-vector memory forms are inherited from
// avx512_3Op_rm_imm8 (with the same VT info for destination and sources);
// this multiclass adds the broadcast form (rmbi, EVEX_B).
multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           OpndItins itins, X86VectorVTInfo _>:
  avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, itins, _, _>{

  let ExeDomain = _.ExeDomain in
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                    OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr##", $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
                            (i8 imm:$src3)), itins.rm>, EVEX_B,
                    Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
9219
// Handles scalar instructions of the form
//   reg_vec1 = op(reg_vec2, reg_vec3, imm)
//              op(reg_vec2, mem_scalar, imm)
// In the memory form the scalar load is wrapped in scalar_to_vector so that
// it matches the vector-typed second operand of OpNode.
multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 imm:$src3)), itins.rr>,
                      Sched<[itins.Sched]>;
  defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (scalar_to_vector
                                      (_.ScalarLdFrag addr:$src2))),
                            (i32 imm:$src3)), itins.rm>,
                    Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
9242
// Handle instruction  reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}
//
// Register-only packed form `rrib`. The pattern passes FROUND_NO_EXC as an
// extra operand to OpNode and the asm strings print "{sae}"; the record is
// tagged EVEX_B.
multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode, OpndItins itins,
                                    X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, {sae}, $src2, $src1",
                      "$src1, $src2, {sae}, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 imm:$src3),
                              (i32 FROUND_NO_EXC)), itins.rr>,
                      EVEX_B, Sched<[itins.Sched]>;
}
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00009258
// Handle scalar instruction  reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}
//
// Scalar counterpart of avx512_fp_sae_packed_imm: a register-only `rrib`
// form built on AVX512_maskable_scalar, with FROUND_NO_EXC passed to OpNode.
// Note that the record name is spelled with an explicit NAME# prefix here.
multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                    OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, {sae}, $src2, $src1",
                      "$src1, $src2, {sae}, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 imm:$src3),
                              (i32 FROUND_NO_EXC)), itins.rr>,
                      EVEX_B, Sched<[itins.Sched]>;
}
Elena Demikhovsky42c96d92015-06-01 06:50:49 +00009273
// Instantiate a packed FP op with immediate for all three vector lengths.
//   Z          - 512-bit; gets both the regular forms (OpNode) and the {sae}
//                form (OpNodeRnd). Guarded by `prd`.
//   Z128, Z256 - regular forms only. Guarded by `prd` and HasVLX.
multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
            AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
            SDNode OpNodeRnd, OpndItins itins, Predicate prd>{
  let Predicates = [prd] in {
    defm Z    : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, itins, _.info512>,
                avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd, itins, _.info512>,
                                  EVEX_V512;

  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, itins, _.info128>,
                                  EVEX_V128;
    defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, itins, _.info256>,
                                  EVEX_V256;
  }
}
9290
// Instantiate avx512_3Op_rm_imm8 for 512/128/256-bit vectors, with separate
// destination and source type infos. Every instantiation is tagged
// AVX512AIi8Base and EVEX_4V. The 512-bit form is guarded by `Pred`
// (default HasBWI); the 128/256-bit forms additionally require HasVLX.
multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
                        OpndItins itins, AVX512VLVectorVTInfo DestInfo,
                        AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
  let Predicates = [Pred] in {
    defm Z    : avx512_3Op_rm_imm8<opc, OpStr, OpNode, itins, DestInfo.info512,
                           SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
  }
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, itins, DestInfo.info128,
                           SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
    defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, itins, DestInfo.info256,
                           SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
  }
}
9305
// Instantiate avx512_3Op_imm8 for 512/128/256-bit vectors of one element
// type. The 512-bit form is guarded by `Pred` (default HasAVX512); the
// 128/256-bit forms additionally require HasVLX. Encoding-prefix classes
// other than the vector length are left to the caller.
multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
                                  bits<8> opc, SDNode OpNode, OpndItins itins,
                                  Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in {
    defm Z    : avx512_3Op_imm8<opc, OpcodeStr, OpNode, itins, _.info512>, EVEX_V512;
  }
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, itins, _.info128>, EVEX_V128;
    defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, itins, _.info256>, EVEX_V256;
  }
}
9317
// Instantiate a scalar FP op with immediate: the regular rri/rmi forms
// (OpNode) plus the {sae} register form (OpNodeRnd), all under the single
// record prefix Z128 and guarded by `prd`.
multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
                  X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
                  SDNode OpNodeRnd, OpndItins itins, Predicate prd>{
  let Predicates = [prd] in {
     defm Z128 : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, itins, _>,
                 avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeRnd, itins, _>;
  }
}
9326
// Instantiate a unary packed FP op with immediate for both element widths.
//   PS - f32 vectors, opcode opcPs, itins.s, EVEX_CD8<32, CD8VF>.
//   PD - f64 vectors, opcode opcPd, itins.d, EVEX_CD8<64, CD8VF> and VEX_W.
multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
                    bits<8> opcPs, bits<8> opcPd, SDNode OpNode,
                    SDNode OpNodeRnd, SizeItins itins, Predicate prd>{
  defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
                            opcPs, OpNode, OpNodeRnd, itins.s, prd>,
                            EVEX_CD8<32, CD8VF>;
  defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
                            opcPd, OpNode, OpNodeRnd, itins.d, prd>,
                            EVEX_CD8<64, CD8VF>, VEX_W;
}
9337
// Unary packed FP instructions with an immediate control. Arguments are
// <mnemonic, PS opcode, PD opcode, node, {sae} node, itineraries, predicate>.
// VREDUCE is gated on HasDQI; VRNDSCALE and VGETMANT on HasAVX512.
defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
                              X86VReduce, X86VReduceRnd, SSE_ALU_ITINS_P, HasDQI>,
                              AVX512AIi8Base, EVEX;
defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
                              X86VRndScale, X86VRndScaleRnd, SSE_ALU_ITINS_P, HasAVX512>,
                              AVX512AIi8Base, EVEX;
defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
                              X86VGetMant, X86VGetMantRnd, SSE_ALU_ITINS_P, HasAVX512>,
                              AVX512AIi8Base, EVEX;
Igor Breger1e58e8a2015-09-02 11:18:55 +00009347
// VRANGE: packed (PD/PS, opcode 0x50) and scalar (SD/SS, opcode 0x51)
// variants, all gated on HasDQI. The f64 variants carry VEX_W; the scalar
// variants are VEX_LIG and use the CD8VT1 displacement scaling.
defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
                                                0x50, X86VRange, X86VRangeRnd,
                                                SSE_ALU_F64P, HasDQI>,
      AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
                                                0x50, X86VRange, X86VRangeRnd,
                                                SSE_ALU_F32P, HasDQI>,
      AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;

defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
      f64x_info, 0x51, X86Ranges, X86RangesRnd, SSE_ALU_F64S, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
      0x51, X86Ranges, X86RangesRnd, SSE_ALU_F32S, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
9363
// Scalar VREDUCE (opcode 0x57, gated on HasDQI) and scalar VGETMANT
// (opcode 0x27, gated on HasAVX512). The SD variants carry VEX_W; all four
// are VEX_LIG and use the CD8VT1 displacement scaling.
defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
      0x57, X86Reduces, X86ReducesRnd, SSE_ALU_F64S, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
      0x57, X86Reduces, X86ReducesRnd, SSE_ALU_F32S, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;

defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
      0x27, X86GetMants, X86GetMantsRnd, SSE_ALU_F64S, HasAVX512>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
      0x27, X86GetMants, X86GetMantsRnd, SSE_ALU_F32S, HasAVX512>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
9377
// Lower the generic FP rounding nodes on 512-bit vectors to VRNDSCALE with a
// fixed immediate.
// NOTE(review): immediate meanings below follow the Intel SDM description of
// the VRNDSCALE imm8 (bits[1:0] rounding mode, bit 2 = use MXCSR.RC,
// bit 3 = suppress precision exception) - confirm against the manual.
//   0x9 ffloor, 0xA fceil, 0xB ftrunc - explicit mode, exception suppressed
//   0xC fnearbyint                    - MXCSR mode, exception suppressed
//   0x4 frint                         - MXCSR mode, exception reported
let Predicates = [HasAVX512] in {
def : Pat<(v16f32 (ffloor VR512:$src)),
          (VRNDSCALEPSZrri VR512:$src, (i32 0x9))>;
def : Pat<(v16f32 (fnearbyint VR512:$src)),
          (VRNDSCALEPSZrri VR512:$src, (i32 0xC))>;
def : Pat<(v16f32 (fceil VR512:$src)),
          (VRNDSCALEPSZrri VR512:$src, (i32 0xA))>;
def : Pat<(v16f32 (frint VR512:$src)),
          (VRNDSCALEPSZrri VR512:$src, (i32 0x4))>;
def : Pat<(v16f32 (ftrunc VR512:$src)),
          (VRNDSCALEPSZrri VR512:$src, (i32 0xB))>;

def : Pat<(v8f64 (ffloor VR512:$src)),
          (VRNDSCALEPDZrri VR512:$src, (i32 0x9))>;
def : Pat<(v8f64 (fnearbyint VR512:$src)),
          (VRNDSCALEPDZrri VR512:$src, (i32 0xC))>;
def : Pat<(v8f64 (fceil VR512:$src)),
          (VRNDSCALEPDZrri VR512:$src, (i32 0xA))>;
def : Pat<(v8f64 (frint VR512:$src)),
          (VRNDSCALEPDZrri VR512:$src, (i32 0x4))>;
def : Pat<(v8f64 (ftrunc VR512:$src)),
          (VRNDSCALEPDZrri VR512:$src, (i32 0xB))>;
}
Elena Demikhovsky9e380862015-06-03 10:56:40 +00009401
// Lower the generic FP rounding nodes on 128/256-bit vectors to the VLX
// forms of VRNDSCALE with a fixed immediate.
// NOTE(review): immediate meanings below follow the Intel SDM description of
// the VRNDSCALE imm8 (bits[1:0] rounding mode, bit 2 = use MXCSR.RC,
// bit 3 = suppress precision exception) - confirm against the manual.
//   0x9 ffloor, 0xA fceil, 0xB ftrunc - explicit mode, exception suppressed
//   0xC fnearbyint                    - MXCSR mode, exception suppressed
//   0x4 frint                         - MXCSR mode, exception reported
let Predicates = [HasVLX] in {
// 128-bit, f32 elements.
def : Pat<(v4f32 (ffloor VR128X:$src)),
          (VRNDSCALEPSZ128rri VR128X:$src, (i32 0x9))>;
def : Pat<(v4f32 (fnearbyint VR128X:$src)),
          (VRNDSCALEPSZ128rri VR128X:$src, (i32 0xC))>;
def : Pat<(v4f32 (fceil VR128X:$src)),
          (VRNDSCALEPSZ128rri VR128X:$src, (i32 0xA))>;
def : Pat<(v4f32 (frint VR128X:$src)),
          (VRNDSCALEPSZ128rri VR128X:$src, (i32 0x4))>;
def : Pat<(v4f32 (ftrunc VR128X:$src)),
          (VRNDSCALEPSZ128rri VR128X:$src, (i32 0xB))>;

// 128-bit, f64 elements.
def : Pat<(v2f64 (ffloor VR128X:$src)),
          (VRNDSCALEPDZ128rri VR128X:$src, (i32 0x9))>;
def : Pat<(v2f64 (fnearbyint VR128X:$src)),
          (VRNDSCALEPDZ128rri VR128X:$src, (i32 0xC))>;
def : Pat<(v2f64 (fceil VR128X:$src)),
          (VRNDSCALEPDZ128rri VR128X:$src, (i32 0xA))>;
def : Pat<(v2f64 (frint VR128X:$src)),
          (VRNDSCALEPDZ128rri VR128X:$src, (i32 0x4))>;
def : Pat<(v2f64 (ftrunc VR128X:$src)),
          (VRNDSCALEPDZ128rri VR128X:$src, (i32 0xB))>;

// 256-bit, f32 elements.
def : Pat<(v8f32 (ffloor VR256X:$src)),
          (VRNDSCALEPSZ256rri VR256X:$src, (i32 0x9))>;
def : Pat<(v8f32 (fnearbyint VR256X:$src)),
          (VRNDSCALEPSZ256rri VR256X:$src, (i32 0xC))>;
def : Pat<(v8f32 (fceil VR256X:$src)),
          (VRNDSCALEPSZ256rri VR256X:$src, (i32 0xA))>;
def : Pat<(v8f32 (frint VR256X:$src)),
          (VRNDSCALEPSZ256rri VR256X:$src, (i32 0x4))>;
def : Pat<(v8f32 (ftrunc VR256X:$src)),
          (VRNDSCALEPSZ256rri VR256X:$src, (i32 0xB))>;

// 256-bit, f64 elements.
def : Pat<(v4f64 (ffloor VR256X:$src)),
          (VRNDSCALEPDZ256rri VR256X:$src, (i32 0x9))>;
def : Pat<(v4f64 (fnearbyint VR256X:$src)),
          (VRNDSCALEPDZ256rri VR256X:$src, (i32 0xC))>;
def : Pat<(v4f64 (fceil VR256X:$src)),
          (VRNDSCALEPDZ256rri VR256X:$src, (i32 0xA))>;
def : Pat<(v4f64 (frint VR256X:$src)),
          (VRNDSCALEPDZ256rri VR256X:$src, (i32 0x4))>;
def : Pat<(v4f64 (ftrunc VR256X:$src)),
          (VRNDSCALEPDZ256rri VR256X:$src, (i32 0xB))>;
}
9447
// 128-bit-lane shuffle with immediate for a single vector length.
// The X86Shuf128 node is produced in CastInfo.VT and bitconverted to _.VT,
// so the same node can back instructions of a different element width.
// Forms:
//   rri  - both sources in registers.
//   rmi  - second source is a full-vector load (_.LdFrag).
//   rmbi - second source is a broadcast scalar load (tagged EVEX_B).
multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
                                          OpndItins itins, X86VectorVTInfo _,
                                          X86VectorVTInfo CastInfo> {
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (_.VT (bitconvert
                         (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
                                                  (i8 imm:$src3))))),
                  itins.rr>, Sched<[itins.Sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (_.VT
                 (bitconvert
                  (CastInfo.VT (X86Shuf128 _.RC:$src1,
                                           (bitconvert (_.LdFrag addr:$src2)),
                                           (i8 imm:$src3))))), itins.rm>,
                Sched<[itins.Sched.Folded, ReadAfterLd]>;
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                    OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr##", $src3",
                    (_.VT
                     (bitconvert
                      (CastInfo.VT
                       (X86Shuf128 _.RC:$src1,
                                   (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
                                   (i8 imm:$src3))))), itins.rm>, EVEX_B,
                    Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
9481
// Instantiate the 128-bit-lane shuffle for 512-bit (HasAVX512) and 256-bit
// (HasAVX512 + HasVLX) vectors. No 128-bit variant is defined here.
multiclass avx512_shuff_packed_128<string OpcodeStr, OpndItins itins,
                                   AVX512VLVectorVTInfo _,
                                   AVX512VLVectorVTInfo CastInfo, bits<8> opc>{
  let Predicates = [HasAVX512] in
  defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, itins,
                                          _.info512, CastInfo.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in
  defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, itins,
                                             _.info256, CastInfo.info256>, EVEX_V256;
}
9493
// 128-bit-lane shuffles. The FP variants use opcode 0x23, the integer
// variants 0x43. The 32-bit-element variants pass the 64-bit-element info as
// CastInfo, so the X86Shuf128 node is always formed in a 64-bit element type.
defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", SSE_SHUFP,
      avx512vl_f32_info, avx512vl_f64_info, 0x23>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", SSE_SHUFP,
      avx512vl_f64_info, avx512vl_f64_info, 0x23>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", SSE_SHUFP,
      avx512vl_i32_info, avx512vl_i64_info, 0x43>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", SSE_SHUFP,
      avx512vl_i64_info, avx512vl_i64_info, 0x43>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
Igor Breger00d9f842015-06-08 14:03:17 +00009502
let Predicates = [HasAVX512] in {
// Provide fallback in case the load node that is used in the broadcast
// patterns above is used by additional users, which prevents the pattern
// selection.
// Each pattern places the 128-bit source in the low subregister of an
// otherwise undefined 512-bit value, uses that value for both shuffle
// operands, and shuffles with immediate 0.
def : Pat<(v8f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
          (VSHUFF64X2Zrri (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;
def : Pat<(v8i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
          (VSHUFI64X2Zrri (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;

def : Pat<(v16f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
          (VSHUFF32X4Zrri (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;
def : Pat<(v16i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
          (VSHUFI32X4Zrri (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;

// The i16 and i8 element types reuse the 32-bit-element integer shuffle.
def : Pat<(v32i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
          (VSHUFI32X4Zrri (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;

def : Pat<(v64i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
          (VSHUFI32X4Zrri (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;
}
9535
// VALIGN for one element type: a thin wrapper around avx512_common_3Op_imm8
// that fixes the opcode (0x03) and node (X86VAlign) and adds the
// AVX512AIi8Base and EVEX_4V encoding classes.
multiclass avx512_valign<string OpcodeStr, OpndItins itins,
                         AVX512VLVectorVTInfo VTInfo_I> {
  defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_I, 0x03, X86VAlign, itins>,
                                    AVX512AIi8Base, EVEX_4V;
}
9541
// Element-granular align instructions for 32-bit and 64-bit elements.
defm VALIGND: avx512_valign<"valignd", SSE_PALIGN, avx512vl_i32_info>,
                                                  EVEX_CD8<32, CD8VF>;
defm VALIGNQ: avx512_valign<"valignq", SSE_PALIGN, avx512vl_i64_info>,
                                                  EVEX_CD8<64, CD8VF>, VEX_W;

// VPALIGNR on i8 vectors; uses the default predicate of
// avx512_common_3Op_rm_imm8.
defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr", SSE_PALIGN,
                                         avx512vl_i8_info, avx512vl_i8_info>,
               EVEX_CD8<8, CD8VF>;
9550
// Fragments to help convert valignq into masked valignd, or valignq/valignd
// into vpalignr. Each transform rescales the element-count immediate to the
// element size of the target instruction.

// 64-bit element count -> 32-bit element count.
def ValignqImm32XForm : SDNodeXForm<imm, [{
  return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
}]>;
// 64-bit element count -> byte count.
def ValignqImm8XForm : SDNodeXForm<imm, [{
  return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
}]>;
// 32-bit element count -> byte count.
def ValigndImm8XForm : SDNodeXForm<imm, [{
  return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
}]>;
9562
// Select a masked instruction of type `To` for an align node that was formed
// in type `From` and then bitconverted before the vselect.
//   OpcodeStr - record-name prefix of the `To`-typed instruction; the suffixes
//               rrik/rrikz/rmik/rmikz are appended and looked up via !cast.
//   ImmXForm  - rescales the immediate from `From` elements to `To` elements.
// Four patterns: {register, full-vector load} x {merge into $src0, zero}.
multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
                                        X86VectorVTInfo From, X86VectorVTInfo To,
                                        SDNodeXForm ImmXForm> {
  // Register source, merge-masking.
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                              imm:$src3))),
                            To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
                                                  To.RC:$src1, To.RC:$src2,
                                                  (ImmXForm imm:$src3))>;

  // Register source, zero-masking.
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                              imm:$src3))),
                            To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
                                                   To.RC:$src1, To.RC:$src2,
                                                   (ImmXForm imm:$src3))>;

  // Memory source, merge-masking.
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1,
                                      (bitconvert (To.LdFrag addr:$src2)),
                                      imm:$src3))),
                            To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
                                                  To.RC:$src1, addr:$src2,
                                                  (ImmXForm imm:$src3))>;

  // Memory source, zero-masking.
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1,
                                      (bitconvert (To.LdFrag addr:$src2)),
                                      imm:$src3))),
                            To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
                                                   To.RC:$src1, addr:$src2,
                                                   (ImmXForm imm:$src3))>;
}
9604
// Extends avx512_vpalign_mask_lowering with patterns whose second operand is
// a scalar load broadcast in the `To` type and bitconverted to `From`.
// Adds the unmasked (rmbi), merge-masked (rmbik) and zero-masked (rmbikz)
// broadcast forms.
multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
                                           X86VectorVTInfo From,
                                           X86VectorVTInfo To,
                                           SDNodeXForm ImmXForm> :
      avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
  // Broadcast source, no masking.
  def : Pat<(From.VT (OpNode From.RC:$src1,
                             (bitconvert (To.VT (X86VBroadcast
                                                (To.ScalarLdFrag addr:$src2)))),
                             imm:$src3)),
            (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
                                                  (ImmXForm imm:$src3))>;

  // Broadcast source, merge-masking.
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1,
                                      (bitconvert
                                       (To.VT (X86VBroadcast
                                               (To.ScalarLdFrag addr:$src2)))),
                                      imm:$src3))),
                            To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
                                                   To.RC:$src1, addr:$src2,
                                                   (ImmXForm imm:$src3))>;

  // Broadcast source, zero-masking.
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1,
                                      (bitconvert
                                       (To.VT (X86VBroadcast
                                               (To.ScalarLdFrag addr:$src2)))),
                                      imm:$src3))),
                            To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
                                                    To.RC:$src1, addr:$src2,
                                                    (ImmXForm imm:$src3))>;
}
9641
let Predicates = [HasAVX512] in {
  // For 512-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
                                         v16i32_info, ValignqImm32XForm>;
}

let Predicates = [HasVLX] in {
  // For 128-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
                                         v4i32x_info, ValignqImm32XForm>;
  // For 256-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
                                         v8i32x_info, ValignqImm32XForm>;
}

let Predicates = [HasVLX, HasBWI] in {
  // We can turn 128-bit VALIGND/VALIGNQ into VPALIGNR.
  // NOTE(review): an earlier version of this comment also mentioned 256-bit,
  // but only the 128-bit (VPALIGNRZ128) patterns are instantiated here.
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
                                      v16i8x_info, ValignqImm8XForm>;
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
                                      v16i8x_info, ValigndImm8XForm>;
}
9667
// VDBPSADBW: i16 destination vectors, i8 source vectors; uses the default
// predicate of avx512_common_3Op_rm_imm8.
defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
                SSE_INTMUL_ITINS_P, avx512vl_i16_info, avx512vl_i8_info>,
                EVEX_CD8<8, CD8VF>;
Igor Bregerf3ded812015-08-31 13:09:30 +00009671
// Unary maskable vector op for one vector length.
//   rr - register source.
//   rm - full-vector load source (bitconverted _.LdFrag); additionally tagged
//        with EVEX_CD8<_.EltSize, CD8VF>.
// Both forms are tagged EVEX and AVX5128IBase.
multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src1), OpcodeStr,
                    "$src1", "$src1",
                    (_.VT (OpNode _.RC:$src1)), itins.rr>, EVEX, AVX5128IBase,
                    Sched<[itins.Sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.MemOp:$src1), OpcodeStr,
                  "$src1", "$src1",
                  (_.VT (OpNode (bitconvert (_.LdFrag addr:$src1)))), itins.rm>,
            EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
            Sched<[itins.Sched.Folded]>;
  }
}
9689
// avx512_unary_rm plus the embedded-broadcast form `rmb`, whose source is a
// scalar load broadcast to the vector type (tagged EVEX_B).
multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            OpndItins itins, X86VectorVTInfo _> :
           avx512_unary_rm<opc, OpcodeStr, OpNode, itins, _> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.ScalarMemOp:$src1), OpcodeStr,
                  "${src1}"##_.BroadcastStr,
                  "${src1}"##_.BroadcastStr,
                  (_.VT (OpNode (X86VBroadcast
                                    (_.ScalarLdFrag addr:$src1)))), itins.rm>,
             EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[itins.Sched.Folded]>;
}
9702
// Instantiate avx512_unary_rm (no broadcast form) for all vector lengths:
// Z under `prd`, Z256/Z128 under `prd` plus HasVLX.
multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              OpndItins itins, AVX512VLVectorVTInfo VTInfo,
                              Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, itins, VTInfo.info512>,
                             EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, itins, VTInfo.info256>,
                              EVEX_V256;
    defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, itins, VTInfo.info128>,
                              EVEX_V128;
  }
}
9717
// Instantiate avx512_unary_rmb (including the broadcast form) for all vector
// lengths: Z under `prd`, Z256/Z128 under `prd` plus HasVLX.
multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               OpndItins itins, AVX512VLVectorVTInfo VTInfo,
                               Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info512>,
                              EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info256>,
                                 EVEX_V256;
    defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info128>,
                                 EVEX_V128;
  }
}
9732
// Dword/qword variants of a unary op, both with broadcast forms. The mnemonic
// gets a "q" or "d" suffix; the Q variant additionally carries VEX_W.
multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, OpndItins itins, Predicate prd> {
  defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, itins,
                               avx512vl_i64_info, prd>, VEX_W;
  defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, itins,
                               avx512vl_i32_info, prd>;
}
9740
// Byte/word variants of a unary op. These use avx512_unary_rm_vl, so no
// broadcast form is generated. The mnemonic gets a "w" or "b" suffix and both
// variants are tagged VEX_WIG.
multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, OpndItins itins, Predicate prd> {
  defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, itins,
                              avx512vl_i16_info, prd>, VEX_WIG;
  defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, itins,
                              avx512vl_i8_info, prd>, VEX_WIG;
}
9748
// All four integer element widths of a unary op: D/Q variants gated on
// HasAVX512, B/W variants gated on HasBWI.
multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  OpndItins itins> {
  defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, itins,
                                    HasAVX512>,
              avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, itins,
                                    HasBWI>;
}
9758
// Opcode arguments are in b, w, d, q order.
defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs, SSE_PABS>;
Igor Bregerf2460112015-07-26 14:41:44 +00009760
// VPABS: Use 512bit version to implement 128/256 bit in case NoVLX.
// The narrow source is inserted into an undefined 512-bit value, VPABSQZrr is
// applied, and the matching subregister is extracted from the result.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v4i64 (abs VR256X:$src)),
            (EXTRACT_SUBREG
                (VPABSQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
             sub_ymm)>;
  def : Pat<(v2i64 (abs VR128X:$src)),
            (EXTRACT_SUBREG
                (VPABSQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
             sub_xmm)>;
}
9774
// Use 512bit version to implement 128/256 bit in case NoVLX.
//   InstrStr - record-name prefix; "Zrr" is appended and looked up via !cast.
//   prd      - feature predicate, combined with NoVLX.
// The narrow source is inserted into an undefined 512-bit value, the 512-bit
// instruction is applied, and the matching subregister is extracted.
multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
                                 AVX512VLVectorVTInfo _, Predicate prd> {
  let Predicates = [prd, NoVLX] in {
    def : Pat<(_.info256.VT(OpNode _.info256.RC:$src1)),
              (EXTRACT_SUBREG
                (!cast<Instruction>(InstrStr # "Zrr")
                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
                                 _.info256.RC:$src1,
                                 _.info256.SubRegIdx)),
               _.info256.SubRegIdx)>;

    def : Pat<(_.info128.VT(OpNode _.info128.RC:$src1)),
              (EXTRACT_SUBREG
                (!cast<Instruction>(InstrStr # "Zrr")
                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
                                 _.info128.RC:$src1,
                                 _.info128.SubRegIdx)),
               _.info128.SubRegIdx)>;
  }
}
9796
// FIXME: Is there a better scheduler itinerary for VPLZCNT?
defm VPLZCNT    : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
                                        SSE_INTALU_ITINS_P, HasCDI>;

// FIXME: Is there a better scheduler itinerary for VPCONFLICT?
defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
                                        SSE_INTALU_ITINS_P, HasCDI>;

// VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;
Simon Pilgrimc89aa0b2017-05-05 12:20:34 +00009808
Igor Breger24cab0f2015-11-16 07:22:00 +00009809//===---------------------------------------------------------------------===//
Oren Ben Simhon7bf27f02017-05-25 13:45:23 +00009810// Counts number of ones - VPOPCNTD and VPOPCNTQ
9811//===---------------------------------------------------------------------===//
9812
Simon Pilgrim756348c2017-11-29 13:49:51 +00009813// FIXME: Is there a better scheduler itinerary for VPOPCNTD/VPOPCNTQ?
// Population count (ctpop), dword/qword forms, gated on HasVPOPCNTDQ.
Craig Topperc0896052017-12-16 02:40:28 +00009814defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
9815 SSE_INTALU_ITINS_P, HasVPOPCNTDQ>;
Simon Pilgrim756348c2017-11-29 13:49:51 +00009816
// VPOPCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
Craig Topperc0896052017-12-16 02:40:28 +00009817defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
9818defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
Oren Ben Simhon7bf27f02017-05-25 13:45:23 +00009819
Oren Ben Simhon7bf27f02017-05-25 13:45:23 +00009819
9820//===---------------------------------------------------------------------===//
Igor Breger24cab0f2015-11-16 07:22:00 +00009821// Replicate Single FP - MOVSHDUP and MOVSLDUP
9822//===---------------------------------------------------------------------===//
// Instantiates avx512_unary_rm_vl for single-precision vectors
// (avx512vl_f32_info) under HasAVX512, with the XS prefix.
//   opc       - opcode byte.
//   OpcodeStr - assembly mnemonic.
//   OpNode    - DAG node selected to the instruction.
//   itins     - itinerary/scheduling bundle forwarded unchanged.
Simon Pilgrim756348c2017-11-29 13:49:51 +00009823multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
9824 OpndItins itins> {
9825 defm NAME: avx512_unary_rm_vl<opc, OpcodeStr, OpNode, itins,
9826 avx512vl_f32_info, HasAVX512>, XS;
Igor Breger24cab0f2015-11-16 07:22:00 +00009827}
9828
// Both instructions reuse the SSE_MOVDDUP itinerary bundle.
Simon Pilgrim756348c2017-11-29 13:49:51 +00009829defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup, SSE_MOVDDUP>;
9830defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup, SSE_MOVDDUP>;
Igor Breger1f782962015-11-19 08:26:56 +00009831
Igor Breger1f782962015-11-19 08:26:56 +00009831
9832//===----------------------------------------------------------------------===//
9833// AVX-512 - MOVDDUP
9834//===----------------------------------------------------------------------===//
9835
// 128-bit MOVDDUP forms (maskable).
//   rr - register source; pattern is OpNode applied to the source vector.
//   rm - scalar memory source (_.ScalarMemOp); the loaded scalar is wrapped in
//        scalar_to_vector before OpNode is applied. Uses CD8VH compressed
//        displacement scaling.
9836multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim756348c2017-11-29 13:49:51 +00009837 OpndItins itins, X86VectorVTInfo _> {
Craig Toppere9e84c82017-01-31 05:18:24 +00009838 let ExeDomain = _.ExeDomain in {
Igor Breger1f782962015-11-19 08:26:56 +00009839 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9840 (ins _.RC:$src), OpcodeStr, "$src", "$src",
Simon Pilgrim756348c2017-11-29 13:49:51 +00009841 (_.VT (OpNode (_.VT _.RC:$src))), itins.rr>, EVEX,
9842 Sched<[itins.Sched]>;
Craig Toppere1cac152016-06-07 07:27:54 +00009843 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9844 (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
9845 (_.VT (OpNode (_.VT (scalar_to_vector
Simon Pilgrim756348c2017-11-29 13:49:51 +00009846 (_.ScalarLdFrag addr:$src))))),
9847 itins.rm>, EVEX, EVEX_CD8<_.EltSize, CD8VH>,
9848 Sched<[itins.Sched.Folded]>;
Craig Toppere9e84c82017-01-31 05:18:24 +00009849 }
Igor Breger1f782962015-11-19 08:26:56 +00009850}
9851
// Expands MOVDDUP across the three vector lengths.
//   opc       - opcode byte.
//   OpcodeStr - assembly mnemonic.
//   OpNode    - DAG node selected for the 512- and 256-bit forms. It was
//               previously accepted but ignored in favour of a hard-coded
//               X86Movddup; the only caller visible here passes X86Movddup,
//               so the generated records are unchanged.
//   itins     - itinerary/scheduling bundle forwarded to each form.
//   VTInfo    - supplies info512 / info256 / info128.
multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 OpndItins itins, AVX512VLVectorVTInfo VTInfo> {

  defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, itins, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, itins, VTInfo.info256>,
                EVEX_V256;
    // The 128-bit form is selected from X86VBroadcast rather than OpNode; the
    // v2f64 X86VBroadcast patterns that map to VMOVDDUPZ128* rely on this.
    defm Z128 : avx512_movddup_128<opc, OpcodeStr, X86VBroadcast, itins,
                                   VTInfo.info128>, EVEX_V128;
  }
}
9864
// Binds avx512_movddup_common to double-precision vectors (avx512vl_f64_info)
// and applies the XD prefix and VEX_W.
Simon Pilgrim756348c2017-11-29 13:49:51 +00009865multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode,
9866 OpndItins itins> {
9867 defm NAME: avx512_movddup_common<opc, OpcodeStr, OpNode, itins,
Igor Breger1f782962015-11-19 08:26:56 +00009868 avx512vl_f64_info>, XD, VEX_W;
Igor Breger1f782962015-11-19 08:26:56 +00009869}
9870
Simon Pilgrim756348c2017-11-29 13:49:51 +00009871defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SSE_MOVDDUP>;
Igor Breger1f782962015-11-19 08:26:56 +00009872
// With VLX, select a v2f64 X86VBroadcast to VMOVDDUPZ128. Sources covered:
// a scalar f64 load, a scalar f64 register (copied into VR128X), and a v2f64
// load. Each has an unmasked pattern plus merge-masked (k) and zero-masked
// (kz) vselect patterns.
Craig Topper7eb0e7c2016-09-29 05:54:43 +00009873let Predicates = [HasVLX] in {
Igor Breger1f782962015-11-19 08:26:56 +00009874def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
Craig Topper7eb0e7c2016-09-29 05:54:43 +00009875 (VMOVDDUPZ128rm addr:$src)>;
9876def : Pat<(v2f64 (X86VBroadcast f64:$src)),
9877 (VMOVDDUPZ128rr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
Craig Topperf6c69562017-10-13 21:56:48 +00009878def : Pat<(v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
9879 (VMOVDDUPZ128rm addr:$src)>;
Craig Topperda84ff32017-01-07 22:20:23 +00009880
// Masked, scalar register source.
9881def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
9882 (v2f64 VR128X:$src0)),
9883 (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
9884 (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
9885def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
9886 (bitconvert (v4i32 immAllZerosV))),
9887 (VMOVDDUPZ128rrkz VK2WM:$mask, (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
9888
// Masked, scalar f64 load source.
9889def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
9890 (v2f64 VR128X:$src0)),
9891 (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9892def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
9893 (bitconvert (v4i32 immAllZerosV))),
9894 (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
Craig Topperf6c69562017-10-13 21:56:48 +00009895
// Masked, v2f64 load source.
9896def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
9897 (v2f64 VR128X:$src0)),
9898 (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9899def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
9900 (bitconvert (v4i32 immAllZerosV))),
9901 (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
Craig Topper7eb0e7c2016-09-29 05:54:43 +00009902}
Igor Breger1f782962015-11-19 08:26:56 +00009903
Igor Bregerf2460112015-07-26 14:41:44 +00009904//===----------------------------------------------------------------------===//
9905// AVX-512 - Unpack Instructions
9906//===----------------------------------------------------------------------===//
// Floating-point unpack high/low, built on X86Unpckh / X86Unpckl.
Craig Topper9433f972016-08-02 06:16:53 +00009907defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512,
9908 SSE_ALU_ITINS_S>;
9909defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512,
9910 SSE_ALU_ITINS_S>;
Igor Bregerf2460112015-07-26 14:41:44 +00009911
// Integer unpacks on byte and word elements, gated on HasBWI.
9912defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
9913 SSE_INTALU_ITINS_P, HasBWI>;
9914defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
9915 SSE_INTALU_ITINS_P, HasBWI>;
9916defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
9917 SSE_INTALU_ITINS_P, HasBWI>;
9918defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
9919 SSE_INTALU_ITINS_P, HasBWI>;
9920
// Integer unpacks on dword and qword elements, gated on HasAVX512.
9921defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
9922 SSE_INTALU_ITINS_P, HasAVX512>;
9923defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
9924 SSE_INTALU_ITINS_P, HasAVX512>;
9925defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
9926 SSE_INTALU_ITINS_P, HasAVX512>;
9927defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
9928 SSE_INTALU_ITINS_P, HasAVX512>;
Igor Bregerdefab3c2015-10-08 12:55:01 +00009929
Igor Bregerdefab3c2015-10-08 12:55:01 +00009929
9930//===----------------------------------------------------------------------===//
9931// AVX-512 - Extract & Insert Integer Instructions
9932//===----------------------------------------------------------------------===//
9933
// Memory-destination form shared by the byte and word element extracts.
// The pattern stores OpNode(vector, imm) truncated to the element type
// (_.EltVT) to $dst. Uses CD8VT1 compressed displacement scaling.
9934multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
9935 X86VectorVTInfo _> {
Craig Toppere1cac152016-06-07 07:27:54 +00009936 def mr : AVX512Ii8<opc, MRMDestMem, (outs),
9937 (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
9938 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
Simon Pilgrim1dcb9132017-10-23 16:00:57 +00009939 [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), imm:$src2))),
9940 addr:$dst)]>,
Craig Topper05af43f2018-01-24 17:58:57 +00009941 EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteShuffleLd, WriteRMW]>;
Igor Bregerdefab3c2015-10-08 12:55:01 +00009942}
9943
// Byte element extract, gated on HasBWI.
//   rr - writes X86pextrb(vector, imm) to a GR32orGR64 register
//        (opcode 0x14, MRMDestReg, TAPD).
//   mr - memory-destination form supplied by avx512_extract_elt_bw_m.
9944multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
9945 let Predicates = [HasBWI] in {
9946 def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
9947 (ins _.RC:$src1, u8imm:$src2),
9948 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
9949 [(set GR32orGR64:$dst,
9950 (X86pextrb (_.VT _.RC:$src1), imm:$src2))]>,
Simon Pilgrimd255a622017-12-06 18:46:06 +00009951 EVEX, TAPD, Sched<[WriteShuffle]>;
Igor Bregerdefab3c2015-10-08 12:55:01 +00009952
9953 defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
9954 }
9955}
9956
// Word element extract, gated on HasBWI.
//   rr     - opcode 0xC5, MRMSrcReg, PD; selected from X86pextrw.
//   rr_REV - second register encoding at opcode 0x15, MRMDestReg, TAPD. It has
//            no selection pattern, prints with a ".s" mnemonic suffix, and is
//            tied to rr through FoldGenData.
//   mr     - memory-destination form (opcode 0x15) from
//            avx512_extract_elt_bw_m.
9957multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
9958 let Predicates = [HasBWI] in {
9959 def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
9960 (ins _.RC:$src1, u8imm:$src2),
9961 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
9962 [(set GR32orGR64:$dst,
Simon Pilgrimd255a622017-12-06 18:46:06 +00009963 (X86pextrw (_.VT _.RC:$src1), imm:$src2))],
9964 IIC_SSE_PEXTRW>, EVEX, PD, Sched<[WriteShuffle]>;
Igor Bregerdefab3c2015-10-08 12:55:01 +00009965
Craig Topper99f6b622016-05-01 01:03:56 +00009966 let hasSideEffects = 0 in
Igor Breger55747302015-11-18 08:46:16 +00009967 def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
9968 (ins _.RC:$src1, u8imm:$src2),
Simon Pilgrimd255a622017-12-06 18:46:06 +00009969 OpcodeStr#".s\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
9970 IIC_SSE_PEXTRW>, EVEX, TAPD, FoldGenData<NAME#rr>,
9971 Sched<[WriteShuffle]>;
Igor Breger55747302015-11-18 08:46:16 +00009972
Igor Bregerdefab3c2015-10-08 12:55:01 +00009973 defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
9974 }
9975}
9976
// Dword/qword element extract, gated on HasDQI. Both forms use opcode 0x16
// and the generic extractelt node.
//   GRC - general-purpose register class that receives the element in the
//         rr form.
//   rr  - register destination.
//   mr  - stores the extracted element to memory; CD8VT1 displacement scaling.
9977multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
9978 RegisterClass GRC> {
9979 let Predicates = [HasDQI] in {
9980 def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
9981 (ins _.RC:$src1, u8imm:$src2),
9982 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
9983 [(set GRC:$dst,
9984 (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
Simon Pilgrimd255a622017-12-06 18:46:06 +00009985 EVEX, TAPD, Sched<[WriteShuffle]>;
Igor Bregerdefab3c2015-10-08 12:55:01 +00009986
Craig Toppere1cac152016-06-07 07:27:54 +00009987 def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
9988 (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
9989 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
9990 [(store (extractelt (_.VT _.RC:$src1),
9991 imm:$src2),addr:$dst)]>,
Simon Pilgrimd255a622017-12-06 18:46:06 +00009992 EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
Craig Topper05af43f2018-01-24 17:58:57 +00009993 Sched<[WriteShuffleLd, WriteRMW]>;
Igor Bregerdefab3c2015-10-08 12:55:01 +00009994 }
9995}
9996
// Element extracts from 128-bit vectors. The byte/word forms are marked
// VEX_WIG; the qword form sets VEX_W; the dword form sets neither.
Craig Toppera33846a2017-10-22 06:18:23 +00009997defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG;
9998defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG;
Igor Bregerdefab3c2015-10-08 12:55:01 +00009999defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
10000defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;
10001
// Memory-source element insert shared by all element sizes.
//   OpNode - insert node applied to (vector, loaded scalar, imm).
//   LdFrag - load fragment used to read the scalar from $src2.
// Uses CD8VT1 compressed displacement scaling.
10002multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
10003 X86VectorVTInfo _, PatFrag LdFrag> {
10004 def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
10005 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10006 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
10007 [(set _.RC:$dst,
10008 (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), imm:$src3)))]>,
Simon Pilgrimd255a622017-12-06 18:46:06 +000010009 EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteShuffleLd, ReadAfterLd]>;
Igor Bregerdefab3c2015-10-08 12:55:01 +000010010}
10011
// Byte/word element insert, gated on HasBWI.
//   rr - scalar comes from a GR32orGR64 register.
//   rm - memory-source form from avx512_insert_elt_m using LdFrag.
// No opcode-map prefix is applied here; the instantiation site supplies it.
10012multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
10013 X86VectorVTInfo _, PatFrag LdFrag> {
10014 let Predicates = [HasBWI] in {
10015 def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
10016 (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
10017 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
10018 [(set _.RC:$dst,
Simon Pilgrimd255a622017-12-06 18:46:06 +000010019 (OpNode _.RC:$src1, GR32orGR64:$src2, imm:$src3))]>, EVEX_4V,
10020 Sched<[WriteShuffle]>;
Igor Bregerdefab3c2015-10-08 12:55:01 +000010021
10022 defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag>;
10023 }
10024}
10025
// Dword/qword element insert, gated on HasDQI; both forms use the generic
// insertelt node and the TAPD prefix.
//   GRC - general-purpose register class supplying the scalar in the rr form.
//   rm  - memory-source form from avx512_insert_elt_m using _.ScalarLdFrag.
10026multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
10027 X86VectorVTInfo _, RegisterClass GRC> {
10028 let Predicates = [HasDQI] in {
10029 def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
10030 (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
10031 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
10032 [(set _.RC:$dst,
10033 (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
Simon Pilgrimd255a622017-12-06 18:46:06 +000010034 EVEX_4V, TAPD, Sched<[WriteShuffle]>;
Igor Bregerdefab3c2015-10-08 12:55:01 +000010035
10036 defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
10037 _.ScalarLdFrag>, TAPD;
10038 }
10039}
10040
// Element inserts into 128-bit vectors. The byte/word forms load through
// extloadi8 / extloadi16 and take their prefix (TAPD / PD) here; the
// dword/qword forms share opcode 0x22 and differ by VEX_W.
10041defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
Craig Toppera33846a2017-10-22 06:18:23 +000010042 extloadi8>, TAPD, VEX_WIG;
Igor Bregerdefab3c2015-10-08 12:55:01 +000010043defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
Craig Toppera33846a2017-10-22 06:18:23 +000010044 extloadi16>, PD, VEX_WIG;
Igor Bregerdefab3c2015-10-08 12:55:01 +000010045defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
10046defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;
Simon Pilgrim36be8522017-11-29 18:52:20 +000010047
Igor Bregera6297c72015-09-02 10:50:58 +000010048//===----------------------------------------------------------------------===//
10049// VSHUFPS - VSHUFPD Operations
10050//===----------------------------------------------------------------------===//
Simon Pilgrim36be8522017-11-29 18:52:20 +000010051
// VSHUFPS/VSHUFPD: three-operand shuffle with an 8-bit immediate (opcode 0xC6,
// X86Shufp) instantiated over the floating-point type info.
//   VTInfo_I  - integer type info; accepted but not referenced in this body.
//   VTInfo_FP - floating-point type info used for the instruction and for the
//               CD8VF displacement element size.
Igor Bregera6297c72015-09-02 10:50:58 +000010052multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
10053 AVX512VLVectorVTInfo VTInfo_FP>{
Simon Pilgrim36be8522017-11-29 18:52:20 +000010054 defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
10055 SSE_SHUFP>, EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
10056 AVX512AIi8Base, EVEX_4V;
Igor Bregera6297c72015-09-02 10:50:58 +000010057}
10058
10059defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS;
10060defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W;
Simon Pilgrim36be8522017-11-29 18:52:20 +000010061
Asaf Badouhd2c35992015-09-02 14:21:54 +000010062//===----------------------------------------------------------------------===//
10063// AVX-512 - Byte shift Left/Right
10064//===----------------------------------------------------------------------===//
10065
// Itinerary bundle for the whole-register byte shifts: both itinerary slots
// are IIC_SSE_INTSHDQ_P_RI and the scheduling class is WriteVecShift.
Simon Pilgrim13d449d2017-12-05 20:16:22 +000010066let Sched = WriteVecShift in
10067def AVX512_BYTESHIFT : OpndItins<
10068 IIC_SSE_INTSHDQ_P_RI, IIC_SSE_INTSHDQ_P_RI
10069>;
10070
// Byte-shift-by-immediate forms for one vector width.
//   MRMr / MRMm - ModRM formats for the register and memory variants.
//   rr - shifts a register source by the i8 immediate.
//   rm - shifts a full-vector load (bitconverted to _.VT) by the immediate.
Asaf Badouhd2c35992015-09-02 14:21:54 +000010071multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
Simon Pilgrim13d449d2017-12-05 20:16:22 +000010072 Format MRMm, string OpcodeStr,
10073 OpndItins itins, X86VectorVTInfo _>{
Asaf Badouhd2c35992015-09-02 14:21:54 +000010074 def rr : AVX512<opc, MRMr,
10075 (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
10076 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
Simon Pilgrim13d449d2017-12-05 20:16:22 +000010077 [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 imm:$src2))))],
10078 itins.rr>, Sched<[itins.Sched]>;
Craig Toppere1cac152016-06-07 07:27:54 +000010079 def rm : AVX512<opc, MRMm,
10080 (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
10081 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
10082 [(set _.RC:$dst,(_.VT (OpNode
Simon Pilgrim255fdd02016-06-11 12:54:37 +000010083 (_.VT (bitconvert (_.LdFrag addr:$src1))),
Simon Pilgrim13d449d2017-12-05 20:16:22 +000010084 (i8 imm:$src2))))], itins.rm>,
10085 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Asaf Badouhd2c35992015-09-02 14:21:54 +000010086}
10087
// Instantiates avx512_shift_packed for 512-bit (under prd) and for 256/128-bit
// (under prd plus HasVLX) byte vectors.
Simon Pilgrim18bcf932016-02-03 09:41:59 +000010088multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
Simon Pilgrim13d449d2017-12-05 20:16:22 +000010089 Format MRMm, string OpcodeStr,
10090 OpndItins itins, Predicate prd>{
Asaf Badouhd2c35992015-09-02 14:21:54 +000010091 let Predicates = [prd] in
Craig Topperaa904d52017-12-10 17:42:39 +000010092 defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm,
10093 OpcodeStr, itins, v64i8_info>, EVEX_V512;
Asaf Badouhd2c35992015-09-02 14:21:54 +000010094 let Predicates = [prd, HasVLX] in {
Simon Pilgrim18bcf932016-02-03 09:41:59 +000010095 defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm,
Simon Pilgrim13d449d2017-12-05 20:16:22 +000010096 OpcodeStr, itins, v32i8x_info>, EVEX_V256;
Simon Pilgrim18bcf932016-02-03 09:41:59 +000010097 defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm,
Simon Pilgrim13d449d2017-12-05 20:16:22 +000010098 OpcodeStr, itins, v16i8x_info>, EVEX_V128;
Asaf Badouhd2c35992015-09-02 14:21:54 +000010099 }
10100}
// VPSLLDQ and VPSRLDQ share opcode 0x73 and are distinguished by the ModRM
// format pair (MRM7r/MRM7m vs. MRM3r/MRM3m). Both require HasBWI.
Simon Pilgrim18bcf932016-02-03 09:41:59 +000010101defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
Simon Pilgrim13d449d2017-12-05 20:16:22 +000010102 AVX512_BYTESHIFT, HasBWI>, AVX512PDIi8Base,
10103 EVEX_4V, VEX_WIG;
Simon Pilgrim18bcf932016-02-03 09:41:59 +000010104defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
Simon Pilgrim13d449d2017-12-05 20:16:22 +000010105 AVX512_BYTESHIFT, HasBWI>, AVX512PDIi8Base,
10106 EVEX_4V, VEX_WIG;
Asaf Badouhd2c35992015-09-02 14:21:54 +000010107
10108
// Two-source instruction whose result type differs from its source type.
//   _dst - type info for the result register class / VT.
//   _src - type info for both source operands.
//   rr   - both sources in registers.
//   rm   - second source is a full-vector load bitconverted to _src.VT.
Simon Pilgrim18bcf932016-02-03 09:41:59 +000010109multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
Simon Pilgrim4d08aed2017-12-05 14:59:40 +000010110 string OpcodeStr, OpndItins itins,
10111 X86VectorVTInfo _dst, X86VectorVTInfo _src> {
Asaf Badouhd2c35992015-09-02 14:21:54 +000010112 def rr : AVX512BI<opc, MRMSrcReg,
Cong Houdb6220f2015-11-24 19:51:26 +000010113 (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
Asaf Badouhd2c35992015-09-02 14:21:54 +000010114 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
Cong Houdb6220f2015-11-24 19:51:26 +000010115 [(set _dst.RC:$dst,(_dst.VT
10116 (OpNode (_src.VT _src.RC:$src1),
Simon Pilgrim4d08aed2017-12-05 14:59:40 +000010117 (_src.VT _src.RC:$src2))))], itins.rr>,
10118 Sched<[itins.Sched]>;
Craig Toppere1cac152016-06-07 07:27:54 +000010119 def rm : AVX512BI<opc, MRMSrcMem,
10120 (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
10121 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
10122 [(set _dst.RC:$dst,(_dst.VT
10123 (OpNode (_src.VT _src.RC:$src1),
10124 (_src.VT (bitconvert
Simon Pilgrim4d08aed2017-12-05 14:59:40 +000010125 (_src.LdFrag addr:$src2))))))], itins.rm>,
10126 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Asaf Badouhd2c35992015-09-02 14:21:54 +000010127}
10128
// Instantiates avx512_psadbw_packed for each vector width, pairing a byte
// source vector with a qword result vector of the same total width
// (v64i8 -> v8i64, v32i8 -> v4i64, v16i8 -> v2i64). The 256/128-bit forms
// additionally require HasVLX.
Simon Pilgrim18bcf932016-02-03 09:41:59 +000010129multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
Simon Pilgrim4d08aed2017-12-05 14:59:40 +000010130 string OpcodeStr, OpndItins itins,
10131 Predicate prd> {
Asaf Badouhd2c35992015-09-02 14:21:54 +000010132 let Predicates = [prd] in
Craig Topperaa904d52017-12-10 17:42:39 +000010133 defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, itins, v8i64_info,
10134 v64i8_info>, EVEX_V512;
Asaf Badouhd2c35992015-09-02 14:21:54 +000010135 let Predicates = [prd, HasVLX] in {
Simon Pilgrim4d08aed2017-12-05 14:59:40 +000010136 defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, itins, v4i64x_info,
Cong Houdb6220f2015-11-24 19:51:26 +000010137 v32i8x_info>, EVEX_V256;
Simon Pilgrim4d08aed2017-12-05 14:59:40 +000010138 defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, itins, v2i64x_info,
Cong Houdb6220f2015-11-24 19:51:26 +000010139 v16i8x_info>, EVEX_V128;
Asaf Badouhd2c35992015-09-02 14:21:54 +000010140 }
10141}
10142
Simon Pilgrim18bcf932016-02-03 09:41:59 +000010143defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
Simon Pilgrim4d08aed2017-12-05 14:59:40 +000010144 SSE_MPSADBW_ITINS, HasBWI>, EVEX_4V, VEX_WIG;
Igor Bregerb4bb1902015-10-15 12:33:24 +000010145
Craig Topper4e794c72017-02-19 19:36:58 +000010146// Transforms to swizzle an immediate to enable better matching when
10147// memory operand isn't in the right place.
10148def VPTERNLOG321_imm8 : SDNodeXForm<imm, [{
10149 // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
 // Each bit position i of the immediate is one truth-table entry; exchanging
 // two operands exchanges the entries whose positions differ only in those
 // operands' index bits. Bits 0, 2, 5 and 7 (mask 0xa5) are unaffected.
10150 uint8_t Imm = N->getZExtValue();
10151 // Swap bits 1/4 and 3/6.
10152 uint8_t NewImm = Imm & 0xa5;
10153 if (Imm & 0x02) NewImm |= 0x10;
10154 if (Imm & 0x10) NewImm |= 0x02;
10155 if (Imm & 0x08) NewImm |= 0x40;
10156 if (Imm & 0x40) NewImm |= 0x08;
10157 return getI8Imm(NewImm, SDLoc(N));
10158}]>;
10159def VPTERNLOG213_imm8 : SDNodeXForm<imm, [{
10160 // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
 // Bits 0, 1, 6 and 7 (mask 0xc3) are unaffected by this swap.
10161 uint8_t Imm = N->getZExtValue();
10162 // Swap bits 2/4 and 3/5.
10163 uint8_t NewImm = Imm & 0xc3;
Craig Toppera5fa2e42017-02-20 07:00:34 +000010164 if (Imm & 0x04) NewImm |= 0x10;
10165 if (Imm & 0x10) NewImm |= 0x04;
Craig Topper4e794c72017-02-19 19:36:58 +000010166 if (Imm & 0x08) NewImm |= 0x20;
10167 if (Imm & 0x20) NewImm |= 0x08;
10168 return getI8Imm(NewImm, SDLoc(N));
10169}]>;
Craig Topper48905772017-02-19 21:32:15 +000010170def VPTERNLOG132_imm8 : SDNodeXForm<imm, [{
10171 // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
 // Bits 0, 3, 4 and 7 (mask 0x99) are unaffected by this swap.
10172 uint8_t Imm = N->getZExtValue();
10173 // Swap bits 1/2 and 5/6.
10174 uint8_t NewImm = Imm & 0x99;
10175 if (Imm & 0x02) NewImm |= 0x04;
10176 if (Imm & 0x04) NewImm |= 0x02;
10177 if (Imm & 0x20) NewImm |= 0x40;
10178 if (Imm & 0x40) NewImm |= 0x20;
10179 return getI8Imm(NewImm, SDLoc(N));
10180}]>;
// The immediate is an 8-entry truth table indexed by
// (operand0 << 2) | (operand1 << 1) | operand2. The two transforms below
// handle the cyclic operand rotations. They are named after the order in which
// $src1/$src2/$src3 appear in the matched node, and they produce the immediate
// for the instruction's (src1, src2, src3) order.
//
// The two bodies were previously exchanged, so each applied the inverse
// rotation. For example, with the node order (src2, src3, src1), Imm 0xF0
// ("select operand 0", i.e. src2) became 0xAA (select src3) instead of 0xCC
// (select src2).
def VPTERNLOG231_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate for a node whose operands are ordered
  // (src2, src3, src1), i.e. the node's last operand moves to the beginning.
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG312_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate for a node whose operands are ordered
  // (src3, src1, src2), i.e. the node's first operand moves to the end.
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x08;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
Craig Topper4e794c72017-02-19 19:36:58 +000010207
// Three-source ternary-logic instruction for one vector width.
//   rri  - all sources in registers.
//   rmi  - third source is a full-vector load.
//   rmbi - third source is a broadcast scalar load (EVEX_B).
// $src1 is tied to $dst. The extra patterns after the instruction definitions
// match nodes whose passthru, load or broadcast operand is in a different
// position, reorder the operands into (src1, src2, src3), and rewrite the
// immediate with the matching VPTERNLOG*_imm8 transform.
Igor Bregerb4bb1902015-10-15 12:33:24 +000010208multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrimbb791b32017-11-30 13:18:06 +000010209 OpndItins itins, X86VectorVTInfo _>{
Craig Topper05948fb2016-08-02 05:11:15 +000010210 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
Igor Bregerb4bb1902015-10-15 12:33:24 +000010211 defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
10212 (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
Igor Breger252c2d92016-02-22 12:37:41 +000010213 OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
Igor Bregerb4bb1902015-10-15 12:33:24 +000010214 (OpNode (_.VT _.RC:$src1),
10215 (_.VT _.RC:$src2),
10216 (_.VT _.RC:$src3),
Simon Pilgrimbb791b32017-11-30 13:18:06 +000010217 (i8 imm:$src4)), itins.rr, 1, 1>,
10218 AVX512AIi8Base, EVEX_4V, Sched<[itins.Sched]>;
Craig Toppere1cac152016-06-07 07:27:54 +000010219 defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
10220 (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
10221 OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
10222 (OpNode (_.VT _.RC:$src1),
10223 (_.VT _.RC:$src2),
10224 (_.VT (bitconvert (_.LdFrag addr:$src3))),
Simon Pilgrimbb791b32017-11-30 13:18:06 +000010225 (i8 imm:$src4)), itins.rm, 1, 0>,
10226 AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
10227 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Craig Toppere1cac152016-06-07 07:27:54 +000010228 defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
10229 (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
10230 OpcodeStr, "$src4, ${src3}"##_.BroadcastStr##", $src2",
10231 "$src2, ${src3}"##_.BroadcastStr##", $src4",
10232 (OpNode (_.VT _.RC:$src1),
10233 (_.VT _.RC:$src2),
10234 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
Simon Pilgrimbb791b32017-11-30 13:18:06 +000010235 (i8 imm:$src4)), itins.rm, 1, 0>, EVEX_B,
10236 AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
10237 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Igor Bregerb4bb1902015-10-15 12:33:24 +000010238 }// Constraints = "$src1 = $dst"
Craig Topper4e794c72017-02-19 19:36:58 +000010239
10240 // Additional patterns for matching passthru operand in other positions.
Craig Topper4e794c72017-02-19 19:36:58 +000010241 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10242 (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
10243 _.RC:$src1)),
10244 (!cast<Instruction>(NAME#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
10245 _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
10246 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10247 (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 imm:$src4)),
10248 _.RC:$src1)),
10249 (!cast<Instruction>(NAME#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
10250 _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
Craig Topper48905772017-02-19 21:32:15 +000010251
10252 // Additional patterns for matching loads in other positions.
10253 def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)),
10254 _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
10255 (!cast<Instruction>(NAME#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
10256 addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
10257 def : Pat<(_.VT (OpNode _.RC:$src1,
10258 (bitconvert (_.LdFrag addr:$src3)),
10259 _.RC:$src2, (i8 imm:$src4))),
10260 (!cast<Instruction>(NAME#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
10261 addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
10262
10263 // Additional patterns for matching zero masking with loads in other
10264 // positions.
Craig Topper48905772017-02-19 21:32:15 +000010265 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10266 (OpNode (bitconvert (_.LdFrag addr:$src3)),
10267 _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
10268 _.ImmAllZerosV)),
10269 (!cast<Instruction>(NAME#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
10270 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
10271 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10272 (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
10273 _.RC:$src2, (i8 imm:$src4)),
10274 _.ImmAllZerosV)),
10275 (!cast<Instruction>(NAME#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
10276 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
Craig Topper48905772017-02-19 21:32:15 +000010277
10278 // Additional patterns for matching masked loads with different
10279 // operand orders.
 // NOTE(review): the 231 and 312 cases below are cyclic rotations, not swaps,
 // so the transform direction matters. Each transform must map the node's
 // operand order shown in the pattern back to (src1, src2, src3); confirm
 // against the VPTERNLOG231_imm8 / VPTERNLOG312_imm8 bodies.
Craig Topper48905772017-02-19 21:32:15 +000010280 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10281 (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
10282 _.RC:$src2, (i8 imm:$src4)),
10283 _.RC:$src1)),
10284 (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
10285 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
Craig Topperc6c68f52017-02-20 07:00:40 +000010286 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10287 (OpNode (bitconvert (_.LdFrag addr:$src3)),
10288 _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
10289 _.RC:$src1)),
10290 (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
10291 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
10292 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10293 (OpNode _.RC:$src2, _.RC:$src1,
10294 (bitconvert (_.LdFrag addr:$src3)), (i8 imm:$src4)),
10295 _.RC:$src1)),
10296 (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
10297 _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
10298 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10299 (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
10300 _.RC:$src1, (i8 imm:$src4)),
10301 _.RC:$src1)),
10302 (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
10303 _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
10304 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10305 (OpNode (bitconvert (_.LdFrag addr:$src3)),
10306 _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
10307 _.RC:$src1)),
10308 (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
10309 _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
Craig Topper5b4e36a2017-02-20 02:47:42 +000010310
10311 // Additional patterns for matching broadcasts in other positions.
10312 def : Pat<(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10313 _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
10314 (!cast<Instruction>(NAME#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
10315 addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
10316 def : Pat<(_.VT (OpNode _.RC:$src1,
10317 (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10318 _.RC:$src2, (i8 imm:$src4))),
10319 (!cast<Instruction>(NAME#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
10320 addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
10321
10322 // Additional patterns for matching zero masking with broadcasts in other
10323 // positions.
10324 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10325 (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10326 _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
10327 _.ImmAllZerosV)),
10328 (!cast<Instruction>(NAME#_.ZSuffix#rmbikz) _.RC:$src1,
10329 _.KRCWM:$mask, _.RC:$src2, addr:$src3,
10330 (VPTERNLOG321_imm8 imm:$src4))>;
10331 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10332 (OpNode _.RC:$src1,
10333 (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10334 _.RC:$src2, (i8 imm:$src4)),
10335 _.ImmAllZerosV)),
10336 (!cast<Instruction>(NAME#_.ZSuffix#rmbikz) _.RC:$src1,
10337 _.KRCWM:$mask, _.RC:$src2, addr:$src3,
10338 (VPTERNLOG132_imm8 imm:$src4))>;
10339
10340 // Additional patterns for matching masked broadcasts with different
10341 // operand orders.
10342 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10343 (OpNode _.RC:$src1,
10344 (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10345 _.RC:$src2, (i8 imm:$src4)),
10346 _.RC:$src1)),
10347 (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
10348 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
Craig Topper2012dda2017-02-20 17:44:09 +000010349 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10350 (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10351 _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
10352 _.RC:$src1)),
Cameron McInally9d641012017-10-06 22:31:29 +000010353 (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
Craig Topper2012dda2017-02-20 17:44:09 +000010354 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
10355 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10356 (OpNode _.RC:$src2, _.RC:$src1,
10357 (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10358 (i8 imm:$src4)), _.RC:$src1)),
Cameron McInally9d641012017-10-06 22:31:29 +000010359 (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
Craig Topper2012dda2017-02-20 17:44:09 +000010360 _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
10361 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10362 (OpNode _.RC:$src2,
10363 (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10364 _.RC:$src1, (i8 imm:$src4)),
10365 _.RC:$src1)),
Cameron McInally9d641012017-10-06 22:31:29 +000010366 (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
Craig Topper2012dda2017-02-20 17:44:09 +000010367 _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
10368 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10369 (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10370 _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
10371 _.RC:$src1)),
Cameron McInally9d641012017-10-06 22:31:29 +000010372 (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
Craig Topper2012dda2017-02-20 17:44:09 +000010373 _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
Igor Bregerb4bb1902015-10-15 12:33:24 +000010374}
10375
// Instantiates avx512_ternlog (opcode 0x25, node X86vpternlog) once per vector
// width described by the AVX512VLVectorVTInfo argument `_`:
//   Z    - _.info512, EVEX_V512, predicated on HasAVX512
//   Z128 - _.info128, EVEX_V128, predicated on HasAVX512 and HasVLX
//   Z256 - _.info256, EVEX_V256, predicated on HasAVX512 and HasVLX
// OpcodeStr and itins are forwarded unchanged to avx512_ternlog.
Simon Pilgrimbb791b32017-11-30 13:18:06 +000010376multiclass avx512_common_ternlog<string OpcodeStr, OpndItins itins,
10377 AVX512VLVectorVTInfo _> {
Igor Bregerb4bb1902015-10-15 12:33:24 +000010378 let Predicates = [HasAVX512] in
Simon Pilgrimbb791b32017-11-30 13:18:06 +000010379 defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, itins, _.info512>, EVEX_V512;
Igor Bregerb4bb1902015-10-15 12:33:24 +000010380 let Predicates = [HasAVX512, HasVLX] in {
Simon Pilgrimbb791b32017-11-30 13:18:06 +000010381 defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, itins, _.info128>, EVEX_V128;
10382 defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, itins, _.info256>, EVEX_V256;
Igor Bregerb4bb1902015-10-15 12:33:24 +000010383 }
10384}
10385
// Dword and qword element forms of VPTERNLOG. Both use SSE_INTALU_ITINS_P;
// the qword form additionally sets VEX_W.
Simon Pilgrimbb791b32017-11-30 13:18:06 +000010386defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SSE_INTALU_ITINS_P,
10387 avx512vl_i32_info>;
10388defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SSE_INTALU_ITINS_P,
10389 avx512vl_i64_info>, VEX_W;
Igor Bregerb4bb1902015-10-15 12:33:24 +000010390
Craig Topper8a444ee2018-01-26 22:17:40 +000010391
10392// Patterns to implement vnot using vpternlog instead of creating all ones
10393// using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
10394// so that the result is only dependent on src0. But we use the same source
10395// for all operands to prevent a false dependency.
10396// TODO: We should maybe have a more generalized algorithm for folding to
10397// vpternlog.
// 512-bit case: xor with all-ones maps directly onto VPTERNLOGQZrri.
10398let Predicates = [HasAVX512] in {
10399 def : Pat<(v8i64 (xor VR512:$src, (bc_v8i64 (v16i32 immAllOnesV)))),
10400 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
10401}
10402
// 128/256-bit cases without VLX: each operand is inserted into an
// IMPLICIT_DEF v8i64 (sub_xmm / sub_ymm), the 512-bit VPTERNLOGQZrri is used,
// and the result is taken back out of the same subregister.
10403let Predicates = [HasAVX512, NoVLX] in {
10404 def : Pat<(v2i64 (xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV)))),
10405 (EXTRACT_SUBREG
10406 (VPTERNLOGQZrri
10407 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10408 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10409 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10410 (i8 15)), sub_xmm)>;
10411 def : Pat<(v4i64 (xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV)))),
10412 (EXTRACT_SUBREG
10413 (VPTERNLOGQZrri
10414 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
10415 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
10416 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
10417 (i8 15)), sub_ymm)>;
10418}
10419
// 128/256-bit cases with VLX: use the native-width Z128/Z256 instructions.
10420let Predicates = [HasVLX] in {
10421 def : Pat<(v2i64 (xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV)))),
10422 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
10423 def : Pat<(v4i64 (xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV)))),
10424 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
10425}
10426
Asaf Badouhd4a0d9a2016-01-19 14:21:39 +000010427//===----------------------------------------------------------------------===//
10428// AVX-512 - FixupImm
10429//===----------------------------------------------------------------------===//
10430
// Packed VFIXUPIMM forms for one vector type `_`. $src1 is tied to $dst.
// All three forms pass FROUND_CURRENT as the last operand of OpNode:
//   rri  - $src3 is a register, read as _.IntVT
//   rmi  - $src3 is a full-width load (_.LdFrag), bitconverted to _.IntVT
//   rmbi - $src3 is a broadcast scalar load (X86VBroadcast of
//          _.ScalarLdFrag); tagged EVEX_B and printed with _.BroadcastStr
// The memory forms use itins.rm and the folded scheduling class.
10431multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim54b8aa22017-12-05 11:46:57 +000010432 OpndItins itins, X86VectorVTInfo _>{
Craig Topper05948fb2016-08-02 05:11:15 +000010433 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
Asaf Badouhd4a0d9a2016-01-19 14:21:39 +000010434 defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
10435 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
10436 OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
10437 (OpNode (_.VT _.RC:$src1),
10438 (_.VT _.RC:$src2),
10439 (_.IntVT _.RC:$src3),
10440 (i32 imm:$src4),
Simon Pilgrim54b8aa22017-12-05 11:46:57 +000010441 (i32 FROUND_CURRENT)), itins.rr>, Sched<[itins.Sched]>;
Craig Toppere1cac152016-06-07 07:27:54 +000010442 defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
10443 (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
10444 OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
10445 (OpNode (_.VT _.RC:$src1),
10446 (_.VT _.RC:$src2),
10447 (_.IntVT (bitconvert (_.LdFrag addr:$src3))),
10448 (i32 imm:$src4),
Simon Pilgrim54b8aa22017-12-05 11:46:57 +000010449 (i32 FROUND_CURRENT)), itins.rm>,
10450 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Craig Toppere1cac152016-06-07 07:27:54 +000010451 defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
10452 (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
10453 OpcodeStr##_.Suffix, "$src4, ${src3}"##_.BroadcastStr##", $src2",
10454 "$src2, ${src3}"##_.BroadcastStr##", $src4",
10455 (OpNode (_.VT _.RC:$src1),
10456 (_.VT _.RC:$src2),
10457 (_.IntVT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
10458 (i32 imm:$src4),
Simon Pilgrim54b8aa22017-12-05 11:46:57 +000010459 (i32 FROUND_CURRENT)), itins.rm>,
10460 EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Asaf Badouhd4a0d9a2016-01-19 14:21:39 +000010461 } // Constraints = "$src1 = $dst"
10462}
10463
// Packed VFIXUPIMM register form with "{sae}" in the assembly string.
// Differs from avx512_fixupimm_packed's rri in that OpNode receives
// FROUND_NO_EXC instead of FROUND_CURRENT and the instruction is tagged
// EVEX_B. $src1 is tied to $dst. Being register-only, it uses itins.rr and
// the unfolded scheduling class.
10464multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
Simon Pilgrim54b8aa22017-12-05 11:46:57 +000010465 SDNode OpNode, OpndItins itins,
10466 X86VectorVTInfo _>{
Craig Topper05948fb2016-08-02 05:11:15 +000010467let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
Asaf Badouhd4a0d9a2016-01-19 14:21:39 +000010468 defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
10469 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
Simon Pilgrim18bcf932016-02-03 09:41:59 +000010470 OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
Asaf Badouhd4a0d9a2016-01-19 14:21:39 +000010471 "$src2, $src3, {sae}, $src4",
10472 (OpNode (_.VT _.RC:$src1),
10473 (_.VT _.RC:$src2),
10474 (_.IntVT _.RC:$src3),
10475 (i32 imm:$src4),
Simon Pilgrim54b8aa22017-12-05 11:46:57 +000010476 (i32 FROUND_NO_EXC)), itins.rr>,
10477 EVEX_B, Sched<[itins.Sched]>;
Asaf Badouhd4a0d9a2016-01-19 14:21:39 +000010478 }
10479}
10480
// Scalar VFIXUPIMM forms. `_` describes the result/$src1/$src2 type and
// `_src3VT` the (integer vector) type used for $src3. $src1 is tied to $dst
// and all forms are predicated on HasAVX512.
//   rri  - register $src3, FROUND_CURRENT
//   rrib - register $src3, "{sae}" syntax, FROUND_NO_EXC, tagged EVEX_B
//   rmi  - $src3 is scalar_to_vector of a _src3VT.ScalarLdFrag load,
//          FROUND_CURRENT
10481multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim54b8aa22017-12-05 11:46:57 +000010482 OpndItins itins, X86VectorVTInfo _,
10483 X86VectorVTInfo _src3VT> {
Craig Topper05948fb2016-08-02 05:11:15 +000010484 let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
10485 ExeDomain = _.ExeDomain in {
Asaf Badouhd4a0d9a2016-01-19 14:21:39 +000010486 defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10487 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
10488 OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
10489 (OpNode (_.VT _.RC:$src1),
10490 (_.VT _.RC:$src2),
10491 (_src3VT.VT _src3VT.RC:$src3),
10492 (i32 imm:$src4),
Simon Pilgrim54b8aa22017-12-05 11:46:57 +000010493 (i32 FROUND_CURRENT)), itins.rr>, Sched<[itins.Sched]>;
// rrib is MRMSrcReg with only register inputs, so it takes the register
// itinerary and the unfolded scheduling class, the same as rri above and as
// the packed rrib form in avx512_fixupimm_packed_sae.
Asaf Badouhd4a0d9a2016-01-19 14:21:39 +000010494 defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10495 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
10496 OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
10497 "$src2, $src3, {sae}, $src4",
10498 (OpNode (_.VT _.RC:$src1),
10499 (_.VT _.RC:$src2),
10500 (_src3VT.VT _src3VT.RC:$src3),
10501 (i32 imm:$src4),
Simon Pilgrim54b8aa22017-12-05 11:46:57 +000010502 (i32 FROUND_NO_EXC)), itins.rr>,
10503 EVEX_B, Sched<[itins.Sched]>;
Craig Toppere1cac152016-06-07 07:27:54 +000010504 defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
10505 (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
10506 OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
10507 (OpNode (_.VT _.RC:$src1),
10508 (_.VT _.RC:$src2),
10509 (_src3VT.VT (scalar_to_vector
10510 (_src3VT.ScalarLdFrag addr:$src3))),
10511 (i32 imm:$src4),
Simon Pilgrim54b8aa22017-12-05 11:46:57 +000010512 (i32 FROUND_CURRENT)), itins.rm>,
10513 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Asaf Badouhd4a0d9a2016-01-19 14:21:39 +000010514 }
10515}
10516
// Instantiates packed VFIXUPIMM (opcode 0x54, node X86VFixupimm) for all three
// widths of _Vec. Only the 512-bit Z variant also pulls in
// avx512_fixupimm_packed_sae (the "{sae}" register form); Z128/Z256 get the
// plain packed forms and additionally require HasVLX.
Simon Pilgrim54b8aa22017-12-05 11:46:57 +000010517multiclass avx512_fixupimm_packed_all<OpndItins itins, AVX512VLVectorVTInfo _Vec> {
Asaf Badouhd4a0d9a2016-01-19 14:21:39 +000010518 let Predicates = [HasAVX512] in
Simon Pilgrim54b8aa22017-12-05 11:46:57 +000010519 defm Z : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, itins,
10520 _Vec.info512>,
10521 avx512_fixupimm_packed_sae<0x54, "vfixupimm", X86VFixupimm, itins,
10522 _Vec.info512>, AVX512AIi8Base, EVEX_4V, EVEX_V512;
Asaf Badouhd4a0d9a2016-01-19 14:21:39 +000010523 let Predicates = [HasAVX512, HasVLX] in {
Simon Pilgrim54b8aa22017-12-05 11:46:57 +000010524 defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, itins,
10525 _Vec.info128>, AVX512AIi8Base, EVEX_4V, EVEX_V128;
10526 defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, itins,
10527 _Vec.info256>, AVX512AIi8Base, EVEX_4V, EVEX_V256;
Asaf Badouhd4a0d9a2016-01-19 14:21:39 +000010528 }
10529}
10530
// VFIXUPIMM instantiations. The scalar forms use opcode 0x55 with
// X86VFixupimmScalar and pair an FP scalar info (f32x/f64x) with an integer
// vector info for $src3 (v4i32x/v2i64x). The packed forms go through
// avx512_fixupimm_packed_all. The 64-bit element variants (SD, PD) add VEX_W.
Simon Pilgrim18bcf932016-02-03 09:41:59 +000010531defm VFIXUPIMMSS : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar,
Simon Pilgrim54b8aa22017-12-05 11:46:57 +000010532 SSE_ALU_F32S, f32x_info, v4i32x_info>,
Asaf Badouhd4a0d9a2016-01-19 14:21:39 +000010533 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
Simon Pilgrim18bcf932016-02-03 09:41:59 +000010534defm VFIXUPIMMSD : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar,
Simon Pilgrim54b8aa22017-12-05 11:46:57 +000010535 SSE_ALU_F64S, f64x_info, v2i64x_info>,
Asaf Badouhd4a0d9a2016-01-19 14:21:39 +000010536 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
Simon Pilgrim54b8aa22017-12-05 11:46:57 +000010537defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SSE_ALU_F32P, avx512vl_f32_info>,
Asaf Badouhd4a0d9a2016-01-19 14:21:39 +000010538 EVEX_CD8<32, CD8VF>;
Simon Pilgrim54b8aa22017-12-05 11:46:57 +000010539defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SSE_ALU_F64P, avx512vl_f64_info>,
Asaf Badouhd4a0d9a2016-01-19 14:21:39 +000010540 EVEX_CD8<64, CD8VF>, VEX_W;
Craig Topper5625d242016-07-29 06:06:00 +000010541
10542
10543
10544// Patterns used to select SSE scalar fp arithmetic instructions from
10545// either:
10546//
10547// (1) a scalar fp operation followed by a blend
10548//
10549// The effect is that the backend no longer emits unnecessary vector
10550// insert instructions immediately after SSE scalar fp instructions
10551// like addss or mulss.
10552//
10553// For example, given the following code:
10554// __m128 foo(__m128 A, __m128 B) {
10555// A[0] += B[0];
10556// return A;
10557// }
10558//
10559// Previously we generated:
10560// addss %xmm0, %xmm1
10561// movss %xmm1, %xmm0
10562//
10563// We now generate:
10564// addss %xmm1, %xmm0
10565//
10566// (2) a vector packed single/double fp operation followed by a vector insert
10567//
10568// The effect is that the backend converts the packed fp instruction
10569// followed by a vector insert into a single SSE scalar fp instruction.
10570//
10571// For example, given the following code:
10572// __m128 foo(__m128 A, __m128 B) {
10573// __m128 C = A + B;
10574// return (__m128) {C[0], A[1], A[2], A[3]};
10575// }
10576//
10577// Previously we generated:
10578// addps %xmm0, %xmm1
10579// movss %xmm1, %xmm0
10580//
10581// We now generate:
10582// addss %xmm1, %xmm0
10583
10584// TODO: Some canonicalization in lowering would simplify the number of
10585// patterns we have to try to match.
// Selection patterns (HasAVX512) that fold an f32 math op `Op` combined with
// an X86Movss insert into a single "V"#OpcPrefix#"SSZrr_Int" instruction, or
// into the masked "..._Intk" variant when the scalar result goes through
// X86selects. FR32X operands are moved to VR128X with COPY_TO_REGCLASS because
// the _Int instructions take vector-register operands.
10586multiclass AVX512_scalar_math_f32_patterns<SDNode Op, string OpcPrefix> {
10587 let Predicates = [HasAVX512] in {
Simon Pilgrimae17cf22016-10-01 15:33:01 +000010588 // extracted scalar math op with insert via movss
Craig Topper5ef13ba2016-12-26 07:26:07 +000010589 def : Pat<(v4f32 (X86Movss (v4f32 VR128X:$dst), (v4f32 (scalar_to_vector
10590 (Op (f32 (extractelt (v4f32 VR128X:$dst), (iPTR 0))),
10591 FR32X:$src))))),
Simon Pilgrimae17cf22016-10-01 15:33:01 +000010592 (!cast<I>("V"#OpcPrefix#SSZrr_Int) v4f32:$dst,
Craig Topper5ef13ba2016-12-26 07:26:07 +000010593 (COPY_TO_REGCLASS FR32X:$src, VR128X))>;
Simon Pilgrimae17cf22016-10-01 15:33:01 +000010594
Craig Topper5625d242016-07-29 06:06:00 +000010595 // vector math op with insert via movss
Craig Topper5ef13ba2016-12-26 07:26:07 +000010596 def : Pat<(v4f32 (X86Movss (v4f32 VR128X:$dst),
10597 (Op (v4f32 VR128X:$dst), (v4f32 VR128X:$src)))),
Craig Topper5625d242016-07-29 06:06:00 +000010598 (!cast<I>("V"#OpcPrefix#SSZrr_Int) v4f32:$dst, v4f32:$src)>;
10599
Craig Topper83f21452016-12-27 01:56:24 +000010600 // extracted masked scalar math op with insert via movss
10601 def : Pat<(X86Movss (v4f32 VR128X:$src1),
10602 (scalar_to_vector
10603 (X86selects VK1WM:$mask,
10604 (Op (f32 (extractelt (v4f32 VR128X:$src1), (iPTR 0))),
10605 FR32X:$src2),
10606 FR32X:$src0))),
10607 (!cast<I>("V"#OpcPrefix#SSZrr_Intk) (COPY_TO_REGCLASS FR32X:$src0, VR128X),
10608 VK1WM:$mask, v4f32:$src1,
10609 (COPY_TO_REGCLASS FR32X:$src2, VR128X))>;
Craig Topper5625d242016-07-29 06:06:00 +000010610 }
10611}
10612
// Apply the f32 scalar-math folding patterns to add, sub, mul and div.
10613defm : AVX512_scalar_math_f32_patterns<fadd, "ADD">;
10614defm : AVX512_scalar_math_f32_patterns<fsub, "SUB">;
10615defm : AVX512_scalar_math_f32_patterns<fmul, "MUL">;
10616defm : AVX512_scalar_math_f32_patterns<fdiv, "DIV">;
10617
// f64 counterpart of AVX512_scalar_math_f32_patterns: folds `Op` combined with
// an X86Movsd insert into "V"#OpcPrefix#"SDZrr_Int", or into the masked
// "..._Intk" variant when the scalar result goes through X86selects. FR64X
// operands are moved to VR128X with COPY_TO_REGCLASS.
10618multiclass AVX512_scalar_math_f64_patterns<SDNode Op, string OpcPrefix> {
10619 let Predicates = [HasAVX512] in {
10620 // extracted scalar math op with insert via movsd
Craig Topper5ef13ba2016-12-26 07:26:07 +000010621 def : Pat<(v2f64 (X86Movsd (v2f64 VR128X:$dst), (v2f64 (scalar_to_vector
10622 (Op (f64 (extractelt (v2f64 VR128X:$dst), (iPTR 0))),
10623 FR64X:$src))))),
Craig Topper5625d242016-07-29 06:06:00 +000010624 (!cast<I>("V"#OpcPrefix#SDZrr_Int) v2f64:$dst,
Craig Topper5ef13ba2016-12-26 07:26:07 +000010625 (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
Craig Topper5625d242016-07-29 06:06:00 +000010626
Craig Topper5625d242016-07-29 06:06:00 +000010627 // vector math op with insert via movsd
Craig Topper5ef13ba2016-12-26 07:26:07 +000010628 def : Pat<(v2f64 (X86Movsd (v2f64 VR128X:$dst),
10629 (Op (v2f64 VR128X:$dst), (v2f64 VR128X:$src)))),
Craig Topper5625d242016-07-29 06:06:00 +000010630 (!cast<I>("V"#OpcPrefix#SDZrr_Int) v2f64:$dst, v2f64:$src)>;
10631
Craig Topper83f21452016-12-27 01:56:24 +000010632 // extracted masked scalar math op with insert via movsd
10633 def : Pat<(X86Movsd (v2f64 VR128X:$src1),
10634 (scalar_to_vector
10635 (X86selects VK1WM:$mask,
10636 (Op (f64 (extractelt (v2f64 VR128X:$src1), (iPTR 0))),
10637 FR64X:$src2),
10638 FR64X:$src0))),
10639 (!cast<I>("V"#OpcPrefix#SDZrr_Intk) (COPY_TO_REGCLASS FR64X:$src0, VR128X),
10640 VK1WM:$mask, v2f64:$src1,
10641 (COPY_TO_REGCLASS FR64X:$src2, VR128X))>;
Craig Topper5625d242016-07-29 06:06:00 +000010642 }
10643}
10644
// Apply the f64 scalar-math folding patterns to add, sub, mul and div.
10645defm : AVX512_scalar_math_f64_patterns<fadd, "ADD">;
10646defm : AVX512_scalar_math_f64_patterns<fsub, "SUB">;
10647defm : AVX512_scalar_math_f64_patterns<fmul, "MUL">;
10648defm : AVX512_scalar_math_f64_patterns<fdiv, "DIV">;
Coby Tayree2a1c02f2017-11-21 09:11:41 +000010649
10650//===----------------------------------------------------------------------===//
10651// AES instructions
10652//===----------------------------------------------------------------------===//
Coby Tayree7ca5e5872017-11-21 09:30:33 +000010653
// EVEX-encoded VAES forms built on AESI_binop_rm_int for three widths.
// The intrinsic is looked up by name: Z128 uses IntPrefix as-is, Z256 uses
// IntPrefix#"_256" and Z uses IntPrefix#"_512".
// Z128/Z256 require HasVLX and HasVAES; Z requires HasAVX512 and HasVAES.
Coby Tayree2a1c02f2017-11-21 09:11:41 +000010654multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
10655 let Predicates = [HasVLX, HasVAES] in {
10656 defm Z128 : AESI_binop_rm_int<Op, OpStr,
10657 !cast<Intrinsic>(IntPrefix),
10658 loadv2i64, 0, VR128X, i128mem>,
10659 EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
10660 defm Z256 : AESI_binop_rm_int<Op, OpStr,
10661 !cast<Intrinsic>(IntPrefix##"_256"),
10662 loadv4i64, 0, VR256X, i256mem>,
10663 EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
10664 }
10665 let Predicates = [HasAVX512, HasVAES] in
10666 defm Z : AESI_binop_rm_int<Op, OpStr,
10667 !cast<Intrinsic>(IntPrefix##"_512"),
10668 loadv8i64, 0, VR512, i512mem>,
10669 EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
10670}
10671
// The four VAES operations occupy consecutive opcodes 0xDC-0xDF.
10672defm VAESENC : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
10673defm VAESENCLAST : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
10674defm VAESDEC : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
10675defm VAESDECLAST : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;
10676
Coby Tayree7ca5e5872017-11-21 09:30:33 +000010677//===----------------------------------------------------------------------===//
10678// PCLMUL instructions - Carry less multiplication
10679//===----------------------------------------------------------------------===//
10680
// EVEX-encoded VPCLMULQDQ. The 512-bit form requires HasAVX512 and
// HasVPCLMULQDQ.
10681let Predicates = [HasAVX512, HasVPCLMULQDQ] in
10682defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
10683 EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;
10684
// The 128- and 256-bit forms require HasVLX and HasVPCLMULQDQ.
10685let Predicates = [HasVLX, HasVPCLMULQDQ] in {
10686defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
10687 EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;
10688
10689defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
10690 int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
10691 EVEX_CD8<64, CD8VF>, VEX_WIG;
10692}
10693
10694// Aliases
10695defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
10696defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
10697defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
10698
Coby Tayree71e37cc2017-11-21 09:48:44 +000010699//===----------------------------------------------------------------------===//
10700// VBMI2
10701//===----------------------------------------------------------------------===//
10702
// Three-source VBMI2 variable-shift forms for one vector type VTI, with $src1
// tied to $dst:
//   r - $src3 is a register
//   m - $src3 is a full-width load (VTI.LdFrag), bitconverted to VTI.VT
// Both forms are maskable (AVX512_maskable_3src) and use AVX512FMA3Base.
10703multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
Simon Pilgrim36be8522017-11-29 18:52:20 +000010704 OpndItins itins, X86VectorVTInfo VTI> {
Coby Tayree71e37cc2017-11-21 09:48:44 +000010705 let Constraints = "$src1 = $dst",
10706 ExeDomain = VTI.ExeDomain in {
10707 defm r: AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
10708 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
10709 "$src3, $src2", "$src2, $src3",
Simon Pilgrim36be8522017-11-29 18:52:20 +000010710 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3)),
10711 itins.rr>, AVX512FMA3Base, Sched<[itins.Sched]>;
Coby Tayree71e37cc2017-11-21 09:48:44 +000010712 defm m: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
10713 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
10714 "$src3, $src2", "$src2, $src3",
10715 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
Simon Pilgrim36be8522017-11-29 18:52:20 +000010716 (VTI.VT (bitconvert (VTI.LdFrag addr:$src3))))),
10717 itins.rm>, AVX512FMA3Base,
10718 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Coby Tayree71e37cc2017-11-21 09:48:44 +000010719 }
10720}
10721
// Extends VBMI2_shift_var_rm (r and m forms) with an `mb` form whose $src3 is
// a broadcast scalar load (X86VBroadcast of VTI.ScalarLdFrag). The mb form is
// tagged EVEX_B and printed with VTI.BroadcastStr.
10722multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
Simon Pilgrim36be8522017-11-29 18:52:20 +000010723 OpndItins itins, X86VectorVTInfo VTI>
10724 : VBMI2_shift_var_rm<Op, OpStr, OpNode, itins, VTI> {
Coby Tayree71e37cc2017-11-21 09:48:44 +000010725 let Constraints = "$src1 = $dst",
10726 ExeDomain = VTI.ExeDomain in
10727 defm mb: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
10728 (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
10729 "${src3}"##VTI.BroadcastStr##", $src2",
10730 "$src2, ${src3}"##VTI.BroadcastStr,
10731 (OpNode VTI.RC:$src1, VTI.RC:$src2,
Simon Pilgrim36be8522017-11-29 18:52:20 +000010732 (VTI.VT (X86VBroadcast (VTI.ScalarLdFrag addr:$src3)))),
10733 itins.rm>, AVX512FMA3Base, EVEX_B,
10734 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Coby Tayree71e37cc2017-11-21 09:48:44 +000010735}
10736
// Instantiates VBMI2_shift_var_rm (no broadcast form) for all three widths of
// VTI. Z requires HasVBMI2; Z256/Z128 additionally require HasVLX.
10737multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
Simon Pilgrim36be8522017-11-29 18:52:20 +000010738 OpndItins itins, AVX512VLVectorVTInfo VTI> {
Coby Tayree71e37cc2017-11-21 09:48:44 +000010739 let Predicates = [HasVBMI2] in
Simon Pilgrim36be8522017-11-29 18:52:20 +000010740 defm Z : VBMI2_shift_var_rm<Op, OpStr, OpNode, itins, VTI.info512>, EVEX_V512;
Coby Tayree71e37cc2017-11-21 09:48:44 +000010741 let Predicates = [HasVBMI2, HasVLX] in {
Simon Pilgrim36be8522017-11-29 18:52:20 +000010742 defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, itins, VTI.info256>, EVEX_V256;
10743 defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, itins, VTI.info128>, EVEX_V128;
Coby Tayree71e37cc2017-11-21 09:48:44 +000010744 }
10745}
10746
// Same as VBMI2_shift_var_rm_common but instantiates VBMI2_shift_var_rmb, so
// each width also gets the broadcast (mb) form.
10747multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
Simon Pilgrim36be8522017-11-29 18:52:20 +000010748 OpndItins itins, AVX512VLVectorVTInfo VTI> {
Coby Tayree71e37cc2017-11-21 09:48:44 +000010749 let Predicates = [HasVBMI2] in
Simon Pilgrim36be8522017-11-29 18:52:20 +000010750 defm Z : VBMI2_shift_var_rmb<Op, OpStr, OpNode, itins, VTI.info512>, EVEX_V512;
Coby Tayree71e37cc2017-11-21 09:48:44 +000010751 let Predicates = [HasVBMI2, HasVLX] in {
Simon Pilgrim36be8522017-11-29 18:52:20 +000010752 defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, itins, VTI.info256>, EVEX_V256;
10753 defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, itins, VTI.info128>, EVEX_V128;
Coby Tayree71e37cc2017-11-21 09:48:44 +000010754 }
10755}
// Word/dword/qword variable-shift families sharing one mnemonic Prefix.
//   W - opcode wOp, i16 elements, no broadcast form (rm_common), VEX_W
//   D - opcode dqOp, i32 elements, with broadcast form (rmb_common)
//   Q - opcode dqOp, i64 elements, with broadcast form (rmb_common), VEX_W
10756multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
Simon Pilgrim36be8522017-11-29 18:52:20 +000010757 SDNode OpNode, OpndItins itins> {
10758 defm W : VBMI2_shift_var_rm_common<wOp, Prefix##"w", OpNode, itins,
Coby Tayree71e37cc2017-11-21 09:48:44 +000010759 avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
Simon Pilgrim36be8522017-11-29 18:52:20 +000010760 defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix##"d", OpNode, itins,
Coby Tayree71e37cc2017-11-21 09:48:44 +000010761 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
Simon Pilgrim36be8522017-11-29 18:52:20 +000010762 defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix##"q", OpNode, itins,
Coby Tayree71e37cc2017-11-21 09:48:44 +000010763 avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
10764}
10765
// Word/dword/qword immediate-shift families sharing one mnemonic Prefix.
// W is built from avx512_common_3Op_rm_imm8 (opcode wOp); D and Q are built
// from avx512_common_3Op_imm8 (opcode dqOp). All are predicated on HasVBMI2
// via the helper's predicate argument. W and Q set VEX_W.
10766multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
Simon Pilgrim36be8522017-11-29 18:52:20 +000010767 SDNode OpNode, OpndItins itins> {
10768 defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix##"w", itins,
10769 avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
10770 VEX_W, EVEX_CD8<16, CD8VF>;
Coby Tayree71e37cc2017-11-21 09:48:44 +000010771 defm D : avx512_common_3Op_imm8<Prefix##"d", avx512vl_i32_info, dqOp,
Simon Pilgrim36be8522017-11-29 18:52:20 +000010772 OpNode, itins, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
Coby Tayree71e37cc2017-11-21 09:48:44 +000010773 defm Q : avx512_common_3Op_imm8<Prefix##"q", avx512vl_i64_info, dqOp, OpNode,
Simon Pilgrim36be8522017-11-29 18:52:20 +000010774 itins, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
Coby Tayree71e37cc2017-11-21 09:48:44 +000010775}
10776
10777// Concat & Shift
// The variable (V-suffixed) and immediate forms use the same opcode pairs:
// 0x70/0x71 for left shifts and 0x72/0x73 for right shifts.
Simon Pilgrim36be8522017-11-29 18:52:20 +000010778defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SSE_INTMUL_ITINS_P>;
10779defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SSE_INTMUL_ITINS_P>;
10780defm VPSHLD : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SSE_INTMUL_ITINS_P>;
10781defm VPSHRD : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SSE_INTMUL_ITINS_P>;
10782
Coby Tayree71e37cc2017-11-21 09:48:44 +000010783// Compress
// Byte and word compress share opcode 0x63; the word form adds VEX_W.
Simon Pilgrim904d1a82017-12-01 16:20:03 +000010784defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", AVX512_COMPRESS,
10785 avx512vl_i8_info, HasVBMI2>, EVEX;
10786defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", AVX512_COMPRESS,
10787 avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;
Coby Tayree71e37cc2017-11-21 09:48:44 +000010788// Expand
// Byte and word expand share opcode 0x62; the word form adds VEX_W.
Simon Pilgrim904d1a82017-12-01 16:20:03 +000010789defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", AVX512_EXPAND,
10790 avx512vl_i8_info, HasVBMI2>, EVEX;
10791defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", AVX512_EXPAND,
10792 avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;
Coby Tayree71e37cc2017-11-21 09:48:44 +000010793
Coby Tayree3880f2a2017-11-21 10:04:28 +000010794//===----------------------------------------------------------------------===//
10795// VNNI
10796//===----------------------------------------------------------------------===//
10797
// Three-source VNNI forms for one vector type VTI, with $src1 tied to $dst:
//   r  - $src3 is a register
//   m  - $src3 is a full-width load (VTI.LdFrag), bitconverted to VTI.VT
//   mb - $src3 is a broadcast scalar load (X86VBroadcast of
//        VTI.ScalarLdFrag); tagged EVEX_B and printed with VTI.BroadcastStr
// The memory forms carry EVEX_CD8<32, CD8VF>.
10798let Constraints = "$src1 = $dst" in
10799multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
Simon Pilgrimd9f1ae32017-12-05 16:17:21 +000010800 OpndItins itins, X86VectorVTInfo VTI> {
Coby Tayree3880f2a2017-11-21 10:04:28 +000010801 defm r : AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
10802 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
10803 "$src3, $src2", "$src2, $src3",
10804 (VTI.VT (OpNode VTI.RC:$src1,
Simon Pilgrimd9f1ae32017-12-05 16:17:21 +000010805 VTI.RC:$src2, VTI.RC:$src3)),
10806 itins.rr>, EVEX_4V, T8PD, Sched<[itins.Sched]>;
Coby Tayree3880f2a2017-11-21 10:04:28 +000010807 defm m : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
10808 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
10809 "$src3, $src2", "$src2, $src3",
10810 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
10811 (VTI.VT (bitconvert
Simon Pilgrimd9f1ae32017-12-05 16:17:21 +000010812 (VTI.LdFrag addr:$src3))))),
10813 itins.rm>, EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
10814 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Coby Tayree3880f2a2017-11-21 10:04:28 +000010815 defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
10816 (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
10817 OpStr, "${src3}"##VTI.BroadcastStr##", $src2",
10818 "$src2, ${src3}"##VTI.BroadcastStr,
10819 (OpNode VTI.RC:$src1, VTI.RC:$src2,
10820 (VTI.VT (X86VBroadcast
Simon Pilgrimd9f1ae32017-12-05 16:17:21 +000010821 (VTI.ScalarLdFrag addr:$src3)))),
10822 itins.rm>, EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
10823 T8PD, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Coby Tayree3880f2a2017-11-21 10:04:28 +000010824}
10825
// Instantiates VNNI_rmb on i32 vectors for all three widths
// (v16i32 / v8i32x / v4i32x). Z requires HasVNNI; Z256/Z128 additionally
// require HasVLX.
Simon Pilgrimd9f1ae32017-12-05 16:17:21 +000010826multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode, OpndItins itins> {
Coby Tayree3880f2a2017-11-21 10:04:28 +000010827 let Predicates = [HasVNNI] in
Simon Pilgrimd9f1ae32017-12-05 16:17:21 +000010828 defm Z : VNNI_rmb<Op, OpStr, OpNode, itins, v16i32_info>, EVEX_V512;
Coby Tayree3880f2a2017-11-21 10:04:28 +000010829 let Predicates = [HasVNNI, HasVLX] in {
Simon Pilgrimd9f1ae32017-12-05 16:17:21 +000010830 defm Z256 : VNNI_rmb<Op, OpStr, OpNode, itins, v8i32x_info>, EVEX_V256;
10831 defm Z128 : VNNI_rmb<Op, OpStr, OpNode, itins, v4i32x_info>, EVEX_V128;
Coby Tayree3880f2a2017-11-21 10:04:28 +000010832 }
10833}
10834
Simon Pilgrimd9f1ae32017-12-05 16:17:21 +000010835// FIXME: Is there a better scheduler itinerary for VPDP?
// The four VNNI dot-product instructions occupy consecutive opcodes 0x50-0x53
// and all currently use the SSE_PMADD itinerary.
10836defm VPDPBUSD : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SSE_PMADD>;
10837defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SSE_PMADD>;
10838defm VPDPWSSD : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SSE_PMADD>;
10839defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SSE_PMADD>;
Coby Tayree3880f2a2017-11-21 10:04:28 +000010840
Coby Tayree5c7fe5d2017-11-21 10:32:42 +000010841//===----------------------------------------------------------------------===//
10842// Bit Algorithms
10843//===----------------------------------------------------------------------===//
10844
Simon Pilgrim756348c2017-11-29 13:49:51 +000010845// FIXME: Is there a better scheduler itinerary for VPOPCNTB/VPOPCNTW?
// Byte and word population count (ctpop) under HasBITALG. Both use opcode
// 0x54; the word form adds VEX_W.
10846defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SSE_INTALU_ITINS_P,
Craig Topperc0896052017-12-16 02:40:28 +000010847 avx512vl_i8_info, HasBITALG>;
Simon Pilgrim756348c2017-11-29 13:49:51 +000010848defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SSE_INTALU_ITINS_P,
Craig Topperc0896052017-12-16 02:40:28 +000010849 avx512vl_i16_info, HasBITALG>, VEX_W;
10850
// Additional ctpop lowering patterns for the same instructions via
// avx512_unary_lowering.
10851defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
10852defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
Coby Tayree5c7fe5d2017-11-21 10:32:42 +000010853
// vpshufbitqmb (opcode 0x8F) for one vector type VTI. The result is written to
// a mask register (VTI.KRC), hence AVX512_maskable_cmp.
//   rr - $src2 is a register
//   rm - $src2 is a full-width load (VTI.LdFrag), bitconverted to VTI.VT;
//        carries EVEX_CD8<8, CD8VF>
Simon Pilgrim07b4c592017-12-01 16:35:57 +000010854multiclass VPSHUFBITQMB_rm<OpndItins itins, X86VectorVTInfo VTI> {
Coby Tayreee8bdd382017-11-23 11:15:50 +000010855 defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
10856 (ins VTI.RC:$src1, VTI.RC:$src2),
10857 "vpshufbitqmb",
10858 "$src2, $src1", "$src1, $src2",
10859 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
Simon Pilgrim07b4c592017-12-01 16:35:57 +000010860 (VTI.VT VTI.RC:$src2)), itins.rr>, EVEX_4V, T8PD,
10861 Sched<[itins.Sched]>;
Coby Tayreee8bdd382017-11-23 11:15:50 +000010862 defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
10863 (ins VTI.RC:$src1, VTI.MemOp:$src2),
10864 "vpshufbitqmb",
10865 "$src2, $src1", "$src1, $src2",
10866 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
Simon Pilgrim07b4c592017-12-01 16:35:57 +000010867 (VTI.VT (bitconvert (VTI.LdFrag addr:$src2)))),
10868 itins.rm>, EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
10869 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Coby Tayreee8bdd382017-11-23 11:15:50 +000010870}
10871
// Instantiates VPSHUFBITQMB_rm for all three widths of VTI. Z requires
// HasBITALG; Z256/Z128 additionally require HasVLX.
Simon Pilgrim07b4c592017-12-01 16:35:57 +000010872multiclass VPSHUFBITQMB_common<OpndItins itins, AVX512VLVectorVTInfo VTI> {
Coby Tayreee8bdd382017-11-23 11:15:50 +000010873 let Predicates = [HasBITALG] in
Simon Pilgrim07b4c592017-12-01 16:35:57 +000010874 defm Z : VPSHUFBITQMB_rm<itins, VTI.info512>, EVEX_V512;
Coby Tayreee8bdd382017-11-23 11:15:50 +000010875 let Predicates = [HasBITALG, HasVLX] in {
Simon Pilgrim07b4c592017-12-01 16:35:57 +000010876 defm Z256 : VPSHUFBITQMB_rm<itins, VTI.info256>, EVEX_V256;
10877 defm Z128 : VPSHUFBITQMB_rm<itins, VTI.info128>, EVEX_V128;
Coby Tayreee8bdd382017-11-23 11:15:50 +000010878 }
10879}
10880
Simon Pilgrim07b4c592017-12-01 16:35:57 +000010881// FIXME: Is there a better scheduler itinerary for VPSHUFBITQMB?
// Byte-element instantiation; currently uses SSE_INTMUL_ITINS_P.
10882defm VPSHUFBITQMB : VPSHUFBITQMB_common<SSE_INTMUL_ITINS_P, avx512vl_i8_info>;
Coby Tayreee8bdd382017-11-23 11:15:50 +000010883
Coby Tayreed8b17be2017-11-26 09:36:41 +000010884//===----------------------------------------------------------------------===//
10885// GFNI
10886//===----------------------------------------------------------------------===//
10887
// EVEX-encoded GF2P8MULB on byte vectors, built on avx512_binop_rm with
// SSE_INTALU_ITINS_P and a trailing argument of 1 (NOTE(review): presumably
// the commutable flag of avx512_binop_rm; confirm against its definition).
// Z requires HasGFNI, HasAVX512 and HasBWI; Z256/Z128 require HasGFNI, HasVLX
// and HasBWI.
10888multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode> {
10889 let Predicates = [HasGFNI, HasAVX512, HasBWI] in
10890 defm Z : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info,
10891 SSE_INTALU_ITINS_P, 1>, EVEX_V512;
10892 let Predicates = [HasGFNI, HasVLX, HasBWI] in {
10893 defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info,
10894 SSE_INTALU_ITINS_P, 1>, EVEX_V256;
10895 defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info,
10896 SSE_INTALU_ITINS_P, 1>, EVEX_V128;
10897 }
10898}
10899
// VGF2P8MULB: opcode 0xCF, selected from the X86GF2P8mulb node.
defm VGF2P8MULB
    : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb>,
      EVEX_CD8<8, CD8VF>, T8PD;
Coby Tayreed8b17be2017-11-26 09:36:41 +000010902
// Extends the forms inherited from avx512_3Op_rm_imm8 (instantiated with VTI
// as both its vector-info arguments) with a broadcast-memory form, rmbi.
//
//   Op, OpStr, OpNode - opcode byte, mnemonic and selection DAG node.
//   itins             - itinerary; itins.rm and itins.Sched.Folded are used
//                       for the broadcast form.
//   VTI               - vector info for the register operands and result.
//   BcstVTI           - vector info describing the broadcast source: the
//                       pattern loads one 64-bit scalar (loadi64), broadcasts
//                       it to BcstVTI.VT and bitconverts it for OpNode.
multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
                                      OpndItins itins, X86VectorVTInfo VTI,
                                      X86VectorVTInfo BcstVTI>
           : avx512_3Op_rm_imm8<Op, OpStr, OpNode, itins, VTI, VTI> {
  let ExeDomain = VTI.ExeDomain in
  // The memory operand class comes from BcstVTI, not VTI: the value read
  // from memory is the 64-bit broadcast element loaded by loadi64 below, so
  // the operand must be the scalar memory operand of the broadcast type.
  defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src1, BcstVTI.ScalarMemOp:$src2, u8imm:$src3),
                OpStr, "$src3, ${src2}"##BcstVTI.BroadcastStr##", $src1",
                "$src1, ${src2}"##BcstVTI.BroadcastStr##", $src3",
                (OpNode (VTI.VT VTI.RC:$src1),
                 (bitconvert (BcstVTI.VT (X86VBroadcast (loadi64 addr:$src2)))),
                 (i8 imm:$src3)), itins.rm>, EVEX_B,
                 Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
10917
// Instantiates GF2P8AFFINE_avx512_rmb_imm for the three vector widths. Each
// instantiation pairs a byte-vector info (operands/result) with the i64
// vector info of the same width (broadcast source):
//   Z    - v64i8_info  / v8i64_info,  guarded by HasGFNI, HasAVX512, HasBWI.
//   Z256 - v32i8x_info / v4i64x_info, guarded by HasGFNI, HasVLX, HasBWI.
//   Z128 - v16i8x_info / v2i64x_info, guarded by HasGFNI, HasVLX, HasBWI.
multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     OpndItins itins> {
  let Predicates = [HasGFNI, HasAVX512, HasBWI] in {
    defm Z : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, itins,
                                        v64i8_info, v8i64_info>,
             EVEX_V512;
  }

  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
    defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, itins,
                                           v32i8x_info, v4i64x_info>,
                EVEX_V256;
    defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, itins,
                                           v16i8x_info, v2i64x_info>,
                EVEX_V128;
  }
}
10930
// The two GF2P8 affine instructions share every encoding modifier and the
// SSE_INTMUL_ITINS_P itinerary; they differ only in opcode, mnemonic and
// selection node.

// VGF2P8AFFINEINVQB: opcode 0xCF.
defm VGF2P8AFFINEINVQB
    : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
                                X86GF2P8affineinvqb, SSE_INTMUL_ITINS_P>,
      EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;

// VGF2P8AFFINEQB: opcode 0xCE.
defm VGF2P8AFFINEQB
    : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
                                X86GF2P8affineqb, SSE_INTMUL_ITINS_P>,
      EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
Coby Tayreed8b17be2017-11-26 09:36:41 +000010937