blob: 7b1cc56b43fd914ff007969119f21b4e4bf26829 [file] [log] [blame]
Eric Christopher06b32cd2015-02-20 00:36:53 +00001//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file describes the X86 AVX512 instruction set, defining the
11// instructions, and properties of the instructions which are needed for code
12// generation, machine code emission, and analysis.
13//
14//===----------------------------------------------------------------------===//
15
// Bundle of everything that can be derived from a vector type
// (NumElts x EltVT): mask register classes, memory operands, load fragments,
// related value types, and so on.  Multiclasses take one of these records as
// a single template argument instead of a long list of individual arguments.
//
// Scalar types use the same template with numelts = 1.
//
//   numelts - number of elements (1 for scalars).
//   eltvt   - element value type.
//   rc      - register class holding values of this type.
//   suffix  - mnemonic suffix (optional, defaults to "").
class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
                      string suffix = ""> {
  // Template arguments re-exported as fields.
  RegisterClass RC = rc;
  ValueType EltVT = eltvt;
  int NumElts = numelts;

  // Mask register class with one bit per element, looked up as VK<NumElts>.
  RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);

  // Matching write-mask register class, looked up as VK<NumElts>WM.
  RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");

  // Value type of the mask: v<NumElts>i1.
  ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");

  // Suffix used in the instruction mnemonic.
  string Suffix = suffix;

  // Name of the vector VT.  For real vectors this is "v" # NumElts # EltVT,
  // e.g. v8i32 for eight i32 elements.  For scalars (NumElts == 1) the
  // element count is replaced by 4 when the element is 32 bits wide and by 2
  // when it is 64 bits wide, giving e.g. v4f32 or v2f64.
  string VTName = "v" # !if (!eq (NumElts, 1),
                             !if (!eq (EltVT.Size, 32), 4,
                                  !if (!eq (EltVT.Size, 64), 2, NumElts)),
                             NumElts) # EltVT;

  // The vector VT itself.
  ValueType VT = !cast<ValueType>(VTName);

  // Element type spelled as a string, e.g. "i32" or "f64".
  string EltTypeName = !cast<string>(EltVT);
  // Element size in bits as a string (EltTypeName with "i"/"f" removed),
  // e.g. "32" for v16i32, and the same size as an integer.
  string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
  int EltSize = EltVT.Size;

  // "i" for integer element types, "f" for floating-point element types
  // (EltTypeName with the size digits removed).
  string TypeVariantName = !subst(EltSizeName, "", EltTypeName);

  // Size of VT in bits, e.g. 512 for a type held in VR512.
  int Size = VT.Size;

  // Full-width memory operand, e.g. i512mem for VR512, and the memory operand
  // for a single element, e.g. i32mem.
  X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
  X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
  // FP scalar memory operand for intrinsics: ssmem for f32, sdmem for f64,
  // left unset for every other element type.
  Operand IntScalarMemOp =
      !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
           !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?));

  // Load fragments.
  // Note: integer types of 128/256/512 bits use loadv2i64/loadv4i64/loadv8i64
  // because of load promotion during legalization; every other type uses the
  // fragment named after VTName.
  PatFrag LdFrag = !cast<PatFrag>("load" #
                       !if (!eq (TypeVariantName, "i"),
                            !if (!eq (Size, 128), "v2i64",
                                 !if (!eq (Size, 256), "v4i64",
                                      !if (!eq (Size, 512), "v8i64",
                                           VTName))),
                            VTName));

  // Same selection as LdFrag, using the "alignedload" fragments.
  PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" #
                              !if (!eq (TypeVariantName, "i"),
                                   !if (!eq (Size, 128), "v2i64",
                                        !if (!eq (Size, 256), "v4i64",
                                             !if (!eq (Size, 512), "v8i64",
                                                  VTName))),
                                   VTName));

  // Load fragment for a single element, e.g. loadf32.
  PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);

  // Complex pattern for scalar intrinsic memory operands: sse_load_f32 for
  // f32, sse_load_f64 for f64, left unset otherwise.
  ComplexPattern ScalarIntMemCPat =
      !if (!eq (EltTypeName, "f32"),
           !cast<ComplexPattern>("sse_load_f32"),
           !if (!eq (EltTypeName, "f64"),
                !cast<ComplexPattern>("sse_load_f64"),
                ?));

  // The floating-point type with the same shape, e.g. v16f32 for v16i32.
  // Note: for EltSize < 32 such a type is illegal and TableGen would fail to
  // compile, so FloatVT is simply VT in that case.
  ValueType FloatVT = !cast<ValueType>(
                          !if (!eq (!srl(EltSize,5),0),
                               VTName,
                               !if (!eq(TypeVariantName, "i"),
                                    "v" # NumElts # "f" # EltSize,
                                    VTName)));

  // The integer type with the same shape, e.g. v16i32 for v16f32.  Uses the
  // same EltSize < 32 fallback to VT as FloatVT.
  ValueType IntVT = !cast<ValueType>(
                        !if (!eq (!srl(EltSize,5),0),
                             VTName,
                             !if (!eq(TypeVariantName, "f"),
                                  "v" # NumElts # "i" # EltSize,
                                  VTName)));

  // The string that specifies embedded broadcast in assembly, e.g. {1to16}.
  string BroadcastStr = "{1to" # NumElts # "}";

  // 8-bit compressed displacement tuple/subvector format, CD8VT<NumElts>.
  // Only defined while NumElts is below 16 (in practice NumElts <= 8); left
  // unset otherwise.
  CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
                               !cast<CD8VForm>("CD8VT" # NumElts), ?);

  // Sub-register index for this width: sub_xmm for 128 bits, sub_ymm for
  // 256 bits, unset for anything else.
  SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
                               !if (!eq (Size, 256), sub_ymm, ?));

  // Execution domain, chosen from the element type.
  Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
                          !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
                               SSEPackedInt));

  // FR32X for f32 elements, FR64X for every other element type.
  RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);

  // A vector type of the same width with element type i64.  This is used to
  // create patterns for logic ops.
  ValueType i64VT = !cast<ValueType>("v" # !srl(Size, 6) # "i64");

  // A vector type of the same width with element type i32.  This is used to
  // create the canonical constant zero node ImmAllZerosV.
  ValueType i32VT = !cast<ValueType>("v" # !srl(Size, 5) # "i32");
  dag ImmAllZerosV = (VT (bitconvert (i32VT immAllZerosV)));

  // Instruction-name suffix for the vector length: Z128, Z256 or plain Z.
  string ZSuffix = !if (!eq (Size, 128), "Z128",
                        !if (!eq (Size, 256), "Z256", "Z"));
}
138
// 512-bit vector types, held in VR512.
def v64i8_info   : X86VectorVTInfo<64, i8,  VR512, "b">;
def v32i16_info  : X86VectorVTInfo<32, i16, VR512, "w">;
def v16i32_info  : X86VectorVTInfo<16, i32, VR512, "d">;
def v8i64_info   : X86VectorVTInfo<8,  i64, VR512, "q">;
def v16f32_info  : X86VectorVTInfo<16, f32, VR512, "ps">;
def v8f64_info   : X86VectorVTInfo<8,  f64, VR512, "pd">;

// 256-bit vector types.  The "x" in e.g. v32i8x_info means RC = VR256X.
def v32i8x_info  : X86VectorVTInfo<32, i8,  VR256X, "b">;
def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
def v8i32x_info  : X86VectorVTInfo<8,  i32, VR256X, "d">;
def v4i64x_info  : X86VectorVTInfo<4,  i64, VR256X, "q">;
def v8f32x_info  : X86VectorVTInfo<8,  f32, VR256X, "ps">;
def v4f64x_info  : X86VectorVTInfo<4,  f64, VR256X, "pd">;

// 128-bit vector types, held in VR128X.
def v16i8x_info  : X86VectorVTInfo<16, i8,  VR128X, "b">;
def v8i16x_info  : X86VectorVTInfo<8,  i16, VR128X, "w">;
def v4i32x_info  : X86VectorVTInfo<4,  i32, VR128X, "d">;
def v2i64x_info  : X86VectorVTInfo<2,  i64, VR128X, "q">;
def v4f32x_info  : X86VectorVTInfo<4,  f32, VR128X, "ps">;
def v2f64x_info  : X86VectorVTInfo<2,  f64, VR128X, "pd">;

// Scalar types (NumElts = 1).  Each scalar is mapped to the smallest
// (128-bit) vector type with the appropriate element type, so that the same
// masking logic can be used for scalars and vectors.
def i32x_info    : X86VectorVTInfo<1,  i32, GR32, "si">;
def i64x_info    : X86VectorVTInfo<1,  i64, GR64, "sq">;
def f32x_info    : X86VectorVTInfo<1,  f32, VR128X, "ss">;
def f64x_info    : X86VectorVTInfo<1,  f64, VR128X, "sd">;
167
// Groups the 512-, 256- and 128-bit X86VectorVTInfo records that share one
// element type, so a multiclass can receive all three vector lengths through
// a single template argument.
class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
                           X86VectorVTInfo i128> {
  X86VectorVTInfo info512 = i512;  // 512-bit variant
  X86VectorVTInfo info256 = i256;  // 256-bit variant
  X86VectorVTInfo info128 = i128;  // 128-bit variant
}
174
// One 512/256/128-bit triple per element type.
def avx512vl_i8_info  :
    AVX512VLVectorVTInfo<v64i8_info, v32i8x_info, v16i8x_info>;
def avx512vl_i16_info :
    AVX512VLVectorVTInfo<v32i16_info, v16i16x_info, v8i16x_info>;
def avx512vl_i32_info :
    AVX512VLVectorVTInfo<v16i32_info, v8i32x_info, v4i32x_info>;
def avx512vl_i64_info :
    AVX512VLVectorVTInfo<v8i64_info, v4i64x_info, v2i64x_info>;
def avx512vl_f32_info :
    AVX512VLVectorVTInfo<v16f32_info, v8f32x_info, v4f32x_info>;
def avx512vl_f64_info :
    AVX512VLVectorVTInfo<v8f64_info, v4f64x_info, v2f64x_info>;
Robert Khasanov2ea081d2014-08-25 14:49:34 +0000187
// Type information for a mask (vXi1) vector: its mask register class, the
// corresponding write-mask register class and its value type.
class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
                       ValueType _vt> {
  RegisterClass KRC = _krc;      // mask register class
  RegisterClass KRCWM = _krcwm;  // write-mask register class
  ValueType KVT = _vt;           // mask value type
}
194
// Mask vector types from 1 to 64 bits.
def v1i1_info  : X86KVectorVTInfo<VK1,  VK1WM,  v1i1>;
def v2i1_info  : X86KVectorVTInfo<VK2,  VK2WM,  v2i1>;
def v4i1_info  : X86KVectorVTInfo<VK4,  VK4WM,  v4i1>;
def v8i1_info  : X86KVectorVTInfo<VK8,  VK8WM,  v8i1>;
def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;
202
// Generates the masking variants of an instruction from its non-masking
// variant.  Three records are produced:
//   NAME   - unmasked form            (Ins,            Pattern)
//   NAMEk  - merge-masking form       (MaskingIns,     MaskingPattern), EVEX_K
//   NAMEkz - zero-masking form        (ZeroMaskingIns, ZeroMaskingPattern),
//            EVEX_KZ
// Only the assembly pieces for the masked forms are synthesized here (the
// "{${mask}}" and "{z}" decorations on $dst); the ISel patterns for masking
// are custom and must be supplied by the caller as template arguments.
//
// MaskingConstraint becomes the Constraints string of NAMEk.  IsCommutable
// controls isCommutable on NAME and NAMEkz, IsKCommutable on NAMEk.
multiclass AVX512_maskable_custom<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  list<dag> ZeroMaskingPattern,
                                  InstrItinClass itin,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0> {
  // Unmasked form.
  let isCommutable = IsCommutable in
  def NAME : AVX512<O, F, Outs, Ins,
                    !strconcat(OpcodeStr, "\t{", AttSrcAsm, ", $dst|",
                               "$dst, ", IntelSrcAsm, "}"),
                    Pattern, itin>;

  // Merge-masking form.
  // Prefer over VMOV*rrk Pat<>
  let isCommutable = IsKCommutable in
  def NAME#k : AVX512<O, F, Outs, MaskingIns,
                      !strconcat(OpcodeStr, "\t{", AttSrcAsm,
                                 ", $dst {${mask}}|",
                                 "$dst {${mask}}, ", IntelSrcAsm, "}"),
                      MaskingPattern, itin>,
               EVEX_K {
    // In case of the 3src subclass this is overridden with a let.
    string Constraints = MaskingConstraint;
  }

  // Zero-masking form.  Zero masking does not add any restriction to the
  // operand-commuting transformation, so it is fine to use IsCommutable
  // here rather than IsKCommutable.
  let isCommutable = IsCommutable in // Prefer over VMOV*rrkz Pat<>
  def NAME#kz : AVX512<O, F, Outs, ZeroMaskingIns,
                       !strconcat(OpcodeStr, "\t{", AttSrcAsm,
                                  ", $dst {${mask}} {z}|",
                                  "$dst {${mask}} {z}, ", IntelSrcAsm, "}"),
                       ZeroMaskingPattern,
                       itin>,
                EVEX_KZ;
}
246
Robert Khasanov2ea081d2014-08-25 14:49:34 +0000247
// Common base of AVX512_maskable and AVX512_maskable_3src.
//
// Builds the three pattern lists expected by AVX512_maskable_custom from dag
// fragments:
//   unmasked     : $dst = RHS
//   merge-masked : $dst = MaskingRHS            (supplied by the caller)
//   zero-masked  : $dst = Select(mask, RHS, all-zeros vector of type _)
// All remaining arguments are forwarded unchanged.
multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  InstrItinClass itin,
                                  SDNode Select = vselect,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0> :
  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns,
                         OpcodeStr, AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst, MaskingRHS)],
                         [(set _.RC:$dst,
                               (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
                         itin, MaskingConstraint, IsCommutable,
                         IsKCommutable>;
Adam Nemet2e2537f2014-08-07 17:53:55 +0000268
// Generates the unconditional/non-masking, the masking and the zero-masking
// variant of a vector instruction.  In the masking case the preserved vector
// elements come from a new dummy input operand ($src0) that is tied to $dst.
//
// Unlike AVX512_maskable, this version takes separate dags for the two
// cases: RHS is used for the non-masking pattern and MaskRHS for both masked
// patterns.
multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
                                 dag Outs, dag Ins, string OpcodeStr,
                                 string AttSrcAsm, string IntelSrcAsm,
                                 dag RHS, dag MaskRHS,
                                 InstrItinClass itin,
                                 bit IsCommutable = 0, bit IsKCommutable = 0,
                                 SDNode Select = vselect> :
  AVX512_maskable_custom<O, F, Outs, Ins,
                         // Merge-masking inputs: pass-through value + mask.
                         !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                         // Zero-masking inputs: mask only.
                         !con((ins _.KRCWM:$mask), Ins),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst,
                               (Select _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
                         [(set _.RC:$dst,
                               (Select _.KRCWM:$mask, MaskRHS,
                                       _.ImmAllZerosV))],
                         itin, "$src0 = $dst", IsCommutable, IsKCommutable>;
Craig Topper3a622a12017-08-17 15:40:25 +0000290
// Generates the unconditional/non-masking, the masking and the zero-masking
// variant of a vector instruction from a single RHS dag.  In the masking
// case the preserved vector elements come from a new dummy input operand
// ($src0) that is tied to $dst.
multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS,
                           InstrItinClass itin,
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           SDNode Select = vselect> :
  AVX512_maskable_common<O, F, _, Outs, Ins,
                         // Merge-masking inputs: pass-through value + mask.
                         !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                         // Zero-masking inputs: mask only.
                         !con((ins _.KRCWM:$mask), Ins),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                         (Select _.KRCWM:$mask, RHS, _.RC:$src0), itin,
                         Select, "$src0 = $dst", IsCommutable, IsKCommutable>;
Elena Demikhovsky905a5a62014-11-26 10:46:49 +0000307
// Generates the unconditional/non-masking, the masking and the zero-masking
// variant of a scalar instruction.  This is AVX512_maskable with X86selects
// as the select node and IsKCommutable fixed to 0.
multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins, string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS,
                                  InstrItinClass itin,
                                  bit IsCommutable = 0> :
  AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
                  RHS, itin, IsCommutable, 0, X86selects>;
Adam Nemet2e91ee52014-08-14 17:13:19 +0000318
// Similar to AVX512_maskable, but for instructions where one of the source
// operands ($src1) is already tied to $dst, so $src1 itself supplies the
// preserved vector elements and no extra $src0 operand is added.
// NOTE: NonTiedIns (the ins dag) must exclude $src1; it is prepended here.
//
// The masking constraint is passed as "" at this level.  When MaskOnly is
// set, the unmasked pattern is replaced by null_frag.
multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
                                dag Outs, dag NonTiedIns, string OpcodeStr,
                                string AttSrcAsm, string IntelSrcAsm,
                                dag RHS, InstrItinClass itin,
                                bit IsCommutable = 0,
                                bit IsKCommutable = 0,
                                SDNode Select = vselect,
                                bit MaskOnly = 0> :
  AVX512_maskable_common<O, F, _, Outs,
                         // Unmasked, merge-masked and zero-masked inputs.
                         !con((ins _.RC:$src1), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm,
                         !if(MaskOnly, (null_frag), RHS),
                         (Select _.KRCWM:$mask, RHS, _.RC:$src1), itin,
                         Select, "", IsCommutable, IsKCommutable>;
Adam Nemet2e91ee52014-08-14 17:13:19 +0000339
// Scalar flavour of AVX512_maskable_3src: identical except that X86selects is
// used as the select node.
multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns,
                                       string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       dag RHS, InstrItinClass itin,
                                       bit IsCommutable = 0,
                                       bit IsKCommutable = 0,
                                       bit MaskOnly = 0> :
  AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
                       IntelSrcAsm, RHS, itin, IsCommutable, IsKCommutable,
                       X86selects, MaskOnly>;
Adam Nemet2b5cdbb2014-10-08 23:25:33 +0000350
// Generates the unmasked, merge-masked and zero-masked records, but attaches
// an ISel pattern only to the unmasked one: both masked forms receive empty
// pattern lists.  The merge-masked form still takes the $src0 pass-through
// operand tied to $dst.
multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  InstrItinClass itin> :
  AVX512_maskable_custom<O, F, Outs, Ins,
                         !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                         !con((ins _.KRCWM:$mask), Ins),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                         itin, "$src0 = $dst">;
Adam Nemet2b5cdbb2014-10-08 23:25:33 +0000362
Elena Demikhovsky431b81e2015-04-21 13:13:46 +0000363
// Masking support for instructions that put their result in a mask register,
// like "compare" and "vptest".  Two records are produced:
//   NAME  - unmasked form (Ins,        Pattern)
//   NAMEk - masked form   (MaskingIns, MaskingPattern), EVEX_K
// There is no zero-masking form.  IsCommutable is applied to NAME only.
multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
                                      dag Outs,
                                      dag Ins, dag MaskingIns,
                                      string OpcodeStr,
                                      string AttSrcAsm, string IntelSrcAsm,
                                      list<dag> Pattern,
                                      list<dag> MaskingPattern,
                                      InstrItinClass itin,
                                      bit IsCommutable = 0> {
  // Unmasked form.
  let isCommutable = IsCommutable in
  def NAME : AVX512<O, F, Outs, Ins,
                    !strconcat(OpcodeStr, "\t{", AttSrcAsm, ", $dst|",
                               "$dst, ", IntelSrcAsm, "}"),
                    Pattern, itin>;

  // Masked form.
  def NAME#k : AVX512<O, F, Outs, MaskingIns,
                      !strconcat(OpcodeStr, "\t{", AttSrcAsm,
                                 ", $dst {${mask}}|",
                                 "$dst {${mask}}, ", IntelSrcAsm, "}"),
                      MaskingPattern, itin>, EVEX_K;
}
386
// Wraps the RHS and MaskingRHS dags into patterns that set the mask register
// $dst (of class _.KRC) and forwards everything to
// AVX512_maskable_custom_cmp.
multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                                      dag Outs,
                                      dag Ins, dag MaskingIns,
                                      string OpcodeStr,
                                      string AttSrcAsm, string IntelSrcAsm,
                                      dag RHS, dag MaskingRHS,
                                      InstrItinClass itin,
                                      bit IsCommutable = 0> :
  AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
                             AttSrcAsm, IntelSrcAsm,
                             [(set _.KRC:$dst, RHS)],
                             [(set _.KRC:$dst, MaskingRHS)],
                             itin, IsCommutable>;
Elena Demikhovsky431b81e2015-04-21 13:13:46 +0000399
// Unmasked and masked variants of a mask-producing instruction built from a
// single RHS dag.  The masked form takes an extra $mask input and its
// pattern is (and $mask, RHS).
multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                               dag Outs, dag Ins, string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, InstrItinClass itin,
                               bit IsCommutable = 0> :
  AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
                             !con((ins _.KRCWM:$mask), Ins),
                             OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                             (and _.KRCWM:$mask, RHS), itin, IsCommutable>;
Elena Demikhovsky431b81e2015-04-21 13:13:46 +0000409
// Pattern-less unmasked and masked variants of a mask-producing instruction:
// both pattern lists are empty.  The masked form takes an extra $mask input.
multiclass AVX512_maskable_cmp_alt<bits<8> O, Format F, X86VectorVTInfo _,
                                   dag Outs, dag Ins, string OpcodeStr,
                                   string AttSrcAsm, string IntelSrcAsm,
                                   InstrItinClass itin> :
  AVX512_maskable_custom_cmp<O, F, Outs,
                             Ins, !con((ins _.KRCWM:$mask),Ins), OpcodeStr,
                             AttSrcAsm, IntelSrcAsm, [], [], itin>;
Elena Demikhovsky29792e92015-05-07 11:24:42 +0000417
// Generates the unconditional/non-masking, the masking and the zero-masking
// variant of a vector instruction.  In the masking case the preserved vector
// elements come from a new dummy input operand ($src0) that is tied to $dst.
//
// RHS is used for the non-masking pattern and MaskedRHS for both masked
// patterns.  IsKCommutable is not passed on, so it keeps the default of
// AVX512_maskable_custom.
multiclass AVX512_maskable_logic<bits<8> O, Format F, X86VectorVTInfo _,
                                 dag Outs, dag Ins, string OpcodeStr,
                                 string AttSrcAsm, string IntelSrcAsm,
                                 dag RHS, dag MaskedRHS,
                                 InstrItinClass itin,
                                 bit IsCommutable = 0,
                                 SDNode Select = vselect> :
  AVX512_maskable_custom<O, F, Outs, Ins,
                         // Merge-masking inputs: pass-through value + mask.
                         !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                         // Zero-masking inputs: mask only.
                         !con((ins _.KRCWM:$mask), Ins),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst,
                               (Select _.KRCWM:$mask, MaskedRHS,
                                       _.RC:$src0))],
                         [(set _.RC:$dst,
                               (Select _.KRCWM:$mask, MaskedRHS,
                                       _.ImmAllZerosV))],
                         itin, "$src0 = $dst", IsCommutable>;
Craig Topperabe80cc2016-08-28 06:06:28 +0000438
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000439
// Alias instruction that maps the zero vector to pxor / xorp* for AVX-512.
// It is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
// swizzled by ExecutionDomainFix to pxor.
// canFoldAsLoad is set because this can be converted to a constant-pool load
// of an all-zeros value if folding it would be beneficial.
// AVX512_512_SETALLONES is the all-ones counterpart, defined under the same
// settings.
let isPseudo = 1, isReMaterializable = 1, isAsCheapAsAMove = 1,
    canFoldAsLoad = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
  def AVX512_512_SET0 :
      I<0, Pseudo, (outs VR512:$dst), (ins), "",
        [(set VR512:$dst, (v16i32 immAllZerosV))]>;
  def AVX512_512_SETALLONES :
      I<0, Pseudo, (outs VR512:$dst), (ins), "",
        [(set VR512:$dst, (v16i32 immAllOnesV))]>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000452
// Alias instructions that allow VPTERNLOG to be used with a mask to create a
// mix of all-ones and all-zeros elements.  It is done this way to force the
// same register to be used as input for all three sources.
let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
  // 16 x 32-bit elements selected by a 16-bit mask.
  def AVX512_512_SEXT_MASK_32 :
      I<0, Pseudo, (outs VR512:$dst), (ins VK16WM:$mask), "",
        [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
                                   (v16i32 immAllOnesV),
                                   (v16i32 immAllZerosV)))]>;
  // 8 x 64-bit elements selected by an 8-bit mask.
  def AVX512_512_SEXT_MASK_64 :
      I<0, Pseudo, (outs VR512:$dst), (ins VK8WM:$mask), "",
        [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
                                   (bc_v8i64 (v16i32 immAllOnesV)),
                                   (bc_v8i64 (v16i32 immAllZerosV))))]>;
}
468
// 128-bit and 256-bit zero-vector pseudos for the VR128X / VR256X register
// classes, defined with the same flags as AVX512_512_SET0.
let isPseudo = 1, isReMaterializable = 1, isAsCheapAsAMove = 1,
    canFoldAsLoad = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
  def AVX512_128_SET0 :
      I<0, Pseudo, (outs VR128X:$dst), (ins), "",
        [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
  def AVX512_256_SET0 :
      I<0, Pseudo, (outs VR256X:$dst), (ins), "",
        [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
}
476
// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// These pseudos are expanded by ExpandPostRAPseudos.
let isPseudo = 1, isReMaterializable = 1, isAsCheapAsAMove = 1,
    canFoldAsLoad = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
  // Scalar single-precision zero in FR32X.
  def AVX512_FsFLD0SS :
      I<0, Pseudo, (outs FR32X:$dst), (ins), "",
        [(set FR32X:$dst, fp32imm0)]>;
  // Scalar double-precision zero in FR64X.
  def AVX512_FsFLD0SD :
      I<0, Pseudo, (outs FR64X:$dst), (ins), "",
        [(set FR64X:$dst, fpimm0)]>;
}
486
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000487//===----------------------------------------------------------------------===//
488// AVX-512 - VECTOR INSERT
489//
Craig Topper3a622a12017-08-17 15:40:25 +0000490
// Defines the register (rr) and memory (rm) forms of a subvector insert that
// places a From-typed value into a To-typed vector at the position given by
// the immediate $src3.  The mnemonic is
// "vinsert" # From.EltTypeName # "x" # From.NumElts.
//
// Supports two different pattern operators for masked and unmasked ops:
// vinsert_insert is used for the unmasked pattern and vinsert_for_mask for
// the masked ones.  null_frag may be passed for one of them.
multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
                                  X86VectorVTInfo To,
                                  SDPatternOperator vinsert_insert,
                                  SDPatternOperator vinsert_for_mask,
                                  OpndItins itins> {
  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    // Register source.
    defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
                  (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
                  "vinsert" # From.EltTypeName # "x" # From.NumElts,
                  "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                        (From.VT From.RC:$src2),
                                        (iPTR imm)),
                  (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                          (From.VT From.RC:$src2),
                                          (iPTR imm)),
                  itins.rr>,
              AVX512AIi8Base, EVEX_4V, Sched<[itins.Sched]>;

    // Memory source: the inserted subvector is loaded through From.LdFrag.
    let mayLoad = 1 in
    defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
                  (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
                  "vinsert" # From.EltTypeName # "x" # From.NumElts,
                  "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (vinsert_insert:$src3
                      (To.VT To.RC:$src1),
                      (From.VT (bitconvert (From.LdFrag addr:$src2))),
                      (iPTR imm)),
                  (vinsert_for_mask:$src3
                      (To.VT To.RC:$src1),
                      (From.VT (bitconvert (From.LdFrag addr:$src2))),
                      (iPTR imm)),
                  itins.rm>,
              AVX512AIi8Base, EVEX_4V,
              EVEX_CD8<From.EltSize, From.CD8TupleForm>,
              Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
Adam Nemet4e2ef472014-10-02 23:18:28 +0000525
// Convenience wrapper around vinsert_for_size_split that passes the same
// pattern operator for both the masked and the unmasked ops.
multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
                            X86VectorVTInfo To,
                            SDPatternOperator vinsert_insert,
                            OpndItins itins> :
  vinsert_for_size_split<Opcode, From, To,
                         vinsert_insert, vinsert_insert, itins>;
Craig Topper3a622a12017-08-17 15:40:25 +0000532
// Adds selection patterns (under predicates p) that map a vinsert_insert of
// a From-typed value into a To-typed vector onto the existing instructions
// named InstrStr # "rr" (register source) and InstrStr # "rm" (memory
// source).  The instruction's immediate operand is produced by applying
// INSERT_get_vinsert_imm to the matched node ($ins).
multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                                     X86VectorVTInfo To,
                                     PatFrag vinsert_insert,
                                     SDNodeXForm INSERT_get_vinsert_imm,
                                     list<Predicate> p> {
  let Predicates = p in {
    // Register source.
    def : Pat<(vinsert_insert:$ins
                  (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                         To.RC:$src1, From.RC:$src2,
                         (INSERT_get_vinsert_imm To.RC:$ins)))>;

    // Memory source: the inserted subvector is loaded through From.LdFrag.
    def : Pat<(vinsert_insert:$ins
                  (To.VT To.RC:$src1),
                  (From.VT (bitconvert (From.LdFrag addr:$src2))),
                  (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rm")
                         To.RC:$src1, addr:$src2,
                         (INSERT_get_vinsert_imm To.RC:$ins)))>;
  }
}
552
// Defines the subvector insert instructions for one element family.
// EltVT32 / EltVT64 are the 32- and 64-bit element types; Opcode128 is used by
// the forms whose source is a VR128X subvector and Opcode256 by the forms
// whose source is a VR256X subvector.
multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
                            ValueType EltVT64, int Opcode256,
                            OpndItins itins> {
  // 4 x 32-bit elements into a 256-bit vector.
  let Predicates = [HasVLX] in {
    defm NAME # "32x4Z256"
      : vinsert_for_size<Opcode128,
                         X86VectorVTInfo< 4, EltVT32, VR128X>,
                         X86VectorVTInfo< 8, EltVT32, VR256X>,
                         vinsert128_insert, itins>,
        EVEX_V256;
  }

  // 4 x 32-bit elements into a 512-bit vector.
  defm NAME # "32x4Z"
    : vinsert_for_size<Opcode128,
                       X86VectorVTInfo< 4, EltVT32, VR128X>,
                       X86VectorVTInfo<16, EltVT32, VR512>,
                       vinsert128_insert, itins>,
      EVEX_V512;

  // 4 x 64-bit elements into a 512-bit vector.
  defm NAME # "64x4Z"
    : vinsert_for_size<Opcode256,
                       X86VectorVTInfo< 4, EltVT64, VR256X>,
                       X86VectorVTInfo< 8, EltVT64, VR512>,
                       vinsert256_insert, itins>,
      VEX_W, EVEX_V512;

  // Even with DQI we'd like to only use these instructions for masking, so
  // null_frag is passed as the unmasked pattern operator.
  let Predicates = [HasVLX, HasDQI] in {
    defm NAME # "64x2Z256"
      : vinsert_for_size_split<Opcode128,
                               X86VectorVTInfo< 2, EltVT64, VR128X>,
                               X86VectorVTInfo< 4, EltVT64, VR256X>,
                               null_frag, vinsert128_insert, itins>,
        VEX_W, EVEX_V256;
  }

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z"
      : vinsert_for_size_split<Opcode128,
                               X86VectorVTInfo< 2, EltVT64, VR128X>,
                               X86VectorVTInfo< 8, EltVT64, VR512>,
                               null_frag, vinsert128_insert, itins>,
        VEX_W, EVEX_V512;

    defm NAME # "32x8Z"
      : vinsert_for_size_split<Opcode256,
                               X86VectorVTInfo< 8, EltVT32, VR256X>,
                               X86VectorVTInfo<16, EltVT32, VR512>,
                               null_frag, vinsert256_insert, itins>,
        EVEX_V512;
  }
}
596
// Itineraries / scheduling classes shared by all VINSERTF* and VINSERTI*
// forms, followed by the instruction definitions themselves.
// FIXME: Is there a better scheduler itinerary for VINSERTF/VINSERTI?
let Sched = WriteFShuffle256 in {
  def AVX512_VINSERTF : OpndItins<IIC_SSE_SHUFP, IIC_SSE_SHUFP>;
}
let Sched = WriteShuffle256 in {
  def AVX512_VINSERTI : OpndItins<IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI>;
}

// Arguments: <EltVT32, Opcode128, EltVT64, Opcode256, itins>.
defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, AVX512_VINSERTF>;
defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, AVX512_VINSERTI>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000609
// Codegen patterns with the alternative element types.
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256",
          v2f64x_info, v4f64x_info,
          vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256",
          v2i64x_info, v4i64x_info,
          vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;

defm : vinsert_for_size_lowering<"VINSERTF32x4Z",
          v2f64x_info, v8f64_info,
          vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z",
          v2i64x_info, v8i64_info,
          vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;

defm : vinsert_for_size_lowering<"VINSERTF64x4Z",
          v8f32x_info, v16f32_info,
          vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z",
          v8i32x_info, v16i32_info,
          vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;

// Codegen patterns with the alternative types: insert VEC128 into VEC256.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256",
          v8i16x_info, v16i16x_info,
          vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256",
          v16i8x_info, v32i8x_info,
          vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
// Codegen patterns with the alternative types: insert VEC128 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z",
          v8i16x_info, v32i16_info,
          vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z",
          v16i8x_info, v64i8_info,
          vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
// Codegen patterns with the alternative types: insert VEC256 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTI64x4Z",
          v16i16x_info, v32i16_info,
          vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z",
          v32i8x_info, v64i8_info,
          vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
642
Craig Topperf7a19db2017-10-08 01:33:40 +0000643
// Additional patterns for handling a bitcast between the vselect and the
// subvector insert. The insert is matched on From.VT / To.VT while the mask
// (Cast.KRCWM) and the vselect result use Cast.VT. Four forms of the
// instruction named by InstrStr are selected: merge-masking ("rrk" / "rmk")
// and zero-masking ("rrkz" / "rmkz"), each with a register or a folded-load
// subvector source.
multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                 X86VectorVTInfo To, X86VectorVTInfo Cast,
                                 PatFrag vinsert_insert,
                                 SDNodeXForm INSERT_get_vinsert_imm,
                                 list<Predicate> p> {
  let Predicates = p in {
    // Merge-masking, register source.
    def : Pat<(Cast.VT
                (vselect Cast.KRCWM:$mask,
                         (bitconvert
                           (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                (From.VT From.RC:$src2),
                                                (iPTR imm))),
                         Cast.RC:$src0)),
              (!cast<Instruction>(InstrStr # "rrk")
                 Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
                 (INSERT_get_vinsert_imm To.RC:$ins))>;

    // Merge-masking, folded load.
    def : Pat<(Cast.VT
                (vselect Cast.KRCWM:$mask,
                         (bitconvert
                           (vinsert_insert:$ins
                              (To.VT To.RC:$src1),
                              (From.VT (bitconvert (From.LdFrag addr:$src2))),
                              (iPTR imm))),
                         Cast.RC:$src0)),
              (!cast<Instruction>(InstrStr # "rmk")
                 Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
                 (INSERT_get_vinsert_imm To.RC:$ins))>;

    // Zero-masking, register source.
    def : Pat<(Cast.VT
                (vselect Cast.KRCWM:$mask,
                         (bitconvert
                           (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                (From.VT From.RC:$src2),
                                                (iPTR imm))),
                         Cast.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr # "rrkz")
                 Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
                 (INSERT_get_vinsert_imm To.RC:$ins))>;

    // Zero-masking, folded load.
    def : Pat<(Cast.VT
                (vselect Cast.KRCWM:$mask,
                         (bitconvert
                           (vinsert_insert:$ins
                              (To.VT To.RC:$src1),
                              (From.VT (bitconvert (From.LdFrag addr:$src2))),
                              (iPTR imm))),
                         Cast.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr # "rmkz")
                 Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
                 (INSERT_get_vinsert_imm To.RC:$ins))>;
  }
}
697
// Masked 128-bit -> 256-bit subvector inserts where a bitcast sits between the
// vselect and the insert. Template arguments are
// <InstrStr, From, To, Cast, pattern fragment, immediate transform, predicates>;
// the instruction is chosen by the element size of the Cast (mask) type.

// Floating point.
defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
                             v8f32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
                             v4f64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

// Integer, 32-bit mask granularity.
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;

// Integer, 64-bit mask granularity. These must select the integer
// VINSERTI64x2Z256 (not the FP VINSERTF64x2Z256), matching the 512-bit
// VINSERTI64x2Z patterns and the VEXTRACTI64x2Z256 extract patterns.
defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v4i32x_info, v8i32x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v8i16x_info, v16i16x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v16i8x_info, v32i8x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
723
// Masked subvector inserts into 512-bit vectors where a bitcast sits between
// the vselect and the insert. Template arguments are
// <InstrStr, From, To, Cast, pattern fragment, immediate transform, predicates>.

// 128-bit subvector, floating point.
defm : vinsert_for_mask_cast<"VINSERTF32x4Z",
          v2f64x_info, v8f64_info, v16f32_info,
          vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z",
          v4f32x_info, v16f32_info, v8f64_info,
          vinsert128_insert, INSERT_get_vinsert128_imm, [HasDQI]>;

// 128-bit subvector, integer.
defm : vinsert_for_mask_cast<"VINSERTI32x4Z",
          v2i64x_info, v8i64_info, v16i32_info,
          vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z",
          v8i16x_info, v32i16_info, v16i32_info,
          vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z",
          v16i8x_info, v64i8_info, v16i32_info,
          vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z",
          v4i32x_info, v16i32_info, v8i64_info,
          vinsert128_insert, INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z",
          v8i16x_info, v32i16_info, v8i64_info,
          vinsert128_insert, INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z",
          v16i8x_info, v64i8_info, v8i64_info,
          vinsert128_insert, INSERT_get_vinsert128_imm, [HasDQI]>;

// 256-bit subvector, floating point.
defm : vinsert_for_mask_cast<"VINSERTF32x8Z",
          v4f64x_info, v8f64_info, v16f32_info,
          vinsert256_insert, INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTF64x4Z",
          v8f32x_info, v16f32_info, v8f64_info,
          vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;

// 256-bit subvector, integer.
defm : vinsert_for_mask_cast<"VINSERTI32x8Z",
          v4i64x_info, v8i64_info, v16i32_info,
          vinsert256_insert, INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z",
          v16i16x_info, v32i16_info, v16i32_info,
          vinsert256_insert, INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z",
          v32i8x_info, v64i8_info, v16i32_info,
          vinsert256_insert, INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z",
          v8i32x_info, v16i32_info, v8i64_info,
          vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z",
          v16i16x_info, v32i16_info, v8i64_info,
          vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z",
          v32i8x_info, v64i8_info, v8i64_info,
          vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
775
// vinsertps - insert f32 to XMM (EVEX-encoded register and memory forms).
let ExeDomain = SSEPackedSingle in {
  // Register source.
  def VINSERTPSZrr
    : AVX512AIi8<0x21, MRMSrcReg,
                 (outs VR128X:$dst),
                 (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
                 "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                 [(set VR128X:$dst,
                       (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))],
                 IIC_SSE_INSERTPS_RR>,
      EVEX_4V, Sched<[WriteFShuffle]>;

  // Memory source: a scalar f32 load wrapped in scalar_to_vector.
  def VINSERTPSZrm
    : AVX512AIi8<0x21, MRMSrcMem,
                 (outs VR128X:$dst),
                 (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
                 "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                 [(set VR128X:$dst,
                       (X86insertps
                          VR128X:$src1,
                          (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                          imm:$src3))],
                 IIC_SSE_INSERTPS_RM>,
      EVEX_4V, EVEX_CD8<32, CD8VT1>, Sched<[WriteFShuffleLd, ReadAfterLd]>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000791
792//===----------------------------------------------------------------------===//
793// AVX-512 VECTOR EXTRACT
794//---
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000795
// Defines the register ("rr", plus its masked variants via
// AVX512_maskable_split), store ("mr") and masked-store ("mrk") forms of a
// subvector extract from From into To.
// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
multiclass vextract_for_size_split<int Opcode,
                                   X86VectorVTInfo From, X86VectorVTInfo To,
                                   SDPatternOperator vextract_extract,
                                   SDPatternOperator vextract_for_mask,
                                   OpndItins itins> {
  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    // Register destination.
    defm rr
      : AVX512_maskable_split<Opcode, MRMDestReg, To,
          (outs To.RC:$dst),
          (ins From.RC:$src1, u8imm:$idx),
          "vextract" # To.EltTypeName # "x" # To.NumElts,
          "$idx, $src1", "$src1, $idx",
          (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
          (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm)),
          itins.rr>,
        AVX512AIi8Base, EVEX, Sched<[itins.Sched]>;

    // Memory destination, selected for a store of the unmasked extract.
    def mr
      : AVX512AIi8<Opcode, MRMDestMem,
          (outs),
          (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
          "vextract" # To.EltTypeName # "x" # To.NumElts #
            "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
          [(store (To.VT (vextract_extract:$idx (From.VT From.RC:$src1),
                                                (iPTR imm))),
                  addr:$dst)],
          itins.rm>,
        EVEX, Sched<[itins.Sched.Folded, ReadAfterLd]>;

    // Masked memory destination; it has no selection pattern, so mayStore is
    // set explicitly.
    let mayStore = 1, hasSideEffects = 0 in {
      def mrk
        : AVX512AIi8<Opcode, MRMDestMem,
            (outs),
            (ins To.MemOp:$dst, To.KRCWM:$mask, From.RC:$src1, u8imm:$idx),
            "vextract" # To.EltTypeName # "x" # To.NumElts #
              "\t{$idx, $src1, $dst {${mask}}|"
              "$dst {${mask}}, $src1, $idx}",
            [], itins.rm>,
          EVEX_K, EVEX, Sched<[itins.Sched.Folded, ReadAfterLd]>;
    }
  }
}
833
// Convenience wrapper over vextract_for_size_split that passes the same
// pattern operator for masked and unmasked ops.
multiclass vextract_for_size<int Opcode,
                             X86VectorVTInfo From, X86VectorVTInfo To,
                             SDPatternOperator vextract_extract,
                             OpndItins itins>
  : vextract_for_size_split<Opcode, From, To,
                            vextract_extract, vextract_extract,
                            itins>;
Craig Topper3a622a12017-08-17 15:40:25 +0000840
// Codegen patterns for the alternative types: map a subvector extract of To.VT
// from From.VT onto the register ("rr") and store ("mr") forms of the
// instruction named by InstrStr. EXTRACT_get_vextract_imm is the transform
// applied to the matched extract node ($ext) to produce the last instruction
// operand. The patterns are only active under the predicate list p.
multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                                      X86VectorVTInfo To,
                                      PatFrag vextract_extract,
                                      SDNodeXForm EXTRACT_get_vextract_imm,
                                      list<Predicate> p> {
  let Predicates = p in {
    // Result kept in a register.
    def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr # "rr")
                        From.RC:$src1,
                        (EXTRACT_get_vextract_imm To.RC:$ext)))>;

    // Result stored straight to memory.
    def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
                                                   (iPTR imm))),
                     addr:$dst),
              (!cast<Instruction>(InstrStr # "mr")
                 addr:$dst, From.RC:$src1,
                 (EXTRACT_get_vextract_imm To.RC:$ext))>;
  }
}
856
// Defines the subvector extract instructions for one element family.
// EltVT32 / EltVT64 are the 32- and 64-bit element types; Opcode128 is used by
// the forms producing a VR128X result and Opcode256 by the forms producing a
// VR256X result.
multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
                             ValueType EltVT64, int Opcode256,
                             OpndItins itins> {
  let Predicates = [HasAVX512] in {
    // 4 x 32-bit elements out of a 512-bit vector.
    defm NAME # "32x4Z"
      : vextract_for_size<Opcode128,
                          X86VectorVTInfo<16, EltVT32, VR512>,
                          X86VectorVTInfo< 4, EltVT32, VR128X>,
                          vextract128_extract, itins>,
        EVEX_V512, EVEX_CD8<32, CD8VT4>;

    // 4 x 64-bit elements out of a 512-bit vector.
    defm NAME # "64x4Z"
      : vextract_for_size<Opcode256,
                          X86VectorVTInfo< 8, EltVT64, VR512>,
                          X86VectorVTInfo< 4, EltVT64, VR256X>,
                          vextract256_extract, itins>,
        VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
  }

  // 4 x 32-bit elements out of a 256-bit vector.
  let Predicates = [HasVLX] in {
    defm NAME # "32x4Z256"
      : vextract_for_size<Opcode128,
                          X86VectorVTInfo< 8, EltVT32, VR256X>,
                          X86VectorVTInfo< 4, EltVT32, VR128X>,
                          vextract128_extract, itins>,
        EVEX_V256, EVEX_CD8<32, CD8VT4>;
  }

  // Even with DQI we'd like to only use these instructions for masking, so
  // null_frag is passed as the unmasked pattern operator.
  let Predicates = [HasVLX, HasDQI] in {
    defm NAME # "64x2Z256"
      : vextract_for_size_split<Opcode128,
                                X86VectorVTInfo< 4, EltVT64, VR256X>,
                                X86VectorVTInfo< 2, EltVT64, VR128X>,
                                null_frag, vextract128_extract, itins>,
        VEX_W, EVEX_V256, EVEX_CD8<64, CD8VT2>;
  }

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z"
      : vextract_for_size_split<Opcode128,
                                X86VectorVTInfo< 8, EltVT64, VR512>,
                                X86VectorVTInfo< 2, EltVT64, VR128X>,
                                null_frag, vextract128_extract, itins>,
        VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;

    defm NAME # "32x8Z"
      : vextract_for_size_split<Opcode256,
                                X86VectorVTInfo<16, EltVT32, VR512>,
                                X86VectorVTInfo< 8, EltVT32, VR256X>,
                                null_frag, vextract256_extract, itins>,
        EVEX_V512, EVEX_CD8<32, CD8VT8>;
  }
}
901
// Itineraries / scheduling classes shared by all VEXTRACTF* and VEXTRACTI*
// forms, followed by the instruction definitions themselves.
// FIXME: Is there a better scheduler itinerary for VEXTRACTF/VEXTRACTI?
let Sched = WriteFShuffle256 in {
  def AVX512_VEXTRACTF : OpndItins<IIC_SSE_SHUFP, IIC_SSE_SHUFP>;
}
let Sched = WriteShuffle256 in {
  def AVX512_VEXTRACTI : OpndItins<IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI>;
}

// Arguments: <EltVT32, Opcode128, EltVT64, Opcode256, itins>.
defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, AVX512_VEXTRACTF>;
defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, AVX512_VEXTRACTI>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000914
// extract_subvector codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z",
          v8f64_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z",
          v8i64_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF64x4Z",
          v16f32_info, v8f32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z",
          v16i32_info, v8i32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256",
          v4f64x_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256",
          v4i64x_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen patterns with the alternative types: extract VEC128 from VEC256.
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256",
          v16i16x_info, v8i16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256",
          v32i8x_info, v16i8x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen patterns with the alternative types: extract VEC128 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z",
          v32i16_info, v8i16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z",
          v64i8_info, v16i8x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
// Codegen patterns with the alternative types: extract VEC256 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z",
          v32i16_info, v16i16x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z",
          v64i8_info, v32i8x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
948
Craig Topper5f3fef82016-05-22 07:40:58 +0000949
// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX. Without VLX, take the low 256 bits via
// sub_ymm and use the VEX-encoded VEXTRACT{I,F}128rr with index 1.
let Predicates = [NoVLX] in {
  def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
            (v2i64 (VEXTRACTI128rr
                      (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                      (iPTR 1)))>;
  def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
            (v2f64 (VEXTRACTF128rr
                      (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                      (iPTR 1)))>;
  def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
            (v4i32 (VEXTRACTI128rr
                      (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                      (iPTR 1)))>;
  def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
            (v4f32 (VEXTRACTF128rr
                      (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                      (iPTR 1)))>;
  def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
            (v8i16 (VEXTRACTI128rr
                      (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                      (iPTR 1)))>;
  def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
            (v16i8 (VEXTRACTI128rr
                      (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                      (iPTR 1)))>;
}
978
// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX. With VLX, take the low 256 bits via
// sub_ymm and use the 256-bit VEXTRACT{I,F}32x4Z256rr with index 1.
let Predicates = [HasVLX] in {
  def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
            (v2i64 (VEXTRACTI32x4Z256rr
                      (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                      (iPTR 1)))>;
  def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
            (v2f64 (VEXTRACTF32x4Z256rr
                      (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                      (iPTR 1)))>;
  def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
            (v4i32 (VEXTRACTI32x4Z256rr
                      (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                      (iPTR 1)))>;
  def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
            (v4f32 (VEXTRACTF32x4Z256rr
                      (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                      (iPTR 1)))>;
  def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
            (v8i16 (VEXTRACTI32x4Z256rr
                      (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                      (iPTR 1)))>;
  def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
            (v16i8 (VEXTRACTI32x4Z256rr
                      (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                      (iPTR 1)))>;
}
1007
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001008
// Additional patterns for handling a bitcast between the vselect and the
// extract_subvector. The extract is matched on From.VT / To.VT while the mask
// (Cast.KRCWM), the passthrough value and the vselect result use Cast.VT.
// Selects the merge-masking ("rrk") and zero-masking ("rrkz") register forms
// of the instruction named by InstrStr under the predicate list p.
multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
                                  PatFrag vextract_extract,
                                  SDNodeXForm EXTRACT_get_vextract_imm,
                                  list<Predicate> p> {
let Predicates = p in {
  // Merge-masking. The passthrough operand is an operand of the Cast.VT
  // vselect, so it is named with Cast.RC in both the source and the result
  // pattern.
  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (vextract_extract:$ext
                                       (From.VT From.RC:$src), (iPTR imm)))),
                              Cast.RC:$src0)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
                      Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;

  // Zero-masking.
  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (vextract_extract:$ext
                                       (From.VT From.RC:$src), (iPTR imm)))),
                              Cast.ImmAllZerosV)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
                      Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
}
}
1036
// Masked subvector extracts where a bitcast sits between the vselect and the
// extract. Template arguments are
// <InstrStr, From, To, Cast, pattern fragment, immediate transform, predicates>.

// 128-bit extract from 256-bit, floating point.
defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256",
          v4f64x_info, v2f64x_info, v4f32x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256",
          v8f32x_info, v4f32x_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

// 128-bit extract from 256-bit, integer.
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256",
          v4i64x_info, v2i64x_info, v4i32x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256",
          v16i16x_info, v8i16x_info, v4i32x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256",
          v32i8x_info, v16i8x_info, v4i32x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256",
          v8i32x_info, v4i32x_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256",
          v16i16x_info, v8i16x_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256",
          v32i8x_info, v16i8x_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

// 128-bit extract from 512-bit, floating point.
defm : vextract_for_mask_cast<"VEXTRACTF32x4Z",
          v8f64_info, v2f64x_info, v4f32x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z",
          v16f32_info, v4f32x_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasDQI]>;

// 128-bit extract from 512-bit, integer.
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z",
          v8i64_info, v2i64x_info, v4i32x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z",
          v32i16_info, v8i16x_info, v4i32x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z",
          v64i8_info, v16i8x_info, v4i32x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z",
          v16i32_info, v4i32x_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z",
          v32i16_info, v8i16x_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z",
          v64i8_info, v16i8x_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasDQI]>;

// 256-bit extract from 512-bit, floating point.
defm : vextract_for_mask_cast<"VEXTRACTF32x8Z",
          v8f64_info, v4f64x_info, v8f32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x4Z",
          v16f32_info, v8f32x_info, v4f64x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;

// 256-bit extract from 512-bit, integer.
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z",
          v8i64_info, v4i64x_info, v8i32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z",
          v32i16_info, v16i16x_info, v8i32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z",
          v64i8_info, v32i8x_info, v8i32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z",
          v16i32_info, v8i32x_info, v4i64x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z",
          v32i16_info, v16i16x_info, v4i64x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z",
          v64i8_info, v32i8x_info, v4i64x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
1114
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001115// vextractps - extract 32 bits from XMM
Craig Topper03b849e2016-05-21 22:50:11 +00001116def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
Craig Topperfc946a02015-01-25 02:21:13 +00001117 (ins VR128X:$src1, u8imm:$src2),
Elena Demikhovskycf088092013-12-11 14:31:04 +00001118 "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
Simon Pilgrimd255a622017-12-06 18:46:06 +00001119 [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))],
1120 IIC_SSE_EXTRACTPS_RR>, EVEX, VEX_WIG, Sched<[WriteFShuffle]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001121
// Memory form of vextractps: the selected 32-bit element of $src1 (viewed as
// v4i32) is stored to the f32mem operand $dst.
// NOTE(review): this store form is scheduled with WriteFShuffleLd (a
// load-folded class by name) - confirm that this is the intended class.
def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem,
      (outs),
      (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
              addr:$dst)],
      IIC_SSE_EXTRACTPS_RM>,
      EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteFShuffleLd]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001128
//===---------------------------------------------------------------------===//
// AVX-512 BROADCAST
//---
// Broadcast with a scalar argument.
// Adds selection patterns only (no instruction definitions): an X86VBroadcast
// of a value in SrcInfo.FRC is mapped onto the register-form instructions
// NAME#ZSuffix#{r,rk,rkz}, after copying the scalar into SrcInfo.RC.
// The opc and OpcodeStr parameters are not referenced by the body.
multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
                                   X86VectorVTInfo DestInfo,
                                   X86VectorVTInfo SrcInfo> {
  // Unmasked broadcast.
  def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
            (!cast<Instruction>(NAME#DestInfo.ZSuffix#r)
              (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;

  // Merge-masked broadcast: unselected lanes come from $src0.
  def : Pat<(DestInfo.VT
              (vselect DestInfo.KRCWM:$mask,
                       (X86VBroadcast SrcInfo.FRC:$src),
                       DestInfo.RC:$src0)),
            (!cast<Instruction>(NAME#DestInfo.ZSuffix#rk)
              DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
              (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;

  // Zero-masked broadcast: unselected lanes are DestInfo.ImmAllZerosV.
  def : Pat<(DestInfo.VT
              (vselect DestInfo.KRCWM:$mask,
                       (X86VBroadcast SrcInfo.FRC:$src),
                       DestInfo.ImmAllZerosV)),
            (!cast<Instruction>(NAME#DestInfo.ZSuffix#rkz)
              DestInfo.KRCWM:$mask,
              (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;
}
Robert Khasanovaf318f72014-10-30 14:21:47 +00001150
// Split version to allow mask and broadcast node to be different types. This
// helps support the 32x2 broadcasts.
//
//   SchedRR/SchedRM - scheduling classes for the register and memory forms.
//   MaskInfo        - type of the result register, the write-mask and the
//                     vselect; the r/m instructions are defined with it.
//   DestInfo        - type produced by the broadcast node, bitconverted to
//                     MaskInfo.VT. Also supplies the ExeDomain.
//   SrcInfo         - type of the broadcast source (register class, scalar
//                     memory operand and scalar load fragment).
//   UnmaskedOp      - operator used in the first pattern operand of
//                     AVX512_maskable_split and in the unmasked load pattern
//                     below; the 32x2 callers pass null_frag here. The second
//                     pattern operand always uses X86VBroadcast.
multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
                                     SchedWrite SchedRR, SchedWrite SchedRM,
                                     X86VectorVTInfo MaskInfo,
                                     X86VectorVTInfo DestInfo,
                                     X86VectorVTInfo SrcInfo,
                                     SDPatternOperator UnmaskedOp = X86VBroadcast> {
  let ExeDomain = DestInfo.ExeDomain, hasSideEffects = 0 in {
  // Register source.
  defm r : AVX512_maskable_split<opc, MRMSrcReg, MaskInfo,
                   (outs MaskInfo.RC:$dst),
                   (ins SrcInfo.RC:$src), OpcodeStr, "$src", "$src",
                   (MaskInfo.VT
                    (bitconvert
                     (DestInfo.VT
                      (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))),
                   (MaskInfo.VT
                    (bitconvert
                     (DestInfo.VT
                      (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
                   NoItinerary>, T8PD, EVEX, Sched<[SchedRR]>;
  // Scalar memory source.
  let mayLoad = 1 in
  defm m : AVX512_maskable_split<opc, MRMSrcMem, MaskInfo,
                   (outs MaskInfo.RC:$dst),
                   (ins SrcInfo.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
                   (MaskInfo.VT
                    (bitconvert
                     (DestInfo.VT (UnmaskedOp
                                   (SrcInfo.ScalarLdFrag addr:$src))))),
                   (MaskInfo.VT
                    (bitconvert
                     (DestInfo.VT (X86VBroadcast
                                   (SrcInfo.ScalarLdFrag addr:$src))))),
                   NoItinerary>, T8PD, EVEX, EVEX_CD8<SrcInfo.EltSize, CD8VT1>,
                   Sched<[SchedRM]>;
  }

  // Broadcast of a scalar load wrapped in scalar_to_vector: fold the load
  // into the memory form (unmasked, merge-masked, zero-masked).
  def : Pat<(MaskInfo.VT
             (bitconvert
              (DestInfo.VT (UnmaskedOp
                            (SrcInfo.VT (scalar_to_vector
                                         (SrcInfo.ScalarLdFrag addr:$src))))))),
            (!cast<Instruction>(NAME#MaskInfo.ZSuffix#m) addr:$src)>;
  // The instruction name is built from MaskInfo.ZSuffix, matching the m and
  // mkz patterns and the info the instructions above are defined with.
  // NOTE(review): presumably name-neutral for the callers in this file, which
  // pair MaskInfo and DestInfo of the same width - confirm ZSuffix derivation.
  def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask,
                          (bitconvert
                           (DestInfo.VT
                            (X86VBroadcast
                             (SrcInfo.VT (scalar_to_vector
                                          (SrcInfo.ScalarLdFrag addr:$src)))))),
                          MaskInfo.RC:$src0)),
            (!cast<Instruction>(NAME#MaskInfo.ZSuffix#mk)
             MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask, addr:$src)>;
  def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask,
                          (bitconvert
                           (DestInfo.VT
                            (X86VBroadcast
                             (SrcInfo.VT (scalar_to_vector
                                          (SrcInfo.ScalarLdFrag addr:$src)))))),
                          MaskInfo.ImmAllZerosV)),
            (!cast<Instruction>(NAME#MaskInfo.ZSuffix#mkz)
             MaskInfo.KRCWM:$mask, addr:$src)>;
}
Robert Khasanovaf318f72014-10-30 14:21:47 +00001213
// Helper class to force mask and broadcast result to same type: forwards to
// avx512_broadcast_rm_split with DestInfo passed as both the mask info and
// the destination info, leaving UnmaskedOp at its default.
multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
                               SchedWrite SchedRR, SchedWrite SchedRM,
                               X86VectorVTInfo DestInfo,
                               X86VectorVTInfo SrcInfo>
  : avx512_broadcast_rm_split<opc, OpcodeStr, SchedRR, SchedRM,
                              DestInfo, DestInfo, SrcInfo>;
Craig Topper17854ec2017-08-30 07:48:39 +00001221
// Scalar FP broadcast, 512-bit and 256-bit destinations only (no Z128
// variant is defined here). Each variant combines the instruction
// definitions from avx512_broadcast_rm with the FRC-source patterns from
// avx512_broadcast_scalar; the source is always the 128-bit info.
multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                               WriteFShuffle256Ld, _.info512, _.info128>,
           avx512_broadcast_scalar<opc, OpcodeStr, _.info512, _.info128>,
           EVEX_V512;

  let Predicates = [HasVLX] in
  defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                  WriteFShuffle256Ld, _.info256, _.info128>,
              avx512_broadcast_scalar<opc, OpcodeStr, _.info256, _.info128>,
              EVEX_V256;
}
1238
// Scalar FP broadcast with 512-, 256- and 128-bit destinations. Each variant
// combines the instruction definitions from avx512_broadcast_rm with the
// FRC-source patterns from avx512_broadcast_scalar; the source is always the
// 128-bit info. All three widths use the WriteFShuffle256 scheduling classes.
multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                               WriteFShuffle256Ld, _.info512, _.info128>,
           avx512_broadcast_scalar<opc, OpcodeStr, _.info512, _.info128>,
           EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info256, _.info128>,
                avx512_broadcast_scalar<opc, OpcodeStr, _.info256, _.info128>,
                EVEX_V256;
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info128, _.info128>,
                avx512_broadcast_scalar<opc, OpcodeStr, _.info128, _.info128>,
                EVEX_V128;
  }
}
// Scalar FP broadcast instructions: vbroadcastss (opcode 0x18) and
// vbroadcastsd (opcode 0x19, VEX_W).
defm VBROADCASTSS : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
                                           avx512vl_f32_info>;
defm VBROADCASTSD : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
                                           avx512vl_f64_info>,
                    VEX_W;

// Map the 512-bit broadcast-from-memory intrinsics onto the memory forms.
def : Pat<(int_x86_avx512_vbroadcast_ss_512 addr:$src),
          (VBROADCASTSSZm addr:$src)>;
def : Pat<(int_x86_avx512_vbroadcast_sd_512 addr:$src),
          (VBROADCASTSDZm addr:$src)>;
Quentin Colombet4bf1c282013-10-25 17:47:18 +00001268
// Integer broadcast from a general-purpose register of class SrcRC.
// Defines the maskable register form "vpbroadcast<Suffix>" whose pattern is
// (OpNode SrcRC:$src), scheduled with SchedRR.
multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
                                    X86VectorVTInfo _,
                                    SDPatternOperator OpNode,
                                    RegisterClass SrcRC> {
  let ExeDomain = _.ExeDomain in {
    defm r : AVX512_maskable<opc, MRMSrcReg, _,
                             (outs _.RC:$dst),
                             (ins SrcRC:$src),
                             "vpbroadcast"##_.Suffix, "$src", "$src",
                             (_.VT (OpNode SrcRC:$src)),
                             NoItinerary>,
             T8PD, EVEX, Sched<[SchedRR]>;
  }
}
1279
// Byte/word integer broadcast from a general-purpose register.
// The instruction itself takes a GR32 operand and is defined without
// patterns (the three pattern lists are empty). Selection is done by the
// explicit Pats below, which place the SrcRC value into a GR32 through
// INSERT_SUBREG on an IMPLICIT_DEF using the Subreg index.
//   Name - full instruction name prefix used to look up the r/rk/rkz records.
multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name,
                                      SchedWrite SchedRR,
                                      X86VectorVTInfo _,
                                      SDPatternOperator OpNode,
                                      RegisterClass SrcRC,
                                      SubRegIndex Subreg> {
  let hasSideEffects = 0, ExeDomain = _.ExeDomain in {
    defm r : AVX512_maskable_custom<opc, MRMSrcReg,
                 (outs _.RC:$dst), (ins GR32:$src),
                 !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
                 !con((ins _.KRCWM:$mask), (ins GR32:$src)),
                 "vpbroadcast"##_.Suffix, "$src", "$src", [], [], [],
                 NoItinerary, "$src0 = $dst">,
             T8PD, EVEX, Sched<[SchedRR]>;
  }

  // Unmasked.
  def : Pat <(_.VT (OpNode SrcRC:$src)),
             (!cast<Instruction>(Name#r)
               (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  // Merge-masked: unselected lanes come from $src0.
  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
             (!cast<Instruction>(Name#rk) _.RC:$src0, _.KRCWM:$mask,
               (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  // Zero-masked.
  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
             (!cast<Instruction>(Name#rkz) _.KRCWM:$mask,
               (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
}
1303
// Vector-length wrapper for avx512_int_broadcastbw_reg: the 512-bit variant
// requires prd, the 256/128-bit variants require prd and HasVLX. The Z/Z256
// variants use WriteShuffle256, the Z128 variant uses WriteShuffle.
multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
                                         AVX512VLVectorVTInfo _,
                                         SDPatternOperator OpNode,
                                         RegisterClass SrcRC,
                                         SubRegIndex Subreg,
                                         Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256,
                                        _.info512, OpNode, SrcRC, Subreg>,
             EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
                                           _.info256, OpNode, SrcRC, Subreg>,
                EVEX_V256;
    defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
                                           _.info128, OpNode, SrcRC, Subreg>,
                EVEX_V128;
  }
}
1317
// Vector-length wrapper for avx512_int_broadcast_reg: the 512-bit variant
// requires prd, the 256/128-bit variants require prd and HasVLX. The Z/Z256
// variants use WriteShuffle256, the Z128 variant uses WriteShuffle.
multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
                                       SDPatternOperator OpNode,
                                       RegisterClass SrcRC, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512,
                                      OpNode, SrcRC>,
             EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256,
                                         OpNode, SrcRC>,
                EVEX_V256;
    defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128,
                                         OpNode, SrcRC>,
                EVEX_V128;
  }
}
1331
// Integer broadcasts from general-purpose registers.
// Byte and word forms (HasBWI) take GR8/GR16 sources via the sub_8bit and
// sub_16bit subregister indices.
defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
                                                   avx512vl_i8_info,
                                                   X86VBroadcast, GR8,
                                                   sub_8bit, HasBWI>;
defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
                                                   avx512vl_i16_info,
                                                   X86VBroadcast, GR16,
                                                   sub_16bit, HasBWI>;
// Dword and qword forms (HasAVX512) share opcode 0x7C; the qword form adds
// VEX_W.
defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
                                                 X86VBroadcast, GR32,
                                                 HasAVX512>;
defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
                                                 X86VBroadcast, GR64,
                                                 HasAVX512>,
                     VEX_W;
Michael Liao5bf95782014-12-04 05:20:33 +00001341
// Provide aliases for broadcast from the same register class that
// automatically does the extract: an X86VBroadcast whose source is a full
// SrcInfo vector register is selected to the register-form instruction
// NAME#ZSuffix#r fed with the sub_xmm subregister of that source.
multiclass avx512_int_broadcast_rm_lowering<X86VectorVTInfo DestInfo,
                                            X86VectorVTInfo SrcInfo> {
  def : Pat<(DestInfo.VT
              (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))),
            (!cast<Instruction>(NAME#DestInfo.ZSuffix#"r")
              (EXTRACT_SUBREG (SrcInfo.VT SrcInfo.RC:$src), sub_xmm))>;
}
1350
// Integer broadcast from an XMM register or scalar memory, for all three
// vector lengths. The 512-bit variant requires prd; the 256/128-bit variants
// require prd and HasVLX. The Z and Z256 variants additionally get lowering
// patterns for sources held in wider registers.
multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
                                      AVX512VLVectorVTInfo _,
                                      Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
                                 WriteShuffle256Ld, _.info512, _.info128>,
             avx512_int_broadcast_rm_lowering<_.info512, _.info256>,
             EVEX_V512;
    // Defined separately to avoid redefinition.
    defm Z_Alt : avx512_int_broadcast_rm_lowering<_.info512, _.info512>;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
                                    WriteShuffle256Ld, _.info256, _.info128>,
                avx512_int_broadcast_rm_lowering<_.info256, _.info256>,
                EVEX_V256;
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle,
                                    WriteShuffleLd, _.info128, _.info128>,
                EVEX_V128;
  }
}
1371
// Integer broadcasts from XMM register / scalar memory.
// Byte and word forms require HasBWI; dword and qword forms require
// HasAVX512, and the qword form adds VEX_W.
defm VPBROADCASTB : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
                                               avx512vl_i8_info, HasBWI>;
defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
                                               avx512vl_i16_info, HasBWI>;
defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
                                               avx512vl_i32_info, HasAVX512>;
defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
                                               avx512vl_i64_info, HasAVX512>,
                    VEX_W;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001380
// Subvector broadcast from memory: loads a _Src-typed vector and broadcasts
// it into a _Dst-typed register via X86SubVBroadcast. Only a memory form
// (rm) is defined; it is scheduled as WriteShuffleLd.
multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
                                      X86VectorVTInfo _Dst,
                                      X86VectorVTInfo _Src> {
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst,
                (outs _Dst.RC:$dst),
                (ins _Src.MemOp:$src),
                OpcodeStr, "$src", "$src",
                (_Dst.VT
                  (X86SubVBroadcast
                    (_Src.VT (bitconvert (_Src.LdFrag addr:$src))))),
                NoItinerary>,
            AVX5128IBase, EVEX, Sched<[WriteShuffleLd]>;
}
1390
// This should be used for the AVX512DQ broadcast instructions. It disables
// the unmasked patterns so that we only use the DQ instructions when masking
// is requested: the first pattern operand passed to AVX512_maskable_split is
// null_frag, while the second carries the X86SubVBroadcast-of-load pattern.
// hasSideEffects/mayLoad are set explicitly on the definition.
multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
                                         X86VectorVTInfo _Dst,
                                         X86VectorVTInfo _Src> {
  let hasSideEffects = 0, mayLoad = 1 in {
    defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst,
                  (outs _Dst.RC:$dst),
                  (ins _Src.MemOp:$src),
                  OpcodeStr, "$src", "$src",
                  (null_frag),
                  (_Dst.VT
                    (X86SubVBroadcast
                      (_Src.VT (bitconvert (_Src.LdFrag addr:$src))))),
                  NoItinerary>,
              AVX5128IBase, EVEX, Sched<[WriteShuffleLd]>;
  }
}
1405
// Qword broadcasts whose source is an X86vzload.
// 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
let Predicates = [HasAVX512] in
def : Pat<(v8i64 (X86VBroadcast (v8i64 (X86vzload addr:$src)))),
          (VPBROADCASTQZm addr:$src)>;

// Same folding for the 128-bit and 256-bit forms, which need HasVLX.
let Predicates = [HasVLX] in {
  def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))),
            (VPBROADCASTQZ128m addr:$src)>;
  def : Pat<(v4i64 (X86VBroadcast (v4i64 (X86vzload addr:$src)))),
            (VPBROADCASTQZ256m addr:$src)>;
}
// Word broadcasts (128-bit and 256-bit) from truncated i32 loads.
let Predicates = [HasVLX, HasBWI] in {
  // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
  // This means we'll encounter truncated i32 loads; match that here.
  def : Pat<(v8i16
              (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
            (VPBROADCASTWZ128m addr:$src)>;
  def : Pat<(v16i16
              (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
            (VPBROADCASTWZ256m addr:$src)>;
  // The same, with the i32 produced by a zero-extending 16-bit load.
  def : Pat<(v8i16
              (X86VBroadcast (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
            (VPBROADCASTWZ128m addr:$src)>;
  def : Pat<(v16i16
              (X86VBroadcast (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
            (VPBROADCASTWZ256m addr:$src)>;
}
1433
//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST SUBVECTORS
//
1437
// 512-bit subvector broadcasts from memory.
// 32x4: 128-bit source, 32-bit elements.
defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                                                  v16i32_info, v4i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                                                  v16f32_info, v4f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
// 64x4: 256-bit source, 64-bit elements (VEX_W).
defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
                                                  v8i64_info, v4i64x_info>,
                       VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
                                                  v8f64_info, v4f64x_info>,
                       VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
1450
let Predicates = [HasAVX512] in {
  // 256-bit loads of element types not covered by the 64x4 instruction
  // definitions are selected to the 64x4 memory forms.
  def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))),
            (VBROADCASTF64X4rm addr:$src)>;
  def : Pat<(v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src)))),
            (VBROADCASTI64X4rm addr:$src)>;
  def : Pat<(v32i16 (X86SubVBroadcast (bc_v16i16 (loadv4i64 addr:$src)))),
            (VBROADCASTI64X4rm addr:$src)>;
  def : Pat<(v64i8 (X86SubVBroadcast (bc_v32i8 (loadv4i64 addr:$src)))),
            (VBROADCASTI64X4rm addr:$src)>;

  // Provide fallback in case the load node that is used in the patterns above
  // is used by additional users, which prevents the pattern selection.
  // The register source is placed in the low half through INSERT_SUBREG and
  // inserted again at index 1 with VINSERT{F,I}64x4Zrr.
  def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))),
            (VINSERTF64x4Zrr
              (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (v4f64 VR256X:$src), 1)>;
  def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
            (VINSERTF64x4Zrr
              (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (v8f32 VR256X:$src), 1)>;
  def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))),
            (VINSERTI64x4Zrr
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (v4i64 VR256X:$src), 1)>;
  def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
            (VINSERTI64x4Zrr
              (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (v8i32 VR256X:$src), 1)>;
  def : Pat<(v32i16 (X86SubVBroadcast (v16i16 VR256X:$src))),
            (VINSERTI64x4Zrr
              (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (v16i16 VR256X:$src), 1)>;
  def : Pat<(v64i8 (X86SubVBroadcast (v32i8 VR256X:$src))),
            (VINSERTI64x4Zrr
              (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (v32i8 VR256X:$src), 1)>;

  // 128-bit loads of element types not covered by the 32x4 instruction
  // definitions are selected to the 32x4 memory forms.
  def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
            (VBROADCASTF32X4rm addr:$src)>;
  def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
            (VBROADCASTI32X4rm addr:$src)>;
  def : Pat<(v32i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
            (VBROADCASTI32X4rm addr:$src)>;
  def : Pat<(v64i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
            (VBROADCASTI32X4rm addr:$src)>;
}
1491
let Predicates = [HasVLX] in {
  // 256-bit destination, 128-bit memory source.
  defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                                                        v8i32x_info, v4i32x_info>,
                             EVEX_V256, EVEX_CD8<32, CD8VT4>;
  defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                                                        v8f32x_info, v4f32x_info>,
                             EVEX_V256, EVEX_CD8<32, CD8VT4>;

  // Remaining element types reuse the 32x4 memory forms.
  def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
            (VBROADCASTF32X4Z256rm addr:$src)>;
  def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
            (VBROADCASTI32X4Z256rm addr:$src)>;
  def : Pat<(v16i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
            (VBROADCASTI32X4Z256rm addr:$src)>;
  def : Pat<(v32i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
            (VBROADCASTI32X4Z256rm addr:$src)>;

  // Provide fallback in case the load node that is used in the patterns above
  // is used by additional users, which prevents the pattern selection.
  // The register source is placed in the low half through INSERT_SUBREG and
  // inserted again at index 1 with VINSERT{F,I}32x4Z256rr.
  def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
            (VINSERTF32x4Z256rr
              (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (v2f64 VR128X:$src), 1)>;
  def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
            (VINSERTF32x4Z256rr
              (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (v4f32 VR128X:$src), 1)>;
  def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
            (VINSERTI32x4Z256rr
              (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (v2i64 VR128X:$src), 1)>;
  def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
            (VINSERTI32x4Z256rr
              (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (v4i32 VR128X:$src), 1)>;
  def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
            (VINSERTI32x4Z256rr
              (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (v8i16 VR128X:$src), 1)>;
  def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
            (VINSERTI32x4Z256rr
              (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (v16i8 VR128X:$src), 1)>;
}
Simon Pilgrimea0d4f92016-07-22 13:58:44 +00001530
// AVX512DQ + VLX 64x2 subvector broadcasts (masked selection only, see
// avx512_subvec_broadcast_rm_dq).
// NOTE(review): despite the Z128 suffix in the record names, both are
// instantiated with a 256-bit destination info and EVEX_V256; the 128 appears
// to refer to the source width - confirm before relying on the name.
let Predicates = [HasVLX, HasDQI] in {
  defm VBROADCASTI64X2Z128 :
      avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                                    v4i64x_info, v2i64x_info>,
      VEX_W, EVEX_V256, EVEX_CD8<64, CD8VT2>;
  defm VBROADCASTF64X2Z128 :
      avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                                    v4f64x_info, v2f64x_info>,
      VEX_W, EVEX_V256, EVEX_CD8<64, CD8VT2>;
}
1539
// AVX512DQ 512-bit subvector broadcasts (masked selection only, see
// avx512_subvec_broadcast_rm_dq): 64x2 forms take a 128-bit source and carry
// VEX_W, 32x8 forms take a 256-bit source.
let Predicates = [HasDQI] in {
  defm VBROADCASTI64X2 :
      avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                                    v8i64_info, v2i64x_info>,
      VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
  defm VBROADCASTI32X8 :
      avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
                                    v16i32_info, v8i32x_info>,
      EVEX_V512, EVEX_CD8<32, CD8VT8>;
  defm VBROADCASTF64X2 :
      avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                                    v8f64_info, v2f64x_info>,
      VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
  defm VBROADCASTF32X8 :
      avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
                                    v16f32_info, v8f32x_info>,
      EVEX_V512, EVEX_CD8<32, CD8VT8>;
}
Adam Nemet73f72e12014-06-27 00:43:38 +00001554
// 32x2 broadcasts, 512-bit and 256-bit variants. The mask/result type comes
// from _Dst while the broadcast node type and the source come from _Src, so
// the split helper is used. null_frag is passed as UnmaskedOp.
multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
                                        AVX512VLVectorVTInfo _Dst,
                                        AVX512VLVectorVTInfo _Src> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
                                       WriteShuffle256Ld, _Dst.info512,
                                       _Src.info512, _Src.info128, null_frag>,
             EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
                                          WriteShuffle256Ld, _Dst.info256,
                                          _Src.info256, _Src.info128, null_frag>,
                EVEX_V256;
  }
}
1568
// Integer 32x2 broadcast: inherits the 512/256-bit variants from
// avx512_common_broadcast_32x2 and adds a 128-bit variant (HasDQI + HasVLX)
// that uses the WriteShuffle/WriteShuffleLd scheduling classes.
multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
                                         AVX512VLVectorVTInfo _Dst,
                                         AVX512VLVectorVTInfo _Src>
  : avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle,
                                          WriteShuffleLd, _Dst.info128,
                                          _Src.info128, _Src.info128, null_frag>,
                EVEX_V128;
  }
}
1579
// 32x2 broadcast instructions. The integer form uses the i32x2 multiclass
// (which adds a 128-bit variant); the FP form has 512/256-bit variants only.
defm VBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
                                                     avx512vl_i32_info,
                                                     avx512vl_i64_info>;
defm VBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
                                                    avx512vl_f32_info,
                                                    avx512vl_f64_info>;
Igor Bregerfa798a92015-11-02 07:39:36 +00001584
// FP broadcasts whose source is held in a YMM/ZMM register: select the
// register-form broadcast on the sub_xmm subregister of the source.
let Predicates = [HasVLX] in {
  def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256X:$src))),
            (VBROADCASTSSZ256r
              (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm))>;
  def : Pat<(v4f64 (X86VBroadcast (v4f64 VR256X:$src))),
            (VBROADCASTSDZ256r
              (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm))>;
}

// NOTE(review): the 512-bit patterns below are not wrapped in a Predicates
// block at this point in the file.
def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))),
          (VBROADCASTSSZr
            (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
def : Pat<(v16f32 (X86VBroadcast (v8f32 VR256X:$src))),
          (VBROADCASTSSZr
            (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm))>;

def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))),
          (VBROADCASTSDZr
            (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>;
def : Pat<(v8f64 (X86VBroadcast (v4f64 VR256X:$src))),
          (VBROADCASTSDZr
            (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm))>;
Robert Khasanovdd09a8f2014-10-28 12:28:51 +00001601
//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST MASK TO VECTOR REGISTER
//---
// Broadcast of a mask register (class KRC) into a vector register: defines
// the single register form rr, selected from X86VBroadcastm.
multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
                                  X86VectorVTInfo _, RegisterClass KRC> {
  def rr : AVX512XS8I<opc, MRMSrcReg,
                      (outs _.RC:$dst),
                      (ins KRC:$src),
                      OpcodeStr # "\t{$src, $dst|$dst, $src}",
                      [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))],
                      IIC_SSE_PSHUF_RI>,
           EVEX, Sched<[WriteShuffle]>;
}
1612
// Vector-length wrapper for avx512_mask_broadcastm: the 512-bit variant
// requires HasCDI, the 256/128-bit variants require HasCDI and HasVLX. The
// same mask register class KRC is used for every width.
multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo VTInfo,
                                 RegisterClass KRC> {
  let Predicates = [HasCDI] in {
    defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>,
             EVEX_V512;
  }
  let Predicates = [HasCDI, HasVLX] in {
    defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>,
                EVEX_V256;
    defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>,
                EVEX_V128;
  }
}
1622
// Mask-to-vector broadcasts: a VK16 mask into i32 vectors, and a VK8 mask
// into i64 vectors (VEX_W).
defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
                                             avx512vl_i32_info, VK16>;
defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
                                             avx512vl_i64_info, VK8>,
                       VEX_W;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001627
1628//===----------------------------------------------------------------------===//
Craig Topperaad5f112015-11-30 00:13:24 +00001629// -- VPERMI2 - 3 source operands form --
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001630
// Itinerary/scheduling bundles shared by the VPERMI2* and VPERMT2*
// definitions below.

// Floating-point flavour: the same itinerary class is passed for both
// OpndItins arguments; scheduled as WriteFShuffle256.
def AVX512_PERM2_F : OpndItins<IIC_SSE_SHUFP, IIC_SSE_SHUFP> {
  let Sched = WriteFShuffle256;
}

// Integer flavour: scheduled as WriteShuffle256.
def AVX512_PERM2_I : OpndItins<IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI> {
  let Sched = WriteShuffle256;
}
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001640
// Register (rr) and full-vector memory (rm) forms of a three-source
// X86VPermi2X permute. $src1 is tied to $dst; the execution domain comes
// from the vector type info.
multiclass avx512_perm_i<bits<8> opc, string OpcodeStr, OpndItins itins,
                         X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst" in
  let ExeDomain = _.ExeDomain in {
    // The index operand in the pattern should really be an integer type.
    // However, if we do that and it happens to come from a bitcast, then it
    // becomes difficult to find the bitcast needed to convert the index to
    // the destination type for the passthru since it will be folded with the
    // bitcast of the index operand.
    defm rr : AVX512_maskable_3src<
                opc, MRMSrcReg, _,
                (outs _.RC:$dst),
                (ins _.RC:$src2, _.RC:$src3),
                OpcodeStr, "$src3, $src2", "$src2, $src3",
                (_.VT (X86VPermi2X _.RC:$src1, _.RC:$src2, _.RC:$src3)),
                itins.rr, 1>,
              EVEX_4V, AVX5128IBase, Sched<[itins.Sched]>;

    defm rm : AVX512_maskable_3src<
                opc, MRMSrcMem, _,
                (outs _.RC:$dst),
                (ins _.RC:$src2, _.MemOp:$src3),
                OpcodeStr, "$src3, $src2", "$src2, $src3",
                (_.VT (X86VPermi2X _.RC:$src1, _.RC:$src2,
                         (_.VT (bitconvert (_.LdFrag addr:$src3))))),
                itins.rm, 1>,
              EVEX_4V, AVX5128IBase,
              Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001663
// Broadcast-memory (rmb) form of the X86VPermi2X permute: the third source
// is a scalar load fed through X86VBroadcast, and the record mixes in
// EVEX_B. $src1 is tied to $dst.
multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr, OpndItins itins,
                            X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
    defm rmb : AVX512_maskable_3src<
                 opc, MRMSrcMem, _,
                 (outs _.RC:$dst),
                 (ins _.RC:$src2, _.ScalarMemOp:$src3),
                 OpcodeStr,
                 "${src3}" # _.BroadcastStr # ", $src2",
                 "$src2, ${src3}" # _.BroadcastStr,
                 (_.VT (X86VPermi2X
                          _.RC:$src1, _.RC:$src2,
                          (_.VT (X86VBroadcast
                                   (_.ScalarLdFrag addr:$src3))))),
                 itins.rm, 1>,
               AVX5128IBase, EVEX_4V, EVEX_B,
               Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
1676
// Expands avx512_perm_i plus its broadcast companion avx512_perm_i_mb at
// 512/128/256 bits. The 512-bit records carry no extra predicate here; the
// 128/256-bit records are predicated on HasVLX.
multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr, OpndItins itins,
                               AVX512VLVectorVTInfo VTInfo> {
  defm NAME : avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info512>,
              avx512_perm_i_mb<opc, OpcodeStr, itins, VTInfo.info512>,
              EVEX_V512;

  let Predicates = [HasVLX] in {
    defm NAME#128 : avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info128>,
                    avx512_perm_i_mb<opc, OpcodeStr, itins, VTInfo.info128>,
                    EVEX_V128;
    defm NAME#256 : avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info256>,
                    avx512_perm_i_mb<opc, OpcodeStr, itins, VTInfo.info256>,
                    EVEX_V256;
  }
}
Elena Demikhovskyf07df9f2015-11-25 08:17:56 +00001688
// Width expansion of avx512_perm_i without the broadcast (rmb) form. The
// caller supplies the feature predicate Prd; the 128/256-bit records
// additionally require HasVLX.
multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
                                  OpndItins itins,
                                  AVX512VLVectorVTInfo VTInfo,
                                  Predicate Prd> {
  let Predicates = [Prd] in {
    defm NAME : avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info512>,
                EVEX_V512;
  }

  let Predicates = [Prd, HasVLX] in {
    defm NAME#128 : avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info128>,
                    EVEX_V128;
    defm NAME#256 : avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info256>,
                    EVEX_V256;
  }
}
Elena Demikhovskyd3057e52015-06-18 08:56:19 +00001700
// VPERMI2 instantiations.
// Dword/qword and PS/PD variants go through avx512_perm_i_sizes (which also
// emits the broadcast form); word/byte variants go through the _bw expansion
// with HasBWI / HasVBMI respectively.
defm VPERMI2D
    : avx512_perm_i_sizes<0x76, "vpermi2d", AVX512_PERM2_I, avx512vl_i32_info>,
      EVEX_CD8<32, CD8VF>;
defm VPERMI2Q
    : avx512_perm_i_sizes<0x76, "vpermi2q", AVX512_PERM2_I, avx512vl_i64_info>,
      VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMI2W
    : avx512_perm_i_sizes_bw<0x75, "vpermi2w", AVX512_PERM2_I,
                             avx512vl_i16_info, HasBWI>,
      VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMI2B
    : avx512_perm_i_sizes_bw<0x75, "vpermi2b", AVX512_PERM2_I,
                             avx512vl_i8_info, HasVBMI>,
      EVEX_CD8<8, CD8VF>;
defm VPERMI2PS
    : avx512_perm_i_sizes<0x77, "vpermi2ps", AVX512_PERM2_F, avx512vl_f32_info>,
      EVEX_CD8<32, CD8VF>;
defm VPERMI2PD
    : avx512_perm_i_sizes<0x77, "vpermi2pd", AVX512_PERM2_F, avx512vl_f64_info>,
      VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyd3057e52015-06-18 08:56:19 +00001715
Craig Topperaad5f112015-11-30 00:13:24 +00001716// VPERMT2
// Register (rr) and full-vector memory (rm) forms of a three-source
// X86VPermt2 permute. Unlike avx512_perm_i, $src2 takes its register class
// from a separate type info (IdxVT). $src1 is tied to $dst.
multiclass avx512_perm_t<bits<8> opc, string OpcodeStr, OpndItins itins,
                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst" in
  let ExeDomain = _.ExeDomain in {
    defm rr : AVX512_maskable_3src<
                opc, MRMSrcReg, _,
                (outs _.RC:$dst),
                (ins IdxVT.RC:$src2, _.RC:$src3),
                OpcodeStr, "$src3, $src2", "$src2, $src3",
                (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)),
                itins.rr, 1>,
              EVEX_4V, AVX5128IBase, Sched<[itins.Sched]>;

    defm rm : AVX512_maskable_3src<
                opc, MRMSrcMem, _,
                (outs _.RC:$dst),
                (ins IdxVT.RC:$src2, _.MemOp:$src3),
                OpcodeStr, "$src3, $src2", "$src2, $src3",
                (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
                         (bitconvert (_.LdFrag addr:$src3)))),
                itins.rm, 1>,
              EVEX_4V, AVX5128IBase,
              Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
// Broadcast-memory (rmb) form of the X86VPermt2 permute: the third source is
// a scalar load fed through X86VBroadcast, and the record mixes in EVEX_B.
// $src1 is tied to $dst.
multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr, OpndItins itins,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
    defm rmb : AVX512_maskable_3src<
                 opc, MRMSrcMem, _,
                 (outs _.RC:$dst),
                 (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
                 OpcodeStr,
                 "${src3}" # _.BroadcastStr # ", $src2",
                 "$src2, ${src3}" # _.BroadcastStr,
                 (_.VT (X86VPermt2
                          _.RC:$src1, IdxVT.RC:$src2,
                          (_.VT (X86VBroadcast
                                   (_.ScalarLdFrag addr:$src3))))),
                 itins.rm, 1>,
               AVX5128IBase, EVEX_4V, EVEX_B,
               Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
1746
// Expands avx512_perm_t plus avx512_perm_t_mb at 512/128/256 bits.
// VTInfo describes the data vectors; ShuffleMask describes the vector type
// used for the $src2 operand. The 128/256-bit records require HasVLX.
multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr, OpndItins itins,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  defm NAME : avx512_perm_t<opc, OpcodeStr, itins,
                            VTInfo.info512, ShuffleMask.info512>,
              avx512_perm_t_mb<opc, OpcodeStr, itins,
                               VTInfo.info512, ShuffleMask.info512>,
              EVEX_V512;

  let Predicates = [HasVLX] in {
    defm NAME#128 : avx512_perm_t<opc, OpcodeStr, itins,
                                  VTInfo.info128, ShuffleMask.info128>,
                    avx512_perm_t_mb<opc, OpcodeStr, itins,
                                     VTInfo.info128, ShuffleMask.info128>,
                    EVEX_V128;
    defm NAME#256 : avx512_perm_t<opc, OpcodeStr, itins,
                                  VTInfo.info256, ShuffleMask.info256>,
                    avx512_perm_t_mb<opc, OpcodeStr, itins,
                                     VTInfo.info256, ShuffleMask.info256>,
                    EVEX_V256;
  }
}
1765
// Width expansion of avx512_perm_t without the broadcast (rmb) form. The
// caller supplies the feature predicate Prd; the 128/256-bit records
// additionally require HasVLX.
multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
                                  OpndItins itins,
                                  AVX512VLVectorVTInfo VTInfo,
                                  AVX512VLVectorVTInfo Idx,
                                  Predicate Prd> {
  let Predicates = [Prd] in {
    defm NAME : avx512_perm_t<opc, OpcodeStr, itins,
                              VTInfo.info512, Idx.info512>,
                EVEX_V512;
  }

  let Predicates = [Prd, HasVLX] in {
    defm NAME#128 : avx512_perm_t<opc, OpcodeStr, itins,
                                  VTInfo.info128, Idx.info128>,
                    EVEX_V128;
    defm NAME#256 : avx512_perm_t<opc, OpcodeStr, itins,
                                  VTInfo.info256, Idx.info256>,
                    EVEX_V256;
  }
}
Simon Pilgrim8d5e4692017-12-01 17:24:15 +00001780
// VPERMT2 instantiations.
// The second type-info argument describes the $src2 operand; for the PS/PD
// variants it is the integer info of matching element width.
defm VPERMT2D
    : avx512_perm_t_sizes<0x7E, "vpermt2d", AVX512_PERM2_I,
                          avx512vl_i32_info, avx512vl_i32_info>,
      EVEX_CD8<32, CD8VF>;
defm VPERMT2Q
    : avx512_perm_t_sizes<0x7E, "vpermt2q", AVX512_PERM2_I,
                          avx512vl_i64_info, avx512vl_i64_info>,
      VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMT2W
    : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", AVX512_PERM2_I,
                             avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
      VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMT2B
    : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", AVX512_PERM2_I,
                             avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
      EVEX_CD8<8, CD8VF>;
defm VPERMT2PS
    : avx512_perm_t_sizes<0x7F, "vpermt2ps", AVX512_PERM2_F,
                          avx512vl_f32_info, avx512vl_i32_info>,
      EVEX_CD8<32, CD8VF>;
defm VPERMT2PD
    : avx512_perm_t_sizes<0x7F, "vpermt2pd", AVX512_PERM2_F,
                          avx512vl_f64_info, avx512vl_i64_info>,
      VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovsky299cf5112014-04-29 09:09:15 +00001795
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001796//===----------------------------------------------------------------------===//
1797// AVX-512 - BLEND using mask
1798//
Simon Pilgrimd4953012017-12-05 21:05:25 +00001799
// Itinerary/scheduling bundles used by the mask-blend definitions below.

// Floating-point blends: scheduled as WriteFVarBlend.
def AVX512_BLENDM : OpndItins<IIC_SSE_ALU_F32P_RR, IIC_SSE_ALU_F32P_RM> {
  let Sched = WriteFVarBlend;
}

// Integer blends: scheduled as WriteVarBlend.
def AVX512_PBLENDM : OpndItins<IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM> {
  let Sched = WriteVarBlend;
}
1809
// Mask-blend instruction records without selection patterns (every pattern
// list is empty, so hasSideEffects/mayLoad are spelled out explicitly).
// Emits unmasked, merge-masked (k) and zero-masked (kz) variants for both
// the register-register and the register-memory encodings.
multiclass avx512_blendmask<bits<8> opc, string OpcodeStr, OpndItins itins,
                            X86VectorVTInfo _> {
  // Register-register forms.
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
    def rr : AVX5128I<
               opc, MRMSrcReg, (outs _.RC:$dst),
               (ins _.RC:$src1, _.RC:$src2),
               OpcodeStr #
                 "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}",
               [], itins.rr>,
             EVEX_4V, Sched<[itins.Sched]>;

    def rrk : AVX5128I<
                opc, MRMSrcReg, (outs _.RC:$dst),
                (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
                OpcodeStr #
                  "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}",
                [], itins.rr>,
              EVEX_4V, EVEX_K, Sched<[itins.Sched]>;

    def rrkz : AVX5128I<
                 opc, MRMSrcReg, (outs _.RC:$dst),
                 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
                 OpcodeStr #
                   "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}",
                 [], itins.rr>,
               EVEX_4V, EVEX_KZ, Sched<[itins.Sched]>;
  }

  // Register-memory forms.
  let ExeDomain = _.ExeDomain, hasSideEffects = 0, mayLoad = 1 in {
    def rm : AVX5128I<
               opc, MRMSrcMem, (outs _.RC:$dst),
               (ins _.RC:$src1, _.MemOp:$src2),
               OpcodeStr #
                 "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}",
               [], itins.rm>,
             EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[itins.Sched.Folded, ReadAfterLd]>;

    def rmk : AVX5128I<
                opc, MRMSrcMem, (outs _.RC:$dst),
                (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
                OpcodeStr #
                  "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}",
                [], itins.rm>,
              EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
              Sched<[itins.Sched.Folded, ReadAfterLd]>;

    def rmkz : AVX5128I<
                 opc, MRMSrcMem, (outs _.RC:$dst),
                 (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
                 OpcodeStr #
                   "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}",
                 [], itins.rm>,
               EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
               Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
// Broadcast-memory forms of the mask blend (pattern-less records):
//   rmbk - merge-masked, second source is a broadcast scalar memory operand.
//   rmb  - unmasked, second source is a broadcast scalar memory operand.
// Both mix in EVEX_B. ExeDomain is taken from the vector type info so these
// records carry the same execution domain as the rr/rm forms produced by
// avx512_blendmask for the same instruction; without it they would fall back
// to whatever default the instruction class provides.
multiclass avx512_blendmask_rmb<bits<8> opc, string OpcodeStr, OpndItins itins,
                                X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in {
  def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
       !strconcat(OpcodeStr,
            "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
            "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
      [], itins.rm>, EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
      Sched<[itins.Sched.Folded, ReadAfterLd]>;

  def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.RC:$src1, _.ScalarMemOp:$src2),
       !strconcat(OpcodeStr,
            "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
            "$dst, $src1, ${src2}", _.BroadcastStr, "}"),
      [], itins.rm>, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
      Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
1870
// Width expansion for blends that also have broadcast-memory forms: each
// width pulls in both avx512_blendmask and avx512_blendmask_rmb. The 512-bit
// records carry no extra predicate here; 256/128-bit records require HasVLX.
multiclass blendmask_dq<bits<8> opc, string OpcodeStr, OpndItins itins,
                        AVX512VLVectorVTInfo VTInfo> {
  defm Z : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info512>,
           avx512_blendmask_rmb<opc, OpcodeStr, itins, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z256 : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info256>,
                avx512_blendmask_rmb<opc, OpcodeStr, itins, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info128>,
                avx512_blendmask_rmb<opc, OpcodeStr, itins, VTInfo.info128>,
                EVEX_V128;
  }
}
Elena Demikhovsky172a27c2014-01-08 10:54:22 +00001883
// Width expansion for blends without broadcast-memory forms. All records
// require HasBWI; the 256/128-bit records additionally require HasVLX.
multiclass blendmask_bw<bits<8> opc, string OpcodeStr, OpndItins itins,
                        AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasBWI] in {
    defm Z : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info512>,
             EVEX_V512;
  }

  let Predicates = [HasBWI, HasVLX] in {
    defm Z256 : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info128>,
                EVEX_V128;
  }
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001894
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001895
// Mask-blend instantiations. The PS/PD/D/Q variants use blendmask_dq (with
// broadcast forms); the B/W variants use blendmask_bw. The PD, Q and W
// variants additionally mix in VEX_W.
defm VBLENDMPS
    : blendmask_dq<0x65, "vblendmps", AVX512_BLENDM, avx512vl_f32_info>;
defm VBLENDMPD
    : blendmask_dq<0x65, "vblendmpd", AVX512_BLENDM, avx512vl_f64_info>, VEX_W;
defm VPBLENDMD
    : blendmask_dq<0x64, "vpblendmd", AVX512_PBLENDM, avx512vl_i32_info>;
defm VPBLENDMQ
    : blendmask_dq<0x64, "vpblendmq", AVX512_PBLENDM, avx512vl_i64_info>, VEX_W;
defm VPBLENDMB
    : blendmask_bw<0x66, "vpblendmb", AVX512_PBLENDM, avx512vl_i8_info>;
defm VPBLENDMW
    : blendmask_bw<0x66, "vpblendmw", AVX512_PBLENDM, avx512vl_i16_info>, VEX_W;
Elena Demikhovsky172a27c2014-01-08 10:54:22 +00001902
Elena Demikhovsky172a27c2014-01-08 10:54:22 +00001903
Elena Demikhovsky47fc44e2013-12-16 13:52:35 +00001904//===----------------------------------------------------------------------===//
1905// Compare Instructions
1906//===----------------------------------------------------------------------===//
1907
// avx512_cmp_scalar - AVX512 CMPSS and CMPSD
//
//   _         - scalar type info (register classes, suffix, memory operands).
//   OpNode    - compare node used by the rr_Int / rm_Int / rr / rm patterns.
//   OpNodeRnd - compare node used by the {sae} form; it takes an extra
//               (i32 FROUND_NO_EXC) operand.
//   itins     - itinerary/scheduling bundle.
//
// Records produced:
//   rr_Int, rm_Int, rrb_Int  - built through AVX512_maskable_cmp on _.RC
//                              operands, with an AVXCC condition-code operand.
//   rri_alt, rmi_alt, rrb_alt - assembler-only forms taking the condition as
//                              an explicit u8imm.
//   rr, rm                   - isCodeGenOnly forms on _.FRC operands.
// Every variant writes its result to _.KRC.

multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd,
                             OpndItins itins> {
  defm  rr_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                      (outs _.KRC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
                      "vcmp${cc}"#_.Suffix,
                      "$src2, $src1", "$src1, $src2",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              imm:$cc), itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
  let mayLoad = 1 in
  defm  rm_Int  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                    (outs _.KRC:$dst),
                    (ins _.RC:$src1, _.IntScalarMemOp:$src2, AVXCC:$cc),
                    "vcmp${cc}"#_.Suffix,
                    "$src2, $src1", "$src1, $src2",
                    (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
                        imm:$cc), itins.rm>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
                    Sched<[itins.Sched.Folded, ReadAfterLd]>;

  defm  rrb_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                     (outs _.KRC:$dst),
                     (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
                     "vcmp${cc}"#_.Suffix,
                     "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                     (OpNodeRnd (_.VT _.RC:$src1),
                                (_.VT _.RC:$src2),
                                imm:$cc,
                                (i32 FROUND_NO_EXC)), itins.rr>,
                     EVEX_4V, EVEX_B, Sched<[itins.Sched]>;
  // Accept explicit immediate argument form instead of comparison code.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    // The destination uses _.KRC like every other variant in this multiclass
    // rather than a hard-coded mask register class.
    defm  rri_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
                        (outs _.KRC:$dst),
                        (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                        "vcmp"#_.Suffix,
                        "$cc, $src2, $src1", "$src1, $src2, $cc", itins.rr>, EVEX_4V,
                        Sched<[itins.Sched]>;
  let mayLoad = 1 in
    defm  rmi_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
                        (outs _.KRC:$dst),
                        (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
                        "vcmp"#_.Suffix,
                        "$cc, $src2, $src1", "$src1, $src2, $cc", itins.rm>,
                        EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
                        Sched<[itins.Sched.Folded, ReadAfterLd]>;

    defm  rrb_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
                       (outs _.KRC:$dst),
                       (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                       "vcmp"#_.Suffix,
                       "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc", itins.rr>,
                       EVEX_4V, EVEX_B, Sched<[itins.Sched]>;
  }// let isAsmParserOnly = 1, hasSideEffects = 0

  let isCodeGenOnly = 1 in {
    let isCommutable = 1 in
    def rr : AVX512Ii8<0xC2, MRMSrcReg,
                (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, AVXCC:$cc),
                !strconcat("vcmp${cc}", _.Suffix,
                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                          _.FRC:$src2,
                                          imm:$cc))],
                itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
    def rm : AVX512Ii8<0xC2, MRMSrcMem,
              (outs _.KRC:$dst),
              (ins _.FRC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
              !strconcat("vcmp${cc}", _.Suffix,
                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
              [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                        (_.ScalarLdFrag addr:$src2),
                                        imm:$cc))],
              itins.rm>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
1987
// Scalar compare instantiations. Both require HasAVX512; the single-precision
// variant is placed in SSEPackedSingle and the double-precision variant in
// SSEPackedDouble (the latter also mixes in VEX_W).
let Predicates = [HasAVX512], ExeDomain = SSEPackedSingle in {
  defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsRnd,
                                   SSE_ALU_F32S>,
                 AVX512XSIi8Base;
}

let Predicates = [HasAVX512], ExeDomain = SSEPackedDouble in {
  defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsRnd,
                                   SSE_ALU_F64S>,
                 AVX512XDIi8Base, VEX_W;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001996
// Packed integer compare producing a mask register (_.KRC).
//   rr / rm   - unmasked register and full-vector memory forms.
//   rrk / rmk - masked forms; the pattern ANDs the compare result with
//               the $mask operand.
// IsCommutable is applied to the two register forms (rr, rrk) only.
multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                              OpndItins itins, X86VectorVTInfo _,
                              bit IsCommutable> {
  let isCommutable = IsCommutable in {
    def rr : AVX512BI<
               opc, MRMSrcReg,
               (outs _.KRC:$dst),
               (ins _.RC:$src1, _.RC:$src2),
               OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               [(set _.KRC:$dst,
                     (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))],
               itins.rr>,
             EVEX_4V, Sched<[itins.Sched]>;
  }

  def rm : AVX512BI<
             opc, MRMSrcMem,
             (outs _.KRC:$dst),
             (ins _.RC:$src1, _.MemOp:$src2),
             OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
             [(set _.KRC:$dst,
                   (OpNode (_.VT _.RC:$src1),
                           (_.VT (bitconvert (_.LdFrag addr:$src2)))))],
             itins.rm>,
           EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;

  let isCommutable = IsCommutable in {
    def rrk : AVX512BI<
                opc, MRMSrcReg,
                (outs _.KRC:$dst),
                (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
                OpcodeStr # "\t{$src2, $src1, $dst {${mask}}|" #
                            "$dst {${mask}}, $src1, $src2}",
                [(set _.KRC:$dst,
                      (and _.KRCWM:$mask,
                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))))],
                itins.rr>,
              EVEX_4V, EVEX_K, Sched<[itins.Sched]>;
  }

  def rmk : AVX512BI<
              opc, MRMSrcMem,
              (outs _.KRC:$dst),
              (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
              OpcodeStr # "\t{$src2, $src1, $dst {${mask}}|" #
                          "$dst {${mask}}, $src1, $src2}",
              [(set _.KRC:$dst,
                    (and _.KRCWM:$mask,
                         (OpNode (_.VT _.RC:$src1),
                                 (_.VT (bitconvert
                                          (_.LdFrag addr:$src2))))))],
              itins.rm>,
            EVEX_4V, EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
2029
// Extends avx512_icmp_packed with broadcast-memory forms: rmb (unmasked) and
// rmbk (masked). The second compare operand is a scalar load fed through
// X86VBroadcast, and both records mix in EVEX_B.
multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr,
                                  PatFrag OpNode, OpndItins itins,
                                  X86VectorVTInfo _, bit IsCommutable>
    : avx512_icmp_packed<opc, OpcodeStr, OpNode, itins, _, IsCommutable> {
  def rmb : AVX512BI<
              opc, MRMSrcMem,
              (outs _.KRC:$dst),
              (ins _.RC:$src1, _.ScalarMemOp:$src2),
              OpcodeStr # "\t{${src2}" # _.BroadcastStr # ", $src1, $dst" #
                          "|$dst, $src1, ${src2}" # _.BroadcastStr # "}",
              [(set _.KRC:$dst,
                    (OpNode (_.VT _.RC:$src1),
                            (X86VBroadcast (_.ScalarLdFrag addr:$src2))))],
              itins.rm>,
            EVEX_4V, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;

  def rmbk : AVX512BI<
               opc, MRMSrcMem,
               (outs _.KRC:$dst),
               (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
               OpcodeStr #
                 "\t{${src2}" # _.BroadcastStr # ", $src1, $dst {${mask}}|" #
                 "$dst {${mask}}, $src1, ${src2}" # _.BroadcastStr # "}",
               [(set _.KRC:$dst,
                     (and _.KRCWM:$mask,
                          (OpNode (_.VT _.RC:$src1),
                                  (X86VBroadcast
                                     (_.ScalarLdFrag addr:$src2)))))],
               itins.rm>,
             EVEX_4V, EVEX_K, EVEX_B,
             Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002053
// Width expansion of avx512_icmp_packed (no broadcast forms). The caller
// supplies the feature predicate prd; the 256/128-bit records additionally
// require HasVLX. IsCommutable defaults to 0.
multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                                 OpndItins itins, AVX512VLVectorVTInfo VTInfo,
                                 Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in {
    defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, itins,
                                VTInfo.info512, IsCommutable>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, itins,
                                   VTInfo.info256, IsCommutable>,
                EVEX_V256;
    defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, itins,
                                   VTInfo.info128, IsCommutable>,
                EVEX_V128;
  }
}
2068
// Width expansion of avx512_icmp_packed_rmb (includes broadcast forms). The
// caller supplies the feature predicate prd; the 256/128-bit records
// additionally require HasVLX. IsCommutable defaults to 0.
multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
                                     PatFrag OpNode, OpndItins itins,
                                     AVX512VLVectorVTInfo VTInfo,
                                     Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in {
    defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, itins,
                                    VTInfo.info512, IsCommutable>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, itins,
                                       VTInfo.info256, IsCommutable>,
                EVEX_V256;
    defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, itins,
                                       VTInfo.info128, IsCommutable>,
                EVEX_V128;
  }
}
2084
// Two-operand fragments over X86cmpm with the predicate immediate pinned.
// The immediates use the same numbering as the table in CommutePCMPCC:
// 0 = EQ, 4 = NE, 6 = NLE (i.e. greater-than).
def X86pcmpeqm : PatFrag<(ops node:$src1, node:$src2),
                         (X86cmpm node:$src1, node:$src2, (i8 0))>;

def X86pcmpnem : PatFrag<(ops node:$src1, node:$src2),
                         (X86cmpm node:$src1, node:$src2, (i8 4))>;

def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
                         (X86cmpm node:$src1, node:$src2, (i8 6))>;
2091
// FIXME: Is there a better scheduler itinerary for VPCMP?

// Packed integer compare-for-equality. These pass IsCommutable = 1.
// Byte/word forms are predicated on HasBWI; dword/qword forms on HasAVX512
// and use the *_rmb_vl variant.
defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm,
                                      SSE_ALU_F32P, avx512vl_i8_info,
                                      HasBWI, 1>,
                EVEX_CD8<8, CD8VF>, VEX_WIG;

defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm,
                                      SSE_ALU_F32P, avx512vl_i16_info,
                                      HasBWI, 1>,
                EVEX_CD8<16, CD8VF>, VEX_WIG;

defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm,
                                          SSE_ALU_F32P, avx512vl_i32_info,
                                          HasAVX512, 1>,
                EVEX_CD8<32, CD8VF>;

defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm,
                                          SSE_ALU_F32P, avx512vl_i64_info,
                                          HasAVX512, 1>,
                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Packed integer compare-for-greater-than. IsCommutable is left at its
// default of 0.
defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm,
                                      SSE_ALU_F32P, avx512vl_i8_info,
                                      HasBWI>,
                EVEX_CD8<8, CD8VF>, VEX_WIG;

defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm,
                                      SSE_ALU_F32P, avx512vl_i16_info,
                                      HasBWI>,
                EVEX_CD8<16, CD8VF>, VEX_WIG;

defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm,
                                          SSE_ALU_F32P, avx512vl_i32_info,
                                          HasAVX512>,
                EVEX_CD8<32, CD8VF>;

defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm,
                                          SSE_ALU_F32P, avx512vl_i64_info,
                                          HasAVX512>,
                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002124
// Immediate transform applied when the two compare operands are exchanged so
// that a load appearing as the first operand can be matched as the memory
// operand. Only the low three bits of the immediate are considered; the
// result is the predicate that is equivalent once the operands are swapped.
def CommutePCMPCC : SDNodeXForm<imm, [{
  uint8_t Imm = N->getZExtValue() & 0x7;
  switch (Imm) {
  default: llvm_unreachable("Unreachable!");
  // Predicates that are unchanged by swapping the operands.
  case 0x00: // EQ
  case 0x03: // FALSE
  case 0x04: // NE
  case 0x07: // TRUE
    break;
  // Ordering predicates are replaced by their mirror image.
  case 0x01: Imm = 0x06; break; // LT  -> NLE
  case 0x06: Imm = 0x01; break; // NLE -> LT
  case 0x02: Imm = 0x05; break; // LE  -> NLT
  case 0x05: Imm = 0x02; break; // NLT -> LE
  }
  return getI8Imm(Imm, SDLoc(N));
}]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002143
// Integer compare with a condition-code operand, producing a mask register.
//
// Defined records:
//   rri / rmi         - unmasked register-register / register-memory forms.
//   rrik / rmik       - write-masked forms; the selection pattern ANDs $mask
//                       with the compare result.
//   *_alt             - assembler-only forms that take an explicit u8
//                       immediate instead of a condition-code mnemonic; they
//                       carry no selection pattern.
//   trailing Pat defs - select rmi/rmik when the load is the FIRST operand of
//                       OpNode, swapping the operands and rewriting the
//                       immediate with CommutePCMPCC.
multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode,
                          OpndItins itins, X86VectorVTInfo _> {
  let isCommutable = 1 in {
    def rri : AVX512AIi8<opc, MRMSrcReg,
                         (outs _.KRC:$dst),
                         (ins _.RC:$src1, _.RC:$src2, AVX512ICC:$cc),
                         !strconcat("vpcmp${cc}", Suffix,
                                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                         [(set _.KRC:$dst,
                               (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                       imm:$cc))],
                         itins.rr>,
              EVEX_4V, Sched<[itins.Sched]>;
  }

  def rmi : AVX512AIi8<opc, MRMSrcMem,
                       (outs _.KRC:$dst),
                       (ins _.RC:$src1, _.MemOp:$src2, AVX512ICC:$cc),
                       !strconcat("vpcmp${cc}", Suffix,
                                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                       [(set _.KRC:$dst,
                             (OpNode (_.VT _.RC:$src1),
                                     (_.VT (bitconvert (_.LdFrag addr:$src2))),
                                     imm:$cc))],
                       itins.rm>,
            EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;

  let isCommutable = 1 in {
    def rrik : AVX512AIi8<opc, MRMSrcReg,
                          (outs _.KRC:$dst),
                          (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
                               AVX512ICC:$cc),
                          !strconcat("vpcmp${cc}", Suffix,
                                     "\t{$src2, $src1, $dst {${mask}}|",
                                     "$dst {${mask}}, $src1, $src2}"),
                          [(set _.KRC:$dst,
                                (and _.KRCWM:$mask,
                                     (OpNode (_.VT _.RC:$src1),
                                             (_.VT _.RC:$src2),
                                             imm:$cc)))],
                          itins.rr>,
               EVEX_4V, EVEX_K, Sched<[itins.Sched]>;
  }

  def rmik : AVX512AIi8<opc, MRMSrcMem,
                        (outs _.KRC:$dst),
                        (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
                             AVX512ICC:$cc),
                        !strconcat("vpcmp${cc}", Suffix,
                                   "\t{$src2, $src1, $dst {${mask}}|",
                                   "$dst {${mask}}, $src1, $src2}"),
                        [(set _.KRC:$dst,
                              (and _.KRCWM:$mask,
                                   (OpNode (_.VT _.RC:$src1),
                                           (_.VT (bitconvert
                                                  (_.LdFrag addr:$src2))),
                                           imm:$cc)))],
                        itins.rm>,
             EVEX_4V, EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>;

  // Accept explicit immediate argument form instead of comparison code.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    def rri_alt : AVX512AIi8<opc, MRMSrcReg,
                             (outs _.KRC:$dst),
                             (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                             !strconcat("vpcmp", Suffix,
                                        "\t{$cc, $src2, $src1, $dst|",
                                        "$dst, $src1, $src2, $cc}"),
                             [], itins.rr>,
                  EVEX_4V, Sched<[itins.Sched]>;

    // No pattern to infer the load from, so mayLoad is set explicitly.
    let mayLoad = 1 in {
      def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
                               (outs _.KRC:$dst),
                               (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
                               !strconcat("vpcmp", Suffix,
                                          "\t{$cc, $src2, $src1, $dst|",
                                          "$dst, $src1, $src2, $cc}"),
                               [], itins.rm>,
                    EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
    }

    def rrik_alt : AVX512AIi8<opc, MRMSrcReg,
                              (outs _.KRC:$dst),
                              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
                                   u8imm:$cc),
                              !strconcat("vpcmp", Suffix,
                                         "\t{$cc, $src2, $src1, $dst {${mask}}|",
                                         "$dst {${mask}}, $src1, $src2, $cc}"),
                              [], itins.rr>,
                   EVEX_4V, EVEX_K, Sched<[itins.Sched]>;

    let mayLoad = 1 in {
      def rmik_alt : AVX512AIi8<opc, MRMSrcMem,
                                (outs _.KRC:$dst),
                                (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
                                     u8imm:$cc),
                                !strconcat("vpcmp", Suffix,
                                           "\t{$cc, $src2, $src1, $dst {${mask}}|",
                                           "$dst {${mask}}, $src1, $src2, $cc}"),
                                [], itins.rm>,
                     EVEX_4V, EVEX_K,
                     Sched<[itins.Sched.Folded, ReadAfterLd]>;
    }
  }

  // Load in the first operand: swap the operands and commute the predicate.
  def : Pat<(OpNode (bitconvert (_.LdFrag addr:$src2)),
                    (_.VT _.RC:$src1), imm:$cc),
            (!cast<Instruction>(NAME#_.ZSuffix#"rmi")
                 _.RC:$src1, addr:$src2, (CommutePCMPCC imm:$cc))>;

  def : Pat<(and _.KRCWM:$mask,
                 (OpNode (bitconvert (_.LdFrag addr:$src2)),
                         (_.VT _.RC:$src1), imm:$cc)),
            (!cast<Instruction>(NAME#_.ZSuffix#"rmik")
                 _.KRCWM:$mask, _.RC:$src1, addr:$src2,
                 (CommutePCMPCC imm:$cc))>;
}
2228
// Extends avx512_icmp_cc with embedded-broadcast memory forms.
//
// Added records (on top of everything inherited from avx512_icmp_cc):
//   rmib / rmibk         - second operand is a broadcast scalar load; the
//                          masked form ANDs $mask with the compare result.
//   rmib_alt / rmibk_alt - assembler-only explicit-immediate spellings.
//   trailing Pat defs    - select rmib/rmibk when the broadcast load is the
//                          FIRST operand of OpNode, swapping the operands and
//                          rewriting the immediate with CommutePCMPCC.
multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, SDNode OpNode,
                              OpndItins itins, X86VectorVTInfo _> :
           avx512_icmp_cc<opc, Suffix, OpNode, itins, _> {
  def rmib : AVX512AIi8<opc, MRMSrcMem,
                        (outs _.KRC:$dst),
                        (ins _.RC:$src1, _.ScalarMemOp:$src2, AVX512ICC:$cc),
                        !strconcat("vpcmp${cc}", Suffix,
                                   "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
                                   "$dst, $src1, ${src2}", _.BroadcastStr, "}"),
                        [(set _.KRC:$dst,
                              (OpNode (_.VT _.RC:$src1),
                                      (X86VBroadcast
                                       (_.ScalarLdFrag addr:$src2)),
                                      imm:$cc))],
                        itins.rm>,
             EVEX_4V, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;

  def rmibk : AVX512AIi8<opc, MRMSrcMem,
                         (outs _.KRC:$dst),
                         (ins _.KRCWM:$mask, _.RC:$src1,
                              _.ScalarMemOp:$src2, AVX512ICC:$cc),
                         !strconcat("vpcmp${cc}", Suffix,
                                    "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                                    "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
                         [(set _.KRC:$dst,
                               (and _.KRCWM:$mask,
                                    (OpNode (_.VT _.RC:$src1),
                                            (X86VBroadcast
                                             (_.ScalarLdFrag addr:$src2)),
                                            imm:$cc)))],
                         itins.rm>,
              EVEX_4V, EVEX_K, EVEX_B,
              Sched<[itins.Sched.Folded, ReadAfterLd]>;

  // Accept explicit immediate argument form instead of comparison code.
  let isAsmParserOnly = 1, hasSideEffects = 0, mayLoad = 1 in {
    def rmib_alt : AVX512AIi8<opc, MRMSrcMem,
                              (outs _.KRC:$dst),
                              (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
                              !strconcat("vpcmp", Suffix,
                                         "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
                                         "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
                              [], itins.rm>,
                   EVEX_4V, EVEX_B,
                   Sched<[itins.Sched.Folded, ReadAfterLd]>;

    def rmibk_alt : AVX512AIi8<opc, MRMSrcMem,
                               (outs _.KRC:$dst),
                               (ins _.KRCWM:$mask, _.RC:$src1,
                                    _.ScalarMemOp:$src2, u8imm:$cc),
                               !strconcat("vpcmp", Suffix,
                                          "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                                          "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
                               [], itins.rm>,
                    EVEX_4V, EVEX_K, EVEX_B,
                    Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }

  // Broadcast load in the first operand: swap operands, commute predicate.
  def : Pat<(OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
                    (_.VT _.RC:$src1), imm:$cc),
            (!cast<Instruction>(NAME#_.ZSuffix#"rmib")
                 _.RC:$src1, addr:$src2, (CommutePCMPCC imm:$cc))>;

  def : Pat<(and _.KRCWM:$mask,
                 (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
                         (_.VT _.RC:$src1), imm:$cc)),
            (!cast<Instruction>(NAME#_.ZSuffix#"rmibk")
                 _.KRCWM:$mask, _.RC:$src1, addr:$src2,
                 (CommutePCMPCC imm:$cc))>;
}
2288
// Instantiates avx512_icmp_cc once per EVEX vector length.
//   Z    - 512-bit form, guarded by `prd` only.
//   Z256 - 256-bit form, guarded by `prd` and HasVLX.
//   Z128 - 128-bit form, guarded by `prd` and HasVLX.
multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, SDNode OpNode,
                             OpndItins itins, AVX512VLVectorVTInfo VTInfo,
                             Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_icmp_cc<opc, Suffix, OpNode, itins, VTInfo.info512>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc<opc, Suffix, OpNode, itins, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_icmp_cc<opc, Suffix, OpNode, itins, VTInfo.info128>,
                EVEX_V128;
  }
}
2303
// Instantiates avx512_icmp_cc_rmb once per EVEX vector length.
//   Z    - 512-bit form, guarded by `prd` only.
//   Z256 - 256-bit form, guarded by `prd` and HasVLX.
//   Z128 - 128-bit form, guarded by `prd` and HasVLX.
multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, SDNode OpNode,
                                 OpndItins itins, AVX512VLVectorVTInfo VTInfo,
                                 Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_icmp_cc_rmb<opc, Suffix, OpNode, itins, VTInfo.info512>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, itins,
                                   VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, itins,
                                   VTInfo.info128>,
                EVEX_V128;
  }
}
2318
// FIXME: Is there a better scheduler itinerary for VPCMP/VPCMPU?

// Condition-code integer compares. The non-"u" mnemonics are built on
// X86cmpm and the "u" mnemonics on X86cmpmu. Byte/word forms are predicated
// on HasBWI; dword/qword forms on HasAVX512 and also get broadcast variants.

// Byte elements.
defm VPCMPB  : avx512_icmp_cc_vl<0x3F, "b", X86cmpm, SSE_ALU_F32P,
                                 avx512vl_i8_info, HasBWI>,
               EVEX_CD8<8, CD8VF>;
defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86cmpmu, SSE_ALU_F32P,
                                 avx512vl_i8_info, HasBWI>,
               EVEX_CD8<8, CD8VF>;

// Word elements.
defm VPCMPW  : avx512_icmp_cc_vl<0x3F, "w", X86cmpm, SSE_ALU_F32P,
                                 avx512vl_i16_info, HasBWI>,
               VEX_W, EVEX_CD8<16, CD8VF>;
defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86cmpmu, SSE_ALU_F32P,
                                 avx512vl_i16_info, HasBWI>,
               VEX_W, EVEX_CD8<16, CD8VF>;

// Doubleword elements.
defm VPCMPD  : avx512_icmp_cc_rmb_vl<0x1F, "d", X86cmpm, SSE_ALU_F32P,
                                     avx512vl_i32_info, HasAVX512>,
               EVEX_CD8<32, CD8VF>;
defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86cmpmu, SSE_ALU_F32P,
                                     avx512vl_i32_info, HasAVX512>,
               EVEX_CD8<32, CD8VF>;

// Quadword elements.
defm VPCMPQ  : avx512_icmp_cc_rmb_vl<0x1F, "q", X86cmpm, SSE_ALU_F32P,
                                     avx512vl_i64_info, HasAVX512>,
               VEX_W, EVEX_CD8<64, CD8VF>;
defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86cmpmu, SSE_ALU_F32P,
                                     avx512vl_i64_info, HasAVX512>,
               VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002345
Ayman Musa721d97f2017-06-27 12:08:37 +00002346
// Packed floating-point compare into a mask register (opcode 0xC2).
//
// Defined records:
//   rri / rmi / rmbi   - register, full-vector memory and broadcast-memory
//                        forms, built through AVX512_maskable_cmp.
//   *_alt              - assembler-only forms that take an explicit u8
//                        immediate instead of a condition-code mnemonic.
//   trailing Pat defs  - select the memory/broadcast forms (and their masked
//                        "k" variants) when the load is the FIRST operand of
//                        X86cmpm; these only fire for immediates accepted by
//                        CommutableCMPCC and reuse the immediate unchanged.
multiclass avx512_vcmp_common<OpndItins itins, X86VectorVTInfo _> {
  // The trailing `1` on rri is an extra argument to AVX512_maskable_cmp that
  // the memory forms do not pass (presumably its commutable flag - TODO
  // confirm against the AVX512_maskable_cmp definition).
  defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                                 (outs _.KRC:$dst),
                                 (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
                                 "vcmp${cc}"#_.Suffix,
                                 "$src2, $src1", "$src1, $src2",
                                 (X86cmpm (_.VT _.RC:$src1),
                                          (_.VT _.RC:$src2),
                                          imm:$cc),
                                 itins.rr, 1>,
             Sched<[itins.Sched]>;

  defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                                 (outs _.KRC:$dst),
                                 (ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc),
                                 "vcmp${cc}"#_.Suffix,
                                 "$src2, $src1", "$src1, $src2",
                                 (X86cmpm (_.VT _.RC:$src1),
                                          (_.VT (bitconvert
                                                 (_.LdFrag addr:$src2))),
                                          imm:$cc),
                                 itins.rm>,
             Sched<[itins.Sched.Folded, ReadAfterLd]>;

  defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                                  (outs _.KRC:$dst),
                                  (ins _.RC:$src1, _.ScalarMemOp:$src2,
                                       AVXCC:$cc),
                                  "vcmp${cc}"#_.Suffix,
                                  "${src2}"##_.BroadcastStr##", $src1",
                                  "$src1, ${src2}"##_.BroadcastStr,
                                  (X86cmpm (_.VT _.RC:$src1),
                                           (_.VT (X86VBroadcast
                                                  (_.ScalarLdFrag addr:$src2))),
                                           imm:$cc),
                                  itins.rm>,
              EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;

  // Accept explicit immediate argument form instead of comparison code.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
                                           (outs _.KRC:$dst),
                                           (ins _.RC:$src1, _.RC:$src2,
                                                u8imm:$cc),
                                           "vcmp"#_.Suffix,
                                           "$cc, $src2, $src1",
                                           "$src1, $src2, $cc",
                                           itins.rr>,
                   Sched<[itins.Sched]>;

    let mayLoad = 1 in {
      defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
                                             (outs _.KRC:$dst),
                                             (ins _.RC:$src1, _.MemOp:$src2,
                                                  u8imm:$cc),
                                             "vcmp"#_.Suffix,
                                             "$cc, $src2, $src1",
                                             "$src1, $src2, $cc",
                                             itins.rm>,
                     Sched<[itins.Sched.Folded, ReadAfterLd]>;

      defm rmbi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
                                              (outs _.KRC:$dst),
                                              (ins _.RC:$src1,
                                                   _.ScalarMemOp:$src2,
                                                   u8imm:$cc),
                                              "vcmp"#_.Suffix,
                                              "$cc, ${src2}"##_.BroadcastStr##", $src1",
                                              "$src1, ${src2}"##_.BroadcastStr##", $cc",
                                              itins.rm>,
                      EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
    }
  }

  // Patterns for selecting with loads in other operand.
  def : Pat<(X86cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
                     CommutableCMPCC:$cc),
            (!cast<Instruction>(NAME#_.ZSuffix#"rmi")
                 _.RC:$src1, addr:$src2, imm:$cc)>;

  def : Pat<(and _.KRCWM:$mask,
                 (X86cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
                          CommutableCMPCC:$cc)),
            (!cast<Instruction>(NAME#_.ZSuffix#"rmik")
                 _.KRCWM:$mask, _.RC:$src1, addr:$src2, imm:$cc)>;

  def : Pat<(X86cmpm (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
                     (_.VT _.RC:$src1), CommutableCMPCC:$cc),
            (!cast<Instruction>(NAME#_.ZSuffix#"rmbi")
                 _.RC:$src1, addr:$src2, imm:$cc)>;

  def : Pat<(and _.KRCWM:$mask,
                 (X86cmpm (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
                          (_.VT _.RC:$src1), CommutableCMPCC:$cc)),
            (!cast<Instruction>(NAME#_.ZSuffix#"rmbik")
                 _.KRCWM:$mask, _.RC:$src1, addr:$src2, imm:$cc)>;
}
2429
// Register-register packed fp compare carrying the "{sae}" assembly operand.
//   rrib     - selected for X86cmpmRnd with rounding operand FROUND_NO_EXC.
//   rrib_alt - assembler-only spelling with an explicit u8 immediate.
// Both records are tagged EVEX_B.
multiclass avx512_vcmp_sae<OpndItins itins, X86VectorVTInfo _> {
  // Comparison code form (VCMP[EQ/LT/LE/...]).
  defm rrib : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                                  (outs _.KRC:$dst),
                                  (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
                                  "vcmp${cc}"#_.Suffix,
                                  "{sae}, $src2, $src1",
                                  "$src1, $src2, {sae}",
                                  (X86cmpmRnd (_.VT _.RC:$src1),
                                              (_.VT _.RC:$src2),
                                              imm:$cc,
                                              (i32 FROUND_NO_EXC)),
                                  itins.rr>,
              EVEX_B, Sched<[itins.Sched]>;

  // Explicit immediate form.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    defm rrib_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
                                            (outs _.KRC:$dst),
                                            (ins _.RC:$src1, _.RC:$src2,
                                                 u8imm:$cc),
                                            "vcmp"#_.Suffix,
                                            "$cc, {sae}, $src2, $src1",
                                            "$src1, $src2, {sae}, $cc",
                                            itins.rr>,
                    EVEX_B, Sched<[itins.Sched]>;
  }
}
2452
// Packed fp compare at every vector length.
//   Z           - 512-bit; combines avx512_vcmp_common with the {sae} forms
//                 from avx512_vcmp_sae. Requires HasAVX512.
//   Z128 / Z256 - 128/256-bit; avx512_vcmp_common only. Require HasAVX512
//                 and HasVLX.
multiclass avx512_vcmp<OpndItins itins, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcmp_common<itins, _.info512>,
             avx512_vcmp_sae<itins, _.info512>,
             EVEX_V512;
  }

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_vcmp_common<itins, _.info128>, EVEX_V128;
    defm Z256 : avx512_vcmp_common<itins, _.info256>, EVEX_V256;
  }
}
2464
// Packed double-precision compare into a mask register.
defm VCMPPD : avx512_vcmp<SSE_ALU_F64P, avx512vl_f64_info>,
              AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;

// Packed single-precision compare into a mask register.
defm VCMPPS : avx512_vcmp<SSE_ALU_F32P, avx512vl_f32_info>,
              AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002469
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00002470
// Patterns to select scalar fp compares with the load as first operand.
// Restricted to immediates accepted by CommutableCMPCC, so the operands can
// be swapped into the register-memory instruction with the immediate reused
// as is.
let Predicates = [HasAVX512] in {
  // f64: X86cmpms(load, reg, cc) -> VCMPSDZrm reg, mem, cc
  def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1,
                            CommutableCMPCC:$cc)),
            (VCMPSDZrm FR64X:$src1, addr:$src2, imm:$cc)>;

  // f32: X86cmpms(load, reg, cc) -> VCMPSSZrm reg, mem, cc
  def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1,
                            CommutableCMPCC:$cc)),
            (VCMPSSZrm FR32X:$src1, addr:$src2, imm:$cc)>;
}
2481
// ----------------------------------------------------------------
// FPClass
//
// Scalar fpclass: mask = op(reg_scalar, imm)
//                 mask = op(mem_scalar, imm)
//
// Defined records:
//   rr / rm   - unmasked register / memory source.
//   rrk / rmk - write-masked forms.
//
// The masked patterns AND the write-mask with the classification result:
// destination bits whose mask bit is clear are zeroed, which is also how the
// masked integer-compare patterns in this file are expressed. (These patterns
// previously used `or`, which would have set the destination bit whenever
// the mask bit was set, regardless of the classification result.)
multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 OpndItins itins, X86VectorVTInfo _,
                                 Predicate prd> {
  let Predicates = [prd], ExeDomain = _.ExeDomain in {
    def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                    (ins _.RC:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
                                              (i32 imm:$src2)))],
                    itins.rr>,
             Sched<[itins.Sched]>;

    def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                     OpcodeStr##_.Suffix#
                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                            (OpNode (_.VT _.RC:$src1),
                                                    (i32 imm:$src2))))],
                     itins.rr>,
              EVEX_K, Sched<[itins.Sched]>;

    def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix##
                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,
                          (OpNode _.ScalarIntMemCPat:$src1,
                                  (i32 imm:$src2)))],
                    itins.rm>,
             Sched<[itins.Sched.Folded, ReadAfterLd]>;

    def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1,
                          i32u8imm:$src2),
                     OpcodeStr##_.Suffix##
                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                            (OpNode _.ScalarIntMemCPat:$src1,
                                                    (i32 imm:$src2))))],
                     itins.rm>,
              EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
2522
// Vector fpclass: mask = fpclass(reg_vec, imm)
//                 mask = fpclass(mem_vec, imm)
//                 mask = fpclass(broadcast(eltVt), imm)
//
// Defined records:
//   rr / rrk   - register source, unmasked / write-masked.
//   rm / rmk   - full-vector memory source; `mem` is appended to the
//                mnemonic for these forms.
//   rmb / rmbk - broadcast scalar memory source; `broadcast` is appended to
//                the mnemonic for these forms.
//
// The masked patterns AND the write-mask with the classification result:
// destination bits whose mask bit is clear are zeroed, which is also how the
// masked integer-compare patterns in this file are expressed. (These patterns
// previously used `or`, which would have set the destination bit whenever
// the mask bit was set, regardless of the classification result.)
multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 OpndItins itins, X86VectorVTInfo _,
                                 string mem, string broadcast> {
  let ExeDomain = _.ExeDomain in {
    def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                    (ins _.RC:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
                                              (i32 imm:$src2)))],
                    itins.rr>,
             Sched<[itins.Sched]>;

    def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                     OpcodeStr##_.Suffix#
                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                            (OpNode (_.VT _.RC:$src1),
                                                    (i32 imm:$src2))))],
                     itins.rr>,
              EVEX_K, Sched<[itins.Sched]>;

    def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix##mem#
                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,
                          (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                  (i32 imm:$src2)))],
                    itins.rm>,
             Sched<[itins.Sched.Folded, ReadAfterLd]>;

    def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
                     OpcodeStr##_.Suffix##mem#
                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst,
                           (and _.KRCWM:$mask,
                                (OpNode (_.VT (bitconvert
                                               (_.LdFrag addr:$src1))),
                                        (i32 imm:$src2))))],
                     itins.rm>,
              EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>;

    def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                     (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                     OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
                     _.BroadcastStr##", $dst|$dst, ${src1}"
                     ##_.BroadcastStr##", $src2}",
                     [(set _.KRC:$dst,
                           (OpNode (_.VT (X86VBroadcast
                                          (_.ScalarLdFrag addr:$src1))),
                                   (i32 imm:$src2)))],
                     itins.rm>,
              EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;

    def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                      (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
                      _.BroadcastStr##", $dst {${mask}}|$dst {${mask}}, ${src1}"##
                      _.BroadcastStr##", $src2}",
                      [(set _.KRC:$dst,
                            (and _.KRCWM:$mask,
                                 (OpNode (_.VT (X86VBroadcast
                                                (_.ScalarLdFrag addr:$src1))),
                                         (i32 imm:$src2))))],
                      itins.rm>,
               EVEX_B, EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
2582
// Instantiates avx512_vector_fpclass once per EVEX vector length, passing a
// length-specific `mem` mnemonic suffix: "{z}" for 512-bit, "{x}" for 128-bit
// and "{y}" for 256-bit. The 512-bit form is guarded by `prd`; the 128/256-bit
// forms additionally require HasVLX.
multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
                                     bits<8> opc, SDNode OpNode,
                                     OpndItins itins, Predicate prd,
                                     string broadcast> {
  let Predicates = [prd] in {
    defm Z : avx512_vector_fpclass<opc, OpcodeStr, OpNode, itins,
                                   _.info512, "{z}", broadcast>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, itins,
                                      _.info128, "{x}", broadcast>,
                EVEX_V128;
    defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, itins,
                                      _.info256, "{y}", broadcast>,
                EVEX_V256;
  }
}
2598
// FIXME: Is there a better scheduler itinerary for VFPCLASS?

// Builds the full fpclass family for one mnemonic:
//   PS / PD - packed single / double, via avx512_vector_fpclass_all with
//             opcode `opcVec`, node `VecOpNode` and broadcast suffixes
//             "{l}" / "{q}".
//   SS / SD - scalar single / double, via avx512_scalar_fpclass with opcode
//             `opcScalar` and node `ScalarOpNode`.
// The double-precision variants additionally carry VEX_W.
multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
                                 bits<8> opcScalar, SDNode VecOpNode,
                                 SDNode ScalarOpNode, Predicate prd> {
  defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec,
                                      VecOpNode, SSE_ALU_F32P, prd, "{l}">,
            EVEX_CD8<32, CD8VF>;

  defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec,
                                      VecOpNode, SSE_ALU_F64P, prd, "{q}">,
            EVEX_CD8<64, CD8VF>, VEX_W;

  defm SS : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
                                  SSE_ALU_F32S, f32x_info, prd>,
            EVEX_CD8<32, CD8VT1>;

  defm SD : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
                                  SSE_ALU_F64S, f64x_info, prd>,
            EVEX_CD8<64, CD8VT1>, VEX_W;
}
2615
// VFPCLASS{PS,PD,SS,SD}: opcode 0x66 for the packed forms and 0x67 for the
// scalar forms; all of them require DQI.
defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, X86Vfpclass,
                                      X86Vfpclasss, HasDQI>,
                AVX512AIi8Base, EVEX;
Asaf Badouh572bbce2015-09-20 08:46:07 +00002618
//-----------------------------------------------------------------
// Mask register copy, including
// - copy between mask registers
// - load/store mask registers
// - copy from GPR to mask register and vice versa
//
// avx512_mask_mov defines the three non-GPR forms of a mask move:
//   kk - mask register to mask register (opcode opc_kk, no pattern)
//   km - load a mask register from memory (opcode opc_km)
//   mk - store a mask register to memory (opcode opc_mk)
// KRC is the mask register class, vvt the mask value type used by the load
// pattern and x86memop the memory operand of the load/store forms.
multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
                         string OpcodeStr, RegisterClass KRC,
                         ValueType vvt, X86MemOperand x86memop> {
  // 'kk' has an empty pattern, so hasSideEffects cannot be inferred and is
  // cleared explicitly. This brace-less 'let' applies to 'kk' only.
  let hasSideEffects = 0, SchedRW = [WriteMove] in
  def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
             IIC_SSE_MOVDQ>;

  // The memory forms fall outside the 'let' above, so they get their own
  // scheduling classes instead of being left unscheduled.
  def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
             [(set KRC:$dst, (vvt (load addr:$src)))], IIC_SSE_MOVDQ>,
           Sched<[WriteLoad]>;
  def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
             [(store KRC:$src, addr:$dst)], IIC_SSE_MOVDQ>,
           Sched<[WriteStore]>;
}
2639
// GPR <-> mask register moves:
//   kr - GPR (GRC) to mask register (KRC), opcode opc_kr
//   rk - mask register (KRC) to GPR (GRC), opcode opc_rk
// Neither form has a pattern, so hasSideEffects is cleared explicitly.
multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
                               string OpcodeStr,
                               RegisterClass KRC, RegisterClass GRC> {
  let hasSideEffects = 0 in {
    def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
               OpcodeStr#"\t{$src, $dst|$dst, $src}", [],
               IIC_SSE_MOVD_ToGP>,
             Sched<[WriteMove]>;
    def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
               OpcodeStr#"\t{$src, $dst|$dst, $src}", [],
               IIC_SSE_MOVD_ToGP>,
             Sched<[WriteMove]>;
  }
}
2652
// KMOVB: 8-bit masks, requires DQI.
let Predicates = [HasDQI] in {
  defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
               VEX, PD;
}

// KMOVW: 16-bit masks, available with base AVX-512.
let Predicates = [HasAVX512] in {
  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
               VEX, PS;
}

// KMOVD / KMOVQ: 32- and 64-bit masks, require BWI. The mask/memory forms and
// the GPR forms are instantiated separately because they take different
// prefix and VEX_W mix-ins.
let Predicates = [HasBWI] in {
  defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1, i32mem>,
               VEX, PD, VEX_W;
  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
               VEX, XD;
  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
               VEX, PS, VEX_W;
  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
               VEX, XD, VEX_W;
}
2673
// GR from/to mask register
//
// 16-bit and 8-bit scalars go through a 32-bit GPR: the narrow register is
// inserted into (or extracted from) a 32-bit value, which is then copied
// to or from the mask register class.
def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
          (COPY_TO_REGCLASS
            (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)),
            VK16)>;
def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
          (EXTRACT_SUBREG
            (i32 (COPY_TO_REGCLASS VK16:$src, GR32)),
            sub_16bit)>;

def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
          (COPY_TO_REGCLASS
            (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)),
            VK8)>;
def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
          (EXTRACT_SUBREG
            (i32 (COPY_TO_REGCLASS VK8:$src, GR32)),
            sub_8bit)>;

// Extending a mask to i32: zext uses the KMOV mask-to-GPR instruction,
// anyext is a plain register-class copy.
def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
          (KMOVWrk VK16:$src)>;
def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
          (COPY_TO_REGCLASS VK16:$src, GR32)>;

def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
          (KMOVBrk VK8:$src)>,
      Requires<[HasDQI]>;
def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
          (COPY_TO_REGCLASS VK8:$src, GR32)>;

// 32- and 64-bit masks match the GPR width, so a register-class copy suffices.
def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
          (COPY_TO_REGCLASS GR32:$src, VK32)>;
def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
          (COPY_TO_REGCLASS VK32:$src, GR32)>;
def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
          (COPY_TO_REGCLASS GR64:$src, VK64)>;
def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
          (COPY_TO_REGCLASS VK64:$src, GR64)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002703
// Load/store kreg
//
// With DQI, masks narrower than 8 bits are stored and loaded through the
// byte-sized KMOVB memory forms after a register-class copy to/from VK8.
let Predicates = [HasDQI] in {
  // Stores.
  def : Pat<(store VK4:$src, addr:$dst),
            (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK4:$src, VK8))>;
  def : Pat<(store VK2:$src, addr:$dst),
            (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK2:$src, VK8))>;
  def : Pat<(store VK1:$src, addr:$dst),
            (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;

  // Loads.
  def : Pat<(v2i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
  def : Pat<(v4i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
}
// Without DQI there is no KMOVB. Masks of up to 8 bits are stored by copying
// the mask into a 32-bit GPR and storing its low byte with MOV8mr, and are
// loaded with a zero-extending byte load followed by a register-class copy.
let Predicates = [HasAVX512, NoDQI] in {
  // Stores.
  def : Pat<(store VK1:$src, addr:$dst),
            (MOV8mr addr:$dst,
              (i8 (EXTRACT_SUBREG
                    (i32 (COPY_TO_REGCLASS VK1:$src, GR32)), sub_8bit)))>;
  def : Pat<(store VK2:$src, addr:$dst),
            (MOV8mr addr:$dst,
              (i8 (EXTRACT_SUBREG
                    (i32 (COPY_TO_REGCLASS VK2:$src, GR32)), sub_8bit)))>;
  def : Pat<(store VK4:$src, addr:$dst),
            (MOV8mr addr:$dst,
              (i8 (EXTRACT_SUBREG
                    (i32 (COPY_TO_REGCLASS VK4:$src, GR32)), sub_8bit)))>;
  def : Pat<(store VK8:$src, addr:$dst),
            (MOV8mr addr:$dst,
              (i8 (EXTRACT_SUBREG
                    (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)))>;

  // Loads.
  def : Pat<(v8i1 (load addr:$src)),
            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
  def : Pat<(v2i1 (load addr:$src)),
            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK2)>;
  def : Pat<(v4i1 (load addr:$src)),
            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK4)>;
}
Elena Demikhovsky5e426f72016-04-03 08:41:12 +00002743
// Loads that use a zero-extending byte load regardless of DQI:
// a v1i1 load, and an i8 load that is bitconverted to v8i1.
let Predicates = [HasAVX512] in {
  def : Pat<(v1i1 (load addr:$src)),
            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK1)>;

  def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
}
Elena Demikhovskyc5f67262013-12-17 08:33:15 +00002750
let Predicates = [HasAVX512] in {
  // Per mask type: lower scalar_to_vector from a GPR and extraction of
  // element 0 to plain register-class copies.
  multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC,
                                              ValueType maskVT> {
    // 32-bit GPR -> mask.
    def : Pat<(maskVT (scalar_to_vector GR32:$src)),
              (COPY_TO_REGCLASS GR32:$src, maskRC)>;

    // Mask element 0 -> 32-bit GPR.
    def : Pat<(i32 (X86kextract maskRC:$src, (iPTR 0))),
              (COPY_TO_REGCLASS maskRC:$src, GR32)>;

    // 8-bit GPR -> mask, widened to 32 bits first.
    def : Pat<(maskVT (scalar_to_vector GR8:$src)),
              (COPY_TO_REGCLASS
                (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
                maskRC)>;
  }

  defm : operation_gpr_mask_copy_lowering<VK1,  v1i1>;
  defm : operation_gpr_mask_copy_lowering<VK2,  v2i1>;
  defm : operation_gpr_mask_copy_lowering<VK4,  v4i1>;
  defm : operation_gpr_mask_copy_lowering<VK8,  v8i1>;
  defm : operation_gpr_mask_copy_lowering<VK16, v16i1>;
  defm : operation_gpr_mask_copy_lowering<VK32, v32i1>;
  defm : operation_gpr_mask_copy_lowering<VK64, v64i1>;

  // Inserting a single bit taken from a GR8 into an all-zero v16i1: the GPR
  // is ANDed with 1 and then moved into a mask register with KMOVW.
  def : Pat<(insert_subvector (v16i1 immAllZerosV),
                              (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
            (COPY_TO_REGCLASS
              (KMOVWkr
                (AND32ri8
                  (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
                  (i32 1))),
              VK16)>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002778
// Mask unary operation
// - KNOT
//
// Defines the single-source register form 'rr' for mask class KRC, selected
// for (OpNode KRC) and guarded by predicate 'prd'.
multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
                            RegisterClass KRC, SDPatternOperator OpNode,
                            OpndItins itins, Predicate prd> {
  let Predicates = [prd] in {
    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
               OpcodeStr#"\t{$src, $dst|$dst, $src}",
               [(set KRC:$dst, (OpNode KRC:$src))], itins.rr>,
             Sched<[itins.Sched]>;
  }
}
2790
// Instantiates a mask unary operation for every mask width:
//   B (VK8,  HasDQI)    W (VK16, HasAVX512)
//   D (VK32, HasBWI)    Q (VK64, HasBWI)
// The width letter is appended to OpcodeStr.
multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNode, OpndItins itins> {
  defm B : avx512_mask_unop<opc, OpcodeStr#"b", VK8, OpNode, itins, HasDQI>,
           VEX, PD;
  defm W : avx512_mask_unop<opc, OpcodeStr#"w", VK16, OpNode, itins,
                            HasAVX512>,
           VEX, PS;
  defm D : avx512_mask_unop<opc, OpcodeStr#"d", VK32, OpNode, itins, HasBWI>,
           VEX, PD, VEX_W;
  defm Q : avx512_mask_unop<opc, OpcodeStr#"q", VK64, OpNode, itins, HasBWI>,
           VEX, PS, VEX_W;
}
2802
defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SSE_BIT_ITINS_P>;

// KNOTB requires DQI. Without it, an 8-bit mask is promoted to 16 bits and
// KNOTW is used instead. Only the VK8 pattern carries this predicate.
let Predicates = [HasAVX512, NoDQI] in {
  def : Pat<(vnot VK8:$src),
            (COPY_TO_REGCLASS
              (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
}

// 4- and 2-bit masks always go through KNOTW; these patterns are unpredicated.
def : Pat<(vnot VK4:$src),
          (COPY_TO_REGCLASS
            (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
def : Pat<(vnot VK2:$src),
          (COPY_TO_REGCLASS
            (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002814
// Mask binary operation
// - KAND, KANDN, KOR, KXNOR, KXOR
//
// Defines the two-source register form 'rr' for mask class KRC, selected for
// (OpNode KRC, KRC). 'IsCommutable' is forwarded to the instruction's
// isCommutable flag and 'prd' guards the definition.
multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
                             RegisterClass KRC, SDPatternOperator OpNode,
                             OpndItins itins, Predicate prd,
                             bit IsCommutable> {
  let Predicates = [prd], isCommutable = IsCommutable in {
    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))], itins.rr>,
             Sched<[itins.Sched]>;
  }
}
2827
// Instantiates a mask binary operation for every mask width:
//   B (VK8,  HasDQI)    W (VK16, prdW - HasAVX512 unless overridden)
//   D (VK32, HasBWI)    Q (VK64, HasBWI)
// The width letter is appended to OpcodeStr.
multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
                                 SDPatternOperator OpNode, OpndItins itins,
                                 bit IsCommutable,
                                 Predicate prdW = HasAVX512> {
  defm B : avx512_mask_binop<opc, OpcodeStr#"b", VK8, OpNode, itins, HasDQI,
                             IsCommutable>,
           VEX_4V, VEX_L, PD;
  defm W : avx512_mask_binop<opc, OpcodeStr#"w", VK16, OpNode, itins, prdW,
                             IsCommutable>,
           VEX_4V, VEX_L, PS;
  defm D : avx512_mask_binop<opc, OpcodeStr#"d", VK32, OpNode, itins, HasBWI,
                             IsCommutable>,
           VEX_4V, VEX_L, VEX_W, PD;
  defm Q : avx512_mask_binop<opc, OpcodeStr#"q", VK64, OpNode, itins, HasBWI,
                             IsCommutable>,
           VEX_4V, VEX_L, VEX_W, PS;
}
2840
// Composite logic fragments built on 'not':
//   andn(a, b) = (not a) and b
//   xnor(a, b) = not (a xor b)
def andn : PatFrag<(ops node:$i0, node:$i1),
                   (and (not node:$i0), node:$i1)>;
def xnor : PatFrag<(ops node:$i0, node:$i1),
                   (not (xor node:$i0, node:$i1))>;

// These nodes use 'vnot' instead of 'not' to support vectors.
def vandn : PatFrag<(ops node:$i0, node:$i1),
                    (and (vnot node:$i0), node:$i1)>;
def vxnor : PatFrag<(ops node:$i0, node:$i1),
                    (vnot (xor node:$i0, node:$i1))>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002846
// Mask logic/arithmetic instructions. The last positional argument is the
// commutable flag. KANDN is the only non-commutable one, and KADD overrides
// the 16-bit predicate with HasDQI.
defm KAND  : avx512_mask_binop_all<0x41, "kand",  and,   SSE_BIT_ITINS_P, 1>;
defm KOR   : avx512_mask_binop_all<0x45, "kor",   or,    SSE_BIT_ITINS_P, 1>;
defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, SSE_BIT_ITINS_P, 1>;
defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor,   SSE_BIT_ITINS_P, 1>;
defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, SSE_BIT_ITINS_P, 0>;
defm KADD  : avx512_mask_binop_all<0x4A, "kadd",  add,   SSE_BIT_ITINS_P, 1,
                                   HasDQI>;
Elena Demikhovskyb64d7e82013-12-25 10:06:40 +00002853
// Selects a 16-bit mask instruction 'Inst' for binary operations on masks
// narrower than 16 bits: both operands are copied to VK16, the operation is
// performed there, and the result is copied back to the original mask class.
//   VOpNode - operator matched for the VK8, VK4 and VK2 patterns.
//   OpNode  - operator matched for the VK1 pattern.
multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode,
                            Instruction Inst> {
  // With AVX512F, 8-bit mask is promoted to 16-bit mask,
  // for the DQI set, this type is legal and KxxxB instruction is used
  let Predicates = [NoDQI] in
  def : Pat<(VOpNode VK8:$src1, VK8:$src2),
            (COPY_TO_REGCLASS
              (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
                    (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;

  // All types smaller than 8 bits require conversion anyway
  def : Pat<(OpNode VK1:$src1, VK1:$src2),
            (COPY_TO_REGCLASS (Inst
                           (COPY_TO_REGCLASS VK1:$src1, VK16),
                           (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
  // The result class must match the source class: VK2 in, VK2 out.
  def : Pat<(VOpNode VK2:$src1, VK2:$src2),
            (COPY_TO_REGCLASS (Inst
                           (COPY_TO_REGCLASS VK2:$src1, VK16),
                           (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
  // Likewise VK4 in, VK4 out.
  def : Pat<(VOpNode VK4:$src1, VK4:$src2),
            (COPY_TO_REGCLASS (Inst
                           (COPY_TO_REGCLASS VK4:$src1, VK16),
                           (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
}
2878
// Narrow-mask lowering for each logic operation, always through the 16-bit
// (W) instruction. The first operator is used for the VK8/VK4/VK2 patterns,
// the second for the VK1 pattern.
defm : avx512_binop_pat<and,   and,  KANDWrr>;
defm : avx512_binop_pat<vandn, andn, KANDNWrr>;
defm : avx512_binop_pat<or,    or,   KORWrr>;
defm : avx512_binop_pat<vxnor, xnor, KXNORWrr>;
defm : avx512_binop_pat<xor,   xor,  KXORWrr>;
Elena Demikhovskyd1084c52015-04-27 12:57:59 +00002884
// Mask unpacking
//
// Defines the KUNPCK 'rr' instruction on mask class KRC (no pattern on the
// instruction itself) plus a pattern selecting it for concat_vectors of two
// KRCSrc masks producing VT. Note that the pattern passes the concat
// operands to the instruction in swapped order ($src2 first, then $src1).
multiclass avx512_mask_unpck<string Suffix, RegisterClass KRC, ValueType VT,
                             RegisterClass KRCSrc, OpndItins itins,
                             Predicate prd> {
  let Predicates = [prd] in {
    // Pattern-less instruction: clear hasSideEffects explicitly.
    let hasSideEffects = 0 in {
      def rr : I<0x4b, MRMSrcReg, (outs KRC:$dst),
                 (ins KRC:$src1, KRC:$src2),
                 "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                 [], itins.rr>,
               VEX_4V, VEX_L, Sched<[itins.Sched]>;
    }

    def : Pat<(VT (concat_vectors KRCSrc:$src1, KRCSrc:$src2)),
              (!cast<Instruction>(NAME##rr)
                 (COPY_TO_REGCLASS KRCSrc:$src2, KRC),
                 (COPY_TO_REGCLASS KRCSrc:$src1, KRC))>;
  }
}
2901
// KUNPCKBW: 2 x 8  -> 16 bits (base AVX-512).
// KUNPCKWD: 2 x 16 -> 32 bits (BWI).
// KUNPCKDQ: 2 x 32 -> 64 bits (BWI).
defm KUNPCKBW : avx512_mask_unpck<"bw", VK16, v16i1, VK8, SSE_UNPCK,
                                  HasAVX512>,
                PD;
defm KUNPCKWD : avx512_mask_unpck<"wd", VK32, v32i1, VK16, SSE_UNPCK,
                                  HasBWI>,
                PS;
defm KUNPCKDQ : avx512_mask_unpck<"dq", VK64, v64i1, VK32, SSE_UNPCK,
                                  HasBWI>,
                PS, VEX_W;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002905
// Mask bit testing
//
// Defines the 'rr' form of a mask test: two KRC sources, no register
// result; the instruction defines EFLAGS from (OpNode KRC, KRC).
multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                              SDNode OpNode, OpndItins itins, Predicate prd> {
  let Predicates = [prd], Defs = [EFLAGS] in {
    def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
               OpcodeStr#"\t{$src2, $src1|$src1, $src2}",
               [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))], itins.rr>,
             Sched<[itins.Sched]>;
  }
}
2915
// Instantiates a mask test for every mask width:
//   B (VK8,  HasDQI)    W (VK16, prdW - HasAVX512 unless overridden)
//   Q (VK64, HasBWI)    D (VK32, HasBWI)
multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                OpndItins itins,
                                Predicate prdW = HasAVX512> {
  defm B : avx512_mask_testop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                              itins, HasDQI>,
           VEX, PD;
  defm W : avx512_mask_testop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                              itins, prdW>,
           VEX, PS;
  defm Q : avx512_mask_testop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                              itins, HasBWI>,
           VEX, PS, VEX_W;
  defm D : avx512_mask_testop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                              itins, HasBWI>,
           VEX, PD, VEX_W;
}
2927
// KORTEST keeps the default HasAVX512 predicate for its 16-bit form;
// KTEST requires DQI for it.
defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SSE_PTEST>;
defm KTEST   : avx512_mask_testop_w<0x99, "ktest", X86ktest, SSE_PTEST,
                                    HasDQI>;
Elena Demikhovsky47fc44e2013-12-16 13:52:35 +00002930
// Mask shift
//
// Defines the register/immediate form 'ri': shifts a KRC mask by an 8-bit
// immediate, selected for (OpNode KRC, i8 imm).
multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                               SDNode OpNode, OpndItins itins> {
  let Predicates = [HasAVX512] in {
    def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
                 OpcodeStr#"\t{$imm, $src, $dst|$dst, $src, $imm}",
                 [(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))],
                 itins.rr>,
             Sched<[itins.Sched]>;
  }
}
2941
// Instantiates a mask shift for every mask width. W and B use opcode opc1,
// Q and D use opcode opc2; within each pair the wider form carries VEX_W.
// NOTE(review): avx512_mask_shiftop also sets Predicates = [HasAVX512] on
// 'ri'; confirm which setting takes effect for the B/Q/D forms wrapped in the
// 'let Predicates' statements below.
multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
                                 SDNode OpNode, OpndItins itins> {
  defm W : avx512_mask_shiftop<opc1, OpcodeStr#"w", VK16, OpNode, itins>,
           VEX, TAPD, VEX_W;

  let Predicates = [HasDQI] in {
    defm B : avx512_mask_shiftop<opc1, OpcodeStr#"b", VK8, OpNode, itins>,
             VEX, TAPD;
  }

  let Predicates = [HasBWI] in {
    defm Q : avx512_mask_shiftop<opc2, OpcodeStr#"q", VK64, OpNode, itins>,
             VEX, TAPD, VEX_W;
    defm D : avx512_mask_shiftop<opc2, OpcodeStr#"d", VK32, OpNode, itins>,
             VEX, TAPD;
  }
}
2956
// Mask shifts left/right. First opcode is for the W/B forms, second for Q/D.
defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl,
                                     SSE_PSHUF>;
defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr,
                                     SSE_PSHUF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002959
// Lowers a compare 'Frag' on a Narrow vector type to the instruction named
// InstStr#"Zrr" / InstStr#"Zrrk", which operates on the Wide type. Each
// Narrow operand is inserted into an IMPLICIT_DEF Wide register at
// Narrow.SubRegIdx, and the resulting mask is copied to Narrow.KRC.
multiclass axv512_icmp_packed_no_vlx_lowering<PatFrag Frag, string InstStr,
                                              X86VectorVTInfo Narrow,
                                              X86VectorVTInfo Wide> {
  // Unmasked compare.
  def : Pat<(Narrow.KVT (Frag (Narrow.VT Narrow.RC:$src1),
                              (Narrow.VT Narrow.RC:$src2))),
            (COPY_TO_REGCLASS
              (!cast<Instruction>(InstStr#"Zrr")
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1,
                                        Narrow.SubRegIdx)),
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2,
                                        Narrow.SubRegIdx))),
              Narrow.KRC)>;

  // Compare ANDed with a mask: the mask is copied to Wide.KRC and passed as
  // the first operand of the "Zrrk" instruction.
  def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                             (Frag (Narrow.VT Narrow.RC:$src1),
                                   (Narrow.VT Narrow.RC:$src2)))),
            (COPY_TO_REGCLASS
              (!cast<Instruction>(InstStr#"Zrrk")
                (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1,
                                        Narrow.SubRegIdx)),
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2,
                                        Narrow.SubRegIdx))),
              Narrow.KRC)>;
}
2981
// Same widening scheme as the non-cc lowering, for compares that carry a
// condition-code immediate ($cc): selects the Wide-type instruction named
// InstStr followed by Zrri (unmasked) or Zrrik (ANDed with a mask).
multiclass axv512_icmp_packed_cc_no_vlx_lowering<SDNode OpNode, string InstStr,
                                                 X86VectorVTInfo Narrow,
                                                 X86VectorVTInfo Wide> {
  // Unmasked compare.
  def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1),
                                (Narrow.VT Narrow.RC:$src2), imm:$cc)),
            (COPY_TO_REGCLASS
              (!cast<Instruction>(InstStr##Zrri)
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1,
                                        Narrow.SubRegIdx)),
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2,
                                        Narrow.SubRegIdx)),
                imm:$cc),
              Narrow.KRC)>;

  // Compare ANDed with a mask.
  def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                             (OpNode (Narrow.VT Narrow.RC:$src1),
                                     (Narrow.VT Narrow.RC:$src2), imm:$cc))),
            (COPY_TO_REGCLASS
              (!cast<Instruction>(InstStr##Zrrik)
                (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1,
                                        Narrow.SubRegIdx)),
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2,
                                        Narrow.SubRegIdx)),
                imm:$cc),
              Narrow.KRC)>;
}
3002
// Without VLX, compares on 128/256-bit vectors of 32/64-bit elements are
// selected to the corresponding 512-bit instruction via the lowering
// multiclasses.
let Predicates = [HasAVX512, NoVLX] in {
  // Greater-than / equal, 32-bit elements.
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTD",
                                            v8i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm, "VPCMPEQD",
                                            v8i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTD",
                                            v4i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm, "VPCMPEQD",
                                            v4i32x_info, v16i32_info>;

  // Greater-than / equal, 64-bit elements.
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTQ",
                                            v4i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm, "VPCMPEQQ",
                                            v4i64x_info, v8i64_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTQ",
                                            v2i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm, "VPCMPEQQ",
                                            v2i64x_info, v8i64_info>;

  // Condition-code compares, 32-bit elements (FP, signed, unsigned).
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS",
                                               v8f32x_info, v16f32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPD",
                                               v8i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUD",
                                               v8i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS",
                                               v4f32x_info, v16f32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPD",
                                               v4i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUD",
                                               v4i32x_info, v16i32_info>;

  // Condition-code compares, 64-bit elements (FP, signed, unsigned).
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPD",
                                               v4f64x_info, v8f64_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPQ",
                                               v4i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUQ",
                                               v4i64x_info, v8i64_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPD",
                                               v2f64x_info, v8f64_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPQ",
                                               v2i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUQ",
                                               v2i64x_info, v8i64_info>;
}
3032
// With BWI but without VLX, compares on 128/256-bit vectors of 8/16-bit
// elements are selected to the corresponding 512-bit instruction.
let Predicates = [HasBWI, NoVLX] in {
  // Greater-than / equal, 8-bit elements.
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTB",
                                            v32i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm, "VPCMPEQB",
                                            v32i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTB",
                                            v16i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm, "VPCMPEQB",
                                            v16i8x_info, v64i8_info>;

  // Greater-than / equal, 16-bit elements.
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTW",
                                            v16i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm, "VPCMPEQW",
                                            v16i16x_info, v32i16_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTW",
                                            v8i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm, "VPCMPEQW",
                                            v8i16x_info, v32i16_info>;

  // Condition-code compares, 8-bit elements (signed, unsigned).
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPB",
                                               v32i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUB",
                                               v32i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPB",
                                               v16i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUB",
                                               v16i8x_info, v64i8_info>;

  // Condition-code compares, 16-bit elements (signed, unsigned).
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPW",
                                               v16i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUW",
                                               v16i16x_info, v32i16_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPW",
                                               v8i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUW",
                                               v8i16x_info, v32i16_info>;
}
3058
// Mask setting all 0s or 1s
//
// Defines a pseudo instruction (no operands, empty asm string) that
// produces the constant 'Val' of type VT in a KRC register. It is marked
// rematerializable and as cheap as a move.
multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
  let Predicates = [HasAVX512],
      isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
      SchedRW = [WriteZero] in {
    def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
                   [(set KRC:$dst, (VT Val))]>;
  }
}
3067
// Instantiates the mask-constant pseudo for three mask widths:
//   W - VK16 / v16i1
//   D - VK32 / v32i1
//   Q - VK64 / v64i1
multiclass avx512_mask_setop_w<PatFrag Val> {
  defm W : avx512_mask_setop<VK16, v16i1, Val>;
  defm D : avx512_mask_setop<VK32, v32i1, Val>;
  defm Q : avx512_mask_setop<VK64, v64i1, Val>;
}
3073
// KSET0{W,D,Q} match an all-zeros mask constant; KSET1{W,D,Q} match an
// all-ones mask constant.
defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
3076
// Mask constants narrower than 16 bits (v8i1, v4i1, v2i1, v1i1) are produced
// with the 16-bit KSET0W/KSET1W pseudos and then copied into the narrower
// mask register class.
let Predicates = [HasAVX512] in {
  // All-zeros constants.
  def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
  def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
  def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
  def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;

  // All-ones constants.
  def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;
  def : Pat<(v4i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK4)>;
  def : Pat<(v2i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK2)>;
  def : Pat<(v1i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK1)>;
}
Igor Bregerf1bd7612016-03-06 07:46:03 +00003088
// Lowering of kmask extract_subvector / insert_subvector at index 0.
// Both directions are plain register-class copies between the narrow mask
// (subRC / subVT) and the wide mask (RC / VT):
//   - extracting the low subVT of a VT value copies RC -> subRC;
//   - inserting a subVT value into an undef VT copies subRC -> RC.
multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
                                             RegisterClass RC, ValueType VT> {
  def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
            (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;

  def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
            (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
}
// Index-0 subvector lowering for every (narrow mask, wider mask) pair.

// v1i1 inside each wider mask type.
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK2,  v2i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK4,  v4i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK64, v64i1>;

// v2i1 inside each wider mask type.
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK4,  v4i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK64, v64i1>;

// v4i1 inside each wider mask type.
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK64, v64i1>;

// v8i1 inside each wider mask type.
defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK64, v64i1>;

// v16i1 inside each wider mask type.
defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;

// v32i1 inside v64i1.
defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003124
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003125//===----------------------------------------------------------------------===//
3126// AVX-512 - Aligned and unaligned load and store
3127//
3128
Elena Demikhovsky2689d782015-03-02 12:46:21 +00003129
// Register-move and load forms of an EVEX vector move for one vector type.
//
// Instructions defined:
//   rr   / rm    - unmasked register move / load.
//   rrk  / rmk   - merge-masked: elements not selected by $mask come from
//                  $src0, which is tied to $dst.
//   rrkz / rmkz  - zero-masked: elements not selected by $mask are zero.
//
// Parameters:
//   opc, OpcodeStr - opcode byte and mnemonic.
//   itins          - itineraries; .rr is used for register forms, .rm for
//                    memory forms.
//   _              - vector type description (register class, mask class,
//                    memory operand, ...).
//   ld_frag        - load fragment matched by rm / rmk / rmkz.
//   mload          - masked-load fragment mapped onto rmk / rmkz by the
//                    patterns at the end of the multiclass.
//   NoRMPattern    - when set, rm is defined with an empty pattern list.
//   SelectOprr     - select operator used in the rrk / rrkz patterns (the
//                    memory forms always use vselect).
multiclass avx512_load<bits<8> opc, string OpcodeStr, MoveLoadStoreItins itins,
                       X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
                       bit NoRMPattern = 0,
                       SDPatternOperator SelectOprr = vselect> {
  let hasSideEffects = 0 in {
    def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
                      OpcodeStr # "\t{$src, $dst|$dst, $src}", [],
                      _.ExeDomain, itins.rr>,
             EVEX, Sched<[WriteMove]>;

    def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                        (ins _.KRCWM:$mask, _.RC:$src),
                        OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
                                    "${dst} {${mask}} {z}, $src}",
                        [(set _.RC:$dst,
                              (_.VT (SelectOprr _.KRCWM:$mask,
                                                (_.VT _.RC:$src),
                                                _.ImmAllZerosV)))],
                        _.ExeDomain, itins.rr>,
               EVEX, EVEX_KZ, Sched<[WriteMove]>;

    // The load flags are spelled out here; rm's pattern list is empty when
    // NoRMPattern is set.
    let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in {
      def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
                        OpcodeStr # "\t{$src, $dst|$dst, $src}",
                        !if(NoRMPattern, [],
                            [(set _.RC:$dst,
                                  (_.VT (bitconvert (ld_frag addr:$src))))]),
                        _.ExeDomain, itins.rm>,
               EVEX, Sched<[WriteLoad]>;
    }

    // Merge-masking forms: the pass-through operand $src0 shares a register
    // with the destination.
    let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
      def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                         (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
                         OpcodeStr # "\t{$src1, ${dst} {${mask}}|" #
                                     "${dst} {${mask}}, $src1}",
                         [(set _.RC:$dst,
                               (_.VT (SelectOprr _.KRCWM:$mask,
                                                 (_.VT _.RC:$src1),
                                                 (_.VT _.RC:$src0))))],
                         _.ExeDomain, itins.rr>,
                EVEX, EVEX_K, Sched<[WriteMove]>;

      def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                         (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
                         OpcodeStr # "\t{$src1, ${dst} {${mask}}|" #
                                     "${dst} {${mask}}, $src1}",
                         [(set _.RC:$dst,
                               (_.VT (vselect _.KRCWM:$mask,
                                       (_.VT (bitconvert (ld_frag addr:$src1))),
                                       (_.VT _.RC:$src0))))],
                         _.ExeDomain, itins.rm>,
                EVEX, EVEX_K, Sched<[WriteLoad]>;
    }

    def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                        (ins _.KRCWM:$mask, _.MemOp:$src),
                        !strconcat(OpcodeStr,
                                   "\t{$src, ${dst} {${mask}} {z}|",
                                   "${dst} {${mask}} {z}, $src}"),
                        [(set _.RC:$dst,
                              (_.VT (vselect _.KRCWM:$mask,
                                      (_.VT (bitconvert (ld_frag addr:$src))),
                                      _.ImmAllZerosV)))],
                        _.ExeDomain, itins.rm>,
               EVEX, EVEX_KZ, Sched<[WriteLoad]>;
  }

  // Masked loads with an undef or all-zeros pass-through select the
  // zero-masking load.
  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(NAME#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;

  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
            (!cast<Instruction>(NAME#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;

  // Masked loads with a register pass-through select the merge-masking load.
  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
            (!cast<Instruction>(NAME#_.ZSuffix##rmk) _.RC:$src0,
                                                     _.KRCWM:$mask, addr:$ptr)>;
}
3192
// Aligned load/move forms at all three vector lengths.
//   Z     - 512-bit, requires prd.
//   Z256  - 256-bit, requires prd and HasVLX.
//   Z128  - 128-bit, requires prd and HasVLX.
// Each length uses its own AlignedLdFrag and the matching
// masked_load_aligned{512,256,128} fragment; NoRMPattern is forwarded to
// avx512_load unchanged.
multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo _,
                                 Predicate prd,
                                 bit NoRMPattern = 0> {
  let Predicates = [prd] in {
    defm Z : avx512_load<opc, OpcodeStr, SSE_MOVA, _.info512,
                         _.info512.AlignedLdFrag, masked_load_aligned512,
                         NoRMPattern>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_load<opc, OpcodeStr, SSE_MOVA, _.info256,
                            _.info256.AlignedLdFrag, masked_load_aligned256,
                            NoRMPattern>,
                EVEX_V256;
    defm Z128 : avx512_load<opc, OpcodeStr, SSE_MOVA, _.info128,
                            _.info128.AlignedLdFrag, masked_load_aligned128,
                            NoRMPattern>,
                EVEX_V128;
  }
}
3211
// Unaligned load/move forms at all three vector lengths.
//   Z     - 512-bit, requires prd.
//   Z256  - 256-bit, requires prd and HasVLX.
//   Z128  - 128-bit, requires prd and HasVLX.
// Every length uses its own LdFrag together with masked_load_unaligned;
// NoRMPattern and SelectOprr are forwarded to avx512_load unchanged.
multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
                          AVX512VLVectorVTInfo _,
                          Predicate prd,
                          bit NoRMPattern = 0,
                          SDPatternOperator SelectOprr = vselect> {
  let Predicates = [prd] in {
    defm Z : avx512_load<opc, OpcodeStr, SSE_MOVU, _.info512,
                         _.info512.LdFrag, masked_load_unaligned,
                         NoRMPattern, SelectOprr>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_load<opc, OpcodeStr, SSE_MOVU, _.info256,
                            _.info256.LdFrag, masked_load_unaligned,
                            NoRMPattern, SelectOprr>,
                EVEX_V256;
    defm Z128 : avx512_load<opc, OpcodeStr, SSE_MOVU, _.info128,
                            _.info128.LdFrag, masked_load_unaligned,
                            NoRMPattern, SelectOprr>,
                EVEX_V128;
  }
}
3231
// Store forms of an EVEX vector move for one vector type.
//
// Instructions defined:
//   rr_REV / rrk_REV / rrkz_REV - register-to-register forms using the store
//                  opcode (MRMDestReg); printed with a ".s" suffix, carry no
//                  patterns, and are linked via FoldGenData to the Name#rr /
//                  Name#rrk / Name#rrkz records.
//   mr   - unmasked store; its pattern is omitted when NoMRPattern is set.
//   mrk  - masked store; selected through the mstore pattern below.
//
// Parameters:
//   itins    - itineraries; .rr for the register forms, .mr for the stores.
//   st_frag  - store fragment matched by mr.
//   mstore   - masked-store fragment mapped onto mrk.
//   Name     - record-name prefix used for the FoldGenData references.
multiclass avx512_store<bits<8> opc, string OpcodeStr, MoveLoadStoreItins itins,
                        X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
                        string Name, bit NoMRPattern = 0> {
  let hasSideEffects = 0 in {
    def rr_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
                          !strconcat(OpcodeStr, ".s\t{$src, $dst|$dst, $src}"),
                          [], _.ExeDomain, itins.rr>,
                 EVEX, FoldGenData<Name#rr>, Sched<[WriteMove]>;

    def rrk_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
                           (ins _.KRCWM:$mask, _.RC:$src),
                           !strconcat(OpcodeStr,
                                      ".s\t{$src, ${dst} {${mask}}|",
                                      "${dst} {${mask}}, $src}"),
                           [], _.ExeDomain, itins.rr>,
                  EVEX, EVEX_K, FoldGenData<Name#rrk>, Sched<[WriteMove]>;

    def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
                            (ins _.KRCWM:$mask, _.RC:$src),
                            !strconcat(OpcodeStr,
                                       ".s\t{$src, ${dst} {${mask}} {z}|",
                                       "${dst} {${mask}} {z}, $src}"),
                            [], _.ExeDomain, itins.rr>,
                   EVEX, EVEX_KZ, FoldGenData<Name#rrkz>, Sched<[WriteMove]>;
  }

  // These flags cover mr only; mrk below is deliberately outside this scope.
  let hasSideEffects = 0, mayStore = 1 in {
    def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                      OpcodeStr # "\t{$src, $dst|$dst, $src}",
                      !if(NoMRPattern, [],
                          [(st_frag (_.VT _.RC:$src), addr:$dst)]),
                      _.ExeDomain, itins.mr>,
             EVEX, Sched<[WriteStore]>;
  }

  def mrk : AVX512PI<opc, MRMDestMem, (outs),
                     (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
                     !strconcat(OpcodeStr,
                       "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
                     [], _.ExeDomain, itins.mr>,
            EVEX, EVEX_K, Sched<[WriteStore]>;

  // Masked stores select the masked memory form.
  def : Pat<(mstore addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src)),
            (!cast<Instruction>(NAME#_.ZSuffix##mrk) addr:$ptr,
                                                     _.KRCWM:$mask, _.RC:$src)>;
}
3269
Robert Khasanov7ca7df02014-08-04 14:35:15 +00003270
// Unaligned store forms at all three vector lengths.
//   Z     - 512-bit, requires prd.
//   Z256  - 256-bit, requires prd and HasVLX.
//   Z128  - 128-bit, requires prd and HasVLX.
// All lengths use the plain `store` fragment and masked_store_unaligned.
// Name is extended with the length suffix before being passed on;
// NoMRPattern is forwarded to avx512_store unchanged.
multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
                            AVX512VLVectorVTInfo _, Predicate prd,
                            string Name, bit NoMRPattern = 0> {
  let Predicates = [prd] in {
    defm Z : avx512_store<opc, OpcodeStr, SSE_MOVU, _.info512, store,
                          masked_store_unaligned, Name#Z, NoMRPattern>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, SSE_MOVU, _.info256, store,
                             masked_store_unaligned, Name#Z256, NoMRPattern>,
                EVEX_V256;
    defm Z128 : avx512_store<opc, OpcodeStr, SSE_MOVU, _.info128, store,
                             masked_store_unaligned, Name#Z128, NoMRPattern>,
                EVEX_V128;
  }
}
3287
// Aligned store forms at all three vector lengths.
//   Z     - 512-bit, requires prd.
//   Z256  - 256-bit, requires prd and HasVLX.
//   Z128  - 128-bit, requires prd and HasVLX.
// All lengths use the `alignedstore` fragment and the matching
// masked_store_aligned{512,256,128} fragment. Name is extended with the
// length suffix before being passed on; NoMRPattern is forwarded to
// avx512_store unchanged.
multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _, Predicate prd,
                                  string Name, bit NoMRPattern = 0> {
  let Predicates = [prd] in {
    defm Z : avx512_store<opc, OpcodeStr, SSE_MOVA, _.info512, alignedstore,
                          masked_store_aligned512, Name#Z, NoMRPattern>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, SSE_MOVA, _.info256, alignedstore,
                             masked_store_aligned256, Name#Z256, NoMRPattern>,
                EVEX_V256;
    defm Z128 : avx512_store<opc, OpcodeStr, SSE_MOVA, _.info128, alignedstore,
                             masked_store_aligned128, Name#Z128, NoMRPattern>,
                EVEX_V128;
  }
}
3305
// Aligned floating-point moves.
defm VMOVAPS :
    avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info, HasAVX512>,
    avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info, HasAVX512,
                           "VMOVAPS">,
    PS, EVEX_CD8<32, CD8VF>;

defm VMOVAPD :
    avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info, HasAVX512>,
    avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info, HasAVX512,
                           "VMOVAPD">,
    PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Unaligned floating-point moves. null_frag is passed as the select operator
// for the register-register masked forms.
defm VMOVUPS :
    avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
                   0, null_frag>,
    avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
                    "VMOVUPS">,
    PS, EVEX_CD8<32, CD8VF>;

defm VMOVUPD :
    avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
                   0, null_frag>,
    avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
                    "VMOVUPD">,
    PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Aligned integer moves. The 32-bit-element variant is defined without
// plain load/store selection patterns (NoRMPattern / NoMRPattern = 1).
defm VMOVDQA32 :
    avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info, HasAVX512, 1>,
    avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info, HasAVX512,
                           "VMOVDQA32", 1>,
    PD, EVEX_CD8<32, CD8VF>;

defm VMOVDQA64 :
    avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info, HasAVX512>,
    avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info, HasAVX512,
                           "VMOVDQA64">,
    PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Unaligned integer moves. The 8-, 16- and 32-bit-element variants are
// defined without plain load/store selection patterns; the byte and word
// variants require HasBWI.
defm VMOVDQU8 :
    avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI, 1>,
    avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
                    "VMOVDQU8", 1>,
    XD, EVEX_CD8<8, CD8VF>;

defm VMOVDQU16 :
    avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI, 1>,
    avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
                    "VMOVDQU16", 1>,
    XD, VEX_W, EVEX_CD8<16, CD8VF>;

defm VMOVDQU32 :
    avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
                   1, null_frag>,
    avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
                    "VMOVDQU32", 1>,
    XS, EVEX_CD8<32, CD8VF>;

defm VMOVDQU64 :
    avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
                   0, null_frag>,
    avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
                    "VMOVDQU64">,
    XS, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovsky1f3ed412013-10-22 09:19:28 +00003363
// Special load pseudos to help with spilling when we don't have VLX. We need
// to load through a ZMM register instead. These are converted in
// expandPostRAPseudos.
//
// The aligned pseudos carry the aligned-move load itinerary. The unaligned
// (VMOVUPS) pseudos use SSE_MOVU.rm, the same itinerary avx512_load_vl passes
// for the real unaligned loads, rather than the aligned-move one.
let isReMaterializable = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteLoad], mayLoad = 1, hasSideEffects = 0 in {
def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
                            "", [], IIC_SSE_MOVA_P_RM>;
def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
                            "", [], IIC_SSE_MOVA_P_RM>;
def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
                            "", [], SSE_MOVU.rm>;
def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
                            "", [], SSE_MOVU.rm>;
}
3378
// Special store pseudos to help with spilling when we don't have VLX: the
// 128/256-bit value is stored through a ZMM register instead. These are
// converted in expandPostRAPseudos.
//
// The aligned pseudos carry the aligned-move store itinerary. The unaligned
// (VMOVUPS) pseudos use SSE_MOVU.mr, the same itinerary avx512_store_vl passes
// for the real unaligned stores, rather than the aligned-move one.
let isPseudo = 1, SchedRW = [WriteStore], mayStore = 1, hasSideEffects = 0 in {
def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
                            "", [], IIC_SSE_MOVA_P_MR>;
def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
                            "", [], IIC_SSE_MOVA_P_MR>;
def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
                            "", [], SSE_MOVU.mr>;
def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
                            "", [], SSE_MOVU.mr>;
}
3389
// A select whose "true" operand is all zeros is a zero-masking move of the
// "false" operand under the inverted mask. The v8i1 mask is widened to VK16
// so the 16-bit KNOT can be applied, then narrowed back to VK8.
def : Pat<(v8i64 (vselect VK8WM:$mask,
                          (bc_v8i64 (v16i32 immAllZerosV)),
                          (v8i64 VR512:$src))),
          (VMOVDQA64Zrrkz
              (COPY_TO_REGCLASS
                  (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
                  VK8),
              VR512:$src)>;

def : Pat<(v16i32 (vselect VK16WM:$mask,
                           (v16i32 immAllZerosV),
                           (v16i32 VR512:$src))),
          (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
Elena Demikhovskyf1de34b2014-12-04 09:40:44 +00003398
// When the select mask is already an inversion (xor with all ones), use the
// un-inverted mask directly with the zero-masking move. This keeps the
// generic zero-select patterns from adding a second mask inversion on top of
// the existing one.
def : Pat<(v8i64 (vselect (xor VK8:$mask, (v8i1 immAllOnesV)),
                          (bc_v8i64 (v16i32 immAllZerosV)),
                          (v8i64 VR512:$src))),
          (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;

def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
                           (v16i32 immAllZerosV),
                           (v16i32 VR512:$src))),
          (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
3409
// Lowers a masked select on a Narrow vector type by performing the masked
// move on the Wide type and extracting the Narrow subregister of the result.
//   InstrStr - name prefix of the wide move; "rrk" / "rrkz" are appended.
//   Narrow   - type of the select being matched.
//   Wide     - type the move is actually performed on.
// Narrow operands are placed into an IMPLICIT_DEF wide register at
// Narrow.SubRegIdx, and the mask is copied into Wide.KRCWM.
multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
                              X86VectorVTInfo Wide> {
  // select(mask, src1, src0): merge-masking move with src0 as pass-through.
  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                                Narrow.RC:$src1, Narrow.RC:$src0)),
            (EXTRACT_SUBREG
                (Wide.VT
                    (!cast<Instruction>(InstrStr#"rrk")
                        (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF),
                                                Narrow.RC:$src0,
                                                Narrow.SubRegIdx)),
                        (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
                        (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF),
                                                Narrow.RC:$src1,
                                                Narrow.SubRegIdx)))),
                Narrow.SubRegIdx)>;

  // select(mask, src1, zero): zero-masking move.
  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                                Narrow.RC:$src1, Narrow.ImmAllZerosV)),
            (EXTRACT_SUBREG
                (Wide.VT
                    (!cast<Instruction>(InstrStr#"rrkz")
                        (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
                        (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF),
                                                Narrow.RC:$src1,
                                                Narrow.SubRegIdx)))),
                Narrow.SubRegIdx)>;
}
3431
// Masked selects of 128-bit and 256-bit vectors with 32- or 64-bit elements
// when VLX isn't available: perform the masked move as a 512-bit operation
// and extract the narrow result.
let Predicates = [HasAVX512, NoVLX] in {
  // 32-bit elements.
  defm : mask_move_lowering<"VMOVAPSZ",   v4f32x_info, v16f32_info>;
  defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
  defm : mask_move_lowering<"VMOVAPSZ",   v8f32x_info, v16f32_info>;
  defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;

  // 64-bit elements.
  defm : mask_move_lowering<"VMOVAPDZ",   v2f64x_info, v8f64_info>;
  defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
  defm : mask_move_lowering<"VMOVAPDZ",   v4f64x_info, v8f64_info>;
  defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
}
3445
// Masked selects of 128-bit and 256-bit vectors with byte or word elements
// when BWI is available but VLX is not: perform the masked move as a 512-bit
// operation and extract the narrow result.
let Predicates = [HasBWI, NoVLX] in {
  // Byte elements.
  defm : mask_move_lowering<"VMOVDQU8Z",  v16i8x_info,  v64i8_info>;
  defm : mask_move_lowering<"VMOVDQU8Z",  v32i8x_info,  v64i8_info>;

  // Word elements.
  defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info,  v32i16_info>;
  defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
}
3453
// 512-bit integer stores. Stores of v16i32, v32i16 and v64i8 values are all
// selected to the 64-bit-element move instructions.
let Predicates = [HasAVX512] in {
  // Aligned.
  def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;

  // Unaligned.
  def : Pat<(store (v16i32 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v32i16 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v64i8 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
}
3469
// 128-bit and 256-bit integer stores when VLX is available. Stores of
// 32-, 16- and 8-bit-element vectors are all selected to the
// 64-bit-element move instructions of the matching vector length.
let Predicates = [HasVLX] in {
  // 128-bit, aligned.
  def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;

  // 128-bit, unaligned.
  def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;

  // 256-bit, aligned.
  def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;

  // 256-bit, unaligned.
  def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
}
3499
// Folds a masked select over an index-0 subvector extract into a single
// masked register move.
//   InstrStr - name prefix of the move; "rrk" / "rrkz" are appended.
//   From     - type of the vector being extracted from.
//   To       - type produced by the extract_subvector.
//   Cast     - type the extract is bitconverted to, and on which the select
//              (and therefore the masking) operates.
// The extract itself becomes an EXTRACT_SUBREG of the source register at
// To.SubRegIdx.
multiclass masked_move_for_extract<string InstrStr, X86VectorVTInfo From,
                                   X86VectorVTInfo To, X86VectorVTInfo Cast> {
  // Pass-through in a register: merge-masking move.
  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                                  (To.VT (extract_subvector
                                             (From.VT From.RC:$src),
                                             (iPTR 0)))),
                              To.RC:$src0)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
                         Cast.RC:$src0, Cast.KRCWM:$mask,
                         (EXTRACT_SUBREG From.RC:$src, To.SubRegIdx)))>;

  // All-zeros pass-through: zero-masking move.
  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                                  (To.VT (extract_subvector
                                             (From.VT From.RC:$src),
                                             (iPTR 0)))),
                              Cast.ImmAllZerosV)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
                         Cast.KRCWM:$mask,
                         (EXTRACT_SUBREG From.RC:$src, To.SubRegIdx)))>;
}
3520
3521
// Select a masked extract of the low subvector of a wider register as a
// masked register move at the narrower width. Every instantiation in this
// group requires VLX.
// NOTE(review): the arguments look like <move instruction prefix, source
// vector type, extracted type, type the mask is applied in> -- confirm against
// the masked_move_for_extract multiclass.
let Predicates = [HasVLX] in {

// Low 128 bits of a 256-bit vector.
defm : masked_move_for_extract<"VMOVDQA64Z128", v4i64x_info,  v2i64x_info, v2i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z128", v8i32x_info,  v4i32x_info, v2i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z128", v16i16x_info, v8i16x_info, v2i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z128", v32i8x_info,  v16i8x_info, v2i64x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z128", v4i64x_info,  v2i64x_info, v4i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z128", v8i32x_info,  v4i32x_info, v4i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z128", v16i16x_info, v8i16x_info, v4i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z128", v32i8x_info,  v16i8x_info, v4i32x_info>;
defm : masked_move_for_extract<"VMOVAPDZ128",   v4f64x_info,  v2f64x_info, v2f64x_info>;
defm : masked_move_for_extract<"VMOVAPDZ128",   v8f32x_info,  v4f32x_info, v2f64x_info>;
defm : masked_move_for_extract<"VMOVAPSZ128",   v4f64x_info,  v2f64x_info, v4f32x_info>;
defm : masked_move_for_extract<"VMOVAPSZ128",   v8f32x_info,  v4f32x_info, v4f32x_info>;

// Low 128 bits of a 512-bit vector.
defm : masked_move_for_extract<"VMOVDQA64Z128", v8i64_info,  v2i64x_info, v2i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z128", v16i32_info, v4i32x_info, v2i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z128", v32i16_info, v8i16x_info, v2i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z128", v64i8_info,  v16i8x_info, v2i64x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z128", v8i64_info,  v2i64x_info, v4i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z128", v16i32_info, v4i32x_info, v4i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z128", v32i16_info, v8i16x_info, v4i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z128", v64i8_info,  v16i8x_info, v4i32x_info>;
defm : masked_move_for_extract<"VMOVAPDZ128",   v8f64_info,  v2f64x_info, v2f64x_info>;
defm : masked_move_for_extract<"VMOVAPDZ128",   v16f32_info, v4f32x_info, v2f64x_info>;
defm : masked_move_for_extract<"VMOVAPSZ128",   v8f64_info,  v2f64x_info, v4f32x_info>;
defm : masked_move_for_extract<"VMOVAPSZ128",   v16f32_info, v4f32x_info, v4f32x_info>;

// Low 256 bits of a 512-bit vector.
defm : masked_move_for_extract<"VMOVDQA64Z256", v8i64_info,  v4i64x_info,  v4i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z256", v16i32_info, v8i32x_info,  v4i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z256", v32i16_info, v16i16x_info, v4i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z256", v64i8_info,  v32i8x_info,  v4i64x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z256", v8i64_info,  v4i64x_info,  v8i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z256", v16i32_info, v8i32x_info,  v8i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z256", v32i16_info, v16i16x_info, v8i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z256", v64i8_info,  v32i8x_info,  v8i32x_info>;
defm : masked_move_for_extract<"VMOVAPDZ256",   v8f64_info,  v4f64x_info,  v4f64x_info>;
defm : masked_move_for_extract<"VMOVAPDZ256",   v16f32_info, v8f32x_info,  v4f64x_info>;
defm : masked_move_for_extract<"VMOVAPSZ256",   v8f64_info,  v4f64x_info,  v8f32x_info>;
defm : masked_move_for_extract<"VMOVAPSZ256",   v16f32_info, v8f32x_info,  v8f32x_info>;

} // Predicates = [HasVLX]
Simon Pilgrimb2a80952017-01-08 16:45:39 +00003568
// Move Int Doubleword / Quadword to Packed Int, plus the codegen-only
// GR64 <-> FR64X bitcast forms. All are in the packed-integer domain.
//
let ExeDomain = SSEPackedInt in {

// GR32 -> element 0 of a v4i32.
def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg,
                             (outs VR128X:$dst), (ins GR32:$src),
                             "vmovd\t{$src, $dst|$dst, $src}",
                             [(set VR128X:$dst,
                                   (v4i32 (scalar_to_vector GR32:$src)))],
                             IIC_SSE_MOVDQ>,
                    EVEX, Sched<[WriteMove]>;

// 32-bit load -> element 0 of a v4i32.
def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem,
                             (outs VR128X:$dst), (ins i32mem:$src),
                             "vmovd\t{$src, $dst|$dst, $src}",
                             [(set VR128X:$dst,
                                   (v4i32 (scalar_to_vector
                                             (loadi32 addr:$src))))],
                             IIC_SSE_MOVDQ>,
                    EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteLoad]>;

// GR64 -> element 0 of a v2i64.
def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg,
                              (outs VR128X:$dst), (ins GR64:$src),
                              "vmovq\t{$src, $dst|$dst, $src}",
                              [(set VR128X:$dst,
                                    (v2i64 (scalar_to_vector GR64:$src)))],
                              IIC_SSE_MOVDQ>,
                     EVEX, VEX_W, Sched<[WriteMove]>;

// Memory form of the above. It has no selection pattern, so the load flag is
// set explicitly; it is codegen-only but still forced into the disassembler.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
    mayLoad = 1 in
def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem,
                              (outs VR128X:$dst), (ins i64mem:$src),
                              "vmovq\t{$src, $dst|$dst, $src}",
                              [], IIC_SSE_MOVDQ>,
                     EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteLoad]>;

// Bitcasts between GR64 / 64-bit memory and the scalar FR64X class.
let isCodeGenOnly = 1 in {
def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg,
                             (outs FR64X:$dst), (ins GR64:$src),
                             "vmovq\t{$src, $dst|$dst, $src}",
                             [(set FR64X:$dst, (bitconvert GR64:$src))],
                             IIC_SSE_MOVDQ>,
                    EVEX, VEX_W, Sched<[WriteMove]>;

def VMOV64toSDZrm : AVX512XSI<0x7E, MRMSrcMem,
                              (outs FR64X:$dst), (ins i64mem:$src),
                              "vmovq\t{$src, $dst|$dst, $src}",
                              [(set FR64X:$dst,
                                    (bitconvert (loadi64 addr:$src)))]>,
                    EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteLoad]>;

def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg,
                             (outs GR64:$dst), (ins FR64X:$src),
                             "vmovq\t{$src, $dst|$dst, $src}",
                             [(set GR64:$dst, (bitconvert FR64X:$src))],
                             IIC_SSE_MOVDQ>,
                    EVEX, VEX_W, Sched<[WriteMove]>;

def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem,
                             (outs), (ins i64mem:$dst, FR64X:$src),
                             "vmovq\t{$src, $dst|$dst, $src}",
                             [(store (i64 (bitconvert FR64X:$src)),
                                     addr:$dst)],
                             IIC_SSE_MOVDQ>,
                    EVEX, VEX_W, Sched<[WriteStore]>,
                    EVEX_CD8<64, CD8VT1>;
} // isCodeGenOnly = 1

} // ExeDomain = SSEPackedInt
3612
// Move Int Doubleword to Single Scalar
//
// Codegen-only bitcasts from GR32 / 32-bit memory into the FR32X scalar class.
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {

def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg,
                            (outs FR32X:$dst), (ins GR32:$src),
                            "vmovd\t{$src, $dst|$dst, $src}",
                            [(set FR32X:$dst, (bitconvert GR32:$src))],
                            IIC_SSE_MOVDQ>,
                   EVEX, Sched<[WriteMove]>;

def VMOVDI2SSZrm : AVX512BI<0x6E, MRMSrcMem,
                            (outs FR32X:$dst), (ins i32mem:$src),
                            "vmovd\t{$src, $dst|$dst, $src}",
                            [(set FR32X:$dst,
                                  (bitconvert (loadi32 addr:$src)))],
                            IIC_SSE_MOVDQ>,
                   EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteLoad]>;

} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3626
// Move doubleword from xmm register to r/m32
//
// Both forms read element 0 of the source viewed as v4i32.
let ExeDomain = SSEPackedInt in {

def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg,
                             (outs GR32:$dst), (ins VR128X:$src),
                             "vmovd\t{$src, $dst|$dst, $src}",
                             [(set GR32:$dst,
                                   (extractelt (v4i32 VR128X:$src), (iPTR 0)))],
                             IIC_SSE_MOVD_ToGP>,
                    EVEX, Sched<[WriteMove]>;

def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem,
                             (outs), (ins i32mem:$dst, VR128X:$src),
                             "vmovd\t{$src, $dst|$dst, $src}",
                             [(store (i32 (extractelt (v4i32 VR128X:$src),
                                                      (iPTR 0))),
                                     addr:$dst)],
                             IIC_SSE_MOVDQ>,
                    EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteStore]>;

} // ExeDomain = SSEPackedInt
3642
// Move quadword from xmm1 register to r/m64
//
let ExeDomain = SSEPackedInt in {

// Element 0 of a v2i64 -> GR64. The destination is a 64-bit GPR, so this form
// is restricted to 64-bit mode.
def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
                                                   (iPTR 0)))],
                      IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_W, Sched<[WriteMove]>,
                      Requires<[HasAVX512, In64BitMode]>;

// Memory-destination twin of the 0x7E form. It has no pattern, so the store
// flag is given explicitly; codegen-only but still forced into the
// disassembler.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [], IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_W, Sched<[WriteStore]>,
                      Requires<[HasAVX512, In64BitMode]>;

// Store of the low quadword through opcode 0xD6. No 64-bit GPR is involved,
// so unlike the 0x7E forms above this does not need In64BitMode; only AVX512
// is required.
def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
                      (ins i64mem:$dst, VR128X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
                              addr:$dst)], IIC_SSE_MOVDQ>,
                      EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
                      Sched<[WriteStore]>, Requires<[HasAVX512]>;

// Register-to-register 0xD6 form, printed with a ".s" suffix. No pattern.
let hasSideEffects = 0 in
def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
                             (ins VR128X:$src),
                             "vmovq.s\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVDQ>,
                             EVEX, VEX_W, Sched<[WriteMove]>;
} // ExeDomain = SSEPackedInt
3673
// Move Scalar Single to Double Int
//
// Codegen-only bitcasts from the FR32X scalar class to GR32 / 32-bit memory.
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {

def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg,
                            (outs GR32:$dst), (ins FR32X:$src),
                            "vmovd\t{$src, $dst|$dst, $src}",
                            [(set GR32:$dst, (bitconvert FR32X:$src))],
                            IIC_SSE_MOVD_ToGP>,
                   EVEX, Sched<[WriteMove]>;

def VMOVSS2DIZmr : AVX512BI<0x7E, MRMDestMem,
                            (outs), (ins i32mem:$dst, FR32X:$src),
                            "vmovd\t{$src, $dst|$dst, $src}",
                            [(store (i32 (bitconvert FR32X:$src)), addr:$dst)],
                            IIC_SSE_MOVDQ>,
                   EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteStore]>;

} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3688
// Move Quadword Int to Packed Quadword Int
//
// 64-bit load placed into element 0 of a v2i64.
let ExeDomain = SSEPackedInt in {

def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem,
                              (outs VR128X:$dst), (ins i64mem:$src),
                              "vmovq\t{$src, $dst|$dst, $src}",
                              [(set VR128X:$dst,
                                    (v2i64 (scalar_to_vector
                                              (loadi64 addr:$src))))]>,
                    EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteLoad]>;

} // ExeDomain = SSEPackedInt
3699
// Accept the "vmovd" spelling for the 64-bit GPR <-> XMM moves. The trailing
// 0 keeps the alias parse-only, so these are still printed as "vmovq".
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOV64toPQIZrr VR128X:$dst, GR64:$src),
                0>;
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOVPQIto64Zrr GR64:$dst, VR128X:$src),
                0>;
3705
Simon Pilgrimb2a80952017-01-08 16:45:39 +00003706//===----------------------------------------------------------------------===//
3707// AVX-512 MOVSS, MOVSD
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003708//===----------------------------------------------------------------------===//
3709
// Scalar move family shared by VMOVSS and VMOVSD.
//   asm    - mnemonic
//   OpNode - DAG node matched by the register-register forms
//   _      - type info supplying register classes, mask class and load fragment
// Defines: rr / rrk / rrkz (register merge, optionally masked),
//          rm / rmk / rmkz (scalar load, optionally masked),
//          mr / mrk        (scalar store, optionally masked).
multiclass avx512_move_scalar<string asm, SDNode OpNode,
                              X86VectorVTInfo _> {
  // Unmasked register form.
  def rr : AVX512PI<0x10, MRMSrcReg,
                    (outs _.RC:$dst),
                    (ins _.RC:$src1, _.RC:$src2),
                    !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set _.RC:$dst,
                          (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
                    _.ExeDomain, IIC_SSE_MOV_S_RR>,
           EVEX_4V, Sched<[WriteMove]>;

  // Zero-masked register form: the select falls back to an all-zeros vector.
  def rrkz : AVX512PI<0x10, MRMSrcReg,
                      (outs _.RC:$dst),
                      (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
                      !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
                                 "$dst {${mask}} {z}, $src1, $src2}"),
                      [(set _.RC:$dst,
                            (_.VT (X86selects
                                     _.KRCWM:$mask,
                                     (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                                     _.ImmAllZerosV)))],
                      _.ExeDomain, IIC_SSE_MOV_S_RR>,
             EVEX_4V, EVEX_KZ, Sched<[WriteMove]>;

  // Merge-masked register form: the select falls back to $src0, which is tied
  // to the destination.
  let Constraints = "$src0 = $dst" in
  def rrk : AVX512PI<0x10, MRMSrcReg,
                     (outs _.RC:$dst),
                     (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
                     !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
                                "$dst {${mask}}, $src1, $src2}"),
                     [(set _.RC:$dst,
                           (_.VT (X86selects
                                    _.KRCWM:$mask,
                                    (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                                    (_.VT _.RC:$src0))))],
                     _.ExeDomain, IIC_SSE_MOV_S_RR>,
            EVEX_4V, EVEX_K, Sched<[WriteMove]>;

  // Plain scalar load into the scalar register class.
  let canFoldAsLoad = 1, isReMaterializable = 1 in
  def rm : AVX512PI<0x10, MRMSrcMem,
                    (outs _.FRC:$dst),
                    (ins _.ScalarMemOp:$src),
                    !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
                    [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
                    _.ExeDomain, IIC_SSE_MOV_S_RM>,
           EVEX, Sched<[WriteLoad]>;

  // Masked loads carry no patterns, so mayLoad is stated explicitly.
  let mayLoad = 1, hasSideEffects = 0 in {
    let Constraints = "$src0 = $dst" in
    def rmk : AVX512PI<0x10, MRMSrcMem,
                       (outs _.RC:$dst),
                       (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
                       !strconcat(asm, "\t{$src, $dst {${mask}}|",
                                  "$dst {${mask}}, $src}"),
                       [], _.ExeDomain, IIC_SSE_MOV_S_RM>,
              EVEX, EVEX_K, Sched<[WriteLoad]>;

    def rmkz : AVX512PI<0x10, MRMSrcMem,
                        (outs _.RC:$dst),
                        (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
                        !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
                                   "$dst {${mask}} {z}, $src}"),
                        [], _.ExeDomain, IIC_SSE_MOV_S_RM>,
               EVEX, EVEX_KZ, Sched<[WriteLoad]>;
  }

  // Plain scalar store from the scalar register class.
  def mr: AVX512PI<0x11, MRMDestMem,
                   (outs),
                   (ins _.ScalarMemOp:$dst, _.FRC:$src),
                   !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
                   [(store _.FRC:$src, addr:$dst)],
                   _.ExeDomain, IIC_SSE_MOV_S_MR>,
          EVEX, Sched<[WriteStore]>;

  // Masked store; no pattern, so mayStore is stated explicitly.
  let mayStore = 1, hasSideEffects = 0 in
  def mrk: AVX512PI<0x11, MRMDestMem,
                    (outs),
                    (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.FRC:$src),
                    !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
                    [], _.ExeDomain, IIC_SSE_MOV_S_MR>,
           EVEX, EVEX_K, Sched<[WriteStore]>;
}
3762
// Single-precision scalar moves: XS prefix, 32-bit disp8 scaling.
defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, f32x_info>,
               VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;

// Double-precision scalar moves: XD prefix, VEX.W set, 64-bit disp8 scaling.
defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, f64x_info>,
               VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003768
Ayman Musa46af8f92016-11-13 14:29:32 +00003769
// Patterns that fold a scalar select, whose condition is bit 0 of a GR32,
// into the masked register forms of a scalar move.
//   InstrStr - instruction name prefix; "rrk" / "rrkz" is appended
//   OpNode   - the scalar move node being matched
//   ZeroFP   - leaf matching the floating-point zero of the element type
//   _        - 128-bit vector type info
// The instruction suffixes are quoted string literals so the paste never
// depends on how TableGen resolves a bare identifier.
multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
                                       PatLeaf ZeroFP, X86VectorVTInfo _> {

// select(mask, src1, src2) inserted into $src0 -> merge-masked move, with
// $src2 as the pass-through value.
def : Pat<(_.VT (OpNode _.RC:$src0,
                        (_.VT (scalar_to_vector
                                  (_.EltVT (X86selects (scalar_to_vector (and (i8 (trunc GR32:$mask)), (i8 1))),
                                                       (_.EltVT _.FRC:$src1),
                                                       (_.EltVT _.FRC:$src2))))))),
          (!cast<Instruction>(InstrStr#"rrk")
                        (COPY_TO_REGCLASS _.FRC:$src2, _.RC),
                        (COPY_TO_REGCLASS GR32:$mask, VK1WM),
                        (_.VT _.RC:$src0),
                        (COPY_TO_REGCLASS _.FRC:$src1, _.RC))>;

// select(mask, src1, 0.0) inserted into $src0 -> zero-masked move.
def : Pat<(_.VT (OpNode _.RC:$src0,
                        (_.VT (scalar_to_vector
                                  (_.EltVT (X86selects (scalar_to_vector (and (i8 (trunc GR32:$mask)), (i8 1))),
                                                       (_.EltVT _.FRC:$src1),
                                                       (_.EltVT ZeroFP))))))),
          (!cast<Instruction>(InstrStr#"rrkz")
                        (COPY_TO_REGCLASS GR32:$mask, VK1WM),
                        (_.VT _.RC:$src0),
                        (COPY_TO_REGCLASS _.FRC:$src1, _.RC))>;
}
3794
// Select a 512-bit masked_store whose stored value is a 128-bit vector widened
// through undef insert_subvectors as the masked scalar store "mrk".
//   InstrStr - instruction name prefix; "mrk" is appended
//   _        - 128/256/512-bit type info triple
//   Mask     - dag matched as the store's mask operand
//   MaskRC   - register class of the $mask operand referenced by Mask
// The instruction suffix is a quoted string literal so the paste never depends
// on how TableGen resolves a bare identifier.
multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
                                        dag Mask, RegisterClass MaskRC> {

def : Pat<(masked_store addr:$dst, Mask,
             (_.info512.VT (insert_subvector undef,
                               (_.info256.VT (insert_subvector undef,
                                                 (_.info128.VT _.info128.RC:$src),
                                                 (iPTR 0))),
                               (iPTR 0)))),
          (!cast<Instruction>(InstrStr#"mrk") addr:$dst,
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;

}
3809
// Same selection as avx512_store_scalar_lowering, for masks held in a register
// narrower than 32 bits: the mask is first placed into an i32 via
// INSERT_SUBREG at `subreg` and then copied to VK1WM.
//   InstrStr - instruction name prefix; "mrk" is appended
//   _        - 128/256/512-bit type info triple
//   Mask     - dag matched as the store's mask operand
//   MaskRC   - register class of the $mask operand referenced by Mask
//   subreg   - sub-register index at which MaskRC sits inside the i32
// The instruction suffix is a quoted string literal so the paste never depends
// on how TableGen resolves a bare identifier.
multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
                                               AVX512VLVectorVTInfo _,
                                               dag Mask, RegisterClass MaskRC,
                                               SubRegIndex subreg> {

def : Pat<(masked_store addr:$dst, Mask,
             (_.info512.VT (insert_subvector undef,
                               (_.info256.VT (insert_subvector undef,
                                                 (_.info128.VT _.info128.RC:$src),
                                                 (iPTR 0))),
                               (iPTR 0)))),
          (!cast<Instruction>(InstrStr#"mrk") addr:$dst,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;

}
3826
// Select the low 128 bits of a 512-bit masked_load as a masked scalar load.
//   InstrStr - instruction name prefix; "rmkz" / "rmk" is appended
//   _        - 128/256/512-bit type info triple
//   Mask     - dag matched as the load's mask operand
//   MaskRC   - register class of the $mask operand referenced by Mask
// The instruction suffixes are quoted string literals so the paste never
// depends on how TableGen resolves a bare identifier.
multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
                                       dag Mask, RegisterClass MaskRC> {

// All-zeros pass-through -> zero-masked load.
def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
                                        (_.info512.VT (bitconvert
                                                       (v16i32 immAllZerosV))))),
                           (iPTR 0))),
          (!cast<Instruction>(InstrStr#"rmkz")
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      addr:$srcAddr)>;

// Pass-through is X86vzmovl of $src widened with undef -> merge-masked load
// with $src as the tied pass-through operand.
def : Pat<(_.info128.VT (extract_subvector
                (_.info512.VT (masked_load addr:$srcAddr, Mask,
                      (_.info512.VT (insert_subvector undef,
                            (_.info256.VT (insert_subvector undef,
                                  (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                                  (iPTR 0))),
                            (iPTR 0))))),
                (iPTR 0))),
          (!cast<Instruction>(InstrStr#"rmk") _.info128.RC:$src,
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      addr:$srcAddr)>;

}
3852
// Same selection as avx512_load_scalar_lowering, for masks held in a register
// narrower than 32 bits: the mask is first placed into an i32 via
// INSERT_SUBREG at `subreg` and then copied to VK1WM.
//   InstrStr - instruction name prefix; "rmkz" / "rmk" is appended
//   _        - 128/256/512-bit type info triple
//   Mask     - dag matched as the load's mask operand
//   MaskRC   - register class of the $mask operand referenced by Mask
//   subreg   - sub-register index at which MaskRC sits inside the i32
// The instruction suffixes are quoted string literals so the paste never
// depends on how TableGen resolves a bare identifier.
multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
                                              AVX512VLVectorVTInfo _,
                                              dag Mask, RegisterClass MaskRC,
                                              SubRegIndex subreg> {

// All-zeros pass-through -> zero-masked load.
def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
                                        (_.info512.VT (bitconvert
                                                       (v16i32 immAllZerosV))))),
                           (iPTR 0))),
          (!cast<Instruction>(InstrStr#"rmkz")
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

// Pass-through is X86vzmovl of $src widened with undef -> merge-masked load
// with $src as the tied pass-through operand.
def : Pat<(_.info128.VT (extract_subvector
                (_.info512.VT (masked_load addr:$srcAddr, Mask,
                      (_.info512.VT (insert_subvector undef,
                            (_.info256.VT (insert_subvector undef,
                                  (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                                  (iPTR 0))),
                            (iPTR 0))))),
                (iPTR 0))),
          (!cast<Instruction>(InstrStr#"rmk") _.info128.RC:$src,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

}
3880
// Masked register-move lowering for both scalar FP types.
defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;

// Masked scalar stores. The mask dag keeps only bit 0 of the incoming GPR
// before it is bitcast to the vXi1 mask type.
defm : avx512_store_scalar_lowering<
           "VMOVSSZ", avx512vl_f32_info,
           (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))),
           GR32>;
defm : avx512_store_scalar_lowering_subreg<
           "VMOVSSZ", avx512vl_f32_info,
           (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))),
           GR16, sub_16bit>;
defm : avx512_store_scalar_lowering_subreg<
           "VMOVSDZ", avx512vl_f64_info,
           (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
           GR8, sub_8bit>;

// Masked scalar loads, using the same three mask shapes as the stores.
defm : avx512_load_scalar_lowering<
           "VMOVSSZ", avx512vl_f32_info,
           (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))),
           GR32>;
defm : avx512_load_scalar_lowering_subreg<
           "VMOVSSZ", avx512vl_f32_info,
           (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))),
           GR16, sub_16bit>;
defm : avx512_load_scalar_lowering_subreg<
           "VMOVSDZ", avx512vl_f64_info,
           (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
           GR8, sub_8bit>;
Ayman Musa46af8f92016-11-13 14:29:32 +00003897
// Scalar FP selects are emitted as merge-masked scalar moves: $src2 is the
// tied pass-through operand, $src1 is the value taken when the mask bit is
// set, and the remaining vector source operand is IMPLICIT_DEF. The result is
// copied back to the scalar register class.

// f32, condition is bit 0 of a GR8.
def : Pat<(f32 (X86selects (scalar_to_vector (and GR8:$mask, (i8 1))),
                           (f32 FR32X:$src1),
                           (f32 FR32X:$src2))),
          (COPY_TO_REGCLASS
            (VMOVSSZrrk
              (COPY_TO_REGCLASS FR32X:$src2, VR128X),
              (COPY_TO_REGCLASS
                (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$mask, sub_8bit)),
                VK1WM),
              (v4f32 (IMPLICIT_DEF)),
              (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
            FR32X)>;

// f32, condition already in a mask register.
def : Pat<(f32 (X86selects VK1WM:$mask,
                           (f32 FR32X:$src1),
                           (f32 FR32X:$src2))),
          (COPY_TO_REGCLASS
            (VMOVSSZrrk
              (COPY_TO_REGCLASS FR32X:$src2, VR128X),
              VK1WM:$mask,
              (v4f32 (IMPLICIT_DEF)),
              (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
            FR32X)>;

// f64, condition is bit 0 of a GR8.
def : Pat<(f64 (X86selects (scalar_to_vector (and GR8:$mask, (i8 1))),
                           (f64 FR64X:$src1),
                           (f64 FR64X:$src2))),
          (COPY_TO_REGCLASS
            (VMOVSDZrrk
              (COPY_TO_REGCLASS FR64X:$src2, VR128X),
              (COPY_TO_REGCLASS
                (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$mask, sub_8bit)),
                VK1WM),
              (v2f64 (IMPLICIT_DEF)),
              (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
            FR64X)>;

// f64, condition already in a mask register.
def : Pat<(f64 (X86selects VK1WM:$mask,
                           (f64 FR64X:$src1),
                           (f64 FR64X:$src2))),
          (COPY_TO_REGCLASS
            (VMOVSDZrrk
              (COPY_TO_REGCLASS FR64X:$src2, VR128X),
              VK1WM:$mask,
              (v2f64 (IMPLICIT_DEF)),
              (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
            FR64X)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003925
// The mask.store.ss intrinsic becomes a masked scalar store. The GR8 mask is
// placed into an i32 via INSERT_SUBREG and copied to VK1WM; the vector source
// is copied to the scalar FR32X class.
def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask),
          (VMOVSSZmrk
            addr:$dst,
            (COPY_TO_REGCLASS
              (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$mask, sub_8bit)),
              VK1WM),
            (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
3929
// Store-opcode (0x11, MRMDestReg) register-register forms of the scalar
// moves, printed with a ".s" mnemonic suffix. None has a selection pattern;
// each names its regular counterpart through FoldGenData.
let hasSideEffects = 0 in {

  //--- vmovss.s ---------------------------------------------------------===//
  def VMOVSSZrr_REV : AVX512<0x11, MRMDestReg,
                             (outs VR128X:$dst),
                             (ins VR128X:$src1, VR128X:$src2),
                             "vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                             [], IIC_SSE_MOV_S_RR>,
                      XS, EVEX_4V, VEX_LIG,
                      FoldGenData<"VMOVSSZrr">, Sched<[WriteMove]>;

  // Merge-masked: $src0 is tied to the destination.
  let Constraints = "$src0 = $dst" in {
  def VMOVSSZrrk_REV : AVX512<0x11, MRMDestReg,
                              (outs VR128X:$dst),
                              (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
                                   VR128X:$src1, VR128X:$src2),
                              "vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
                              "$dst {${mask}}, $src1, $src2}",
                              [], IIC_SSE_MOV_S_RR>,
                       EVEX_K, XS, EVEX_4V, VEX_LIG,
                       FoldGenData<"VMOVSSZrrk">, Sched<[WriteMove]>;
  }

  // Zero-masked.
  def VMOVSSZrrkz_REV : AVX512<0x11, MRMDestReg,
                               (outs VR128X:$dst),
                               (ins f32x_info.KRCWM:$mask,
                                    VR128X:$src1, VR128X:$src2),
                               "vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                               "$dst {${mask}} {z}, $src1, $src2}",
                               [], IIC_SSE_MOV_S_RR>,
                        EVEX_KZ, XS, EVEX_4V, VEX_LIG,
                        FoldGenData<"VMOVSSZrrkz">, Sched<[WriteMove]>;

  //--- vmovsd.s ---------------------------------------------------------===//
  def VMOVSDZrr_REV : AVX512<0x11, MRMDestReg,
                             (outs VR128X:$dst),
                             (ins VR128X:$src1, VR128X:$src2),
                             "vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                             [], IIC_SSE_MOV_S_RR>,
                      XD, EVEX_4V, VEX_LIG, VEX_W,
                      FoldGenData<"VMOVSDZrr">, Sched<[WriteMove]>;

  // Merge-masked: $src0 is tied to the destination.
  let Constraints = "$src0 = $dst" in {
  def VMOVSDZrrk_REV : AVX512<0x11, MRMDestReg,
                              (outs VR128X:$dst),
                              (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
                                   VR128X:$src1, VR128X:$src2),
                              "vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
                              "$dst {${mask}}, $src1, $src2}",
                              [], IIC_SSE_MOV_S_RR>,
                       EVEX_K, XD, EVEX_4V, VEX_LIG,
                       VEX_W, FoldGenData<"VMOVSDZrrk">, Sched<[WriteMove]>;
  }

  // Zero-masked.
  def VMOVSDZrrkz_REV : AVX512<0x11, MRMDestReg,
                               (outs VR128X:$dst),
                               (ins f64x_info.KRCWM:$mask,
                                    VR128X:$src1, VR128X:$src2),
                               "vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                               "$dst {${mask}} {z}, $src1, $src2}",
                               [], IIC_SSE_MOV_S_RR>,
                        EVEX_KZ, XD, EVEX_4V, VEX_LIG,
                        VEX_W, FoldGenData<"VMOVSDZrrkz">, Sched<[WriteMove]>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003976
3977let Predicates = [HasAVX512] in {
3978 let AddedComplexity = 15 in {
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003979 def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
Craig Topper6fb55712017-10-04 17:20:12 +00003980 (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003981 def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
Craig Topper6fb55712017-10-04 17:20:12 +00003982 (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003983 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64X:$src)))),
Craig Topper6fb55712017-10-04 17:20:12 +00003984 (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
3985 (COPY_TO_REGCLASS FR64X:$src, VR128))>;
Craig Topper3f8126e2016-08-13 05:43:20 +00003986 }
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003987
3988 // Move low f32 and clear high bits.
3989 def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
3990 (SUBREG_TO_REG (i32 0),
Craig Topper09b7e0f2017-01-14 07:29:24 +00003991 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003992 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)), sub_xmm)>;
3993 def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
3994 (SUBREG_TO_REG (i32 0),
Craig Topper09b7e0f2017-01-14 07:29:24 +00003995 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
Craig Topper600685d2016-08-13 05:33:12 +00003996 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)), sub_xmm)>;
Craig Topper600685d2016-08-13 05:33:12 +00003997 def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
3998 (SUBREG_TO_REG (i32 0),
Craig Topper09b7e0f2017-01-14 07:29:24 +00003999 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
Craig Topper600685d2016-08-13 05:33:12 +00004000 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)), sub_xmm)>;
4001 def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4002 (SUBREG_TO_REG (i32 0),
Craig Topper09b7e0f2017-01-14 07:29:24 +00004003 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
Craig Topper600685d2016-08-13 05:33:12 +00004004 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)), sub_xmm)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004005
// Scalar-load patterns, given AddedComplexity = 20.
let AddedComplexity = 20 in {
  // MOVSSrm zeros the high parts of the register. The 128-bit forms below
  // select VMOVSSZrm and retag the result as VR128X with COPY_TO_REGCLASS.
  // The AVX versions also write: DST[255:128] <- 0
  def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
  def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
  def : Pat<(v4f32 (X86vzload addr:$src)),
            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;

  // MOVSDrm zeros the high parts of the register. The 128-bit forms below
  // select VMOVSDZrm and retag the result as VR128X with COPY_TO_REGCLASS.
  // The AVX versions also write: DST[255:128] <- 0
  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
  def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
  def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
  def : Pat<(v2f64 (X86vzload addr:$src)),
            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;

  // Represent the same patterns above but in the form they appear for
  // 256-bit types. Here the scalar load result is placed in the low xmm of
  // the wide register via SUBREG_TO_REG.
  def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
                   (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
  def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
                   (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v8f32 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
                   (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
  def : Pat<(v4f64 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;

  // Represent the same patterns above but in the form they appear for
  // 512-bit types
  def : Pat<(v16i32 (X86vzmovl (insert_subvector undef,
                    (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
  def : Pat<(v16f32 (X86vzmovl (insert_subvector undef,
                    (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v16f32 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v8f64 (X86vzmovl (insert_subvector undef,
                   (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
  def : Pat<(v8f64 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
}
// 256-bit integer form: a zero-extending move of a loaded i64 inserted at
// element 0 selects the 128-bit VMOVQI2PQIZrm, placed in the low xmm via
// SUBREG_TO_REG. Note this pattern sits outside the AddedComplexity = 20
// group.
def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
                 (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
          (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004066
// Move low f64 and clear high bits.
// Same shape as the 32-bit-element patterns: low xmm of $src goes through
// VMOVSDZrr with AVX512_128_SET0 as the first operand, then SUBREG_TO_REG
// rebuilds the wide value.
def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
          (SUBREG_TO_REG (i32 0),
           (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
            (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)), sub_xmm)>;
def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
          (SUBREG_TO_REG (i32 0),
           (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
            (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)), sub_xmm)>;

// Integer (i64 element) equivalents, also selected to VMOVSDZrr.
def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
          (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
           (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)), sub_xmm)>;
def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
          (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
           (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)), sub_xmm)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004083
// Extract and store.
// Storing element 0 of a v4f32 selects VMOVSSZmr; the vector register is
// retagged as FR32X with COPY_TO_REGCLASS so it can be used as the scalar
// source operand.
def : Pat<(store (f32 (extractelt (v4f32 VR128X:$src), (iPTR 0))),
           addr:$dst),
          (VMOVSSZmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004088
// Shuffle with VMOVSS
// Integer v4i32 X86Movss is selected to the same register-register VMOVSSZrr.
def : Pat<(v4i32 (X86Movss VR128X:$src1, VR128X:$src2)),
          (VMOVSSZrr (v4i32 VR128X:$src1), VR128X:$src2)>;

// When the second operand is a scalar FR32X wrapped in scalar_to_vector,
// retag it as VR128X instead of materializing a separate vector.
def : Pat<(v4f32 (X86Movss VR128X:$src1, (scalar_to_vector FR32X:$src2))),
          (VMOVSSZrr VR128X:$src1,
           (COPY_TO_REGCLASS FR32X:$src2, VR128X))>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004096
// Shuffle with VMOVSD
// Integer v2i64 X86Movsd is selected to the register-register VMOVSDZrr.
def : Pat<(v2i64 (X86Movsd VR128X:$src1, VR128X:$src2)),
          (VMOVSDZrr VR128X:$src1, VR128X:$src2)>;

// Scalar FR64X second operand: retag as VR128X with COPY_TO_REGCLASS.
def : Pat<(v2f64 (X86Movsd VR128X:$src1, (scalar_to_vector FR64X:$src2))),
          (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS FR64X:$src2, VR128X))>;

// Register-register X86Movlpd / X86Movlps are both selected to VMOVSDZrr.
def : Pat<(v2f64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
          (VMOVSDZrr VR128X:$src1, VR128X:$src2)>;
def : Pat<(v4f32 (X86Movlps VR128X:$src1, VR128X:$src2)),
          (VMOVSDZrr VR128X:$src1, VR128X:$src2)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004108}
4109
// EVEX-encoded register-to-register "vmovq" (opcode 0x7E, XS prefix class,
// VEX_W). Its selection pattern is X86vzmovl on v2i64, with
// AddedComplexity = 15.
let AddedComplexity = 15 in
def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
                                (ins VR128X:$src),
                                "vmovq\t{$src, $dst|$dst, $src}",
                                [(set VR128X:$dst, (v2i64 (X86vzmovl
                                                   (v2i64 VR128X:$src))))],
                                IIC_SSE_MOVQ_RR>, EVEX, VEX_W;
4117
// Zero-extending movd/movq selection patterns, available with AVX-512.
let Predicates = [HasAVX512] in {
  // GPR sources: scalar_to_vector + X86vzmovl selects the GPR->XMM moves.
  let AddedComplexity = 15 in {
    def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
              (VMOVDI2PDIZrr GR32:$src)>;

    def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
              (VMOV64toPQIZrr GR64:$src)>;

    def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
                     (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
              (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;

    def : Pat<(v8i64 (X86vzmovl (insert_subvector undef,
                     (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
              (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
  }
  // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
  // Memory sources (plus one register form) with AddedComplexity = 20.
  let AddedComplexity = 20 in {
    def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
              (VMOVDI2PDIZrm addr:$src)>;
    def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
              (VMOVDI2PDIZrm addr:$src)>;
    def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
              (VMOVDI2PDIZrm addr:$src)>;
    def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
              (VMOVDI2PDIZrm addr:$src)>;
    def : Pat<(v4i32 (X86vzload addr:$src)),
              (VMOVDI2PDIZrm addr:$src)>;
    def : Pat<(v8i32 (X86vzload addr:$src)),
              (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
    def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
              (VMOVQI2PQIZrm addr:$src)>;
    def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
              (VMOVZPQILo2PQIZrr VR128X:$src)>;
    def : Pat<(v2i64 (X86vzload addr:$src)),
              (VMOVQI2PQIZrm addr:$src)>;
    def : Pat<(v4i64 (X86vzload addr:$src)),
              (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
  }

  // Use regular 128-bit instructions to match 256-bit and 512-bit
  // scalar_to_vec+zext from a GR32.
  def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
                   (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (insert_subvector undef,
                    (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>;

  // Use regular 128-bit load instructions to match 512-bit X86vzload.
  def : Pat<(v16i32 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
  def : Pat<(v8i64 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
}
//===----------------------------------------------------------------------===//
// AVX-512 - Non-temporals
//===----------------------------------------------------------------------===//
// vmovntdqa load instructions (opcode 0x2A, T8PD) for 512/256/128-bit
// vectors. The instruction records carry no selection pattern ([]); loads are
// matched by separate Pat definitions. The 256/128-bit forms require HasVLX.
let SchedRW = [WriteLoad] in {
  def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
                        (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
                        [], SSEPackedInt>, EVEX, T8PD, EVEX_V512,
                        EVEX_CD8<64, CD8VF>;

  let Predicates = [HasVLX] in {
    def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
                             (ins i256mem:$src),
                             "vmovntdqa\t{$src, $dst|$dst, $src}",
                             [], SSEPackedInt>, EVEX, T8PD, EVEX_V256,
                             EVEX_CD8<64, CD8VF>;

    def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
                             (ins i128mem:$src),
                             "vmovntdqa\t{$src, $dst|$dst, $src}",
                             [], SSEPackedInt>, EVEX, T8PD, EVEX_V128,
                             EVEX_CD8<64, CD8VF>;
  }
}
4195
// One non-temporal store instruction ("mr" form) for a single vector type.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   _         - vector type info supplying MemOp, RC, VT, ExeDomain, EltSize.
//   st_frag   - store fragment to match (default: alignednontemporalstore).
//   itin      - itinerary class (default: IIC_SSE_MOVNT).
// The record is defined with SchedRW = [WriteStore] and AddedComplexity = 400.
multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                        PatFrag st_frag = alignednontemporalstore,
                        InstrItinClass itin = IIC_SSE_MOVNT> {
  let SchedRW = [WriteStore], AddedComplexity = 400 in
  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(st_frag (_.VT _.RC:$src), addr:$dst)],
                    _.ExeDomain, itin>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
}
4205
// Instantiates avx512_movnt for all three vector lengths of VTInfo:
// Z (512-bit) under HasAVX512, Z256/Z128 under HasAVX512 + HasVLX.
multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
                           AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256>, EVEX_V256;
    defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128>, EVEX_V128;
  }
}
4216
// Non-temporal store instructions for i64, f64 and f32 element vectors.
defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info>, PD;
defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info>, PD, VEX_W;
defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info>, PS;
Robert Khasanoved882972014-08-13 10:46:00 +00004220
// 512-bit non-temporal patterns for the element types not covered by the
// instruction definitions themselves: remaining integer stores map to
// VMOVNTDQZmr, and aligned non-temporal loads map to VMOVNTDQAZrm.
let Predicates = [HasAVX512], AddedComplexity = 400 in {
  def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;

  def : Pat<(v8f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v16f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v8i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
}
4236
// 256-bit and 128-bit non-temporal patterns (require HasVLX): integer stores
// map to VMOVNTDQZ256mr / VMOVNTDQZ128mr, aligned non-temporal loads map to
// VMOVNTDQAZ256rm / VMOVNTDQAZ128rm.
let Predicates = [HasVLX], AddedComplexity = 400 in {
  def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;

  def : Pat<(v4f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v8f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v4i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;

  def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;

  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
}
4266
//===----------------------------------------------------------------------===//
// AVX-512 - Integer arithmetic
//
// Maskable two-operand integer op for one vector type: register-register
// ("rr") and register-memory ("rm") forms.
//   opc          - opcode byte.
//   OpcodeStr    - mnemonic.
//   OpNode       - SelectionDAG node matched by both forms.
//   _            - vector type info (RC, VT, MemOp, LdFrag).
//   itins        - itineraries (rr/rm) and scheduling class (Sched).
//   IsCommutable - forwarded to the rr form only.
// The rm form matches a full-vector load via bitconvert of _.LdFrag.
multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86VectorVTInfo _, OpndItins itins,
                           bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                    "$src2, $src1", "$src1, $src2",
                    (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                    itins.rr, IsCommutable>, AVX512BIBase, EVEX_4V,
                    Sched<[itins.Sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1,
                                (bitconvert (_.LdFrag addr:$src2)))),
                  itins.rm>, AVX512BIBase, EVEX_4V,
                  Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
4288
// Extends avx512_binop_rm with a broadcast-memory form ("rmb"): the second
// operand is a scalar memory operand matched as X86VBroadcast of
// _.ScalarLdFrag, printed with _.BroadcastStr appended, and encoded with
// EVEX_B.
multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _, OpndItins itins,
                            bit IsCommutable = 0> :
           avx512_binop_rm<opc, OpcodeStr, OpNode, _, itins, IsCommutable> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                  "${src2}"##_.BroadcastStr##", $src1",
                  "$src1, ${src2}"##_.BroadcastStr,
                  (_.VT (OpNode _.RC:$src1,
                                (X86VBroadcast
                                    (_.ScalarLdFrag addr:$src2)))),
                  itins.rm>, AVX512BIBase, EVEX_4V, EVEX_B,
                  Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
Elena Demikhovskybb2f6b72014-03-27 09:45:08 +00004303
// Instantiates avx512_binop_rm (no broadcast form) at all three vector
// lengths: Z under [prd], Z256/Z128 under [prd, HasVLX].
multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              AVX512VLVectorVTInfo VTInfo, OpndItins itins,
                              Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, itins,
                             IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256, itins,
                                IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128, itins,
                                IsCommutable>, EVEX_V128;
  }
}
4318
// Instantiates avx512_binop_rmb (rr, rm and broadcast rmb forms) at all three
// vector lengths: Z under [prd], Z256/Z128 under [prd, HasVLX].
multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               AVX512VLVectorVTInfo VTInfo, OpndItins itins,
                               Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, itins,
                              IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256, itins,
                                 IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128, itins,
                                 IsCommutable>, EVEX_V128;
  }
}
4333
// 64-bit-element binop: avx512_binop_rmb_vl over avx512vl_i64_info with
// VEX_W and EVEX_CD8<64, CD8VF>.
multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                OpndItins itins, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
                                  itins, prd, IsCommutable>,
                                  VEX_W, EVEX_CD8<64, CD8VF>;
}
4341
// 32-bit-element binop: avx512_binop_rmb_vl over avx512vl_i32_info with
// EVEX_CD8<32, CD8VF>.
multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                OpndItins itins, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
                                  itins, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
}
4348
// 16-bit-element binop: avx512_binop_rm_vl (no broadcast form) over
// avx512vl_i16_info with EVEX_CD8<16, CD8VF> and VEX_WIG.
multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                OpndItins itins, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
                                 itins, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
                                 VEX_WIG;
}
4356
// 8-bit-element binop: avx512_binop_rm_vl (no broadcast form) over
// avx512vl_i8_info with EVEX_CD8<8, CD8VF> and VEX_WIG.
multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                OpndItins itins, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
                                 itins, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
                                 VEX_WIG;
}
4364
// Dword + qword variants of one operation. The mnemonic gets a "q" / "d"
// suffix and the sub-multiclasses are named Q and D; each takes its own
// opcode (opc_q / opc_d).
multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, OpndItins itins, Predicate prd,
                                 bit IsCommutable = 0> {
  defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, itins, prd,
                                IsCommutable>;

  defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, itins, prd,
                                IsCommutable>;
}
4374
// Byte + word variants of one operation. The mnemonic gets a "w" / "b"
// suffix and the sub-multiclasses are named W and B; each takes its own
// opcode (opc_w / opc_b).
multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, OpndItins itins, Predicate prd,
                                 bit IsCommutable = 0> {
  defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, itins, prd,
                                IsCommutable>;

  defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, itins, prd,
                                IsCommutable>;
}
4384
// All four element widths of one operation: dword/qword forms are predicated
// on HasAVX512, byte/word forms on HasBWI.
multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  OpndItins itins, bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
                                    itins, HasAVX512, IsCommutable>,
              avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
                                    itins, HasBWI, IsCommutable>;
}
4394
// Maskable binop whose source and destination vector types differ.
//   _Src   - type info for both source operands (RC, VT, MemOp, LdFrag).
//   _Dst   - type info for the result and for masking.
//   _Brdct - type info for the broadcast form: supplies the scalar memory
//            operand, the scalar load fragment and the broadcast suffix; the
//            broadcast value is bitconverted before being passed to OpNode.
// Defines rr, rm and rmb (EVEX_B) forms. IsCommutable is forwarded to the rr
// form only.
multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, OpndItins itins,
                            SDNode OpNode,X86VectorVTInfo _Src,
                            X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
                            bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1","$src1, $src2",
                            (_Dst.VT (OpNode
                                         (_Src.VT _Src.RC:$src1),
                                         (_Src.VT _Src.RC:$src2))),
                            itins.rr, IsCommutable>,
                            AVX512BIBase, EVEX_4V, Sched<[itins.Sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                        "$src2, $src1", "$src1, $src2",
                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                      (bitconvert (_Src.LdFrag addr:$src2)))),
                        itins.rm>, AVX512BIBase, EVEX_4V,
                        Sched<[itins.Sched.Folded, ReadAfterLd]>;

  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                    (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
                    OpcodeStr,
                    "${src2}"##_Brdct.BroadcastStr##", $src1",
                     "$src1, ${src2}"##_Brdct.BroadcastStr,
                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                                 (_Brdct.VT (X86VBroadcast
                                          (_Brdct.ScalarLdFrag addr:$src2)))))),
                    itins.rm>, AVX512BIBase, EVEX_4V, EVEX_B,
                    Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
4426
// Integer add/sub for all element widths (opcode order: b, w, d, q).
defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
                                    SSE_INTALU_ITINS_P, 1>;
defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
                                    SSE_INTALU_ITINS_P, 0>;
// Byte/word add/sub via X86adds / X86subs / X86addus / X86subus (HasBWI).
defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", X86adds,
                                    SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", X86subs,
                                    SSE_INTALU_ITINS_P, HasBWI, 0>;
defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", X86addus,
                                     SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", X86subus,
                                     SSE_INTALU_ITINS_P, HasBWI, 0>;
// Multiplies via mul (dword: HasAVX512, word: HasBWI, qword: HasDQI).
defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
                                    SSE_INTMUL_ITINS_P, HasAVX512, 1>, T8PD;
defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
                                    SSE_INTMUL_ITINS_P, HasBWI, 1>;
defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
                                    SSE_INTMUL_ITINS_P, HasDQI, 1>, T8PD;
// Word multiplies via mulhs / mulhu / X86mulhrs (HasBWI).
defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SSE_INTMUL_ITINS_P,
                                    HasBWI, 1>;
defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SSE_INTMUL_ITINS_P,
                                     HasBWI, 1>;
defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs, SSE_INTMUL_ITINS_P,
                                      HasBWI, 1>, T8PD;
// Byte/word X86avg (HasBWI).
defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
                                   SSE_INTALU_ITINS_P, HasBWI, 1>;
4453
// Instantiates avx512_binop_rm2 at all three vector lengths with separate
// source and destination type families. The broadcast type is fixed to
// 64-bit elements (v8i64_info / v4i64x_info / v2i64x_info) and every form is
// tagged EVEX_CD8<64, CD8VF> and VEX_W, regardless of _SrcVTInfo.
// Z is predicated on [prd]; Z256/Z128 on [HasVLX, prd].
multiclass avx512_binop_all<bits<8> opc, string OpcodeStr, OpndItins itins,
                            AVX512VLVectorVTInfo _SrcVTInfo, AVX512VLVectorVTInfo _DstVTInfo,
                            SDNode OpNode, Predicate prd,  bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode,
                                   _SrcVTInfo.info512, _DstVTInfo.info512,
                                   v8i64_info, IsCommutable>,
                                   EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
  let Predicates = [HasVLX, prd] in {
    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode,
                                      _SrcVTInfo.info256, _DstVTInfo.info256,
                                      v4i64x_info, IsCommutable>,
                                      EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode,
                                      _SrcVTInfo.info128, _DstVTInfo.info128,
                                      v2i64x_info, IsCommutable>,
                                      EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
  }
}
Elena Demikhovsky50b88dd2015-04-21 10:27:40 +00004473
// Operations instantiated through avx512_binop_all: vpmuldq / vpmuludq take
// i32-element sources and produce i64-element results; vpmultishiftqb uses
// i8-element types for both and requires HasVBMI.
defm VPMULDQ : avx512_binop_all<0x28, "vpmuldq", SSE_INTMUL_ITINS_P,
                                avx512vl_i32_info, avx512vl_i64_info,
                                X86pmuldq, HasAVX512, 1>,T8PD;
defm VPMULUDQ : avx512_binop_all<0xF4, "vpmuludq", SSE_INTMUL_ITINS_P,
                                 avx512vl_i32_info, avx512vl_i64_info,
                                 X86pmuludq, HasAVX512, 1>;
defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SSE_INTALU_ITINS_P,
                                       avx512vl_i8_info, avx512vl_i8_info,
                                       X86multishift, HasVBMI, 0>, T8PD;
Elena Demikhovsky172a27c2014-01-08 10:54:22 +00004483
// Broadcast-memory ("rmb") form of a pack-style operation whose result type
// (_Dst) differs from its source type (_Src). The second operand is a scalar
// load broadcast to _Src.VT and bitconverted; encoded with EVEX_B and
// EVEX_CD8<_Src.EltSize, CD8VF>.
multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
                            OpndItins itins> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                    (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
                    OpcodeStr,
                    "${src2}"##_Src.BroadcastStr##", $src1",
                     "$src1, ${src2}"##_Src.BroadcastStr,
                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                                 (_Src.VT (X86VBroadcast
                                          (_Src.ScalarLdFrag addr:$src2)))))),
                    itins.rm>, EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
                    Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
4498
// Register-register ("rr") and register-memory ("rm") forms of a pack-style
// operation with distinct source (_Src) and destination (_Dst) vector types.
// Both forms use EVEX_CD8<_Src.EltSize, CD8VF>; IsCommutable is forwarded to
// the rr form only.
multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
                            SDNode OpNode,X86VectorVTInfo _Src,
                            X86VectorVTInfo _Dst, OpndItins itins,
                            bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1","$src1, $src2",
                            (_Dst.VT (OpNode
                                         (_Src.VT _Src.RC:$src1),
                                         (_Src.VT _Src.RC:$src2))),
                            itins.rr, IsCommutable>,
                            EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[itins.Sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                        "$src2, $src1", "$src1, $src2",
                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                      (bitconvert (_Src.LdFrag addr:$src2)))), itins.rm>,
                         EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
                         Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
4519
// i32 -> i16 pack operation at all three vector lengths, each with rr/rm
// (avx512_packs_rm) and broadcast rmb (avx512_packs_rmb) forms, using the
// SSE_PACK itineraries. Z requires HasBWI; Z256/Z128 require HasBWI + HasVLX.
multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
                                 v32i16_info, SSE_PACK>,
                avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
                                 v32i16_info, SSE_PACK>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
                                     v16i16x_info, SSE_PACK>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
                                     v16i16x_info, SSE_PACK>, EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
                                     v8i16x_info, SSE_PACK>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
                                     v8i16x_info, SSE_PACK>, EVEX_V128;
  }
}
// Instantiates a pack operation from 16-bit source elements to 8-bit
// destination elements at all three vector widths.
//   opc       - opcode byte.
//   OpcodeStr - assembly mnemonic.
//   OpNode    - selection DAG node matched by the patterns.
// Only avx512_packs_rm is used here (no avx512_packs_rmb form), and every
// instantiation is marked VEX_WIG. The 512-bit form requires HasBWI; the
// 256/128-bit forms additionally require HasVLX.
multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
                                   SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info,
                                v64i8_info, SSE_PACK>, EVEX_V512, VEX_WIG;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
                                     v32i8x_info, SSE_PACK>, EVEX_V256, VEX_WIG;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
                                     v16i8x_info, SSE_PACK>, EVEX_V128, VEX_WIG;
  }
}
Igor Bregerf7fd5472015-07-21 07:11:28 +00004550
// Instantiates a multiply-add style operation whose source and destination
// element types differ, at all three vector widths, by reusing
// avx512_packs_rm with the SSE_PMADD itinerary.
//   opc          - opcode byte.
//   OpcodeStr    - assembly mnemonic.
//   OpNode       - selection DAG node matched by the patterns.
//   _Src / _Dst  - per-width type info for the source and result vectors.
//   IsCommutable - forwarded to avx512_packs_rm (default 0).
// The 512-bit form requires HasBWI; the 256/128-bit forms additionally
// require HasVLX.
multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
                         SDNode OpNode, AVX512VLVectorVTInfo _Src,
                         AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
                                _Dst.info512, SSE_PMADD, IsCommutable>,
                EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
                                     _Dst.info256, SSE_PMADD, IsCommutable>,
                     EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
                                     _Dst.info128, SSE_PMADD, IsCommutable>,
                     EVEX_V128;
  }
}
4564
// Pack instructions. Note that VPACKUSDW deliberately uses AVX5128IBase
// rather than AVX512BIBase like the other three.
defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;

// Multiply-add instructions. Only VPMADDWD is marked commutable.
defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
                     avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG;
defm VPMADDWD   : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
                     avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG;
Elena Demikhovsky2557a222015-05-04 09:14:02 +00004574
// Integer min/max instructions. All are marked commutable (trailing 1).
// Byte/word forms require HasBWI; dword/qword forms require HasAVX512.

// Signed max.
defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
                                    SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
                                    SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPMAXS  : avx512_binop_rm_vl_dq<0x3D, 0x3D, "vpmaxs", smax,
                                     SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;

// Unsigned max.
defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
                                    SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
                                    SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
defm VPMAXU  : avx512_binop_rm_vl_dq<0x3F, 0x3F, "vpmaxu", umax,
                                     SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;

// Signed min.
defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
                                    SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
                                    SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPMINS  : avx512_binop_rm_vl_dq<0x39, 0x39, "vpmins", smin,
                                     SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;

// Unsigned min.
defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
                                    SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
                                    SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
defm VPMINU  : avx512_binop_rm_vl_dq<0x3B, 0x3B, "vpminu", umin,
                                     SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
Craig Topperabe80cc2016-08-28 06:06:28 +00004602
// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX.
// The narrow operands are inserted into the low subregister of an undefined
// 512-bit register, multiplied with VPMULLQZrr, and the low subregister of
// the result is extracted again.
// (This block previously appeared twice, token for token; the redundant
// second copy has been removed.)
let Predicates = [HasDQI, NoVLX] in {
  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;

  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
}
4636
// Emits patterns that implement a v4i64 / v2i64 binary operation with a
// 512-bit instruction: each operand is inserted into the low subregister of
// an undefined v8i64 value, Instr is applied, and the low subregister of the
// result is extracted.
//   Instr  - the 512-bit register-register instruction to use.
//   OpNode - the DAG node being matched on the narrow types.
multiclass avx512_min_max_lowering<Instruction Instr, SDNode OpNode> {
  def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
            (EXTRACT_SUBREG
                (Instr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;

  def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
            (EXTRACT_SUBREG
                (Instr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
}
4652
// Without VLX, lower 128/256-bit 64-bit-element min/max through the 512-bit
// instructions.
let Predicates = [HasAVX512, NoVLX] in {
  defm : avx512_min_max_lowering<VPMAXUQZrr, umax>;
  defm : avx512_min_max_lowering<VPMINUQZrr, umin>;
  defm : avx512_min_max_lowering<VPMAXSQZrr, smax>;
  defm : avx512_min_max_lowering<VPMINSQZrr, smin>;
}
4659
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004660//===----------------------------------------------------------------------===//
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004661// AVX-512 Logical Instructions
4662//===----------------------------------------------------------------------===//
4663
// Register-register (rr) and register-memory (rm) forms of a bitwise logic
// instruction.
//
// OpNodeMsk is the OpNode to use when element size is important. OpNode will
// be set to null_frag for 32-bit elements.
//
// The first pattern passed to AVX512_maskable_logic is expressed on _.i64VT
// with the operands bitconverted from _.VT; the second pattern uses
// OpNodeMsk and bitconverts the _.i64VT result back to _.VT.
multiclass avx512_logic_rm<bits<8> opc, string OpcodeStr,
                           SDPatternOperator OpNode,
                           SDNode OpNodeMsk, OpndItins itins, X86VectorVTInfo _,
                           bit IsCommutable = 0> {
  let hasSideEffects = 0 in
  defm rr : AVX512_maskable_logic<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                    "$src2, $src1", "$src1, $src2",
                    (_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
                                     (bitconvert (_.VT _.RC:$src2)))),
                    (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
                                                          _.RC:$src2)))),
                    itins.rr, IsCommutable>, AVX512BIBase, EVEX_4V,
                    Sched<[itins.Sched]>;

  let hasSideEffects = 0, mayLoad = 1 in
  defm rm : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
                                   (bitconvert (_.LdFrag addr:$src2)))),
                  (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
                                     (bitconvert (_.LdFrag addr:$src2)))))),
                  itins.rm>, AVX512BIBase, EVEX_4V,
                  Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
4692
// Extends avx512_logic_rm with the broadcast-memory (rmb) form, whose second
// operand is a scalar load splatted with X86VBroadcast (EVEX_B).
//
// OpNodeMsk is the OpNode to use where element size is important. So use
// for all of the broadcast patterns.
multiclass avx512_logic_rmb<bits<8> opc, string OpcodeStr,
                            SDPatternOperator OpNode,
                            SDNode OpNodeMsk, OpndItins itins, X86VectorVTInfo _,
                            bit IsCommutable = 0> :
           avx512_logic_rm<opc, OpcodeStr, OpNode, OpNodeMsk, itins, _,
                           IsCommutable> {
  defm rmb : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                  "${src2}"##_.BroadcastStr##", $src1",
                  "$src1, ${src2}"##_.BroadcastStr,
                  (_.i64VT (OpNodeMsk _.RC:$src1,
                                   (bitconvert
                                    (_.VT (X86VBroadcast
                                            (_.ScalarLdFrag addr:$src2)))))),
                  (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
                                     (bitconvert
                                      (_.VT (X86VBroadcast
                                             (_.ScalarLdFrag addr:$src2)))))))),
                  itins.rm>, AVX512BIBase, EVEX_4V, EVEX_B,
                  Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
4716
// Instantiates avx512_logic_rmb at 512, 256 and 128 bits using the per-width
// entries of VTInfo. The 512-bit form (Z) requires HasAVX512; the Z256/Z128
// forms additionally require HasVLX.
multiclass avx512_logic_rmb_vl<bits<8> opc, string OpcodeStr,
                               SDPatternOperator OpNode,
                               SDNode OpNodeMsk, OpndItins itins,
                               AVX512VLVectorVTInfo VTInfo,
                               bit IsCommutable = 0> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, itins,
                            VTInfo.info512, IsCommutable>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, itins,
                                 VTInfo.info256, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, itins,
                                 VTInfo.info128, IsCommutable>, EVEX_V128;
  }
}
4733
// Instantiates the qword (Q) and dword (D) variants of a logic instruction.
// The Q variant passes OpNode for both the plain and the masked pattern; the
// D variant passes null_frag as the plain OpNode and uses OpNode only as
// OpNodeMsk.
//   opc_d / opc_q - opcode bytes for the dword / qword variants.
//   OpcodeStr     - mnemonic stem; "d" or "q" is appended.
multiclass avx512_logic_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, OpndItins itins,
                                 bit IsCommutable = 0> {
  defm Q : avx512_logic_rmb_vl<opc_q, OpcodeStr#"q", OpNode, OpNode, itins,
                               avx512vl_i64_info, IsCommutable>,
                               VEX_W, EVEX_CD8<64, CD8VF>;
  defm D : avx512_logic_rmb_vl<opc_d, OpcodeStr#"d", null_frag, OpNode, itins,
                               avx512vl_i32_info, IsCommutable>,
                               EVEX_CD8<32, CD8VF>;
}
4744
// Integer logic instructions. AND/OR/XOR are marked commutable; ANDN is not.
defm VPAND  : avx512_logic_rm_vl_dq<0xDB, 0xDB, "vpand", and, SSE_BIT_ITINS_P, 1>;
defm VPOR   : avx512_logic_rm_vl_dq<0xEB, 0xEB, "vpor", or, SSE_BIT_ITINS_P, 1>;
defm VPXOR  : avx512_logic_rm_vl_dq<0xEF, 0xEF, "vpxor", xor, SSE_BIT_ITINS_P, 1>;
defm VPANDN : avx512_logic_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp, SSE_BIT_ITINS_P>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004749
4750//===----------------------------------------------------------------------===//
4751// AVX-512 FP arithmetic
4752//===----------------------------------------------------------------------===//
// Scalar FP binary operation.
//   rr_Int / rm_Int - maskable forms on the vector register class (_.RC),
//                     matched through VecNode with FROUND_CURRENT.
//   rr / rm         - codegen-only forms on the scalar FP register class
//                     (_.FRC), matched through OpNode. Only rr takes
//                     IsCommutable.
multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                            SDNode OpNode, SDNode VecNode, OpndItins itins,
                            bit IsCommutable> {
  let ExeDomain = _.ExeDomain in {
    defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                             (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                             "$src2, $src1", "$src1, $src2",
                             (_.VT (VecNode _.RC:$src1, _.RC:$src2,
                                            (i32 FROUND_CURRENT))),
                             itins.rr>, Sched<[itins.Sched]>;

    defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1,
                                          _.ScalarIntMemCPat:$src2,
                                          (i32 FROUND_CURRENT))),
                           itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;

    let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
      def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
                  (ins _.FRC:$src1, _.FRC:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
                  itins.rr>, Sched<[itins.Sched]> {
        let isCommutable = IsCommutable;
      }
      def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
                  (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                     (_.ScalarLdFrag addr:$src2)))], itins.rm>,
                  Sched<[itins.Sched.Folded, ReadAfterLd]>;
    }
  }
}
4788
// Scalar FP binary operation with an explicit rounding-control operand:
// defines rrb_Int, which takes AVX512RC:$rc as a third input and passes it
// to VecNode as an i32 immediate (EVEX_B, EVEX_RC).
multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                                  SDNode VecNode, OpndItins itins,
                                  bit IsCommutable = 0> {
  let ExeDomain = _.ExeDomain in
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
                          "$rc, $src2, $src1", "$src1, $src2, $rc",
                          (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                          (i32 imm:$rc)), itins.rr, IsCommutable>,
                          EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
}
// Scalar FP binary operation with a suppress-all-exceptions ({sae}) form.
//   rr_Int / rm_Int - maskable forms on _.RC, matched through VecNode
//                     (no rounding operand).
//   rr / rm         - codegen-only forms on _.FRC, matched through OpNode.
//                     Only rr takes IsCommutable.
//   rrb_Int         - {sae} form, matched through SaeNode with
//                     FROUND_NO_EXC (EVEX_B).
multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                                SDNode OpNode, SDNode VecNode, SDNode SaeNode,
                                OpndItins itins, bit IsCommutable> {
  let ExeDomain = _.ExeDomain in {
    defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                             (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                             "$src2, $src1", "$src1, $src2",
                             (_.VT (VecNode _.RC:$src1, _.RC:$src2)),
                             itins.rr>, Sched<[itins.Sched]>;

    defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1,
                                          _.ScalarIntMemCPat:$src2)),
                           itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;

    let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
      def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
                  (ins _.FRC:$src1, _.FRC:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
                  itins.rr>, Sched<[itins.Sched]> {
        let isCommutable = IsCommutable;
      }
      def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
                  (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                     (_.ScalarLdFrag addr:$src2)))], itins.rm>,
                  Sched<[itins.Sched.Folded, ReadAfterLd]>;
    }

    defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                              (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                              "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                              (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                              (i32 FROUND_NO_EXC)), itins.rr>, EVEX_B,
                              Sched<[itins.Sched]>;
  }
}
4840
// Instantiates the single-precision (SSZ) and double-precision (SDZ) scalar
// variants of a binary FP operation, each combining avx512_fp_scalar with
// the rounding-control form from avx512_fp_scalar_round. "ss"/"sd" is
// appended to OpcodeStr.
multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                SDNode VecNode,
                                SizeItins itins, bit IsCommutable> {
  defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
                              itins.s, IsCommutable>,
             avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, VecNode,
                              itins.s, IsCommutable>,
                              XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
                              itins.d, IsCommutable>,
             avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, VecNode,
                              itins.d, IsCommutable>,
                              XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
4855
// Instantiates the single-precision (SSZ) and double-precision (SDZ) scalar
// variants of a binary FP operation that has a {sae} form, via
// avx512_fp_scalar_sae. "ss"/"sd" is appended to OpcodeStr.
multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode VecNode, SDNode SaeNode,
                              SizeItins itins, bit IsCommutable> {
  defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
                                  VecNode, SaeNode, itins.s, IsCommutable>,
                                  XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
                                  VecNode, SaeNode, itins.d, IsCommutable>,
                                  XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
// Scalar FP arithmetic. ADD and MUL are marked commutable; SUB, DIV, MIN and
// MAX are not. MIN/MAX use the {sae} multiclass instead of rounding control.
defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnds, SSE_ALU_ITINS_S, 1>;
defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnds, SSE_MUL_ITINS_S, 1>;
defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnds, SSE_ALU_ITINS_S, 0>;
defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnds, SSE_DIV_ITINS_S, 0>;
defm VMIN : avx512_binop_s_sae  <0x5D, "vmin", X86fmin, X86fmins, X86fminRnds,
                                 SSE_ALU_ITINS_S, 0>;
defm VMAX : avx512_binop_s_sae  <0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxRnds,
                                 SSE_ALU_ITINS_S, 0>;
Elena Demikhovskyd84f3372016-07-11 06:08:06 +00004874
// MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
// X86fminc and X86fmaxc instead of X86fmin and X86fmax.
//
// Defines codegen-only rr/rm forms on the scalar FP register class (_.FRC);
// the rr form is unconditionally marked commutable.
multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
                         X86VectorVTInfo _, SDNode OpNode, OpndItins itins> {
  let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
    def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
                (ins _.FRC:$src1, _.FRC:$src2),
                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
                itins.rr>, Sched<[itins.Sched]> {
      let isCommutable = 1;
    }
    def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
                (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                   (_.ScalarLdFrag addr:$src2)))], itins.rm>,
                Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
// Commutable scalar min/max variants. They share opcodes and mnemonics with
// VMIN/VMAX but match X86fminc / X86fmaxc.
defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
                                         SSE_ALU_ITINS_S.s>, XS, EVEX_4V, VEX_LIG,
                                         EVEX_CD8<32, CD8VT1>;

defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
                                         SSE_ALU_ITINS_S.d>, XD, VEX_W, EVEX_4V, VEX_LIG,
                                         EVEX_CD8<64, CD8VT1>;

defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
                                         SSE_ALU_ITINS_S.s>, XS, EVEX_4V, VEX_LIG,
                                         EVEX_CD8<32, CD8VT1>;

defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
                                         SSE_ALU_ITINS_S.d>, XD, VEX_W, EVEX_4V, VEX_LIG,
                                         EVEX_CD8<64, CD8VT1>;
Elena Demikhovsky02ffd262015-03-01 07:44:04 +00004910
// Packed FP binary operation: register-register (rr), register-memory (rm)
// and broadcast-memory (rmb, EVEX_B) forms. _.Suffix is appended to
// OpcodeStr. Only the rr form takes IsCommutable.
multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr,
                            SDPatternOperator OpNode,
                            X86VectorVTInfo _, OpndItins itins,
                            bit IsCommutable> {
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
    defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                    "$src2, $src1", "$src1, $src2",
                    (_.VT (OpNode _.RC:$src1, _.RC:$src2)), itins.rr,
                    IsCommutable>, EVEX_4V, Sched<[itins.Sched]>;
    let mayLoad = 1 in {
      defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
                      "$src2, $src1", "$src1, $src2",
                      (OpNode _.RC:$src1, (_.LdFrag addr:$src2)), itins.rm>,
                      EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
      defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                       (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
                       "${src2}"##_.BroadcastStr##", $src1",
                       "$src1, ${src2}"##_.BroadcastStr,
                       (OpNode _.RC:$src1, (_.VT (X86VBroadcast
                                                  (_.ScalarLdFrag addr:$src2)))),
                       itins.rm>, EVEX_4V, EVEX_B,
                       Sched<[itins.Sched.Folded, ReadAfterLd]>;
    }
  }
}
Elena Demikhovskyf7c1b162014-03-06 08:45:30 +00004937
// Packed FP binary operation with an explicit rounding-control operand:
// defines rrb, which takes AVX512RC:$rc as a third input and passes it to
// OpNodeRnd as an i32 immediate (EVEX_B, EVEX_RC).
multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
                                  SDPatternOperator OpNodeRnd,
                                  OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr##_.Suffix,
                  "$rc, $src2, $src1", "$src1, $src2, $rc",
                  (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 imm:$rc))), itins.rr>,
                  EVEX_4V, EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
}
4947
// Packed FP binary operation with a suppress-all-exceptions ({sae}) form:
// defines rrb, matched through OpNodeRnd with FROUND_NO_EXC (EVEX_B).
multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNodeRnd,
                                OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                  "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                  (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 FROUND_NO_EXC))), itins.rr>,
                  EVEX_4V, EVEX_B, Sched<[itins.Sched]>;
}
4957
// Instantiates avx512_fp_packed for single precision (PS*) and double
// precision (PD*) at all three vector widths.
//   prd - predicate required by every form; the 128/256-bit forms also
//         require HasVLX.
multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr,
                             SDPatternOperator OpNode,
                             Predicate prd, SizeItins itins,
                             bit IsCommutable = 0> {
  let Predicates = [prd] in {
    defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info,
                                itins.s, IsCommutable>, EVEX_V512, PS,
                                EVEX_CD8<32, CD8VF>;
    defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info,
                                itins.d, IsCommutable>, EVEX_V512, PD, VEX_W,
                                EVEX_CD8<64, CD8VF>;
  }

  // Define only if AVX512VL feature is present.
  let Predicates = [prd, HasVLX] in {
    defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info,
                                   itins.s, IsCommutable>, EVEX_V128, PS,
                                   EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info,
                                   itins.s, IsCommutable>, EVEX_V256, PS,
                                   EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info,
                                   itins.d, IsCommutable>, EVEX_V128, PD, VEX_W,
                                   EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info,
                                   itins.d, IsCommutable>, EVEX_V256, PD, VEX_W,
                                   EVEX_CD8<64, CD8VF>;
  }
}
4986
// Instantiates the rounding-control form (avx512_fp_round_packed) for
// single and double precision. Only 512-bit variants are defined here.
multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr,
                                   SDNode OpNodeRnd,
                                   SizeItins itins> {
  defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, itins.s, v16f32_info>,
                                    EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, itins.d, v8f64_info>,
                                    EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
}
4994
// Instantiates the {sae} form (avx512_fp_sae_packed) for single and double
// precision. Only 512-bit variants are defined here.
multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr,
                                 SDNode OpNodeRnd,
                                 SizeItins itins> {
  defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, itins.s, v16f32_info>,
                                  EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, itins.d, v8f64_info>,
                                  EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
}
5002
// Packed FP arithmetic. ADD/MUL/SUB/DIV add rounding-control forms; MIN/MAX
// add {sae} forms. ADD and MUL are marked commutable.
defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512,
                              SSE_ALU_ITINS_P, 1>,
            avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SSE_ALU_ITINS_P>;
defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512,
                              SSE_MUL_ITINS_P, 1>,
            avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SSE_MUL_ITINS_P>;
defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512, SSE_ALU_ITINS_P>,
            avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SSE_ALU_ITINS_P>;
defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512, SSE_DIV_ITINS_P>,
            avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SSE_DIV_ITINS_P>;
defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512,
                              SSE_ALU_ITINS_P, 0>,
            avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd, SSE_ALU_ITINS_P>;
defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512,
                              SSE_ALU_ITINS_P, 0>,
            avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd, SSE_ALU_ITINS_P>;
// Commutable min/max variants (X86fminc / X86fmaxc); codegen-only because
// they share opcodes and mnemonics with VMIN/VMAX.
let isCodeGenOnly = 1 in {
  defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512,
                                 SSE_ALU_ITINS_P, 1>;
  defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512,
                                 SSE_ALU_ITINS_P, 1>;
}
// FP logic instruction: no selection pattern (null_frag); requires HasDQI.
defm VAND  : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI,
                               SSE_ALU_ITINS_P, 1>;
Craig Topper375aa902016-12-19 00:42:28 +00005027defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI,
Craig Topper9433f972016-08-02 06:16:53 +00005028 SSE_ALU_ITINS_P, 0>;
Craig Topper375aa902016-12-19 00:42:28 +00005029defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
Craig Topper9433f972016-08-02 06:16:53 +00005030 SSE_ALU_ITINS_P, 1>;
Craig Topper375aa902016-12-19 00:42:28 +00005031defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
Craig Topper9433f972016-08-02 06:16:53 +00005032 SSE_ALU_ITINS_P, 1>;
Elena Demikhovsky52e4a0e2014-01-05 10:46:09 +00005033
// Patterns catch floating point selects with bitcasted integer logic ops.
//   InstrStr - name prefix of the target instruction; a form suffix
//              (rrk, rrkz, rmk, rmkz, rmb, rmbk, rmbkz) is appended and the
//              result is looked up with !cast<Instruction>.
//   OpNode   - the integer logic node being matched (applied at _.i64VT).
//   _        - vector type info for the FP vector type of the select.
//   prd      - predicate guarding every pattern in this multiclass.
multiclass avx512_fp_logical_lowering<string InstrStr, SDNode OpNode,
                                      X86VectorVTInfo _, Predicate prd> {
let Predicates = [prd] in {
  // Masked register-register logical operations.
  // Merge-masking: lanes not selected come from $src0.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rrk)
               _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2)>;
  // Zero-masking: lanes not selected come from the all-zeros vector.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rrkz)
               _.KRCWM:$mask, _.RC:$src1, _.RC:$src2)>;

  // Masked register-memory logical operations.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert (_.i64VT (OpNode _.RC:$src1,
                                         (load addr:$src2)))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rmk)
               _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, addr:$src2)>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert (_.i64VT (OpNode _.RC:$src1,
                                         (load addr:$src2)))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rmkz)
               _.KRCWM:$mask, _.RC:$src1, addr:$src2)>;

  // Register-broadcast logical operations.
  // Unmasked form: the second operand is a broadcast scalar load bitcast to
  // the integer type.
  def : Pat<(_.i64VT (OpNode _.RC:$src1,
                      (bitconvert (_.VT (X86VBroadcast
                                         (_.ScalarLdFrag addr:$src2)))))),
            (!cast<Instruction>(InstrStr#rmb) _.RC:$src1, addr:$src2)>;
  // Merge-masked broadcast form.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert
                    (_.i64VT (OpNode _.RC:$src1,
                              (bitconvert (_.VT
                                           (X86VBroadcast
                                            (_.ScalarLdFrag addr:$src2))))))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rmbk)
               _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, addr:$src2)>;
  // Zero-masked broadcast form.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert
                    (_.i64VT (OpNode _.RC:$src1,
                              (bitconvert (_.VT
                                           (X86VBroadcast
                                            (_.ScalarLdFrag addr:$src2))))))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rmbkz)
               _.KRCWM:$mask, _.RC:$src1, addr:$src2)>;
}
}
5086
// Expand avx512_fp_logical_lowering over all six FP vector types.
// 32-bit element types append "D..." to InstrStr and 64-bit element types
// append "Q...". The 128/256-bit forms are guarded by HasVLX and the
// 512-bit forms by HasAVX512.
multiclass avx512_fp_logical_lowering_sizes<string InstrStr, SDNode OpNode> {
  // 128-bit.
  defm : avx512_fp_logical_lowering<InstrStr#DZ128, OpNode, v4f32x_info,
                                    HasVLX>;
  defm : avx512_fp_logical_lowering<InstrStr#QZ128, OpNode, v2f64x_info,
                                    HasVLX>;
  // 256-bit.
  defm : avx512_fp_logical_lowering<InstrStr#DZ256, OpNode, v8f32x_info,
                                    HasVLX>;
  defm : avx512_fp_logical_lowering<InstrStr#QZ256, OpNode, v4f64x_info,
                                    HasVLX>;
  // 512-bit.
  defm : avx512_fp_logical_lowering<InstrStr#DZ, OpNode, v16f32_info,
                                    HasAVX512>;
  defm : avx512_fp_logical_lowering<InstrStr#QZ, OpNode, v8f64_info,
                                    HasAVX512>;
}
5095
// Map each logic node onto the instruction family named by the string
// prefix (size and form suffixes are appended by the multiclasses).
defm : avx512_fp_logical_lowering_sizes<"VPAND",  and>;
defm : avx512_fp_logical_lowering_sizes<"VPOR",   or>;
defm : avx512_fp_logical_lowering_sizes<"VPXOR",  xor>;
defm : avx512_fp_logical_lowering_sizes<"VPANDN", X86andnp>;
5100
let Predicates = [HasVLX,HasDQI] in {
  // Use packed logical operations for scalar ops.
  // Both scalar operands are copied into VR128X, the 128-bit packed
  // instruction is applied, and the result is copied back to the scalar
  // register class.

  // f64 operands (FR64X), using the PD instructions.
  def : Pat<(f64 (X86fand FR64X:$src1, FR64X:$src2)),
            (COPY_TO_REGCLASS
               (VANDPDZ128rr (COPY_TO_REGCLASS FR64X:$src1, VR128X),
                             (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
               FR64X)>;
  def : Pat<(f64 (X86for FR64X:$src1, FR64X:$src2)),
            (COPY_TO_REGCLASS
               (VORPDZ128rr (COPY_TO_REGCLASS FR64X:$src1, VR128X),
                            (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
               FR64X)>;
  def : Pat<(f64 (X86fxor FR64X:$src1, FR64X:$src2)),
            (COPY_TO_REGCLASS
               (VXORPDZ128rr (COPY_TO_REGCLASS FR64X:$src1, VR128X),
                             (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
               FR64X)>;
  def : Pat<(f64 (X86fandn FR64X:$src1, FR64X:$src2)),
            (COPY_TO_REGCLASS
               (VANDNPDZ128rr (COPY_TO_REGCLASS FR64X:$src1, VR128X),
                              (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
               FR64X)>;

  // f32 operands (FR32X), using the PS instructions.
  def : Pat<(f32 (X86fand FR32X:$src1, FR32X:$src2)),
            (COPY_TO_REGCLASS
               (VANDPSZ128rr (COPY_TO_REGCLASS FR32X:$src1, VR128X),
                             (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
               FR32X)>;
  def : Pat<(f32 (X86for FR32X:$src1, FR32X:$src2)),
            (COPY_TO_REGCLASS
               (VORPSZ128rr (COPY_TO_REGCLASS FR32X:$src1, VR128X),
                            (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
               FR32X)>;
  def : Pat<(f32 (X86fxor FR32X:$src1, FR32X:$src2)),
            (COPY_TO_REGCLASS
               (VXORPSZ128rr (COPY_TO_REGCLASS FR32X:$src1, VR128X),
                             (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
               FR32X)>;
  def : Pat<(f32 (X86fandn FR32X:$src1, FR32X:$src2)),
            (COPY_TO_REGCLASS
               (VANDNPSZ128rr (COPY_TO_REGCLASS FR32X:$src1, VR128X),
                              (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
               FR32X)>;
}
5137
// Packed scalef-style op for one vector type: register (rr), full-vector
// memory (rm) and broadcast-memory (rmb) forms. Every pattern passes
// (i32 FROUND_CURRENT) as the third operand of OpNode.
//   itins - itins.rr is used for rr, itins.rm for both memory forms;
//           scheduling info comes from itins.Sched.
//   _     - vector type info; also supplies the mnemonic suffix.
multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    // Register-register.
    defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                             (ins _.RC:$src1, _.RC:$src2),
                             OpcodeStr##_.Suffix,
                             "$src2, $src1", "$src1, $src2",
                             (_.VT (OpNode _.RC:$src1, _.RC:$src2,
                                           (i32 FROUND_CURRENT))),
                             itins.rr>,
             EVEX_4V, Sched<[itins.Sched]>;
    // Register-memory (full vector load).
    defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                             (ins _.RC:$src1, _.MemOp:$src2),
                             OpcodeStr##_.Suffix,
                             "$src2, $src1", "$src1, $src2",
                             (OpNode _.RC:$src1, (_.LdFrag addr:$src2),
                                     (i32 FROUND_CURRENT)),
                             itins.rm>,
             EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
    // Register-memory with a broadcast scalar load (EVEX_B).
    defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                              (ins _.RC:$src1, _.ScalarMemOp:$src2),
                              OpcodeStr##_.Suffix,
                              "${src2}"##_.BroadcastStr##", $src1",
                              "$src1, ${src2}"##_.BroadcastStr,
                              (OpNode _.RC:$src1,
                                      (_.VT (X86VBroadcast
                                             (_.ScalarLdFrag addr:$src2))),
                                      (i32 FROUND_CURRENT)),
                              itins.rm>,
              EVEX_4V, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
5161
// Scalar scalef-style op: register (rr) and memory (rm) forms, both built
// on AVX512_maskable_scalar and both passing (i32 FROUND_CURRENT) to OpNode.
// The memory form takes _.IntScalarMemOp and matches the load through
// _.ScalarIntMemCPat.
multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr,
                                   SDNode OpNode, OpndItins itins,
                                   X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    // Register-register.
    defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                    (ins _.RC:$src1, _.RC:$src2),
                                    OpcodeStr##_.Suffix,
                                    "$src2, $src1", "$src1, $src2",
                                    (_.VT (OpNode _.RC:$src1, _.RC:$src2,
                                                  (i32 FROUND_CURRENT))),
                                    itins.rr>,
             Sched<[itins.Sched]>;
    // Register-memory.
    defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                    (ins _.RC:$src1, _.IntScalarMemOp:$src2),
                                    OpcodeStr##_.Suffix,
                                    "$src2, $src1", "$src1, $src2",
                                    (OpNode _.RC:$src1,
                                            _.ScalarIntMemCPat:$src2,
                                            (i32 FROUND_CURRENT)),
                                    itins.rm>,
             Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
5178
// All forms of a scalef-style instruction.
//   opc        - opcode of the packed forms.
//   opcScaler  - opcode of the scalar forms.
//   OpNode     - SDNode for the packed forms.
//   OpNodeScal - SDNode for the scalar forms.
// The 512-bit packed forms and the scalar forms additionally pull in
// avx512_fp_round_packed / avx512_fp_scalar_round; the 128/256-bit packed
// forms do not.
multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler,
                                string OpcodeStr, SDNode OpNode,
                                SDNode OpNodeScal> {
  // 512-bit packed.
  defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F32P,
                                v16f32_info>,
             avx512_fp_round_packed<opc, OpcodeStr, OpNode, SSE_ALU_F32P,
                                    v16f32_info>,
             EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F64P,
                                v8f64_info>,
             avx512_fp_round_packed<opc, OpcodeStr, OpNode, SSE_ALU_F64P,
                                    v8f64_info>,
             EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

  // Scalar.
  defm SSZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal,
                                        SSE_ALU_F32S, f32x_info>,
                avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info,
                                       OpNodeScal, SSE_ALU_ITINS_S.s>,
                EVEX_4V,EVEX_CD8<32, CD8VT1>;
  defm SDZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal,
                                        SSE_ALU_F64S, f64x_info>,
                avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info,
                                       OpNodeScal, SSE_ALU_ITINS_S.d>,
                EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;

  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F32P,
                                     v4f32x_info>,
                  EVEX_V128, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F32P,
                                     v8f32x_info>,
                  EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F64P,
                                     v2f64x_info>,
                  EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F64P,
                                     v4f64x_info>,
                  EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
  }
}

// vscalef: packed opcode 0x2C, scalar opcode 0x2D.
defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef,
                                    X86scalefs>, T8PD;
Asaf Badouh7ec4b7a2015-06-28 14:30:39 +00005206
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00005207//===----------------------------------------------------------------------===//
5208// AVX-512 VPTESTM instructions
5209//===----------------------------------------------------------------------===//
5210
// Test-under-mask instruction for one vector type: register (rr) and memory
// (rm) forms producing a mask register (_.KRC), plus patterns for comparing
// a single source against zero.
//   OpNode - compare fragment applied to (and src1, src2) versus the
//            all-zeros vector.
//   Suffix - element-size letter used, together with NAME and _.ZSuffix, to
//            rebuild the instruction name for the extra patterns below.
multiclass avx512_vptest<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                         OpndItins itins, X86VectorVTInfo _, string Suffix> {
  let ExeDomain = _.ExeDomain in {
    // Register-register; the two sources may be swapped.
    let isCommutable = 1 in
    defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
                                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                                  "$src2, $src1", "$src1, $src2",
                                  (OpNode (bitconvert
                                           (_.i64VT (and _.RC:$src1,
                                                         _.RC:$src2))),
                                          _.ImmAllZerosV),
                                  itins.rr>,
              EVEX_4V, Sched<[itins.Sched]>;
    // Register-memory.
    defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
                                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                                  "$src2, $src1", "$src1, $src2",
                                  (OpNode (bitconvert
                                           (_.i64VT (and _.RC:$src1,
                                                         (bitconvert
                                                          (_.LdFrag addr:$src2))))),
                                          _.ImmAllZerosV),
                                  itins.rm>,
              EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
              Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }

  // Patterns for compare with 0 that just use the same source twice.
  def : Pat<(_.KVT (OpNode _.RC:$src, _.ImmAllZerosV)),
            (_.KVT (!cast<Instruction>(NAME # Suffix # _.ZSuffix # "rr")
                      _.RC:$src, _.RC:$src))>;

  // Same, with the result ANDed with an existing mask: use the masked form.
  def : Pat<(_.KVT (and _.KRC:$mask, (OpNode _.RC:$src, _.ImmAllZerosV))),
            (_.KVT (!cast<Instruction>(NAME # Suffix # _.ZSuffix # "rrk")
                      _.KRC:$mask, _.RC:$src, _.RC:$src))>;
}
5241
// Broadcast-memory (rmb) form of the test-under-mask instruction: the
// second operand is a scalar load broadcast to the full vector (EVEX_B).
multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                            OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
                                 (ins _.RC:$src1, _.ScalarMemOp:$src2),
                                 OpcodeStr,
                                 "${src2}"##_.BroadcastStr##", $src1",
                                 "$src1, ${src2}"##_.BroadcastStr,
                                 (OpNode (and _.RC:$src1,
                                              (X86VBroadcast
                                               (_.ScalarLdFrag addr:$src2))),
                                         _.ImmAllZerosV),
                                 itins.rm>,
             EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
Igor Bregerfca0a342016-01-28 13:19:25 +00005256
// Use 512bit version to implement 128/256 bit in case NoVLX.
//   ExtendInfo - type info of the wide vector the instruction operates on.
//   _          - type info of the narrow vector being tested.
//   Suffix     - element-size letter; the instruction used is
//                NAME # Suffix # "Zrr" (or "Zrrk" for the masked patterns).
// Each narrow source is inserted into an IMPLICIT_DEF wide register at
// _.SubRegIdx, and the resulting mask is copied to the narrow mask class.
multiclass avx512_vptest_lowering<PatFrag OpNode, X86VectorVTInfo ExtendInfo,
                                  X86VectorVTInfo _, string Suffix> {
  // Two-source test, unmasked.
  def : Pat<(_.KVT (OpNode (bitconvert
                            (_.i64VT (and _.RC:$src1, _.RC:$src2))),
                           _.ImmAllZerosV)),
            (_.KVT (COPY_TO_REGCLASS
                     (!cast<Instruction>(NAME # Suffix # "Zrr")
                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                      _.RC:$src1, _.SubRegIdx),
                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                      _.RC:$src2, _.SubRegIdx)),
                     _.KRC))>;

  // Two-source test, result ANDed with $mask.
  def : Pat<(_.KVT (and _.KRC:$mask,
                        (OpNode (bitconvert
                                 (_.i64VT (and _.RC:$src1, _.RC:$src2))),
                                _.ImmAllZerosV))),
            (COPY_TO_REGCLASS
              (!cast<Instruction>(NAME # Suffix # "Zrrk")
                (COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC),
                (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                               _.RC:$src1, _.SubRegIdx),
                (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                               _.RC:$src2, _.SubRegIdx)),
              _.KRC)>;

  // Single source compared with zero: feed the same source twice.
  def : Pat<(_.KVT (OpNode _.RC:$src, _.ImmAllZerosV)),
            (_.KVT (COPY_TO_REGCLASS
                     (!cast<Instruction>(NAME # Suffix # "Zrr")
                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                      _.RC:$src, _.SubRegIdx),
                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                      _.RC:$src, _.SubRegIdx)),
                     _.KRC))>;

  // Single source compared with zero, result ANDed with $mask.
  def : Pat<(_.KVT (and _.KRC:$mask, (OpNode _.RC:$src, _.ImmAllZerosV))),
            (COPY_TO_REGCLASS
              (!cast<Instruction>(NAME # Suffix # "Zrrk")
                (COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC),
                (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                               _.RC:$src, _.SubRegIdx),
                (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                               _.RC:$src, _.SubRegIdx)),
              _.KRC)>;
}
5301
// Dword/qword test-under-mask at all three vector widths.
//   _      - per-width type info bundle (info512 / info256 / info128).
//   Suffix - element-size letter forwarded to avx512_vptest and to the
//            NoVLX lowering patterns.
// With VLX the 128/256-bit instructions are defined directly; without it
// the *_Alt patterns lower those widths through the 512-bit instruction.
multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
                                  PatFrag OpNode, OpndItins itins,
                                  AVX512VLVectorVTInfo _, string Suffix> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info512, Suffix>,
           avx512_vptest_mb<opc, OpcodeStr, OpNode, itins, _.info512>,
           EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info256,
                              Suffix>,
                avx512_vptest_mb<opc, OpcodeStr, OpNode,itins, _.info256>,
                EVEX_V256;
    defm Z128 : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info128,
                              Suffix>,
                avx512_vptest_mb<opc, OpcodeStr, OpNode, itins, _.info128>,
                EVEX_V128;
  }

  let Predicates = [HasAVX512, NoVLX] in {
    defm Z256_Alt : avx512_vptest_lowering<OpNode, _.info512, _.info256,
                                           Suffix>;
    defm Z128_Alt : avx512_vptest_lowering<OpNode, _.info512, _.info128,
                                           Suffix>;
  }
}
5320
// Dword ("d") and qword ("q") variants of a test-under-mask instruction.
// The qword variant additionally sets VEX_W.
multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                            OpndItins itins> {
  defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", OpNode, itins,
                                  avx512vl_i32_info, "D">;
  defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", OpNode, itins,
                                  avx512vl_i64_info, "Q">,
           VEX_W;
}
5328
// Word ("w") and byte ("b") variants of a test-under-mask instruction.
// The word variants additionally set VEX_W. Only the rr/rm forms from
// avx512_vptest are instantiated here (no broadcast form).
//   512-bit forms     - require HasBWI.
//   128/256-bit forms - require HasVLX and HasBWI.
//   *_Alt patterns    - used when VLX is absent; they lower 128/256-bit
//                       tests through the 512-bit WZ/BZ instructions.
multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
                            PatFrag OpNode, OpndItins itins> {
  let Predicates = [HasBWI] in {
    defm WZ: avx512_vptest<opc, OpcodeStr#"w", OpNode, itins, v32i16_info,
                           "W">,
             EVEX_V512, VEX_W;
    defm BZ: avx512_vptest<opc, OpcodeStr#"b", OpNode, itins, v64i8_info,
                           "B">,
             EVEX_V512;
  }

  let Predicates = [HasVLX, HasBWI] in {
    defm WZ256: avx512_vptest<opc, OpcodeStr#"w", OpNode, itins,
                              v16i16x_info, "W">,
                EVEX_V256, VEX_W;
    defm WZ128: avx512_vptest<opc, OpcodeStr#"w", OpNode, itins,
                              v8i16x_info, "W">,
                EVEX_V128, VEX_W;
    defm BZ256: avx512_vptest<opc, OpcodeStr#"b", OpNode, itins,
                              v32i8x_info, "B">,
                EVEX_V256;
    defm BZ128: avx512_vptest<opc, OpcodeStr#"b", OpNode, itins,
                              v16i8x_info, "B">,
                EVEX_V128;
  }

  // The fallback patterns emit the 512-bit byte/word instructions defined
  // above under HasBWI, so they must be guarded by HasBWI as well; HasAVX512
  // alone does not guarantee those instructions exist.
  let Predicates = [HasBWI, NoVLX] in {
    defm BZ256_Alt : avx512_vptest_lowering<OpNode, v64i8_info, v32i8x_info,
                                            "B">;
    defm BZ128_Alt : avx512_vptest_lowering<OpNode, v64i8_info, v16i8x_info,
                                            "B">;
    defm WZ256_Alt : avx512_vptest_lowering<OpNode, v32i16_info, v16i16x_info,
                                            "W">;
    defm WZ128_Alt : avx512_vptest_lowering<OpNode, v32i16_info, v8i16x_info,
                                            "W">;
  }
}
5356
// Every element size of a test-under-mask instruction: byte/word forms use
// opc_wb, dword/qword forms use opc_dq.
multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq,
                                   string OpcodeStr, PatFrag OpNode,
                                   OpndItins itins> :
  avx512_vptest_wb <opc_wb, OpcodeStr, OpNode, itins>,
  avx512_vptest_dq<opc_dq, OpcodeStr, OpNode, itins>;

// vptestm and vptestnm share opcodes 0x26/0x27 and differ in prefix class
// (T8PD vs T8XS) and in the compare fragment they match.
defm VPTESTM  : avx512_vptest_all_forms<0x26, 0x27, "vptestm", X86pcmpnem,
                                        SSE_BIT_ITINS_P>, T8PD;
defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86pcmpeqm,
                                        SSE_BIT_ITINS_P>, T8XS;
Elena Demikhovskya30e4372014-02-05 07:05:03 +00005366
Cameron McInally9b7c15a2014-11-25 20:41:51 +00005367
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00005368//===----------------------------------------------------------------------===//
5369// AVX-512 Shift instructions
5370//===----------------------------------------------------------------------===//
// Shift by an 8-bit immediate: register-source (ri) and memory-source (mi)
// forms for one vector type.
//   ImmFormR / ImmFormM - instruction formats for the ri and mi forms.
//   itins               - itins.rr for ri, itins.rm for mi.
multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
                            string OpcodeStr, SDNode OpNode, OpndItins itins,
                            X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    // Register source.
    defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
                              (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
                              "$src2, $src1", "$src1, $src2",
                              (_.VT (OpNode _.RC:$src1, (i8 imm:$src2))),
                              itins.rr>,
              Sched<[itins.Sched]>;
    // Memory source: the shifted value is loaded from $src1.
    defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
                              (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
                              "$src2, $src1", "$src1, $src2",
                              (_.VT (OpNode
                                     (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                     (i8 imm:$src2))),
                              itins.rm>,
              Sched<[itins.Sched.Folded]>;
  }
}
5388
// Shift by an 8-bit immediate where the shifted value is a scalar load
// broadcast to the full vector (mbi form, EVEX_B).
multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
                             string OpcodeStr, SDNode OpNode, OpndItins itins,
                             X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
                             (ins _.ScalarMemOp:$src1, u8imm:$src2),
                             OpcodeStr,
                             "$src2, ${src1}"##_.BroadcastStr,
                             "${src1}"##_.BroadcastStr##", $src2",
                             (_.VT (OpNode
                                    (X86VBroadcast (_.ScalarLdFrag addr:$src1)),
                                    (i8 imm:$src2))),
                             itins.rm>,
             EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
5399
// Shift whose count is a vector operand: register (rr) and memory (rm)
// count forms for one vector type.
//   SrcVT   - value type given to the count register in the rr pattern.
//   bc_frag - fragment applied to the loadv2i64 of the count in the rm
//             pattern.
multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            OpndItins itins, ValueType SrcVT, PatFrag bc_frag,
                            X86VectorVTInfo _> {
  // src2 is always 128-bit
  let ExeDomain = _.ExeDomain in {
    // Count in a VR128X register.
    defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                              (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
                              "$src2, $src1", "$src1, $src2",
                              (_.VT (OpNode _.RC:$src1,
                                            (SrcVT VR128X:$src2))),
                              itins.rr>,
              AVX512BIBase, EVEX_4V, Sched<[itins.Sched]>;
    // Count loaded from a 128-bit memory operand.
    defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                              (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
                              "$src2, $src1", "$src1, $src2",
                              (_.VT (OpNode _.RC:$src1,
                                            (bc_frag (loadv2i64 addr:$src2)))),
                              itins.rm>,
              AVX512BIBase, EVEX_4V,
              Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
5418
// Vector-count shift at all three widths.
//   prd - predicate for the 512-bit form; the 128/256-bit forms require
//         prd plus HasVLX.
// The EVEX_CD8 tuple kind differs per width (CD8VQ / CD8VH / CD8VF for
// 512 / 256 / 128 bits), while the count operand is 128-bit in every case.
multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              OpndItins itins, ValueType SrcVT,
                              PatFrag bc_frag, AVX512VLVectorVTInfo VTInfo,
                              Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, itins, SrcVT, bc_frag,
                            VTInfo.info512>,
           EVEX_V512, EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, itins, SrcVT,
                                 bc_frag, VTInfo.info256>,
                EVEX_V256, EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
    defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, itins, SrcVT,
                                 bc_frag, VTInfo.info128>,
                EVEX_V128, EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
  }
}
5435
// Vector-count shift for dword, qword and word elements, each with its own
// opcode (opcd / opcq / opcw). The D and Q forms are predicated on
// HasAVX512, the W forms on HasBWI; the Q forms additionally set VEX_W.
multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
                              string OpcodeStr, SDNode OpNode,
                              OpndItins itins> {
  defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, itins, v4i32,
                              bc_v4i32, avx512vl_i32_info, HasAVX512>;
  defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, itins, v2i64,
                              bc_v2i64, avx512vl_i64_info, HasAVX512>,
           VEX_W;
  defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, itins, v8i16,
                              bc_v2i64, avx512vl_i16_info, HasBWI>;
}
5446
// Immediate-count shift at all three widths. Each width gets the ri/mi
// forms from avx512_shift_rmi and the mbi form from avx512_shift_rmbi.
// The 512-bit form requires HasAVX512; the 128/256-bit forms also require
// HasVLX.
multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR,
                                  Format ImmFormM, string OpcodeStr,
                                  SDNode OpNode, OpndItins itins,
                                  AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in
  defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, itins,
                           VTInfo.info512>,
          avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, itins,
                            VTInfo.info512>,
          EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                                itins, VTInfo.info256>,
               avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, itins,
                                 VTInfo.info256>,
               EVEX_V256;
    defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                                itins, VTInfo.info128>,
               avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, itins,
                                 VTInfo.info128>,
               EVEX_V128;
  }
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00005466
// Immediate-count shift for word elements. Only the ri/mi forms are
// instantiated (no broadcast form), and every width is marked VEX_WIG.
// The 512-bit form requires HasBWI; the 128/256-bit forms require HasVLX
// and HasBWI.
multiclass avx512_shift_rmi_w<bits<8> opcw,
                              Format ImmFormR, Format ImmFormM,
                              string OpcodeStr, SDNode OpNode,
                              OpndItins itins> {
  let Predicates = [HasBWI] in
  defm WZ: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                            itins, v32i16_info>,
           EVEX_V512, VEX_WIG;

  let Predicates = [HasVLX, HasBWI] in {
    defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                                 itins, v16i16x_info>,
                EVEX_V256, VEX_WIG;
    defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                                 itins, v8i16x_info>,
                EVEX_V128, VEX_WIG;
  }
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00005481
// Immediate-count shift for dword ("d", opcd) and qword ("q", opcq)
// elements. The qword variant additionally sets VEX_W.
multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
                               Format ImmFormR, Format ImmFormM,
                               string OpcodeStr, SDNode OpNode,
                               OpndItins itins> {
  defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d",
                                 OpNode, itins, avx512vl_i32_info>,
          EVEX_CD8<32, CD8VF>;
  defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q",
                                 OpNode, itins, avx512vl_i64_info>,
          EVEX_CD8<64, CD8VF>, VEX_W;
}
Cameron McInally9b7c15a2014-11-25 20:41:51 +00005490
// Immediate-count shifts. Each defm combines the dword/qword group
// (avx512_shift_rmi_dq) with the word group (avx512_shift_rmi_w); the
// MRMnr/MRMnm format pair distinguishes the operations that share an opcode.
defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m,
                                 "vpsrl", X86vsrli, SSE_INTSHIFT_P>,
             avx512_shift_rmi_w<0x71, MRM2r, MRM2m,
                                "vpsrlw", X86vsrli, SSE_INTSHIFT_P>,
             AVX512BIi8Base, EVEX_4V;

defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m,
                                 "vpsll", X86vshli, SSE_INTSHIFT_P>,
             avx512_shift_rmi_w<0x71, MRM6r, MRM6m,
                                "vpsllw", X86vshli, SSE_INTSHIFT_P>,
             AVX512BIi8Base, EVEX_4V;

// Note: the dword and qword forms of VPSRA use the same opcode byte (0x72).
defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m,
                                 "vpsra", X86vsrai, SSE_INTSHIFT_P>,
             avx512_shift_rmi_w<0x71, MRM4r, MRM4m,
                                "vpsraw", X86vsrai, SSE_INTSHIFT_P>,
             AVX512BIi8Base, EVEX_4V;

// Immediate-count rotates: dword/qword groups only, no word group.
defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m,
                                 "vpror", X86vrotri, SSE_INTSHIFT_P>,
             AVX512BIi8Base, EVEX_4V;
defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m,
                                 "vprol", X86vrotli, SSE_INTSHIFT_P>,
             AVX512BIi8Base, EVEX_4V;
Elena Demikhovsky0b9dbe32015-03-11 10:25:42 +00005510
// Shifts instantiated through avx512_shift_types (defined earlier in this
// file). The three opcode arguments are passed in the order that multiclass
// expects; VPSRA passes 0xE2 for both of the first two.
defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1,
                                "vpsll", X86vshl, SSE_INTSHIFT_P>;
defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1,
                                "vpsra", X86vsra, SSE_INTSHIFT_P>;
defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1,
                                "vpsrl", X86vsrl, SSE_INTSHIFT_P>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00005514
// Use the 512-bit VPSRA/VPSRAI forms to implement v2i64/v4i64 when VLX is not
// available. Each pattern inserts the narrow source into an otherwise
// undefined v8i64 register (sub_xmm/sub_ymm), applies the Z-form instruction,
// and extracts the same subregister from the result.
let Predicates = [HasAVX512, NoVLX] in {
  // Shift count supplied in an xmm register (X86vsra -> VPSRAQZrr).
  def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG
              (v8i64
                (VPSRAQZrr
                  (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                  VR128X:$src2)),
              sub_ymm)>;

  def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG
              (v8i64
                (VPSRAQZrr
                  (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                  VR128X:$src2)),
              sub_xmm)>;

  // Shift count supplied as an 8-bit immediate (X86vsrai -> VPSRAQZri).
  def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG
              (v8i64
                (VPSRAQZri
                  (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                  imm:$src2)),
              sub_ymm)>;

  def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG
              (v8i64
                (VPSRAQZri
                  (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                  imm:$src2)),
              sub_xmm)>;
}
5541
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00005542//===-------------------------------------------------------------------===//
5543// Variable Bit Shifts
5544//===-------------------------------------------------------------------===//
// Two-source operation whose second operand is a full vector, in register
// (rr) and memory (rm) forms, both built through AVX512_maskable.
//
//   opc        - opcode byte.
//   OpcodeStr  - mnemonic.
//   OpNode     - selection-DAG node applied to ($src1, $src2).
//   itins      - supplies itins.rr / itins.rm and itins.Sched.
//   _          - vector type info (register class, VT, memory operand, ...).
// The rm form loads $src2 with _.LdFrag, bitconverts it to _.VT, and adds
// EVEX_CD8<_.EltSize, CD8VF>.
multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm rr : AVX512_maskable<opc, MRMSrcReg, _,
                              (outs _.RC:$dst),
                              (ins _.RC:$src1, _.RC:$src2),
                              OpcodeStr,
                              "$src2, $src1", "$src1, $src2",
                              (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2))),
                              itins.rr>,
              AVX5128IBase, EVEX_4V,
              Sched<[itins.Sched]>;

    defm rm : AVX512_maskable<opc, MRMSrcMem, _,
                              (outs _.RC:$dst),
                              (ins _.RC:$src1, _.MemOp:$src2),
                              OpcodeStr,
                              "$src2, $src1", "$src1, $src2",
                              (_.VT (OpNode _.RC:$src1,
                                     (_.VT (bitconvert
                                             (_.LdFrag addr:$src2))))),
                              itins.rm>,
              AVX5128IBase, EVEX_4V,
              EVEX_CD8<_.EltSize, CD8VF>,
              Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
5564
// Broadcast-memory (rmb) companion to avx512_var_shift.
//
// $src2 is a scalar memory operand (_.ScalarMemOp) loaded with
// _.ScalarLdFrag and expanded with X86VBroadcast to _.VT before OpNode is
// applied. The assembly strings append _.BroadcastStr to the memory operand,
// and the record is tagged EVEX_B.
multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm rmb : AVX512_maskable<opc, MRMSrcMem, _,
                               (outs _.RC:$dst),
                               (ins _.RC:$src1, _.ScalarMemOp:$src2),
                               OpcodeStr,
                               "${src2}"##_.BroadcastStr##", $src1",
                               "$src1, ${src2}"##_.BroadcastStr,
                               (_.VT (OpNode _.RC:$src1,
                                      (_.VT (X86VBroadcast
                                              (_.ScalarLdFrag addr:$src2))))),
                               itins.rm>,
               AVX5128IBase, EVEX_B, EVEX_4V,
               EVEX_CD8<_.EltSize, CD8VF>,
               Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
Simon Pilgrim7f2a6d52017-01-13 13:16:19 +00005578
// Instantiates the rr/rm forms (avx512_var_shift) and the rmb form
// (avx512_var_shift_mb) for each width described by the AVX512VLVectorVTInfo
// argument: Z uses info512 under HasAVX512; Z256 and Z128 use info256 and
// info128 under HasAVX512 + HasVLX.
multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  OpndItins itins, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info512>,
             avx512_var_shift_mb<opc, OpcodeStr, OpNode, itins, _.info512>,
             EVEX_V512;
  }

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info256>,
                avx512_var_shift_mb<opc, OpcodeStr, OpNode, itins, _.info256>,
                EVEX_V256;
    defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info128>,
                avx512_var_shift_mb<opc, OpcodeStr, OpNode, itins, _.info128>,
                EVEX_V128;
  }
}
5592
// Dword and qword groups of a per-element variable operation. Both groups
// share the opcode; the mnemonic gets a "d" or "q" suffix, and the qword
// group is additionally tagged VEX_W.
multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode, OpndItins itins> {
  defm D : avx512_var_shift_sizes<opc, !strconcat(OpcodeStr, "d"), OpNode,
                                  itins, avx512vl_i32_info>;
  defm Q : avx512_var_shift_sizes<opc, !strconcat(OpcodeStr, "q"), OpNode,
                                  itins, avx512vl_i64_info>,
           VEX_W;
}
5600
// Use the 512-bit instruction to implement the 128/256-bit operation when VLX
// is not available.
//
//   _          - vector type family; info128/info256 give the matched types,
//                info512 gives the type of the widened operands.
//   OpcodeStr  - instruction name prefix; "Zrr" is appended to pick the
//                512-bit register-register record.
//   OpNode     - selection-DAG node to match.
//   p          - predicate list applied to both patterns.
// Both operands are inserted into undefined 512-bit values, and the
// matching subregister of the result is extracted.
multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
                                     SDNode OpNode, list<Predicate> p> {
  let Predicates = p in {
    // 256-bit operands, widened through sub_ymm.
    def : Pat<(_.info256.VT
                (OpNode (_.info256.VT _.info256.RC:$src1),
                        (_.info256.VT _.info256.RC:$src2))),
              (EXTRACT_SUBREG
                (!cast<Instruction>(OpcodeStr#"Zrr")
                  (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)),
                                 VR256X:$src1, sub_ymm),
                  (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)),
                                 VR256X:$src2, sub_ymm)),
                sub_ymm)>;

    // 128-bit operands, widened through sub_xmm.
    def : Pat<(_.info128.VT
                (OpNode (_.info128.VT _.info128.RC:$src1),
                        (_.info128.VT _.info128.RC:$src2))),
              (EXTRACT_SUBREG
                (!cast<Instruction>(OpcodeStr#"Zrr")
                  (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)),
                                 VR128X:$src1, sub_xmm),
                  (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)),
                                 VR128X:$src2, sub_xmm)),
                sub_xmm)>;
  }
}
// Word-element variants of a per-element variable operation. Only the rr/rm
// forms from avx512_var_shift are instantiated here (no broadcast form).
// WZ requires HasBWI; WZ256 and WZ128 additionally require HasVLX. All three
// are tagged VEX_W.
multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
                              SDNode OpNode, OpndItins itins> {
  let Predicates = [HasBWI] in {
    defm WZ : avx512_var_shift<opc, OpcodeStr, OpNode, itins, v32i16_info>,
              EVEX_V512, VEX_W;
  }

  let Predicates = [HasVLX, HasBWI] in {
    defm WZ256 : avx512_var_shift<opc, OpcodeStr, OpNode, itins, v16i16x_info>,
                 EVEX_V256, VEX_W;
    defm WZ128 : avx512_var_shift<opc, OpcodeStr, OpNode, itins, v8i16x_info>,
                 EVEX_V128, VEX_W;
  }
}
5635
// Per-element variable shifts: dword/qword groups plus a word group that
// uses its own opcode and mnemonic.
defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl, SSE_INTSHIFT_P>,
              avx512_var_shift_w<0x12, "vpsllvw", shl, SSE_INTSHIFT_P>;

defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra, SSE_INTSHIFT_P>,
              avx512_var_shift_w<0x11, "vpsravw", sra, SSE_INTSHIFT_P>;

defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl, SSE_INTSHIFT_P>,
              avx512_var_shift_w<0x10, "vpsrlvw", srl, SSE_INTSHIFT_P>;

// Per-element variable rotates: dword/qword groups only.
defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SSE_INTSHIFT_P>;
defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SSE_INTSHIFT_P>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00005647
// NoVLX widening patterns (see avx512_var_shift_lowering): the 64-bit
// arithmetic right shift needs only HasAVX512, the 16-bit shifts need HasBWI.
defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", sra,
                                 [HasAVX512, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", shl,
                                 [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", sra,
                                 [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", srl,
                                 [HasBWI, NoVLX]>;
5652
// Special handling for VPSRAV intrinsics.
// Selection patterns mapping the X86vsrav node onto an existing instruction
// family.
//
//   InstrStr - instruction name prefix; _.ZSuffix and a form suffix
//              (rr, rm, rrk, rmk, rrkz, rmkz) are pasted on to find the record.
//   _        - vector type info for the matched type.
//   p        - predicate list applied to every pattern.
// Six patterns are emitted: unmasked, merge-masked (vselect with $src0) and
// zero-masked (vselect with _.ImmAllZerosV), each for a register and a
// full-vector memory second operand.
multiclass avx512_var_shift_int_lowering<string InstrStr, X86VectorVTInfo _,
                                         list<Predicate> p> {
  let Predicates = p in {
    // Unmasked.
    def : Pat<(_.VT (X86vsrav _.RC:$src1, _.RC:$src2)),
              (!cast<Instruction>(InstrStr#_.ZSuffix#rr)
                 _.RC:$src1, _.RC:$src2)>;
    def : Pat<(_.VT (X86vsrav _.RC:$src1,
                              (bitconvert (_.LdFrag addr:$src2)))),
              (!cast<Instruction>(InstrStr#_.ZSuffix##rm)
                 _.RC:$src1, addr:$src2)>;

    // Merge-masked: unselected elements come from $src0.
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                             (X86vsrav _.RC:$src1, _.RC:$src2),
                             _.RC:$src0)),
              (!cast<Instruction>(InstrStr#_.ZSuffix#rrk)
                 _.RC:$src0, _.KRC:$mask, _.RC:$src1, _.RC:$src2)>;
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                             (X86vsrav _.RC:$src1,
                                       (bitconvert (_.LdFrag addr:$src2))),
                             _.RC:$src0)),
              (!cast<Instruction>(InstrStr#_.ZSuffix##rmk)
                 _.RC:$src0, _.KRC:$mask, _.RC:$src1, addr:$src2)>;

    // Zero-masked: unselected elements come from _.ImmAllZerosV.
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                             (X86vsrav _.RC:$src1, _.RC:$src2),
                             _.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr#_.ZSuffix#rrkz)
                 _.KRC:$mask, _.RC:$src1, _.RC:$src2)>;
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                             (X86vsrav _.RC:$src1,
                                       (bitconvert (_.LdFrag addr:$src2))),
                             _.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr#_.ZSuffix##rmkz)
                 _.KRC:$mask, _.RC:$src1, addr:$src2)>;
  }
}
5683
// Extends avx512_var_shift_int_lowering with three patterns for a
// broadcast-from-scalar-memory second operand, selecting the rmb, rmbk and
// rmbkz records (unmasked, merge-masked, zero-masked).
multiclass avx512_var_shift_int_lowering_mb<string InstrStr, X86VectorVTInfo _,
                                            list<Predicate> p> :
           avx512_var_shift_int_lowering<InstrStr, _, p> {
  let Predicates = p in {
    // Unmasked.
    def : Pat<(_.VT (X86vsrav _.RC:$src1,
                              (X86VBroadcast (_.ScalarLdFrag addr:$src2)))),
              (!cast<Instruction>(InstrStr#_.ZSuffix##rmb)
                 _.RC:$src1, addr:$src2)>;

    // Merge-masked: unselected elements come from $src0.
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                             (X86vsrav _.RC:$src1,
                                       (X86VBroadcast
                                         (_.ScalarLdFrag addr:$src2))),
                             _.RC:$src0)),
              (!cast<Instruction>(InstrStr#_.ZSuffix##rmbk)
                 _.RC:$src0, _.KRC:$mask, _.RC:$src1, addr:$src2)>;

    // Zero-masked: unselected elements come from _.ImmAllZerosV.
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                             (X86vsrav _.RC:$src1,
                                       (X86VBroadcast
                                         (_.ScalarLdFrag addr:$src2))),
                             _.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr#_.ZSuffix##rmbkz)
                 _.KRC:$mask, _.RC:$src1, addr:$src2)>;
  }
}
5706
// X86vsrav patterns for the word forms (no broadcast patterns).
defm : avx512_var_shift_int_lowering<"VPSRAVW", v8i16x_info,
                                     [HasVLX, HasBWI]>;
defm : avx512_var_shift_int_lowering<"VPSRAVW", v16i16x_info,
                                     [HasVLX, HasBWI]>;
defm : avx512_var_shift_int_lowering<"VPSRAVW", v32i16_info, [HasBWI]>;

// X86vsrav patterns for the dword forms, including broadcast patterns.
defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v4i32x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v8i32x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v16i32_info, [HasAVX512]>;

// X86vsrav patterns for the qword forms, including broadcast patterns.
defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v2i64x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v4i64x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v8i64_info, [HasAVX512]>;
5716
Simon Pilgrim1cbe8c22017-07-17 14:11:30 +00005717
// Use the 512-bit VPROLV/VPROLI forms to implement v2i64/v4i64 and
// v4i32/v8i32 left rotates when VLX is not available. Sources are inserted
// into undefined 512-bit values via sub_xmm/sub_ymm and the same subregister
// is extracted from the result.
let Predicates = [HasAVX512, NoVLX] in {
  // Variable rotate, 64-bit elements.
  def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG
              (v8i64
                (VPROLVQZrr
                  (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                  (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
              sub_xmm)>;
  def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG
              (v8i64
                (VPROLVQZrr
                  (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                  (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
              sub_ymm)>;

  // Variable rotate, 32-bit elements.
  def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
            (EXTRACT_SUBREG
              (v16i32
                (VPROLVDZrr
                  (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                  (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
              sub_xmm)>;
  def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG
              (v16i32
                (VPROLVDZrr
                  (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                  (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
              sub_ymm)>;

  // Immediate rotate, 64-bit elements.
  def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG
              (v8i64
                (VPROLQZri
                  (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                  imm:$src2)),
              sub_xmm)>;
  def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG
              (v8i64
                (VPROLQZri
                  (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                  imm:$src2)),
              sub_ymm)>;

  // Immediate rotate, 32-bit elements.
  def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG
              (v16i32
                (VPROLDZri
                  (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                  imm:$src2)),
              sub_xmm)>;
  def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG
              (v16i32
                (VPROLDZri
                  (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                  imm:$src2)),
              sub_ymm)>;
}
5768
// Use the 512-bit VPRORV/VPRORI forms to implement v2i64/v4i64 and
// v4i32/v8i32 right rotates when VLX is not available. Sources are inserted
// into undefined 512-bit values via sub_xmm/sub_ymm and the same subregister
// is extracted from the result.
let Predicates = [HasAVX512, NoVLX] in {
  // Variable rotate, 64-bit elements.
  def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG
              (v8i64
                (VPRORVQZrr
                  (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                  (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
              sub_xmm)>;
  def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG
              (v8i64
                (VPRORVQZrr
                  (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                  (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
              sub_ymm)>;

  // Variable rotate, 32-bit elements.
  def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
            (EXTRACT_SUBREG
              (v16i32
                (VPRORVDZrr
                  (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                  (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
              sub_xmm)>;
  def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG
              (v16i32
                (VPRORVDZrr
                  (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                  (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
              sub_ymm)>;

  // Immediate rotate, 64-bit elements.
  def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG
              (v8i64
                (VPRORQZri
                  (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                  imm:$src2)),
              sub_xmm)>;
  def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG
              (v8i64
                (VPRORQZri
                  (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                  imm:$src2)),
              sub_ymm)>;

  // Immediate rotate, 32-bit elements.
  def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG
              (v16i32
                (VPRORDZri
                  (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                  imm:$src2)),
              sub_xmm)>;
  def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG
              (v16i32
                (VPRORDZri
                  (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                  imm:$src2)),
              sub_ymm)>;
}
5819
Elena Demikhovsky4078c752015-06-04 07:07:13 +00005820//===-------------------------------------------------------------------===//
5821// 1-src variable permutation VPERMW/D/Q
5822//===-------------------------------------------------------------------===//
// 512-bit (Z) and 256-bit (Z256) instantiations of the rr/rm/rmb forms,
// reusing avx512_var_shift and avx512_var_shift_mb. No 128-bit record is
// defined by this multiclass. Z requires HasAVX512; Z256 additionally
// requires HasVLX.
multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 OpndItins itins, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info512>,
             avx512_var_shift_mb<opc, OpcodeStr, OpNode, itins, _.info512>,
             EVEX_V512;
  }

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info256>,
                avx512_var_shift_mb<opc, OpcodeStr, OpNode, itins, _.info256>,
                EVEX_V256;
  }
}
5833
// Immediate-operand counterpart of avx512_vperm_dq_sizes: 512-bit (Z) and
// 256-bit (Z256) instantiations built from avx512_shift_rmi (register and
// memory forms) and avx512_shift_rmbi (broadcast-memory form). No 128-bit
// record is defined. Z requires HasAVX512; Z256 additionally requires HasVLX.
multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
                                  string OpcodeStr, SDNode OpNode,
                                  OpndItins itins,
                                  AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              itins, VTInfo.info512>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                               itins, VTInfo.info512>,
             EVEX_V512;
  }

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                                 itins, VTInfo.info256>,
                avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                                  itins, VTInfo.info256>,
                EVEX_V256;
  }
}
5848
// Byte/word instantiations of the rr/rm forms (avx512_var_shift only, no
// broadcast form) at all three widths. The feature predicate is a parameter:
// Z requires prd; Z256 and Z128 require HasVLX and prd.
multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
                           Predicate prd, SDNode OpNode,
                           OpndItins itins, AVX512VLVectorVTInfo _> {
  let Predicates = [prd] in {
    defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info512>,
             EVEX_V512;
  }

  let Predicates = [HasVLX, prd] in {
    defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info256>,
                EVEX_V256;
    defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info128>,
                EVEX_V128;
  }
}
Elena Demikhovsky4078c752015-06-04 07:07:13 +00005862
// Byte/word forms matched on X86VPermv. VPERMW and VPERMB share opcode 0x8D;
// VPERMW is gated on HasBWI and tagged VEX_W, VPERMB is gated on HasVBMI.
defm VPERMW : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
                              AVX2_PERMV_I, avx512vl_i16_info>,
              VEX_W;
defm VPERMB : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
                              AVX2_PERMV_I, avx512vl_i8_info>;

// Dword/qword and single/double forms matched on X86VPermv; the 64-bit
// element variants add VEX_W.
defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
                                    AVX2_PERMV_I, avx512vl_i32_info>;
defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
                                    AVX2_PERMV_I, avx512vl_i64_info>,
              VEX_W;
defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
                                     AVX2_PERMV_F, avx512vl_f32_info>;
defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
                                     AVX2_PERMV_F, avx512vl_f64_info>,
               VEX_W;

// Immediate-operand forms matched on X86VPermi.
defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
                                     X86VPermi, AVX2_PERMV_I,
                                     avx512vl_i64_info>,
              EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
                                      X86VPermi, AVX2_PERMV_F,
                                      avx512vl_f64_info>,
               EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
Igor Breger78741a12015-10-04 07:20:41 +00005883//===----------------------------------------------------------------------===//
Simon Pilgrim18bcf932016-02-03 09:41:59 +00005884// AVX-512 - VPERMIL
Igor Breger78741a12015-10-04 07:20:41 +00005885//===----------------------------------------------------------------------===//
Elena Demikhovsky4078c752015-06-04 07:07:13 +00005886
// Forms whose second operand is a control vector of a separate type family.
//
//   OpcVar     - opcode byte.
//   OpcodeStr  - mnemonic.
//   OpNode     - selection-DAG node applied to ($src1, control).
//   itins      - supplies itins.rr / itins.rm and itins.Sched.
//   _          - type info for the data operand and the result.
//   Ctrl       - type info for the control operand ($src2).
// Three forms are emitted through AVX512_maskable: rr (control in a Ctrl.RC
// register), rm (control loaded with Ctrl.LdFrag) and rmb (control loaded
// with Ctrl.ScalarLdFrag and broadcast). The rmb memory operand class and
// broadcast string are taken from _ rather than Ctrl.
multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
                             OpndItins itins, X86VectorVTInfo _,
                             X86VectorVTInfo Ctrl> {
  defm rr : AVX512_maskable<OpcVar, MRMSrcReg, _,
                            (outs _.RC:$dst),
                            (ins _.RC:$src1, Ctrl.RC:$src2),
                            OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_.VT (OpNode _.RC:$src1,
                                          (Ctrl.VT Ctrl.RC:$src2))),
                            itins.rr>,
            T8PD, EVEX_4V, Sched<[itins.Sched]>;

  defm rm : AVX512_maskable<OpcVar, MRMSrcMem, _,
                            (outs _.RC:$dst),
                            (ins _.RC:$src1, Ctrl.MemOp:$src2),
                            OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_.VT (OpNode
                                    _.RC:$src1,
                                    (Ctrl.VT (bitconvert
                                               (Ctrl.LdFrag addr:$src2))))),
                            itins.rm>,
            T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
            Sched<[itins.Sched.Folded, ReadAfterLd]>;

  defm rmb : AVX512_maskable<OpcVar, MRMSrcMem, _,
                             (outs _.RC:$dst),
                             (ins _.RC:$src1, _.ScalarMemOp:$src2),
                             OpcodeStr,
                             "${src2}"##_.BroadcastStr##", $src1",
                             "$src1, ${src2}"##_.BroadcastStr,
                             (_.VT (OpNode
                                     _.RC:$src1,
                                     (Ctrl.VT (X86VBroadcast
                                                (Ctrl.ScalarLdFrag addr:$src2))))),
                             itins.rm>,
             T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
5915
// Instantiates avx512_permil_vec with the X86VPermilpv node at all three
// widths, pairing each data info with the control info of the same width.
// Z requires HasAVX512; Z128 and Z256 additionally require HasVLX.
multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
                                    OpndItins itins, AVX512VLVectorVTInfo _,
                                    AVX512VLVectorVTInfo Ctrl> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, itins,
                               _.info512, Ctrl.info512>,
             EVEX_V512;
  }

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, itins,
                                  _.info128, Ctrl.info128>,
                EVEX_V128;
    defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, itins,
                                  _.info256, Ctrl.info256>,
                EVEX_V256;
  }
}
5930
// Combines both operand styles under one defm name:
//   - the vector-control forms (opcode OpcVar, node X86VPermilpv) via
//     avx512_permil_vec_common, and
//   - the immediate-control forms (opcode OpcImm, node X86VPermilpi) via
//     avx512_shift_rmi_sizes.
// Both use the AVX_VPERMILV itinerary bundle.
multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
                         AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl> {
  defm NAME : avx512_permil_vec_common<OpcodeStr, OpcVar, AVX_VPERMILV,
                                       _, Ctrl>;

  defm NAME : avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
                                     X86VPermilpi, AVX_VPERMILV, _>,
              EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
5938
// Single-precision form: f32 data with i32 control vectors.
let ExeDomain = SSEPackedSingle in {
  defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C,
                                 avx512vl_f32_info, avx512vl_i32_info>;
}

// Double-precision form: f64 data with i64 control vectors, tagged VEX_W.
let ExeDomain = SSEPackedDouble in {
  defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D,
                                 avx512vl_f64_info, avx512vl_i64_info>,
                   VEX_W;
}
Simon Pilgrim1401a752017-11-29 14:58:34 +00005945
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00005946//===----------------------------------------------------------------------===//
Elena Demikhovsky75ede682015-06-01 07:17:23 +00005947// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
5948//===----------------------------------------------------------------------===//
5949
// All three share opcode 0x70 and differ in their prefix base class
// (AVX512BIi8Base / AVX512XSIi8Base / AVX512XDIi8Base).
defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
                                      X86PShufd, SSE_PSHUF, avx512vl_i32_info>,
               EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;

// The word forms reuse avx512_shift_rmi_w, so the generated records take its
// WZ/WZ256/WZ128 suffixes.
defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
                                  X86PShufhw, SSE_PSHUF>,
               EVEX, AVX512XSIi8Base;
defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
                                  X86PShuflw, SSE_PSHUF>,
               EVEX, AVX512XDIi8Base;
Michael Liao66233b72015-08-06 09:06:20 +00005957
// Byte-element instantiations of the rr/rm forms (avx512_var_shift) at all
// three widths. Z requires HasBWI; Z256 and Z128 additionally require HasVLX.
multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               OpndItins itins> {
  let Predicates = [HasBWI] in {
    defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, itins, v64i8_info>,
             EVEX_V512;
  }

  let Predicates = [HasVLX, HasBWI] in {
    defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, itins, v32i8x_info>,
                EVEX_V256;
    defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, itins, v16i8x_info>,
                EVEX_V128;
  }
}
5968
// Opcode 0x00, matched on X86pshufb, tagged VEX_WIG.
defm VPSHUFB : avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb, SSE_PSHUFB>,
               VEX_WIG;
Elena Demikhovsky55a99742015-06-22 13:00:42 +00005970
Elena Demikhovsky75ede682015-06-01 07:17:23 +00005971//===----------------------------------------------------------------------===//
Elena Demikhovsky0a74b7d2013-11-14 11:29:27 +00005972// Move Low to High and High to Low packed FP Instructions
5973//===----------------------------------------------------------------------===//
// Register-register form selecting X86Movlhps on v4f32 (opcode 0x16).
def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg,
                            (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            [(set VR128X:$dst,
                                  (v4f32 (X86Movlhps VR128X:$src1,
                                                     VR128X:$src2)))],
                            IIC_SSE_MOV_LH>,
                  EVEX_4V;

// Register-register form selecting X86Movhlps on v4f32 (opcode 0x12).
def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg,
                            (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            [(set VR128X:$dst,
                                  (v4f32 (X86Movhlps VR128X:$src1,
                                                     VR128X:$src2)))],
                            IIC_SSE_MOV_LH>,
                  EVEX_4V;
5984
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00005985//===----------------------------------------------------------------------===//
Igor Bregerb6b27af2015-11-10 07:09:07 +00005986// VMOVHPS/PD VMOVLPS Instructions
// All patterns were taken from the SSE implementation.
5988//===----------------------------------------------------------------------===//
// Memory form (rm) of a move that combines a register source with a 64-bit
// memory operand.
//
//   opc        - opcode byte.
//   OpcodeStr  - mnemonic; the three-operand assembly string is appended.
//   OpNode     - selection-DAG node applied to ($src1, loaded value).
//   _          - type info giving the register class, VT and ExeDomain.
// The memory operand is loaded with loadf64, wrapped with scalar_to_vector
// as v2f64, and bitconverted to _.VT before being passed to OpNode.
multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    def rm : AVX512<opc, MRMSrcMem,
                    (outs _.RC:$dst),
                    (ins _.RC:$src1, f64mem:$src2),
                    OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.RC:$dst,
                          (OpNode _.RC:$src1,
                                  (_.VT (bitconvert
                                          (v2f64 (scalar_to_vector
                                                   (loadf64 addr:$src2)))))))],
                    IIC_SSE_MOV_LH>,
             EVEX_4V;
  }
}
6002
// 128-bit instantiations of avx512_mov_hilo_packed.
//   vmovhps / vmovhpd use opcode 0x16; vmovlps / vmovlpd use opcode 0x12.
//   PS forms: v4f32x_info, EVEX_CD8<32, CD8VT2>, PS prefix.
//   PD forms: v2f64x_info, EVEX_CD8<64, CD8VT1>, PD prefix, VEX_W.
// Note that VMOVHPD selects on X86Unpckl while VMOVHPS selects on X86Movlhps.
6003defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", X86Movlhps,
6004 v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
Craig Topper3b11fca2017-09-18 00:20:53 +00006005defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
Igor Bregerb6b27af2015-11-10 07:09:07 +00006006 v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
6007defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", X86Movlps,
6008 v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6009defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movlpd,
6010 v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
6011
// Extra load-folding selection patterns, guarded by HasAVX512, that map
// alternative DAG shapes of the second operand onto the rm instructions
// instantiated above.
6012let Predicates = [HasAVX512] in {
6013 // VMOVHPS patterns
// The 64-bit value may come from an integer load wrapped in scalar_to_vector
// or from an X86vzload; both are bitcast from v2i64 to v4f32.
6014 def : Pat<(X86Movlhps VR128X:$src1,
6015 (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
6016 (VMOVHPSZ128rm VR128X:$src1, addr:$src2)>;
6017 def : Pat<(X86Movlhps VR128X:$src1,
Craig Topper0a197df2017-09-17 18:59:32 +00006018 (bc_v4f32 (v2i64 (X86vzload addr:$src2)))),
Igor Bregerb6b27af2015-11-10 07:09:07 +00006019 (VMOVHPSZ128rm VR128X:$src1, addr:$src2)>;
6020 // VMOVHPD patterns
6021 def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
Igor Bregerb6b27af2015-11-10 07:09:07 +00006022 (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
6023 (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
6024 // VMOVLPS patterns
6025 def : Pat<(v4f32 (X86Movlps VR128X:$src1, (load addr:$src2))),
6026 (VMOVLPSZ128rm VR128X:$src1, addr:$src2)>;
Igor Bregerb6b27af2015-11-10 07:09:07 +00006027 // VMOVLPD patterns
6028 def : Pat<(v2f64 (X86Movlpd VR128X:$src1, (load addr:$src2))),
6029 (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
// A scalar f64 load merged through X86Movsd is also selected as VMOVLPD.
Igor Bregerb6b27af2015-11-10 07:09:07 +00006030 def : Pat<(v2f64 (X86Movsd VR128X:$src1,
6031 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
6032 (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
6033}
6034
// Store forms (MRMDestMem, no register outputs). Each stores one f64 element
// extracted from the VR128X source to an f64mem destination.
//   High-half stores (opcode 0x17) extract element 0 of X86Unpckh(src, src).
//   Low-half stores (opcode 0x13) extract element 0 of the source itself.
//   PS forms first bitcast the v4f32 source to v2f64 and use
//   EVEX_CD8<32, CD8VT2>; PD forms use EVEX_CD8<64, CD8VT1> and VEX_W.
Igor Bregerb6b27af2015-11-10 07:09:07 +00006035def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
6036 (ins f64mem:$dst, VR128X:$src),
6037 "vmovhps\t{$src, $dst|$dst, $src}",
Craig Topperc9b19232016-05-01 04:59:44 +00006038 [(store (f64 (extractelt
Igor Bregerb6b27af2015-11-10 07:09:07 +00006039 (X86Unpckh (bc_v2f64 (v4f32 VR128X:$src)),
6040 (bc_v2f64 (v4f32 VR128X:$src))),
6041 (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>,
6042 EVEX, EVEX_CD8<32, CD8VT2>;
6043def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
6044 (ins f64mem:$dst, VR128X:$src),
6045 "vmovhpd\t{$src, $dst|$dst, $src}",
Craig Topperc9b19232016-05-01 04:59:44 +00006046 [(store (f64 (extractelt
Igor Bregerb6b27af2015-11-10 07:09:07 +00006047 (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
6048 (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>,
6049 EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
6050def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
6051 (ins f64mem:$dst, VR128X:$src),
6052 "vmovlps\t{$src, $dst|$dst, $src}",
Craig Topperc9b19232016-05-01 04:59:44 +00006053 [(store (f64 (extractelt (bc_v2f64 (v4f32 VR128X:$src)),
Igor Bregerb6b27af2015-11-10 07:09:07 +00006054 (iPTR 0))), addr:$dst)],
6055 IIC_SSE_MOV_LH>,
6056 EVEX, EVEX_CD8<32, CD8VT2>;
6057def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
6058 (ins f64mem:$dst, VR128X:$src),
6059 "vmovlpd\t{$src, $dst|$dst, $src}",
Craig Topperc9b19232016-05-01 04:59:44 +00006060 [(store (f64 (extractelt (v2f64 VR128X:$src),
Igor Bregerb6b27af2015-11-10 07:09:07 +00006061 (iPTR 0))), addr:$dst)],
6062 IIC_SSE_MOV_LH>,
6063 EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
Craig Toppere1cac152016-06-07 07:27:54 +00006064
// Extra store selection patterns, guarded by HasAVX512, that map onto the mr
// instructions defined above.
Igor Bregerb6b27af2015-11-10 07:09:07 +00006065let Predicates = [HasAVX512] in {
6066 // VMOVHPD patterns
// Storing element 0 of X86VPermilpi(src, 1) is selected as a VMOVHPD store.
Craig Topperc9b19232016-05-01 04:59:44 +00006067 def : Pat<(store (f64 (extractelt
Igor Bregerb6b27af2015-11-10 07:09:07 +00006068 (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
6069 (iPTR 0))), addr:$dst),
6070 (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
6071 // VMOVLPS patterns
// Load / X86Movlps / store back to the same address ($src1) becomes a single
// VMOVLPS store of $src2.
6072 def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128X:$src2)),
6073 addr:$src1),
6074 (VMOVLPSZ128mr addr:$src1, VR128X:$src2)>;
Igor Bregerb6b27af2015-11-10 07:09:07 +00006075 // VMOVLPD patterns
// Same load-merge-store shape as above, using X86Movlpd on v2f64.
6076 def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128X:$src2)),
6077 addr:$src1),
6078 (VMOVLPDZ128mr addr:$src1, VR128X:$src2)>;
Igor Bregerb6b27af2015-11-10 07:09:07 +00006079}
6080//===----------------------------------------------------------------------===//
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006081// FMA - Fused Multiply Operations
6082//
Adam Nemet26371ce2014-10-24 00:02:55 +00006083
// Packed FMA, "213" operand form: register (r), full-vector memory (m) and
// broadcast-memory (mb) variants built on AVX512_maskable_3src.
//   opc       - opcode byte.
//   OpcodeStr - assembly mnemonic.
//   OpNode    - SDNode used in the selection patterns.
//   _         - vector type info (RC, VT, MemOp, LdFrag, ...).
//   Suff      - not referenced anywhere in this body.
// $src1 is tied to $dst. In every pattern OpNode receives its operands in the
// order ($src2, $src1, $src3); only the source of $src3 changes between
// variants (register, _.LdFrag load, or X86VBroadcast of a scalar load).
// NOTE(review): the trailing "1, 1" / "1, 0" arguments are positional
// parameters of AVX512_maskable_3src, which is defined outside this section;
// presumably commutability flags - confirm against its definition.
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006084multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
Craig Topper318e40b2016-07-25 07:20:31 +00006085 X86VectorVTInfo _, string Suff> {
Craig Topperb16598d2017-09-01 07:58:16 +00006086 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
Adam Nemet34801422014-10-08 23:25:39 +00006087 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
Adam Nemet6bddb8c2014-09-29 22:54:41 +00006088 (ins _.RC:$src2, _.RC:$src3),
Adam Nemet2e91ee52014-08-14 17:13:19 +00006089 OpcodeStr, "$src3, $src2", "$src2, $src3",
Simon Pilgrim6a009702017-11-29 17:21:15 +00006090 (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), NoItinerary, 1, 1>,
Simon Pilgrim97160be2017-11-27 10:41:32 +00006091 AVX512FMA3Base, Sched<[WriteFMA]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006092
Craig Toppere1cac152016-06-07 07:27:54 +00006093 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6094 (ins _.RC:$src2, _.MemOp:$src3),
6095 OpcodeStr, "$src3, $src2", "$src2, $src3",
Simon Pilgrim6a009702017-11-29 17:21:15 +00006096 (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))),
Craig Topper468a8132017-12-12 07:06:35 +00006097 NoItinerary, 1, 0>, AVX512FMA3Base, Sched<[WriteFMALd, ReadAfterLd]>;
Elena Demikhovskyfcea06a2014-12-23 10:30:39 +00006098
Craig Toppere1cac152016-06-07 07:27:54 +00006099 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6100 (ins _.RC:$src2, _.ScalarMemOp:$src3),
6101 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
6102 !strconcat("$src2, ${src3}", _.BroadcastStr ),
Craig Topper6bcbf532016-07-25 07:20:28 +00006103 (OpNode _.RC:$src2,
Simon Pilgrim6a009702017-11-29 17:21:15 +00006104 _.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))),
6105 NoItinerary, 1, 0>, AVX512FMA3Base, EVEX_B,
Craig Topper468a8132017-12-12 07:06:35 +00006106 Sched<[WriteFMALd, ReadAfterLd]>;
Craig Topper5ec33a92016-07-22 05:00:42 +00006107 }
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006108}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006109
// Packed FMA, "213" operand form, register variant with an explicit rounding
// operand (rb). Takes an extra AVX512RC:$rc input that is forwarded to OpNode
// as (i32 imm:$rc) after the three data operands ($src2, $src1, $src3), and
// is tagged EVEX_B, EVEX_RC. $src1 is tied to $dst. Suff is not referenced.
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006110multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
Craig Topper318e40b2016-07-25 07:20:31 +00006111 X86VectorVTInfo _, string Suff> {
Craig Topperb16598d2017-09-01 07:58:16 +00006112 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006113 defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
Elena Demikhovsky7b0dd392015-01-28 10:21:27 +00006114 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6115 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
Simon Pilgrim6a009702017-11-29 17:21:15 +00006116 (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 imm:$rc))),
6117 NoItinerary, 1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[WriteFMA]>;
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006118}
Elena Demikhovsky7b0dd392015-01-28 10:21:27 +00006119
// Instantiates the 213 packed FMA forms for all three vector lengths.
//   Z    (512-bit, HasAVX512): r/m/mb forms plus the rb rounding form, which
//        uses OpNodeRnd.
//   Z256 / Z128 (HasVLX, HasAVX512): r/m/mb forms only; no rounding variant.
// Each length takes its EVEX_CD8 element size from the matching info record.
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006120multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
Craig Topper318e40b2016-07-25 07:20:31 +00006121 SDNode OpNodeRnd, AVX512VLVectorVTInfo _,
6122 string Suff> {
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006123 let Predicates = [HasAVX512] in {
Craig Topper318e40b2016-07-25 07:20:31 +00006124 defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info512, Suff>,
6125 avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, _.info512,
6126 Suff>, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
Elena Demikhovskyfcea06a2014-12-23 10:30:39 +00006127 }
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006128 let Predicates = [HasVLX, HasAVX512] in {
Craig Topper318e40b2016-07-25 07:20:31 +00006129 defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info256, Suff>,
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006130 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
Craig Topper318e40b2016-07-25 07:20:31 +00006131 defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info128, Suff>,
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006132 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
Elena Demikhovskyfcea06a2014-12-23 10:30:39 +00006133 }
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006134}
6135
// Expands one 213 packed FMA opcode into its PS and PD flavours: "ps" is
// appended to the mnemonic for avx512vl_f32_info, "pd" for avx512vl_f64_info.
// The PD flavour additionally carries VEX_W.
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006136multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
Craig Topper318e40b2016-07-25 07:20:31 +00006137 SDNode OpNodeRnd > {
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006138 defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
Craig Topper318e40b2016-07-25 07:20:31 +00006139 avx512vl_f32_info, "PS">;
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006140 defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
Craig Topper318e40b2016-07-25 07:20:31 +00006141 avx512vl_f64_info, "PD">, VEX_W;
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006142}
6143
// 213-form packed FMA instructions. Each line binds an opcode and mnemonic
// stem to its plain SDNode and its rounding-mode SDNode.
Craig Topperaf0b9922017-09-04 06:59:50 +00006144defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", X86Fmadd, X86FmaddRnd>;
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006145defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>;
6146defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>;
6147defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>;
6148defm VFNMADD213 : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86Fnmadd, X86FnmaddRnd>;
6149defm VFNMSUB213 : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>;
6150
Elena Demikhovskyfcea06a2014-12-23 10:30:39 +00006151
// Packed FMA, "231" operand form: register (r), full-vector memory (m) and
// broadcast-memory (mb) variants built on AVX512_maskable_3src.
// $src1 is tied to $dst. In every pattern OpNode receives its operands in the
// order ($src2, $src3, $src1), i.e. the tied operand comes last; only the
// source of $src3 changes between variants. Suff is not referenced.
// NOTE(review): the r form passes two extra positional arguments
// ("vselect, 1") that the m and mb forms omit; their meaning is defined by
// AVX512_maskable_3src outside this section - confirm there.
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006152multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
Craig Topper318e40b2016-07-25 07:20:31 +00006153 X86VectorVTInfo _, string Suff> {
Craig Topperb16598d2017-09-01 07:58:16 +00006154 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006155 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6156 (ins _.RC:$src2, _.RC:$src3),
6157 OpcodeStr, "$src3, $src2", "$src2, $src3",
Simon Pilgrim6a009702017-11-29 17:21:15 +00006158 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), NoItinerary, 1, 1,
6159 vselect, 1>, AVX512FMA3Base, Sched<[WriteFMA]>;
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006160
Craig Toppere1cac152016-06-07 07:27:54 +00006161 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6162 (ins _.RC:$src2, _.MemOp:$src3),
6163 OpcodeStr, "$src3, $src2", "$src2, $src3",
Simon Pilgrim6a009702017-11-29 17:21:15 +00006164 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
Craig Topper468a8132017-12-12 07:06:35 +00006165 NoItinerary, 1, 0>, AVX512FMA3Base, Sched<[WriteFMALd, ReadAfterLd]>;
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006166
Craig Toppere1cac152016-06-07 07:27:54 +00006167 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6168 (ins _.RC:$src2, _.ScalarMemOp:$src3),
6169 OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
6170 "$src2, ${src3}"##_.BroadcastStr,
6171 (_.VT (OpNode _.RC:$src2,
6172 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
Simon Pilgrim6a009702017-11-29 17:21:15 +00006173 _.RC:$src1)), NoItinerary, 1, 0>, AVX512FMA3Base, EVEX_B,
Craig Topper468a8132017-12-12 07:06:35 +00006174 Sched<[WriteFMALd, ReadAfterLd]>;
Craig Topper5ec33a92016-07-22 05:00:42 +00006175 }
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006176}
6177
// Packed FMA, "231" operand form, register variant with an explicit rounding
// operand (rb). OpNode receives ($src2, $src3, $src1, (i32 imm:$rc)); the
// record is tagged EVEX_B, EVEX_RC. $src1 is tied to $dst. Suff is not
// referenced.
6178multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
Craig Topper318e40b2016-07-25 07:20:31 +00006179 X86VectorVTInfo _, string Suff> {
Craig Topperb16598d2017-09-01 07:58:16 +00006180 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006181 defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6182 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6183 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
Simon Pilgrim6a009702017-11-29 17:21:15 +00006184 (_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 imm:$rc))),
6185 NoItinerary, 1, 1, vselect, 1>,
Simon Pilgrim97160be2017-11-27 10:41:32 +00006186 AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[WriteFMA]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006187}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006188
// Instantiates the 231 packed FMA forms for all three vector lengths.
//   Z    (512-bit, HasAVX512): r/m/mb forms plus the rb rounding form, which
//        uses OpNodeRnd.
//   Z256 / Z128 (HasVLX, HasAVX512): r/m/mb forms only; no rounding variant.
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006189multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
Craig Topper318e40b2016-07-25 07:20:31 +00006190 SDNode OpNodeRnd, AVX512VLVectorVTInfo _,
6191 string Suff> {
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006192 let Predicates = [HasAVX512] in {
Craig Topper318e40b2016-07-25 07:20:31 +00006193 defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info512, Suff>,
6194 avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, _.info512,
6195 Suff>, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
Elena Demikhovskyfcea06a2014-12-23 10:30:39 +00006196 }
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006197 let Predicates = [HasVLX, HasAVX512] in {
Craig Topper318e40b2016-07-25 07:20:31 +00006198 defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info256, Suff>,
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006199 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
Craig Topper318e40b2016-07-25 07:20:31 +00006200 defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info128, Suff>,
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006201 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
Elena Demikhovskyfcea06a2014-12-23 10:30:39 +00006202 }
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006203}
6204
// Expands one 231 packed FMA opcode into its PS and PD flavours: "ps" is
// appended to the mnemonic for avx512vl_f32_info, "pd" for avx512vl_f64_info.
// The PD flavour additionally carries VEX_W.
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006205multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
Craig Topper318e40b2016-07-25 07:20:31 +00006206 SDNode OpNodeRnd > {
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006207 defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
Craig Topper318e40b2016-07-25 07:20:31 +00006208 avx512vl_f32_info, "PS">;
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006209 defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
Craig Topper318e40b2016-07-25 07:20:31 +00006210 avx512vl_f64_info, "PD">, VEX_W;
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006211}
6212
// 231-form packed FMA instructions. Each line binds an opcode and mnemonic
// stem to its plain SDNode and its rounding-mode SDNode.
Craig Topperaf0b9922017-09-04 06:59:50 +00006213defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", X86Fmadd, X86FmaddRnd>;
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006214defm VFMSUB231 : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>;
6215defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>;
6216defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>;
6217defm VFNMADD231 : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>;
6218defm VFNMSUB231 : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>;
6219
// Packed FMA, "132" operand form: register (r), full-vector memory (m) and
// broadcast-memory (mb) variants built on AVX512_maskable_3src.
// $src1 is tied to $dst. The r pattern passes ($src1, $src3, $src2) to
// OpNode; the m and mb patterns instead pass (loaded $src3, $src1, $src2),
// for the reason given in the comments on those definitions. Suff is not
// referenced.
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006220multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
Craig Topper318e40b2016-07-25 07:20:31 +00006221 X86VectorVTInfo _, string Suff> {
Craig Topperb16598d2017-09-01 07:58:16 +00006222 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006223 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
Craig Topper6bcbf532016-07-25 07:20:28 +00006224 (ins _.RC:$src2, _.RC:$src3),
6225 OpcodeStr, "$src3, $src2", "$src2, $src3",
Simon Pilgrim6a009702017-11-29 17:21:15 +00006226 (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), NoItinerary,
6227 1, 1, vselect, 1>, AVX512FMA3Base, Sched<[WriteFMA]>;
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006228
Craig Topper69e22782017-09-04 07:35:05 +00006229 // Pattern is in 312 order so that the load is in a different place from the
6230 // 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
Craig Toppere1cac152016-06-07 07:27:54 +00006231 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
Craig Topper6bcbf532016-07-25 07:20:28 +00006232 (ins _.RC:$src2, _.MemOp:$src3),
6233 OpcodeStr, "$src3, $src2", "$src2, $src3",
Simon Pilgrim6a009702017-11-29 17:21:15 +00006234 (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)),
Craig Topper468a8132017-12-12 07:06:35 +00006235 NoItinerary, 1, 0>, AVX512FMA3Base, Sched<[WriteFMALd, ReadAfterLd]>;
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006236
Craig Topper69e22782017-09-04 07:35:05 +00006237 // Pattern is in 312 order so that the load is in a different place from the
6238 // 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
Craig Toppere1cac152016-06-07 07:27:54 +00006239 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
Craig Topper6bcbf532016-07-25 07:20:28 +00006240 (ins _.RC:$src2, _.ScalarMemOp:$src3),
6241 OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
6242 "$src2, ${src3}"##_.BroadcastStr,
Craig Topper69e22782017-09-04 07:35:05 +00006243 (_.VT (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
Simon Pilgrim6a009702017-11-29 17:21:15 +00006244 _.RC:$src1, _.RC:$src2)), NoItinerary, 1, 0>,
Craig Topper468a8132017-12-12 07:06:35 +00006245 AVX512FMA3Base, EVEX_B, Sched<[WriteFMALd, ReadAfterLd]>;
Craig Topper5ec33a92016-07-22 05:00:42 +00006246 }
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006247}
6248
// Packed FMA, "132" operand form, register variant with an explicit rounding
// operand (rb). OpNode receives ($src1, $src3, $src2, (i32 imm:$rc)); the
// record is tagged EVEX_B, EVEX_RC. $src1 is tied to $dst. Suff is not
// referenced.
6249multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
Craig Topper318e40b2016-07-25 07:20:31 +00006250 X86VectorVTInfo _, string Suff> {
Craig Topperb16598d2017-09-01 07:58:16 +00006251 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006252 defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
Craig Topper6bcbf532016-07-25 07:20:28 +00006253 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6254 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
Simon Pilgrim6a009702017-11-29 17:21:15 +00006255 (_.VT ( OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 imm:$rc))),
6256 NoItinerary, 1, 1, vselect, 1>,
Simon Pilgrim97160be2017-11-27 10:41:32 +00006257 AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[WriteFMA]>;
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006258}
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006259
// Instantiates the 132 packed FMA forms for all three vector lengths.
//   Z    (512-bit, HasAVX512): r/m/mb forms plus the rb rounding form, which
//        uses OpNodeRnd.
//   Z256 / Z128 (HasVLX, HasAVX512): r/m/mb forms only; no rounding variant.
6260multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
Craig Topper318e40b2016-07-25 07:20:31 +00006261 SDNode OpNodeRnd, AVX512VLVectorVTInfo _,
6262 string Suff> {
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006263 let Predicates = [HasAVX512] in {
Craig Topper318e40b2016-07-25 07:20:31 +00006264 defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info512, Suff>,
6265 avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, _.info512,
6266 Suff>, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006267 }
6268 let Predicates = [HasVLX, HasAVX512] in {
Craig Topper318e40b2016-07-25 07:20:31 +00006269 defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info256, Suff>,
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006270 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
Craig Topper318e40b2016-07-25 07:20:31 +00006271 defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info128, Suff>,
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006272 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6273 }
6274}
6275
// Expands one 132 packed FMA opcode into its PS and PD flavours: "ps" is
// appended to the mnemonic for avx512vl_f32_info, "pd" for avx512vl_f64_info.
// The PD flavour additionally carries VEX_W.
6276multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
Craig Topper318e40b2016-07-25 07:20:31 +00006277 SDNode OpNodeRnd > {
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006278 defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
Craig Topper318e40b2016-07-25 07:20:31 +00006279 avx512vl_f32_info, "PS">;
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006280 defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
Craig Topper318e40b2016-07-25 07:20:31 +00006281 avx512vl_f64_info, "PD">, VEX_W;
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006282}
6283
// 132-form packed FMA instructions. Each line binds an opcode and mnemonic
// stem to its plain SDNode and its rounding-mode SDNode.
Craig Topperaf0b9922017-09-04 06:59:50 +00006284defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", X86Fmadd, X86FmaddRnd>;
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006285defm VFMSUB132 : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>;
6286defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>;
6287defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>;
6288defm VFNMADD132 : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86Fnmadd, X86FnmaddRnd>;
6289defm VFNMSUB132 : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86Fnmsub, X86FnmsubRnd>;
Elena Demikhovskyfcea06a2014-12-23 10:30:39 +00006290
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006291// Scalar FMA
// Shared body for one scalar FMA operand form. Emits five records:
//   r_Int, m_Int, rb_Int - AVX512_maskable_3src_scalar forms on the vector
//                          register class _.RC, using the caller-supplied
//                          dags RHS_VEC_r, RHS_VEC_m and RHS_VEC_rb.
//   r, m                 - isCodeGenOnly, isCommutable forms on the scalar
//                          register class _.FRC, using RHS_r and RHS_m.
// $src1 is tied to $dst for all of them.
//   MaskOnlyReg - when set, the r form gets an empty pattern list instead of
//                 [RHS_r]; the m form always uses [RHS_m].
Igor Breger15820b02015-07-01 13:24:28 +00006292multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6293 dag RHS_VEC_r, dag RHS_VEC_m, dag RHS_VEC_rb,
Craig Topper69e22782017-09-04 07:35:05 +00006294 dag RHS_r, dag RHS_m, bit MaskOnlyReg> {
Craig Topperb16598d2017-09-01 07:58:16 +00006295let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
Igor Breger15820b02015-07-01 13:24:28 +00006296 defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6297 (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
Simon Pilgrim6a009702017-11-29 17:21:15 +00006298 "$src3, $src2", "$src2, $src3", RHS_VEC_r, NoItinerary, 1, 1>,
6299 AVX512FMA3Base, Sched<[WriteFMA]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006300
Craig Toppere1cac152016-06-07 07:27:54 +00006301 defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
Craig Topperd9fe6642017-02-21 04:26:10 +00006302 (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
Simon Pilgrim6a009702017-11-29 17:21:15 +00006303 "$src3, $src2", "$src2, $src3", RHS_VEC_m, NoItinerary, 1, 1>,
Craig Topper468a8132017-12-12 07:06:35 +00006304 AVX512FMA3Base, Sched<[WriteFMALd, ReadAfterLd]>;
Igor Breger15820b02015-07-01 13:24:28 +00006305
6306 defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6307 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
Simon Pilgrim6a009702017-11-29 17:21:15 +00006308 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", RHS_VEC_rb,
6309 NoItinerary, 1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC,
Craig Toppera2f55282017-12-10 03:16:36 +00006310 Sched<[WriteFMA]>;
Igor Breger15820b02015-07-01 13:24:28 +00006311
Craig Toppereafdbec2016-08-13 06:48:41 +00006312 let isCodeGenOnly = 1, isCommutable = 1 in {
Craig Topper5bfa5ff2017-11-09 08:26:26 +00006313 def r : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
Igor Breger15820b02015-07-01 13:24:28 +00006314 (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
6315 !strconcat(OpcodeStr,
6316 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
Simon Pilgrim97160be2017-11-27 10:41:32 +00006317 !if(MaskOnlyReg, [], [RHS_r])>, Sched<[WriteFMA]>;
Craig Topper5bfa5ff2017-11-09 08:26:26 +00006318 def m : AVX512FMA3S<opc, MRMSrcMem, (outs _.FRC:$dst),
Craig Toppere1cac152016-06-07 07:27:54 +00006319 (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
6320 !strconcat(OpcodeStr,
6321 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
Craig Topper468a8132017-12-12 07:06:35 +00006322 [RHS_m]>, Sched<[WriteFMALd, ReadAfterLd]>;
Igor Breger15820b02015-07-01 13:24:28 +00006323 }// isCodeGenOnly = 1, isCommutable = 1
Igor Breger15820b02015-07-01 13:24:28 +00006324}// Constraints = "$src1 = $dst", hasSideEffects = 0
Craig Topperb16598d2017-09-01 07:58:16 +00006325}
Igor Breger15820b02015-07-01 13:24:28 +00006326
// Emits the 213, 231 and 132 scalar FMA forms for one element type by
// instantiating avx512_fma3s_common three times. The mnemonic for each is
// OpcodeStr, then the form number, then _.Suffix.
//   OpNode                - scalar SDNode used for the _.FRC patterns.
//   OpNodes1, OpNodeRnds1 - SDNodes for the vector (_.RC) patterns of the 213
//                           form; OpNodes1 is also used for the 132 m_Int
//                           pattern.
//   OpNodes3, OpNodeRnds3 - SDNodes for the vector (_.RC) patterns of the 231
//                           form.
//   SUFF                  - suffix pasted into the generated record names.
// The final bit argument is MaskOnlyReg: 0 for 213, 1 for 231 and 132, so of
// the three plain r records only the 213 one carries a selection pattern.
6327multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
Craig Topper07dac552017-11-06 05:48:25 +00006328 string OpcodeStr, SDNode OpNode, SDNode OpNodes1,
6329 SDNode OpNodeRnds1, SDNode OpNodes3,
6330 SDNode OpNodeRnds3, X86VectorVTInfo _,
6331 string SUFF> {
Craig Topper2caa97c2017-02-25 19:36:28 +00006332 let ExeDomain = _.ExeDomain in {
Craig Topperb16598d2017-09-01 07:58:16 +00006333 defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
Craig Toppera55b4832016-12-09 06:42:28 +00006334 // Operands for intrinsic are in 123 order to preserve passthru
6335 // semantics.
Craig Topper07dac552017-11-06 05:48:25 +00006336 (_.VT (OpNodes1 _.RC:$src1, _.RC:$src2, _.RC:$src3)),
6337 (_.VT (OpNodes1 _.RC:$src1, _.RC:$src2,
6338 _.ScalarIntMemCPat:$src3)),
Craig Toppera55b4832016-12-09 06:42:28 +00006339 (_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src2, _.RC:$src3,
Igor Breger15820b02015-07-01 13:24:28 +00006340 (i32 imm:$rc))),
6341 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6342 _.FRC:$src3))),
6343 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
Craig Topper69e22782017-09-04 07:35:05 +00006344 (_.ScalarLdFrag addr:$src3)))), 0>;
Igor Breger15820b02015-07-01 13:24:28 +00006345
Craig Topperb16598d2017-09-01 07:58:16 +00006346 defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
Craig Topper07dac552017-11-06 05:48:25 +00006347 (_.VT (OpNodes3 _.RC:$src2, _.RC:$src3, _.RC:$src1)),
6348 (_.VT (OpNodes3 _.RC:$src2, _.ScalarIntMemCPat:$src3,
6349 _.RC:$src1)),
Craig Toppera55b4832016-12-09 06:42:28 +00006350 (_.VT ( OpNodeRnds3 _.RC:$src2, _.RC:$src3, _.RC:$src1,
Igor Breger15820b02015-07-01 13:24:28 +00006351 (i32 imm:$rc))),
6352 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
6353 _.FRC:$src1))),
6354 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
Craig Topper69e22782017-09-04 07:35:05 +00006355 (_.ScalarLdFrag addr:$src3), _.FRC:$src1))), 1>;
Igor Breger15820b02015-07-01 13:24:28 +00006356
Craig Toppereec768b2017-09-06 03:35:58 +00006357 // One pattern is in 312 order so that the load is in a different place from
6358 // the 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
// The register and rounding vector patterns for this form are null_frag.
Craig Topperb16598d2017-09-01 07:58:16 +00006359 defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
Craig Topper69e22782017-09-04 07:35:05 +00006360 (null_frag),
Craig Topper07dac552017-11-06 05:48:25 +00006361 (_.VT (OpNodes1 _.RC:$src1, _.ScalarIntMemCPat:$src3,
6362 _.RC:$src2)),
Craig Topper69e22782017-09-04 07:35:05 +00006363 (null_frag),
Igor Breger15820b02015-07-01 13:24:28 +00006364 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
6365 _.FRC:$src2))),
Craig Toppereec768b2017-09-06 03:35:58 +00006366 (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
6367 _.FRC:$src1, _.FRC:$src2))), 1>;
Craig Topper2caa97c2017-02-25 19:36:28 +00006368 }
Igor Breger15820b02015-07-01 13:24:28 +00006369}
6370
// Instantiates avx512_fma3s_all for both scalar element types, guarded by
// HasAVX512:
//   f32x_info, suffix "SS": EVEX_CD8<32, CD8VT1>, VEX_LIG.
//   f64x_info, suffix "SD": EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W.
6371multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
Craig Topper07dac552017-11-06 05:48:25 +00006372 string OpcodeStr, SDNode OpNode, SDNode OpNodes1,
6373 SDNode OpNodeRnds1, SDNode OpNodes3,
Craig Toppera55b4832016-12-09 06:42:28 +00006374 SDNode OpNodeRnds3> {
Igor Breger15820b02015-07-01 13:24:28 +00006375 let Predicates = [HasAVX512] in {
6376 defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
Craig Topper07dac552017-11-06 05:48:25 +00006377 OpNodes1, OpNodeRnds1, OpNodes3, OpNodeRnds3,
6378 f32x_info, "SS">,
Craig Toppera55b4832016-12-09 06:42:28 +00006379 EVEX_CD8<32, CD8VT1>, VEX_LIG;
Igor Breger15820b02015-07-01 13:24:28 +00006380 defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
Craig Topper07dac552017-11-06 05:48:25 +00006381 OpNodes1, OpNodeRnds1, OpNodes3, OpNodeRnds3,
6382 f64x_info, "SD">,
Craig Toppera55b4832016-12-09 06:42:28 +00006383 EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
Igor Breger15820b02015-07-01 13:24:28 +00006384 }
6385}
6386
// Scalar FMA instructions. The three opcode bytes are given in 213, 231, 132
// order, followed by the mnemonic stem, the scalar SDNode, and the s1 / s3
// vector SDNodes with their rounding counterparts.
Craig Topper07dac552017-11-06 05:48:25 +00006387defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86Fmadds1,
6388 X86FmaddRnds1, X86Fmadds3, X86FmaddRnds3>;
6389defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86Fmsubs1,
6390 X86FmsubRnds1, X86Fmsubs3, X86FmsubRnds3>;
6391defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86Fnmadds1,
6392 X86FnmaddRnds1, X86Fnmadds3, X86FnmaddRnds3>;
6393defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86Fnmsubs1,
6394 X86FnmsubRnds1, X86Fnmsubs3, X86FnmsubRnds3>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006395
6396//===----------------------------------------------------------------------===//
Asaf Badouh655822a2016-01-25 11:14:24 +00006397// AVX-512 Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
6398//===----------------------------------------------------------------------===//
// 52-bit integer multiply-add: register (r), full-vector memory (m) and
// broadcast-memory (mb) variants built on AVX512_maskable_3src, with $src1
// tied to $dst.
//   opc, OpcodeStr - opcode byte and mnemonic.
//   OpNode         - SDNode; receives ($src2, $src3, $src1) in every pattern.
//   itins          - supplies itineraries (rr / rm) and the scheduling class
//                    (Sched, Sched.Folded).
//   _              - vector type info.
// NOTE(review): only the r form passes the trailing "1, 1" arguments; the m
// and mb forms rely on the defaults of AVX512_maskable_3src, which is defined
// outside this section.
6399let Constraints = "$src1 = $dst" in {
6400multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim4d08aed2017-12-05 14:59:40 +00006401 OpndItins itins, X86VectorVTInfo _> {
Craig Topper47e14ea2017-09-24 19:30:55 +00006402 // NOTE: The SDNode has the multiply operands first with the add last.
6403 // This enables commuted load patterns to be autogenerated by tablegen.
Craig Topper6bf9b802017-02-26 06:45:45 +00006404 let ExeDomain = _.ExeDomain in {
Asaf Badouh655822a2016-01-25 11:14:24 +00006405 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6406 (ins _.RC:$src2, _.RC:$src3),
6407 OpcodeStr, "$src3, $src2", "$src2, $src3",
Simon Pilgrim4d08aed2017-12-05 14:59:40 +00006408 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), itins.rr, 1, 1>,
6409 AVX512FMA3Base, Sched<[itins.Sched]>;
Asaf Badouh655822a2016-01-25 11:14:24 +00006410
Craig Toppere1cac152016-06-07 07:27:54 +00006411 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6412 (ins _.RC:$src2, _.MemOp:$src3),
6413 OpcodeStr, "$src3, $src2", "$src2, $src3",
Simon Pilgrim4d08aed2017-12-05 14:59:40 +00006414 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
6415 itins.rm>, AVX512FMA3Base, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Asaf Badouh655822a2016-01-25 11:14:24 +00006416
Craig Toppere1cac152016-06-07 07:27:54 +00006417 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6418 (ins _.RC:$src2, _.ScalarMemOp:$src3),
6419 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
6420 !strconcat("$src2, ${src3}", _.BroadcastStr ),
Craig Topper47e14ea2017-09-24 19:30:55 +00006421 (OpNode _.RC:$src2,
6422 (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
Simon Pilgrim4d08aed2017-12-05 14:59:40 +00006423 _.RC:$src1), itins.rm>,
6424 AVX512FMA3Base, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Craig Topper6bf9b802017-02-26 06:45:45 +00006425 }
Asaf Badouh655822a2016-01-25 11:14:24 +00006426}
6427} // Constraints = "$src1 = $dst"
6428
// Instantiates avx512_pmadd52_rm for all three vector lengths.
//   Z    (512-bit): requires HasIFMA.
//   Z256 / Z128   : require HasVLX and HasIFMA.
// Each length takes its EVEX_CD8 element size from the matching info record.
6429multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim4d08aed2017-12-05 14:59:40 +00006430 OpndItins itins, AVX512VLVectorVTInfo _> {
Asaf Badouh655822a2016-01-25 11:14:24 +00006431 let Predicates = [HasIFMA] in {
Simon Pilgrim4d08aed2017-12-05 14:59:40 +00006432 defm Z : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, itins, _.info512>,
Asaf Badouh655822a2016-01-25 11:14:24 +00006433 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6434 }
6435 let Predicates = [HasVLX, HasIFMA] in {
Simon Pilgrim4d08aed2017-12-05 14:59:40 +00006436 defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, itins, _.info256>,
Asaf Badouh655822a2016-01-25 11:14:24 +00006437 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
Simon Pilgrim4d08aed2017-12-05 14:59:40 +00006438 defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, itins, _.info128>,
Asaf Badouh655822a2016-01-25 11:14:24 +00006439 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6440 }
6441}
6442
// The two vpmadd52 instructions: opcode 0xb4 selects via x86vpmadd52l and
// opcode 0xb5 via x86vpmadd52h. Both operate on avx512vl_i64_info vectors,
// use the SSE_PMADD itinerary, and set VEX_W.
6443defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
Simon Pilgrim4d08aed2017-12-05 14:59:40 +00006444 SSE_PMADD, avx512vl_i64_info>, VEX_W;
Asaf Badouh655822a2016-01-25 11:14:24 +00006445defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
Simon Pilgrim4d08aed2017-12-05 14:59:40 +00006446 SSE_PMADD, avx512vl_i64_info>, VEX_W;
Asaf Badouh655822a2016-01-25 11:14:24 +00006447
6448//===----------------------------------------------------------------------===//
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006449// AVX-512 Scalar convert from signed integer to float/double
6450//===----------------------------------------------------------------------===//
6451
// Scalar integer-to-FP conversion without an explicit rounding operand.
// Defines four instructions:
//   rr / rm         - operate on DstVT.FRC, carry no selection pattern and are
//                     marked hasSideEffects = 0 (rm additionally mayLoad).
//   rr_Int / rm_Int - codegen-only forms on the full vector class DstVT.RC
//                     that match OpNode with an (i32 FROUND_CURRENT) operand.
// SrcRC is the integer source register class; x86memop / ld_frag describe the
// memory operand and load fragment used by the memory forms.
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006452multiclass avx512_vcvtsi<bits<8> opc, SDNode OpNode, OpndItins itins,
6453 RegisterClass SrcRC, X86VectorVTInfo DstVT,
6454 X86MemOperand x86memop, PatFrag ld_frag, string asm> {
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006455 let hasSideEffects = 0 in {
6456 def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
6457 (ins DstVT.FRC:$src1, SrcRC:$src),
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006458 !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), [],
6459 itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006460 let mayLoad = 1 in
6461 def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
6462 (ins DstVT.FRC:$src1, x86memop:$src),
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006463 !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), [],
6464 itins.rm>, EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006465 } // hasSideEffects = 0
6466 let isCodeGenOnly = 1 in {
6467 def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
6468 (ins DstVT.RC:$src1, SrcRC:$src2),
6469 !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6470 [(set DstVT.RC:$dst,
6471 (OpNode (DstVT.VT DstVT.RC:$src1),
6472 SrcRC:$src2,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006473 (i32 FROUND_CURRENT)))], itins.rr>,
6474 EVEX_4V, Sched<[itins.Sched]>;
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006475
6476 def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
6477 (ins DstVT.RC:$src1, x86memop:$src2),
6478 !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6479 [(set DstVT.RC:$dst,
6480 (OpNode (DstVT.VT DstVT.RC:$src1),
6481 (ld_frag addr:$src2),
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006482 (i32 FROUND_CURRENT)))], itins.rm>,
6483 EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006484 }//isCodeGenOnly = 1
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006485}
Elena Demikhovskyd8fda622015-03-30 09:29:28 +00006486
// Register-only integer-to-FP conversion with an explicit rounding operand.
// rrb_Int takes an extra AVX512RC:$rc input, prints it between the two
// sources, and hands it to OpNode as (i32 imm:$rc). The instruction is tagged
// EVEX_B and EVEX_RC.
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006487multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode, OpndItins itins,
6488 RegisterClass SrcRC, X86VectorVTInfo DstVT, string asm> {
Igor Bregerabe4a792015-06-14 12:44:55 +00006489 def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
6490 (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006491 !strconcat(asm,
6492 "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
Igor Bregerabe4a792015-06-14 12:44:55 +00006493 [(set DstVT.RC:$dst,
6494 (OpNode (DstVT.VT DstVT.RC:$src1),
6495 SrcRC:$src2,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006496 (i32 imm:$rc)))], itins.rr>,
6497 EVEX_4V, EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
Igor Bregerabe4a792015-06-14 12:44:55 +00006498}
6499
// Convenience wrapper: under a single NAME, emits both the explicit-rounding
// form (avx512_vcvtsi_round) and the rr/rm/rr_Int/rm_Int forms
// (avx512_vcvtsi), all tagged VEX_LIG.
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006500multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, OpndItins itins,
6501 RegisterClass SrcRC, X86VectorVTInfo DstVT,
6502 X86MemOperand x86memop, PatFrag ld_frag, string asm> {
6503 defm NAME : avx512_vcvtsi_round<opc, OpNode, itins, SrcRC, DstVT, asm>,
6504 avx512_vcvtsi<opc, OpNode, itins, SrcRC, DstVT, x86memop,
6505 ld_frag, asm>, VEX_LIG;
Igor Bregerabe4a792015-06-14 12:44:55 +00006506}
6507
// Everything in this block requires HasAVX512. It defines the signed
// (opcode 0x2A) and unsigned (opcode 0x7B) scalar integer-to-FP conversion
// instructions, suffix-less assembler aliases for the 32-bit memory forms,
// and selection patterns mapping sint_to_fp / uint_to_fp (from a register or
// a folded load) onto the rr / rm forms with an IMPLICIT_DEF pass-through
// operand.
Andrew Trick15a47742013-10-09 05:11:10 +00006508let Predicates = [HasAVX512] in {
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006509defm VCVTSI2SSZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, SSE_CVT_SI2SS, GR32,
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006510 v4f32x_info, i32mem, loadi32, "cvtsi2ss{l}">,
6511 XS, EVEX_CD8<32, CD8VT1>;
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006512defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, SSE_CVT_SI2SS, GR64,
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006513 v4f32x_info, i64mem, loadi64, "cvtsi2ss{q}">,
6514 XS, VEX_W, EVEX_CD8<64, CD8VT1>;
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006515defm VCVTSI2SDZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, SSE_CVT_SI2SD, GR32,
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006516 v2f64x_info, i32mem, loadi32, "cvtsi2sd{l}">,
6517 XD, EVEX_CD8<32, CD8VT1>;
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006518defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, SSE_CVT_SI2SD, GR64,
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006519 v2f64x_info, i64mem, loadi64, "cvtsi2sd{q}">,
6520 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006521
// Accept the mnemonic without an {l}/{q} suffix for the 32-bit memory form.
// NOTE(review): the vcvtsi2ss alias (and the vcvtusi2ss alias further down)
// spells its register operands as FR64X although the instruction is built
// from v4f32x_info - confirm this register class is intended.
Craig Topper8f85ad12016-11-14 02:46:58 +00006522def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
6523 (VCVTSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0>;
6524def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
6525 (VCVTSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0>;
6526
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006527def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
6528 (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
6529def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006530 (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006531def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
6532 (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
6533def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006534 (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006535
6536def : Pat<(f32 (sint_to_fp GR32:$src)),
6537 (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
6538def : Pat<(f32 (sint_to_fp GR64:$src)),
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006539 (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006540def : Pat<(f64 (sint_to_fp GR32:$src)),
6541 (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
6542def : Pat<(f64 (sint_to_fp GR64:$src)),
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006543 (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
6544
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006545defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, SSE_CVT_SI2SS, GR32,
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006546 v4f32x_info, i32mem, loadi32,
6547 "cvtusi2ss{l}">, XS, EVEX_CD8<32, CD8VT1>;
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006548defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, SSE_CVT_SI2SS, GR64,
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006549 v4f32x_info, i64mem, loadi64, "cvtusi2ss{q}">,
6550 XS, VEX_W, EVEX_CD8<64, CD8VT1>;
// NOTE(review): unlike its siblings this one instantiates avx512_vcvtsi
// directly, so no rrb_Int (explicit rounding) form is generated and VEX_LIG
// is applied here instead - presumably deliberate; confirm.
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006551defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, X86UintToFpRnd, SSE_CVT_SI2SD, GR32, v2f64x_info,
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006552 i32mem, loadi32, "cvtusi2sd{l}">,
6553 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006554defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, SSE_CVT_SI2SD, GR64,
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006555 v2f64x_info, i64mem, loadi64, "cvtusi2sd{q}">,
6556 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006557
Craig Topper8f85ad12016-11-14 02:46:58 +00006558def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
6559 (VCVTUSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0>;
6560def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
6561 (VCVTUSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0>;
6562
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006563def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
6564 (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
6565def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))),
6566 (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
6567def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))),
6568 (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
6569def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))),
6570 (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
6571
6572def : Pat<(f32 (uint_to_fp GR32:$src)),
6573 (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
6574def : Pat<(f32 (uint_to_fp GR64:$src)),
6575 (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
6576def : Pat<(f64 (uint_to_fp GR32:$src)),
6577 (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
6578def : Pat<(f64 (uint_to_fp GR64:$src)),
6579 (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
Andrew Trick15a47742013-10-09 05:11:10 +00006580}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006581
6582//===----------------------------------------------------------------------===//
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006583// AVX-512 Scalar convert from float/double to integer
6584//===----------------------------------------------------------------------===//
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006585
// Scalar FP-to-integer conversion operating on the full source vector class.
// All definitions are guarded by HasAVX512:
//   rr_Int  - register form, OpNode with (i32 FROUND_CURRENT).
//   rrb_Int - register form with an explicit AVX512RC:$rc rounding operand
//             (EVEX_B, EVEX_RC).
//   rm_Int  - memory form; its isCodeGenOnly and ForceDisassemble bits are
//             both set from the CodeGenOnly parameter (default 1).
// Two InstAliases additionally accept the mnemonic "v" # asm # aliasStr for
// the two register forms. Note that `asm` is expected WITHOUT the leading
// "v" in the alias strings, since the aliases prepend it themselves.
6586multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
6587 X86VectorVTInfo DstVT, SDNode OpNode,
Craig Toppera49c3542018-01-06 19:20:33 +00006588 OpndItins itins, string asm,
6589 string aliasStr,
6590 bit CodeGenOnly = 1> {
Craig Toppere1cac152016-06-07 07:27:54 +00006591 let Predicates = [HasAVX512] in {
Craig Toppera0be5a02017-12-10 19:47:56 +00006592 def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
Asaf Badouh2744d212015-09-20 14:31:19 +00006593 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006594 [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 FROUND_CURRENT)))],
6595 itins.rr>, EVEX, VEX_LIG, Sched<[itins.Sched]>;
Craig Toppera0be5a02017-12-10 19:47:56 +00006596 def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
Craig Topper1de942b2017-12-10 17:42:44 +00006597 !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
6598 [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 imm:$rc)))],
6599 itins.rr>, EVEX, VEX_LIG, EVEX_B, EVEX_RC,
6600 Sched<[itins.Sched]>;
Craig Toppera49c3542018-01-06 19:20:33 +00006601 let isCodeGenOnly = CodeGenOnly, ForceDisassemble = CodeGenOnly in
Craig Toppera0be5a02017-12-10 19:47:56 +00006602 def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
Asaf Badouhad5c3fc2016-02-07 14:59:13 +00006603 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
Simon Pilgrimb13961d2016-06-11 14:34:10 +00006604 [(set DstVT.RC:$dst, (OpNode
Craig Topper5a63ca22017-03-13 03:59:06 +00006605 (SrcVT.VT SrcVT.ScalarIntMemCPat:$src),
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006606 (i32 FROUND_CURRENT)))], itins.rm>,
6607 EVEX, VEX_LIG, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Craig Toppere2659d82018-01-05 23:13:54 +00006608
6609 def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
6610 (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0>;
6611 def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
6612 (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0>;
Craig Toppera49c3542018-01-06 19:20:33 +00006613 } // Predicates = [HasAVX512]
6614}
6615
// Variant of avx512_cvt_s_int_round that passes CodeGenOnly = 0 (so rm_Int is
// not codegen-only) and adds a suffixed InstAlias for the rm_Int memory form
// as well.
6616multiclass avx512_cvt_s_int_round_aliases<bits<8> opc, X86VectorVTInfo SrcVT,
6617 X86VectorVTInfo DstVT, SDNode OpNode,
6618 OpndItins itins, string asm,
6619 string aliasStr> :
6620 avx512_cvt_s_int_round<opc, SrcVT, DstVT, OpNode, itins, asm, aliasStr, 0> {
6621 let Predicates = [HasAVX512] in {
Craig Toppere2659d82018-01-05 23:13:54 +00006622 def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
6623 (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
6624 SrcVT.IntScalarMemOp:$src), 0>;
Simon Pilgrimb13961d2016-06-11 14:34:10 +00006625 } // Predicates = [HasAVX512]
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006626}
Asaf Badouh2744d212015-09-20 14:31:19 +00006627
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006628// Convert float/double to signed/unsigned int 32/64
// Signed conversions use opcode 0x2D with avx512_cvt_s_int_round; unsigned
// conversions use opcode 0x79 with the _aliases variant, which also exposes a
// suffixed alias for the memory form. 64-bit destinations add VEX_W. The
// EVEX_CD8 element size follows the source type (32 for ss, 64 for sd).
Simon Pilgrimb13961d2016-06-11 14:34:10 +00006629defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,
Craig Toppere2659d82018-01-05 23:13:54 +00006630 X86cvts2si, SSE_CVT_SS2SI_32, "cvtss2si", "{l}">,
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006631 XS, EVEX_CD8<32, CD8VT1>;
Simon Pilgrimb13961d2016-06-11 14:34:10 +00006632defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info,
Craig Toppere2659d82018-01-05 23:13:54 +00006633 X86cvts2si, SSE_CVT_SS2SI_64, "cvtss2si", "{q}">,
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006634 XS, VEX_W, EVEX_CD8<32, CD8VT1>;
Craig Toppera49c3542018-01-06 19:20:33 +00006635defm VCVTSS2USIZ: avx512_cvt_s_int_round_aliases<0x79, f32x_info, i32x_info,
Craig Toppere2659d82018-01-05 23:13:54 +00006636 X86cvts2usi, SSE_CVT_SS2SI_32, "cvtss2usi", "{l}">,
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006637 XS, EVEX_CD8<32, CD8VT1>;
Craig Toppera49c3542018-01-06 19:20:33 +00006638defm VCVTSS2USI64Z: avx512_cvt_s_int_round_aliases<0x79, f32x_info, i64x_info,
Craig Toppere2659d82018-01-05 23:13:54 +00006639 X86cvts2usi, SSE_CVT_SS2SI_64, "cvtss2usi", "{q}">,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006640 XS, VEX_W, EVEX_CD8<32, CD8VT1>;
Simon Pilgrimb13961d2016-06-11 14:34:10 +00006641defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info,
Craig Toppere2659d82018-01-05 23:13:54 +00006642 X86cvts2si, SSE_CVT_SD2SI, "cvtsd2si", "{l}">,
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006643 XD, EVEX_CD8<64, CD8VT1>;
Asaf Badouhad5c3fc2016-02-07 14:59:13 +00006644defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info,
Craig Toppere2659d82018-01-05 23:13:54 +00006645 X86cvts2si, SSE_CVT_SD2SI, "cvtsd2si", "{q}">,
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006646 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
Craig Toppera49c3542018-01-06 19:20:33 +00006647defm VCVTSD2USIZ: avx512_cvt_s_int_round_aliases<0x79, f64x_info, i32x_info,
Craig Toppere2659d82018-01-05 23:13:54 +00006648 X86cvts2usi, SSE_CVT_SD2SI, "cvtsd2usi", "{l}">,
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006649 XD, EVEX_CD8<64, CD8VT1>;
Craig Toppera49c3542018-01-06 19:20:33 +00006650defm VCVTSD2USI64Z: avx512_cvt_s_int_round_aliases<0x79, f64x_info, i64x_info,
Craig Toppere2659d82018-01-05 23:13:54 +00006651 X86cvts2usi, SSE_CVT_SD2SI, "cvtsd2usi", "{q}">,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006652 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006653
Asaf Badouhad5c3fc2016-02-07 14:59:13 +00006654// The SSE versions of these instructions are disabled for AVX512.
6655// Therefore, the SSE intrinsics are mapped to the AVX512 instructions.
// Select the EVEX *_Int instructions for the SSE/SSE2 scalar FP-to-integer
// intrinsics (cvtss2si, cvtss2si64, cvtsd2si, cvtsd2si64), each in a register
// and a folded-load (sse_load_f32 / sse_load_f64) flavour.
6656let Predicates = [HasAVX512] in {
6657 def : Pat<(i32 (int_x86_sse_cvtss2si (v4f32 VR128X:$src))),
Craig Toppera0be5a02017-12-10 19:47:56 +00006658 (VCVTSS2SIZrr_Int VR128X:$src)>;
Craig Topper5a63ca22017-03-13 03:59:06 +00006659 def : Pat<(i32 (int_x86_sse_cvtss2si sse_load_f32:$src)),
Craig Toppera0be5a02017-12-10 19:47:56 +00006660 (VCVTSS2SIZrm_Int sse_load_f32:$src)>;
Asaf Badouhad5c3fc2016-02-07 14:59:13 +00006661 def : Pat<(i64 (int_x86_sse_cvtss2si64 (v4f32 VR128X:$src))),
Craig Toppera0be5a02017-12-10 19:47:56 +00006662 (VCVTSS2SI64Zrr_Int VR128X:$src)>;
Craig Topper5a63ca22017-03-13 03:59:06 +00006663 def : Pat<(i64 (int_x86_sse_cvtss2si64 sse_load_f32:$src)),
Craig Toppera0be5a02017-12-10 19:47:56 +00006664 (VCVTSS2SI64Zrm_Int sse_load_f32:$src)>;
Asaf Badouhad5c3fc2016-02-07 14:59:13 +00006665 def : Pat<(i32 (int_x86_sse2_cvtsd2si (v2f64 VR128X:$src))),
Craig Toppera0be5a02017-12-10 19:47:56 +00006666 (VCVTSD2SIZrr_Int VR128X:$src)>;
Craig Topper5a63ca22017-03-13 03:59:06 +00006667 def : Pat<(i32 (int_x86_sse2_cvtsd2si sse_load_f64:$src)),
Craig Toppera0be5a02017-12-10 19:47:56 +00006668 (VCVTSD2SIZrm_Int sse_load_f64:$src)>;
Asaf Badouhad5c3fc2016-02-07 14:59:13 +00006669 def : Pat<(i64 (int_x86_sse2_cvtsd2si64 (v2f64 VR128X:$src))),
Craig Toppera0be5a02017-12-10 19:47:56 +00006670 (VCVTSD2SI64Zrr_Int VR128X:$src)>;
Craig Topper5a63ca22017-03-13 03:59:06 +00006671 def : Pat<(i64 (int_x86_sse2_cvtsd2si64 sse_load_f64:$src)),
Craig Toppera0be5a02017-12-10 19:47:56 +00006672 (VCVTSD2SI64Zrm_Int sse_load_f64:$src)>;
Asaf Badouhad5c3fc2016-02-07 14:59:13 +00006673} // HasAVX512
6674
// Select the EVEX *_Int instructions for the SSE/SSE2 integer-to-FP
// intrinsics (cvtsi2ss, cvtsi642ss, cvtsi2sd, cvtsi642sd) and for
// int_x86_avx512_cvtusi2sd, each with a register and a folded-load source.
Craig Topperac941b92016-09-25 16:33:53 +00006675let Predicates = [HasAVX512] in {
6676 def : Pat<(int_x86_sse_cvtsi2ss VR128X:$src1, GR32:$src2),
6677 (VCVTSI2SSZrr_Int VR128X:$src1, GR32:$src2)>;
6678 def : Pat<(int_x86_sse_cvtsi2ss VR128X:$src1, (loadi32 addr:$src2)),
6679 (VCVTSI2SSZrm_Int VR128X:$src1, addr:$src2)>;
6680 def : Pat<(int_x86_sse_cvtsi642ss VR128X:$src1, GR64:$src2),
6681 (VCVTSI642SSZrr_Int VR128X:$src1, GR64:$src2)>;
6682 def : Pat<(int_x86_sse_cvtsi642ss VR128X:$src1, (loadi64 addr:$src2)),
6683 (VCVTSI642SSZrm_Int VR128X:$src1, addr:$src2)>;
6684 def : Pat<(int_x86_sse2_cvtsi2sd VR128X:$src1, GR32:$src2),
6685 (VCVTSI2SDZrr_Int VR128X:$src1, GR32:$src2)>;
6686 def : Pat<(int_x86_sse2_cvtsi2sd VR128X:$src1, (loadi32 addr:$src2)),
6687 (VCVTSI2SDZrm_Int VR128X:$src1, addr:$src2)>;
6688 def : Pat<(int_x86_sse2_cvtsi642sd VR128X:$src1, GR64:$src2),
6689 (VCVTSI642SDZrr_Int VR128X:$src1, GR64:$src2)>;
6690 def : Pat<(int_x86_sse2_cvtsi642sd VR128X:$src1, (loadi64 addr:$src2)),
6691 (VCVTSI642SDZrm_Int VR128X:$src1, addr:$src2)>;
6692 def : Pat<(int_x86_avx512_cvtusi2sd VR128X:$src1, GR32:$src2),
6693 (VCVTUSI2SDZrr_Int VR128X:$src1, GR32:$src2)>;
6694 def : Pat<(int_x86_avx512_cvtusi2sd VR128X:$src1, (loadi32 addr:$src2)),
6695 (VCVTUSI2SDZrm_Int VR128X:$src1, addr:$src2)>;
6696} // Predicates = [HasAVX512]
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006697
Elad Cohen0c260102017-01-11 09:11:48 +00006698// Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
6699// which produce unnecessary vmovs{s,d} instructions
// Fold "X86Movss/X86Movsd of $dst with scalar_to_vector(sint_to_fp GPR)" into
// a single rr_Int conversion that takes $dst as its pass-through vector
// operand. Covers f32/f64 results from GR32 and GR64 sources.
6700let Predicates = [HasAVX512] in {
6701def : Pat<(v4f32 (X86Movss
6702 (v4f32 VR128X:$dst),
6703 (v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
6704 (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
6705
6706def : Pat<(v4f32 (X86Movss
6707 (v4f32 VR128X:$dst),
6708 (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
6709 (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
6710
6711def : Pat<(v2f64 (X86Movsd
6712 (v2f64 VR128X:$dst),
6713 (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
6714 (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
6715
6716def : Pat<(v2f64 (X86Movsd
6717 (v2f64 VR128X:$dst),
6718 (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
6719 (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
6720} // Predicates = [HasAVX512]
6721
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006722// Convert float/double to signed/unsigned int 32/64 with truncation
// Truncating scalar FP-to-integer conversion. All definitions are guarded by
// HasAVX512:
//   rr / rm  - codegen-only forms on _SrcRC.FRC / a scalar load that match
//              the plain OpNode.
//   rr_Int   - vector-register form matching OpNodeRnd with FROUND_CURRENT.
//   rrb_Int  - {sae} form matching OpNodeRnd with FROUND_NO_EXC (EVEX_B).
//   rm_Int   - memory form; isCodeGenOnly and ForceDisassemble are both set
//              from the CodeGenOnly parameter (default 1).
// Two InstAliases accept asm # aliasStr for the register forms. Here `asm`
// is used verbatim in the alias strings, so callers pass the full mnemonic.
Simon Pilgrim18bcf932016-02-03 09:41:59 +00006723multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
6724 X86VectorVTInfo _DstRC, SDNode OpNode,
Craig Topper61d8a602018-01-06 21:27:25 +00006725 SDNode OpNodeRnd, OpndItins itins, string aliasStr,
6726 bit CodeGenOnly = 1>{
Asaf Badouh2744d212015-09-20 14:31:19 +00006727let Predicates = [HasAVX512] in {
Craig Topper90353a92018-01-06 21:02:22 +00006728 let isCodeGenOnly = 1 in {
Igor Bregerc59b3a22016-08-03 10:58:05 +00006729 def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
Asaf Badouh2744d212015-09-20 14:31:19 +00006730 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006731 [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))], itins.rr>,
6732 EVEX, Sched<[itins.Sched]>;
Igor Bregerc59b3a22016-08-03 10:58:05 +00006733 def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
Asaf Badouh2744d212015-09-20 14:31:19 +00006734 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006735 [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))],
6736 itins.rm>, EVEX, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Craig Topper90353a92018-01-06 21:02:22 +00006737 }
6738
6739 def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
6740 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
6741 [(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src),
6742 (i32 FROUND_CURRENT)))], itins.rr>,
6743 EVEX, VEX_LIG, Sched<[itins.Sched]>;
6744 def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
6745 !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
6746 [(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src),
6747 (i32 FROUND_NO_EXC)))], itins.rr>,
6748 EVEX,VEX_LIG , EVEX_B, Sched<[itins.Sched]>;
Craig Topper61d8a602018-01-06 21:27:25 +00006749 let isCodeGenOnly = CodeGenOnly, ForceDisassemble = CodeGenOnly in
Craig Topper0f4ccb72018-01-06 21:02:26 +00006750 def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
6751 (ins _SrcRC.IntScalarMemOp:$src),
6752 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
6753 [(set _DstRC.RC:$dst, (OpNodeRnd
6754 (_SrcRC.VT _SrcRC.ScalarIntMemCPat:$src),
6755 (i32 FROUND_CURRENT)))], itins.rm>,
6756 EVEX, VEX_LIG, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Simon Pilgrim916485c2016-08-18 11:22:22 +00006757
Igor Bregerc59b3a22016-08-03 10:58:05 +00006758 def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
Craig Topper90353a92018-01-06 21:02:22 +00006759 (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0>;
Craig Toppere2659d82018-01-05 23:13:54 +00006760 def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
Craig Topper90353a92018-01-06 21:02:22 +00006761 (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0>;
Asaf Badouh2744d212015-09-20 14:31:19 +00006762} //HasAVX512
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006763}
6764
// Variant of avx512_cvt_s_all that passes CodeGenOnly = 0 (so rm_Int is not
// codegen-only) and adds a suffixed InstAlias for the rm_Int memory form.
Craig Topper61d8a602018-01-06 21:27:25 +00006765multiclass avx512_cvt_s_all_unsigned<bits<8> opc, string asm,
6766 X86VectorVTInfo _SrcRC,
6767 X86VectorVTInfo _DstRC, SDNode OpNode,
6768 SDNode OpNodeRnd, OpndItins itins,
6769 string aliasStr> :
6770 avx512_cvt_s_all<opc, asm, _SrcRC, _DstRC, OpNode, OpNodeRnd, itins,
6771 aliasStr, 0> {
6772let Predicates = [HasAVX512] in {
6773 def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
6774 (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
6775 _SrcRC.IntScalarMemOp:$src), 0>;
6776}
6777}
Asaf Badouh2744d212015-09-20 14:31:19 +00006778
// Truncating conversions. Signed forms use opcode 0x2C with fp_to_sint /
// X86cvtts2IntRnd; unsigned forms use opcode 0x78 with fp_to_uint /
// X86cvtts2UIntRnd and the _unsigned variant, which also exposes a suffixed
// alias for the memory form. 64-bit destinations add VEX_W.
Igor Bregerc59b3a22016-08-03 10:58:05 +00006779defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006780 fp_to_sint, X86cvtts2IntRnd, SSE_CVT_SS2SI_32, "{l}">,
Asaf Badouh2744d212015-09-20 14:31:19 +00006781 XS, EVEX_CD8<32, CD8VT1>;
Igor Bregerc59b3a22016-08-03 10:58:05 +00006782defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006783 fp_to_sint, X86cvtts2IntRnd, SSE_CVT_SS2SI_64, "{q}">,
Asaf Badouh2744d212015-09-20 14:31:19 +00006784 VEX_W, XS, EVEX_CD8<32, CD8VT1>;
Igor Bregerc59b3a22016-08-03 10:58:05 +00006785defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006786 fp_to_sint, X86cvtts2IntRnd, SSE_CVT_SD2SI, "{l}">,
Asaf Badouh2744d212015-09-20 14:31:19 +00006787 XD, EVEX_CD8<64, CD8VT1>;
Igor Bregerc59b3a22016-08-03 10:58:05 +00006788defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006789 fp_to_sint, X86cvtts2IntRnd, SSE_CVT_SD2SI, "{q}">,
Asaf Badouh2744d212015-09-20 14:31:19 +00006790 VEX_W, XD, EVEX_CD8<64, CD8VT1>;
6791
Craig Topper61d8a602018-01-06 21:27:25 +00006792defm VCVTTSS2USIZ: avx512_cvt_s_all_unsigned<0x78, "vcvttss2usi", f32x_info, i32x_info,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006793 fp_to_uint, X86cvtts2UIntRnd, SSE_CVT_SS2SI_32, "{l}">,
Asaf Badouh2744d212015-09-20 14:31:19 +00006794 XS, EVEX_CD8<32, CD8VT1>;
Craig Topper61d8a602018-01-06 21:27:25 +00006795defm VCVTTSS2USI64Z: avx512_cvt_s_all_unsigned<0x78, "vcvttss2usi", f32x_info, i64x_info,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006796 fp_to_uint, X86cvtts2UIntRnd, SSE_CVT_SS2SI_64, "{q}">,
Asaf Badouh2744d212015-09-20 14:31:19 +00006797 XS,VEX_W, EVEX_CD8<32, CD8VT1>;
Craig Topper61d8a602018-01-06 21:27:25 +00006798defm VCVTTSD2USIZ: avx512_cvt_s_all_unsigned<0x78, "vcvttsd2usi", f64x_info, i32x_info,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006799 fp_to_uint, X86cvtts2UIntRnd, SSE_CVT_SD2SI, "{l}">,
Asaf Badouh2744d212015-09-20 14:31:19 +00006800 XD, EVEX_CD8<64, CD8VT1>;
Craig Topper61d8a602018-01-06 21:27:25 +00006801defm VCVTTSD2USI64Z: avx512_cvt_s_all_unsigned<0x78, "vcvttsd2usi", f64x_info, i64x_info,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006802 fp_to_uint, X86cvtts2UIntRnd, SSE_CVT_SD2SI, "{q}">,
Asaf Badouh2744d212015-09-20 14:31:19 +00006803 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
Craig Topper61d8a602018-01-06 21:27:25 +00006804
// Select the EVEX *_Int truncating conversions for the SSE/SSE2 cvtt*2si
// intrinsics, each in a register and a folded-load flavour.
// NOTE(review): the memory results here spell the operand as ssmem:$src /
// sdmem:$src, whereas the non-truncating cvtss2si/cvtsd2si patterns in this
// file reuse sse_load_f32:$src / sse_load_f64:$src - confirm whether the
// difference is intentional.
Asaf Badouh2744d212015-09-20 14:31:19 +00006805let Predicates = [HasAVX512] in {
6806 def : Pat<(i32 (int_x86_sse_cvttss2si (v4f32 VR128X:$src))),
Craig Topper8c252bc2016-09-18 18:59:33 +00006807 (VCVTTSS2SIZrr_Int VR128X:$src)>;
Ayman Musaf77219e2017-02-13 09:55:48 +00006808 def : Pat<(i32 (int_x86_sse_cvttss2si sse_load_f32:$src)),
6809 (VCVTTSS2SIZrm_Int ssmem:$src)>;
Asaf Badouh2744d212015-09-20 14:31:19 +00006810 def : Pat<(i64 (int_x86_sse_cvttss2si64 (v4f32 VR128X:$src))),
Craig Topper8c252bc2016-09-18 18:59:33 +00006811 (VCVTTSS2SI64Zrr_Int VR128X:$src)>;
Ayman Musaf77219e2017-02-13 09:55:48 +00006812 def : Pat<(i64 (int_x86_sse_cvttss2si64 sse_load_f32:$src)),
6813 (VCVTTSS2SI64Zrm_Int ssmem:$src)>;
Asaf Badouh2744d212015-09-20 14:31:19 +00006814 def : Pat<(i32 (int_x86_sse2_cvttsd2si (v2f64 VR128X:$src))),
Craig Topper8c252bc2016-09-18 18:59:33 +00006815 (VCVTTSD2SIZrr_Int VR128X:$src)>;
Ayman Musaf77219e2017-02-13 09:55:48 +00006816 def : Pat<(i32 (int_x86_sse2_cvttsd2si sse_load_f64:$src)),
6817 (VCVTTSD2SIZrm_Int sdmem:$src)>;
Asaf Badouh2744d212015-09-20 14:31:19 +00006818 def : Pat<(i64 (int_x86_sse2_cvttsd2si64 (v2f64 VR128X:$src))),
Craig Topper8c252bc2016-09-18 18:59:33 +00006819 (VCVTTSD2SI64Zrr_Int VR128X:$src)>;
Ayman Musaf77219e2017-02-13 09:55:48 +00006820 def : Pat<(i64 (int_x86_sse2_cvttsd2si64 sse_load_f64:$src)),
6821 (VCVTTSD2SI64Zrm_Int sdmem:$src)>;
Elena Demikhovskycf088092013-12-11 14:31:04 +00006822} // HasAVX512
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006823
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006824//===----------------------------------------------------------------------===//
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006825// AVX-512 Convert from float to double and back
6826//===----------------------------------------------------------------------===//
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006827
// Scalar FP-to-FP conversion using the current rounding mode.
//   rr_Int / rm_Int - maskable (AVX512_maskable_scalar) forms on the full
//                     vector class that match OpNode with FROUND_CURRENT;
//                     `_` describes the destination, `_Src` the source.
//   rr / rm         - codegen-only forms on the FRC scalar classes with no
//                     selection pattern (hasSideEffects = 0; rm is mayLoad).
Asaf Badouh2744d212015-09-20 14:31:19 +00006828multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006829 X86VectorVTInfo _Src, SDNode OpNode, OpndItins itins> {
Ayman Musa6e670cf2017-02-23 07:24:21 +00006830 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
Craig Toppera58abd12016-05-09 05:34:12 +00006831 (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
Asaf Badouh2744d212015-09-20 14:31:19 +00006832 "$src2, $src1", "$src1, $src2",
Craig Toppera58abd12016-05-09 05:34:12 +00006833 (_.VT (OpNode (_.VT _.RC:$src1),
Craig Toppera02e3942016-09-23 06:24:43 +00006834 (_Src.VT _Src.RC:$src2),
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006835 (i32 FROUND_CURRENT))), itins.rr>,
6836 EVEX_4V, VEX_LIG, Sched<[itins.Sched]>;
Ayman Musa6e670cf2017-02-23 07:24:21 +00006837 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
Craig Topper08b413a2017-03-13 05:14:44 +00006838 (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
Asaf Badouh2744d212015-09-20 14:31:19 +00006839 "$src2, $src1", "$src1, $src2",
Craig Toppera58abd12016-05-09 05:34:12 +00006840 (_.VT (OpNode (_.VT _.RC:$src1),
Craig Topper08b413a2017-03-13 05:14:44 +00006841 (_Src.VT _Src.ScalarIntMemCPat:$src2),
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006842 (i32 FROUND_CURRENT))), itins.rm>,
6843 EVEX_4V, VEX_LIG,
6844 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Ayman Musa6e670cf2017-02-23 07:24:21 +00006845
Craig Topperd2011e32017-02-25 18:43:42 +00006846 let isCodeGenOnly = 1, hasSideEffects = 0 in {
6847 def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
6848 (ins _.FRC:$src1, _Src.FRC:$src2),
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006849 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
6850 itins.rr>, EVEX_4V, VEX_LIG, Sched<[itins.Sched]>;
Craig Topperd2011e32017-02-25 18:43:42 +00006851 let mayLoad = 1 in
6852 def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
6853 (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006854 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
6855 itins.rm>, EVEX_4V, VEX_LIG,
6856 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Craig Topperd2011e32017-02-25 18:43:42 +00006857 }
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006858}
6859
Asaf Badouh2744d212015-09-20 14:31:19 +00006860// Scalar Conversion with SAE - suppress all exceptions
// Defines rrb_Int: a maskable register form printed with a "{sae}" operand
// that matches OpNodeRnd with (i32 FROUND_NO_EXC) and is tagged EVEX_B.
6861multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006862 X86VectorVTInfo _Src, SDNode OpNodeRnd, OpndItins itins> {
Ayman Musa6e670cf2017-02-23 07:24:21 +00006863 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
Craig Toppera58abd12016-05-09 05:34:12 +00006864 (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
Asaf Badouh2744d212015-09-20 14:31:19 +00006865 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
Craig Toppera58abd12016-05-09 05:34:12 +00006866 (_.VT (OpNodeRnd (_.VT _.RC:$src1),
Asaf Badouh2744d212015-09-20 14:31:19 +00006867 (_Src.VT _Src.RC:$src2),
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006868 (i32 FROUND_NO_EXC))), itins.rr>,
6869 EVEX_4V, VEX_LIG, EVEX_B, Sched<[itins.Sched]>;
Asaf Badouh2744d212015-09-20 14:31:19 +00006870}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006871
Asaf Badouh2744d212015-09-20 14:31:19 +00006872// Scalar Conversion with rounding control (RC)
// Defines rrb_Int: a maskable register form that takes an explicit
// AVX512RC:$rc operand, passes it to OpNodeRnd as (i32 imm:$rc), and is
// tagged EVEX_B and EVEX_RC.
6873multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006874 X86VectorVTInfo _Src, SDNode OpNodeRnd, OpndItins itins> {
Ayman Musa6e670cf2017-02-23 07:24:21 +00006875 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
Craig Toppera58abd12016-05-09 05:34:12 +00006876 (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
Asaf Badouh2744d212015-09-20 14:31:19 +00006877 "$rc, $src2, $src1", "$src1, $src2, $rc",
Craig Toppera58abd12016-05-09 05:34:12 +00006878 (_.VT (OpNodeRnd (_.VT _.RC:$src1),
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006879 (_Src.VT _Src.RC:$src2), (i32 imm:$rc))),
Craig Toppera2f55282017-12-10 03:16:36 +00006880 itins.rr>,
6881 EVEX_4V, VEX_LIG, Sched<[itins.Sched]>,
Asaf Badouh2744d212015-09-20 14:31:19 +00006882 EVEX_B, EVEX_RC;
6883}
// Builds the Z instruction family for the sd -> ss direction under
// HasAVX512: the current-rounding forms (avx512_cvt_fp_scalar) plus the
// explicit rounding-control form (avx512_cvt_fp_rc_scalar), all tagged
// VEX_W, EVEX_CD8<64, CD8VT1> and XD. The same OpNodeRnd is used for both.
Craig Toppera02e3942016-09-23 06:24:43 +00006884multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006885 SDNode OpNodeRnd, OpndItins itins,
6886 X86VectorVTInfo _src, X86VectorVTInfo _dst> {
Asaf Badouh2744d212015-09-20 14:31:19 +00006887 let Predicates = [HasAVX512] in {
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006888 defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, itins>,
Asaf Badouh2744d212015-09-20 14:31:19 +00006889 avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006890 OpNodeRnd, itins>, VEX_W, EVEX_CD8<64, CD8VT1>, XD;
Asaf Badouh2744d212015-09-20 14:31:19 +00006891 }
6892}
6893
// Scalar single -> double conversion.
//
// Instantiates, under HasAVX512 and with the "Z" name suffix, the plain
// scalar forms (avx512_cvt_fp_scalar, defined elsewhere in this file) together
// with the {sae} form; unlike the sd2ss wrapper, no rounding-control form is
// created here. Note the argument order: callers pass (_src, _dst), while the
// inner multiclasses take (dst, src).
multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr,
                                      SDNode OpNodeRnd, OpndItins itins,
                                      X86VectorVTInfo _src, X86VectorVTInfo _dst> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, itins>,
             avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, itins>,
             EVEX_CD8<32, CD8VT1>, XS;
  }
}
// Scalar f64 <-> f32 conversions. Both share opcode 0x5A; the wrapper
// multiclasses apply the distinguishing prefix/VEX_W/CD8 settings. The
// trailing type infos are passed as (source, destination).
defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss",
                                            X86froundRnd, SSE_CVT_SD2SS, f64x_info,
                                            f32x_info>, NotMemoryFoldable;
defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd",
                                            X86fpextRnd, SSE_CVT_SS2SD, f32x_info,
                                            f64x_info>, NotMemoryFoldable;
Asaf Badouh2744d212015-09-20 14:31:19 +00006909
// Selection patterns for scalar fpextend / fpround on FR32X / FR64X.
// The first instruction operand is an IMPLICIT_DEF of the result type in
// every pattern below.

// f32 -> f64, register and load sources.
def : Pat<(f64 (fpextend FR32X:$src)),
          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
          Requires<[HasAVX512]>;
def : Pat<(f64 (fpextend (loadf32 addr:$src))),
          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[HasAVX512]>;

// Extending load, OptForSize: use the memory form of the conversion directly.
def : Pat<(f64 (extloadf32 addr:$src)),
          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[HasAVX512, OptForSize]>;

// Extending load, OptForSpeed: load with VMOVSSZrm first, then convert
// register-to-register.
def : Pat<(f64 (extloadf32 addr:$src)),
          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>,
          Requires<[HasAVX512, OptForSpeed]>;

// f64 -> f32, register source.
def : Pat<(f32 (fpround FR64X:$src)),
          (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
          Requires<[HasAVX512]>;
Elad Cohen0c260102017-01-11 09:11:48 +00006928
// Select the _Int (VR128X-operand) conversion forms when the converted
// scalar is taken from element 0 of $src and merged into $dst through
// X86Movss / X86Movsd.

// f64 element 0 of $src rounded to f32 and merged into $dst.
def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector
                     (f32 (fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
          (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
          Requires<[HasAVX512]>;

// f32 element 0 of $src extended to f64 and merged into $dst.
def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector
                     (f64 (fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
          (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
          Requires<[HasAVX512]>;
6942
//===----------------------------------------------------------------------===//
// AVX-512 Vector convert from signed/unsigned integer to float/double
//          and from float/double to signed/unsigned integer
//===----------------------------------------------------------------------===//

// Common vector conversion forms: register (rr), full-width memory (rm) and
// broadcast-from-scalar memory (rmb), each defined through AVX512_maskable.
//   opc        - opcode byte.
//   OpcodeStr  - mnemonic.
//   _          - type info of the result.
//   _Src       - type info of the source.
//   OpNode     - SDNode applied to the source value.
//   itins      - supplies .rr/.rm itineraries and the .Sched class.
//   Broadcast  - text appended to "${src}" in the rmb assembly string;
//                defaults to the result type's BroadcastStr.
//   Alias      - text appended to the mnemonic of the rm form only.
//   MemOp      - memory operand of the rm form; defaults to _Src.MemOp.
multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86VectorVTInfo _Src, SDNode OpNode, OpndItins itins,
                          string Broadcast = _.BroadcastStr,
                          string Alias = "", X86MemOperand MemOp = _Src.MemOp> {

  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _Src.RC:$src), OpcodeStr, "$src", "$src",
                         (_.VT (OpNode (_Src.VT _Src.RC:$src))), itins.rr>,
                         EVEX, Sched<[itins.Sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins MemOp:$src), OpcodeStr#Alias, "$src", "$src",
                         (_.VT (OpNode (_Src.VT
                             (bitconvert (_Src.LdFrag addr:$src))))), itins.rm>,
                         EVEX, Sched<[itins.Sched.Folded]>;

  // Broadcast form: a single scalar element is loaded and splatted with
  // X86VBroadcast before conversion.
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _Src.ScalarMemOp:$src), OpcodeStr,
                         "${src}"##Broadcast, "${src}"##Broadcast,
                         (_.VT (OpNode (_Src.VT
                                  (X86VBroadcast (_Src.ScalarLdFrag addr:$src)))
                            )), itins.rm>, EVEX, EVEX_B,
                         Sched<[itins.Sched.Folded]>;
}
// Conversion with SAE - suppress all exceptions
//
// Defines one register-register record, rrb. The node's i32 operand is pinned
// to FROUND_NO_EXC and the assembly string carries a literal "{sae}".
//   _     - type info of the result.
//   _Src  - type info of the source.
//   itins - supplies the .rr itinerary and the .Sched class.
multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                              X86VectorVTInfo _Src, SDNode OpNodeRnd,
                              OpndItins itins> {
  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _Src.RC:$src), OpcodeStr,
                        "{sae}, $src", "$src, {sae}",
                        (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src),
                               (i32 FROUND_NO_EXC))), itins.rr>,
                        EVEX, EVEX_B, Sched<[itins.Sched]>;
}
6983
// Conversion with rounding control (RC)
//
// Defines one register-register record, rrb, that takes an explicit
// AVX512RC:$rc operand; the operand is printed in the assembly string and
// matched as the i32 immediate of OpNodeRnd.
//   _     - type info of the result.
//   _Src  - type info of the source.
//   itins - supplies the .rr itinerary and the .Sched class.
multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                             X86VectorVTInfo _Src, SDNode OpNodeRnd,
                             OpndItins itins> {
  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
                        "$rc, $src", "$src, $rc",
                        (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 imm:$rc))),
                        itins.rr>, EVEX, EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
}
6994
// Extend Float to Double
//
// Z (HasAVX512) gets the common forms plus the {sae} form; Z128/Z256 (HasVLX)
// get only the common forms.
multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
                           OpndItins itins> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8f32x_info,
                            fpextend, itins>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
                                X86vfpextRnd, itins>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // The 128-bit form yields v2f64 from a v4f32-typed source, so it uses the
    // X86vfpext node and overrides the broadcast string and the memory
    // operand (f64mem) explicitly.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4f32x_info,
                               X86vfpext, itins, "{1to2}", "", f64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4f32x_info, fpextend,
                               itins>, EVEX_V256;
  }
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007011
// Truncate Double to Float
//
// Z (HasAVX512) gets the common forms plus the rounding-control form;
// Z128/Z256 (HasVLX) get only the common forms.
multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, OpndItins itins> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, fpround, itins>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
                               X86vfproundRnd, itins>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // Both VLX forms share the v4f32x_info result type, so the memory forms
    // get an "{x}"/"{y}" mnemonic suffix and the broadcast string is given
    // explicitly.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
                               X86vfpround, itins, "{1to2}", "{x}">, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, fpround,
                               itins, "{1to4}", "{y}">, EVEX_V256;

    // Aliases for the explicitly suffixed "x"/"y" mnemonics, register and
    // memory sources.
    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, f128mem:$src), 0>;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0>;
  }
}
7035
// Packed f64 <-> f32 conversions. Both share opcode 0x5A and differ in the
// prefix, VEX_W and CD8 settings applied here.
defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SSE_CVT_PD2PS>,
                                  VEX_W, PD, EVEX_CD8<64, CD8VF>;
defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SSE_CVT_PS2PD>,
                                  PS, EVEX_CD8<32, CD8VH>;
7040
// 512-bit extending load selects the memory form of VCVTPS2PD. The selected
// instruction is defined under HasAVX512, so the pattern carries the same
// requirement, matching the other conversion patterns in this section.
def : Pat<(v8f64 (extloadv8f32 addr:$src)),
          (VCVTPS2PDZrm addr:$src)>,
          Requires<[HasAVX512]>;
Michael Liao5bf95782014-12-04 05:20:33 +00007043
// Selection patterns for the 128/256-bit (HasVLX) packed f64 <-> f32 forms.
let Predicates = [HasVLX] in {
  // Fold an X86vzmovl wrapped around the 128-bit pd2ps conversion into the
  // conversion itself.
  // NOTE(review): presumably valid because the instruction already zeroes the
  // upper half of its result - confirm against the ISA reference.
  let AddedComplexity = 15 in {
    def : Pat<(X86vzmovl (v2f64 (bitconvert
                                (v4f32 (X86vfpround (v2f64 VR128X:$src)))))),
              (VCVTPD2PSZ128rr VR128X:$src)>;
    def : Pat<(X86vzmovl (v2f64 (bitconvert
                                (v4f32 (X86vfpround (loadv2f64 addr:$src)))))),
              (VCVTPD2PSZ128rm addr:$src)>;
  }
  // Extending loads select the memory forms of VCVTPS2PD.
  def : Pat<(v2f64 (extloadv2f32 addr:$src)),
            (VCVTPS2PDZ128rm addr:$src)>;
  def : Pat<(v4f64 (extloadv4f32 addr:$src)),
            (VCVTPS2PDZ256rm addr:$src)>;
}
Elena Demikhovsky3629b4a2014-01-06 08:45:54 +00007058
// Convert Signed/Unsigned Doubleword to Double
//   OpNode    - node used for the 512-bit and 256-bit forms.
//   OpNode128 - node used for the 128-bit form.
multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNode128, OpndItins itins> {
  // No rounding in this op
  let Predicates = [HasAVX512] in
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
                            itins>, EVEX_V512;

  let Predicates = [HasVLX] in {
    // The 128-bit form yields v2f64 from a v4i32-typed source, so the
    // broadcast string and the memory operand (i64mem) are given explicitly.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
                               OpNode128, itins, "{1to2}", "", i64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
                               itins>, EVEX_V256;
  }
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007074
// Convert Signed/Unsigned Doubleword to Float
//   OpNode    - node used for the common forms at every vector width.
//   OpNodeRnd - node used for the 512-bit rounding-control form.
multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, OpndItins itins> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
                            itins>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
                               OpNodeRnd, itins>, EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
                               itins>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
                               itins>, EVEX_V256;
  }
}
7091
// Convert Float to Signed/Unsigned Doubleword with truncation
//   OpNode    - node used for the common forms at every vector width.
//   OpNodeRnd - node used for the 512-bit {sae} form.
multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeRnd, OpndItins itins> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
                            itins>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
                                OpNodeRnd, itins>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
                               itins>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
                               itins>, EVEX_V256;
  }
}
7108
// Convert Float to Signed/Unsigned Doubleword
//   OpNode    - node used for the common forms at every vector width.
//   OpNodeRnd - node used for the 512-bit rounding-control form.
multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, OpndItins itins> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
                            itins>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
                               OpNodeRnd, itins>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
                               itins>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
                               itins>, EVEX_V256;
  }
}
7125
// Convert Double to Signed/Unsigned Doubleword with truncation
//   OpNode    - node used for the 512-bit and 256-bit common forms.
//   OpNode128 - node used for the 128-bit form.
//   OpNodeRnd - node used for the 512-bit {sae} form.
multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNode128, SDNode OpNodeRnd,
                            OpndItins itins> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
                            itins>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
                                OpNodeRnd, itins>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
    // memory forms of these instructions in Asm Parser. They have the same
    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
    // due to the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
                               OpNode128, itins, "{1to2}", "{x}">, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
                               itins, "{1to4}", "{y}">, EVEX_V256;

    // Aliases for the explicitly suffixed "x"/"y" mnemonics, register and
    // memory sources.
    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, i128mem:$src), 0>;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, i256mem:$src), 0>;
  }
}
7156
// Convert Double to Signed/Unsigned Doubleword
//   OpNode    - node used for the common forms at every vector width.
//   OpNodeRnd - node used for the 512-bit rounding-control form.
multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, OpndItins itins> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
                            itins>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
                               OpNodeRnd, itins>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
    // memory forms of these instructions in Asm Parser. They have the same
    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
    // due to the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info, OpNode,
                               itins, "{1to2}", "{x}">, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
                               itins, "{1to4}", "{y}">, EVEX_V256;

    // Aliases for the explicitly suffixed "x"/"y" mnemonics, register and
    // memory sources.
    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, f128mem:$src), 0>;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0>;
  }
}
7186
// Convert Double to Signed/Unsigned Quadword
//   OpNode    - node used for the common forms at every vector width.
//   OpNodeRnd - node used for the 512-bit rounding-control form.
// The 512-bit form requires HasDQI; the 128/256-bit forms additionally
// require HasVLX.
multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, OpndItins itins> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
                            itins>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
                               OpNodeRnd, itins>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
                               itins>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
                               itins>, EVEX_V256;
  }
}
7203
// Convert Double to Signed/Unsigned Quadword with truncation
//   OpNode    - node used for the common forms at every vector width.
//   OpNodeRnd - node used for the 512-bit {sae} form.
// The 512-bit form requires HasDQI; the 128/256-bit forms additionally
// require HasVLX.
multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeRnd, OpndItins itins> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
                            itins>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
                                OpNodeRnd, itins>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
                               itins>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
                               itins>, EVEX_V256;
  }
}
7220
// Convert Signed/Unsigned Quadword to Double
//   OpNode    - node used for the common forms at every vector width.
//   OpNodeRnd - node used for the 512-bit rounding-control form.
// The 512-bit form requires HasDQI; the 128/256-bit forms additionally
// require HasVLX.
multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, OpndItins itins> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
                            itins>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
                               OpNodeRnd, itins>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
                               itins>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
                               itins>, EVEX_V256;
  }
}
7237
// Convert Float to Signed/Unsigned Quadword
//   OpNode    - node used for the common forms at every vector width.
//   OpNodeRnd - node used for the 512-bit rounding-control form.
// The 512-bit form requires HasDQI; the 128/256-bit forms additionally
// require HasVLX.
multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, OpndItins itins> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
                            itins>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
                               OpNodeRnd, itins>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from v4f32x_info source
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
                               itins, "{1to2}", "", f64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
                               itins>, EVEX_V256;
  }
}
7256
// Convert Float to Signed/Unsigned Quadword with truncation
//   OpNode    - node used for the 512-bit and 256-bit common forms.
//   OpNode128 - node used for the 128-bit form.
//   OpNodeRnd - node used for the 512-bit {sae} form.
// The 512-bit form requires HasDQI; the 128/256-bit forms additionally
// require HasVLX.
multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNode128, SDNode OpNodeRnd, OpndItins itins> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
                            itins>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
                                OpNodeRnd, itins>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from v4f32x_info source
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode128,
                               itins, "{1to2}", "", f64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
                               itins>, EVEX_V256;
  }
}
7275
// Convert Signed/Unsigned Quadword to Float
//   OpNode    - node used for the 512-bit and 256-bit common forms.
//   OpNode128 - node used for the 128-bit form.
//   OpNodeRnd - node used for the 512-bit rounding-control form.
// The 512-bit form requires HasDQI; the 128/256-bit forms additionally
// require HasVLX.
multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNode128, SDNode OpNodeRnd, OpndItins itins> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode,
                            itins>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
                               OpNodeRnd, itins>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
    // memory forms of these instructions in Asm Parser. They have the same
    // dest type - 'v4f32x_info'. We also specify the broadcast string explicitly
    // due to the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, OpNode128,
                               itins, "{1to2}", "{x}">, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode,
                               itins, "{1to4}", "{y}">, EVEX_V256;

    // Aliases for the explicitly suffixed "x"/"y" mnemonics, register and
    // memory sources.
    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, i128mem:$src), 0>;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, i256mem:$src), 0>;
  }
}
7305
// Doubleword <-> floating-point conversions: signed/unsigned int-to-fp and
// truncating fp-to-int. Each instantiation supplies its opcode, mnemonic,
// selection nodes and itinerary, followed by the prefix / VEX_W / CD8
// settings.
defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", sint_to_fp, X86VSintToFP,
                                 SSE_CVT_I2PD>, XS, EVEX_CD8<32, CD8VH>;

defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", sint_to_fp,
                                 X86VSintToFpRnd, SSE_CVT_I2PS>,
                                 PS, EVEX_CD8<32, CD8VF>;

defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", fp_to_sint,
                                   X86cvttp2siRnd, SSE_CVT_PS2I>,
                                   XS, EVEX_CD8<32, CD8VF>;

defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", fp_to_sint, X86cvttp2si,
                                   X86cvttp2siRnd, SSE_CVT_PD2I>,
                                   PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", fp_to_uint,
                                    X86cvttp2uiRnd, SSE_CVT_PS2I>, PS,
                                    EVEX_CD8<32, CD8VF>;

defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", fp_to_uint,
                                    X86cvttp2ui, X86cvttp2uiRnd, SSE_CVT_PD2I>,
                                    PS, VEX_W, EVEX_CD8<64, CD8VF>;

defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp,
                                  X86VUintToFP, SSE_CVT_I2PD>, XS,
                                  EVEX_CD8<32, CD8VH>;

defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", uint_to_fp,
                                  X86VUintToFpRnd, SSE_CVT_I2PS>, XD,
                                  EVEX_CD8<32, CD8VF>;
7336
// Non-truncating fp -> signed/unsigned doubleword and quadword conversions.
// All of these select through the X86cvtp2Int / X86cvtp2UInt nodes and their
// *Rnd counterparts.
defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int,
                                 X86cvtp2IntRnd, SSE_CVT_PS2I>, PD,
                                 EVEX_CD8<32, CD8VF>;

defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int,
                                 X86cvtp2IntRnd, SSE_CVT_PD2I>, XD,
                                 VEX_W, EVEX_CD8<64, CD8VF>;

defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt,
                                  X86cvtp2UIntRnd, SSE_CVT_PS2I>,
                                  PS, EVEX_CD8<32, CD8VF>;

defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt,
                                  X86cvtp2UIntRnd, SSE_CVT_PD2I>, VEX_W,
                                  PS, EVEX_CD8<64, CD8VF>;

defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int,
                                 X86cvtp2IntRnd, SSE_CVT_PD2I>, VEX_W,
                                 PD, EVEX_CD8<64, CD8VF>;

defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int,
                                 X86cvtp2IntRnd, SSE_CVT_PS2I>, PD,
                                 EVEX_CD8<32, CD8VH>;

defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt,
                                  X86cvtp2UIntRnd, SSE_CVT_PD2I>, VEX_W,
                                  PD, EVEX_CD8<64, CD8VF>;

defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt,
                                  X86cvtp2UIntRnd, SSE_CVT_PS2I>, PD,
                                  EVEX_CD8<32, CD8VH>;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007368
7369defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", fp_to_sint,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007370 X86cvttp2siRnd, SSE_CVT_PD2I>, VEX_W,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007371 PD, EVEX_CD8<64, CD8VF>;
7372
Craig Toppera39b6502016-12-10 06:02:48 +00007373defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", fp_to_sint, X86cvttp2si,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007374 X86cvttp2siRnd, SSE_CVT_PS2I>, PD,
7375 EVEX_CD8<32, CD8VH>;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007376
7377defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", fp_to_uint,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007378 X86cvttp2uiRnd, SSE_CVT_PD2I>, VEX_W,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007379 PD, EVEX_CD8<64, CD8VF>;
7380
Craig Toppera39b6502016-12-10 06:02:48 +00007381defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", fp_to_uint, X86cvttp2ui,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007382 X86cvttp2uiRnd, SSE_CVT_PS2I>, PD,
7383 EVEX_CD8<32, CD8VH>;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007384
7385defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", sint_to_fp,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007386 X86VSintToFpRnd, SSE_CVT_I2PD>, VEX_W, XS,
7387 EVEX_CD8<64, CD8VF>;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007388
7389defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", uint_to_fp,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007390 X86VUintToFpRnd, SSE_CVT_I2PD>, VEX_W, XS,
7391 EVEX_CD8<64, CD8VF>;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007392
Simon Pilgrima3af7962016-11-24 12:13:46 +00007393defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp, X86VSintToFP,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007394 X86VSintToFpRnd, SSE_CVT_I2PS>, VEX_W, PS,
7395 EVEX_CD8<64, CD8VF>;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007396
Simon Pilgrima3af7962016-11-24 12:13:46 +00007397defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp, X86VUintToFP,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007398 X86VUintToFpRnd, SSE_CVT_I2PS>, VEX_W, XD,
7399 EVEX_CD8<64, CD8VF>;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007400
// Without VLX, 128/256-bit unsigned conversions are selected by widening:
// the source is inserted into an undefined 512-bit (or, for the PD forms,
// the matching wider) register, the Z-suffixed register-register instruction
// is used, and the result is taken from the low subregister.
let Predicates = [HasAVX512, NoVLX] in {
  // fp -> unsigned 32-bit integer.
  def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
            (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr (v16f32
              (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))),
              sub_ymm)>;

  def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))),
            (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr (v16f32
              (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)))),
              sub_xmm)>;

  def : Pat<(v4i32 (fp_to_uint (v4f64 VR256X:$src1))),
            (EXTRACT_SUBREG (v8i32 (VCVTTPD2UDQZrr (v8f64
              (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))),
              sub_xmm)>;

  // unsigned 32-bit integer -> fp.
  def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
            (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr (v16i32
              (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))),
              sub_ymm)>;

  def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
            (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr (v16i32
              (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)))),
              sub_xmm)>;

  def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
            (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr (v8i32
              (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)))),
              sub_ymm)>;

  def : Pat<(v2f64 (X86VUintToFP (v4i32 VR128X:$src1))),
            (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr (v8i32
              (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)))),
              sub_xmm)>;
}
7437
// Extra selection patterns for the 128-bit VLX conversion instructions.
let Predicates = [HasAVX512, HasVLX] in {
  // An X86vzmovl wrapped around a v2f64 -> v4i32 conversion is matched
  // directly to the conversion instruction; AddedComplexity raises the
  // priority of these patterns.
  let AddedComplexity = 15 in {
    def : Pat<(X86vzmovl (v2i64 (bitconvert
                (v4i32 (X86cvtp2Int (v2f64 VR128X:$src)))))),
              (VCVTPD2DQZ128rr VR128X:$src)>;
    def : Pat<(X86vzmovl (v2i64 (bitconvert
                (v4i32 (X86cvtp2Int (loadv2f64 addr:$src)))))),
              (VCVTPD2DQZ128rm addr:$src)>;
    def : Pat<(X86vzmovl (v2i64 (bitconvert
                (v4i32 (X86cvtp2UInt (v2f64 VR128X:$src)))))),
              (VCVTPD2UDQZ128rr VR128X:$src)>;
    def : Pat<(X86vzmovl (v2i64 (bitconvert
                (v4i32 (X86cvttp2si (v2f64 VR128X:$src)))))),
              (VCVTTPD2DQZ128rr VR128X:$src)>;
    def : Pat<(X86vzmovl (v2i64 (bitconvert
                (v4i32 (X86cvttp2si (loadv2f64 addr:$src)))))),
              (VCVTTPD2DQZ128rm addr:$src)>;
    def : Pat<(X86vzmovl (v2i64 (bitconvert
                (v4i32 (X86cvttp2ui (v2f64 VR128X:$src)))))),
              (VCVTTPD2UDQZ128rr VR128X:$src)>;
  }

  // Fold a 64-bit scalar load (scalar_to_vector or X86vzload form) feeding a
  // v4i32 -> v2f64 conversion into the memory form of the instruction.
  def : Pat<(v2f64 (X86VSintToFP
              (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (VCVTDQ2PDZ128rm addr:$src)>;
  def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
            (VCVTDQ2PDZ128rm addr:$src)>;

  def : Pat<(v2f64 (X86VUintToFP
              (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (VCVTUDQ2PDZ128rm addr:$src)>;
  def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
            (VCVTUDQ2PDZ128rm addr:$src)>;
}
7470
// 512-bit precision changes with the load folded into the instruction.
let Predicates = [HasAVX512] in {
  // v8f64 load rounded to v8f32.
  def : Pat<(v8f32 (fpround (loadv8f64 addr:$src))),
            (VCVTPD2PSZrm addr:$src)>;
  // v8f32 extending load to v8f64.
  def : Pat<(v8f64 (extloadv8f32 addr:$src)),
            (VCVTPS2PDZrm addr:$src)>;
}
7477
// An X86vzmovl wrapped around a v2i64 -> v4f32 conversion is matched directly
// to the 128-bit conversion instruction.
let Predicates = [HasDQI, HasVLX] in {
  let AddedComplexity = 15 in {
    def : Pat<(X86vzmovl (v2f64 (bitconvert
                (v4f32 (X86VSintToFP (v2i64 VR128X:$src)))))),
              (VCVTQQ2PSZ128rr VR128X:$src)>;
    def : Pat<(X86vzmovl (v2f64 (bitconvert
                (v4f32 (X86VUintToFP (v2i64 VR128X:$src)))))),
              (VCVTUQQ2PSZ128rr VR128X:$src)>;
  }
}
7488
// With DQI but without VLX, 128/256-bit conversions to or from 64-bit
// integers are selected by widening: insert the source into an undefined
// wider register, use the Z-suffixed register-register instruction, and
// extract the low subregister of the result.
let Predicates = [HasDQI, NoVLX] in {
  // fp -> signed 64-bit integer.
  def : Pat<(v2i64 (fp_to_sint (v2f64 VR128X:$src1))),
            (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr (v8f64
              (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)))),
              sub_xmm)>;

  def : Pat<(v4i64 (fp_to_sint (v4f32 VR128X:$src1))),
            (EXTRACT_SUBREG (v8i64 (VCVTTPS2QQZrr (v8f32
              (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)))),
              sub_ymm)>;

  def : Pat<(v4i64 (fp_to_sint (v4f64 VR256X:$src1))),
            (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr (v8f64
              (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))),
              sub_ymm)>;

  // fp -> unsigned 64-bit integer.
  def : Pat<(v2i64 (fp_to_uint (v2f64 VR128X:$src1))),
            (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr (v8f64
              (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)))),
              sub_xmm)>;

  def : Pat<(v4i64 (fp_to_uint (v4f32 VR128X:$src1))),
            (EXTRACT_SUBREG (v8i64 (VCVTTPS2UQQZrr (v8f32
              (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)))),
              sub_ymm)>;

  def : Pat<(v4i64 (fp_to_uint (v4f64 VR256X:$src1))),
            (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr (v8f64
              (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))),
              sub_ymm)>;

  // signed 64-bit integer -> fp.
  def : Pat<(v4f32 (sint_to_fp (v4i64 VR256X:$src1))),
            (EXTRACT_SUBREG (v8f32 (VCVTQQ2PSZrr (v8i64
              (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))),
              sub_xmm)>;

  def : Pat<(v2f64 (sint_to_fp (v2i64 VR128X:$src1))),
            (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr (v8i64
              (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)))),
              sub_xmm)>;

  def : Pat<(v4f64 (sint_to_fp (v4i64 VR256X:$src1))),
            (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr (v8i64
              (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))),
              sub_ymm)>;

  // unsigned 64-bit integer -> fp.
  def : Pat<(v4f32 (uint_to_fp (v4i64 VR256X:$src1))),
            (EXTRACT_SUBREG (v8f32 (VCVTUQQ2PSZrr (v8i64
              (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))),
              sub_xmm)>;

  def : Pat<(v2f64 (uint_to_fp (v2i64 VR128X:$src1))),
            (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr (v8i64
              (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)))),
              sub_xmm)>;

  def : Pat<(v4f64 (uint_to_fp (v4i64 VR256X:$src1))),
            (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr (v8i64
              (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))),
              sub_ymm)>;
}
7550
Elena Demikhovskydd0794e2013-10-24 07:16:35 +00007551//===----------------------------------------------------------------------===//
7552// Half precision conversion instructions
7553//===----------------------------------------------------------------------===//
Simon Pilgrim569e53b2017-12-03 21:43:54 +00007554
/// avx512_cvtph2ps - maskable register (rr) and memory (rm) forms of
/// vcvtph2ps (opcode 0x13).
///   _dest    - vector type info of the result.
///   _src     - vector type info of the source.
///   x86memop - memory operand used by the rm form.
///   ld_frag  - load fragment; its result is bitconverted to _src.VT.
///   itins    - itinerary / scheduling bundle (rr, rm, Sched).
multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                           X86MemOperand x86memop, PatFrag ld_frag,
                           OpndItins itins> {
  defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
                            (ins _src.RC:$src),
                            "vcvtph2ps", "$src", "$src",
                            (X86cvtph2ps (_src.VT _src.RC:$src)),
                            itins.rr>,
            T8PD, Sched<[itins.Sched]>;

  defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
                            (ins x86memop:$src),
                            "vcvtph2ps", "$src", "$src",
                            (X86cvtph2ps
                              (_src.VT (bitconvert (ld_frag addr:$src)))),
                            itins.rm>,
            T8PD, Sched<[itins.Sched.Folded]>;
}
7569
/// avx512_cvtph2ps_sae - maskable register form of vcvtph2ps carrying the
/// {sae} operand; matched through X86cvtph2psRnd with FROUND_NO_EXC and
/// encoded with EVEX_B.
multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                               OpndItins itins> {
  defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
                             (ins _src.RC:$src),
                             "vcvtph2ps", "{sae}, $src", "$src, {sae}",
                             (X86cvtph2psRnd (_src.VT _src.RC:$src),
                                             (i32 FROUND_NO_EXC)),
                             itins.rr>,
             T8PD, EVEX_B, Sched<[itins.Sched]>;
}
7579
// 512-bit vcvtph2ps: v16i16 source -> v16f32 result, including the {sae}
// register form.
let Predicates = [HasAVX512] in {
  defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem,
                                    loadv4i64, SSE_CVT_PH2PS>,
                    avx512_cvtph2ps_sae<v16f32_info, v16i16x_info,
                                        SSE_CVT_PH2PS>,
                    EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
}
Craig Toppere7fb3002017-11-07 07:13:07 +00007585
// 256-bit and 128-bit vcvtph2ps, available with VLX.
let Predicates = [HasVLX] in {
  defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
                                       loadv2i64, SSE_CVT_PH2PS>,
                       EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;

  defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
                                       loadv2i64, SSE_CVT_PH2PS>,
                       EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;

  // Fold a scalar i64 load feeding vcvtph2ps into the 128-bit memory form.
  // Three equivalent DAG shapes of that load are recognised.
  def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzmovl_v2i64 addr:$src)))),
            (VCVTPH2PSZ128rm addr:$src)>;

  def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzload_v2i64 addr:$src)))),
            (VCVTPH2PSZ128rm addr:$src)>;

  def : Pat<(v4f32 (X86cvtph2ps
              (v8i16 (bitconvert
                (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
            (VCVTPH2PSZ128rm addr:$src)>;
}
7603
/// avx512_cvtps2ph - vcvtps2ph (opcode 0x1D) with an 8-bit immediate operand.
///   rr  - maskable register destination form, matched through X86cvtps2ph.
///   mr  - store form; carries no pattern here.
///   mrk - write-masked store form (EVEX_K); carries no pattern here.
multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                           X86MemOperand x86memop, OpndItins itins> {
  defm rr : AVX512_maskable<0x1D, MRMDestReg, _dest, (outs _dest.RC:$dst),
                            (ins _src.RC:$src1, i32u8imm:$src2),
                            "vcvtps2ph", "$src2, $src1", "$src1, $src2",
                            (X86cvtps2ph (_src.VT _src.RC:$src1),
                                         (i32 imm:$src2)),
                            itins.rr, 0, 0>,
            AVX512AIi8Base, Sched<[itins.Sched]>;

  // The store forms have an empty pattern list, so mayStore and
  // hasSideEffects are set explicitly.
  let hasSideEffects = 0, mayStore = 1 in {
    def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
                        (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
                        "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                        [], itins.rm>,
             Sched<[itins.Sched.Folded, ReadAfterLd]>;

    def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
                         (ins x86memop:$dst, _dest.KRCWM:$mask,
                              _src.RC:$src1, i32u8imm:$src2),
                         "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                         [], itins.rm>,
              EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
Simon Pilgrim569e53b2017-12-03 21:43:54 +00007623
/// avx512_cvtps2ph_sae - register form of vcvtps2ph carrying the {sae}
/// operand (EVEX_B). Built with AVX512_maskable_in_asm and an empty pattern
/// list, with hasSideEffects cleared explicitly.
multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                               OpndItins itins> {
  let hasSideEffects = 0 in {
    defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
                                      (outs _dest.RC:$dst),
                                      (ins _src.RC:$src1, i32u8imm:$src2),
                                      "vcvtps2ph",
                                      "$src2, {sae}, $src1",
                                      "$src1, {sae}, $src2",
                                      [], itins.rr>,
               EVEX_B, AVX512AIi8Base, Sched<[itins.Sched]>;
  }
}
Simon Pilgrim569e53b2017-12-03 21:43:54 +00007633
// vcvtps2ph instantiations and the patterns that fold a store of the
// converted value into the memory-destination form.
//
// The 512-bit instruction only requires AVX512F. The 128/256-bit EVEX
// instructions are defined under HasVLX, so every pattern that selects
// VCVTPS2PHZ128mr / VCVTPS2PHZ256mr is placed under HasVLX as well; guarding
// them with HasAVX512 alone would let instruction selection pick a VLX-only
// instruction on a target without VLX.
let Predicates = [HasAVX512] in {
  defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
                                    SSE_CVT_PS2PH>,
                    avx512_cvtps2ph_sae<v16i16x_info, v16f32_info,
                                        SSE_CVT_PS2PH>,
                    EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;

  def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, i32:$src2)), addr:$dst),
            (VCVTPS2PHZmr addr:$dst, VR512:$src1, imm:$src2)>;
}

let Predicates = [HasVLX] in {
  defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
                                       SSE_CVT_PS2PH>,
                       EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
  defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
                                       SSE_CVT_PS2PH>,
                       EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;

  // Store of the low 64 bits of the 128-bit result, viewed as f64 or i64.
  def : Pat<(store (f64 (extractelt
                         (bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
  def : Pat<(store (i64 (extractelt
                         (bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;

  // Store of the full v8i16 result of the 256-bit source form.
  def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, i32:$src2)), addr:$dst),
            (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, imm:$src2)>;
}
Asaf Badouh2489f352015-12-02 08:17:51 +00007662
// Scalar float <-> half-float conversions, selected through the 128-bit
// vcvtps2ph / vcvtph2ps instructions.
let Predicates = [HasVLX] in {
  // The rounding immediate is 4 (0b100), which selects MXCSR.RC rather than
  // explicitly encoding the default Nearest-Even mode (0). The two are
  // equivalent in the configurations we support (the default), but deferring
  // to MXCSR is more consistent with other instructions, which are always
  // controlled by it.
  def : Pat<(fp_to_f16 FR32X:$src),
            (i16 (EXTRACT_SUBREG
                   (VMOVPDI2DIZrr
                     (VCVTPS2PHZ128rr (COPY_TO_REGCLASS FR32X:$src, VR128X),
                                      4)),
                   sub_16bit))>;

  def : Pat<(f16_to_fp GR16:$src),
            (f32 (COPY_TO_REGCLASS
                   (VCVTPH2PSZ128rr
                     (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128X)),
                   FR32X))>;

  // A float -> half -> float round trip stays in vector registers.
  def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32X:$src))),
            (f32 (COPY_TO_REGCLASS
                   (VCVTPH2PSZ128rr
                     (VCVTPS2PHZ128rr (COPY_TO_REGCLASS FR32X:$src, VR128X),
                                      4)),
                   FR32X))>;
}
7682
// Unordered/Ordered scalar fp compare with SAE and set EFLAGS.
// Defines only the register-register {sae} form (rrb). It has no selection
// pattern, so hasSideEffects is cleared explicitly.
multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
                              string OpcodeStr, OpndItins itins> {
  let hasSideEffects = 0 in {
    def rrb : AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
                     OpcodeStr#"\t{{sae}, $src2, $src1|$src1, $src2, {sae}}",
                     [], itins.rr>,
              EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[itins.Sched]>;
  }
}
7692
// {sae} register forms of the scalar compare instructions; all define EFLAGS.
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss",
                                      SSE_COMIS>,
                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;

  defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd",
                                      SSE_COMIS>,
                   AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;

  defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss",
                                     SSE_COMIS>,
                  AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;

  defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd",
                                     SSE_COMIS>,
                  AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
}
7703
// EVEX-encoded scalar compares built on the shared sse12_ord_cmp and
// sse12_ord_cmp_int multiclasses; all define EFLAGS.
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  // FR32X/FR64X operand forms of (u)comis{s,d}, matched through X86cmp.
  defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
                                 "ucomiss", SSE_COMIS>,
                   PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64,
                                 "ucomisd", SSE_COMIS>,
                   PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;

  // The comis{s,d} forms are given an empty pattern list (and an undef node).
  let Pattern = []<dag> in {
    defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, undef, f32, f32mem, loadf32,
                                  "comiss", SSE_COMIS>,
                    PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
    defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, undef, f64, f64mem, loadf64,
                                  "comisd", SSE_COMIS>,
                    PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  }

  // VR128X operand forms matched through X86ucomi / X86comi; marked
  // isCodeGenOnly.
  let isCodeGenOnly = 1 in {
    defm VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
                                       sse_load_f32, "ucomiss", SSE_COMIS>,
                     PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
    defm VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
                                       sse_load_f64, "ucomisd", SSE_COMIS>,
                     PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;

    defm VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
                                      sse_load_f32, "comiss", SSE_COMIS>,
                    PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
    defm VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
                                      sse_load_f64, "comisd", SSE_COMIS>,
                    PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  }
}
Michael Liao5bf95782014-12-04 05:20:33 +00007735
/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
/// Maskable scalar two-source instruction with a register form (rr) and a
/// form whose second source is a scalar memory operand (rm). Both are
/// EVEX_4V and require HasAVX512.
multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         OpndItins itins, X86VectorVTInfo _> {
  let Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
    defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                     (ins _.RC:$src1, _.RC:$src2),
                                     OpcodeStr,
                                     "$src2, $src1", "$src1, $src2",
                                     (OpNode (_.VT _.RC:$src1),
                                             (_.VT _.RC:$src2)),
                                     itins.rr>,
              EVEX_4V, Sched<[itins.Sched]>;

    defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                     (ins _.RC:$src1, _.IntScalarMemOp:$src2),
                                     OpcodeStr,
                                     "$src2, $src1", "$src1, $src2",
                                     (OpNode (_.VT _.RC:$src1),
                                             _.ScalarIntMemCPat:$src2),
                                     itins.rm>,
              EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
7753
// Scalar rcp14 / rsqrt14 instantiations; all are tagged NotMemoryFoldable.
defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SSE_RCPS,
                              f32x_info>,
                EVEX_CD8<32, CD8VT1>, T8PD, NotMemoryFoldable;
defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SSE_RCPS,
                              f64x_info>,
                VEX_W, EVEX_CD8<64, CD8VT1>, T8PD, NotMemoryFoldable;
defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s, SSE_RSQRTSS,
                                f32x_info>,
                  EVEX_CD8<32, CD8VT1>, T8PD, NotMemoryFoldable;
defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s, SSE_RSQRTSS,
                                f64x_info>,
                  VEX_W, EVEX_CD8<64, CD8VT1>, T8PD, NotMemoryFoldable;
Elena Demikhovskya3a71402013-10-09 08:16:14 +00007762
/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
/// Maskable packed one-source instruction in three forms:
///   r  - register source.
///   m  - full-vector memory source.
///   mb - broadcast of a scalar memory source (EVEX_B).
multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                             (ins _.RC:$src),
                             OpcodeStr, "$src", "$src",
                             (_.FloatVT (OpNode _.RC:$src)),
                             itins.rr>,
             EVEX, T8PD, Sched<[itins.Sched]>;

    defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                             (ins _.MemOp:$src),
                             OpcodeStr, "$src", "$src",
                             (OpNode (_.FloatVT
                               (bitconvert (_.LdFrag addr:$src)))),
                             itins.rm>,
             EVEX, T8PD, Sched<[itins.Sched.Folded, ReadAfterLd]>;

    defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                              (ins _.ScalarMemOp:$src),
                              OpcodeStr,
                              "${src}"##_.BroadcastStr,
                              "${src}"##_.BroadcastStr,
                              (OpNode (_.FloatVT
                                (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
                              itins.rm>,
              EVEX, T8PD, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
Robert Khasanov3e534c92014-10-28 16:37:13 +00007784
/// avx512_fp14_p_vl_all - instantiate avx512_fp14_p for the "ps" and "pd"
/// flavours at every vector length. The 512-bit forms are defined
/// unconditionally here; the 128/256-bit forms require HasVLX.
multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                SizeItins itins> {
  defm PSZ : avx512_fp14_p<opc, OpcodeStr#"ps", OpNode, itins.s, v16f32_info>,
             EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp14_p<opc, OpcodeStr#"pd", OpNode, itins.d, v8f64_info>,
             EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp14_p<opc, OpcodeStr#"ps", OpNode, itins.s,
                                v4f32x_info>,
                  EVEX_V128, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp14_p<opc, OpcodeStr#"ps", OpNode, itins.s,
                                v8f32x_info>,
                  EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp14_p<opc, OpcodeStr#"pd", OpNode, itins.d,
                                v2f64x_info>,
                  EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp14_p<opc, OpcodeStr#"pd", OpNode, itins.d,
                                v4f64x_info>,
                  EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
  }
}
7808
// Packed rsqrt14 / rcp14 at all vector lengths.
defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14,
                                     SSE_RSQRT_P>;
defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14,
                                   SSE_RCP_P>;
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00007811
/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
/// Maskable scalar two-source instruction in three forms:
///   r  - register sources, matched with FROUND_CURRENT.
///   rb - register sources with the {sae} operand (EVEX_B), matched with
///        FROUND_NO_EXC.
///   m  - second source is a scalar memory operand, matched with
///        FROUND_CURRENT.
multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                         SDNode OpNode, OpndItins itins> {
  let ExeDomain = _.ExeDomain in {
  defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                           (i32 FROUND_CURRENT)), itins.rr>,
                           Sched<[itins.Sched]>;

  // rb is a register-register form (MRMSrcReg, scheduled with itins.Sched),
  // so it takes the register itinerary itins.rr like the other register
  // forms, not the memory itinerary.
  defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                            (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                            (i32 FROUND_NO_EXC)), itins.rr>, EVEX_B,
                            Sched<[itins.Sched]>;

  defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
                         (i32 FROUND_CURRENT)), itins.rm>,
                         Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
7838
/// avx512_eri_s - instantiate avx512_fp28_s for the "ss" (f32x_info) and
/// "sd" (f64x_info, VEX_W) scalar flavours of one mnemonic stem.
multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                        SizeItins itins> {
  defm SS : avx512_fp28_s<opc, !strconcat(OpcodeStr, "ss"), f32x_info, OpNode,
                          itins.s>,
            EVEX_CD8<32, CD8VT1>;
  defm SD : avx512_fp28_s<opc, !strconcat(OpcodeStr, "sd"), f64x_info, OpNode,
                          itins.d>,
            EVEX_CD8<64, CD8VT1>, VEX_W;
}
Elena Demikhovskya3a71402013-10-09 08:16:14 +00007846
// Scalar rcp28 / rsqrt28, guarded by the HasERI predicate.
let Predicates = [HasERI] in {
  defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, SSE_RCP_S>,
                T8PD, EVEX_4V;
  defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, SSE_RSQRT_S>,
                  T8PD, EVEX_4V;
}
Igor Breger8352a0d2015-07-28 06:53:28 +00007853
// Scalar vgetexp reuses the avx512_eri_s multiclass but is declared outside
// the HasERI predicate block.
defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds,
                            SSE_ALU_ITINS_S>,
               T8PD, EVEX_4V;
/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
/// Maskable packed one-source instruction, every form matched with
/// FROUND_CURRENT:
///   r  - register source.
///   m  - full-vector memory source.
///   mb - broadcast of a scalar memory source (EVEX_B).
multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         SDNode OpNode, OpndItins itins> {
  let ExeDomain = _.ExeDomain in {
    defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                             (ins _.RC:$src),
                             OpcodeStr, "$src", "$src",
                             (OpNode (_.VT _.RC:$src), (i32 FROUND_CURRENT)),
                             itins.rr>,
             Sched<[itins.Sched]>;

    defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                             (ins _.MemOp:$src),
                             OpcodeStr, "$src", "$src",
                             (OpNode (_.FloatVT
                                       (bitconvert (_.LdFrag addr:$src))),
                                     (i32 FROUND_CURRENT)),
                             itins.rm>,
             Sched<[itins.Sched.Folded, ReadAfterLd]>;

    defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                              (ins _.ScalarMemOp:$src),
                              OpcodeStr,
                              "${src}"##_.BroadcastStr,
                              "${src}"##_.BroadcastStr,
                              (OpNode (_.FloatVT
                                        (X86VBroadcast
                                          (_.ScalarLdFrag addr:$src))),
                                      (i32 FROUND_CURRENT)),
                              itins.rm>,
              EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
// Register-only "{sae}" companion of avx512_fp28_p: a single maskable `rb`
// form whose pattern passes FROUND_NO_EXC to OpNode and which is tagged
// EVEX_B.
multiclass avx512_fp28_p_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                               SDNode OpNode, OpndItins itins> {
  let ExeDomain = _.ExeDomain in {
    defm rb : AVX512_maskable<opc, MRMSrcReg, _,
                              (outs _.RC:$dst), (ins _.RC:$src),
                              OpcodeStr,
                              "{sae}, $src", "$src, {sae}",
                              (OpNode (_.VT _.RC:$src), (i32 FROUND_NO_EXC)),
                              itins.rr>,
              EVEX_B, Sched<[itins.Sched]>;
  }
}
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00007891
// 512-bit packed PS/PD instantiations: each variant combines the regular
// forms (avx512_fp28_p) with the {sae} register form (avx512_fp28_p_round).
//   itins - SizeItins; .s is used for PS and .d for PD.
multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
                      SizeItins itins> {
  // v16f32
  defm PS : avx512_fp28_p<opc, !strconcat(OpcodeStr, "ps"), v16f32_info,
                          OpNode, itins.s>,
            avx512_fp28_p_round<opc, !strconcat(OpcodeStr, "ps"), v16f32_info,
                                OpNode, itins.s>,
            T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;

  // v8f64
  defm PD : avx512_fp28_p<opc, !strconcat(OpcodeStr, "pd"), v8f64_info,
                          OpNode, itins.d>,
            avx512_fp28_p_round<opc, !strconcat(OpcodeStr, "pd"), v8f64_info,
                                OpNode, itins.d>,
            T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00007901
// 128-bit and 256-bit packed PS/PD instantiations of avx512_fp28_p.
// All four variants are predicated on HasVLX; no {sae} form is emitted here.
multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode, SizeItins itins> {
  let Predicates = [HasVLX] in {
    // Single precision: v4f32 / v8f32.
    defm PSZ128 : avx512_fp28_p<opc, !strconcat(OpcodeStr, "ps"), v4f32x_info,
                                OpNode, itins.s>,
                  EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp28_p<opc, !strconcat(OpcodeStr, "ps"), v8f32x_info,
                                OpNode, itins.s>,
                  EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;

    // Double precision: v2f64 / v4f64.
    defm PDZ128 : avx512_fp28_p<opc, !strconcat(OpcodeStr, "pd"), v2f64x_info,
                                OpNode, itins.d>,
                  EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp28_p<opc, !strconcat(OpcodeStr, "pd"), v4f64x_info,
                                OpNode, itins.d>,
                  EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
  }
}
// Packed instantiations of avx512_eri.
// VRSQRT28, VRCP28 and VEXP2 are guarded by HasERI.
let Predicates = [HasERI] in {
  defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, SSE_RSQRT_P>, EVEX;
  defm VRCP28   : avx512_eri<0xCA, "vrcp28", X86rcp28, SSE_RCP_P>, EVEX;
  defm VEXP2    : avx512_eri<0xC8, "vexp2", X86exp2, SSE_ALU_ITINS_P>, EVEX;
}

// VGETEXP sits outside the HasERI guard and also gets the 128/256-bit
// variants from avx512_fp_unaryop_packed.
defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd, SSE_ALU_ITINS_P>,
               avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd,
                                        SSE_ALU_ITINS_P>,
               EVEX;
Asaf Badouh402ebb32015-06-03 13:41:48 +00007925
// Packed square root with an explicit rounding-control operand: a single
// maskable register form `rb` that takes AVX512RC:$rc and matches
// X86fsqrtRnd with that immediate.
multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
                                    OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm rb : AVX512_maskable<opc, MRMSrcReg, _,
                              (outs _.RC:$dst),
                              (ins _.RC:$src, AVX512RC:$rc),
                              OpcodeStr, "$rc, $src", "$src, $rc",
                              (_.VT (X86fsqrtRnd _.RC:$src, (i32 imm:$rc))),
                              itins.rr>,
              EVEX, EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
  }
}
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00007934
// Packed square root for the vector type described by `_`, matching the
// generic `fsqrt` node. Three maskable forms are emitted:
//   r  - register source,
//   m  - full-vector memory source,
//   mb - scalar memory source broadcast via X86VBroadcast (EVEX_B).
multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr, OpndItins itins,
                              X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm r : AVX512_maskable<opc, MRMSrcReg, _,
                             (outs _.RC:$dst), (ins _.RC:$src),
                             OpcodeStr, "$src", "$src",
                             (_.FloatVT (fsqrt _.RC:$src)),
                             itins.rr>,
             EVEX, Sched<[itins.Sched]>;

    defm m : AVX512_maskable<opc, MRMSrcMem, _,
                             (outs _.RC:$dst), (ins _.MemOp:$src),
                             OpcodeStr, "$src", "$src",
                             (fsqrt
                               (_.FloatVT (bitconvert (_.LdFrag addr:$src)))),
                             itins.rm>,
             EVEX, Sched<[itins.Sched.Folded, ReadAfterLd]>;

    defm mb : AVX512_maskable<opc, MRMSrcMem, _,
                              (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
                              OpcodeStr,
                              "${src}"##_.BroadcastStr,
                              "${src}"##_.BroadcastStr,
                              (fsqrt
                                (_.FloatVT
                                  (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
                              itins.rm>,
              EVEX, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
7955
// All packed square-root widths for one opcode: the 512-bit PS/PD forms are
// unconditional here, the 128/256-bit forms are predicated on HasVLX.
multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr> {
  // 512-bit forms.
  defm PSZ : avx512_sqrt_packed<opc, OpcodeStr#"ps", SSE_SQRTPS, v16f32_info>,
             EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_sqrt_packed<opc, OpcodeStr#"pd", SSE_SQRTPD, v8f64_info>,
             EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;

  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_sqrt_packed<opc, OpcodeStr#"ps", SSE_SQRTPS,
                                     v4f32x_info>,
                  EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_sqrt_packed<opc, OpcodeStr#"ps", SSE_SQRTPS,
                                     v8f32x_info>,
                  EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_sqrt_packed<opc, OpcodeStr#"pd", SSE_SQRTPD,
                                     v2f64x_info>,
                  EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_sqrt_packed<opc, OpcodeStr#"pd", SSE_SQRTPD,
                                     v4f64x_info>,
                  EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
  }
}
7977
// 512-bit PS/PD instantiations of the rounding-control square-root form
// (avx512_sqrt_packed_round). No 128/256-bit variants are defined here.
multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr> {
  defm PSZ : avx512_sqrt_packed_round<opc, OpcodeStr#"ps", SSE_SQRTPS,
                                      v16f32_info>,
             EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_sqrt_packed_round<opc, OpcodeStr#"pd", SSE_SQRTPD,
                                      v8f64_info>,
             EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
}
7984
// Scalar square root for the scalar-in-vector type described by `_`.
//
// Emits:
//   r_Int / m_Int / rb_Int - maskable forms on the vector register class
//                            (_.RC) matching X86fsqrtRnds; r_Int and m_Int
//                            pass FROUND_CURRENT, rb_Int takes the rounding
//                            immediate from AVX512RC:$rc.
//   r / m                  - pattern-less, isCodeGenOnly forms on the scalar
//                            register class (_.FRC).
// followed by selection patterns mapping `fsqrt` and the intrinsic `Intr`
// onto those instructions.
//
//   SUFF - suffix ("SS"/"SD" at the call sites in this file) used to rebuild
//          the instruction names as NAME#SUFF#Z<form> in the patterns.
//   Intr - intrinsic matched by the *_Int patterns.
multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, OpndItins itins,
                              X86VectorVTInfo _, string SUFF, Intrinsic Intr> {
  let ExeDomain = _.ExeDomain in {
    // Register/register intrinsic form, current rounding mode.
    defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _,
                                        (outs _.RC:$dst),
                                        (ins _.RC:$src1, _.RC:$src2),
                                        OpcodeStr,
                                        "$src2, $src1", "$src1, $src2",
                                        (X86fsqrtRnds (_.VT _.RC:$src1),
                                                      (_.VT _.RC:$src2),
                                                      (i32 FROUND_CURRENT)),
                                        itins.rr>,
                 Sched<[itins.Sched]>;

    // Register/memory intrinsic form, current rounding mode.
    defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _,
                                        (outs _.RC:$dst),
                                        (ins _.RC:$src1,
                                             _.IntScalarMemOp:$src2),
                                        OpcodeStr,
                                        "$src2, $src1", "$src1, $src2",
                                        (X86fsqrtRnds (_.VT _.RC:$src1),
                                                      _.ScalarIntMemCPat:$src2,
                                                      (i32 FROUND_CURRENT)),
                                        itins.rm>,
                 Sched<[itins.Sched.Folded, ReadAfterLd]>;

    // Register/register intrinsic form with explicit rounding control.
    defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _,
                                         (outs _.RC:$dst),
                                         (ins _.RC:$src1, _.RC:$src2,
                                              AVX512RC:$rc),
                                         OpcodeStr,
                                         "$rc, $src2, $src1",
                                         "$src1, $src2, $rc",
                                         (X86fsqrtRnds (_.VT _.RC:$src1),
                                                       (_.VT _.RC:$src2),
                                                       (i32 imm:$rc)),
                                         itins.rr>,
                  EVEX_B, EVEX_RC, Sched<[itins.Sched]>;

    // FRC-based forms. They carry no patterns of their own; the Pat<>
    // records below select them.
    let isCodeGenOnly = 1, hasSideEffects = 0, Predicates=[HasAVX512] in {
      def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
                (ins _.FRC:$src1, _.FRC:$src2),
                !strconcat(OpcodeStr,
                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                [], itins.rr>,
              Sched<[itins.Sched]>;

      let mayLoad = 1 in {
        def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
                  (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                  !strconcat(OpcodeStr,
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [], itins.rm>,
                Sched<[itins.Sched.Folded, ReadAfterLd]>;
      }
    }
  }

  let Predicates = [HasAVX512] in {
    // Plain fsqrt on an FRC value: the first source is left undefined.
    def : Pat<(_.EltVT (fsqrt _.FRC:$src)),
              (!cast<Instruction>(NAME#SUFF#Zr)
                (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;

    // Intrinsic on a register: the same register is used for both sources.
    def : Pat<(Intr VR128X:$src),
              (!cast<Instruction>(NAME#SUFF#Zr_Int)
                VR128X:$src, VR128X:$src)>;
  }

  // The load-folding patterns are only enabled under OptForSize.
  let Predicates = [HasAVX512, OptForSize] in {
    def : Pat<(_.EltVT (fsqrt (load addr:$src))),
              (!cast<Instruction>(NAME#SUFF#Zm)
                (_.EltVT (IMPLICIT_DEF)), addr:$src)>;

    def : Pat<(Intr _.ScalarIntMemCPat:$src2),
              (!cast<Instruction>(NAME#SUFF#Zm_Int)
                (_.VT (IMPLICIT_DEF)), addr:$src2)>;
  }
}
Igor Breger4c4cd782015-09-20 09:13:41 +00008043
// Scalar SS/SD square-root instantiations for one opcode.
//
// Each variant passes the suffix ("SS"/"SD") that avx512_sqrt_scalar uses to
// rebuild instruction names in its patterns, plus the matching SSE sqrt
// intrinsic.
//
// The scalar forms use the scalar itineraries (SSE_SQRTSS / SSE_SQRTSD)
// rather than the packed SSE_SQRTPS / SSE_SQRTPD ones, consistent with the
// other scalar instantiations in this file, which also use scalar itinerary
// records (e.g. SSE_RCP_S, SSE_ALU_F32S).
multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr> {
  defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", SSE_SQRTSS, f32x_info, "SS",
                                int_x86_sse_sqrt_ss>,
             EVEX_CD8<32, CD8VT1>, EVEX_4V, XS, NotMemoryFoldable;
  defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", SSE_SQRTSD, f64x_info, "SD",
                                int_x86_sse2_sqrt_sd>,
             EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W,
             NotMemoryFoldable;
}
8053
// Packed VSQRT: all vector widths plus the 512-bit rounding-control forms.
defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt">,
             avx512_sqrt_packed_all_round<0x51, "vsqrt">;

// Scalar VSQRT (SS/SD), tagged VEX_LIG.
defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt">, VEX_LIG;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008058
// Scalar VRNDSCALE for the scalar-in-vector type described by `_`.
//
// Emits:
//   r_Int / rb_Int / m_Int - maskable forms on _.RC matching X86RndScales
//                            (rb_Int matches X86RndScalesRnd with
//                            FROUND_NO_EXC and prints "{sae}").
//   r / m                  - pattern-less, isCodeGenOnly forms on _.FRC.
// followed by patterns that select the FRC forms for ffloor / fceil /
// ftrunc / frint / fnearbyint, each with a fixed immediate.
multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
                                  OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    // Register/register intrinsic form.
    defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _,
                                        (outs _.RC:$dst),
                                        (ins _.RC:$src1, _.RC:$src2,
                                             i32u8imm:$src3),
                                        OpcodeStr,
                                        "$src3, $src2, $src1",
                                        "$src1, $src2, $src3",
                                        (_.VT (X86RndScales (_.VT _.RC:$src1),
                                                            (_.VT _.RC:$src2),
                                                            (i32 imm:$src3))),
                                        itins.rr>,
                 Sched<[itins.Sched]>;

    // Register/register intrinsic form with {sae}.
    defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _,
                                         (outs _.RC:$dst),
                                         (ins _.RC:$src1, _.RC:$src2,
                                              i32u8imm:$src3),
                                         OpcodeStr,
                                         "$src3, {sae}, $src2, $src1",
                                         "$src1, $src2, {sae}, $src3",
                                         (_.VT (X86RndScalesRnd
                                                 (_.VT _.RC:$src1),
                                                 (_.VT _.RC:$src2),
                                                 (i32 imm:$src3),
                                                 (i32 FROUND_NO_EXC))),
                                         itins.rr>,
                  EVEX_B, Sched<[itins.Sched]>;

    // Register/memory intrinsic form.
    defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _,
                                        (outs _.RC:$dst),
                                        (ins _.RC:$src1,
                                             _.IntScalarMemOp:$src2,
                                             i32u8imm:$src3),
                                        OpcodeStr,
                                        "$src3, $src2, $src1",
                                        "$src1, $src2, $src3",
                                        (_.VT (X86RndScales
                                                _.RC:$src1,
                                                _.ScalarIntMemCPat:$src2,
                                                (i32 imm:$src3))),
                                        itins.rm>,
                 Sched<[itins.Sched.Folded, ReadAfterLd]>;

    // FRC-based forms, selected only through the Pat<> records below.
    let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
      def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
                (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
                !strconcat(OpcodeStr,
                  "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                [], itins.rr>,
              Sched<[itins.Sched]>;

      let mayLoad = 1 in {
        def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
                  (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                  !strconcat(OpcodeStr,
                    "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                  [], itins.rm>,
                Sched<[itins.Sched.Folded, ReadAfterLd]>;
      }
    }
  }

  // Register-source lowering of the generic rounding nodes. The first source
  // is left undefined; the immediate is fixed per node:
  //   ffloor -> 0x9, fceil -> 0xa, ftrunc -> 0xb, frint -> 0x4,
  //   fnearbyint -> 0xc.
  let Predicates = [HasAVX512] in {
    def : Pat<(ffloor _.FRC:$src),
              (_.EltVT (!cast<Instruction>(NAME##r)
                         (_.EltVT (IMPLICIT_DEF)), _.FRC:$src, (i32 0x9)))>;
    def : Pat<(fceil _.FRC:$src),
              (_.EltVT (!cast<Instruction>(NAME##r)
                         (_.EltVT (IMPLICIT_DEF)), _.FRC:$src, (i32 0xa)))>;
    def : Pat<(ftrunc _.FRC:$src),
              (_.EltVT (!cast<Instruction>(NAME##r)
                         (_.EltVT (IMPLICIT_DEF)), _.FRC:$src, (i32 0xb)))>;
    def : Pat<(frint _.FRC:$src),
              (_.EltVT (!cast<Instruction>(NAME##r)
                         (_.EltVT (IMPLICIT_DEF)), _.FRC:$src, (i32 0x4)))>;
    def : Pat<(fnearbyint _.FRC:$src),
              (_.EltVT (!cast<Instruction>(NAME##r)
                         (_.EltVT (IMPLICIT_DEF)), _.FRC:$src, (i32 0xc)))>;
  }

  // Memory-source variants of the same lowering, enabled only under
  // OptForSize.
  let Predicates = [HasAVX512, OptForSize] in {
    def : Pat<(ffloor (_.ScalarLdFrag addr:$src)),
              (_.EltVT (!cast<Instruction>(NAME##m)
                         (_.EltVT (IMPLICIT_DEF)), addr:$src, (i32 0x9)))>;
    def : Pat<(fceil (_.ScalarLdFrag addr:$src)),
              (_.EltVT (!cast<Instruction>(NAME##m)
                         (_.EltVT (IMPLICIT_DEF)), addr:$src, (i32 0xa)))>;
    def : Pat<(ftrunc (_.ScalarLdFrag addr:$src)),
              (_.EltVT (!cast<Instruction>(NAME##m)
                         (_.EltVT (IMPLICIT_DEF)), addr:$src, (i32 0xb)))>;
    def : Pat<(frint (_.ScalarLdFrag addr:$src)),
              (_.EltVT (!cast<Instruction>(NAME##m)
                         (_.EltVT (IMPLICIT_DEF)), addr:$src, (i32 0x4)))>;
    def : Pat<(fnearbyint (_.ScalarLdFrag addr:$src)),
              (_.EltVT (!cast<Instruction>(NAME##m)
                         (_.EltVT (IMPLICIT_DEF)), addr:$src, (i32 0xc)))>;
  }
}
8134
// Scalar VRNDSCALE, single precision (f32x_info).
defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", SSE_ALU_F32S,
                                          f32x_info>,
                   AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VT1>;

// Scalar VRNDSCALE, double precision (f64x_info); additionally tagged VEX_W.
defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", SSE_ALU_F64S,
                                          f64x_info>,
                   VEX_W, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VT1>;
Eric Christopher0d94fa92015-02-20 00:45:28 +00008141
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008142//-------------------------------------------------
8143// Integer truncate and extend operations
8144//-------------------------------------------------
8145
// Itinerary bundles for the integer extend and truncate instructions below.
// Both use the PSHUF register/memory itinerary classes and the
// WriteShuffle256 scheduling class.
let Sched = WriteShuffle256 in {
  def AVX512_EXTEND   : OpndItins<IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI>;
  def AVX512_TRUNCATE : OpndItins<IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI>;
}
8155
// Common body of one integer-truncate instruction at a fixed vector width.
//
//   OpNode   - node matched by the register form (SrcInfo.VT -> DestInfo.VT).
//   itins    - itineraries / scheduling class for the rr and memory forms.
//   SrcInfo  - type info of the wide source vector.
//   DestInfo - type info of the narrow result vector.
//   x86memop - memory operand used by the truncating-store forms.
//
// Emits:
//   rr  - maskable register-to-register truncate.
//   mr  - truncating store to memory (no pattern; selected through
//         avx512_trunc_mr_lowering).
//   mrk - write-masked truncating store.
multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               OpndItins itins, X86VectorVTInfo SrcInfo,
                               X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
  let ExeDomain = DestInfo.ExeDomain in
  defm rr : AVX512_maskable<opc, MRMDestReg, DestInfo, (outs DestInfo.RC:$dst),
                      (ins SrcInfo.RC:$src1), OpcodeStr ,"$src1", "$src1",
                      (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1))),
                      itins.rr>, EVEX, T8XS, Sched<[itins.Sched]>;

  // The memory forms have no patterns, so their memory flags must be spelled
  // out. They only write through $dst (no outs, MRMDestMem), so they are
  // marked mayStore and are not marked mayLoad.
  let mayStore = 1, hasSideEffects = 0,
      ExeDomain = DestInfo.ExeDomain in {
    def mr : AVX512XS8I<opc, MRMDestMem, (outs),
               (ins x86memop:$dst, SrcInfo.RC:$src),
               OpcodeStr # "\t{$src, $dst|$dst, $src}",
               [], itins.rm>, EVEX, Sched<[itins.Sched.Folded]>;

    def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
               (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
               OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
               [], itins.rm>, EVEX, EVEX_K, Sched<[itins.Sched.Folded]>;
  }//mayStore = 1, hasSideEffects = 0
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008178
// Selection patterns that map truncating stores onto the pattern-less
// mr / mrk instructions. The instruction names are rebuilt as
// NAME # SrcInfo.ZSuffix # {mr, mrk}.
//   truncFrag  - unmasked truncating-store fragment (value, address).
//   mtruncFrag - masked truncating-store fragment (address, mask, value).
multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
                                    X86VectorVTInfo DestInfo,
                                    PatFrag truncFrag, PatFrag mtruncFrag> {
  // Unmasked store.
  def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
            (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mr)
              addr:$dst, SrcInfo.RC:$src)>;

  // Write-masked store.
  def : Pat<(mtruncFrag addr:$dst, SrcInfo.KRCWM:$mask,
                        (SrcInfo.VT SrcInfo.RC:$src)),
            (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mrk)
              addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
}
8192
// Instantiates one truncate instruction at all three vector widths.
//
//   OpNode128/256/512 - node matched by the register form at each width.
//   VTSrcInfo         - source type info bundle (info128/info256/info512).
//   DestInfoZ128/Z256/Z - destination type info per width.
//   x86memopZ128/Z256/Z - store memory operand per width.
//   truncFrag/mtruncFrag - unmasked / masked truncating-store fragments.
//   prd               - base predicate (defaults to HasAVX512).
//
// The Z128 and Z256 variants additionally require HasVLX; the Z (512-bit)
// variant requires only `prd`.
multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
                        SDNode OpNode256, SDNode OpNode512, OpndItins itins,
                        AVX512VLVectorVTInfo VTSrcInfo,
                        X86VectorVTInfo DestInfoZ128,
                        X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
                        X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
                        X86MemOperand x86memopZ, PatFrag truncFrag,
                        PatFrag mtruncFrag, Predicate prd = HasAVX512> {
  let Predicates = [HasVLX, prd] in {
    // 128-bit source.
    defm Z128 : avx512_trunc_common<opc, OpcodeStr, OpNode128, itins,
                                    VTSrcInfo.info128, DestInfoZ128,
                                    x86memopZ128>,
                avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
                                         truncFrag, mtruncFrag>,
                EVEX_V128;

    // 256-bit source.
    defm Z256 : avx512_trunc_common<opc, OpcodeStr, OpNode256, itins,
                                    VTSrcInfo.info256, DestInfoZ256,
                                    x86memopZ256>,
                avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
                                         truncFrag, mtruncFrag>,
                EVEX_V256;
  }

  // 512-bit source.
  let Predicates = [prd] in {
    defm Z : avx512_trunc_common<opc, OpcodeStr, OpNode512, itins,
                                 VTSrcInfo.info512, DestInfoZ, x86memopZ>,
             avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
                                      truncFrag, mtruncFrag>,
             EVEX_V512;
  }
}
8219
// i64 -> i8 truncate. All three widths use InVecNode for the register form
// and v16i8x_info as the destination type.
//   InVecNode - defaults to OpNode when the caller does not supply one.
multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           OpndItins itins, PatFrag StoreNode,
                           PatFrag MaskedStoreNode,
                           SDNode InVecNode = OpNode> {
  defm NAME : avx512_trunc<opc, OpcodeStr,
                           InVecNode, InVecNode, InVecNode,
                           itins, avx512vl_i64_info,
                           v16i8x_info, v16i8x_info, v16i8x_info,
                           i16mem, i32mem, i64mem,
                           StoreNode, MaskedStoreNode>,
              EVEX_CD8<8, CD8VO>;
}
8228
// i64 -> i16 truncate. The 128/256-bit forms use InVecNode, the 512-bit form
// uses OpNode; the destination type is v8i16x_info at every width.
//   InVecNode - defaults to OpNode when the caller does not supply one.
multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           OpndItins itins, PatFrag StoreNode,
                           PatFrag MaskedStoreNode,
                           SDNode InVecNode = OpNode> {
  defm NAME : avx512_trunc<opc, OpcodeStr,
                           InVecNode, InVecNode, OpNode,
                           itins, avx512vl_i64_info,
                           v8i16x_info, v8i16x_info, v8i16x_info,
                           i32mem, i64mem, i128mem,
                           StoreNode, MaskedStoreNode>,
              EVEX_CD8<16, CD8VQ>;
}
8237
// i64 -> i32 truncate. Only the 128-bit form uses InVecNode; the 256/512-bit
// forms use OpNode. Destination types: v4i32x, v4i32x, v8i32x.
//   InVecNode - defaults to OpNode when the caller does not supply one.
multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           OpndItins itins, PatFrag StoreNode,
                           PatFrag MaskedStoreNode,
                           SDNode InVecNode = OpNode> {
  defm NAME : avx512_trunc<opc, OpcodeStr,
                           InVecNode, OpNode, OpNode,
                           itins, avx512vl_i64_info,
                           v4i32x_info, v4i32x_info, v8i32x_info,
                           i64mem, i128mem, i256mem,
                           StoreNode, MaskedStoreNode>,
              EVEX_CD8<32, CD8VH>;
}
8246
// i32 -> i8 truncate. The 128/256-bit forms use InVecNode, the 512-bit form
// uses OpNode; the destination type is v16i8x_info at every width.
//   InVecNode - defaults to OpNode when the caller does not supply one.
multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           OpndItins itins, PatFrag StoreNode,
                           PatFrag MaskedStoreNode,
                           SDNode InVecNode = OpNode> {
  defm NAME : avx512_trunc<opc, OpcodeStr,
                           InVecNode, InVecNode, OpNode,
                           itins, avx512vl_i32_info,
                           v16i8x_info, v16i8x_info, v16i8x_info,
                           i32mem, i64mem, i128mem,
                           StoreNode, MaskedStoreNode>,
              EVEX_CD8<8, CD8VQ>;
}
8255
// i32 -> i16 truncate. Only the 128-bit form uses InVecNode; the 256/512-bit
// forms use OpNode. Destination types: v8i16x, v8i16x, v16i16x.
//   InVecNode - defaults to OpNode when the caller does not supply one.
multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           OpndItins itins, PatFrag StoreNode,
                           PatFrag MaskedStoreNode,
                           SDNode InVecNode = OpNode> {
  defm NAME : avx512_trunc<opc, OpcodeStr,
                           InVecNode, OpNode, OpNode,
                           itins, avx512vl_i32_info,
                           v8i16x_info, v8i16x_info, v16i16x_info,
                           i64mem, i128mem, i256mem,
                           StoreNode, MaskedStoreNode>,
              EVEX_CD8<16, CD8VH>;
}
8264
// i16 -> i8 truncate. Only the 128-bit form uses InVecNode; the 256/512-bit
// forms use OpNode. Destination types: v16i8x, v16i8x, v32i8x.
// Unlike the other truncate wrappers, the base predicate is HasBWI.
//   InVecNode - defaults to OpNode when the caller does not supply one.
multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           OpndItins itins, PatFrag StoreNode,
                           PatFrag MaskedStoreNode,
                           SDNode InVecNode = OpNode> {
  defm NAME : avx512_trunc<opc, OpcodeStr,
                           InVecNode, OpNode, OpNode,
                           itins, avx512vl_i16_info,
                           v16i8x_info, v16i8x_info, v32i8x_info,
                           i64mem, i128mem, i256mem,
                           StoreNode, MaskedStoreNode, HasBWI>,
              EVEX_CD8<16, CD8VH>;
}
8273
// Truncate instruction families. Each group defines the plain form (`trunc`,
// with X86vtrunc as the in-vector node), the X86vtruncs form and the
// X86vtruncus form, together with their truncating-store fragments.

// i64 -> i8
defm VPMOVQB   : avx512_trunc_qb<0x32, "vpmovqb", trunc, AVX512_TRUNCATE,
                                 truncstorevi8, masked_truncstorevi8,
                                 X86vtrunc>;
defm VPMOVSQB  : avx512_trunc_qb<0x22, "vpmovsqb", X86vtruncs, AVX512_TRUNCATE,
                                 truncstore_s_vi8, masked_truncstore_s_vi8>;
defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus,
                                 AVX512_TRUNCATE,
                                 truncstore_us_vi8, masked_truncstore_us_vi8>;

// i64 -> i16
defm VPMOVQW   : avx512_trunc_qw<0x34, "vpmovqw", trunc, AVX512_TRUNCATE,
                                 truncstorevi16, masked_truncstorevi16,
                                 X86vtrunc>;
defm VPMOVSQW  : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, AVX512_TRUNCATE,
                                 truncstore_s_vi16, masked_truncstore_s_vi16>;
defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
                                 AVX512_TRUNCATE,
                                 truncstore_us_vi16, masked_truncstore_us_vi16>;

// i64 -> i32
defm VPMOVQD   : avx512_trunc_qd<0x35, "vpmovqd", trunc, AVX512_TRUNCATE,
                                 truncstorevi32, masked_truncstorevi32,
                                 X86vtrunc>;
defm VPMOVSQD  : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, AVX512_TRUNCATE,
                                 truncstore_s_vi32, masked_truncstore_s_vi32>;
defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
                                 AVX512_TRUNCATE,
                                 truncstore_us_vi32, masked_truncstore_us_vi32>;

// i32 -> i8
defm VPMOVDB   : avx512_trunc_db<0x31, "vpmovdb", trunc, AVX512_TRUNCATE,
                                 truncstorevi8, masked_truncstorevi8,
                                 X86vtrunc>;
defm VPMOVSDB  : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, AVX512_TRUNCATE,
                                 truncstore_s_vi8, masked_truncstore_s_vi8>;
defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus,
                                 AVX512_TRUNCATE,
                                 truncstore_us_vi8, masked_truncstore_us_vi8>;

// i32 -> i16
defm VPMOVDW   : avx512_trunc_dw<0x33, "vpmovdw", trunc, AVX512_TRUNCATE,
                                 truncstorevi16, masked_truncstorevi16,
                                 X86vtrunc>;
defm VPMOVSDW  : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, AVX512_TRUNCATE,
                                 truncstore_s_vi16, masked_truncstore_s_vi16>;
defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
                                 AVX512_TRUNCATE,
                                 truncstore_us_vi16, masked_truncstore_us_vi16>;

// i16 -> i8
defm VPMOVWB   : avx512_trunc_wb<0x30, "vpmovwb", trunc, AVX512_TRUNCATE,
                                 truncstorevi8, masked_truncstorevi8,
                                 X86vtrunc>;
defm VPMOVSWB  : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, AVX512_TRUNCATE,
                                 truncstore_s_vi8, masked_truncstore_s_vi8>;
defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
                                 AVX512_TRUNCATE,
                                 truncstore_us_vi8, masked_truncstore_us_vi8>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008315
// 256-bit -> 128-bit truncates when VLX is unavailable: place the YMM source
// in the low half of an otherwise undefined ZMM, run the 512-bit instruction,
// and take the low XMM of its result.
let Predicates = [HasAVX512, NoVLX] in {
  // v8i32 -> v8i16 through VPMOVDWZrr.
  def : Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
            (v8i16 (EXTRACT_SUBREG
                     (v16i16 (VPMOVDWZrr
                               (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                                      VR256X:$src,
                                                      sub_ymm)))),
                     sub_xmm))>;

  // v4i64 -> v4i32 through VPMOVQDZrr.
  def : Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
            (v4i32 (EXTRACT_SUBREG
                     (v8i32 (VPMOVQDZrr
                              (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                                    VR256X:$src,
                                                    sub_ymm)))),
                     sub_xmm))>;
}
8326
// v16i16 -> v16i8 with BWI but no VLX: same widening trick, using the 512-bit
// VPMOVWBZrr on an undefined ZMM whose low half holds the source.
let Predicates = [HasBWI, NoVLX] in {
  def : Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
            (v16i8 (EXTRACT_SUBREG
                     (VPMOVWBZrr
                       (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
                                              VR256X:$src, sub_ymm))),
                     sub_xmm))>;
}
8332
// Register (rr) and memory (rm) source forms shared by all of the extend
// multiclasses below.
//   DestInfo / SrcInfo - type info for the result and the register source.
//   x86memop           - memory operand used by the rm form.
//   LdFrag             - load fragment matched by the rm form.
//   OpNode             - node applied to the register source in the rr form.
multiclass avx512_extend_common<
    bits<8> opc, string OpcodeStr, OpndItins itins,
    X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
    X86MemOperand x86memop, PatFrag LdFrag, SDPatternOperator OpNode> {
  let ExeDomain = DestInfo.ExeDomain in {
    defm rr : AVX512_maskable<opc, MRMSrcReg, DestInfo,
                              (outs DestInfo.RC:$dst),
                              (ins SrcInfo.RC:$src),
                              OpcodeStr, "$src", "$src",
                              (DestInfo.VT
                                (OpNode (SrcInfo.VT SrcInfo.RC:$src))),
                              itins.rr>,
              EVEX, Sched<[itins.Sched]>;

    defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo,
                              (outs DestInfo.RC:$dst),
                              (ins x86memop:$src),
                              OpcodeStr, "$src", "$src",
                              (DestInfo.VT (LdFrag addr:$src)),
                              itins.rm>,
              EVEX, Sched<[itins.Sched.Folded]>;
  }
}
8348
// Byte -> word extends. The 128-bit form is built on InVecNode and reads
// i64mem; the 256- and 512-bit forms are built on OpNode and read i128mem /
// i256mem. LdFrag defaults to the "<ExtTy>extloadvi8" fragment.
multiclass avx512_extend_BW<
    bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
    SDPatternOperator InVecNode, string ExtTy, OpndItins itins,
    PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasBWI] in {
    defm Z128 : avx512_extend_common<opc, OpcodeStr, itins, v8i16x_info,
                                     v16i8x_info, i64mem, LdFrag, InVecNode>,
                EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256 : avx512_extend_common<opc, OpcodeStr, itins, v16i16x_info,
                                     v16i8x_info, i128mem, LdFrag, OpNode>,
                EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
  }

  let Predicates = [HasBWI] in {
    defm Z : avx512_extend_common<opc, OpcodeStr, itins, v32i16_info,
                                  v32i8x_info, i256mem, LdFrag, OpNode>,
             EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
  }
}
8367
// Byte -> dword extends. Memory operands are i32mem / i64mem / i128mem for
// the 128- / 256- / 512-bit forms; only the 128-bit form uses InVecNode.
// LdFrag defaults to the "<ExtTy>extloadvi8" fragment.
multiclass avx512_extend_BD<
    bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
    SDPatternOperator InVecNode, string ExtTy, OpndItins itins,
    PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128 : avx512_extend_common<opc, OpcodeStr, itins, v4i32x_info,
                                     v16i8x_info, i32mem, LdFrag, InVecNode>,
                EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256 : avx512_extend_common<opc, OpcodeStr, itins, v8i32x_info,
                                     v16i8x_info, i64mem, LdFrag, OpNode>,
                EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
  }

  let Predicates = [HasAVX512] in {
    defm Z : avx512_extend_common<opc, OpcodeStr, itins, v16i32_info,
                                  v16i8x_info, i128mem, LdFrag, OpNode>,
             EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
  }
}
8386
// Byte -> qword extends. Memory operands are i16mem / i32mem / i64mem for
// the 128- / 256- / 512-bit forms; only the 128-bit form uses InVecNode.
// LdFrag defaults to the "<ExtTy>extloadvi8" fragment.
multiclass avx512_extend_BQ<
    bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
    SDPatternOperator InVecNode, string ExtTy, OpndItins itins,
    PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128 : avx512_extend_common<opc, OpcodeStr, itins, v2i64x_info,
                                     v16i8x_info, i16mem, LdFrag, InVecNode>,
                EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256 : avx512_extend_common<opc, OpcodeStr, itins, v4i64x_info,
                                     v16i8x_info, i32mem, LdFrag, OpNode>,
                EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
  }

  let Predicates = [HasAVX512] in {
    defm Z : avx512_extend_common<opc, OpcodeStr, itins, v8i64_info,
                                  v16i8x_info, i64mem, LdFrag, OpNode>,
             EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
  }
}
8405
// Word -> dword extends. Memory operands are i64mem / i128mem / i256mem for
// the 128- / 256- / 512-bit forms; only the 128-bit form uses InVecNode.
// LdFrag defaults to the "<ExtTy>extloadvi16" fragment.
multiclass avx512_extend_WD<
    bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
    SDPatternOperator InVecNode, string ExtTy, OpndItins itins,
    PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128 : avx512_extend_common<opc, OpcodeStr, itins, v4i32x_info,
                                     v8i16x_info, i64mem, LdFrag, InVecNode>,
                EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256 : avx512_extend_common<opc, OpcodeStr, itins, v8i32x_info,
                                     v8i16x_info, i128mem, LdFrag, OpNode>,
                EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
  }

  let Predicates = [HasAVX512] in {
    defm Z : avx512_extend_common<opc, OpcodeStr, itins, v16i32_info,
                                  v16i16x_info, i256mem, LdFrag, OpNode>,
             EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
  }
}
8424
// Word -> qword extends. Memory operands are i32mem / i64mem / i128mem for
// the 128- / 256- / 512-bit forms; only the 128-bit form uses InVecNode.
// LdFrag defaults to the "<ExtTy>extloadvi16" fragment.
multiclass avx512_extend_WQ<
    bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
    SDPatternOperator InVecNode, string ExtTy, OpndItins itins,
    PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128 : avx512_extend_common<opc, OpcodeStr, itins, v2i64x_info,
                                     v8i16x_info, i32mem, LdFrag, InVecNode>,
                EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256 : avx512_extend_common<opc, OpcodeStr, itins, v4i64x_info,
                                     v8i16x_info, i64mem, LdFrag, OpNode>,
                EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
  }

  let Predicates = [HasAVX512] in {
    defm Z : avx512_extend_common<opc, OpcodeStr, itins, v8i64_info,
                                  v8i16x_info, i128mem, LdFrag, OpNode>,
             EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
  }
}
8443
// Dword -> qword extends. Memory operands are i64mem / i128mem / i256mem for
// the 128- / 256- / 512-bit forms; only the 128-bit form uses InVecNode.
// Unlike the byte/word source variants, these defs do not add VEX_WIG.
// LdFrag defaults to the "<ExtTy>extloadvi32" fragment.
multiclass avx512_extend_DQ<
    bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
    SDPatternOperator InVecNode, string ExtTy, OpndItins itins,
    PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128 : avx512_extend_common<opc, OpcodeStr, itins, v2i64x_info,
                                     v4i32x_info, i64mem, LdFrag, InVecNode>,
                EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;

    defm Z256 : avx512_extend_common<opc, OpcodeStr, itins, v4i64x_info,
                                     v4i32x_info, i128mem, LdFrag, OpNode>,
                EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
  }

  let Predicates = [HasAVX512] in {
    defm Z : avx512_extend_common<opc, OpcodeStr, itins, v8i64_info,
                                  v8i32x_info, i256mem, LdFrag, OpNode>,
             EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
  }
}
8463
// Zero-extending moves, opcodes 0x30-0x35. ExtTy "z" selects the
// zextloadvi* load fragments inside the multiclasses.
defm VPMOVZXBW : avx512_extend_BW<0x30, "vpmovzxbw", X86vzext, zext_invec,
                                  "z", AVX512_EXTEND>;
defm VPMOVZXBD : avx512_extend_BD<0x31, "vpmovzxbd", X86vzext, zext_invec,
                                  "z", AVX512_EXTEND>;
defm VPMOVZXBQ : avx512_extend_BQ<0x32, "vpmovzxbq", X86vzext, zext_invec,
                                  "z", AVX512_EXTEND>;
defm VPMOVZXWD : avx512_extend_WD<0x33, "vpmovzxwd", X86vzext, zext_invec,
                                  "z", AVX512_EXTEND>;
defm VPMOVZXWQ : avx512_extend_WQ<0x34, "vpmovzxwq", X86vzext, zext_invec,
                                  "z", AVX512_EXTEND>;
defm VPMOVZXDQ : avx512_extend_DQ<0x35, "vpmovzxdq", X86vzext, zext_invec,
                                  "z", AVX512_EXTEND>;
Elena Demikhovsky3948c592015-05-27 08:15:19 +00008470
// Sign-extending moves, opcodes 0x20-0x25. ExtTy "s" selects the
// sextloadvi* load fragments inside the multiclasses.
defm VPMOVSXBW : avx512_extend_BW<0x20, "vpmovsxbw", X86vsext, sext_invec,
                                  "s", AVX512_EXTEND>;
defm VPMOVSXBD : avx512_extend_BD<0x21, "vpmovsxbd", X86vsext, sext_invec,
                                  "s", AVX512_EXTEND>;
defm VPMOVSXBQ : avx512_extend_BQ<0x22, "vpmovsxbq", X86vsext, sext_invec,
                                  "s", AVX512_EXTEND>;
defm VPMOVSXWD : avx512_extend_WD<0x23, "vpmovsxwd", X86vsext, sext_invec,
                                  "s", AVX512_EXTEND>;
defm VPMOVSXWQ : avx512_extend_WQ<0x24, "vpmovsxwq", X86vsext, sext_invec,
                                  "s", AVX512_EXTEND>;
defm VPMOVSXDQ : avx512_extend_DQ<0x25, "vpmovsxdq", X86vsext, sext_invec,
                                  "s", AVX512_EXTEND>;
Elena Demikhovsky3948c592015-05-27 08:15:19 +00008477
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008478
// Additional isel patterns that fold a load into the memory form of the
// VPMOVSX* / VPMOVZX* instructions.
//   OpcPrefix - instruction name prefix; pasted with the width pair
//               (BW, BD, BQ, WD, WQ, DQ) and Z128rm / Z256rm / Zrm.
//   ExtOp     - extension node used by the 256- and 512-bit patterns.
//   InVecOp   - in-vector extension node used by the 128-bit patterns.
//   ExtLoad16 - load fragment used by the 128-bit BQ scalar_to_vector pattern.
//
// A load may be folded only if it covers at least the bytes the selected
// instruction reads from memory. Folding a wider load is fine (the
// instruction simply reads less); folding a narrower one is not, because the
// instruction would read past the original access and extend those bytes
// instead of the zeros the DAG asked for. For that reason there are no
// vzmovl_v2i64 / vzload_v2i64 (64-bit load, upper half zero) patterns for
// BWZ256rm, WDZ256rm and DQZ256rm, which all take a 128-bit memory operand.
multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
                                 SDNode InVecOp, PatFrag ExtLoad16> {
  // 128-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
    // BW: instruction reads 64 bits.
    def : Pat<(v8i16 (InVecOp
                (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
              (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
    def : Pat<(v8i16 (InVecOp
                (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
              (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
    def : Pat<(v8i16 (InVecOp (v16i8 (vzmovl_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
    def : Pat<(v8i16 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
    def : Pat<(v8i16 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
    // BD: instruction reads 32 bits.
    def : Pat<(v4i32 (InVecOp
                (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
              (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
    def : Pat<(v4i32 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
              (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
    def : Pat<(v4i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
    def : Pat<(v4i32 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;

    // BQ: instruction reads 16 bits.
    def : Pat<(v2i64 (InVecOp
                (bc_v16i8 (v4i32 (scalar_to_vector (ExtLoad16 addr:$src)))))),
              (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
    def : Pat<(v2i64 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
              (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
    def : Pat<(v2i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
    def : Pat<(v2i64 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;

    // WD: instruction reads 64 bits.
    def : Pat<(v4i32 (InVecOp
                (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
              (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
    def : Pat<(v4i32 (InVecOp
                (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
              (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
    def : Pat<(v4i32 (InVecOp (v8i16 (vzmovl_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
    def : Pat<(v4i32 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
    def : Pat<(v4i32 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;

    // WQ: instruction reads 32 bits.
    def : Pat<(v2i64 (InVecOp
                (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
              (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
    def : Pat<(v2i64 (InVecOp (v8i16 (vzmovl_v4i32 addr:$src)))),
              (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
    def : Pat<(v2i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
    def : Pat<(v2i64 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;

    // DQ: instruction reads 64 bits.
    def : Pat<(v2i64 (InVecOp
                (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
              (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
    def : Pat<(v2i64 (InVecOp
                (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
              (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
    def : Pat<(v2i64 (InVecOp (v4i32 (vzmovl_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
    def : Pat<(v2i64 (InVecOp (v4i32 (vzload_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
    def : Pat<(v2i64 (InVecOp (bc_v4i32 (loadv2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  }

  // 256-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
    // BW: instruction reads 128 bits, so only a full 128-bit load may fold.
    def : Pat<(v16i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
    // BD: instruction reads 64 bits.
    def : Pat<(v8i32 (ExtOp
                (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
              (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
    def : Pat<(v8i32 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
    def : Pat<(v8i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
    def : Pat<(v8i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;

    // BQ: instruction reads 32 bits.
    def : Pat<(v4i64 (ExtOp
                (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
              (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
    def : Pat<(v4i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
              (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
    def : Pat<(v4i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
    def : Pat<(v4i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;

    // WD: instruction reads 128 bits, so only a full 128-bit load may fold.
    def : Pat<(v8i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;

    // WQ: instruction reads 64 bits.
    def : Pat<(v4i64 (ExtOp
                (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
              (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
    def : Pat<(v4i64 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
    def : Pat<(v4i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
    def : Pat<(v4i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;

    // DQ: instruction reads 128 bits, so only a full 128-bit load may fold.
    def : Pat<(v4i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
  }

  // 512-bit patterns
  let Predicates = [HasBWI] in {
    def : Pat<(v32i16 (ExtOp (bc_v32i8 (loadv4i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
  }
  let Predicates = [HasAVX512] in {
    def : Pat<(v16i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;

    def : Pat<(v8i64 (ExtOp
                (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
              (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
    def : Pat<(v8i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;

    def : Pat<(v16i32 (ExtOp (bc_v16i16 (loadv4i64 addr:$src)))),
              (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;

    def : Pat<(v8i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
              (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;

    def : Pat<(v8i64 (ExtOp (bc_v8i32 (loadv4i64 addr:$src)))),
              (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
  }
}
8619
// Instantiate the load-folding patterns for both extension flavours. The two
// differ in the 16-bit load fragment handed to the BQ 128-bit pattern.
defm : AVX512_pmovx_patterns<"VPMOVSX", X86vsext, sext_invec,
                             extloadi32i16>;
defm : AVX512_pmovx_patterns<"VPMOVZX", X86vzext, zext_invec,
                             loadi16_anyext>;
Craig Topper64378f42016-10-09 23:08:39 +00008622
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008623//===----------------------------------------------------------------------===//
8624// GATHER - SCATTER Operations
8625
Simon Pilgrimb69dae42017-12-05 20:47:11 +00008626// FIXME: Improve scheduling of gather/scatter instructions.
// One masked gather instruction (`rm`).
//   _          - type info for the gathered data.
//   memop      - vector-index memory operand.
//   GatherNode - fragment matched by the selection pattern.
//   MaskRC     - mask register class; defaults to _.KRCWM.
// $src1 is tied to $dst and $mask to the $mask_wb result; $dst is marked
// earlyclobber.
multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         X86MemOperand memop, PatFrag GatherNode,
                         RegisterClass MaskRC = _.KRCWM> {
  let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
      ExeDomain = _.ExeDomain in
  def rm : AVX5128I<opc, MRMSrcMem,
                    (outs _.RC:$dst, MaskRC:$mask_wb),
                    (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
                    !strconcat(OpcodeStr#_.Suffix,
                      "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
                    [(set _.RC:$dst, MaskRC:$mask_wb,
                          (GatherNode (_.VT _.RC:$src1), MaskRC:$mask,
                                      vectoraddr:$src2))]>,
           EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteLoad]>;
}
Cameron McInally45325962014-03-26 13:50:50 +00008641
// Gathers with 64-bit data elements (all defs carry VEX_W). For each vector
// length there is a dword-indexed form (dopc, "d") and a qword-indexed form
// (qopc, "q"). The 512-bit forms are unpredicated here; the 256- and 128-bit
// forms require VLX.
multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
                              AVX512VLVectorVTInfo _, string OpcodeStr,
                              string SUFF> {
  defm NAME##D##SUFF##Z :
      avx512_gather<dopc, OpcodeStr##"d", _.info512, vy512mem, mgatherv8i32>,
      EVEX_V512, VEX_W;
  defm NAME##Q##SUFF##Z :
      avx512_gather<qopc, OpcodeStr##"q", _.info512, vz512mem, mgatherv8i64>,
      EVEX_V512, VEX_W;

  let Predicates = [HasVLX] in {
    defm NAME##D##SUFF##Z256 :
        avx512_gather<dopc, OpcodeStr##"d", _.info256, vx256xmem,
                      mgatherv4i32>,
        EVEX_V256, VEX_W;
    defm NAME##Q##SUFF##Z256 :
        avx512_gather<qopc, OpcodeStr##"q", _.info256, vy256xmem,
                      mgatherv4i64>,
        EVEX_V256, VEX_W;
    defm NAME##D##SUFF##Z128 :
        avx512_gather<dopc, OpcodeStr##"d", _.info128, vx128xmem,
                      mgatherv4i32>,
        EVEX_V128, VEX_W;
    defm NAME##Q##SUFF##Z128 :
        avx512_gather<qopc, OpcodeStr##"q", _.info128, vx128xmem,
                      mgatherv2i64>,
        EVEX_V128, VEX_W;
  }
}
8659
// Gathers with 32-bit data elements (no VEX_W). The qword-indexed forms pair
// each encoding length with the next narrower data type info (info256 under
// EVEX_V512, info128 under EVEX_V256); the 128-bit qword-indexed form keeps
// info128 but overrides the mask class with VK2WM.
multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
                              AVX512VLVectorVTInfo _, string OpcodeStr,
                              string SUFF> {
  defm NAME##D##SUFF##Z :
      avx512_gather<dopc, OpcodeStr##"d", _.info512, vz512mem, mgatherv16i32>,
      EVEX_V512;
  defm NAME##Q##SUFF##Z :
      avx512_gather<qopc, OpcodeStr##"q", _.info256, vz256xmem, mgatherv8i64>,
      EVEX_V512;

  let Predicates = [HasVLX] in {
    defm NAME##D##SUFF##Z256 :
        avx512_gather<dopc, OpcodeStr##"d", _.info256, vy256xmem,
                      mgatherv8i32>,
        EVEX_V256;
    defm NAME##Q##SUFF##Z256 :
        avx512_gather<qopc, OpcodeStr##"q", _.info128, vy128xmem,
                      mgatherv4i64>,
        EVEX_V256;
    defm NAME##D##SUFF##Z128 :
        avx512_gather<dopc, OpcodeStr##"d", _.info128, vx128xmem,
                      mgatherv4i32>,
        EVEX_V128;
    defm NAME##Q##SUFF##Z128 :
        avx512_gather<qopc, OpcodeStr##"q", _.info128, vx64xmem,
                      mgatherv2i64, VK2WM>,
        EVEX_V128;
  }
}
Michael Liao5bf95782014-12-04 05:20:33 +00008678
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008679
// Floating-point gathers: opcodes 0x92 (dword index) / 0x93 (qword index).
defm VGATHER :
    avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
    avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;

// Integer gathers: opcodes 0x90 (dword index) / 0x91 (qword index).
defm VPGATHER :
    avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
    avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008685
// One masked scatter instruction (`mr`).
//   _           - type info for the stored data.
//   memop       - vector-index memory operand.
//   ScatterNode - fragment matched by the selection pattern.
//   MaskRC      - mask register class; defaults to _.KRCWM.
// The only result is $mask_wb, which is tied to the $mask input.
multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86MemOperand memop, PatFrag ScatterNode,
                          RegisterClass MaskRC = _.KRCWM> {
  let mayStore = 1, Constraints = "$mask = $mask_wb",
      ExeDomain = _.ExeDomain in
  def mr : AVX5128I<opc, MRMDestMem,
                    (outs MaskRC:$mask_wb),
                    (ins memop:$dst, MaskRC:$mask, _.RC:$src),
                    !strconcat(OpcodeStr#_.Suffix,
                      "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
                    [(set MaskRC:$mask_wb,
                          (ScatterNode (_.VT _.RC:$src), MaskRC:$mask,
                                       vectoraddr:$dst))]>,
           EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteStore]>;
}
8701
// Scatters with 64-bit data elements (all defs carry VEX_W). For each vector
// length there is a dword-indexed form (dopc, "d") and a qword-indexed form
// (qopc, "q"). The 512-bit forms are unpredicated here; the 256- and 128-bit
// forms require VLX.
multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
                               AVX512VLVectorVTInfo _, string OpcodeStr,
                               string SUFF> {
  defm NAME##D##SUFF##Z :
      avx512_scatter<dopc, OpcodeStr##"d", _.info512, vy512mem,
                     mscatterv8i32>,
      EVEX_V512, VEX_W;
  defm NAME##Q##SUFF##Z :
      avx512_scatter<qopc, OpcodeStr##"q", _.info512, vz512mem,
                     mscatterv8i64>,
      EVEX_V512, VEX_W;

  let Predicates = [HasVLX] in {
    defm NAME##D##SUFF##Z256 :
        avx512_scatter<dopc, OpcodeStr##"d", _.info256, vx256xmem,
                       mscatterv4i32>,
        EVEX_V256, VEX_W;
    defm NAME##Q##SUFF##Z256 :
        avx512_scatter<qopc, OpcodeStr##"q", _.info256, vy256xmem,
                       mscatterv4i64>,
        EVEX_V256, VEX_W;
    defm NAME##D##SUFF##Z128 :
        avx512_scatter<dopc, OpcodeStr##"d", _.info128, vx128xmem,
                       mscatterv4i32>,
        EVEX_V128, VEX_W;
    defm NAME##Q##SUFF##Z128 :
        avx512_scatter<qopc, OpcodeStr##"q", _.info128, vx128xmem,
                       mscatterv2i64>,
        EVEX_V128, VEX_W;
  }
}
8719
// Scatters with 32-bit data elements (no VEX_W). The qword-indexed forms pair
// each encoding length with the next narrower data type info (info256 under
// EVEX_V512, info128 under EVEX_V256); the 128-bit qword-indexed form keeps
// info128 but overrides the mask class with VK2WM.
multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
                               AVX512VLVectorVTInfo _, string OpcodeStr,
                               string SUFF> {
  defm NAME##D##SUFF##Z :
      avx512_scatter<dopc, OpcodeStr##"d", _.info512, vz512mem,
                     mscatterv16i32>,
      EVEX_V512;
  defm NAME##Q##SUFF##Z :
      avx512_scatter<qopc, OpcodeStr##"q", _.info256, vz256xmem,
                     mscatterv8i64>,
      EVEX_V512;

  let Predicates = [HasVLX] in {
    defm NAME##D##SUFF##Z256 :
        avx512_scatter<dopc, OpcodeStr##"d", _.info256, vy256xmem,
                       mscatterv8i32>,
        EVEX_V256;
    defm NAME##Q##SUFF##Z256 :
        avx512_scatter<qopc, OpcodeStr##"q", _.info128, vy128xmem,
                       mscatterv4i64>,
        EVEX_V256;
    defm NAME##D##SUFF##Z128 :
        avx512_scatter<dopc, OpcodeStr##"d", _.info128, vx128xmem,
                       mscatterv4i32>,
        EVEX_V128;
    defm NAME##Q##SUFF##Z128 :
        avx512_scatter<qopc, OpcodeStr##"q", _.info128, vx64xmem,
                       mscatterv2i64, VK2WM>,
        EVEX_V128;
  }
}
8738
// Floating-point scatters: opcodes 0xA2 (dword index) / 0xA3 (qword index).
defm VSCATTER :
    avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
    avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;

// Integer scatters: opcodes 0xA0 (dword index) / 0xA1 (qword index).
defm VPSCATTER :
    avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
    avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008744
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00008745// prefetch
// Defines the single memory-form record `m` for a gather/scatter prefetch.
//   opc / F   - opcode byte and instruction format of the encoding.
//   OpcodeStr - mnemonic used in the assembly string.
//   KRC       - register class of the $mask operand.
//   memop     - memory operand type of the $src operand.
// The instruction has no outputs and an empty pattern list; it is gated on
// HasPFI and marked hasSideEffects = 1.
8746multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
8747 RegisterClass KRC, X86MemOperand memop> {
8748 let Predicates = [HasPFI], hasSideEffects = 1 in
8749 def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
Craig Topperedb09112014-11-25 20:11:23 +00008750 !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"),
Simon Pilgrimb69dae42017-12-05 20:47:11 +00008751 [], IIC_SSE_PREFETCH>, EVEX, EVEX_K, Sched<[WriteLoad]>;
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00008752}
8753
// Gather/scatter prefetch instantiations. As used below, the format selects
// the operation: MRM1m / MRM2m are the gather pf0 / pf1 forms and
// MRM5m / MRM6m are the scatter pf0 / pf1 forms. Opcode 0xC6 is used for the
// dword-index (D) mnemonics and 0xC7 for the qword-index (Q) mnemonics; the
// PD forms additionally set VEX_W. Only the DPS forms use a VK16WM mask, all
// others use VK8WM.
8754defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
Igor Breger45ef10f2016-02-25 13:30:17 +00008755 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00008756
8757defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
Craig Topper7dfd5832017-01-16 00:55:58 +00008758 VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00008759
8760defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
Igor Breger45ef10f2016-02-25 13:30:17 +00008761 VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00008762
8763defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
Igor Breger45ef10f2016-02-25 13:30:17 +00008764 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
Michael Liao5bf95782014-12-04 05:20:33 +00008765
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00008766defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
Igor Breger45ef10f2016-02-25 13:30:17 +00008767 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00008768
8769defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
Craig Topper7dfd5832017-01-16 00:55:58 +00008770 VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00008771
8772defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
Igor Breger45ef10f2016-02-25 13:30:17 +00008773 VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00008774
8775defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
Igor Breger45ef10f2016-02-25 13:30:17 +00008776 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00008777
8778defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
Igor Breger45ef10f2016-02-25 13:30:17 +00008779 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00008780
8781defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
Craig Topper7dfd5832017-01-16 00:55:58 +00008782 VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00008783
8784defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
Igor Breger45ef10f2016-02-25 13:30:17 +00008785 VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00008786
8787defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
Igor Breger45ef10f2016-02-25 13:30:17 +00008788 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00008789
8790defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
Igor Breger45ef10f2016-02-25 13:30:17 +00008791 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00008792
8793defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
Craig Topper7dfd5832017-01-16 00:55:58 +00008794 VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00008795
8796defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
Igor Breger45ef10f2016-02-25 13:30:17 +00008797 VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00008798
8799defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
Igor Breger45ef10f2016-02-25 13:30:17 +00008800 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008801
// Mask-to-vector conversion for a single vector width. `rr` reads a mask
// register (Vec.KRC) and writes a vector register (Vec.RC); its pattern
// matches a sign-extension (sext) of the mask to Vec.VT. The mnemonic is
// OpcodeStr with Vec.Suffix appended.
Elena Demikhovsky44bf0632014-10-05 14:11:08 +00008802multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
Elena Demikhovsky0e6d6d52015-04-21 14:38:31 +00008803def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
Craig Topperedb09112014-11-25 20:11:23 +00008804 !strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
Craig Topper0321ebc2018-01-24 04:51:17 +00008805 [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))],
Simon Pilgrimbfe969c2017-12-06 11:59:05 +00008806 IIC_SSE_MOV_S_RR>, EVEX, Sched<[WriteMove]>;
Elena Demikhovsky44bf0632014-10-05 14:11:08 +00008807}
Michael Liao5bf95782014-12-04 05:20:33 +00008808
// Expands cvt_by_vec_width over the 512/256/128-bit members of VTInfo.
// The 512-bit form (Z) requires `prd`; the 256/128-bit forms (Z256, Z128)
// additionally require HasVLX.
Elena Demikhovsky44bf0632014-10-05 14:11:08 +00008809multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
8810 string OpcodeStr, Predicate prd> {
8811let Predicates = [prd] in
8812 defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;
8813
8814 let Predicates = [prd, HasVLX] in {
8815 defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
8816 defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
8817 }
8818}
8819
// VPMOVM2{B,W,D,Q}: the byte/word forms use opcode 0x28 and require HasBWI,
// the dword/qword forms use opcode 0x38 and require HasDQI. The W and Q forms
// set VEX_W. All share the "vpmovm2" mnemonic stem; cvt_by_vec_width appends
// the per-type suffix.
Michael Zuckerman85436ec2017-03-23 09:57:01 +00008820defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
8821defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , VEX_W;
8822defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
8823defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , VEX_W;
Elena Demikhovsky908dbf42014-12-11 15:02:24 +00008824
// Vector-to-mask conversion for a single vector width. `rr` reads a vector
// register (_.RC) and writes a mask register (_.KRC). The pattern matches
// X86pcmpgtm with an all-zeros first operand and the source vector as the
// second operand.
// NOTE(review): presumably this is a signed "0 > src" test, i.e. it extracts
// each element's sign bit - confirm against the X86pcmpgtm definition.
Elena Demikhovsky0e6d6d52015-04-21 14:38:31 +00008825multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
Igor Bregerfca0a342016-01-28 13:19:25 +00008826 def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
8827 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
Craig Topperf090e8a2018-01-08 06:53:54 +00008828 [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))],
Simon Pilgrimbfe969c2017-12-06 11:59:05 +00008829 IIC_SSE_MOV_S_RR>, EVEX, Sched<[WriteMove]>;
Igor Bregerfca0a342016-01-28 13:19:25 +00008830}
8831
Simon Pilgrim18bcf932016-02-03 09:41:59 +00008832// Use 512bit version to implement 128/256 bit in case NoVLX.
// Pattern-only multiclass. It selects the X86pcmpgtm-against-zero test on the
// narrow type `_` by inserting the source into an IMPLICIT_DEF register of
// type ExtendInfo.VT at _.SubRegIdx, applying the NAME#"Zrr" instruction to
// it, and copying the result into the _.KRC register class.
8833multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
Igor Bregerfca0a342016-01-28 13:19:25 +00008834 X86VectorVTInfo _> {
8835
Craig Topperf090e8a2018-01-08 06:53:54 +00008836 def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
Igor Bregerfca0a342016-01-28 13:19:25 +00008837 (_.KVT (COPY_TO_REGCLASS
8838 (!cast<Instruction>(NAME#"Zrr")
Simon Pilgrim18bcf932016-02-03 09:41:59 +00008839 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
Igor Bregerfca0a342016-01-28 13:19:25 +00008840 _.RC:$src, _.SubRegIdx)),
8841 _.KRC))>;
Elena Demikhovsky0e6d6d52015-04-21 14:38:31 +00008842}
8843
// Top-level expansion for vector-to-mask conversions.
//   [prd]          - Z: the 512-bit instruction.
//   [prd, HasVLX]  - Z256 / Z128: real 256/128-bit instructions.
//   [prd, NoVLX]   - Z256_Alt / Z128_Alt: selection patterns only, which use
//                    the 512-bit info as the extended type.
8844multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
Igor Bregerfca0a342016-01-28 13:19:25 +00008845 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
8846 let Predicates = [prd] in
8847 defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
8848 EVEX_V512;
Elena Demikhovsky0e6d6d52015-04-21 14:38:31 +00008849
8850 let Predicates = [prd, HasVLX] in {
8851 defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
Igor Bregerfca0a342016-01-28 13:19:25 +00008852 EVEX_V256;
Elena Demikhovsky0e6d6d52015-04-21 14:38:31 +00008853 defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
Igor Bregerfca0a342016-01-28 13:19:25 +00008854 EVEX_V128;
8855 }
8856 let Predicates = [prd, NoVLX] in {
8857 defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256>;
8858 defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128>;
Elena Demikhovsky0e6d6d52015-04-21 14:38:31 +00008859 }
8860}
8861
// VPMOV{B,W,D,Q}2M: the byte/word forms use opcode 0x29 and require HasBWI,
// the dword/qword forms use opcode 0x39 and require HasDQI. The W and Q forms
// set VEX_W.
8862defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
8863 avx512vl_i8_info, HasBWI>;
8864defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
8865 avx512vl_i16_info, HasBWI>, VEX_W;
8866defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
8867 avx512vl_i32_info, HasDQI>;
8868defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
8869 avx512vl_i64_info, HasDQI>, VEX_W;
8870
Craig Topper0321ebc2018-01-24 04:51:17 +00008871// Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
8872// is available, but BWI is not. We can't handle this in lowering because
8873// a target independent DAG combine likes to combine sext and trunc.
// Both patterns first build a v16i32 from the mask with VPMOVM2DZrr and then
// apply VPMOVDBZrr (for v16i8) or VPMOVDWZrr (for v16i16) to that result.
8874let Predicates = [HasDQI, NoBWI] in {
8875 def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
8876 (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
8877 def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
8878 (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
8879}
8880
Elena Demikhovsky908dbf42014-12-11 15:02:24 +00008881//===----------------------------------------------------------------------===//
8882// AVX-512 - COMPRESS and EXPAND
8883//
Elena Demikhovskyba5ab322015-06-22 11:16:30 +00008884
Simon Pilgrim904d1a82017-12-01 16:20:03 +00008885// FIXME: Is there a better scheduler itinerary for VPCOMPRESS/VPEXPAND?
// AVX512_COMPRESS and AVX512_EXPAND are currently identical: both use the
// IIC_SSE_INTALU_P_RR / IIC_SSE_INTALU_P_RM itineraries and both have their
// Sched field set to WriteShuffle256 by the enclosing `let`.
8886let Sched = WriteShuffle256 in {
8887def AVX512_COMPRESS : OpndItins<
8888 IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
8889>;
8890def AVX512_EXPAND : OpndItins<
8891 IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
8892>;
8893}
8894
// COMPRESS forms for a single vector width:
//   rr  - maskable register form (MRMDestReg) matching X86compress.
//   mr  - unmasked store form with an empty pattern list; marked
//         mayStore = 1, hasSideEffects = 0.
//   mrk - write-masked store form with an empty pattern list.
// NOTE(review): the `let mayStore = 1, hasSideEffects = 0 in` has no braces,
// so it appears to apply only to `mr` and not to `mrk` - confirm whether
// `mrk` is meant to carry the same flags.
Ayman Musad7a5ed42016-09-26 06:22:08 +00008895multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
Simon Pilgrim904d1a82017-12-01 16:20:03 +00008896 string OpcodeStr, OpndItins itins> {
Elena Demikhovskyba5ab322015-06-22 11:16:30 +00008897 defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
Michael Liao66233b72015-08-06 09:06:20 +00008898 (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
Simon Pilgrim904d1a82017-12-01 16:20:03 +00008899 (_.VT (X86compress _.RC:$src1)), itins.rr>, AVX5128IBase,
8900 Sched<[itins.Sched]>;
Elena Demikhovsky908dbf42014-12-11 15:02:24 +00008901
Craig Toppere1cac152016-06-07 07:27:54 +00008902 let mayStore = 1, hasSideEffects = 0 in
Elena Demikhovskyba5ab322015-06-22 11:16:30 +00008903 def mr : AVX5128I<opc, MRMDestMem, (outs),
8904 (ins _.MemOp:$dst, _.RC:$src),
Craig Topper9feea572016-01-11 00:44:58 +00008905 OpcodeStr # "\t{$src, $dst|$dst, $src}",
Simon Pilgrim904d1a82017-12-01 16:20:03 +00008906 []>, EVEX_CD8<_.EltSize, CD8VT1>,
8907 Sched<[itins.Sched.Folded]>;
Elena Demikhovskyba5ab322015-06-22 11:16:30 +00008908
Elena Demikhovsky908dbf42014-12-11 15:02:24 +00008909 def mrk : AVX5128I<opc, MRMDestMem, (outs),
8910 (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
Craig Topper9feea572016-01-11 00:44:58 +00008911 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
Ayman Musad7a5ed42016-09-26 06:22:08 +00008912 []>,
Simon Pilgrim904d1a82017-12-01 16:20:03 +00008913 EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
8914 Sched<[itins.Sched.Folded]>;
Elena Demikhovsky908dbf42014-12-11 15:02:24 +00008915}
8916
// Pattern-only multiclass: selects X86mCompressingStore (address, write-mask,
// vector value) to the write-masked store instruction, which is looked up by
// name via !cast from NAME, _.ZSuffix and the `mrk` suffix.
Ayman Musad7a5ed42016-09-26 06:22:08 +00008917multiclass compress_by_vec_width_lowering<X86VectorVTInfo _ > {
Ayman Musad7a5ed42016-09-26 06:22:08 +00008918 def : Pat<(X86mCompressingStore addr:$dst, _.KRCWM:$mask,
8919 (_.VT _.RC:$src)),
8920 (!cast<Instruction>(NAME#_.ZSuffix##mrk)
8921 addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
8922}
8923
// Expands the COMPRESS instructions and their compressing-store patterns over
// the 512/256/128-bit members of VTInfo. Z requires `Pred` (HasAVX512 by
// default); Z256 and Z128 additionally require HasVLX.
Elena Demikhovsky908dbf42014-12-11 15:02:24 +00008924multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
Simon Pilgrim904d1a82017-12-01 16:20:03 +00008925 OpndItins itins,
Coby Tayree71e37cc2017-11-21 09:48:44 +00008926 AVX512VLVectorVTInfo VTInfo,
8927 Predicate Pred = HasAVX512> {
8928 let Predicates = [Pred] in
Simon Pilgrim904d1a82017-12-01 16:20:03 +00008929 defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, itins>,
Ayman Musad7a5ed42016-09-26 06:22:08 +00008930 compress_by_vec_width_lowering<VTInfo.info512>, EVEX_V512;
Elena Demikhovsky908dbf42014-12-11 15:02:24 +00008931
Coby Tayree71e37cc2017-11-21 09:48:44 +00008932 let Predicates = [Pred, HasVLX] in {
Simon Pilgrim904d1a82017-12-01 16:20:03 +00008933 defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, itins>,
Ayman Musad7a5ed42016-09-26 06:22:08 +00008934 compress_by_vec_width_lowering<VTInfo.info256>, EVEX_V256;
Simon Pilgrim904d1a82017-12-01 16:20:03 +00008935 defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, itins>,
Ayman Musad7a5ed42016-09-26 06:22:08 +00008936 compress_by_vec_width_lowering<VTInfo.info128>, EVEX_V128;
Elena Demikhovsky908dbf42014-12-11 15:02:24 +00008937 }
8938}
8939
// COMPRESS instantiations: the integer forms use opcode 0x8B and the FP forms
// use opcode 0x8A; the 64-bit element forms (Q, PD) add VEX_W. All use the
// AVX512_COMPRESS itineraries and the default predicate of
// compress_by_elt_width.
Simon Pilgrim904d1a82017-12-01 16:20:03 +00008940defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", AVX512_COMPRESS,
8941 avx512vl_i32_info>, EVEX;
8942defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", AVX512_COMPRESS,
8943 avx512vl_i64_info>, EVEX, VEX_W;
8944defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", AVX512_COMPRESS,
8945 avx512vl_f32_info>, EVEX;
8946defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", AVX512_COMPRESS,
8947 avx512vl_f64_info>, EVEX, VEX_W;
Elena Demikhovsky908dbf42014-12-11 15:02:24 +00008948
Elena Demikhovsky72860c32014-12-15 10:03:52 +00008949// expand
// EXPAND forms for a single vector width, both built with AVX512_maskable:
//   rr - register source (MRMSrcReg) matching X86expand.
//   rm - memory source (MRMSrcMem); the operand is loaded with _.LdFrag,
//        bitconverted to _.VT and then passed to X86expand.
8950multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
Simon Pilgrim904d1a82017-12-01 16:20:03 +00008951 string OpcodeStr, OpndItins itins> {
Elena Demikhovskyba5ab322015-06-22 11:16:30 +00008952 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
Michael Liao66233b72015-08-06 09:06:20 +00008953 (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
Simon Pilgrim904d1a82017-12-01 16:20:03 +00008954 (_.VT (X86expand _.RC:$src1)), itins.rr>, AVX5128IBase,
8955 Sched<[itins.Sched]>;
Elena Demikhovsky75ede682015-06-01 07:17:23 +00008956
Elena Demikhovskyba5ab322015-06-22 11:16:30 +00008957 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8958 (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
8959 (_.VT (X86expand (_.VT (bitconvert
Simon Pilgrim904d1a82017-12-01 16:20:03 +00008960 (_.LdFrag addr:$src1))))), itins.rm>,
8961 AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
8962 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Elena Demikhovsky72860c32014-12-15 10:03:52 +00008963}
8964
// Pattern-only multiclass for X86mExpandingLoad:
//   - with an `undef` pass-through operand it selects the `rmkz` instruction
//     (operands: mask, address);
//   - with a register pass-through ($src0) it selects the `rmk` instruction
//     (operands: $src0, mask, address).
// The instructions are looked up by name via !cast from NAME and _.ZSuffix.
Elena Demikhovsky5b10aa12016-10-09 10:48:52 +00008965multiclass expand_by_vec_width_lowering<X86VectorVTInfo _ > {
8966
8967 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
8968 (!cast<Instruction>(NAME#_.ZSuffix##rmkz)
8969 _.KRCWM:$mask, addr:$src)>;
8970
8971 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
8972 (_.VT _.RC:$src0))),
8973 (!cast<Instruction>(NAME#_.ZSuffix##rmk)
8974 _.RC:$src0, _.KRCWM:$mask, addr:$src)>;
8975}
8976
// Expands the EXPAND instructions and their expanding-load patterns over the
// 512/256/128-bit members of VTInfo. Z requires `Pred` (HasAVX512 by
// default); Z256 and Z128 additionally require HasVLX.
Elena Demikhovsky72860c32014-12-15 10:03:52 +00008977multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
Simon Pilgrim904d1a82017-12-01 16:20:03 +00008978 OpndItins itins,
Coby Tayree71e37cc2017-11-21 09:48:44 +00008979 AVX512VLVectorVTInfo VTInfo,
8980 Predicate Pred = HasAVX512> {
8981 let Predicates = [Pred] in
Simon Pilgrim904d1a82017-12-01 16:20:03 +00008982 defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, itins>,
Elena Demikhovsky5b10aa12016-10-09 10:48:52 +00008983 expand_by_vec_width_lowering<VTInfo.info512>, EVEX_V512;
Elena Demikhovsky72860c32014-12-15 10:03:52 +00008984
Coby Tayree71e37cc2017-11-21 09:48:44 +00008985 let Predicates = [Pred, HasVLX] in {
Simon Pilgrim904d1a82017-12-01 16:20:03 +00008986 defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, itins>,
Elena Demikhovsky5b10aa12016-10-09 10:48:52 +00008987 expand_by_vec_width_lowering<VTInfo.info256>, EVEX_V256;
Simon Pilgrim904d1a82017-12-01 16:20:03 +00008988 defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, itins>,
Elena Demikhovsky5b10aa12016-10-09 10:48:52 +00008989 expand_by_vec_width_lowering<VTInfo.info128>, EVEX_V128;
Elena Demikhovsky72860c32014-12-15 10:03:52 +00008990 }
8991}
8992
// EXPAND instantiations: the integer forms use opcode 0x89 and the FP forms
// use opcode 0x88; the 64-bit element forms (Q, PD) add VEX_W. All use the
// AVX512_EXPAND itineraries and the default predicate of expand_by_elt_width.
Simon Pilgrim904d1a82017-12-01 16:20:03 +00008993defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", AVX512_EXPAND,
8994 avx512vl_i32_info>, EVEX;
8995defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", AVX512_EXPAND,
8996 avx512vl_i64_info>, EVEX, VEX_W;
8997defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", AVX512_EXPAND,
8998 avx512vl_f32_info>, EVEX;
8999defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", AVX512_EXPAND,
9000 avx512vl_f64_info>, EVEX, VEX_W;
Elena Demikhovsky42c96d92015-06-01 06:50:49 +00009001
Asaf Badouha5b2e5e2015-07-22 12:00:43 +00009002//handle instruction reg_vec1 = op(reg_vec,imm)
9003// op(mem_vec,imm)
9004// op(broadcast(eltVt),imm)
9005//all instructions are created with FROUND_CURRENT
// One-source packed FP operation with an immediate, for a single vector
// width. Three AVX512_maskable forms are defined under _.ExeDomain:
//   rri  - register source.
//   rmi  - full-vector memory source (loaded with _.LdFrag and bitconverted).
//   rmbi - broadcast of a scalar memory source (X86VBroadcast of
//          _.ScalarLdFrag), marked EVEX_B.
// The mnemonic is OpcodeStr with _.Suffix appended; the immediate operand is
// an i32u8imm matched as (i32 imm).
9006multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00009007 OpndItins itins, X86VectorVTInfo _> {
Craig Topper05948fb2016-08-02 05:11:15 +00009008 let ExeDomain = _.ExeDomain in {
Asaf Badouha5b2e5e2015-07-22 12:00:43 +00009009 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9010 (ins _.RC:$src1, i32u8imm:$src2),
Igor Breger252c2d92016-02-22 12:37:41 +00009011 OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
Asaf Badouha5b2e5e2015-07-22 12:00:43 +00009012 (OpNode (_.VT _.RC:$src1),
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00009013 (i32 imm:$src2)), itins.rr>, Sched<[itins.Sched]>;
Craig Toppere1cac152016-06-07 07:27:54 +00009014 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9015 (ins _.MemOp:$src1, i32u8imm:$src2),
9016 OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
9017 (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00009018 (i32 imm:$src2)), itins.rm>,
9019 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Craig Toppere1cac152016-06-07 07:27:54 +00009020 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9021 (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
9022 OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr,
9023 "${src1}"##_.BroadcastStr##", $src2",
9024 (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src1))),
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00009025 (i32 imm:$src2)), itins.rm>, EVEX_B,
9026 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Craig Topper05948fb2016-08-02 05:11:15 +00009027 }
Asaf Badouha5b2e5e2015-07-22 12:00:43 +00009028}
9029
9030//handle instruction reg_vec1 = op(reg_vec,imm),{sae}
// {sae} register form (`rrib`) of a one-source packed FP operation with an
// immediate. OpNode receives the source vector, the immediate and an extra
// (i32 FROUND_NO_EXC) operand; the record is marked EVEX_B and the assembly
// string contains "{sae}".
9031multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00009032 SDNode OpNode, OpndItins itins,
9033 X86VectorVTInfo _> {
Craig Topper05948fb2016-08-02 05:11:15 +00009034 let ExeDomain = _.ExeDomain in
Asaf Badouha5b2e5e2015-07-22 12:00:43 +00009035 defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9036 (ins _.RC:$src1, i32u8imm:$src2),
Craig Topperbfe13ff2016-01-11 00:44:52 +00009037 OpcodeStr##_.Suffix, "$src2, {sae}, $src1",
Asaf Badouha5b2e5e2015-07-22 12:00:43 +00009038 "$src1, {sae}, $src2",
9039 (OpNode (_.VT _.RC:$src1),
9040 (i32 imm:$src2),
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00009041 (i32 FROUND_NO_EXC)), itins.rr>,
9042 EVEX_B, Sched<[itins.Sched]>;
Asaf Badouha5b2e5e2015-07-22 12:00:43 +00009043}
9044
// Expands the one-source packed-imm operation over all vector lengths.
//   [prd]         - Z gets both the regular forms (OpNode) and the {sae}
//                   form (OpNodeRnd).
//   [prd, HasVLX] - Z128 / Z256 get only the regular forms; no {sae} form is
//                   instantiated for them here.
9045multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
Craig Topper0af48f12017-11-13 02:02:58 +00009046 AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00009047 SDNode OpNodeRnd, OpndItins itins, Predicate prd>{
Asaf Badouha5b2e5e2015-07-22 12:00:43 +00009048 let Predicates = [prd] in {
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00009049 defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, itins,
9050 _.info512>,
9051 avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd,
9052 itins, _.info512>, EVEX_V512;
Asaf Badouha5b2e5e2015-07-22 12:00:43 +00009053 }
9054 let Predicates = [prd, HasVLX] in {
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00009055 defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, itins,
9056 _.info128>, EVEX_V128;
9057 defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, itins,
9058 _.info256>, EVEX_V256;
Asaf Badouha5b2e5e2015-07-22 12:00:43 +00009059 }
9060}
9061
Elena Demikhovsky42c96d92015-06-01 06:50:49 +00009062//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
9063// op(reg_vec2,mem_vec,imm)
9064// op(reg_vec2,broadcast(eltVt),imm)
9065//all instructions are created with FROUND_CURRENT
// Two-source packed FP operation with an immediate, for a single vector
// width. Three AVX512_maskable forms are defined under _.ExeDomain:
//   rri  - second source is a register.
//   rmi  - second source is a full-vector memory operand (_.LdFrag,
//          bitconverted to _.VT).
//   rmbi - second source is a broadcast scalar memory operand, marked EVEX_B.
// Unlike the one-source variant, the mnemonic is OpcodeStr as given (no
// suffix is appended). The immediate is an i32u8imm matched as (i32 imm).
9066multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00009067 OpndItins itins, X86VectorVTInfo _>{
Craig Topper05948fb2016-08-02 05:11:15 +00009068 let ExeDomain = _.ExeDomain in {
Elena Demikhovsky42c96d92015-06-01 06:50:49 +00009069 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
Asaf Badouha5b2e5e2015-07-22 12:00:43 +00009070 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
Elena Demikhovsky42c96d92015-06-01 06:50:49 +00009071 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
9072 (OpNode (_.VT _.RC:$src1),
9073 (_.VT _.RC:$src2),
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00009074 (i32 imm:$src3)), itins.rr>,
9075 Sched<[itins.Sched]>;
Craig Toppere1cac152016-06-07 07:27:54 +00009076 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9077 (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
9078 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
9079 (OpNode (_.VT _.RC:$src1),
9080 (_.VT (bitconvert (_.LdFrag addr:$src2))),
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00009081 (i32 imm:$src3)), itins.rm>,
9082 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Craig Toppere1cac152016-06-07 07:27:54 +00009083 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9084 (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
9085 OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
9086 "$src1, ${src2}"##_.BroadcastStr##", $src3",
9087 (OpNode (_.VT _.RC:$src1),
9088 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00009089 (i32 imm:$src3)), itins.rm>, EVEX_B,
9090 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Craig Topper05948fb2016-08-02 05:11:15 +00009091 }
Elena Demikhovsky42c96d92015-06-01 06:50:49 +00009092}
9093
Elena Demikhovsky9e380862015-06-03 10:56:40 +00009094//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
9095// op(reg_vec2,mem_vec,imm)
// Two-source operation with an 8-bit immediate whose source and destination
// vector types may differ (SrcInfo vs. DestInfo). Defines `rri` (register
// second source) and `rmi` (memory second source, loaded with SrcInfo.LdFrag
// and bitconverted to SrcInfo.VT). Masking and ExeDomain come from DestInfo;
// the immediate is a u8imm matched as (i8 imm). No broadcast form is defined
// here.
Igor Breger2ae0fe32015-08-31 11:14:02 +00009096multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim36be8522017-11-29 18:52:20 +00009097 OpndItins itins, X86VectorVTInfo DestInfo,
9098 X86VectorVTInfo SrcInfo>{
Craig Topper05948fb2016-08-02 05:11:15 +00009099 let ExeDomain = DestInfo.ExeDomain in {
Igor Breger2ae0fe32015-08-31 11:14:02 +00009100 defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
9101 (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
9102 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
9103 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
9104 (SrcInfo.VT SrcInfo.RC:$src2),
Simon Pilgrim36be8522017-11-29 18:52:20 +00009105 (i8 imm:$src3))), itins.rr>,
9106 Sched<[itins.Sched]>;
Craig Toppere1cac152016-06-07 07:27:54 +00009107 defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
9108 (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
9109 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
9110 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
9111 (SrcInfo.VT (bitconvert
9112 (SrcInfo.LdFrag addr:$src2))),
Simon Pilgrim36be8522017-11-29 18:52:20 +00009113 (i8 imm:$src3))), itins.rm>,
9114 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Craig Topper05948fb2016-08-02 05:11:15 +00009115 }
Igor Breger2ae0fe32015-08-31 11:14:02 +00009116}
9117
9118//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
9119// op(reg_vec2,mem_vec,imm)
Elena Demikhovsky9e380862015-06-03 10:56:40 +00009120// op(reg_vec2,broadcast(eltVt),imm)
// Same-type variant of avx512_3Op_rm_imm8: inherits its `rri` / `rmi` forms
// with `_` used as both destination and source info, and adds `rmbi`, whose
// second source is a broadcast scalar memory operand (marked EVEX_B).
9121multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim36be8522017-11-29 18:52:20 +00009122 OpndItins itins, X86VectorVTInfo _>:
9123 avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, itins, _, _>{
Igor Breger2ae0fe32015-08-31 11:14:02 +00009124
Craig Topper05948fb2016-08-02 05:11:15 +00009125 let ExeDomain = _.ExeDomain in
Craig Toppere1cac152016-06-07 07:27:54 +00009126 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9127 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
9128 OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
9129 "$src1, ${src2}"##_.BroadcastStr##", $src3",
9130 (OpNode (_.VT _.RC:$src1),
9131 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
Simon Pilgrim36be8522017-11-29 18:52:20 +00009132 (i8 imm:$src3)), itins.rm>, EVEX_B,
9133 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Elena Demikhovsky9e380862015-06-03 10:56:40 +00009134}
9135
Elena Demikhovsky3425c932015-06-02 08:28:57 +00009136//handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
9137// op(reg_vec2,mem_scalar,imm)
// Two-source scalar FP operation with an immediate, built with
// AVX512_maskable_scalar:
//   rri - second source is a register.
//   rmi - second source is a scalar memory operand, loaded with
//         _.ScalarLdFrag and wrapped in scalar_to_vector.
// The immediate is an i32u8imm matched as (i32 imm).
Elena Demikhovsky3425c932015-06-02 08:28:57 +00009138multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00009139 OpndItins itins, X86VectorVTInfo _> {
Craig Topper05948fb2016-08-02 05:11:15 +00009140 let ExeDomain = _.ExeDomain in {
Elena Demikhovsky3425c932015-06-02 08:28:57 +00009141 defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
Asaf Badouha5b2e5e2015-07-22 12:00:43 +00009142 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
Elena Demikhovsky3425c932015-06-02 08:28:57 +00009143 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
9144 (OpNode (_.VT _.RC:$src1),
9145 (_.VT _.RC:$src2),
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00009146 (i32 imm:$src3)), itins.rr>,
9147 Sched<[itins.Sched]>;
Craig Toppere1cac152016-06-07 07:27:54 +00009148 defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
Igor Bregere73ef852016-09-11 12:38:46 +00009149 (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
Craig Toppere1cac152016-06-07 07:27:54 +00009150 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
9151 (OpNode (_.VT _.RC:$src1),
9152 (_.VT (scalar_to_vector
9153 (_.ScalarLdFrag addr:$src2))),
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00009154 (i32 imm:$src3)), itins.rm>,
9155 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Craig Topper05948fb2016-08-02 05:11:15 +00009156 }
Elena Demikhovsky3425c932015-06-02 08:28:57 +00009157}
9158
Elena Demikhovsky42c96d92015-06-01 06:50:49 +00009159//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// {sae} register form (`rrib`) of a two-source packed FP operation with an
// immediate. OpNode receives both source vectors, the immediate and an extra
// (i32 FROUND_NO_EXC) operand; the record is marked EVEX_B and the assembly
// string contains "{sae}".
9160multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00009161 SDNode OpNode, OpndItins itins,
9162 X86VectorVTInfo _> {
Craig Topper05948fb2016-08-02 05:11:15 +00009163 let ExeDomain = _.ExeDomain in
Elena Demikhovsky42c96d92015-06-01 06:50:49 +00009164 defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
Asaf Badouha5b2e5e2015-07-22 12:00:43 +00009165 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
Craig Topperbfe13ff2016-01-11 00:44:52 +00009166 OpcodeStr, "$src3, {sae}, $src2, $src1",
9167 "$src1, $src2, {sae}, $src3",
Elena Demikhovsky42c96d92015-06-01 06:50:49 +00009168 (OpNode (_.VT _.RC:$src1),
9169 (_.VT _.RC:$src2),
Asaf Badouha5b2e5e2015-07-22 12:00:43 +00009170 (i32 imm:$src3),
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00009171 (i32 FROUND_NO_EXC)), itins.rr>,
9172 EVEX_B, Sched<[itins.Sched]>;
Elena Demikhovsky42c96d92015-06-01 06:50:49 +00009173}
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00009174
Elena Demikhovsky3425c932015-06-02 08:28:57 +00009175//handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// {sae} register form of a two-source scalar FP operation with an immediate,
// built with AVX512_maskable_scalar. OpNode receives both sources, the
// immediate and an extra (i32 FROUND_NO_EXC) operand; the record is marked
// EVEX_B.
// NOTE(review): this defm is spelled `NAME#rrib`, whereas the packed sibling
// uses plain `rrib` - confirm both spellings yield the intended record names.
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00009176multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
9177 OpndItins itins, X86VectorVTInfo _> {
Craig Toppercac5d692017-02-26 06:45:37 +00009178 let ExeDomain = _.ExeDomain in
Asaf Badouha5b2e5e2015-07-22 12:00:43 +00009179 defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9180 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
Craig Topperbfe13ff2016-01-11 00:44:52 +00009181 OpcodeStr, "$src3, {sae}, $src2, $src1",
9182 "$src1, $src2, {sae}, $src3",
Asaf Badouha5b2e5e2015-07-22 12:00:43 +00009183 (OpNode (_.VT _.RC:$src1),
9184 (_.VT _.RC:$src2),
9185 (i32 imm:$src3),
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00009186 (i32 FROUND_NO_EXC)), itins.rr>,
9187 EVEX_B, Sched<[itins.Sched]>;
Elena Demikhovsky3425c932015-06-02 08:28:57 +00009188}
Elena Demikhovsky42c96d92015-06-01 06:50:49 +00009189
// Expands the two-source packed-imm operation over all vector lengths.
//   [prd]         - Z gets both the regular forms (OpNode) and the {sae}
//                   form (OpNodeRnd).
//   [prd, HasVLX] - Z128 / Z256 get only the regular forms; no {sae} form is
//                   instantiated for them here.
Elena Demikhovsky3582eb32015-06-01 11:05:34 +00009190multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
Craig Topper0af48f12017-11-13 02:02:58 +00009191 AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00009192 SDNode OpNodeRnd, OpndItins itins, Predicate prd>{
Elena Demikhovsky9e380862015-06-03 10:56:40 +00009193 let Predicates = [prd] in {
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00009194 defm Z : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, itins, _.info512>,
9195 avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd, itins, _.info512>,
Elena Demikhovsky42c96d92015-06-01 06:50:49 +00009196 EVEX_V512;
9197
Elena Demikhovsky9e380862015-06-03 10:56:40 +00009198 }
9199 let Predicates = [prd, HasVLX] in {
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00009200 defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, itins, _.info128>,
Elena Demikhovsky42c96d92015-06-01 06:50:49 +00009201 EVEX_V128;
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00009202 defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, itins, _.info256>,
Elena Demikhovsky42c96d92015-06-01 06:50:49 +00009203 EVEX_V256;
Elena Demikhovsky9e380862015-06-03 10:56:40 +00009204 }
Elena Demikhovsky42c96d92015-06-01 06:50:49 +00009205}
9206
// Expands avx512_3Op_rm_imm8 (distinct destination/source infos, no broadcast
// form) over all vector lengths. Z requires `Pred` (HasBWI by default);
// Z128 / Z256 additionally require HasVLX. Every instantiation also inherits
// AVX512AIi8Base and EVEX_4V.
// Note the parameter order: (opc, OpNode, OpStr, ...), which differs from
// avx512_common_3Op_imm8.
Igor Breger2ae0fe32015-08-31 11:14:02 +00009207multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
Simon Pilgrim36be8522017-11-29 18:52:20 +00009208 OpndItins itins, AVX512VLVectorVTInfo DestInfo,
9209 AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
Coby Tayree71e37cc2017-11-21 09:48:44 +00009210 let Predicates = [Pred] in {
Simon Pilgrim36be8522017-11-29 18:52:20 +00009211 defm Z : avx512_3Op_rm_imm8<opc, OpStr, OpNode, itins, DestInfo.info512,
Igor Breger2ae0fe32015-08-31 11:14:02 +00009212 SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
9213 }
Coby Tayree71e37cc2017-11-21 09:48:44 +00009214 let Predicates = [Pred, HasVLX] in {
Simon Pilgrim36be8522017-11-29 18:52:20 +00009215 defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, itins, DestInfo.info128,
Igor Breger2ae0fe32015-08-31 11:14:02 +00009216 SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
Simon Pilgrim36be8522017-11-29 18:52:20 +00009217 defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, itins, DestInfo.info256,
Igor Breger2ae0fe32015-08-31 11:14:02 +00009218 SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
9219 }
9220}
9221
// Instantiates avx512_3Op_imm8 at all three vector widths of the given
// AVX512VLVectorVTInfo. Z is guarded by Pred; Z128/Z256 also need HasVLX.
multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
                                  bits<8> opc, SDNode OpNode, OpndItins itins,
                                  Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in
  defm Z    : avx512_3Op_imm8<opc, OpcodeStr, OpNode, itins, _.info512>,
              EVEX_V512;

  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, itins, _.info128>,
                EVEX_V128;
    defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, itins, _.info256>,
                EVEX_V256;
  }
}
9233
// Scalar FP operation with an immediate: combines the regular form (OpNode)
// and the SAE form (OpNodeRnd) under a single Z128 prefix, guarded by prd.
multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
                                           X86VectorVTInfo _, bits<8> opc,
                                           SDNode OpNode, SDNode OpNodeRnd,
                                           OpndItins itins, Predicate prd> {
  let Predicates = [prd] in
  defm Z128 : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, itins, _>,
              avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeRnd, itins, _>;
}
9242
// Unary packed FP operation with an immediate, for both element sizes:
// PS uses opcPs with the f32 vector infos and itins.s, PD uses opcPd with the
// f64 vector infos and itins.d (and sets VEX_W).
multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
                                                     bits<8> opcPs,
                                                     bits<8> opcPd,
                                                     SDNode OpNode,
                                                     SDNode OpNodeRnd,
                                                     SizeItins itins,
                                                     Predicate prd> {
  defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
                                                  opcPs, OpNode, OpNodeRnd,
                                                  itins.s, prd>,
            EVEX_CD8<32, CD8VF>;
  defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
                                                  opcPd, OpNode, OpNodeRnd,
                                                  itins.d, prd>,
            EVEX_CD8<64, CD8VF>, VEX_W;
}
9253
// Packed unary FP-with-immediate instructions (PS and PD variants of each).
defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<
                     "vreduce", 0x56, 0x56, X86VReduce, X86VReduceRnd,
                     SSE_ALU_ITINS_P, HasDQI>,
                 AVX512AIi8Base, EVEX;
defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<
                     "vrndscale", 0x08, 0x09, X86VRndScale, X86VRndScaleRnd,
                     SSE_ALU_ITINS_P, HasAVX512>,
                 AVX512AIi8Base, EVEX;
defm VGETMANT  : avx512_common_unary_fp_sae_packed_imm_all<
                     "vgetmant", 0x26, 0x26, X86VGetMant, X86VGetMantRnd,
                     SSE_ALU_ITINS_P, HasAVX512>,
                 AVX512AIi8Base, EVEX;
Igor Breger1e58e8a2015-09-02 11:18:55 +00009263
// Packed VRANGE, double and single precision (both require HasDQI).
defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
                                                0x50, X86VRange, X86VRangeRnd,
                                                SSE_ALU_F64P, HasDQI>,
                AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
                                                0x50, X86VRange, X86VRangeRnd,
                                                SSE_ALU_F32P, HasDQI>,
                AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
9272
// Scalar VRANGE (HasDQI).
defm VRANGESD : avx512_common_fp_sae_scalar_imm<"vrangesd", f64x_info, 0x51,
                                                X86Ranges, X86RangesRnd,
                                                SSE_ALU_F64S, HasDQI>,
                AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VRANGESS : avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info, 0x51,
                                                X86Ranges, X86RangesRnd,
                                                SSE_ALU_F32S, HasDQI>,
                AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;

// Scalar VREDUCE (HasDQI).
defm VREDUCESD : avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info, 0x57,
                                                 X86Reduces, X86ReducesRnd,
                                                 SSE_ALU_F64S, HasDQI>,
                 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VREDUCESS : avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info, 0x57,
                                                 X86Reduces, X86ReducesRnd,
                                                 SSE_ALU_F32S, HasDQI>,
                 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;

// Scalar VGETMANT (HasAVX512).
defm VGETMANTSD : avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info, 0x27,
                                                  X86GetMants, X86GetMantsRnd,
                                                  SSE_ALU_F64S, HasAVX512>,
                  AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VGETMANTSS : avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info, 0x27,
                                                  X86GetMants, X86GetMantsRnd,
                                                  SSE_ALU_F32S, HasAVX512>,
                  AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
9293
// Select the register/immediate form of VRNDSCALE for the generic FP rounding
// nodes on vector type _.VT.
//
// InstrStr is the instruction name up to and including the vector-width
// suffix (e.g. "VRNDSCALEPSZ128"); "rri" is appended to pick the unmasked
// register form.
//
// NOTE(review): the immediates follow the ROUNDPS/VRNDSCALE imm8 layout as
// described in the Intel SDM (bits 1:0 = rounding mode, bit 2 = use MXCSR.RC,
// bit 3 = suppress precision exception) -- confirm against the SDM if these
// are ever changed.
multiclass avx512_rndscale_rri_lowering<string InstrStr, X86VectorVTInfo _> {
  def : Pat<(_.VT (ffloor _.RC:$src)),
            (!cast<Instruction>(InstrStr # "rri") _.RC:$src, (i32 0x9))>;
  def : Pat<(_.VT (fnearbyint _.RC:$src)),
            (!cast<Instruction>(InstrStr # "rri") _.RC:$src, (i32 0xC))>;
  def : Pat<(_.VT (fceil _.RC:$src)),
            (!cast<Instruction>(InstrStr # "rri") _.RC:$src, (i32 0xA))>;
  def : Pat<(_.VT (frint _.RC:$src)),
            (!cast<Instruction>(InstrStr # "rri") _.RC:$src, (i32 0x4))>;
  def : Pat<(_.VT (ftrunc _.RC:$src)),
            (!cast<Instruction>(InstrStr # "rri") _.RC:$src, (i32 0xB))>;
}

// 512-bit vectors.
let Predicates = [HasAVX512] in {
  defm : avx512_rndscale_rri_lowering<"VRNDSCALEPSZ",
                                      avx512vl_f32_info.info512>;
  defm : avx512_rndscale_rri_lowering<"VRNDSCALEPDZ",
                                      avx512vl_f64_info.info512>;
}

// 128-bit and 256-bit vectors.
let Predicates = [HasVLX] in {
  defm : avx512_rndscale_rri_lowering<"VRNDSCALEPSZ128",
                                      avx512vl_f32_info.info128>;
  defm : avx512_rndscale_rri_lowering<"VRNDSCALEPDZ128",
                                      avx512vl_f64_info.info128>;
  defm : avx512_rndscale_rri_lowering<"VRNDSCALEPSZ256",
                                      avx512vl_f32_info.info256>;
  defm : avx512_rndscale_rri_lowering<"VRNDSCALEPDZ256",
                                      avx512vl_f64_info.info256>;
}
9363
// Shuffles of 128-bit chunks (X86Shuf128). Only the 512-bit (Z) and 256-bit
// (Z256, requires HasVLX) forms are instantiated here; no Z128 form is
// defined.
multiclass avx512_shuff_packed_128<string OpcodeStr, OpndItins itins,
                                   AVX512VLVectorVTInfo _, bits<8> opc> {
  let Predicates = [HasAVX512] in
  defm Z    : avx512_3Op_imm8<opc, OpcodeStr, X86Shuf128, itins, _.info512>,
              EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in
  defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, X86Shuf128, itins, _.info256>,
              EVEX_V256;
}
9374
// FP (opcode 0x23) and integer (opcode 0x43) 128-bit-chunk shuffles, with
// 32-bit and 64-bit element variants of each.
defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", SSE_SHUFP,
                                          avx512vl_f32_info, 0x23>,
                  AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", SSE_SHUFP,
                                          avx512vl_f64_info, 0x23>,
                  AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", SSE_SHUFP,
                                          avx512vl_i32_info, 0x43>,
                  AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", SSE_SHUFP,
                                          avx512vl_i64_info, 0x43>,
                  AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
Igor Breger00d9f842015-06-08 14:03:17 +00009383
let Predicates = [HasAVX512] in {
// Register-source fallback for 128-bit -> 512-bit subvector broadcast. The
// load-folding broadcast patterns cannot be selected when the load has other
// users, so broadcast from a register instead: place the 128-bit value in the
// low subregister of an undefined 512-bit register and shuffle it with itself
// using an immediate of 0.
def : Pat<(v8f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
          (VSHUFF64X2Zrri
             (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
             (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
             0)>;
def : Pat<(v8i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
          (VSHUFI64X2Zrri
             (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
             (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
             0)>;

def : Pat<(v16f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
          (VSHUFF32X4Zrri
             (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
             (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
             0)>;
def : Pat<(v16i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
          (VSHUFI32X4Zrri
             (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
             (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
             0)>;

// 16-bit and 8-bit element vectors reuse the 32-bit-element integer shuffle.
def : Pat<(v32i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
          (VSHUFI32X4Zrri
             (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
             (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
             0)>;

def : Pat<(v64i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
          (VSHUFI32X4Zrri
             (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
             (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
             0)>;
}
9416
// VALIGN at all vector widths of VTInfo_I: opcode 0x03, node X86VAlign, using
// the default predicate of avx512_common_3Op_imm8.
multiclass avx512_valign<string OpcodeStr, OpndItins itins,
                         AVX512VLVectorVTInfo VTInfo_I> {
  defm NAME : avx512_common_3Op_imm8<OpcodeStr, VTInfo_I, 0x03, X86VAlign,
                                     itins>,
              AVX512AIi8Base, EVEX_4V;
}
9422
// Element-granular align (dword / qword).
defm VALIGND : avx512_valign<"valignd", SSE_PALIGN, avx512vl_i32_info>,
               EVEX_CD8<32, CD8VF>;
defm VALIGNQ : avx512_valign<"valignq", SSE_PALIGN, avx512vl_i64_info>,
               EVEX_CD8<64, CD8VF>, VEX_W;

// Byte-granular align; relies on the default (HasBWI) predicate of
// avx512_common_3Op_rm_imm8.
defm VPALIGNR : avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
                                          SSE_PALIGN, avx512vl_i8_info,
                                          avx512vl_i8_info>,
                EVEX_CD8<8, CD8VF>;
9431
// Immediate transforms used when re-expressing a valign with a different
// element size: the shift count is rescaled by the ratio of element sizes.

// valignq count (64-bit elements) -> valignd count (32-bit elements).
def ValignqImm32XForm : SDNodeXForm<imm, [{
  return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
}]>;

// valignq count (64-bit elements) -> vpalignr count (bytes).
def ValignqImm8XForm : SDNodeXForm<imm, [{
  return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
}]>;

// valignd count (32-bit elements) -> vpalignr count (bytes).
def ValigndImm8XForm : SDNodeXForm<imm, [{
  return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
}]>;
9443
// Masked-select patterns for an align node (OpNode) computed on type From.VT
// but masked, after a bitconvert, at the granularity of To.VT. Each pattern
// selects the masked form of the instruction named OpcodeStr, which operates
// on To's element size, and rescales the immediate with ImmXForm.
//
// Four forms are covered: register or full-vector-load second source, each
// with merge-masking ("k", passthrough $src0) or zero-masking ("kz").
multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
                                        X86VectorVTInfo From,
                                        X86VectorVTInfo To,
                                        SDNodeXForm ImmXForm> {
  // reg, reg -- merge-masking.
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                              imm:$src3))),
                            To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rrik")
                To.RC:$src0, To.KRCWM:$mask, To.RC:$src1, To.RC:$src2,
                (ImmXForm imm:$src3))>;

  // reg, reg -- zero-masking.
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                              imm:$src3))),
                            To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rrikz")
                To.KRCWM:$mask, To.RC:$src1, To.RC:$src2,
                (ImmXForm imm:$src3))>;

  // reg, mem -- merge-masking.
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1,
                                       (bitconvert (To.LdFrag addr:$src2)),
                                       imm:$src3))),
                            To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rmik")
                To.RC:$src0, To.KRCWM:$mask, To.RC:$src1, addr:$src2,
                (ImmXForm imm:$src3))>;

  // reg, mem -- zero-masking.
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1,
                                       (bitconvert (To.LdFrag addr:$src2)),
                                       imm:$src3))),
                            To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rmikz")
                To.KRCWM:$mask, To.RC:$src1, addr:$src2,
                (ImmXForm imm:$src3))>;
}
9485
// Extends avx512_vpalign_mask_lowering with patterns whose second source is a
// scalar load broadcast at To's element size: an unmasked form ("rmbi") plus
// merge-masked ("rmbik") and zero-masked ("rmbikz") forms.
multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
                                           X86VectorVTInfo From,
                                           X86VectorVTInfo To,
                                           SDNodeXForm ImmXForm> :
      avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
  // reg, broadcast-mem -- unmasked.
  def : Pat<(From.VT (OpNode From.RC:$src1,
                             (bitconvert (To.VT (X86VBroadcast
                                                 (To.ScalarLdFrag addr:$src2)))),
                             imm:$src3)),
            (!cast<Instruction>(OpcodeStr#"rmbi")
                To.RC:$src1, addr:$src2,
                (ImmXForm imm:$src3))>;

  // reg, broadcast-mem -- merge-masking.
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1,
                                       (bitconvert
                                        (To.VT (X86VBroadcast
                                                (To.ScalarLdFrag addr:$src2)))),
                                       imm:$src3))),
                            To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rmbik")
                To.RC:$src0, To.KRCWM:$mask, To.RC:$src1, addr:$src2,
                (ImmXForm imm:$src3))>;

  // reg, broadcast-mem -- zero-masking.
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1,
                                       (bitconvert
                                        (To.VT (X86VBroadcast
                                                (To.ScalarLdFrag addr:$src2)))),
                                       imm:$src3))),
                            To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rmbikz")
                To.KRCWM:$mask, To.RC:$src1, addr:$src2,
                (ImmXForm imm:$src3))>;
}
9522
let Predicates = [HasAVX512] in {
  // 512-bit: lowering picks the widest element type it can, so the only
  // conversion needed here is valignq -> valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign,
                                         v8i64_info, v16i32_info,
                                         ValignqImm32XForm>;
}
9529
let Predicates = [HasVLX] in {
  // 128-bit: lowering picks the widest element type it can, so the only
  // conversion needed here is valignq -> valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign,
                                         v2i64x_info, v4i32x_info,
                                         ValignqImm32XForm>;
  // 256-bit: likewise, only valignq -> valignd is needed.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign,
                                         v4i64x_info, v8i32x_info,
                                         ValignqImm32XForm>;
}
9540
let Predicates = [HasVLX, HasBWI] in {
  // Turn masked 128-bit VALIGNQ/VALIGND into byte-masked VPALIGNR. Only the
  // 128-bit forms are instantiated here.
  // NOTE(review): presumably because VPALIGNR shifts within each 128-bit lane
  // and so cannot express a wider cross-lane VALIGN -- confirm.
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign,
                                      v2i64x_info, v16i8x_info,
                                      ValignqImm8XForm>;
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign,
                                      v4i32x_info, v16i8x_info,
                                      ValigndImm8XForm>;
}
9548
// Destination uses the i16 vector infos and sources the i8 vector infos;
// relies on the default (HasBWI) predicate of avx512_common_3Op_rm_imm8.
defm VDBPSADBW : avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
                                           SSE_INTMUL_ITINS_P,
                                           avx512vl_i16_info,
                                           avx512vl_i8_info>,
                 EVEX_CD8<8, CD8VF>;
Igor Bregerf3ded812015-08-31 13:09:30 +00009552
// Maskable unary operation for a single vector type: a register-source form
// (rr) and a full-vector-load form (rm), both in _.ExeDomain.
multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                              (ins _.RC:$src1), OpcodeStr,
                              "$src1", "$src1",
                              (_.VT (OpNode _.RC:$src1)), itins.rr>,
              EVEX, AVX5128IBase, Sched<[itins.Sched]>;

    defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                              (ins _.MemOp:$src1), OpcodeStr,
                              "$src1", "$src1",
                              (_.VT (OpNode (bitconvert
                                             (_.LdFrag addr:$src1)))),
                              itins.rm>,
              EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
              Sched<[itins.Sched.Folded]>;
  }
}
9570
// avx512_unary_rm plus an embedded-broadcast memory form (rmb) that applies
// OpNode to a broadcast scalar load.
multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            OpndItins itins, X86VectorVTInfo _> :
           avx512_unary_rm<opc, OpcodeStr, OpNode, itins, _> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                             (ins _.ScalarMemOp:$src1), OpcodeStr,
                             "${src1}"##_.BroadcastStr,
                             "${src1}"##_.BroadcastStr,
                             (_.VT (OpNode (X86VBroadcast
                                            (_.ScalarLdFrag addr:$src1)))),
                             itins.rm>,
             EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[itins.Sched.Folded]>;
}
9583
// avx512_unary_rm at all three vector widths: Z under prd, Z256/Z128 under
// prd + HasVLX.
multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              OpndItins itins, AVX512VLVectorVTInfo VTInfo,
                              Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, itins, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, itins,
                                VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, itins,
                                VTInfo.info128>,
                EVEX_V128;
  }
}
9598
// avx512_unary_rmb (register, memory and broadcast forms) at all three vector
// widths: Z under prd, Z256/Z128 under prd + HasVLX.
multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               OpndItins itins, AVX512VLVectorVTInfo VTInfo,
                               Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, itins,
                                 VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, itins,
                                 VTInfo.info128>,
                EVEX_V128;
  }
}
9613
// Dword/qword element variants of a unary operation. The mnemonic gets a
// "q" or "d" suffix; the Q variant additionally sets VEX_W.
multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q,
                                 string OpcodeStr, SDNode OpNode,
                                 OpndItins itins, Predicate prd> {
  defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, itins,
                               avx512vl_i64_info, prd>,
           VEX_W;
  defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, itins,
                               avx512vl_i32_info, prd>;
}
9621
// Byte/word element variants of a unary operation. The mnemonic gets a "w"
// or "b" suffix; these use avx512_unary_rm_vl, so no broadcast form is
// defined, and both are marked VEX_WIG.
multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w,
                                 string OpcodeStr, SDNode OpNode,
                                 OpndItins itins, Predicate prd> {
  defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, itins,
                              avx512vl_i16_info, prd>,
           VEX_WIG;
  defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, itins,
                              avx512vl_i8_info, prd>,
           VEX_WIG;
}
9629
// All four integer element sizes of a unary operation: D/Q under HasAVX512,
// B/W under HasBWI.
multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  OpndItins itins> {
  defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, itins,
                                    HasAVX512>,
              avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, itins,
                                    HasBWI>;
}
9639
// Opcodes are given in b, w, d, q order.
defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
                                    SSE_PABS>;
Igor Bregerf2460112015-07-26 14:41:44 +00009641
// VPABSQ without VLX: widen the 128/256-bit operand into an undefined 512-bit
// register, run the 512-bit instruction, and extract the low subregister.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v4i64 (abs VR256X:$src)),
            (EXTRACT_SUBREG
               (VPABSQZrr
                  (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src,
                                 sub_ymm)),
               sub_ymm)>;
  def : Pat<(v2i64 (abs VR128X:$src)),
            (EXTRACT_SUBREG
               (VPABSQZrr
                  (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src,
                                 sub_xmm)),
               sub_xmm)>;
}
9655
// When VLX is unavailable, implement the 128/256-bit form of a unary
// operation with the 512-bit instruction (InstrStr # "Zrr"): insert the
// operand into an undefined 512-bit register, operate, and extract the
// matching subregister.
multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
                                 AVX512VLVectorVTInfo _, Predicate prd> {
  let Predicates = [prd, NoVLX] in {
    // 256-bit operand.
    def : Pat<(_.info256.VT (OpNode _.info256.RC:$src1)),
              (EXTRACT_SUBREG
                 (!cast<Instruction>(InstrStr # "Zrr")
                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)),
                                   _.info256.RC:$src1,
                                   _.info256.SubRegIdx)),
                 _.info256.SubRegIdx)>;

    // 128-bit operand.
    def : Pat<(_.info128.VT (OpNode _.info128.RC:$src1)),
              (EXTRACT_SUBREG
                 (!cast<Instruction>(InstrStr # "Zrr")
                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)),
                                   _.info128.RC:$src1,
                                   _.info128.SubRegIdx)),
                 _.info128.SubRegIdx)>;
  }
}
9677
// FIXME: Is there a better scheduler itinerary for VPLZCNT?
defm VPLZCNT : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
                                     SSE_INTALU_ITINS_P, HasCDI>;

// FIXME: Is there a better scheduler itinerary for VPCONFLICT?
defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
                                        SSE_INTALU_ITINS_P, HasCDI>;

// VPLZCNT: without VLX, use the 512-bit instruction for 128/256-bit vectors.
defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;
Simon Pilgrimc89aa0b2017-05-05 12:20:34 +00009689
//===---------------------------------------------------------------------===//
// Counts number of ones - VPOPCNTD and VPOPCNTQ
//===---------------------------------------------------------------------===//

// FIXME: Is there a better scheduler itinerary for VPOPCNTD/VPOPCNTQ?
defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
                                     SSE_INTALU_ITINS_P, HasVPOPCNTDQ>;

// Without VLX, use the 512-bit instruction for 128/256-bit vectors.
defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info,
                             HasVPOPCNTDQ>;
defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info,
                             HasVPOPCNTDQ>;
Oren Ben Simhon7bf27f02017-05-25 13:45:23 +00009700
//===---------------------------------------------------------------------===//
// Replicate Single FP - MOVSHDUP and MOVSLDUP
//===---------------------------------------------------------------------===//

// Unary f32 operation at all vector widths, with the XS prefix.
multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            OpndItins itins> {
  defm NAME : avx512_unary_rm_vl<opc, OpcodeStr, OpNode, itins,
                                 avx512vl_f32_info, HasAVX512>,
              XS;
}

defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
                                  SSE_MOVDDUP>;
defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
                                  SSE_MOVDDUP>;
Igor Breger1f782962015-11-19 08:26:56 +00009712
//===----------------------------------------------------------------------===//
// AVX-512 - MOVDDUP
//===----------------------------------------------------------------------===//

// 128-bit MOVDDUP. Unlike avx512_unary_rm, the memory form takes a scalar
// memory operand and matches OpNode applied to a scalar_to_vector of a scalar
// load, with a CD8VH tuple type.
multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                              (ins _.RC:$src), OpcodeStr, "$src", "$src",
                              (_.VT (OpNode (_.VT _.RC:$src))), itins.rr>,
              EVEX, Sched<[itins.Sched]>;

    defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                              (ins _.ScalarMemOp:$src), OpcodeStr,
                              "$src", "$src",
                              (_.VT (OpNode (_.VT (scalar_to_vector
                                             (_.ScalarLdFrag addr:$src))))),
                              itins.rm>,
              EVEX, EVEX_CD8<_.EltSize, CD8VH>,
              Sched<[itins.Sched.Folded]>;
  }
}
9732
// MOVDDUP at all three vector widths.
//
// The 512-bit and 256-bit forms match OpNode through the generic unary
// multiclass. The 128-bit form is special: it is matched as an X86VBroadcast
// via avx512_movddup_128 rather than as OpNode.
//
// Z carries no explicit Predicates here; Z256/Z128 require HasAVX512 and
// HasVLX.
multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 OpndItins itins,
                                 AVX512VLVectorVTInfo VTInfo> {
  defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, itins, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, itins,
                                VTInfo.info256>,
                EVEX_V256;
    // Deliberately X86VBroadcast, not OpNode, for the 128-bit form.
    defm Z128 : avx512_movddup_128<opc, OpcodeStr, X86VBroadcast, itins,
                                   VTInfo.info128>,
                EVEX_V128;
  }
}
9745
// MOVDDUP over the f64 vector infos, with the XD prefix and VEX_W.
multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          OpndItins itins> {
  defm NAME : avx512_movddup_common<opc, OpcodeStr, OpNode, itins,
                                    avx512vl_f64_info>,
              XD, VEX_W;
}

defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SSE_MOVDDUP>;
Igor Breger1f782962015-11-19 08:26:56 +00009753
// Additional v2f64 broadcast patterns selecting the 128-bit VMOVDDUP.
let Predicates = [HasVLX] in {
// Unmasked: scalar load, scalar register, and full-vector load sources.
def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
          (VMOVDDUPZ128rm addr:$src)>;
def : Pat<(v2f64 (X86VBroadcast f64:$src)),
          (VMOVDDUPZ128rr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
def : Pat<(v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
          (VMOVDDUPZ128rm addr:$src)>;

// Scalar register source: merge-masked, then zero-masked.
def : Pat<(vselect (v2i1 VK2WM:$mask),
                   (v2f64 (X86VBroadcast f64:$src)),
                   (v2f64 VR128X:$src0)),
          (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
                           (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
def : Pat<(vselect (v2i1 VK2WM:$mask),
                   (v2f64 (X86VBroadcast f64:$src)),
                   (bitconvert (v4i32 immAllZerosV))),
          (VMOVDDUPZ128rrkz VK2WM:$mask,
                            (COPY_TO_REGCLASS FR64X:$src, VR128X))>;

// Scalar load source: merge-masked, then zero-masked.
def : Pat<(vselect (v2i1 VK2WM:$mask),
                   (v2f64 (X86VBroadcast (loadf64 addr:$src))),
                   (v2f64 VR128X:$src0)),
          (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(vselect (v2i1 VK2WM:$mask),
                   (v2f64 (X86VBroadcast (loadf64 addr:$src))),
                   (bitconvert (v4i32 immAllZerosV))),
          (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;

// Full-vector load source: merge-masked, then zero-masked.
def : Pat<(vselect (v2i1 VK2WM:$mask),
                   (v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
                   (v2f64 VR128X:$src0)),
          (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(vselect (v2i1 VK2WM:$mask),
                   (v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
                   (bitconvert (v4i32 immAllZerosV))),
          (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
}
Igor Breger1f782962015-11-19 08:26:56 +00009784
Igor Bregerf2460112015-07-26 14:41:44 +00009785//===----------------------------------------------------------------------===//
9786// AVX-512 - Unpack Instructions
9787//===----------------------------------------------------------------------===//
// Floating-point unpack high/low, built from the generic packed FP binop
// multiclass and available with base AVX512.
defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512,
                                 SSE_ALU_ITINS_S>;
defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512,
                                 SSE_ALU_ITINS_S>;

// Integer unpacks on byte and word elements; these are predicated on BWI.
defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
                                       SSE_INTALU_ITINS_P, HasBWI>;
defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
                                       SSE_INTALU_ITINS_P, HasBWI>;
defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
                                       SSE_INTALU_ITINS_P, HasBWI>;
defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
                                       SSE_INTALU_ITINS_P, HasBWI>;

// Integer unpacks on dword and qword elements; predicated on HasAVX512.
defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
                                       SSE_INTALU_ITINS_P, HasAVX512>;
defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
                                       SSE_INTALU_ITINS_P, HasAVX512>;
defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
                                        SSE_INTALU_ITINS_P, HasAVX512>;
defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
                                        SSE_INTALU_ITINS_P, HasAVX512>;
Igor Bregerdefab3c2015-10-08 12:55:01 +00009810
9811//===----------------------------------------------------------------------===//
9812// AVX-512 - Extract & Insert Integer Instructions
9813//===----------------------------------------------------------------------===//
9814
// Memory-destination ("mr") form shared by the byte and word element
// extracts. The pattern stores, to $dst, the result of OpNode applied to
// $src1 at immediate index $src2, truncated to the vector's element type.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   OpNode    - extract node matched by the pattern.
//   _         - vector type info of the source register.
multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   X86VectorVTInfo _> {
  def mr : AVX512Ii8<opc, MRMDestMem, (outs),
                     (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
                     !strconcat(OpcodeStr,
                                "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                     [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1),
                                                     imm:$src2))),
                             addr:$dst)]>,
           EVEX, EVEX_CD8<_.EltSize, CD8VT1>,
           Sched<[WriteShuffleLd, WriteRMW]>;
}
9824
// Byte element extract (opcode 0x14), available with BWI.
//   rr - result goes to a GR32/GR64 register via X86pextrb.
//   mr - store form, supplied by avx512_extract_elt_bw_m.
multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       !strconcat(OpcodeStr,
                                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                       [(set GR32orGR64:$dst,
                             (X86pextrb (_.VT _.RC:$src1), imm:$src2))]>,
             EVEX, TAPD, Sched<[WriteShuffle]>;

    defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
  }
}
9837
// Word element extract, available with BWI.
//   rr     - opcode 0xC5 (MRMSrcReg, PD map); carries the X86pextrw pattern.
//   rr_REV - opcode 0x15 (MRMDestReg, TAPD map) register form with no
//            pattern, printed with a ".s" suffix and tied to rr through
//            FoldGenData.
//   mr     - store form (opcode 0x15) from avx512_extract_elt_bw_m.
multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       !strconcat(OpcodeStr,
                                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                       [(set GR32orGR64:$dst,
                             (X86pextrw (_.VT _.RC:$src1), imm:$src2))],
                       IIC_SSE_PEXTRW>,
             EVEX, PD, Sched<[WriteShuffle]>;

    let hasSideEffects = 0 in
    def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
                           (ins _.RC:$src1, u8imm:$src2),
                           !strconcat(OpcodeStr,
                             ".s\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                           [], IIC_SSE_PEXTRW>,
                 EVEX, TAPD, FoldGenData<NAME#rr>, Sched<[WriteShuffle]>;

    defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
  }
}
9857
// Dword/qword element extract (opcode 0x16), available with DQI.
//   GRC - general-purpose register class that receives the element.
//   rr  - extractelt into a GRC register.
//   mr  - extractelt stored directly to memory.
multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
                                 RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       !strconcat(OpcodeStr,
                                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                       [(set GRC:$dst,
                             (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
             EVEX, TAPD, Sched<[WriteShuffle]>;

    def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
                       (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
                       !strconcat(OpcodeStr,
                                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                       [(store (extractelt (_.VT _.RC:$src1), imm:$src2),
                               addr:$dst)]>,
             EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
             Sched<[WriteShuffleLd, WriteRMW]>;
  }
}
9877
// Element-extract instantiations for the 128-bit vector types. The byte and
// word forms are marked VEX_WIG; the qword form sets VEX_W.
defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG;
defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG;
defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>,
                VEX_W;
9882
// Memory-source ("rm") form shared by all element inserts: OpNode inserts
// the scalar loaded through LdFrag from $src2 into $src1 at immediate index
// $src3.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   OpNode    - insert node matched by the pattern.
//   _         - vector type info.
//   LdFrag    - load fragment used for the scalar memory operand.
multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86VectorVTInfo _, PatFrag LdFrag> {
  def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
             (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
             !strconcat(OpcodeStr,
               "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
             [(set _.RC:$dst,
                   (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2),
                                 imm:$src3)))]>,
           EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
           Sched<[WriteShuffleLd, ReadAfterLd]>;
}
9892
// Byte/word element insert, available with BWI.
//   rr - scalar comes from a GR32/GR64 register.
//   rm - scalar comes from memory (see avx512_insert_elt_m).
multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86VectorVTInfo _, PatFrag LdFrag> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
               (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
               !strconcat(OpcodeStr,
                 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
               [(set _.RC:$dst,
                     (OpNode _.RC:$src1, GR32orGR64:$src2, imm:$src3))]>,
             EVEX_4V, Sched<[WriteShuffle]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag>;
  }
}
9906
// Dword/qword element insert, available with DQI. Both forms match the
// generic insertelt node and use the TAPD opcode map.
//   GRC - general-purpose register class supplying the scalar for rr.
multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
                                X86VectorVTInfo _, RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
               (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
               !strconcat(OpcodeStr,
                 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
               [(set _.RC:$dst,
                     (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
             EVEX_4V, TAPD, Sched<[WriteShuffle]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
                                    _.ScalarLdFrag>, TAPD;
  }
}
9921
// Element-insert instantiations for the 128-bit vector types. The byte and
// word forms load their scalar with an extending load and are VEX_WIG; the
// qword form sets VEX_W.
defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb,
                                     v16i8x_info, extloadi8>,
                TAPD, VEX_WIG;
defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw,
                                     v8i16x_info, extloadi16>,
                PD, VEX_WIG;
defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>,
                VEX_W;
Simon Pilgrim36be8522017-11-29 18:52:20 +00009928
Igor Bregera6297c72015-09-02 10:50:58 +00009929//===----------------------------------------------------------------------===//
9930// VSHUFPS - VSHUFPD Operations
9931//===----------------------------------------------------------------------===//
Simon Pilgrim36be8522017-11-29 18:52:20 +00009932
// VSHUFPS/VSHUFPD wrapper: a three-operand imm8 instruction (opcode 0xC6) on
// the X86Shufp node, instantiated over the FP vector infos.
// NOTE(review): VTInfo_I is accepted but not referenced in this body.
multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
                        AVX512VLVectorVTInfo VTInfo_FP> {
  defm NAME : avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
                                     SSE_SHUFP>,
              EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
              AVX512AIi8Base, EVEX_4V;
}
9939
// Single-precision form uses the PS prefix; double-precision uses PD with
// VEX_W set.
defm VSHUFPS : avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>,
               PS;
defm VSHUFPD : avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>,
               PD, VEX_W;
Simon Pilgrim36be8522017-11-29 18:52:20 +00009942
Asaf Badouhd2c35992015-09-02 14:21:54 +00009943//===----------------------------------------------------------------------===//
9944// AVX-512 - Byte shift Left/Right
9945//===----------------------------------------------------------------------===//
9946
// Itinerary/scheduling bundle for the whole-register byte shifts: the same
// itinerary class is used for both OpndItins slots, and the scheduling
// class is WriteVecShift.
let Sched = WriteVecShift in
def AVX512_BYTESHIFT : OpndItins<IIC_SSE_INTSHDQ_P_RI, IIC_SSE_INTSHDQ_P_RI>;
9951
// Byte-shift-by-immediate instruction pair for one vector width.
//   MRMr / MRMm - encoding formats of the register and memory forms.
//   rr - shifts register $src1 by the i8 immediate $src2.
//   rm - same operation with $src1 loaded from memory.
multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
                               Format MRMm, string OpcodeStr,
                               OpndItins itins, X86VectorVTInfo _> {
  def rr : AVX512<opc, MRMr, (outs _.RC:$dst),
                  (ins _.RC:$src1, u8imm:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set _.RC:$dst,
                        (_.VT (OpNode _.RC:$src1, (i8 imm:$src2))))],
                  itins.rr>,
           Sched<[itins.Sched]>;
  def rm : AVX512<opc, MRMm, (outs _.RC:$dst),
                  (ins _.MemOp:$src1, u8imm:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set _.RC:$dst,
                        (_.VT (OpNode
                                (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                (i8 imm:$src2))))],
                  itins.rm>,
           Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
9968
// Expands avx512_shift_packed over the three vector widths, treating each
// as a byte vector. The 512-bit form needs only `prd`; the 256/128-bit
// forms also need VLX.
multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
                                   Format MRMm, string OpcodeStr,
                                   OpndItins itins, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr, itins,
                               v64i8_info>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    itins, v32i8x_info>,
                EVEX_V256;
    defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    itins, v16i8x_info>,
                EVEX_V128;
  }
}
// Both byte shifts share opcode 0x73 and differ only in their ModRM formats
// (MRM7r/MRM7m for the left shift, MRM3r/MRM3m for the right shift).
defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m,
                                       "vpslldq", AVX512_BYTESHIFT, HasBWI>,
               AVX512PDIi8Base, EVEX_4V, VEX_WIG;
defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m,
                                       "vpsrldq", AVX512_BYTESHIFT, HasBWI>,
               AVX512PDIi8Base, EVEX_4V, VEX_WIG;
Asaf Badouhd2c35992015-09-02 14:21:54 +00009988
9989
// Two-source instruction pair whose result type differs from its source
// type.
//   _dst - vector type info of the result.
//   _src - vector type info of both sources.
//   rr - both sources in registers.
//   rm - second source loaded from memory.
multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
                                string OpcodeStr, OpndItins itins,
                                X86VectorVTInfo _dst, X86VectorVTInfo _src> {
  def rr : AVX512BI<opc, MRMSrcReg, (outs _dst.RC:$dst),
                    (ins _src.RC:$src1, _src.RC:$src2),
                    OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _dst.RC:$dst,
                          (_dst.VT (OpNode (_src.VT _src.RC:$src1),
                                           (_src.VT _src.RC:$src2))))],
                    itins.rr>,
           Sched<[itins.Sched]>;
  def rm : AVX512BI<opc, MRMSrcMem, (outs _dst.RC:$dst),
                    (ins _src.RC:$src1, _src.MemOp:$src2),
                    OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _dst.RC:$dst,
                          (_dst.VT (OpNode (_src.VT _src.RC:$src1),
                                           (_src.VT (bitconvert
                                             (_src.LdFrag addr:$src2))))))],
                    itins.rm>,
           Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
10009
// Expands avx512_psadbw_packed over the three vector widths, pairing each
// byte-vector source type with the i64-vector result type of the same
// width. The 256/128-bit forms additionally require VLX.
multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
                                    string OpcodeStr, OpndItins itins,
                                    Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, itins,
                                v8i64_info, v64i8_info>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, itins,
                                     v4i64x_info, v32i8x_info>,
                EVEX_V256;
    defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, itins,
                                     v2i64x_info, v16i8x_info>,
                EVEX_V128;
  }
}
10023
// VPSADBW (opcode 0xf6) on the X86psadbw node, predicated on BWI.
defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
                                        SSE_MPSADBW_ITINS, HasBWI>,
               EVEX_4V, VEX_WIG;
Igor Bregerb4bb1902015-10-15 12:33:24 +000010026
Craig Topper4e794c72017-02-19 19:36:58 +000010027// Transforms to swizzle an immediate to enable better matching when
10028// memory operand isn't in the right place.
def VPTERNLOG321_imm8 : SDNodeXForm<imm, [{
  // Rewrite a VPTERNLOG immediate for operand 0 and operand 2 exchanged.
  uint8_t Imm = N->getZExtValue();
  // Bits 0, 2, 5 and 7 stay where they are; bits 1/4 and 3/6 trade places.
  uint8_t NewImm = Imm & 0xa5;
  NewImm |= (Imm & 0x02) << 3; // bit 1 -> bit 4
  NewImm |= (Imm & 0x10) >> 3; // bit 4 -> bit 1
  NewImm |= (Imm & 0x08) << 3; // bit 3 -> bit 6
  NewImm |= (Imm & 0x40) >> 3; // bit 6 -> bit 3
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG213_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1
  // (operands numbered from 0, as in VPTERNLOG321_imm8; the immediate is
  // indexed by (op0 << 2) | (op1 << 1) | op2).
  uint8_t Imm = N->getZExtValue();
  // Swap bits 2/4 and 3/5; bits 0, 1, 6 and 7 are unaffected.
  uint8_t NewImm = Imm & 0xc3;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x20) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG132_imm8 : SDNodeXForm<imm, [{
  // Rewrite a VPTERNLOG immediate for operand 1 and operand 2 exchanged.
  uint8_t Imm = N->getZExtValue();
  // Bits 0, 3, 4 and 7 stay where they are; bits 1/2 and 5/6 trade places.
  uint8_t NewImm = Imm & 0x99;
  NewImm |= (Imm & 0x02) << 1; // bit 1 -> bit 2
  NewImm |= (Imm & 0x04) >> 1; // bit 2 -> bit 1
  NewImm |= (Imm & 0x20) << 1; // bit 5 -> bit 6
  NewImm |= (Imm & 0x40) >> 1; // bit 6 -> bit 5
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG231_imm8 : SDNodeXForm<imm, [{
  // Rewrite a VPTERNLOG immediate for the first operand moved to the end.
  uint8_t Imm = N->getZExtValue();
  // Bits 0 and 7 are fixed; the rest rotate:
  // 1->2, 2->4, 3->6, 4->1, 5->3, 6->5.
  uint8_t NewImm = Imm & 0x81;
  NewImm |= (Imm & 0x02) << 1; // bit 1 -> bit 2
  NewImm |= (Imm & 0x04) << 2; // bit 2 -> bit 4
  NewImm |= (Imm & 0x08) << 3; // bit 3 -> bit 6
  NewImm |= (Imm & 0x10) >> 3; // bit 4 -> bit 1
  NewImm |= (Imm & 0x20) >> 2; // bit 5 -> bit 3
  NewImm |= (Imm & 0x40) >> 1; // bit 6 -> bit 5
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG312_imm8 : SDNodeXForm<imm, [{
  // Rewrite a VPTERNLOG immediate for the last operand moved to the front.
  uint8_t Imm = N->getZExtValue();
  // Bits 0 and 7 are fixed; the rest rotate:
  // 1->4, 2->1, 3->5, 4->2, 5->6, 6->3.
  uint8_t NewImm = Imm & 0x81;
  NewImm |= (Imm & 0x02) << 3; // bit 1 -> bit 4
  NewImm |= (Imm & 0x04) >> 1; // bit 2 -> bit 1
  NewImm |= (Imm & 0x08) << 2; // bit 3 -> bit 5
  NewImm |= (Imm & 0x10) >> 2; // bit 4 -> bit 2
  NewImm |= (Imm & 0x20) << 1; // bit 5 -> bit 6
  NewImm |= (Imm & 0x40) >> 3; // bit 6 -> bit 3
  return getI8Imm(NewImm, SDLoc(N));
}]>;
Craig Topper4e794c72017-02-19 19:36:58 +000010088
// VPTERNLOG for one vector width.
//
// Defines the register (rri), full-vector load (rmi) and broadcast load
// (rmbi) forms through AVX512_maskable_3src, with $src1 tied to $dst, then
// adds selection patterns for nodes whose operands are not already in
// instruction order. Each such pattern reorders the operands into
// ($src1, $src2, $src3) and rewrites the immediate with a VPTERNLOGxyz_imm8
// transform so the computed function is unchanged.
//
// The immediate is a truth table indexed by (op0 << 2) | (op1 << 1) | op2.
// When node operands (A, B, C) are emitted in instruction order:
//   (C, B, A) uses VPTERNLOG321_imm8   (A, C, B) uses VPTERNLOG132_imm8
//   (B, A, C) uses VPTERNLOG213_imm8   (B, C, A) uses VPTERNLOG231_imm8
//   (C, A, B) uses VPTERNLOG312_imm8
multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          OpndItins itins, X86VectorVTInfo _>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT _.RC:$src3),
                              (i8 imm:$src4)), itins.rr, 1, 1>,
                      AVX512AIi8Base, EVEX_4V, Sched<[itins.Sched]>;
  defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
                    OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT _.RC:$src2),
                            (_.VT (bitconvert (_.LdFrag addr:$src3))),
                            (i8 imm:$src4)), itins.rm, 1, 0>,
                    AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[itins.Sched.Folded, ReadAfterLd]>;
  defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
                    OpcodeStr, "$src4, ${src3}"##_.BroadcastStr##", $src2",
                    "$src2, ${src3}"##_.BroadcastStr##", $src4",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT _.RC:$src2),
                            (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
                            (i8 imm:$src4)), itins.rm, 1, 0>, EVEX_B,
                    AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }// Constraints = "$src1 = $dst"

  // Additional patterns for matching passthru operand in other positions.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(NAME#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(NAME#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 imm:$src4))>;

  // Additional patterns for matching loads in other positions.
  def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)),
                          _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
            (!cast<Instruction>(NAME#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
                                   addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (OpNode _.RC:$src1,
                          (bitconvert (_.LdFrag addr:$src3)),
                          _.RC:$src2, (i8 imm:$src4))),
            (!cast<Instruction>(NAME#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
                                   addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;

  // Additional patterns for matching zero masking with loads in other
  // positions.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(NAME#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, (i8 imm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(NAME#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;

  // Additional patterns for matching masked loads with different
  // operand orders.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1,
                    (bitconvert (_.LdFrag addr:$src3)), (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
  // Node operands are (A, B, C) = ($src2, mem, $src1); the instruction takes
  // ($src1, $src2, mem) = (C, A, B). The new immediate must satisfy
  // NewImm[c a b] = Imm[a b c], i.e. bit 1 -> bit 4, which is the 312
  // transform (not 231).
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src1, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
  // Node operands are (A, B, C) = (mem, $src1, $src2); the instruction takes
  // (B, C, A). NewImm[b c a] = Imm[a b c], i.e. bit 1 -> bit 2, which is the
  // 231 transform (not 312).
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;

  // Additional patterns for matching broadcasts in other positions.
  def : Pat<(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                          _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
            (!cast<Instruction>(NAME#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
                                   addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (OpNode _.RC:$src1,
                          (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                          _.RC:$src2, (i8 imm:$src4))),
            (!cast<Instruction>(NAME#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
                                   addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;

  // Additional patterns for matching zero masking with broadcasts in other
  // positions.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(NAME#_.ZSuffix#rmbikz) _.RC:$src1,
             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1,
                    (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src2, (i8 imm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(NAME#_.ZSuffix#rmbikz) _.RC:$src1,
             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG132_imm8 imm:$src4))>;

  // Additional patterns for matching masked broadcasts with different
  // operand orders.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1,
                    (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src2, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1,
                    (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    (i8 imm:$src4)), _.RC:$src1)),
            (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
  // ($src2, bcst, $src1) -> ($src1, $src2, bcst) is the (C, A, B) order, so
  // the 312 transform applies (see the matching load pattern above).
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2,
                    (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src1, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
  // (bcst, $src1, $src2) -> ($src1, $src2, bcst) is the (B, C, A) order, so
  // the 231 transform applies.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
}
10256
// Expands avx512_ternlog (opcode 0x25, X86vpternlog node) over the three
// vector widths of an AVX512VLVectorVTInfo. The 128/256-bit forms also
// require VLX.
multiclass avx512_common_ternlog<string OpcodeStr, OpndItins itins,
                                 AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, itins,
                          _.info512>,
           EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, itins,
                               _.info128>,
                EVEX_V128;
    defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, itins,
                               _.info256>,
                EVEX_V256;
  }
}
10266
// Dword- and qword-element instantiations; the qword form sets VEX_W.
defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SSE_INTALU_ITINS_P,
                                        avx512vl_i32_info>;
defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SSE_INTALU_ITINS_P,
                                        avx512vl_i64_info>,
                  VEX_W;
Igor Bregerb4bb1902015-10-15 12:33:24 +000010271
Craig Topper8a444ee2018-01-26 22:17:40 +000010272
10273// Patterns to implement vnot using vpternlog instead of creating all ones
10274// using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
10275// so that the result is only dependent on src0. But we use the same source
10276// for all operands to prevent a false dependency.
10277// TODO: We should maybe have a more generalized algorithm for folding to
10278// vpternlog.
// 512-bit vnot: xor with all-ones becomes VPTERNLOGQ with immediate 15,
// using $src for all three operands.
let Predicates = [HasAVX512] in {
  def : Pat<(v8i64 (xor VR512:$src, (bc_v8i64 (v16i32 immAllOnesV)))),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
}

// Without VLX, the 128/256-bit cases are widened: $src is inserted into an
// undefined 512-bit register, the 512-bit instruction is used, and the low
// subregister is extracted again.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v2i64 (xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV)))),
            (EXTRACT_SUBREG
              (VPTERNLOGQZrri
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                (i8 15)),
              sub_xmm)>;
  def : Pat<(v4i64 (xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV)))),
            (EXTRACT_SUBREG
              (VPTERNLOGQZrri
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                (i8 15)),
              sub_ymm)>;
}

// With VLX, the native 128/256-bit instructions are used directly.
let Predicates = [HasVLX] in {
  def : Pat<(v2i64 (xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV)))),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src,
                               (i8 15))>;
  def : Pat<(v4i64 (xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV)))),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src,
                               (i8 15))>;
}
10307
Asaf Badouhd4a0d9a2016-01-19 14:21:39 +000010308//===----------------------------------------------------------------------===//
10309// AVX-512 - FixupImm
10310//===----------------------------------------------------------------------===//
10311
// Packed FIXUPIMM for one vector width, with $src1 tied to $dst. $src3 is
// matched using the integer vector type (_.IntVT) and every form passes
// FROUND_CURRENT as the rounding operand.
//   rri  - register $src3.
//   rmi  - full-vector load for $src3.
//   rmbi - broadcast scalar load for $src3 (EVEX_B).
multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  OpndItins itins, X86VectorVTInfo _>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
    defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                 !strconcat(OpcodeStr, _.Suffix),
                 "$src4, $src3, $src2", "$src2, $src3, $src4",
                 (OpNode (_.VT _.RC:$src1),
                         (_.VT _.RC:$src2),
                         (_.IntVT _.RC:$src3),
                         (i32 imm:$src4),
                         (i32 FROUND_CURRENT)), itins.rr>,
               Sched<[itins.Sched]>;
    defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                 (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
                 !strconcat(OpcodeStr, _.Suffix),
                 "$src4, $src3, $src2", "$src2, $src3, $src4",
                 (OpNode (_.VT _.RC:$src1),
                         (_.VT _.RC:$src2),
                         (_.IntVT (bitconvert (_.LdFrag addr:$src3))),
                         (i32 imm:$src4),
                         (i32 FROUND_CURRENT)), itins.rm>,
               Sched<[itins.Sched.Folded, ReadAfterLd]>;
    defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                  !strconcat(OpcodeStr, _.Suffix),
                  !strconcat("$src4, ${src3}", _.BroadcastStr, ", $src2"),
                  !strconcat("$src2, ${src3}", _.BroadcastStr, ", $src4"),
                  (OpNode (_.VT _.RC:$src1),
                          (_.VT _.RC:$src2),
                          (_.IntVT (X86VBroadcast
                                     (_.ScalarLdFrag addr:$src3))),
                          (i32 imm:$src4),
                          (i32 FROUND_CURRENT)), itins.rm>,
                EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  } // Constraints = "$src1 = $dst"
}
10344
// Register-only {sae} form of packed FIXUPIMM: same operands as the rri
// form, but the pattern passes FROUND_NO_EXC and the record sets EVEX_B.
multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
                                      SDNode OpNode, OpndItins itins,
                                      X86VectorVTInfo _>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
    defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                  !strconcat(OpcodeStr, _.Suffix),
                  "$src4, {sae}, $src3, $src2",
                  "$src2, $src3, {sae}, $src4",
                  (OpNode (_.VT _.RC:$src1),
                          (_.VT _.RC:$src2),
                          (_.IntVT _.RC:$src3),
                          (i32 imm:$src4),
                          (i32 FROUND_NO_EXC)), itins.rr>,
                EVEX_B, Sched<[itins.Sched]>;
  }
}
10361
// Scalar VFIXUPIMM. `_` supplies the register class, suffix and memory
// operand; `_src3VT` supplies the value type used for $src3 in the selection
// patterns. $src1 is tied to $dst. Defines:
//   rri  - register $src3, FROUND_CURRENT
//   rrib - register $src3 with {sae}, FROUND_NO_EXC, EVEX_B
//   rmi  - scalar memory $src3, FROUND_CURRENT
multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  OpndItins itins, X86VectorVTInfo _,
                                  X86VectorVTInfo _src3VT> {
  let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
      ExeDomain = _.ExeDomain in {
    defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_src3VT.VT _src3VT.RC:$src3),
                              (i32 imm:$src4),
                              (i32 FROUND_CURRENT)), itins.rr>, Sched<[itins.Sched]>;
    // rrib is a register-register form (MRMSrcReg): there is no load to fold,
    // so it uses the rr itinerary and the unfolded scheduling class rather
    // than the memory-form ones.
    defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
                      "$src2, $src3, {sae}, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_src3VT.VT _src3VT.RC:$src3),
                              (i32 imm:$src4),
                              (i32 FROUND_NO_EXC)), itins.rr>,
                      EVEX_B, Sched<[itins.Sched]>;
    defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_src3VT.VT (scalar_to_vector
                                           (_src3VT.ScalarLdFrag addr:$src3))),
                              (i32 imm:$src4),
                              (i32 FROUND_CURRENT)), itins.rm>,
                      Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
10397
// Instantiates packed VFIXUPIMM (opcode 0x54) for every vector width in _Vec.
// The 512-bit record (Z) also pulls in the {sae} form; the 128/256-bit
// records additionally require HasVLX.
multiclass avx512_fixupimm_packed_all<OpndItins itins,
                                      AVX512VLVectorVTInfo _Vec> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, itins,
                                    _Vec.info512>,
             avx512_fixupimm_packed_sae<0x54, "vfixupimm", X86VFixupimm, itins,
                                        _Vec.info512>,
             AVX512AIi8Base, EVEX_4V, EVEX_V512;
  }
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, itins,
                                       _Vec.info128>,
                AVX512AIi8Base, EVEX_4V, EVEX_V128;
    defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, itins,
                                       _Vec.info256>,
                AVX512AIi8Base, EVEX_4V, EVEX_V256;
  }
}
10411
// Scalar forms (opcode 0x55).
defm VFIXUPIMMSS : avx512_fixupimm_scalar<0x55, "vfixupimm",
                                          X86VFixupimmScalar, SSE_ALU_F32S,
                                          f32x_info, v4i32x_info>,
                   AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VFIXUPIMMSD : avx512_fixupimm_scalar<0x55, "vfixupimm",
                                          X86VFixupimmScalar, SSE_ALU_F64S,
                                          f64x_info, v2i64x_info>,
                   AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>,
                   VEX_W;
// Packed forms, all vector lengths.
defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SSE_ALU_F32P, avx512vl_f32_info>,
                   EVEX_CD8<32, CD8VF>;
defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SSE_ALU_F64P, avx512vl_f64_info>,
                   EVEX_CD8<64, CD8VF>, VEX_W;
Craig Topper5625d242016-07-29 06:06:00 +000010422
10423
10424
10425// Patterns used to select SSE scalar fp arithmetic instructions from
10426// either:
10427//
10428// (1) a scalar fp operation followed by a blend
10429//
10430// The effect is that the backend no longer emits unnecessary vector
10431// insert instructions immediately after SSE scalar fp instructions
10432// like addss or mulss.
10433//
10434// For example, given the following code:
10435// __m128 foo(__m128 A, __m128 B) {
10436// A[0] += B[0];
10437// return A;
10438// }
10439//
10440// Previously we generated:
10441// addss %xmm0, %xmm1
10442// movss %xmm1, %xmm0
10443//
10444// We now generate:
10445// addss %xmm1, %xmm0
10446//
10447// (2) a vector packed single/double fp operation followed by a vector insert
10448//
10449// The effect is that the backend converts the packed fp instruction
10450// followed by a vector insert into a single SSE scalar fp instruction.
10451//
10452// For example, given the following code:
10453// __m128 foo(__m128 A, __m128 B) {
10454// __m128 C = A + B;
10455// return (__m128) {C[0], A[1], A[2], A[3]};
10456// }
10457//
10458// Previously we generated:
10459// addps %xmm0, %xmm1
10460// movss %xmm1, %xmm0
10461//
10462// We now generate:
10463// addss %xmm1, %xmm0
10464
10465// TODO: Some canonicalization in lowering would simplify the number of
10466// patterns we have to try to match.
// Selection patterns that fold an f32 math op plus an X86Movss insert into
// the corresponding V<OpcPrefix>SSZrr_Int / V<OpcPrefix>SSZrr_Intk
// instruction. All patterns require HasAVX512.
multiclass AVX512_scalar_math_f32_patterns<SDNode Op, string OpcPrefix> {
  let Predicates = [HasAVX512] in {
    // Scalar op on element 0 of $dst, re-inserted into $dst via movss.
    def : Pat<(v4f32 (X86Movss
                       (v4f32 VR128X:$dst),
                       (v4f32 (scalar_to_vector
                                (Op (f32 (extractelt (v4f32 VR128X:$dst),
                                                     (iPTR 0))),
                                    FR32X:$src))))),
              (!cast<I>("V"#OpcPrefix#SSZrr_Int)
                 v4f32:$dst,
                 (COPY_TO_REGCLASS FR32X:$src, VR128X))>;

    // Full-vector op whose result is inserted into $dst via movss.
    def : Pat<(v4f32 (X86Movss
                       (v4f32 VR128X:$dst),
                       (Op (v4f32 VR128X:$dst), (v4f32 VR128X:$src)))),
              (!cast<I>("V"#OpcPrefix#SSZrr_Int) v4f32:$dst, v4f32:$src)>;

    // Masked scalar op (X86selects between the op result and $src0) on
    // element 0 of $src1, re-inserted via movss.
    def : Pat<(X86Movss
                (v4f32 VR128X:$src1),
                (scalar_to_vector
                  (X86selects VK1WM:$mask,
                              (Op (f32 (extractelt (v4f32 VR128X:$src1),
                                                   (iPTR 0))),
                                  FR32X:$src2),
                              FR32X:$src0))),
              (!cast<I>("V"#OpcPrefix#SSZrr_Intk)
                 (COPY_TO_REGCLASS FR32X:$src0, VR128X),
                 VK1WM:$mask, v4f32:$src1,
                 (COPY_TO_REGCLASS FR32X:$src2, VR128X))>;
  }
}

defm : AVX512_scalar_math_f32_patterns<fadd, "ADD">;
defm : AVX512_scalar_math_f32_patterns<fsub, "SUB">;
defm : AVX512_scalar_math_f32_patterns<fmul, "MUL">;
defm : AVX512_scalar_math_f32_patterns<fdiv, "DIV">;
10498
// Selection patterns that fold an f64 math op plus an X86Movsd insert into
// the corresponding V<OpcPrefix>SDZrr_Int / V<OpcPrefix>SDZrr_Intk
// instruction. All patterns require HasAVX512.
multiclass AVX512_scalar_math_f64_patterns<SDNode Op, string OpcPrefix> {
  let Predicates = [HasAVX512] in {
    // Scalar op on element 0 of $dst, re-inserted into $dst via movsd.
    def : Pat<(v2f64 (X86Movsd
                       (v2f64 VR128X:$dst),
                       (v2f64 (scalar_to_vector
                                (Op (f64 (extractelt (v2f64 VR128X:$dst),
                                                     (iPTR 0))),
                                    FR64X:$src))))),
              (!cast<I>("V"#OpcPrefix#SDZrr_Int)
                 v2f64:$dst,
                 (COPY_TO_REGCLASS FR64X:$src, VR128X))>;

    // Full-vector op whose result is inserted into $dst via movsd.
    def : Pat<(v2f64 (X86Movsd
                       (v2f64 VR128X:$dst),
                       (Op (v2f64 VR128X:$dst), (v2f64 VR128X:$src)))),
              (!cast<I>("V"#OpcPrefix#SDZrr_Int) v2f64:$dst, v2f64:$src)>;

    // Masked scalar op (X86selects between the op result and $src0) on
    // element 0 of $src1, re-inserted via movsd.
    def : Pat<(X86Movsd
                (v2f64 VR128X:$src1),
                (scalar_to_vector
                  (X86selects VK1WM:$mask,
                              (Op (f64 (extractelt (v2f64 VR128X:$src1),
                                                   (iPTR 0))),
                                  FR64X:$src2),
                              FR64X:$src0))),
              (!cast<I>("V"#OpcPrefix#SDZrr_Intk)
                 (COPY_TO_REGCLASS FR64X:$src0, VR128X),
                 VK1WM:$mask, v2f64:$src1,
                 (COPY_TO_REGCLASS FR64X:$src2, VR128X))>;
  }
}

defm : AVX512_scalar_math_f64_patterns<fadd, "ADD">;
defm : AVX512_scalar_math_f64_patterns<fsub, "SUB">;
defm : AVX512_scalar_math_f64_patterns<fmul, "MUL">;
defm : AVX512_scalar_math_f64_patterns<fdiv, "DIV">;
Coby Tayree2a1c02f2017-11-21 09:11:41 +000010530
10531//===----------------------------------------------------------------------===//
10532// AES instructions
10533//===----------------------------------------------------------------------===//
Coby Tayree7ca5e5872017-11-21 09:30:33 +000010534
// EVEX-encoded VAES instructions. IntPrefix names the 128-bit intrinsic; the
// 256/512-bit intrinsics are looked up as IntPrefix + "_256" / "_512".
// Z128/Z256 require HasVLX, Z requires HasAVX512; all require HasVAES.
multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
  let Predicates = [HasVLX, HasVAES] in {
    defm Z128 : AESI_binop_rm_int<Op, OpStr, !cast<Intrinsic>(IntPrefix),
                                  loadv2i64, 0, VR128X, i128mem>,
                EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
    defm Z256 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix##"_256"),
                                  loadv4i64, 0, VR256X, i256mem>,
                EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512, HasVAES] in {
    defm Z : AESI_binop_rm_int<Op, OpStr,
                               !cast<Intrinsic>(IntPrefix##"_512"),
                               loadv8i64, 0, VR512, i512mem>,
             EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
  }
}

defm VAESENC     : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
defm VAESENCLAST : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
defm VAESDEC     : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
defm VAESDECLAST : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;
10557
Coby Tayree7ca5e5872017-11-21 09:30:33 +000010558//===----------------------------------------------------------------------===//
10559// PCLMUL instructions - Carry less multiplication
10560//===----------------------------------------------------------------------===//
10561
// 512-bit form: requires HasAVX512.
let Predicates = [HasAVX512, HasVPCLMULQDQ] in {
  defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64,
                                int_x86_pclmulqdq_512>,
                     EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;
}

// 128/256-bit EVEX forms: require HasVLX.
let Predicates = [HasVLX, HasVPCLMULQDQ] in {
  defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64,
                                   int_x86_pclmulqdq>,
                        EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;

  defm VPCLMULQDQZ256 : vpclmulqdq<VR256X, i256mem, loadv4i64,
                                   int_x86_pclmulqdq_256>,
                        EVEX_4V, EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_WIG;
}

// Aliases
defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
10579
Coby Tayree71e37cc2017-11-21 09:48:44 +000010580//===----------------------------------------------------------------------===//
10581// VBMI2
10582//===----------------------------------------------------------------------===//
10583
// Three-source VBMI2 variable shift: register (`r`) and full-vector memory
// (`m`) forms. $src1 is tied to $dst.
multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
                              OpndItins itins, X86VectorVTInfo VTI> {
  let ExeDomain = VTI.ExeDomain, Constraints = "$src1 = $dst" in {
    // $src3 in a register.
    defm r : AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
               (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
               "$src3, $src2", "$src2, $src3",
               (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3)),
               itins.rr>,
             AVX512FMA3Base, Sched<[itins.Sched]>;
    // $src3 loaded from memory.
    defm m : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
               (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
               "$src3, $src2", "$src2, $src3",
               (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                        (VTI.VT (bitconvert (VTI.LdFrag addr:$src3))))),
               itins.rm>,
             AVX512FMA3Base, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
10602
// Extends VBMI2_shift_var_rm with a broadcast-memory form (`mb`), where
// $src3 is a scalar load broadcast via X86VBroadcast and the record is
// tagged EVEX_B.
multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                               OpndItins itins, X86VectorVTInfo VTI>
         : VBMI2_shift_var_rm<Op, OpStr, OpNode, itins, VTI> {
  let ExeDomain = VTI.ExeDomain, Constraints = "$src1 = $dst" in {
    defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
                "${src3}"##VTI.BroadcastStr##", $src2",
                "$src2, ${src3}"##VTI.BroadcastStr,
                (OpNode VTI.RC:$src1, VTI.RC:$src2,
                        (VTI.VT (X86VBroadcast
                                  (VTI.ScalarLdFrag addr:$src3)))),
                itins.rm>,
              AVX512FMA3Base, EVEX_B,
              Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
10617
// Instantiates VBMI2_shift_var_rm (no broadcast form) for all three vector
// lengths; the 128/256-bit records additionally require HasVLX.
multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     OpndItins itins,
                                     AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasVBMI2] in {
    defm Z : VBMI2_shift_var_rm<Op, OpStr, OpNode, itins, VTI.info512>,
             EVEX_V512;
  }
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, itins, VTI.info256>,
                EVEX_V256;
    defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, itins, VTI.info128>,
                EVEX_V128;
  }
}
10627
// Instantiates VBMI2_shift_var_rmb (including the broadcast form) for all
// three vector lengths; the 128/256-bit records additionally require HasVLX.
multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
                                      OpndItins itins,
                                      AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasVBMI2] in {
    defm Z : VBMI2_shift_var_rmb<Op, OpStr, OpNode, itins, VTI.info512>,
             EVEX_V512;
  }
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, itins, VTI.info256>,
                EVEX_V256;
    defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, itins, VTI.info128>,
                EVEX_V128;
  }
}
// Word/dword/qword variants of a variable concat-shift. The W variant uses
// opcode wOp and has no broadcast form; D and Q share opcode dqOp and do.
multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, OpndItins itins> {
  defm W : VBMI2_shift_var_rm_common<wOp, Prefix##"w", OpNode, itins,
                                     avx512vl_i16_info>,
           VEX_W, EVEX_CD8<16, CD8VF>;
  defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix##"d", OpNode, itins,
                                      avx512vl_i32_info>,
           EVEX_CD8<32, CD8VF>;
  defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix##"q", OpNode, itins,
                                      avx512vl_i64_info>,
           VEX_W, EVEX_CD8<64, CD8VF>;
}
10646
// Word/dword/qword variants of an immediate concat-shift. The W variant uses
// opcode wOp via avx512_common_3Op_rm_imm8; D and Q share opcode dqOp via
// avx512_common_3Op_imm8.
multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, OpndItins itins> {
  defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix##"w", itins,
                                     avx512vl_i16_info, avx512vl_i16_info,
                                     HasVBMI2>,
           VEX_W, EVEX_CD8<16, CD8VF>;
  defm D : avx512_common_3Op_imm8<Prefix##"d", avx512vl_i32_info, dqOp,
                                  OpNode, itins, HasVBMI2>,
           AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
  defm Q : avx512_common_3Op_imm8<Prefix##"q", avx512vl_i64_info, dqOp,
                                  OpNode, itins, HasVBMI2>,
           AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
}
10657
// Concat & Shift
defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv,
                               SSE_INTMUL_ITINS_P>;
defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv,
                               SSE_INTMUL_ITINS_P>;
defm VPSHLD  : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld,
                               SSE_INTMUL_ITINS_P>;
defm VPSHRD  : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd,
                               SSE_INTMUL_ITINS_P>;

// Compress
defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", AVX512_COMPRESS,
                                         avx512vl_i8_info, HasVBMI2>,
                   EVEX;
defm VPCOMPRESSW : compress_by_elt_width<0x63, "vpcompressw", AVX512_COMPRESS,
                                         avx512vl_i16_info, HasVBMI2>,
                   EVEX, VEX_W;
// Expand
defm VPEXPANDB : expand_by_elt_width<0x62, "vpexpandb", AVX512_EXPAND,
                                     avx512vl_i8_info, HasVBMI2>,
                 EVEX;
defm VPEXPANDW : expand_by_elt_width<0x62, "vpexpandw", AVX512_EXPAND,
                                     avx512vl_i16_info, HasVBMI2>,
                 EVEX, VEX_W;
Coby Tayree71e37cc2017-11-21 09:48:44 +000010674
Coby Tayree3880f2a2017-11-21 10:04:28 +000010675//===----------------------------------------------------------------------===//
10676// VNNI
10677//===----------------------------------------------------------------------===//
10678
// Three-source VNNI instruction: register (`r`), full-vector memory (`m`)
// and broadcast-memory (`mb`) forms. $src1 is tied to $dst for every record
// defined by this multiclass.
let Constraints = "$src1 = $dst" in
multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                    OpndItins itins, X86VectorVTInfo VTI> {
  // $src3 in a register.
  defm r : AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
             (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
             "$src3, $src2", "$src2, $src3",
             (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3)),
             itins.rr>,
           EVEX_4V, T8PD, Sched<[itins.Sched]>;
  // $src3 loaded from memory.
  defm m : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
             (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
             "$src3, $src2", "$src2, $src3",
             (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                      (VTI.VT (bitconvert (VTI.LdFrag addr:$src3))))),
             itins.rm>,
           EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
           Sched<[itins.Sched.Folded, ReadAfterLd]>;
  // $src3 is a scalar load broadcast via X86VBroadcast.
  defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
              (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
              "${src3}"##VTI.BroadcastStr##", $src2",
              "$src2, ${src3}"##VTI.BroadcastStr,
              (OpNode VTI.RC:$src1, VTI.RC:$src2,
                      (VTI.VT (X86VBroadcast
                                (VTI.ScalarLdFrag addr:$src3)))),
              itins.rm>,
            EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B, T8PD,
            Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
10706
// Instantiates VNNI_rmb on the i32 vector types for all three vector
// lengths; the 128/256-bit records additionally require HasVLX.
multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
                       OpndItins itins> {
  let Predicates = [HasVNNI] in {
    defm Z : VNNI_rmb<Op, OpStr, OpNode, itins, v16i32_info>, EVEX_V512;
  }
  let Predicates = [HasVNNI, HasVLX] in {
    defm Z256 : VNNI_rmb<Op, OpStr, OpNode, itins, v8i32x_info>, EVEX_V256;
    defm Z128 : VNNI_rmb<Op, OpStr, OpNode, itins, v4i32x_info>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler itinerary for VPDP?
defm VPDPBUSD  : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SSE_PMADD>;
defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SSE_PMADD>;
defm VPDPWSSD  : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SSE_PMADD>;
defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SSE_PMADD>;
Coby Tayree3880f2a2017-11-21 10:04:28 +000010721
Coby Tayree5c7fe5d2017-11-21 10:32:42 +000010722//===----------------------------------------------------------------------===//
10723// Bit Algorithms
10724//===----------------------------------------------------------------------===//
10725
// Byte/word population count, selected from the generic ctpop node and
// gated on HasBITALG.
// FIXME: Is there a better scheduler itinerary for VPOPCNTB/VPOPCNTW?
defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop,
                                   SSE_INTALU_ITINS_P, avx512vl_i8_info,
                                   HasBITALG>;
defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop,
                                   SSE_INTALU_ITINS_P, avx512vl_i16_info,
                                   HasBITALG>,
                VEX_W;

defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
Coby Tayree5c7fe5d2017-11-21 10:32:42 +000010734
// VPSHUFBITQMB (opcode 0x8F): writes a mask register (VTI.KRC) from two
// vector sources. `rr` takes $src2 in a register, `rm` loads it from memory.
multiclass VPSHUFBITQMB_rm<OpndItins itins, X86VectorVTInfo VTI> {
  defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
              (ins VTI.RC:$src1, VTI.RC:$src2),
              "vpshufbitqmb",
              "$src2, $src1", "$src1, $src2",
              (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
                               (VTI.VT VTI.RC:$src2)),
              itins.rr>,
            EVEX_4V, T8PD, Sched<[itins.Sched]>;
  defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
              (ins VTI.RC:$src1, VTI.MemOp:$src2),
              "vpshufbitqmb",
              "$src2, $src1", "$src1, $src2",
              (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
                               (VTI.VT (bitconvert
                                         (VTI.LdFrag addr:$src2)))),
              itins.rm>,
            EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
            Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
10752
// Instantiates VPSHUFBITQMB_rm for all three vector lengths; the 128/256-bit
// records additionally require HasVLX.
multiclass VPSHUFBITQMB_common<OpndItins itins, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasBITALG] in {
    defm Z : VPSHUFBITQMB_rm<itins, VTI.info512>, EVEX_V512;
  }
  let Predicates = [HasBITALG, HasVLX] in {
    defm Z256 : VPSHUFBITQMB_rm<itins, VTI.info256>, EVEX_V256;
    defm Z128 : VPSHUFBITQMB_rm<itins, VTI.info128>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler itinerary for VPSHUFBITQMB?
defm VPSHUFBITQMB : VPSHUFBITQMB_common<SSE_INTMUL_ITINS_P, avx512vl_i8_info>;
Coby Tayreee8bdd382017-11-23 11:15:50 +000010764
Coby Tayreed8b17be2017-11-26 09:36:41 +000010765//===----------------------------------------------------------------------===//
10766// GFNI
10767//===----------------------------------------------------------------------===//
10768
// EVEX-encoded GF2P8MULB on byte vectors for all three vector lengths, built
// on avx512_binop_rm with its last template argument set to 1. Every width
// requires HasGFNI and HasBWI; Z also requires HasAVX512 and Z256/Z128
// require HasVLX.
multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode> {
  let Predicates = [HasGFNI, HasAVX512, HasBWI] in {
    defm Z : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info,
                             SSE_INTALU_ITINS_P, 1>,
             EVEX_V512;
  }
  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
    defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info,
                                SSE_INTALU_ITINS_P, 1>,
                EVEX_V256;
    defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info,
                                SSE_INTALU_ITINS_P, 1>,
                EVEX_V128;
  }
}

defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb>,
                  EVEX_CD8<8, CD8VF>, T8PD;
Coby Tayreed8b17be2017-11-26 09:36:41 +000010783
// GF2P8AFFINE* with an immediate: inherits the forms defined by
// avx512_3Op_rm_imm8 and adds a broadcast-memory form (`rmbi`).
//   VTI     - byte-vector type of the destination and $src1.
//   BcstVTI - vector type describing the broadcast: the pattern loads an i64
//             from $src2, broadcasts it as BcstVTI.VT and bitconverts the
//             result for OpNode.
multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
                                      OpndItins itins, X86VectorVTInfo VTI,
                                      X86VectorVTInfo BcstVTI>
           : avx512_3Op_rm_imm8<Op, OpStr, OpNode, itins, VTI, VTI> {
  let ExeDomain = VTI.ExeDomain in
  // The memory operand is the broadcast element, so it must be the scalar
  // memory operand of BcstVTI (matching the loadi64 in the pattern and the
  // BcstVTI.BroadcastStr in the asm string), not the byte-sized one of VTI.
  defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src1, BcstVTI.ScalarMemOp:$src2, u8imm:$src3),
                OpStr, "$src3, ${src2}"##BcstVTI.BroadcastStr##", $src1",
                "$src1, ${src2}"##BcstVTI.BroadcastStr##", $src3",
                (OpNode (VTI.VT VTI.RC:$src1),
                 (bitconvert (BcstVTI.VT (X86VBroadcast (loadi64 addr:$src2)))),
                 (i8 imm:$src3)), itins.rm>, EVEX_B,
                 Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
10798
// Instantiates GF2P8AFFINE_avx512_rmb_imm for all three vector lengths,
// pairing each byte-vector type with the i64 vector type of the same width
// for the broadcast form. Every width requires HasGFNI and HasBWI; Z also
// requires HasAVX512 and Z256/Z128 require HasVLX.
multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     OpndItins itins> {
  let Predicates = [HasGFNI, HasAVX512, HasBWI] in {
    defm Z : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, itins,
                                        v64i8_info, v8i64_info>,
             EVEX_V512;
  }
  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
    defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, itins,
                                           v32i8x_info, v4i64x_info>,
                EVEX_V256;
    defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, itins,
                                           v16i8x_info, v2i64x_info>,
                EVEX_V128;
  }
}

defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
                                                   X86GF2P8affineinvqb,
                                                   SSE_INTMUL_ITINS_P>,
                         EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
defm VGF2P8AFFINEQB    : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
                                                   X86GF2P8affineqb,
                                                   SSE_INTMUL_ITINS_P>,
                         EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
Coby Tayreed8b17be2017-11-26 09:36:41 +000010818