blob: 2e6566f62167b9720333af44078dffa06cba2c90 [file] [log] [blame]
Eric Christopher06b32cd2015-02-20 00:36:53 +00001//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file describes the X86 AVX512 instruction set, defining the
11// instructions, and properties of the instructions which are needed for code
12// generation, machine code emission, and analysis.
13//
14//===----------------------------------------------------------------------===//
15
Adam Nemet5ed17da2014-08-21 19:50:07 +000016// Group template arguments that can be derived from the vector type (EltNum x
17// EltVT). These are things like the register class for the writemask, etc.
18// The idea is to pass one of these as the template argument rather than the
19// individual arguments.
Elena Demikhovskyfa4a6c12014-12-09 07:06:32 +000020// The template is also used for scalar types, in this case numelts is 1.
// Template arguments:
//   numelts - number of elements (1 for scalar types), stored in NumElts.
//   eltvt   - element value type, stored in EltVT.
//   rc      - register class, stored in RC.
//   suffix  - mnemonic suffix, stored in Suffix (defaults to "").
// All other fields are derived from these four values.
Robert Khasanov4204c1a2014-12-12 14:21:30 +000021class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
Adam Nemet5ed17da2014-08-21 19:50:07 +000022 string suffix = ""> {
23 RegisterClass RC = rc;
Robert Khasanov4204c1a2014-12-12 14:21:30 +000024 ValueType EltVT = eltvt;
Adam Nemet449b3f02014-10-15 23:42:09 +000025 int NumElts = numelts;
Adam Nemet5ed17da2014-08-21 19:50:07 +000026
27 // Corresponding mask register class.
28 RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);
29
30 // Corresponding write-mask register class.
31 RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");
32
Igor Bregerfca0a342016-01-28 13:19:25 +000033 // The mask VT.
Guy Blank548e22a2017-05-19 12:35:15 +000034 ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");
Simon Pilgrimb13961d2016-06-11 14:34:10 +000035
Adam Nemet5ed17da2014-08-21 19:50:07 +000036 // Suffix used in the instruction mnemonic.
37 string Suffix = suffix;
38
Elena Demikhovskyfa4a6c12014-12-09 07:06:32 +000039 // VTName is a string name for vector VT. For vector types it will be
40 // v # NumElts # EltVT, so for vector of 8 elements of i32 it will be v8i32
41 // It is a little bit complex for scalar types, where NumElts = 1.
42 // In this case we build v4f32 or v2f64
 // (more precisely: 4 elements for a 32-bit EltVT and 2 elements for a 64-bit
 // EltVT, whether the element type is integer or floating point).
43 string VTName = "v" # !if (!eq (NumElts, 1),
44 !if (!eq (EltVT.Size, 32), 4,
45 !if (!eq (EltVT.Size, 64), 2, NumElts)), NumElts) # EltVT;
Robert Khasanov2ea081d2014-08-25 14:49:34 +000046
Adam Nemet5ed17da2014-08-21 19:50:07 +000047 // The vector VT.
Robert Khasanov2ea081d2014-08-25 14:49:34 +000048 ValueType VT = !cast<ValueType>(VTName);
Adam Nemet5ed17da2014-08-21 19:50:07 +000049
 // Element type as a string, e.g. "i32" or "f64".
50 string EltTypeName = !cast<string>(EltVT);
51 // Size of the element type in bits, e.g. 32 for v16i32.
 // EltSizeName is that size as a string (EltTypeName with "i"/"f" removed);
 // EltSize is the same value as an integer.
Robert Khasanov2ea081d2014-08-25 14:49:34 +000052 string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
53 int EltSize = EltVT.Size;
Adam Nemet5ed17da2014-08-21 19:50:07 +000054
55 // "i" for integer types and "f" for floating-point types
Robert Khasanov2ea081d2014-08-25 14:49:34 +000056 string TypeVariantName = !subst(EltSizeName, "", EltTypeName);
Adam Nemet5ed17da2014-08-21 19:50:07 +000057
58 // Size of RC in bits, e.g. 512 for VR512.
 // (Computed from VT.Size, i.e. the size of the vector type built above.)
59 int Size = VT.Size;
60
61 // The corresponding memory operand, e.g. i512mem for VR512.
62 X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
 // Memory operand for a single element, e.g. i32mem / f64mem.
Robert Khasanov2ea081d2014-08-25 14:49:34 +000063 X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
Ayman Musaf77219e2017-02-13 09:55:48 +000064 // FP scalar memory operand for intrinsics - ssmem/sdmem.
 // Left unset (?) when the element type is neither f32 nor f64.
65 Operand IntScalarMemOp = !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
66 !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?));
Robert Khasanov2ea081d2014-08-25 14:49:34 +000067
68 // Load patterns
69 // Note: For 128/256/512-bit integer VT we choose loadv2i64/loadv4i64/loadv8i64
70 // due to load promotion during legalization
71 PatFrag LdFrag = !cast<PatFrag>("load" #
72 !if (!eq (TypeVariantName, "i"),
73 !if (!eq (Size, 128), "v2i64",
74 !if (!eq (Size, 256), "v4i64",
Craig Toppera78b7682016-08-11 06:04:07 +000075 !if (!eq (Size, 512), "v8i64",
76 VTName))), VTName));
Elena Demikhovsky2689d782015-03-02 12:46:21 +000077
 // Same selection as LdFrag, but using the "alignedload" fragments.
78 PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" #
Craig Toppera78b7682016-08-11 06:04:07 +000079 !if (!eq (TypeVariantName, "i"),
80 !if (!eq (Size, 128), "v2i64",
81 !if (!eq (Size, 256), "v4i64",
82 !if (!eq (Size, 512), "v8i64",
83 VTName))), VTName));
Elena Demikhovsky2689d782015-03-02 12:46:21 +000084
 // Load fragment for a single element, i.e. "load" # EltVT.
Robert Khasanov2ea081d2014-08-25 14:49:34 +000085 PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
Adam Nemet5ed17da2014-08-21 19:50:07 +000086
 // ComplexPattern chosen by element type: sse_load_f32 for f32, sse_load_f64
 // for f64, unset (?) otherwise.
Craig Topperd9fe6642017-02-21 04:26:10 +000087 ComplexPattern ScalarIntMemCPat = !if (!eq (EltTypeName, "f32"),
88 !cast<ComplexPattern>("sse_load_f32"),
89 !if (!eq (EltTypeName, "f64"),
90 !cast<ComplexPattern>("sse_load_f64"),
91 ?));
92
Adam Nemet5ed17da2014-08-21 19:50:07 +000093 // The corresponding float type, e.g. v16f32 for v16i32
Robert Khasanov2ea081d2014-08-25 14:49:34 +000094 // Note: For EltSize < 32, FloatVT is illegal and TableGen
95 // fails to compile, so we choose FloatVT = VT
96 ValueType FloatVT = !cast<ValueType>(
97 !if (!eq (!srl(EltSize,5),0),
98 VTName,
99 !if (!eq(TypeVariantName, "i"),
100 "v" # NumElts # "f" # EltSize,
101 VTName)));
Adam Nemet5ed17da2014-08-21 19:50:07 +0000102
 // The corresponding integer type, e.g. v16i32 for v16f32. Uses the same
 // EltSize < 32 guard as FloatVT, in which case IntVT = VT.
Asaf Badouhd4a0d9a2016-01-19 14:21:39 +0000103 ValueType IntVT = !cast<ValueType>(
104 !if (!eq (!srl(EltSize,5),0),
105 VTName,
106 !if (!eq(TypeVariantName, "f"),
107 "v" # NumElts # "i" # EltSize,
108 VTName)));
Adam Nemet5ed17da2014-08-21 19:50:07 +0000109 // The string to specify embedded broadcast in assembly.
110 string BroadcastStr = "{1to" # NumElts # "}";
Adam Nemet55536c62014-09-25 23:48:45 +0000111
Adam Nemet449b3f02014-10-15 23:42:09 +0000112 // 8-bit compressed displacement tuple/subvector format. This is only
113 // defined for NumElts <= 8.
 // (The guard below tests NumElts < 16; otherwise the field is left unset.)
114 CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
115 !cast<CD8VForm>("CD8VT" # NumElts), ?);
116
 // Sub-register index: sub_xmm for 128-bit types, sub_ymm for 256-bit types,
 // unset (?) for any other size.
Adam Nemet55536c62014-09-25 23:48:45 +0000117 SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
118 !if (!eq (Size, 256), sub_ymm, ?));
119
 // Execution domain: SSEPackedSingle for f32, SSEPackedDouble for f64,
 // SSEPackedInt for every other element type.
120 Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
121 !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
122 SSEPackedInt));
Adam Nemet09377232014-10-08 23:25:31 +0000123
 // FR32X for f32 elements; FR64X for every other element type.
Elena Demikhovsky52e81bc2015-02-23 15:12:31 +0000124 RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);
125
Craig Topperabe80cc2016-08-28 06:06:28 +0000126 // A vector type of the same width with element type i64. This is used to
127 // create patterns for logic ops.
128 ValueType i64VT = !cast<ValueType>("v" # !srl(Size, 6) # "i64");
129
Adam Nemet09377232014-10-08 23:25:31 +0000130 // A vector type of the same width with element type i32. This is used to
131 // create the canonical constant zero node ImmAllZerosV.
132 ValueType i32VT = !cast<ValueType>("v" # !srl(Size, 5) # "i32");
133 dag ImmAllZerosV = (VT (bitconvert (i32VT immAllZerosV)));
Elena Demikhovskyd207f172015-03-03 15:03:35 +0000134
 // Size-dependent name suffix: "Z128" for 128-bit, "Z256" for 256-bit, and
 // "Z" for every other size.
135 string ZSuffix = !if (!eq (Size, 128), "Z128",
136 !if (!eq (Size, 256), "Z256", "Z"));
Adam Nemet5ed17da2014-08-21 19:50:07 +0000137}
138
// X86VectorVTInfo records for the 512-bit vector types (RC = VR512).
Robert Khasanov2ea081d2014-08-25 14:49:34 +0000139def v64i8_info : X86VectorVTInfo<64, i8, VR512, "b">;
140def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
Adam Nemet5ed17da2014-08-21 19:50:07 +0000141def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
142def v8i64_info : X86VectorVTInfo<8, i64, VR512, "q">;
Adam Nemet6bddb8c2014-09-29 22:54:41 +0000143def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
144def v8f64_info : X86VectorVTInfo<8, f64, VR512, "pd">;
Adam Nemet5ed17da2014-08-21 19:50:07 +0000145
Robert Khasanov2ea081d2014-08-25 14:49:34 +0000146// "x" in v32i8x_info means RC = VR256X
147def v32i8x_info : X86VectorVTInfo<32, i8, VR256X, "b">;
148def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
149def v8i32x_info : X86VectorVTInfo<8, i32, VR256X, "d">;
150def v4i64x_info : X86VectorVTInfo<4, i64, VR256X, "q">;
Robert Khasanov3e534c92014-10-28 16:37:13 +0000151def v8f32x_info : X86VectorVTInfo<8, f32, VR256X, "ps">;
152def v4f64x_info : X86VectorVTInfo<4, f64, VR256X, "pd">;
Robert Khasanov2ea081d2014-08-25 14:49:34 +0000153
// Likewise, the 128-bit "x" records below use RC = VR128X.
154def v16i8x_info : X86VectorVTInfo<16, i8, VR128X, "b">;
155def v8i16x_info : X86VectorVTInfo<8, i16, VR128X, "w">;
156def v4i32x_info : X86VectorVTInfo<4, i32, VR128X, "d">;
157def v2i64x_info : X86VectorVTInfo<2, i64, VR128X, "q">;
Robert Khasanov3e534c92014-10-28 16:37:13 +0000158def v4f32x_info : X86VectorVTInfo<4, f32, VR128X, "ps">;
159def v2f64x_info : X86VectorVTInfo<2, f64, VR128X, "pd">;
Robert Khasanov2ea081d2014-08-25 14:49:34 +0000160
Elena Demikhovskyfa4a6c12014-12-09 07:06:32 +0000161// We map scalar types to the smallest (128-bit) vector type
162// with the appropriate element type. This allows using the same masking logic.
// Note that the integer scalar records use GR32/GR64 as RC, while the FP
// scalar records use VR128X.
Asaf Badouh2744d212015-09-20 14:31:19 +0000163def i32x_info : X86VectorVTInfo<1, i32, GR32, "si">;
164def i64x_info : X86VectorVTInfo<1, i64, GR64, "sq">;
Elena Demikhovsky905a5a62014-11-26 10:46:49 +0000165def f32x_info : X86VectorVTInfo<1, f32, VR128X, "ss">;
166def f64x_info : X86VectorVTInfo<1, f64, VR128X, "sd">;
167
// Bundles three X86VectorVTInfo records (512-, 256- and 128-bit, in that
// order) so that all three widths can be passed as one template argument.
//   i512 -> info512, i256 -> info256, i128 -> info128.
Robert Khasanov2ea081d2014-08-25 14:49:34 +0000168class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
169 X86VectorVTInfo i128> {
170 X86VectorVTInfo info512 = i512;
171 X86VectorVTInfo info256 = i256;
172 X86VectorVTInfo info128 = i128;
173}
174
// One AVX512VLVectorVTInfo per element type, grouping that element type's
// 512-bit (VR512), 256-bit (VR256X) and 128-bit (VR128X) info records.
175def avx512vl_i8_info : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
176 v16i8x_info>;
177def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
178 v8i16x_info>;
179def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
180 v4i32x_info>;
181def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
182 v2i64x_info>;
Robert Khasanovaf318f72014-10-30 14:21:47 +0000183def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
184 v4f32x_info>;
185def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
186 v2f64x_info>;
Robert Khasanov2ea081d2014-08-25 14:49:34 +0000187
// Type info for a mask (vNi1) vector type.
//   _krc   - mask register class, stored in KRC.
//   _krcwm - write-mask register class, stored in KRCWM.
//   _vt    - the mask value type, stored in KVT.
// The field names match the mask-related fields of X86VectorVTInfo.
Ayman Musa721d97f2017-06-27 12:08:37 +0000188class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
189 ValueType _vt> {
190 RegisterClass KRC = _krc;
191 RegisterClass KRCWM = _krcwm;
192 ValueType KVT = _vt;
193}
194
// X86KVectorVTInfo records for each mask width (1 to 64 elements): the VK<N>
// register class, its VK<N>WM write-mask class and the v<N>i1 value type.
Michael Zuckerman9e588312017-10-31 10:00:19 +0000195def v1i1_info : X86KVectorVTInfo<VK1, VK1WM, v1i1>;
Ayman Musa721d97f2017-06-27 12:08:37 +0000196def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>;
197def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>;
198def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>;
199def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
200def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
201def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;
202
Adam Nemet52bb6cf2014-10-08 23:25:23 +0000203// This multiclass generates the masking variants from the non-masking
204// variant. It only provides the assembly pieces for the masking variants.
205// It assumes custom ISel patterns for masking which can be provided as
206// template arguments.
//
// Three instructions are defined:
//   NAME    - unmasked form: operands Ins, patterns Pattern.
//   NAME#k  - masked form: operands MaskingIns, patterns MaskingPattern,
//             "{${mask}}" appended to $dst in the asm string, EVEX_K, and
//             Constraints set to MaskingConstraint.
//   NAME#kz - zero-masked form: operands ZeroMaskingIns, patterns
//             ZeroMaskingPattern, "{${mask}} {z}" appended to $dst, EVEX_KZ.
// AttSrcAsm / IntelSrcAsm are the source-operand parts of the AT&T and Intel
// asm strings. IsCommutable sets isCommutable on NAME and NAME#kz;
// IsKCommutable sets it on NAME#k.
Adam Nemet34801422014-10-08 23:25:39 +0000207multiclass AVX512_maskable_custom<bits<8> O, Format F,
208 dag Outs,
209 dag Ins, dag MaskingIns, dag ZeroMaskingIns,
210 string OpcodeStr,
211 string AttSrcAsm, string IntelSrcAsm,
212 list<dag> Pattern,
213 list<dag> MaskingPattern,
214 list<dag> ZeroMaskingPattern,
Simon Pilgrimd3e21c62017-12-09 16:20:54 +0000215 InstrItinClass itin,
Adam Nemet34801422014-10-08 23:25:39 +0000216 string MaskingConstraint = "",
Vyacheslav Klochkov6daefcf2016-08-11 22:07:33 +0000217 bit IsCommutable = 0,
218 bit IsKCommutable = 0> {
Adam Nemet52bb6cf2014-10-08 23:25:23 +0000219 let isCommutable = IsCommutable in
220 def NAME: AVX512<O, F, Outs, Ins,
Elena Demikhovsky0d7e9362015-05-11 06:05:05 +0000221 OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
Craig Topper9d2cab72016-01-11 01:03:40 +0000222 "$dst, "#IntelSrcAsm#"}",
Adam Nemet52bb6cf2014-10-08 23:25:23 +0000223 Pattern, itin>;
224
225 // Prefer over VMOV*rrk Pat<>
Craig Topper63801df2017-02-19 21:44:35 +0000226 let isCommutable = IsKCommutable in
Adam Nemet52bb6cf2014-10-08 23:25:23 +0000227 def NAME#k: AVX512<O, F, Outs, MaskingIns,
Elena Demikhovsky0d7e9362015-05-11 06:05:05 +0000228 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
229 "$dst {${mask}}, "#IntelSrcAsm#"}",
Adam Nemet52bb6cf2014-10-08 23:25:23 +0000230 MaskingPattern, itin>,
231 EVEX_K {
232 // In case of the 3src subclass this is overridden with a let.
233 string Constraints = MaskingConstraint;
Vyacheslav Klochkov6daefcf2016-08-11 22:07:33 +0000234 }
235
236 // Zero mask does not add any restrictions to commute operands transformation.
237 // So, it is Ok to use IsCommutable instead of IsKCommutable.
Craig Topper63801df2017-02-19 21:44:35 +0000238 let isCommutable = IsCommutable in // Prefer over VMOV*rrkz Pat<>
Adam Nemet52bb6cf2014-10-08 23:25:23 +0000239 def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
Elena Demikhovsky0d7e9362015-05-11 06:05:05 +0000240 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
241 "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
Adam Nemet52bb6cf2014-10-08 23:25:23 +0000242 ZeroMaskingPattern,
243 itin>,
244 EVEX_KZ;
245}
246
Robert Khasanov2ea081d2014-08-25 14:49:34 +0000247
Adam Nemet34801422014-10-08 23:25:39 +0000248// Common base class of AVX512_maskable and AVX512_maskable_3src.
// Wraps the RHS dags into the three pattern lists expected by
// AVX512_maskable_custom:
//   unmasked    - (set _.RC:$dst, RHS)
//   masked      - (set _.RC:$dst, MaskingRHS)
//   zero-masked - (set _.RC:$dst, (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))
// "_" is the X86VectorVTInfo supplying RC, KRCWM and ImmAllZerosV. Select
// defaults to vselect; all remaining arguments are forwarded unchanged.
249multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
250 dag Outs,
251 dag Ins, dag MaskingIns, dag ZeroMaskingIns,
252 string OpcodeStr,
253 string AttSrcAsm, string IntelSrcAsm,
254 dag RHS, dag MaskingRHS,
Simon Pilgrim07dc6d62017-12-06 13:14:44 +0000255 InstrItinClass itin,
Elena Demikhovsky0d7e9362015-05-11 06:05:05 +0000256 SDNode Select = vselect,
Adam Nemet34801422014-10-08 23:25:39 +0000257 string MaskingConstraint = "",
Vyacheslav Klochkov6daefcf2016-08-11 22:07:33 +0000258 bit IsCommutable = 0,
259 bit IsKCommutable = 0> :
Adam Nemet34801422014-10-08 23:25:39 +0000260 AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
261 AttSrcAsm, IntelSrcAsm,
262 [(set _.RC:$dst, RHS)],
263 [(set _.RC:$dst, MaskingRHS)],
264 [(set _.RC:$dst,
Elena Demikhovsky905a5a62014-11-26 10:46:49 +0000265 (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
Simon Pilgrimd3e21c62017-12-09 16:20:54 +0000266 itin, MaskingConstraint, IsCommutable,
Vyacheslav Klochkov6daefcf2016-08-11 22:07:33 +0000267 IsKCommutable>;
Adam Nemet2e2537f2014-08-07 17:53:55 +0000268
Adam Nemet2e91ee52014-08-14 17:13:19 +0000269// This multiclass generates the unconditional/non-masking, the masking and
Elena Demikhovsky905a5a62014-11-26 10:46:49 +0000270// the zero-masking variant of the vector instruction. In the masking case, the
Adam Nemet2e91ee52014-08-14 17:13:19 +0000271// preserved vector elements come from a new dummy input operand tied to $dst.
Craig Topper3a622a12017-08-17 15:40:25 +0000272// This version uses a separate dag for non-masking and masking.
//
// RHS is used only for the unmasked pattern; MaskRHS is used inside the
// Select of both the masked and the zero-masked pattern. The masked operand
// list is (_.RC:$src0, _.KRCWM:$mask) followed by Ins, the zero-masked one is
// (_.KRCWM:$mask) followed by Ins, and the masking constraint is
// "$src0 = $dst".
273multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
274 dag Outs, dag Ins, string OpcodeStr,
275 string AttSrcAsm, string IntelSrcAsm,
276 dag RHS, dag MaskRHS,
Simon Pilgrimaa902be2017-12-06 15:48:40 +0000277 InstrItinClass itin,
Craig Topper3a622a12017-08-17 15:40:25 +0000278 bit IsCommutable = 0, bit IsKCommutable = 0,
279 SDNode Select = vselect> :
280 AVX512_maskable_custom<O, F, Outs, Ins,
281 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
282 !con((ins _.KRCWM:$mask), Ins),
283 OpcodeStr, AttSrcAsm, IntelSrcAsm,
284 [(set _.RC:$dst, RHS)],
285 [(set _.RC:$dst,
286 (Select _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
287 [(set _.RC:$dst,
288 (Select _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
Simon Pilgrimd3e21c62017-12-09 16:20:54 +0000289 itin, "$src0 = $dst", IsCommutable, IsKCommutable>;
Craig Topper3a622a12017-08-17 15:40:25 +0000290
291// This multiclass generates the unconditional/non-masking, the masking and
292// the zero-masking variant of the vector instruction. In the masking case, the
293// preserved vector elements come from a new dummy input operand tied to $dst.
//
// The same RHS dag is used for all three variants: directly for the unmasked
// form, as (Select _.KRCWM:$mask, RHS, _.RC:$src0) for the masked form, and
// (via AVX512_maskable_common) selected against _.ImmAllZerosV for the
// zero-masked form. The masking constraint is "$src0 = $dst".
Adam Nemet34801422014-10-08 23:25:39 +0000294multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
295 dag Outs, dag Ins, string OpcodeStr,
296 string AttSrcAsm, string IntelSrcAsm,
Elena Demikhovsky0d7e9362015-05-11 06:05:05 +0000297 dag RHS,
Simon Pilgrimaa902be2017-12-06 15:48:40 +0000298 InstrItinClass itin,
Vyacheslav Klochkov6daefcf2016-08-11 22:07:33 +0000299 bit IsCommutable = 0, bit IsKCommutable = 0,
300 SDNode Select = vselect> :
Adam Nemet34801422014-10-08 23:25:39 +0000301 AVX512_maskable_common<O, F, _, Outs, Ins,
302 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
303 !con((ins _.KRCWM:$mask), Ins),
304 OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
Simon Pilgrim07dc6d62017-12-06 13:14:44 +0000305 (Select _.KRCWM:$mask, RHS, _.RC:$src0), itin,
306 Select, "$src0 = $dst", IsCommutable, IsKCommutable>;
Elena Demikhovsky905a5a62014-11-26 10:46:49 +0000307
308// This multiclass generates the unconditional/non-masking, the masking and
309// the zero-masking variant of the scalar instruction.
// It forwards to AVX512_maskable with IsKCommutable fixed to 0 and X86selects
// (instead of the default vselect) as the Select node.
310multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
311 dag Outs, dag Ins, string OpcodeStr,
312 string AttSrcAsm, string IntelSrcAsm,
Elena Demikhovsky0d7e9362015-05-11 06:05:05 +0000313 dag RHS,
Simon Pilgrim4a9b1e12017-12-05 16:10:57 +0000314 InstrItinClass itin,
Elena Demikhovsky905a5a62014-11-26 10:46:49 +0000315 bit IsCommutable = 0> :
Craig Topper1aa49ca2017-09-01 07:58:14 +0000316 AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
317 RHS, itin, IsCommutable, 0, X86selects>;
Adam Nemet2e91ee52014-08-14 17:13:19 +0000318
Adam Nemet34801422014-10-08 23:25:39 +0000319// Similar to AVX512_maskable but in this case one of the source operands
Adam Nemet2e91ee52014-08-14 17:13:19 +0000320// ($src1) is already tied to $dst so we just use that for the preserved
321// vector elements. NOTE that the NonTiedIns (the ins dag) should exclude
322// $src1.
//
// _.RC:$src1 is prepended to NonTiedIns for every variant; the masked and
// zero-masked variants additionally take _.KRCWM:$mask. The masking
// constraint passed down is "", so the $src1/$dst tie is not set here.
// When MaskOnly is set, (null_frag) replaces RHS in the dag handed to
// AVX512_maskable_common as RHS (used there for the unmasked and zero-masked
// patterns); the masked pattern (Select ... RHS, _.RC:$src1) still uses the
// real RHS.
Adam Nemet34801422014-10-08 23:25:39 +0000323multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
324 dag Outs, dag NonTiedIns, string OpcodeStr,
325 string AttSrcAsm, string IntelSrcAsm,
Simon Pilgrim07dc6d62017-12-06 13:14:44 +0000326 dag RHS, InstrItinClass itin,
Simon Pilgrim6a009702017-11-29 17:21:15 +0000327 bit IsCommutable = 0,
Craig Topper1aa49ca2017-09-01 07:58:14 +0000328 bit IsKCommutable = 0,
Craig Topperb16598d2017-09-01 07:58:16 +0000329 SDNode Select = vselect,
330 bit MaskOnly = 0> :
Adam Nemet34801422014-10-08 23:25:39 +0000331 AVX512_maskable_common<O, F, _, Outs,
332 !con((ins _.RC:$src1), NonTiedIns),
333 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
334 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
Craig Topperb16598d2017-09-01 07:58:16 +0000335 OpcodeStr, AttSrcAsm, IntelSrcAsm,
336 !if(MaskOnly, (null_frag), RHS),
Simon Pilgrim07dc6d62017-12-06 13:14:44 +0000337 (Select _.KRCWM:$mask, RHS, _.RC:$src1), itin,
338 Select, "", IsCommutable, IsKCommutable>;
Adam Nemet2e91ee52014-08-14 17:13:19 +0000339
// Scalar counterpart of AVX512_maskable_3src: forwards every argument
// unchanged and passes X86selects as the Select node instead of vselect.
Igor Breger15820b02015-07-01 13:24:28 +0000340multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
341 dag Outs, dag NonTiedIns, string OpcodeStr,
342 string AttSrcAsm, string IntelSrcAsm,
Simon Pilgrim4a9b1e12017-12-05 16:10:57 +0000343 dag RHS, InstrItinClass itin,
Simon Pilgrim6a009702017-11-29 17:21:15 +0000344 bit IsCommutable = 0,
Craig Topperb16598d2017-09-01 07:58:16 +0000345 bit IsKCommutable = 0,
346 bit MaskOnly = 0> :
Craig Topper1aa49ca2017-09-01 07:58:14 +0000347 AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
Simon Pilgrim6a009702017-11-29 17:21:15 +0000348 IntelSrcAsm, RHS, itin, IsCommutable, IsKCommutable,
Craig Topperb16598d2017-09-01 07:58:16 +0000349 X86selects, MaskOnly>;
Adam Nemet2b5cdbb2014-10-08 23:25:33 +0000350
// Generates the unmasked, masked and zero-masked variants where only the
// unmasked form carries selection patterns (Pattern); the masked and
// zero-masked forms are given empty pattern lists ([]). The masked operand
// list is (_.RC:$src0, _.KRCWM:$mask) followed by Ins, and the masking
// constraint is "$src0 = $dst".
Adam Nemet34801422014-10-08 23:25:39 +0000351multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
352 dag Outs, dag Ins,
353 string OpcodeStr,
354 string AttSrcAsm, string IntelSrcAsm,
Simon Pilgrim569e53b2017-12-03 21:43:54 +0000355 list<dag> Pattern,
Simon Pilgrim4a9b1e12017-12-05 16:10:57 +0000356 InstrItinClass itin> :
Adam Nemet34801422014-10-08 23:25:39 +0000357 AVX512_maskable_custom<O, F, Outs, Ins,
358 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
359 !con((ins _.KRCWM:$mask), Ins),
Elena Demikhovsky0d7e9362015-05-11 06:05:05 +0000360 OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
Simon Pilgrimd3e21c62017-12-09 16:20:54 +0000361 itin, "$src0 = $dst">;
Adam Nemet2b5cdbb2014-10-08 23:25:33 +0000362
Elena Demikhovsky431b81e2015-04-21 13:13:46 +0000363
364// Instruction with mask that puts result in mask register,
365// like "compare" and "vptest"
//
// Two instructions are defined (there is no zero-masking variant here):
//   NAME   - unmasked form: operands Ins, patterns Pattern.
//   NAME#k - masked form: operands MaskingIns, patterns MaskingPattern,
//            "{${mask}}" appended to $dst in the asm string, EVEX_K.
// isCommutable is applied to NAME only; the `let ... in` precedes that single
// def and is not applied to NAME#k.
366multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
367 dag Outs,
368 dag Ins, dag MaskingIns,
369 string OpcodeStr,
370 string AttSrcAsm, string IntelSrcAsm,
371 list<dag> Pattern,
Craig Topper225da2c2016-08-27 05:22:15 +0000372 list<dag> MaskingPattern,
Simon Pilgrim4a9b1e12017-12-05 16:10:57 +0000373 InstrItinClass itin,
Craig Topper225da2c2016-08-27 05:22:15 +0000374 bit IsCommutable = 0> {
375 let isCommutable = IsCommutable in
Elena Demikhovsky431b81e2015-04-21 13:13:46 +0000376 def NAME: AVX512<O, F, Outs, Ins,
Craig Topper156622a2016-01-11 00:44:56 +0000377 OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
378 "$dst, "#IntelSrcAsm#"}",
Simon Pilgrimbb791b32017-11-30 13:18:06 +0000379 Pattern, itin>;
Elena Demikhovsky431b81e2015-04-21 13:13:46 +0000380
381 def NAME#k: AVX512<O, F, Outs, MaskingIns,
Craig Topper156622a2016-01-11 00:44:56 +0000382 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
383 "$dst {${mask}}, "#IntelSrcAsm#"}",
Simon Pilgrimbb791b32017-11-30 13:18:06 +0000384 MaskingPattern, itin>, EVEX_K;
Elena Demikhovsky431b81e2015-04-21 13:13:46 +0000385}
386
// Wraps RHS / MaskingRHS into the pattern lists expected by
// AVX512_maskable_custom_cmp. Both patterns write the mask register class of
// "_": (set _.KRC:$dst, RHS) and (set _.KRC:$dst, MaskingRHS).
387multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
388 dag Outs,
389 dag Ins, dag MaskingIns,
390 string OpcodeStr,
391 string AttSrcAsm, string IntelSrcAsm,
Craig Topper225da2c2016-08-27 05:22:15 +0000392 dag RHS, dag MaskingRHS,
Simon Pilgrim4a9b1e12017-12-05 16:10:57 +0000393 InstrItinClass itin,
Craig Topper225da2c2016-08-27 05:22:15 +0000394 bit IsCommutable = 0> :
Elena Demikhovsky431b81e2015-04-21 13:13:46 +0000395 AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
396 AttSrcAsm, IntelSrcAsm,
397 [(set _.KRC:$dst, RHS)],
Simon Pilgrimbb791b32017-11-30 13:18:06 +0000398 [(set _.KRC:$dst, MaskingRHS)], itin, IsCommutable>;
Elena Demikhovsky431b81e2015-04-21 13:13:46 +0000399
// Unmasked and masked variants of an instruction that writes a mask register.
// The masked operand list is (_.KRCWM:$mask) followed by Ins, and the masked
// pattern is the unmasked RHS and-ed with the mask:
// (and _.KRCWM:$mask, RHS).
400multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
401 dag Outs, dag Ins, string OpcodeStr,
402 string AttSrcAsm, string IntelSrcAsm,
Simon Pilgrim4a9b1e12017-12-05 16:10:57 +0000403 dag RHS, InstrItinClass itin,
Simon Pilgrimbb791b32017-11-30 13:18:06 +0000404 bit IsCommutable = 0> :
Elena Demikhovsky431b81e2015-04-21 13:13:46 +0000405 AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
406 !con((ins _.KRCWM:$mask), Ins),
407 OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
Simon Pilgrimbb791b32017-11-30 13:18:06 +0000408 (and _.KRCWM:$mask, RHS), itin, IsCommutable>;
Elena Demikhovsky431b81e2015-04-21 13:13:46 +0000409
// Variant of AVX512_maskable_cmp without selection patterns: both the
// unmasked and the masked instruction receive an empty pattern list ([]).
// The masked operand list is (_.KRCWM:$mask) followed by Ins; IsCommutable is
// left at its default.
Elena Demikhovsky29792e92015-05-07 11:24:42 +0000410multiclass AVX512_maskable_cmp_alt<bits<8> O, Format F, X86VectorVTInfo _,
411 dag Outs, dag Ins, string OpcodeStr,
Simon Pilgrimbb791b32017-11-30 13:18:06 +0000412 string AttSrcAsm, string IntelSrcAsm,
Simon Pilgrim4a9b1e12017-12-05 16:10:57 +0000413 InstrItinClass itin> :
Elena Demikhovsky29792e92015-05-07 11:24:42 +0000414 AVX512_maskable_custom_cmp<O, F, Outs,
415 Ins, !con((ins _.KRCWM:$mask),Ins), OpcodeStr,
Simon Pilgrimbb791b32017-11-30 13:18:06 +0000416 AttSrcAsm, IntelSrcAsm, [],[], itin>;
Elena Demikhovsky29792e92015-05-07 11:24:42 +0000417
Craig Topperabe80cc2016-08-28 06:06:28 +0000418// This multiclass generates the unconditional/non-masking, the masking and
419// the zero-masking variant of the vector instruction. In the masking case, the
420// preserved vector elements come from a new dummy input operand tied to $dst.
//
// RHS is used for the unmasked pattern only; MaskedRHS is used inside the
// Select of both the masked and the zero-masked pattern. IsKCommutable is not
// passed on, so AVX512_maskable_custom uses its default (0) for the masked
// form. The masking constraint is "$src0 = $dst".
421multiclass AVX512_maskable_logic<bits<8> O, Format F, X86VectorVTInfo _,
422 dag Outs, dag Ins, string OpcodeStr,
423 string AttSrcAsm, string IntelSrcAsm,
424 dag RHS, dag MaskedRHS,
Simon Pilgrim4a9b1e12017-12-05 16:10:57 +0000425 InstrItinClass itin,
Craig Topperabe80cc2016-08-28 06:06:28 +0000426 bit IsCommutable = 0, SDNode Select = vselect> :
427 AVX512_maskable_custom<O, F, Outs, Ins,
428 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
429 !con((ins _.KRCWM:$mask), Ins),
430 OpcodeStr, AttSrcAsm, IntelSrcAsm,
431 [(set _.RC:$dst, RHS)],
432 [(set _.RC:$dst,
433 (Select _.KRCWM:$mask, MaskedRHS, _.RC:$src0))],
434 [(set _.RC:$dst,
435 (Select _.KRCWM:$mask, MaskedRHS,
436 _.ImmAllZerosV))],
Simon Pilgrimd3e21c62017-12-09 16:20:54 +0000437 itin, "$src0 = $dst", IsCommutable>;
Craig Topperabe80cc2016-08-28 06:06:28 +0000438
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000439
Craig Topper9d9251b2016-05-08 20:10:20 +0000440// Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
441// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
Marina Yatsina6fc2aaa2018-01-22 10:05:23 +0000442// swizzled by ExecutionDomainFix to pxor.
Craig Topper9d9251b2016-05-08 20:10:20 +0000443// We set canFoldAsLoad because this can be converted to a constant-pool
444// load of an all-zeros value if folding it would be beneficial.
// AVX512_512_SETALLONES is defined under the same flags and matches the
// all-ones v16i32 constant (immAllOnesV) instead of the all-zeros one.
// Both pseudos take no inputs and have an empty asm string.
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000445let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
Craig Topper86748492016-07-11 05:36:41 +0000446 isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000447def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
Craig Topper9d9251b2016-05-08 20:10:20 +0000448 [(set VR512:$dst, (v16i32 immAllZerosV))]>;
Craig Topper516e14c2016-07-11 05:36:48 +0000449def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
450 [(set VR512:$dst, (v16i32 immAllOnesV))]>;
Craig Topperfb1746b2014-01-30 06:03:19 +0000451}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000452
Craig Topper6393afc2017-01-09 02:44:34 +0000453// Alias instructions that allow VPTERNLOG to be used with a mask to create
454// a mix of all ones and all zeros elements. This is done this way to force
455// the same register to be used as input for all three sources.
// _32 takes a VK16WM mask and selects between all-ones and all-zeros v16i32
// values; _64 takes a VK8WM mask and selects between the same constants
// bitcast to v8i64 (bc_v8i64). Both write VR512 and have an empty asm string.
Simon Pilgrim26f106f2017-12-08 15:17:32 +0000456let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
Craig Topper6393afc2017-01-09 02:44:34 +0000457def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
458 (ins VK16WM:$mask), "",
459 [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
460 (v16i32 immAllOnesV),
461 (v16i32 immAllZerosV)))]>;
462def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
463 (ins VK8WM:$mask), "",
464 [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
465 (bc_v8i64 (v16i32 immAllOnesV)),
466 (bc_v8i64 (v16i32 immAllZerosV))))]>;
467}
468
// Zero-vector pseudos for VR128X (v4i32) and VR256X (v8i32). They use the
// same flags and predicate (HasAVX512) as AVX512_512_SET0.
Craig Toppere5ce84a2016-05-08 21:33:53 +0000469let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
Craig Topper09b7e0f2017-01-14 07:29:24 +0000470 isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
Craig Toppere5ce84a2016-05-08 21:33:53 +0000471def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
472 [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
473def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
474 [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
475}
476
Craig Topperadd9cc62016-12-18 06:23:14 +0000477// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
478// This is expanded by ExpandPostRAPseudos.
// AVX512_FsFLD0SS produces fp32imm0 in FR32X and AVX512_FsFLD0SD produces
// fpimm0 in FR64X; both require HasAVX512.
479let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
Craig Topper09b7e0f2017-01-14 07:29:24 +0000480 isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
Craig Topperadd9cc62016-12-18 06:23:14 +0000481 def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
482 [(set FR32X:$dst, fp32imm0)]>;
483 def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
484 [(set FR64X:$dst, fpimm0)]>;
485}
486
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000487//===----------------------------------------------------------------------===//
488// AVX-512 - VECTOR INSERT
489//
Craig Topper3a622a12017-08-17 15:40:25 +0000490
491// Supports two different pattern operators for mask and unmasked ops. Allows
492// null_frag to be passed for one.
//
// Inserts a From-typed subvector ($src2) into a To-typed vector ($src1) at the
// position given by the immediate $src3. Two forms are generated through
// AVX512_maskable_split, each with its unmasked/masked/zero-masked variants:
//   rr - $src2 in a register (From.RC).
//   rm - $src2 loaded from memory (From.MemOp, mayLoad = 1), with
//        EVEX_CD8<From.EltSize, From.CD8TupleForm> and the folded schedule.
// The mnemonic is "vinsert" # From.EltTypeName # "x" # From.NumElts.
// vinsert_insert is used for the unmasked pattern, vinsert_for_mask for the
// masked patterns.
493multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
494 X86VectorVTInfo To,
495 SDPatternOperator vinsert_insert,
Simon Pilgrim031d8b72017-12-01 18:40:32 +0000496 SDPatternOperator vinsert_for_mask,
497 OpndItins itins> {
Craig Topperc228d792017-09-05 05:49:44 +0000498 let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
Craig Topper3a622a12017-08-17 15:40:25 +0000499 defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
Ayman Musaf77219e2017-02-13 09:55:48 +0000500 (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
Igor Breger0ede3cb2015-09-20 06:52:42 +0000501 "vinsert" # From.EltTypeName # "x" # From.NumElts,
502 "$src3, $src2, $src1", "$src1, $src2, $src3",
503 (vinsert_insert:$src3 (To.VT To.RC:$src1),
504 (From.VT From.RC:$src2),
Craig Topper3a622a12017-08-17 15:40:25 +0000505 (iPTR imm)),
506 (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
507 (From.VT From.RC:$src2),
Simon Pilgrim031d8b72017-12-01 18:40:32 +0000508 (iPTR imm)), itins.rr>,
509 AVX512AIi8Base, EVEX_4V, Sched<[itins.Sched]>;
Craig Topperc228d792017-09-05 05:49:44 +0000510 let mayLoad = 1 in
Craig Topper3a622a12017-08-17 15:40:25 +0000511 defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
Ayman Musaf77219e2017-02-13 09:55:48 +0000512 (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
Igor Breger0ede3cb2015-09-20 06:52:42 +0000513 "vinsert" # From.EltTypeName # "x" # From.NumElts,
514 "$src3, $src2, $src1", "$src1, $src2, $src3",
515 (vinsert_insert:$src3 (To.VT To.RC:$src1),
516 (From.VT (bitconvert (From.LdFrag addr:$src2))),
Craig Topper3a622a12017-08-17 15:40:25 +0000517 (iPTR imm)),
518 (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
519 (From.VT (bitconvert (From.LdFrag addr:$src2))),
Simon Pilgrim031d8b72017-12-01 18:40:32 +0000520 (iPTR imm)), itins.rm>, AVX512AIi8Base, EVEX_4V,
521 EVEX_CD8<From.EltSize, From.CD8TupleForm>,
522 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Adam Nemet4e2ef472014-10-02 23:18:28 +0000523 }
Adam Nemet4285c1f2014-10-15 23:42:17 +0000524}
Adam Nemet4e2ef472014-10-02 23:18:28 +0000525
Craig Topper3a622a12017-08-17 15:40:25 +0000526// Passes the same pattern operator for masked and unmasked ops.
// Thin wrapper over vinsert_for_size_split: vinsert_insert is supplied as both
// the unmasked operator and the masked operator.
527multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
528 X86VectorVTInfo To,
Simon Pilgrim031d8b72017-12-01 18:40:32 +0000529 SDPatternOperator vinsert_insert,
530 OpndItins itins> :
531 vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, itins>;
Craig Topper3a622a12017-08-17 15:40:25 +0000532
// Emits selection patterns only (no new instructions). A vinsert_insert of a
// From-typed subvector into a To-typed vector is mapped onto the existing
// instructions named InstrStr#"rr" (register source) and InstrStr#"rm"
// (source loaded through From.LdFrag). The instruction immediate is produced
// by applying INSERT_get_vinsert_imm to the matched node ($ins). Both
// patterns are guarded by the predicate list p.
Igor Breger0ede3cb2015-09-20 06:52:42 +0000533multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
534 X86VectorVTInfo To, PatFrag vinsert_insert,
535 SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
536 let Predicates = p in {
Adam Nemet4285c1f2014-10-15 23:42:17 +0000537 def : Pat<(vinsert_insert:$ins
Igor Breger0ede3cb2015-09-20 06:52:42 +0000538 (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
539 (To.VT (!cast<Instruction>(InstrStr#"rr")
540 To.RC:$src1, From.RC:$src2,
541 (INSERT_get_vinsert_imm To.RC:$ins)))>;
542
543 def : Pat<(vinsert_insert:$ins
544 (To.VT To.RC:$src1),
545 (From.VT (bitconvert (From.LdFrag addr:$src2))),
546 (iPTR imm)),
547 (To.VT (!cast<Instruction>(InstrStr#"rm")
548 To.RC:$src1, addr:$src2,
549 (INSERT_get_vinsert_imm To.RC:$ins)))>;
550 }
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000551}
552
// Instantiates every subvector-insert instruction variant for one element
// family (the defm name supplies the prefix through NAME).
//   EltVT32 / EltVT64     - the 32-bit and 64-bit element types.
//   Opcode128 / Opcode256 - opcodes for inserting a 128-bit / 256-bit
//                           subvector.
//   itins                 - itinerary/scheduling info passed to every variant.
multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
                            ValueType EltVT64, int Opcode256,
                            OpndItins itins> {

  // 128-bit into 256-bit, 32-bit elements.
  let Predicates = [HasVLX] in {
    defm NAME # "32x4Z256" :
      vinsert_for_size<Opcode128,
                       X86VectorVTInfo< 4, EltVT32, VR128X>,
                       X86VectorVTInfo< 8, EltVT32, VR256X>,
                       vinsert128_insert, itins>,
      EVEX_V256;
  }

  // 128-bit into 512-bit, 32-bit elements.
  defm NAME # "32x4Z" :
    vinsert_for_size<Opcode128,
                     X86VectorVTInfo< 4, EltVT32, VR128X>,
                     X86VectorVTInfo<16, EltVT32, VR512>,
                     vinsert128_insert, itins>,
    EVEX_V512;

  // 256-bit into 512-bit, 64-bit elements.
  defm NAME # "64x4Z" :
    vinsert_for_size<Opcode256,
                     X86VectorVTInfo< 4, EltVT64, VR256X>,
                     X86VectorVTInfo< 8, EltVT64, VR512>,
                     vinsert256_insert, itins>,
    VEX_W, EVEX_V512;

  // Even with DQI we'd like to only use these instructions for masking, so
  // the unmasked pattern operator is null_frag.
  let Predicates = [HasVLX, HasDQI] in {
    defm NAME # "64x2Z256" :
      vinsert_for_size_split<Opcode128,
                             X86VectorVTInfo< 2, EltVT64, VR128X>,
                             X86VectorVTInfo< 4, EltVT64, VR256X>,
                             null_frag, vinsert128_insert, itins>,
      VEX_W, EVEX_V256;
  }

  // Even with DQI we'd like to only use these instructions for masking, so
  // the unmasked pattern operator is null_frag.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" :
      vinsert_for_size_split<Opcode128,
                             X86VectorVTInfo< 2, EltVT64, VR128X>,
                             X86VectorVTInfo< 8, EltVT64, VR512>,
                             null_frag, vinsert128_insert, itins>,
      VEX_W, EVEX_V512;

    defm NAME # "32x8Z" :
      vinsert_for_size_split<Opcode256,
                             X86VectorVTInfo< 8, EltVT32, VR256X>,
                             X86VectorVTInfo<16, EltVT32, VR512>,
                             null_frag, vinsert256_insert, itins>,
      EVEX_V512;
  }
}
596
// Itinerary / scheduling bundles for the subvector-insert instructions.
// FIXME: Is there a better scheduler itinerary for VINSERTF/VINSERTI?
def AVX512_VINSERTF : OpndItins<IIC_SSE_SHUFP, IIC_SSE_SHUFP> {
  let Sched = WriteFShuffle256;
}
def AVX512_VINSERTI : OpndItins<IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI> {
  let Sched = WriteShuffle256;
}

// Floating-point and integer instruction families. The two opcodes are the
// 128-bit-insert and 256-bit-insert encodings, in that order.
defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, AVX512_VINSERTF>;
defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, AVX512_VINSERTI>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000609
// Codegen patterns for the alternative element types.
// Even with AVX512DQ we'll still use these for unmasked operations.

// 64-bit elements, VEC128 into VEC256.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256",
                                 v2f64x_info, v4f64x_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256",
                                 v2i64x_info, v4i64x_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasVLX]>;

// 64-bit elements, VEC128 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z",
                                 v2f64x_info, v8f64_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z",
                                 v2i64x_info, v8i64_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasAVX512]>;

// 32-bit elements, VEC256 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTF64x4Z",
                                 v8f32x_info, v16f32_info,
                                 vinsert256_insert, INSERT_get_vinsert256_imm,
                                 [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z",
                                 v8i32x_info, v16i32_info,
                                 vinsert256_insert, INSERT_get_vinsert256_imm,
                                 [HasAVX512]>;

// 16-bit and 8-bit elements, VEC128 into VEC256.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256",
                                 v8i16x_info, v16i16x_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256",
                                 v16i8x_info, v32i8x_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasVLX]>;

// 16-bit and 8-bit elements, VEC128 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z",
                                 v8i16x_info, v32i16_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z",
                                 v16i8x_info, v64i8_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasAVX512]>;

// 16-bit and 8-bit elements, VEC256 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTI64x4Z",
                                 v16i16x_info, v32i16_info,
                                 vinsert256_insert, INSERT_get_vinsert256_imm,
                                 [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z",
                                 v32i8x_info, v64i8_info,
                                 vinsert256_insert, INSERT_get_vinsert256_imm,
                                 [HasAVX512]>;
642
Craig Topperf7a19db2017-10-08 01:33:40 +0000643
// Patterns for a masked subvector insert where a bitcast sits between the
// vselect and the insert: the insert is performed in type To.VT while the
// select (and therefore the mask) works on Cast.VT.
//   InstrStr               - instruction name prefix; "rrk", "rmk", "rrkz" or
//                            "rmkz" is appended.
//   From / To              - inserted subvector type / full vector type.
//   Cast                   - type of the vselect; supplies mask class, zero
//                            vector and passthru register class.
//   vinsert_insert         - pattern fragment to match (bound as $ins).
//   INSERT_get_vinsert_imm - transform turning $ins into the immediate.
//   p                      - predicates guarding all four patterns.
multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                 X86VectorVTInfo To, X86VectorVTInfo Cast,
                                 PatFrag vinsert_insert,
                                 SDNodeXForm INSERT_get_vinsert_imm,
                                 list<Predicate> p> {
  let Predicates = p in {
    // Merge-masking ($src0 is the passthru), register subvector.
    def : Pat<(Cast.VT
               (vselect Cast.KRCWM:$mask,
                        (bitconvert
                         (vinsert_insert:$ins (To.VT To.RC:$src1),
                                              (From.VT From.RC:$src2),
                                              (iPTR imm))),
                        Cast.RC:$src0)),
              (!cast<Instruction>(InstrStr#"rrk")
               Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
               (INSERT_get_vinsert_imm To.RC:$ins))>;

    // Merge-masking, subvector loaded from memory.
    def : Pat<(Cast.VT
               (vselect Cast.KRCWM:$mask,
                        (bitconvert
                         (vinsert_insert:$ins (To.VT To.RC:$src1),
                                              (From.VT
                                               (bitconvert
                                                (From.LdFrag addr:$src2))),
                                              (iPTR imm))),
                        Cast.RC:$src0)),
              (!cast<Instruction>(InstrStr#"rmk")
               Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
               (INSERT_get_vinsert_imm To.RC:$ins))>;

    // Zero-masking, register subvector.
    def : Pat<(Cast.VT
               (vselect Cast.KRCWM:$mask,
                        (bitconvert
                         (vinsert_insert:$ins (To.VT To.RC:$src1),
                                              (From.VT From.RC:$src2),
                                              (iPTR imm))),
                        Cast.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr#"rrkz")
               Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
               (INSERT_get_vinsert_imm To.RC:$ins))>;

    // Zero-masking, subvector loaded from memory.
    def : Pat<(Cast.VT
               (vselect Cast.KRCWM:$mask,
                        (bitconvert
                         (vinsert_insert:$ins (To.VT To.RC:$src1),
                                              (From.VT
                                               (bitconvert
                                                (From.LdFrag addr:$src2))),
                                              (iPTR imm))),
                        Cast.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr#"rmkz")
               Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
               (INSERT_get_vinsert_imm To.RC:$ins))>;
  }
}
697
// Masked 128-bit inserts into 256-bit vectors where the select type (third
// info argument) differs from the type the insert is performed in.

// Floating point.
defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
                             v8f32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
                             v4f64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

// Integer, masked with 32-bit granularity.
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;

// Integer, masked with 64-bit granularity. These select the integer
// instruction (VINSERTI64x2Z256), matching the 512-bit VINSERTI64x2Z entries
// and the VEXTRACTI64x2Z256 extract patterns; the floating-point
// VINSERTF64x2Z256 was previously named here for integer types.
defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v4i32x_info, v8i32x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v8i16x_info, v16i16x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v16i8x_info, v32i8x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
723
// Masked 128-bit inserts into 512-bit vectors where the select type (third
// info argument) differs from the type the insert is performed in.

// Floating point.
defm : vinsert_for_mask_cast<"VINSERTF32x4Z",
                             v2f64x_info, v8f64_info, v16f32_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z",
                             v4f32x_info, v16f32_info, v8f64_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasDQI]>;

// Integer, masked with 32-bit granularity.
defm : vinsert_for_mask_cast<"VINSERTI32x4Z",
                             v2i64x_info, v8i64_info, v16i32_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z",
                             v8i16x_info, v32i16_info, v16i32_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z",
                             v16i8x_info, v64i8_info, v16i32_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasAVX512]>;

// Integer, masked with 64-bit granularity.
defm : vinsert_for_mask_cast<"VINSERTI64x2Z",
                             v4i32x_info, v16i32_info, v8i64_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z",
                             v8i16x_info, v32i16_info, v8i64_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z",
                             v16i8x_info, v64i8_info, v8i64_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasDQI]>;
749
// Masked 256-bit inserts into 512-bit vectors where the select type (third
// info argument) differs from the type the insert is performed in.

// Floating point.
defm : vinsert_for_mask_cast<"VINSERTF32x8Z",
                             v4f64x_info, v8f64_info, v16f32_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTF64x4Z",
                             v8f32x_info, v16f32_info, v8f64_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasAVX512]>;

// Integer, masked with 32-bit granularity.
defm : vinsert_for_mask_cast<"VINSERTI32x8Z",
                             v4i64x_info, v8i64_info, v16i32_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z",
                             v16i16x_info, v32i16_info, v16i32_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z",
                             v32i8x_info, v64i8_info, v16i32_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasDQI]>;

// Integer, masked with 64-bit granularity.
defm : vinsert_for_mask_cast<"VINSERTI64x4Z",
                             v8i32x_info, v16i32_info, v8i64_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z",
                             v16i16x_info, v32i16_info, v8i64_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z",
                             v32i8x_info, v64i8_info, v8i64_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasAVX512]>;
775
// vinsertps - insert f32 to XMM (EVEX-encoded register and memory forms).
let ExeDomain = SSEPackedSingle in {
  // Both operands in registers.
  def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg,
        (outs VR128X:$dst),
        (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
        "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set VR128X:$dst,
              (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))],
        IIC_SSE_INSERTPS_RR>,
      EVEX_4V, Sched<[WriteFShuffle]>;

  // Second operand is an f32 loaded from memory and placed in a vector with
  // scalar_to_vector.
  def VINSERTPSZrm : AVX512AIi8<0x21, MRMSrcMem,
        (outs VR128X:$dst),
        (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
        "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set VR128X:$dst,
              (X86insertps VR128X:$src1,
                           (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                           imm:$src3))],
        IIC_SSE_INSERTPS_RM>,
      EVEX_4V, EVEX_CD8<32, CD8VT1>, Sched<[WriteFShuffleLd, ReadAfterLd]>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000791
792//===----------------------------------------------------------------------===//
793// AVX-512 VECTOR EXTRACT
794//---
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000795
// Defines the subvector-extract instruction forms for one From -> To pair:
// the maskable register form (rr and its masked variants), the store form
// (mr) and the masked store form (mrk).
// Two pattern operators are taken so the unmasked and the masked forms can
// match different things; null_frag may be passed for either one.
//   vextract_extract  - operator for the unmasked rr form and the mr store.
//   vextract_for_mask - operator for the masked register forms.
multiclass vextract_for_size_split<int Opcode,
                                   X86VectorVTInfo From, X86VectorVTInfo To,
                                   SDPatternOperator vextract_extract,
                                   SDPatternOperator vextract_for_mask,
                                   OpndItins itins> {

  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    // Register destination.
    defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To,
                (outs To.RC:$dst),
                (ins From.RC:$src1, u8imm:$idx),
                "vextract" # To.EltTypeName # "x" # To.NumElts,
                "$idx, $src1", "$src1, $idx",
                (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
                (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm)),
                itins.rr>,
              AVX512AIi8Base, EVEX, Sched<[itins.Sched]>;

    // Unmasked store of the extracted subvector.
    // NOTE(review): this store form is scheduled with the folded-load
    // resources (itins.Sched.Folded, ReadAfterLd) - confirm that is intended.
    def mr : AVX512AIi8<Opcode, MRMDestMem,
                (outs),
                (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
                "vextract" # To.EltTypeName # "x" # To.NumElts #
                    "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
                [(store (To.VT (vextract_extract:$idx
                                (From.VT From.RC:$src1), (iPTR imm))),
                        addr:$dst)],
                itins.rm>,
             EVEX, Sched<[itins.Sched.Folded, ReadAfterLd]>;

    // Masked store. It has no selection pattern, so mayStore is stated
    // explicitly; hasSideEffects = 0 comes from the enclosing let.
    let mayStore = 1 in
    def mrk : AVX512AIi8<Opcode, MRMDestMem,
                (outs),
                (ins To.MemOp:$dst, To.KRCWM:$mask,
                     From.RC:$src1, u8imm:$idx),
                "vextract" # To.EltTypeName # "x" # To.NumElts #
                    "\t{$idx, $src1, $dst {${mask}}|"
                    "$dst {${mask}}, $src1, $idx}",
                [], itins.rm>,
              EVEX_K, EVEX, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
833
// Convenience wrapper over vextract_for_size_split for the common case where
// the masked and the unmasked forms select on the same pattern operator:
// vextract_extract is forwarded for both operator slots.
multiclass vextract_for_size<int Opcode,
                             X86VectorVTInfo From, X86VectorVTInfo To,
                             SDPatternOperator vextract_extract,
                             OpndItins itins>
  : vextract_for_size_split<Opcode, From, To,
                            vextract_extract, vextract_extract,
                            itins>;
Craig Topper3a622a12017-08-17 15:40:25 +0000840
// Codegen patterns for the alternative types: selection-only patterns that
// map vextract_extract on the From/To type pair onto the existing instruction
// whose record name starts with InstrStr.
//   InstrStr                 - instruction name prefix; "rr"/"mr" is appended.
//   From / To                - source vector type / extracted subvector type.
//   vextract_extract         - pattern fragment to match (bound as $ext).
//   EXTRACT_get_vextract_imm - transform applied to $ext to produce the
//                              instruction's immediate operand.
//   p                        - predicates guarding both patterns.
multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                                      X86VectorVTInfo To,
                                      PatFrag vextract_extract,
                                      SDNodeXForm EXTRACT_get_vextract_imm,
                                      list<Predicate> p> {
  let Predicates = p in {
    // Extract into a register.
    def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                        From.RC:$src1,
                        (EXTRACT_get_vextract_imm To.RC:$ext)))>;

    // Extract whose result is stored directly to memory.
    def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
                                                   (iPTR imm))),
                     addr:$dst),
              (!cast<Instruction>(InstrStr#"mr")
                 addr:$dst, From.RC:$src1,
                 (EXTRACT_get_vextract_imm To.RC:$ext))>;
  }
}
856
// Instantiates every subvector-extract instruction variant for one element
// family (the defm name supplies the prefix through NAME).
//   EltVT32 / EltVT64     - the 32-bit and 64-bit element types.
//   Opcode128 / Opcode256 - opcodes for extracting a 128-bit / 256-bit
//                           subvector.
//   itins                 - itinerary/scheduling info passed to every variant.
multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
                             ValueType EltVT64, int Opcode256,
                             OpndItins itins> {
  let Predicates = [HasAVX512] in {
    // 128-bit from 512-bit, 32-bit elements.
    defm NAME # "32x4Z" :
      vextract_for_size<Opcode128,
                        X86VectorVTInfo<16, EltVT32, VR512>,
                        X86VectorVTInfo< 4, EltVT32, VR128X>,
                        vextract128_extract, itins>,
      EVEX_V512, EVEX_CD8<32, CD8VT4>;

    // 256-bit from 512-bit, 64-bit elements.
    defm NAME # "64x4Z" :
      vextract_for_size<Opcode256,
                        X86VectorVTInfo< 8, EltVT64, VR512>,
                        X86VectorVTInfo< 4, EltVT64, VR256X>,
                        vextract256_extract, itins>,
      VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
  }

  // 128-bit from 256-bit, 32-bit elements.
  let Predicates = [HasVLX] in {
    defm NAME # "32x4Z256" :
      vextract_for_size<Opcode128,
                        X86VectorVTInfo< 8, EltVT32, VR256X>,
                        X86VectorVTInfo< 4, EltVT32, VR128X>,
                        vextract128_extract, itins>,
      EVEX_V256, EVEX_CD8<32, CD8VT4>;
  }

  // Even with DQI we'd like to only use these instructions for masking, so
  // the unmasked pattern operator is null_frag.
  let Predicates = [HasVLX, HasDQI] in {
    defm NAME # "64x2Z256" :
      vextract_for_size_split<Opcode128,
                              X86VectorVTInfo< 4, EltVT64, VR256X>,
                              X86VectorVTInfo< 2, EltVT64, VR128X>,
                              null_frag, vextract128_extract, itins>,
      VEX_W, EVEX_V256, EVEX_CD8<64, CD8VT2>;
  }

  // Even with DQI we'd like to only use these instructions for masking, so
  // the unmasked pattern operator is null_frag.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" :
      vextract_for_size_split<Opcode128,
                              X86VectorVTInfo< 8, EltVT64, VR512>,
                              X86VectorVTInfo< 2, EltVT64, VR128X>,
                              null_frag, vextract128_extract, itins>,
      VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;

    defm NAME # "32x8Z" :
      vextract_for_size_split<Opcode256,
                              X86VectorVTInfo<16, EltVT32, VR512>,
                              X86VectorVTInfo< 8, EltVT32, VR256X>,
                              null_frag, vextract256_extract, itins>,
      EVEX_V512, EVEX_CD8<32, CD8VT8>;
  }
}
901
// Itinerary / scheduling bundles for the subvector-extract instructions.
// FIXME: Is there a better scheduler itinerary for VEXTRACTF/VEXTRACTI?
def AVX512_VEXTRACTF : OpndItins<IIC_SSE_SHUFP, IIC_SSE_SHUFP> {
  let Sched = WriteFShuffle256;
}
def AVX512_VEXTRACTI : OpndItins<IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI> {
  let Sched = WriteShuffle256;
}

// Floating-point and integer instruction families. The two opcodes are the
// 128-bit-extract and 256-bit-extract encodings, in that order.
defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, AVX512_VEXTRACTF>;
defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, AVX512_VEXTRACTI>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000914
// extract_subvector codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.

// 64-bit elements, VEC128 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z",
                                  v8f64_info, v2f64x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z",
                                  v8i64_info, v2i64x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasAVX512]>;

// 32-bit elements, VEC256 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTF64x4Z",
                                  v16f32_info, v8f32x_info,
                                  vextract256_extract,
                                  EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z",
                                  v16i32_info, v8i32x_info,
                                  vextract256_extract,
                                  EXTRACT_get_vextract256_imm, [HasAVX512]>;

// 64-bit elements, VEC128 from VEC256.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256",
                                  v4f64x_info, v2f64x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256",
                                  v4i64x_info, v2i64x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasVLX]>;

// 16-bit and 8-bit elements, VEC128 from VEC256.
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256",
                                  v16i16x_info, v8i16x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256",
                                  v32i8x_info, v16i8x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasVLX]>;

// 16-bit and 8-bit elements, VEC128 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z",
                                  v32i16_info, v8i16x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z",
                                  v64i8_info, v16i8x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasAVX512]>;

// 16-bit and 8-bit elements, VEC256 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z",
                                  v32i16_info, v16i16x_info,
                                  vextract256_extract,
                                  EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z",
                                  v64i8_info, v32i8x_info,
                                  vextract256_extract,
                                  EXTRACT_get_vextract256_imm, [HasAVX512]>;
948
Craig Topper5f3fef82016-05-22 07:40:58 +0000949
// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
// Without VLX: take the sub_ymm subregister of the 512-bit source and extract
// its upper half (index 1) with VEXTRACTF128rr / VEXTRACTI128rr.
let Predicates = [NoVLX] in {
  // 64-bit elements.
  def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
            (v2i64 (VEXTRACTI128rr
                    (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                    (iPTR 1)))>;
  def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
            (v2f64 (VEXTRACTF128rr
                    (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                    (iPTR 1)))>;

  // 32-bit elements.
  def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
            (v4i32 (VEXTRACTI128rr
                    (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                    (iPTR 1)))>;
  def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
            (v4f32 (VEXTRACTF128rr
                    (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                    (iPTR 1)))>;

  // 16-bit and 8-bit elements.
  def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
            (v8i16 (VEXTRACTI128rr
                    (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                    (iPTR 1)))>;
  def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
            (v16i8 (VEXTRACTI128rr
                    (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                    (iPTR 1)))>;
}
978
// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
// With VLX: take the sub_ymm subregister of the 512-bit source and extract
// its upper half (index 1) with the 256-bit VEXTRACTF32x4Z256rr /
// VEXTRACTI32x4Z256rr.
let Predicates = [HasVLX] in {
  // 64-bit elements.
  def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
            (v2i64 (VEXTRACTI32x4Z256rr
                    (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                    (iPTR 1)))>;
  def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
            (v2f64 (VEXTRACTF32x4Z256rr
                    (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                    (iPTR 1)))>;

  // 32-bit elements.
  def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
            (v4i32 (VEXTRACTI32x4Z256rr
                    (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                    (iPTR 1)))>;
  def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
            (v4f32 (VEXTRACTF32x4Z256rr
                    (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                    (iPTR 1)))>;

  // 16-bit and 8-bit elements.
  def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
            (v8i16 (VEXTRACTI32x4Z256rr
                    (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                    (iPTR 1)))>;
  def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
            (v16i8 (VEXTRACTI32x4Z256rr
                    (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                    (iPTR 1)))>;
}
1007
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001008
// Additional patterns for handling a bitcast between the vselect and the
// extract_subvector: the extract produces To.VT while the select (and
// therefore the mask) works on Cast.VT.
//   InstrStr                 - instruction name prefix; "rrk" or "rrkz" is
//                              appended.
//   From / To                - source vector type / extracted subvector type.
//   Cast                     - type of the vselect; supplies mask class, zero
//                              vector and passthru register class.
//   vextract_extract         - pattern fragment to match (bound as $ext).
//   EXTRACT_get_vextract_imm - transform turning $ext into the immediate.
//   p                        - predicates guarding both patterns.
multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
                                  PatFrag vextract_extract,
                                  SDNodeXForm EXTRACT_get_vextract_imm,
                                  list<Predicate> p> {
let Predicates = p in {
  // Merge-masking. The passthru operand of the select has type Cast.VT, so it
  // is spelled with Cast.RC on both sides of the pattern (the source side
  // previously used To.RC), matching vinsert_for_mask_cast.
  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (vextract_extract:$ext
                                       (From.VT From.RC:$src), (iPTR imm)))),
                              Cast.RC:$src0)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
                      Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;

  // Zero-masking.
  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (vextract_extract:$ext
                                       (From.VT From.RC:$src), (iPTR imm)))),
                              Cast.ImmAllZerosV)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
                      Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
}
}
1036
// Masked 128-bit extracts from 256-bit vectors where the select type (third
// info argument) differs from the type the extract is performed in.

// Floating point.
defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256",
                              v4f64x_info, v2f64x_info, v4f32x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256",
                              v8f32x_info, v4f32x_info, v2f64x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasDQI, HasVLX]>;

// Integer, masked with 32-bit granularity.
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256",
                              v4i64x_info, v2i64x_info, v4i32x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256",
                              v16i16x_info, v8i16x_info, v4i32x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256",
                              v32i8x_info, v16i8x_info, v4i32x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasVLX]>;

// Integer, masked with 64-bit granularity.
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256",
                              v8i32x_info, v4i32x_info, v2i64x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256",
                              v16i16x_info, v8i16x_info, v2i64x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256",
                              v32i8x_info, v16i8x_info, v2i64x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasDQI, HasVLX]>;
1062
// Masked 128-bit extracts from 512-bit vectors where the select type (third
// info argument) differs from the type the extract is performed in.

// Floating point.
defm : vextract_for_mask_cast<"VEXTRACTF32x4Z",
                              v8f64_info, v2f64x_info, v4f32x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z",
                              v16f32_info, v4f32x_info, v2f64x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasDQI]>;

// Integer, masked with 32-bit granularity.
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z",
                              v8i64_info, v2i64x_info, v4i32x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z",
                              v32i16_info, v8i16x_info, v4i32x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z",
                              v64i8_info, v16i8x_info, v4i32x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasAVX512]>;

// Integer, masked with 64-bit granularity.
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z",
                              v16i32_info, v4i32x_info, v2i64x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z",
                              v32i16_info, v8i16x_info, v2i64x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z",
                              v64i8_info, v16i8x_info, v2i64x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasDQI]>;
1088
// Masked 256-bit extracts from 512-bit vectors where the select type (third
// info argument) differs from the type the extract is performed in.

// Floating point.
defm : vextract_for_mask_cast<"VEXTRACTF32x8Z",
                              v8f64_info, v4f64x_info, v8f32x_info,
                              vextract256_extract, EXTRACT_get_vextract256_imm,
                              [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x4Z",
                              v16f32_info, v8f32x_info, v4f64x_info,
                              vextract256_extract, EXTRACT_get_vextract256_imm,
                              [HasAVX512]>;

// Integer, masked with 32-bit granularity.
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z",
                              v8i64_info, v4i64x_info, v8i32x_info,
                              vextract256_extract, EXTRACT_get_vextract256_imm,
                              [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z",
                              v32i16_info, v16i16x_info, v8i32x_info,
                              vextract256_extract, EXTRACT_get_vextract256_imm,
                              [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z",
                              v64i8_info, v32i8x_info, v8i32x_info,
                              vextract256_extract, EXTRACT_get_vextract256_imm,
                              [HasDQI]>;

// Integer, masked with 64-bit granularity.
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z",
                              v16i32_info, v8i32x_info, v4i64x_info,
                              vextract256_extract, EXTRACT_get_vextract256_imm,
                              [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z",
                              v32i16_info, v16i16x_info, v4i64x_info,
                              vextract256_extract, EXTRACT_get_vextract256_imm,
                              [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z",
                              v64i8_info, v32i8x_info, v4i64x_info,
                              vextract256_extract, EXTRACT_get_vextract256_imm,
                              [HasAVX512]>;
1114
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001115// vextractps - extract 32 bits from XMM
Craig Topper03b849e2016-05-21 22:50:11 +00001116def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
Craig Topperfc946a02015-01-25 02:21:13 +00001117 (ins VR128X:$src1, u8imm:$src2),
Elena Demikhovskycf088092013-12-11 14:31:04 +00001118 "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
Simon Pilgrimd255a622017-12-06 18:46:06 +00001119 [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))],
1120 IIC_SSE_EXTRACTPS_RR>, EVEX, VEX_WIG, Sched<[WriteFShuffle]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001121
// Memory form of vextractps: stores the selected 32-bit element to $dst.
// NOTE(review): this is a store (MRMDestMem, `store` pattern) but it is tagged
// with the load-folded scheduling class WriteFShuffleLd -- confirm intended.
def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
      (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
                          addr:$dst)], IIC_SSE_EXTRACTPS_RM>,
      EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteFShuffleLd]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001128
1129//===---------------------------------------------------------------------===//
1130// AVX-512 BROADCAST
1131//---
// broadcast with a scalar argument.
// Pattern-only multiclass: maps X86VBroadcast of a scalar FP register
// (SrcInfo.FRC) onto the register-source broadcast instruction by first
// copying the scalar into the vector register class SrcInfo.RC. Three patterns
// are emitted: unmasked (r), merge-masked (rk) and zero-masked (rkz).
// opc and OpcodeStr are not referenced by the body; they are kept so existing
// instantiations keep compiling.
multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
                            X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
  // Unmasked.
  def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
            (!cast<Instruction>(NAME#DestInfo.ZSuffix#r)
             (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;
  // Merge-masking: lanes not selected by $mask come from $src0.
  def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
                                  (X86VBroadcast SrcInfo.FRC:$src),
                                  DestInfo.RC:$src0)),
            (!cast<Instruction>(NAME#DestInfo.ZSuffix#rk)
             DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
             (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;
  // Zero-masking: lanes not selected by $mask are zero.
  def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
                                  (X86VBroadcast SrcInfo.FRC:$src),
                                  DestInfo.ImmAllZerosV)),
            (!cast<Instruction>(NAME#DestInfo.ZSuffix#rkz)
             DestInfo.KRCWM:$mask, (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;
}
Robert Khasanovaf318f72014-10-30 14:21:47 +00001150
// Split version to allow mask and broadcast node to be different types. This
// helps support the 32x2 broadcasts.
//
//   MaskInfo   - type the write-mask (and the instruction result) is
//                expressed in.
//   DestInfo   - type produced by the broadcast node before it is bitconverted
//                to MaskInfo.VT.
//   SrcInfo    - type of the scalar/low-element source.
//   UnmaskedOp - node used in the first pattern dag handed to
//                AVX512_maskable_split; the second dag always uses
//                X86VBroadcast. Callers in this file pass null_frag here for
//                the DQ-only forms (presumably to suppress unmasked
//                selection, as the comment on avx512_subvec_broadcast_rm_dq
//                describes for its own null_frag).
multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
                                     SchedWrite SchedRR, SchedWrite SchedRM,
                                     X86VectorVTInfo MaskInfo,
                                     X86VectorVTInfo DestInfo,
                                     X86VectorVTInfo SrcInfo,
                                     SDPatternOperator UnmaskedOp = X86VBroadcast> {
  let ExeDomain = DestInfo.ExeDomain, hasSideEffects = 0 in {
  // Register source.
  defm r : AVX512_maskable_split<opc, MRMSrcReg, MaskInfo,
                   (outs MaskInfo.RC:$dst),
                   (ins SrcInfo.RC:$src), OpcodeStr, "$src", "$src",
                   (MaskInfo.VT
                    (bitconvert
                     (DestInfo.VT
                      (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))),
                   (MaskInfo.VT
                    (bitconvert
                     (DestInfo.VT
                      (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
                   NoItinerary>, T8PD, EVEX, Sched<[SchedRR]>;
  // Scalar memory source.
  let mayLoad = 1 in
  defm m : AVX512_maskable_split<opc, MRMSrcMem, MaskInfo,
                   (outs MaskInfo.RC:$dst),
                   (ins SrcInfo.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
                   (MaskInfo.VT
                    (bitconvert
                     (DestInfo.VT (UnmaskedOp
                                   (SrcInfo.ScalarLdFrag addr:$src))))),
                   (MaskInfo.VT
                    (bitconvert
                     (DestInfo.VT (X86VBroadcast
                                   (SrcInfo.ScalarLdFrag addr:$src))))),
                   NoItinerary>, T8PD, EVEX, EVEX_CD8<SrcInfo.EltSize, CD8VT1>,
                   Sched<[SchedRM]>;
  }

  // Fold a scalar load that reaches the broadcast through scalar_to_vector
  // into the memory form: unmasked, merge-masked and zero-masked.
  def : Pat<(MaskInfo.VT
             (bitconvert
              (DestInfo.VT (UnmaskedOp
                            (SrcInfo.VT (scalar_to_vector
                                         (SrcInfo.ScalarLdFrag addr:$src))))))),
            (!cast<Instruction>(NAME#MaskInfo.ZSuffix#m) addr:$src)>;
  // The instruction suffix is taken from MaskInfo here as well, matching the
  // m and mkz patterns (the original used DestInfo.ZSuffix for this one only;
  // every instantiation visible in this file passes Mask/Dest infos of the
  // same vector width).
  def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask,
                          (bitconvert
                           (DestInfo.VT
                            (X86VBroadcast
                             (SrcInfo.VT (scalar_to_vector
                                          (SrcInfo.ScalarLdFrag addr:$src)))))),
                          MaskInfo.RC:$src0)),
            (!cast<Instruction>(NAME#MaskInfo.ZSuffix#mk)
             MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask, addr:$src)>;
  def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask,
                          (bitconvert
                           (DestInfo.VT
                            (X86VBroadcast
                             (SrcInfo.VT (scalar_to_vector
                                          (SrcInfo.ScalarLdFrag addr:$src)))))),
                          MaskInfo.ImmAllZerosV)),
            (!cast<Instruction>(NAME#MaskInfo.ZSuffix#mkz)
             MaskInfo.KRCWM:$mask, addr:$src)>;
}
Robert Khasanovaf318f72014-10-30 14:21:47 +00001213
// Helper class to force mask and broadcast result to same type.
// Forwards to avx512_broadcast_rm_split with DestInfo used as both the mask
// type and the broadcast result type; UnmaskedOp keeps its default.
multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
                               SchedWrite SchedRR, SchedWrite SchedRM,
                               X86VectorVTInfo DestInfo,
                               X86VectorVTInfo SrcInfo> :
  avx512_broadcast_rm_split<opc, OpcodeStr, SchedRR, SchedRM,
                            DestInfo, DestInfo, SrcInfo>;
Craig Topper17854ec2017-08-30 07:48:39 +00001221
// Scalar-double broadcast: defines the 512-bit form under HasAVX512 and the
// 256-bit form under HasVLX, each with its instruction records
// (avx512_broadcast_rm) and scalar-register patterns
// (avx512_broadcast_scalar). No 128-bit form is defined by this multiclass.
multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                 WriteFShuffle256Ld, _.info512, _.info128>,
             avx512_broadcast_scalar<opc, OpcodeStr, _.info512, _.info128>,
             EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info256, _.info128>,
                avx512_broadcast_scalar<opc, OpcodeStr, _.info256, _.info128>,
                EVEX_V256;
  }
}
1238
// Scalar-single broadcast: same structure as avx512_fp_broadcast_sd, plus a
// 128-bit form under HasVLX.
// NOTE(review): the Z128 form is tagged with the 256-bit scheduling classes
// (WriteFShuffle256 / WriteFShuffle256Ld), unlike the integer Z128 broadcasts
// below which use WriteShuffle / WriteShuffleLd -- confirm intended.
multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                 WriteFShuffle256Ld, _.info512, _.info128>,
             avx512_broadcast_scalar<opc, OpcodeStr, _.info512, _.info128>,
             EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info256, _.info128>,
                avx512_broadcast_scalar<opc, OpcodeStr, _.info256, _.info128>,
                EVEX_V256;
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info128, _.info128>,
                avx512_broadcast_scalar<opc, OpcodeStr, _.info128, _.info128>,
                EVEX_V128;
  }
}
// Floating-point scalar broadcasts.
defm VBROADCASTSS  : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
                                            avx512vl_f32_info>;
defm VBROADCASTSD  : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
                                            avx512vl_f64_info>, VEX_W;

// Select the 512-bit broadcast intrinsics with a memory operand directly to
// the load forms.
def : Pat<(int_x86_avx512_vbroadcast_ss_512 addr:$src),
          (VBROADCASTSSZm addr:$src)>;
def : Pat<(int_x86_avx512_vbroadcast_sd_512 addr:$src),
          (VBROADCASTSDZm addr:$src)>;
Quentin Colombet4bf1c282013-10-25 17:47:18 +00001268
// Integer broadcast from a general-purpose register of class SrcRC.
// The mnemonic is "vpbroadcast" followed by the element suffix of the vector
// type `_`; OpNode is the DAG node matched on the source register.
multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
                                    X86VectorVTInfo _, SDPatternOperator OpNode,
                                    RegisterClass SrcRC> {
  let ExeDomain = _.ExeDomain in
  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins SrcRC:$src),
                           "vpbroadcast"##_.Suffix, "$src", "$src",
                           (_.VT (OpNode SrcRC:$src)), NoItinerary>, T8PD, EVEX,
                           Sched<[SchedRR]>;
}
1279
// Byte/word broadcast from a general-purpose register.
// The instruction itself is declared with a GR32 source operand and no
// selection patterns; the separate Pat records below match OpNode on the
// narrower SrcRC and widen it to i32 by inserting it at sub-register index
// Subreg of an IMPLICIT_DEF. Name is the full record-name prefix used to look
// up the r / rk / rkz instructions.
multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
                            X86VectorVTInfo _, SDPatternOperator OpNode,
                            RegisterClass SrcRC, SubRegIndex Subreg> {
  let hasSideEffects = 0, ExeDomain = _.ExeDomain in
  defm r : AVX512_maskable_custom<opc, MRMSrcReg,
                        (outs _.RC:$dst), (ins GR32:$src),
                        !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
                        !con((ins _.KRCWM:$mask), (ins GR32:$src)),
                        "vpbroadcast"##_.Suffix, "$src", "$src", [], [], [],
                        NoItinerary, "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;

  // Unmasked.
  def : Pat <(_.VT (OpNode SrcRC:$src)),
             (!cast<Instruction>(Name#r)
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  // Merge-masked.
  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
             (!cast<Instruction>(Name#rk) _.RC:$src0, _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  // Zero-masked.
  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
             (!cast<Instruction>(Name#rkz) _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
}
1303
// Vector-length expansion of avx512_int_broadcastbw_reg: the 512-bit form is
// gated on `prd`, the 256- and 128-bit forms on `prd` plus HasVLX. The Z128
// form uses WriteShuffle; the wider forms use WriteShuffle256.
multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
                      AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
                      RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
                                        OpNode, SrcRC, Subreg>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
                              _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
    defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
                              _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
  }
}
1317
// Vector-length expansion of avx512_int_broadcast_reg: the 512-bit form is
// gated on `prd`, the 256- and 128-bit forms on `prd` plus HasVLX.
multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
                                       SDPatternOperator OpNode,
                                       RegisterClass SrcRC, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
                                      SrcRC>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
                                         SrcRC>, EVEX_V256;
    defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
                                         SrcRC>, EVEX_V128;
  }
}
1331
// Integer broadcasts from general-purpose registers. The byte and word forms
// need HasBWI and go through the GR32-widening multiclass; the dword and qword
// forms share opcode 0x7C and differ by VEX_W.
defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
                       avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
                       avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
                       HasBWI>;
defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
                                                 X86VBroadcast, GR32, HasAVX512>;
defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
                                                 X86VBroadcast, GR64, HasAVX512>, VEX_W;
Michael Liao5bf95782014-12-04 05:20:33 +00001341
// Provide aliases for broadcast from the same register class that
// automatically does the extract.
// The source vector (SrcInfo) is narrowed to its sub_xmm sub-register and fed
// to the register form of the broadcast selected by DestInfo.ZSuffix.
multiclass avx512_int_broadcast_rm_lowering<X86VectorVTInfo DestInfo,
                                            X86VectorVTInfo SrcInfo> {
  def : Pat<(DestInfo.VT (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))),
            (!cast<Instruction>(NAME#DestInfo.ZSuffix#"r")
             (EXTRACT_SUBREG (SrcInfo.VT SrcInfo.RC:$src), sub_xmm))>;
}
1350
// Integer element broadcast from an XMM register or scalar memory, expanded
// over vector lengths. The 512- and 256-bit forms additionally get lowering
// patterns that accept a wider source register and extract its low XMM part;
// the 128-bit form has no such pattern.
multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
                                      AVX512VLVectorVTInfo _, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
                                 WriteShuffle256Ld, _.info512, _.info128>,
             avx512_int_broadcast_rm_lowering<_.info512, _.info256>,
             EVEX_V512;
    // Defined separately to avoid redefinition.
    defm Z_Alt : avx512_int_broadcast_rm_lowering<_.info512, _.info512>;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
                                    WriteShuffle256Ld, _.info256, _.info128>,
                avx512_int_broadcast_rm_lowering<_.info256, _.info256>,
                EVEX_V256;
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle,
                                    WriteShuffleLd, _.info128, _.info128>,
                EVEX_V128;
  }
}
1371
// Integer element broadcasts from XMM/memory. Byte and word forms require
// HasBWI; dword and qword forms require only HasAVX512.
defm VPBROADCASTB  : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
                                                avx512vl_i8_info, HasBWI>;
defm VPBROADCASTW  : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
                                                avx512vl_i16_info, HasBWI>;
defm VPBROADCASTD  : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
                                                avx512vl_i32_info, HasAVX512>;
defm VPBROADCASTQ  : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
                                                avx512vl_i64_info, HasAVX512>, VEX_W;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001380
// Subvector broadcast from memory: loads a _Src-typed vector and replicates it
// into the _Dst-typed result via X86SubVBroadcast. Only a memory form (rm) is
// defined.
multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
                          X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                           (_Dst.VT (X86SubVBroadcast
                             (_Src.VT (bitconvert (_Src.LdFrag addr:$src))))),
                           NoItinerary>, AVX5128IBase, EVEX,
                           Sched<[WriteShuffleLd]>;
}
1390
// This should be used for the AVX512DQ broadcast instructions. It disables
// the unmasked patterns so that we only use the DQ instructions when masking
// is requested.
// The unmasked dag passed to AVX512_maskable_split is null_frag; since that
// leaves the unmasked record without a pattern, hasSideEffects and mayLoad are
// set explicitly.
multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
                          X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
  let hasSideEffects = 0, mayLoad = 1 in
  defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                           (null_frag),
                           (_Dst.VT (X86SubVBroadcast
                             (_Src.VT (bitconvert (_Src.LdFrag addr:$src))))),
                           NoItinerary>, AVX5128IBase, EVEX,
                           Sched<[WriteShuffleLd]>;
}
1405
// Extra load-folding patterns for the integer element broadcasts.
let Predicates = [HasAVX512] in {
  // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
  def : Pat<(v8i64 (X86VBroadcast (v8i64 (X86vzload addr:$src)))),
            (VPBROADCASTQZm addr:$src)>;
}

let Predicates = [HasVLX] in {
  // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
  def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))),
            (VPBROADCASTQZ128m addr:$src)>;
  def : Pat<(v4i64 (X86VBroadcast (v4i64 (X86vzload addr:$src)))),
            (VPBROADCASTQZ256m addr:$src)>;
}
let Predicates = [HasVLX, HasBWI] in {
  // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
  // This means we'll encounter truncated i32 loads; match that here.
  def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
            (VPBROADCASTWZ128m addr:$src)>;
  def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
            (VPBROADCASTWZ256m addr:$src)>;
  // Same, for a zero-extending 16-bit load.
  def : Pat<(v8i16 (X86VBroadcast
              (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
            (VPBROADCASTWZ128m addr:$src)>;
  def : Pat<(v16i16 (X86VBroadcast
              (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
            (VPBROADCASTWZ256m addr:$src)>;
}
1433
Simon Pilgrimea0d4f92016-07-22 13:58:44 +00001434//===----------------------------------------------------------------------===//
1435// AVX-512 BROADCAST SUBVECTORS
1436//
1437
// 512-bit subvector broadcasts from memory: 128-bit (32X4) and 256-bit (64X4)
// sources, integer and floating point. The 64X4 forms set VEX_W.
defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                       v16i32_info, v4i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                       v16f32_info, v4f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
                       v8i64_info, v4i64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
                       v8f64_info, v4f64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
1450
// Additional selection patterns for the 512-bit subvector broadcasts.
let Predicates = [HasAVX512] in {
// 256-bit loads of other element types reuse the 64X4 broadcasts.
def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))),
          (VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src)))),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcast (bc_v16i16 (loadv4i64 addr:$src)))),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcast (bc_v32i8 (loadv4i64 addr:$src)))),
          (VBROADCASTI64X4rm addr:$src)>;

// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
// The register source is placed in the low half via INSERT_SUBREG and then
// inserted again at index 1 with VINSERT{F,I}64x4.
def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))),
          (VINSERTF64x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v4f64 VR256X:$src), 1)>;
def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
          (VINSERTF64x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v8f32 VR256X:$src), 1)>;
def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v4i64 VR256X:$src), 1)>;
def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v8i32 VR256X:$src), 1)>;
def : Pat<(v32i16 (X86SubVBroadcast (v16i16 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v16i16 VR256X:$src), 1)>;
def : Pat<(v64i8 (X86SubVBroadcast (v32i8 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v32i8 VR256X:$src), 1)>;

// 128-bit loads of other element types reuse the 32X4 broadcasts.
def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
          (VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
          (VBROADCASTI32X4rm addr:$src)>;

// Patterns for selects of bitcasted operations.
// 16-lane masks select the 32X4 forms.
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                   (bc_v16f32 (v16i32 immAllZerosV))),
          (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                   (v16i32 immAllZerosV)),
          (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

// 8-lane masks select the 64X4 forms.
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
                   (bc_v8f64 (v16i32 immAllZerosV))),
          (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src))))),
                   (bc_v8i64 (v16i32 immAllZerosV))),
          (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src))))),
                   VR512:$src0),
          (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
1526
// 256-bit subvector broadcasts (128-bit source) and their extra patterns.
let Predicates = [HasVLX] in {
defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                           v8i32x_info, v4i32x_info>,
                           EVEX_V256, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                           v8f32x_info, v4f32x_info>,
                           EVEX_V256, EVEX_CD8<32, CD8VT4>;

// 128-bit loads of other element types reuse the 32X4 broadcasts.
def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
          (VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v16i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v32i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
          (VBROADCASTI32X4Z256rm addr:$src)>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                   (bc_v8f32 (v8i32 immAllZerosV))),
          (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                   VR256X:$src0),
          (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                   (v8i32 immAllZerosV)),
          (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                   VR256X:$src0),
          (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;


// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
// The register source is placed in the low half via INSERT_SUBREG and then
// inserted again at index 1 with VINSERT{F,I}32x4.
def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
          (VINSERTF32x4Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v2f64 VR128X:$src), 1)>;
def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
          (VINSERTF32x4Z256rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v4f32 VR128X:$src), 1)>;
def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v2i64 VR128X:$src), 1)>;
def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v4i32 VR128X:$src), 1)>;
def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v8i16 VR128X:$src), 1)>;
def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v16i8 VR128X:$src), 1)>;
}
Simon Pilgrimea0d4f92016-07-22 13:58:44 +00001584
// 64X2 subvector broadcasts into 256-bit vectors (DQ + VLX).
// NOTE: despite the Z128 suffix in the record names, these use 256-bit
// destination types (v4i64x_info / v4f64x_info) and EVEX_V256. The names are
// kept as they are because the patterns below refer to them.
let Predicates = [HasVLX, HasDQI] in {
defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                           v4i64x_info, v2i64x_info>, VEX_W,
                           EVEX_V256, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                           v4f64x_info, v2f64x_info>, VEX_W,
                           EVEX_V256, EVEX_CD8<64, CD8VT2>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   (bc_v4f64 (v8i32 immAllZerosV))),
          (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   VR256X:$src0),
          (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4i64 (v8i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
                   (bc_v4i64 (v8i32 immAllZerosV))),
          (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4i64 (v8i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
                   VR256X:$src0),
          (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
}
1611
// DQ-only 512-bit subvector broadcasts (64X2 and 32X8). They are defined via
// avx512_subvec_broadcast_rm_dq, so only masked forms carry patterns.
let Predicates = [HasDQI] in {
defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                       v8i64_info, v2i64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
                       v16i32_info, v8i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                       v8f64_info, v2f64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
                       v16f32_info, v8f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;

// Patterns for selects of bitcasted operations.
// 16-lane masks over a 256-bit source select the 32X8 forms.
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
                   (bc_v16f32 (v16i32 immAllZerosV))),
          (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
                   (v16i32 immAllZerosV)),
          (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

// 8-lane masks over a 128-bit source select the 64X2 forms.
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   (bc_v8f64 (v16i32 immAllZerosV))),
          (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
                   (bc_v8i64 (v16i32 immAllZerosV))),
          (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
                   VR512:$src0),
          (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
Adam Nemet73f72e12014-06-27 00:43:38 +00001661
// Instantiates the 512-bit (Z) and 256-bit (Z256) variants of a 32x2
// broadcast via avx512_broadcast_rm_split. _Dst supplies the destination
// vector info and _Src the source info; the last info argument is always the
// 128-bit source info. No selection fragment is supplied (null_frag).
multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
                                        AVX512VLVectorVTInfo _Dst,
                                        AVX512VLVectorVTInfo _Src> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
                                       WriteShuffle256Ld, _Dst.info512,
                                       _Src.info512, _Src.info128, null_frag>,
             EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
                                          WriteShuffle256Ld, _Dst.info256,
                                          _Src.info256, _Src.info128,
                                          null_frag>,
                EVEX_V256;
  }
}
1675
// Extends avx512_common_broadcast_32x2 (Z and Z256) with a 128-bit Z128
// variant, which requires both DQI and VLX.
multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
                                         AVX512VLVectorVTInfo _Dst,
                                         AVX512VLVectorVTInfo _Src>
    : avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle,
                                          WriteShuffleLd, _Dst.info128,
                                          _Src.info128, _Src.info128,
                                          null_frag>,
                EVEX_V128;
  }
}
1686
// The integer form is instantiated through the i32x2 multiclass and so gets
// Z, Z256 and Z128; the floating-point form gets only Z and Z256.
defm VBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
                                                     avx512vl_i32_info,
                                                     avx512vl_i64_info>;
defm VBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
                                                    avx512vl_f32_info,
                                                    avx512vl_f64_info>;
Igor Bregerfa798a92015-11-02 07:39:36 +00001691
// Broadcast whose source is itself a 256-bit vector: take the low xmm
// subregister of the source and use the register-source broadcast.
let Predicates = [HasVLX] in {
  def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256X:$src))),
            (VBROADCASTSSZ256r
              (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm))>;
  def : Pat<(v4f64 (X86VBroadcast (v4f64 VR256X:$src))),
            (VBROADCASTSDZ256r
              (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm))>;
}
1698
// 512-bit broadcasts whose source is a 512-bit or 256-bit vector register:
// take the low xmm subregister of the source and broadcast from that.

// Single precision.
def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))),
          (VBROADCASTSSZr (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
def : Pat<(v16f32 (X86VBroadcast (v8f32 VR256X:$src))),
          (VBROADCASTSSZr (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm))>;

// Double precision.
def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))),
          (VBROADCASTSDZr (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>;
def : Pat<(v8f64 (X86VBroadcast (v4f64 VR256X:$src))),
          (VBROADCASTSDZr (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm))>;
Robert Khasanovdd09a8f2014-10-28 12:28:51 +00001708
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001709//===----------------------------------------------------------------------===//
1710// AVX-512 BROADCAST MASK TO VECTOR REGISTER
1711//---
// Defines the single register form `rr`, which takes a mask register of
// class KRC and produces a vector of type _.VT via X86VBroadcastm.
multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
                                  X86VectorVTInfo _, RegisterClass KRC> {
  def rr : AVX512XS8I<opc, MRMSrcReg,
                      (outs _.RC:$dst), (ins KRC:$src),
                      OpcodeStr # "\t{$src, $dst|$dst, $src}",
                      [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))],
                      IIC_SSE_PSHUF_RI>,
           EVEX, Sched<[WriteShuffle]>;
}
1719
// Instantiates avx512_mask_broadcastm at all three vector widths. The
// 512-bit form needs CDI; the 256/128-bit forms additionally need VLX.
multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo VTInfo,
                                 RegisterClass KRC> {
  let Predicates = [HasCDI] in {
    defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>,
             EVEX_V512;
  }
  let Predicates = [HasCDI, HasVLX] in {
    defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>,
                EVEX_V256;
    defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>,
                EVEX_V128;
  }
}
1729
// Mask-to-vector broadcasts: a VK16 mask into i32 vectors, and a VK8 mask
// into i64 vectors (the latter with VEX_W).
defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
                                             avx512vl_i32_info, VK16>;
defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
                                             avx512vl_i64_info, VK8>,
                       VEX_W;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001734
1735//===----------------------------------------------------------------------===//
Craig Topperaad5f112015-11-30 00:13:24 +00001736// -- VPERMI2 - 3 source operands form --
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001737
// Itinerary/scheduling bundles shared by the VPERMI2*/VPERMT2* definitions:
// one for the floating-point variants and one for the integer variants.
def AVX512_PERM2_F : OpndItins<IIC_SSE_SHUFP, IIC_SSE_SHUFP> {
  let Sched = WriteFShuffle256;
}

def AVX512_PERM2_I : OpndItins<IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI> {
  let Sched = WriteShuffle256;
}
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001747
// VPERMI2 register (rr) and full-vector memory (rm) forms. $src1 is tied to
// $dst and is the first operand of X86VPermi2X.
multiclass avx512_perm_i<bits<8> opc, string OpcodeStr, OpndItins itins,
                         X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
    // The index operand in the pattern should really be an integer type.
    // However, if we do that and it happens to come from a bitcast, then it
    // becomes difficult to find the bitcast needed to convert the index to
    // the destination type for the passthru since it will be folded with the
    // bitcast of the index operand.
    defm rr : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                (ins _.RC:$src2, _.RC:$src3),
                OpcodeStr, "$src3, $src2", "$src2, $src3",
                (_.VT (X86VPermi2X _.RC:$src1, _.RC:$src2, _.RC:$src3)),
                itins.rr, 1>,
              EVEX_4V, AVX5128IBase, Sched<[itins.Sched]>;

    defm rm : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src2, _.MemOp:$src3),
                OpcodeStr, "$src3, $src2", "$src2, $src3",
                (_.VT (X86VPermi2X _.RC:$src1, _.RC:$src2,
                        (_.VT (bitconvert (_.LdFrag addr:$src3))))),
                itins.rm, 1>,
              EVEX_4V, AVX5128IBase,
              Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
Simon Pilgrimfb01cb12017-12-01 17:23:06 +00001770
// VPERMI2 embedded-broadcast memory form (rmb): $src3 is a scalar memory
// operand broadcast to the full vector type (EVEX_B). $src1 is tied to $dst.
multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr, OpndItins itins,
                            X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
    defm rmb : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                 (ins _.RC:$src2, _.ScalarMemOp:$src3),
                 OpcodeStr,
                 !strconcat("${src3}", _.BroadcastStr, ", $src2"),
                 !strconcat("$src2, ${src3}", _.BroadcastStr),
                 (_.VT (X86VPermi2X _.RC:$src1, _.RC:$src2,
                         (_.VT (X86VBroadcast
                                 (_.ScalarLdFrag addr:$src3))))),
                 itins.rm, 1>,
               AVX5128IBase, EVEX_4V, EVEX_B,
               Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
1783
// Instantiates the VPERMI2 rr/rm/rmb forms at 512 bits, and at 128/256 bits
// when VLX is available. The 512-bit form adds no predicate of its own.
multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr, OpndItins itins,
                               AVX512VLVectorVTInfo VTInfo> {
  defm NAME : avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info512>,
              avx512_perm_i_mb<opc, OpcodeStr, itins, VTInfo.info512>,
              EVEX_V512;

  let Predicates = [HasVLX] in {
    defm NAME#128 : avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info128>,
                    avx512_perm_i_mb<opc, OpcodeStr, itins, VTInfo.info128>,
                    EVEX_V128;
    defm NAME#256 : avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info256>,
                    avx512_perm_i_mb<opc, OpcodeStr, itins, VTInfo.info256>,
                    EVEX_V256;
  }
}
Elena Demikhovskyf07df9f2015-11-25 08:17:56 +00001795
// Like avx512_perm_i_sizes, but without the embedded-broadcast (rmb) form
// and gated on the caller-supplied predicate Prd at every width.
multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
                                  OpndItins itins,
                                  AVX512VLVectorVTInfo VTInfo,
                                  Predicate Prd> {
  let Predicates = [Prd] in {
    defm NAME : avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info512>,
                EVEX_V512;
  }
  let Predicates = [Prd, HasVLX] in {
    defm NAME#128 : avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info128>,
                    EVEX_V128;
    defm NAME#256 : avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info256>,
                    EVEX_V256;
  }
}
Elena Demikhovskyd3057e52015-06-18 08:56:19 +00001807
// VPERMI2 instantiations. D/Q and PS/PD use avx512_perm_i_sizes (which
// includes the rmb form); W and B use the _bw variant with an explicit
// feature predicate.
defm VPERMI2D  : avx512_perm_i_sizes<0x76, "vpermi2d", AVX512_PERM2_I,
                                     avx512vl_i32_info>,
                 EVEX_CD8<32, CD8VF>;
defm VPERMI2Q  : avx512_perm_i_sizes<0x76, "vpermi2q", AVX512_PERM2_I,
                                     avx512vl_i64_info>,
                 VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMI2W  : avx512_perm_i_sizes_bw<0x75, "vpermi2w", AVX512_PERM2_I,
                                        avx512vl_i16_info, HasBWI>,
                 VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMI2B  : avx512_perm_i_sizes_bw<0x75, "vpermi2b", AVX512_PERM2_I,
                                        avx512vl_i8_info, HasVBMI>,
                 EVEX_CD8<8, CD8VF>;
defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", AVX512_PERM2_F,
                                     avx512vl_f32_info>,
                 EVEX_CD8<32, CD8VF>;
defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", AVX512_PERM2_F,
                                     avx512vl_f64_info>,
                 VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyd3057e52015-06-18 08:56:19 +00001822
Craig Topperaad5f112015-11-30 00:13:24 +00001823// VPERMT2
// VPERMT2 register (rr) and full-vector memory (rm) forms. $src1 is tied to
// $dst; $src2 uses the register class of the separate IdxVT info.
multiclass avx512_perm_t<bits<8> opc, string OpcodeStr, OpndItins itins,
                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
    defm rr : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                (ins IdxVT.RC:$src2, _.RC:$src3),
                OpcodeStr, "$src3, $src2", "$src2, $src3",
                (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)),
                itins.rr, 1>,
              EVEX_4V, AVX5128IBase, Sched<[itins.Sched]>;

    defm rm : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins IdxVT.RC:$src2, _.MemOp:$src3),
                OpcodeStr, "$src3, $src2", "$src2, $src3",
                (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
                        (bitconvert (_.LdFrag addr:$src3)))),
                itins.rm, 1>,
              EVEX_4V, AVX5128IBase,
              Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
// VPERMT2 embedded-broadcast memory form (rmb): $src3 is a scalar memory
// operand broadcast to the full vector type (EVEX_B). $src1 is tied to $dst.
multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr, OpndItins itins,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
    defm rmb : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                 (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
                 OpcodeStr,
                 !strconcat("${src3}", _.BroadcastStr, ", $src2"),
                 !strconcat("$src2, ${src3}", _.BroadcastStr),
                 (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
                         (_.VT (X86VBroadcast
                                 (_.ScalarLdFrag addr:$src3))))),
                 itins.rm, 1>,
               AVX5128IBase, EVEX_4V, EVEX_B,
               Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
1853
// Instantiates the VPERMT2 rr/rm/rmb forms at 512 bits, and at 128/256 bits
// when VLX is available. ShuffleMask supplies the per-width index info.
multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr, OpndItins itins,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  defm NAME : avx512_perm_t<opc, OpcodeStr, itins, VTInfo.info512,
                            ShuffleMask.info512>,
              avx512_perm_t_mb<opc, OpcodeStr, itins, VTInfo.info512,
                               ShuffleMask.info512>,
              EVEX_V512;

  let Predicates = [HasVLX] in {
    defm NAME#128 : avx512_perm_t<opc, OpcodeStr, itins, VTInfo.info128,
                                  ShuffleMask.info128>,
                    avx512_perm_t_mb<opc, OpcodeStr, itins, VTInfo.info128,
                                     ShuffleMask.info128>,
                    EVEX_V128;
    defm NAME#256 : avx512_perm_t<opc, OpcodeStr, itins, VTInfo.info256,
                                  ShuffleMask.info256>,
                    avx512_perm_t_mb<opc, OpcodeStr, itins, VTInfo.info256,
                                     ShuffleMask.info256>,
                    EVEX_V256;
  }
}
1872
// Like avx512_perm_t_sizes, but without the embedded-broadcast (rmb) form
// and gated on the caller-supplied predicate Prd at every width.
multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
                                  OpndItins itins,
                                  AVX512VLVectorVTInfo VTInfo,
                                  AVX512VLVectorVTInfo Idx,
                                  Predicate Prd> {
  let Predicates = [Prd] in {
    defm NAME : avx512_perm_t<opc, OpcodeStr, itins, VTInfo.info512,
                              Idx.info512>,
                EVEX_V512;
  }
  let Predicates = [Prd, HasVLX] in {
    defm NAME#128 : avx512_perm_t<opc, OpcodeStr, itins, VTInfo.info128,
                                  Idx.info128>,
                    EVEX_V128;
    defm NAME#256 : avx512_perm_t<opc, OpcodeStr, itins, VTInfo.info256,
                                  Idx.info256>,
                    EVEX_V256;
  }
}
Simon Pilgrim8d5e4692017-12-01 17:24:15 +00001887
// VPERMT2 instantiations. The second VT-info argument describes the index
// operand; for PS/PD it is the integer info of matching element width.
defm VPERMT2D  : avx512_perm_t_sizes<0x7E, "vpermt2d", AVX512_PERM2_I,
                                     avx512vl_i32_info, avx512vl_i32_info>,
                 EVEX_CD8<32, CD8VF>;
defm VPERMT2Q  : avx512_perm_t_sizes<0x7E, "vpermt2q", AVX512_PERM2_I,
                                     avx512vl_i64_info, avx512vl_i64_info>,
                 VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMT2W  : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", AVX512_PERM2_I,
                                        avx512vl_i16_info, avx512vl_i16_info,
                                        HasBWI>,
                 VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMT2B  : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", AVX512_PERM2_I,
                                        avx512vl_i8_info, avx512vl_i8_info,
                                        HasVBMI>,
                 EVEX_CD8<8, CD8VF>;
defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", AVX512_PERM2_F,
                                     avx512vl_f32_info, avx512vl_i32_info>,
                 EVEX_CD8<32, CD8VF>;
defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", AVX512_PERM2_F,
                                     avx512vl_f64_info, avx512vl_i64_info>,
                 VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovsky299cf5112014-04-29 09:09:15 +00001902
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001903//===----------------------------------------------------------------------===//
1904// AVX-512 - BLEND using mask
1905//
Simon Pilgrimd4953012017-12-05 21:05:25 +00001906
// Itinerary/scheduling bundles for the mask-blend instructions: one for the
// floating-point forms and one for the integer forms.
def AVX512_BLENDM : OpndItins<IIC_SSE_ALU_F32P_RR, IIC_SSE_ALU_F32P_RM> {
  let Sched = WriteFVarBlend;
}

def AVX512_PBLENDM : OpndItins<IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM> {
  let Sched = WriteVarBlend;
}
1916
// Mask-blend register and full-vector memory forms. Each comes unmasked, with
// a write-mask (EVEX_K) and with a zeroing write-mask (EVEX_KZ, "{z}").
// None of these definitions carries a selection pattern.
multiclass avx512_blendmask<bits<8> opc, string OpcodeStr, OpndItins itins,
                            X86VectorVTInfo _> {
  // Register-register forms.
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
    def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
               (ins _.RC:$src1, _.RC:$src2),
               !strconcat(OpcodeStr,
                 "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
               [], itins.rr>,
             EVEX_4V, Sched<[itins.Sched]>;
    def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
                (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
                !strconcat(OpcodeStr,
                  "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
                [], itins.rr>,
              EVEX_4V, EVEX_K, Sched<[itins.Sched]>;
    def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
                 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
                 !strconcat(OpcodeStr,
                   "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
                 [], itins.rr>,
               EVEX_4V, EVEX_KZ, Sched<[itins.Sched]>;
  }

  // Register-memory forms; mayLoad is set explicitly because there is no
  // pattern to infer it from.
  let ExeDomain = _.ExeDomain, hasSideEffects = 0, mayLoad = 1 in {
    def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
               (ins _.RC:$src1, _.MemOp:$src2),
               !strconcat(OpcodeStr,
                 "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
               [], itins.rm>,
             EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[itins.Sched.Folded, ReadAfterLd]>;
    def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
                (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
                !strconcat(OpcodeStr,
                  "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
                [], itins.rm>,
              EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
              Sched<[itins.Sched.Folded, ReadAfterLd]>;
    def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
                 (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
                 !strconcat(OpcodeStr,
                   "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
                 [], itins.rm>,
               EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
               Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
// Mask-blend embedded-broadcast memory forms (EVEX_B): $src2 is a scalar
// memory operand printed with the broadcast suffix _.BroadcastStr. Defines
// the write-masked (rmbk), zero-masked (rmbkz) and unmasked (rmb) variants.
// None of these definitions carries a selection pattern, so mayLoad is set
// explicitly.
//
// ExeDomain is taken from the vector info, matching the rr/rm forms defined
// by avx512_blendmask for the same opcodes, so that all forms of one blend
// instruction report the same execution domain.
multiclass avx512_blendmask_rmb<bits<8> opc, string OpcodeStr, OpndItins itins,
                                X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in {
    def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
                 (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
                 !strconcat(OpcodeStr,
                   "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                   "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
                 [], itins.rm>,
               EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
               Sched<[itins.Sched.Folded, ReadAfterLd]>;

    def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
                  (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
                  !strconcat(OpcodeStr,
                    "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
                    "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"),
                  [], itins.rm>,
                EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
                Sched<[itins.Sched.Folded, ReadAfterLd]>;

    def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
                (ins _.RC:$src1, _.ScalarMemOp:$src2),
                !strconcat(OpcodeStr,
                  "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
                  "$dst, $src1, ${src2}", _.BroadcastStr, "}"),
                [], itins.rm>,
              EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
              Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
1985
// Instantiates the mask-blend forms, including the embedded-broadcast ones,
// at 512 bits and, under VLX, at 256 and 128 bits.
multiclass blendmask_dq<bits<8> opc, string OpcodeStr, OpndItins itins,
                        AVX512VLVectorVTInfo VTInfo> {
  defm Z : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info512>,
           avx512_blendmask_rmb<opc, OpcodeStr, itins, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z256 : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info256>,
                avx512_blendmask_rmb<opc, OpcodeStr, itins, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info128>,
                avx512_blendmask_rmb<opc, OpcodeStr, itins, VTInfo.info128>,
                EVEX_V128;
  }
}
Elena Demikhovsky172a27c2014-01-08 10:54:22 +00001998
// Instantiates the mask-blend forms without the embedded-broadcast variants.
// Every width requires BWI; the 256/128-bit forms also require VLX.
multiclass blendmask_bw<bits<8> opc, string OpcodeStr, OpndItins itins,
                        AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasBWI] in {
    defm Z : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info512>,
             EVEX_V512;
  }

  let Predicates = [HasBWI, HasVLX] in {
    defm Z256 : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info128>,
                EVEX_V128;
  }
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002009
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002010
// Mask-blend instantiations. PS/PD and D/Q go through blendmask_dq; B/W go
// through blendmask_bw. The 64-bit and word variants add VEX_W.
defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", AVX512_BLENDM,
                              avx512vl_f32_info>;
defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", AVX512_BLENDM,
                              avx512vl_f64_info>,
                 VEX_W;
defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", AVX512_PBLENDM,
                              avx512vl_i32_info>;
defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", AVX512_PBLENDM,
                              avx512vl_i64_info>,
                 VEX_W;
defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", AVX512_PBLENDM,
                              avx512vl_i8_info>;
defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", AVX512_PBLENDM,
                              avx512vl_i16_info>,
                 VEX_W;
Elena Demikhovsky172a27c2014-01-08 10:54:22 +00002017
Elena Demikhovsky172a27c2014-01-08 10:54:22 +00002018
Elena Demikhovsky47fc44e2013-12-16 13:52:35 +00002019//===----------------------------------------------------------------------===//
2020// Compare Instructions
2021//===----------------------------------------------------------------------===//
2022
2023// avx512_cmp_scalar - AVX512 CMPSS and CMPSD
Igor Bregerb7e1f9d2015-09-20 15:15:10 +00002024
// Scalar compare-into-mask definitions (opcode 0xC2) for one scalar VT info.
//
//   _          - scalar vector-type info (register classes, mem operands,
//                suffix, element size).
//   OpNode     - compare node used by the non-{sae} patterns.
//   OpNodeRnd  - compare node used by the {sae} pattern; it takes an extra
//                (i32 FROUND_NO_EXC) operand.
//   itins      - itinerary/scheduling bundle (rr, rm, Sched).
//
// Three groups are defined:
//   * rr_Int / rm_Int / rrb_Int : maskable forms on _.RC with a condition-code
//     operand (AVXCC) spelled into the mnemonic.
//   * rri_alt / rmi_alt / rrb_alt : assembler-only forms taking the condition
//     as an explicit u8 immediate; they carry no patterns.
//   * rr / rm : codegen-only forms on _.FRC.
//
// Every form writes its result to _.KRC. rri_alt used to hard-code VK1; for
// the scalar infos this multiclass is used with that is presumably the same
// register class (X86VectorVTInfo derives KRC as "VK" # NumElts), so using
// _.KRC keeps the multiclass uniformly parameterized on `_`.
multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd,
                             OpndItins itins> {
  defm rr_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                      (outs _.KRC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
                      "vcmp${cc}"#_.Suffix,
                      "$src2, $src1", "$src1, $src2",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              imm:$cc), itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
  let mayLoad = 1 in
  defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                    (outs _.KRC:$dst),
                    (ins _.RC:$src1, _.IntScalarMemOp:$src2, AVXCC:$cc),
                    "vcmp${cc}"#_.Suffix,
                    "$src2, $src1", "$src1, $src2",
                    (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
                        imm:$cc), itins.rm>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
                    Sched<[itins.Sched.Folded, ReadAfterLd]>;

  // {sae} form: register operands only, EVEX_B set.
  defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                     (outs _.KRC:$dst),
                     (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
                     "vcmp${cc}"#_.Suffix,
                     "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                     (OpNodeRnd (_.VT _.RC:$src1),
                                (_.VT _.RC:$src2),
                                imm:$cc,
                                (i32 FROUND_NO_EXC)), itins.rr>,
                     EVEX_4V, EVEX_B, Sched<[itins.Sched]>;
  // Accept explicit immediate argument form instead of comparison code.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
                        (outs _.KRC:$dst),
                        (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                        "vcmp"#_.Suffix,
                        "$cc, $src2, $src1", "$src1, $src2, $cc", itins.rr>, EVEX_4V,
                        Sched<[itins.Sched]>;
  let mayLoad = 1 in
    defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
                        (outs _.KRC:$dst),
                        (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
                        "vcmp"#_.Suffix,
                        "$cc, $src2, $src1", "$src1, $src2, $cc", itins.rm>,
                        EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
                        Sched<[itins.Sched.Folded, ReadAfterLd]>;

    defm rrb_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
                       (outs _.KRC:$dst),
                       (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                       "vcmp"#_.Suffix,
                       "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc", itins.rr>,
                       EVEX_4V, EVEX_B, Sched<[itins.Sched]>;
  }// let isAsmParserOnly = 1, hasSideEffects = 0

  // Codegen-only forms operating on the scalar FP register class (_.FRC).
  let isCodeGenOnly = 1 in {
    // Only the register form is marked commutable.
    let isCommutable = 1 in
    def rr : AVX512Ii8<0xC2, MRMSrcReg,
                (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, AVXCC:$cc),
                !strconcat("vcmp${cc}", _.Suffix,
                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                          _.FRC:$src2,
                                          imm:$cc))],
                itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
    def rm : AVX512Ii8<0xC2, MRMSrcMem,
              (outs _.KRC:$dst),
              (ins _.FRC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
              !strconcat("vcmp${cc}", _.Suffix,
                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
              [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                        (_.ScalarLdFrag addr:$src2),
                                        imm:$cc))],
              itins.rm>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
2102
// Scalar single- and double-precision compare instantiations, each pinned to
// its own execution domain and gated on AVX512.
let Predicates = [HasAVX512], ExeDomain = SSEPackedSingle in
defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsRnd,
                                 SSE_ALU_F32S>,
               AVX512XSIi8Base;

let Predicates = [HasAVX512], ExeDomain = SSEPackedDouble in
defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsRnd,
                                 SSE_ALU_F64S>,
               AVX512XDIi8Base, VEX_W;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002111
// Packed integer compare whose result is written to a mask register (_.KRC).
// Each instantiation emits four records:
//   rr  / rm  - unmasked compare; second operand is a register / a full-vector
//               memory operand (_.MemOp, loaded through _.LdFrag + bitconvert).
//   rrk / rmk - same operands plus a $mask input; the pattern ANDs the compare
//               result with $mask.
// Parameters:
//   opc          - opcode byte.
//   OpcodeStr    - mnemonic used in the asm string.
//   OpNode       - two-operand PatFrag used in the selection patterns.
//   itins        - itinerary / scheduling info (rr, rm, Sched are read).
//   _            - vector type info (RC, KRC, KRCWM, VT, MemOp, LdFrag).
//   IsCommutable - applied as isCommutable to the register forms (rr, rrk) only.
Craig Topper513d3fa2018-01-27 20:19:02 +00002112multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, PatFrag OpNode,
Simon Pilgrima2b58622017-12-05 12:02:22 +00002113 OpndItins itins, X86VectorVTInfo _, bit IsCommutable> {
Craig Topper392cd032016-09-03 16:28:03 +00002114 let isCommutable = IsCommutable in
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002115 def rr : AVX512BI<opc, MRMSrcReg,
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002116 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
2117 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2118 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))],
Simon Pilgrima2b58622017-12-05 12:02:22 +00002119 itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002120 def rm : AVX512BI<opc, MRMSrcMem,
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002121 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
2122 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2123 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
2124 (_.VT (bitconvert (_.LdFrag addr:$src2)))))],
Simon Pilgrima2b58622017-12-05 12:02:22 +00002125 itins.rm>, EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Masked forms: the selection pattern is (and $mask, <compare>).
Craig Toppere1d81032017-06-13 07:13:47 +00002126 let isCommutable = IsCommutable in
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002127 def rrk : AVX512BI<opc, MRMSrcReg,
2128 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
2129 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2130 "$dst {${mask}}, $src1, $src2}"),
2131 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2132 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))))],
Simon Pilgrima2b58622017-12-05 12:02:22 +00002133 itins.rr>, EVEX_4V, EVEX_K, Sched<[itins.Sched]>;
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002134 def rmk : AVX512BI<opc, MRMSrcMem,
2135 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2136 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2137 "$dst {${mask}}, $src1, $src2}"),
2138 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2139 (OpNode (_.VT _.RC:$src1),
2140 (_.VT (bitconvert
2141 (_.LdFrag addr:$src2))))))],
Simon Pilgrima2b58622017-12-05 12:02:22 +00002142 itins.rm>, EVEX_4V, EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002143}
2144
// Extends avx512_icmp_packed (inheriting rr/rm/rrk/rmk) with two broadcast
// memory forms:
//   rmb  - second operand is a scalar memory operand (_.ScalarMemOp) matched
//          as X86VBroadcast of a scalar load; asm operand is suffixed with
//          _.BroadcastStr; tagged EVEX_B.
//   rmbk - same, with the result ANDed with $mask; tagged EVEX_K and EVEX_B.
// Takes the same parameters as avx512_icmp_packed and forwards them unchanged.
Craig Topper513d3fa2018-01-27 20:19:02 +00002145multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
Simon Pilgrima2b58622017-12-05 12:02:22 +00002146 OpndItins itins, X86VectorVTInfo _, bit IsCommutable> :
2147 avx512_icmp_packed<opc, OpcodeStr, OpNode, itins, _, IsCommutable> {
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002148 def rmb : AVX512BI<opc, MRMSrcMem,
2149 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
2150 !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
2151 "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
2152 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
2153 (X86VBroadcast (_.ScalarLdFrag addr:$src2))))],
Simon Pilgrima2b58622017-12-05 12:02:22 +00002154 itins.rm>, EVEX_4V, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002155 def rmbk : AVX512BI<opc, MRMSrcMem,
2156 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2157 _.ScalarMemOp:$src2),
2158 !strconcat(OpcodeStr,
2159 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2160 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
2161 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2162 (OpNode (_.VT _.RC:$src1),
2163 (X86VBroadcast
2164 (_.ScalarLdFrag addr:$src2)))))],
Simon Pilgrima2b58622017-12-05 12:02:22 +00002165 itins.rm>, EVEX_4V, EVEX_K, EVEX_B,
2166 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002167}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002168
// Instantiates avx512_icmp_packed at all three vector lengths:
//   Z    - VTInfo.info512, EVEX_V512, predicated on [prd].
//   Z256 - VTInfo.info256, EVEX_V256, predicated on [prd, HasVLX].
//   Z128 - VTInfo.info128, EVEX_V128, predicated on [prd, HasVLX].
// IsCommutable defaults to 0 and is forwarded to every instantiation.
Craig Topper513d3fa2018-01-27 20:19:02 +00002169multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, PatFrag OpNode,
Simon Pilgrima2b58622017-12-05 12:02:22 +00002170 OpndItins itins, AVX512VLVectorVTInfo VTInfo,
2171 Predicate prd, bit IsCommutable = 0> {
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002172 let Predicates = [prd] in
Simon Pilgrima2b58622017-12-05 12:02:22 +00002173 defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, itins, VTInfo.info512,
Craig Topper392cd032016-09-03 16:28:03 +00002174 IsCommutable>, EVEX_V512;
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002175
2176 let Predicates = [prd, HasVLX] in {
Simon Pilgrima2b58622017-12-05 12:02:22 +00002177 defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, itins, VTInfo.info256,
Craig Topper392cd032016-09-03 16:28:03 +00002178 IsCommutable>, EVEX_V256;
Simon Pilgrima2b58622017-12-05 12:02:22 +00002179 defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, itins, VTInfo.info128,
Craig Topper392cd032016-09-03 16:28:03 +00002180 IsCommutable>, EVEX_V128;
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002181 }
2182}
2183
// Same vector-length fan-out as avx512_icmp_packed_vl, but instantiating
// avx512_icmp_packed_rmb so each length also gets the broadcast-memory forms.
// Z uses [prd]; Z256 and Z128 additionally require HasVLX.
2184multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
Craig Topper513d3fa2018-01-27 20:19:02 +00002185 PatFrag OpNode, OpndItins itins,
Simon Pilgrima2b58622017-12-05 12:02:22 +00002186 AVX512VLVectorVTInfo VTInfo,
2187 Predicate prd, bit IsCommutable = 0> {
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002188 let Predicates = [prd] in
Simon Pilgrima2b58622017-12-05 12:02:22 +00002189 defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info512,
Craig Topper392cd032016-09-03 16:28:03 +00002190 IsCommutable>, EVEX_V512;
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002191
2192 let Predicates = [prd, HasVLX] in {
Simon Pilgrima2b58622017-12-05 12:02:22 +00002193 defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info256,
Craig Topper392cd032016-09-03 16:28:03 +00002194 IsCommutable>, EVEX_V256;
Simon Pilgrima2b58622017-12-05 12:02:22 +00002195 defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info128,
Craig Topper392cd032016-09-03 16:28:03 +00002196 IsCommutable>, EVEX_V128;
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002197 }
2198}
2199
Craig Topper9471a7c2018-02-19 19:23:31 +00002200// This fragment treats X86cmpm as commutable to help match loads in both
2201// operands for PCMPEQ.
// It wraps X86cmpm_c with the condition-code immediate fixed to (i8 0).
2202def X86pcmpeqm_c : PatFrag<(ops node:$src1, node:$src2),
2203 (X86cmpm_c node:$src1, node:$src2, (i8 0))>;
// X86cmpm with the condition-code immediate fixed to (i8 6); used as the
// OpNode of the VPCMPGT* definitions (presumably the signed greater-than
// encoding - confirm against the AVX512ICC operand definition).
Craig Topper513d3fa2018-01-27 20:19:02 +00002204def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
2205 (X86cmpm node:$src1, node:$src2, (i8 6))>;
2206
// Packed integer equality (VPCMPEQ*) and greater-than (VPCMPGT*) compares
// that write a mask register.
//   - Byte/word forms use avx512_icmp_packed_vl (no broadcast forms) and are
//     predicated on HasBWI; they are marked VEX_WIG.
//   - Dword/qword forms use avx512_icmp_packed_rmb_vl (adds broadcast forms)
//     and are predicated on HasAVX512; the qword forms add T8PD and VEX_W.
//   - The EQ forms pass IsCommutable = 1; the GT forms leave it at the default.
Simon Pilgrima2b58622017-12-05 12:02:22 +00002207// FIXME: Is there a better scheduler itinerary for VPCMP?
Craig Topper9471a7c2018-02-19 19:23:31 +00002208defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm_c,
Simon Pilgrima2b58622017-12-05 12:02:22 +00002209 SSE_ALU_F32P, avx512vl_i8_info, HasBWI, 1>,
Craig Toppera33846a2017-10-22 06:18:23 +00002210 EVEX_CD8<8, CD8VF>, VEX_WIG;
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002211
Craig Topper9471a7c2018-02-19 19:23:31 +00002212defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm_c,
Simon Pilgrima2b58622017-12-05 12:02:22 +00002213 SSE_ALU_F32P, avx512vl_i16_info, HasBWI, 1>,
Craig Toppera33846a2017-10-22 06:18:23 +00002214 EVEX_CD8<16, CD8VF>, VEX_WIG;
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002215
Craig Topper9471a7c2018-02-19 19:23:31 +00002216defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm_c,
Simon Pilgrima2b58622017-12-05 12:02:22 +00002217 SSE_ALU_F32P, avx512vl_i32_info, HasAVX512, 1>,
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002218 EVEX_CD8<32, CD8VF>;
2219
Craig Topper9471a7c2018-02-19 19:23:31 +00002220defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm_c,
Simon Pilgrima2b58622017-12-05 12:02:22 +00002221 SSE_ALU_F32P, avx512vl_i64_info, HasAVX512, 1>,
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002222 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
2223
2224defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm,
Simon Pilgrima2b58622017-12-05 12:02:22 +00002225 SSE_ALU_F32P, avx512vl_i8_info, HasBWI>,
Craig Toppera33846a2017-10-22 06:18:23 +00002226 EVEX_CD8<8, CD8VF>, VEX_WIG;
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002227
2228defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm,
Simon Pilgrima2b58622017-12-05 12:02:22 +00002229 SSE_ALU_F32P, avx512vl_i16_info, HasBWI>,
Craig Toppera33846a2017-10-22 06:18:23 +00002230 EVEX_CD8<16, CD8VF>, VEX_WIG;
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002231
Robert Khasanovf70f7982014-09-18 14:06:55 +00002232defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm,
Simon Pilgrima2b58622017-12-05 12:02:22 +00002233 SSE_ALU_F32P, avx512vl_i32_info, HasAVX512>,
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002234 EVEX_CD8<32, CD8VF>;
2235
Robert Khasanovf70f7982014-09-18 14:06:55 +00002236defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm,
Simon Pilgrima2b58622017-12-05 12:02:22 +00002237 SSE_ALU_F32P, avx512vl_i64_info, HasAVX512>,
Robert Khasanov2ea081d2014-08-25 14:49:34 +00002238 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002239
Craig Toppera88306e2017-10-10 06:36:46 +00002240// Transforms to swizzle an immediate to help matching memory operand in first
2241// operand.
// Only the low three bits of the incoming immediate are kept; the value is
// then mapped through X86::getSwappedVPCMPImm and re-emitted as an i8
// immediate.
2242def CommutePCMPCC : SDNodeXForm<imm, [{
2243 uint8_t Imm = N->getZExtValue() & 0x7;
Craig Topper9b64bf52018-02-20 03:58:11 +00002244 Imm = X86::getSwappedVPCMPImm(Imm);
Craig Toppera88306e2017-10-10 06:36:46 +00002245 return getI8Imm(Imm, SDLoc(N));
2246}]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002247
// Packed integer compare taking a condition-code immediate and writing a mask
// register. Each instantiation emits:
//   rri / rmi / rrik / rmik
//       Condition code is an AVX512ICC operand printed inside the mnemonic
//       ("vpcmp${cc}" # Suffix). These carry selection patterns; the masked
//       (k) forms match (and $mask, <compare>). The register forms are marked
//       isCommutable.
//   rri_alt / rmi_alt / rrik_alt / rmik_alt
//       Assembler-only (isAsmParserOnly) forms taking the immediate as an
//       explicit trailing u8imm operand; they have empty patterns, so
//       hasSideEffects is cleared and mayLoad is set by hand on the memory
//       forms.
//   Two anonymous Pats
//       Match a compare whose FIRST operand is the load and select the
//       rmi / rmik instruction, rewriting the immediate with CommutePCMPCC.
// Parameters: opc (opcode byte), Suffix (mnemonic suffix), OpNode (three-operand
// compare node), itins (itinerary / scheduling info), _ (vector type info).
Robert Khasanov29e3b962014-08-27 09:34:37 +00002248multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode,
Simon Pilgrimaa911552017-12-05 12:14:36 +00002249 OpndItins itins, X86VectorVTInfo _> {
Craig Topper149e6bd2016-09-09 01:36:10 +00002250 let isCommutable = 1 in
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002251 def rri : AVX512AIi8<opc, MRMSrcReg,
Craig Topper7d3c6d32015-01-28 10:09:56 +00002252 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, AVX512ICC:$cc),
Adam Nemet1efcb902014-07-01 18:03:43 +00002253 !strconcat("vpcmp${cc}", Suffix,
2254 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
Robert Khasanov29e3b962014-08-27 09:34:37 +00002255 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2256 imm:$cc))],
Simon Pilgrimaa911552017-12-05 12:14:36 +00002257 itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002258 def rmi : AVX512AIi8<opc, MRMSrcMem,
Craig Topper7d3c6d32015-01-28 10:09:56 +00002259 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, AVX512ICC:$cc),
Adam Nemet1efcb902014-07-01 18:03:43 +00002260 !strconcat("vpcmp${cc}", Suffix,
2261 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
Robert Khasanov29e3b962014-08-27 09:34:37 +00002262 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
2263 (_.VT (bitconvert (_.LdFrag addr:$src2))),
Craig Topper6e3a5822014-12-27 20:08:45 +00002264 imm:$cc))],
Simon Pilgrimaa911552017-12-05 12:14:36 +00002265 itins.rm>, EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Craig Topper8b876762017-06-13 07:13:50 +00002266 let isCommutable = 1 in
Robert Khasanov29e3b962014-08-27 09:34:37 +00002267 def rrik : AVX512AIi8<opc, MRMSrcReg,
2268 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
Craig Topper7d3c6d32015-01-28 10:09:56 +00002269 AVX512ICC:$cc),
Robert Khasanov29e3b962014-08-27 09:34:37 +00002270 !strconcat("vpcmp${cc}", Suffix,
2271 "\t{$src2, $src1, $dst {${mask}}|",
2272 "$dst {${mask}}, $src1, $src2}"),
2273 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2274 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
Craig Topper6e3a5822014-12-27 20:08:45 +00002275 imm:$cc)))],
Simon Pilgrimaa911552017-12-05 12:14:36 +00002276 itins.rr>, EVEX_4V, EVEX_K, Sched<[itins.Sched]>;
Robert Khasanov29e3b962014-08-27 09:34:37 +00002277 def rmik : AVX512AIi8<opc, MRMSrcMem,
2278 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
Craig Topper7d3c6d32015-01-28 10:09:56 +00002279 AVX512ICC:$cc),
Robert Khasanov29e3b962014-08-27 09:34:37 +00002280 !strconcat("vpcmp${cc}", Suffix,
2281 "\t{$src2, $src1, $dst {${mask}}|",
2282 "$dst {${mask}}, $src1, $src2}"),
2283 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2284 (OpNode (_.VT _.RC:$src1),
2285 (_.VT (bitconvert (_.LdFrag addr:$src2))),
Craig Topper6e3a5822014-12-27 20:08:45 +00002286 imm:$cc)))],
Simon Pilgrimaa911552017-12-05 12:14:36 +00002287 itins.rm>, EVEX_4V, EVEX_K,
2288 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Robert Khasanov29e3b962014-08-27 09:34:37 +00002289
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002290 // Accept explicit immediate argument form instead of comparison code.
Craig Topper0550ce72014-01-05 04:55:55 +00002291 let isAsmParserOnly = 1, hasSideEffects = 0 in {
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002292 def rri_alt : AVX512AIi8<opc, MRMSrcReg,
Craig Topper7ff6ab32015-01-21 08:43:49 +00002293 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
Robert Khasanov29e3b962014-08-27 09:34:37 +00002294 !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
2295 "$dst, $src1, $src2, $cc}"),
Simon Pilgrimaa911552017-12-05 12:14:36 +00002296 [], itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
Craig Topper9f4d4852015-01-20 12:15:30 +00002297 let mayLoad = 1 in
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002298 def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
Craig Topper7ff6ab32015-01-21 08:43:49 +00002299 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
Robert Khasanov29e3b962014-08-27 09:34:37 +00002300 !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
2301 "$dst, $src1, $src2, $cc}"),
Simon Pilgrimaa911552017-12-05 12:14:36 +00002302 [], itins.rm>, EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Robert Khasanov29e3b962014-08-27 09:34:37 +00002303 def rrik_alt : AVX512AIi8<opc, MRMSrcReg,
2304 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
Craig Topper7ff6ab32015-01-21 08:43:49 +00002305 u8imm:$cc),
Adam Nemet16de2482014-07-01 18:03:45 +00002306 !strconcat("vpcmp", Suffix,
Robert Khasanov29e3b962014-08-27 09:34:37 +00002307 "\t{$cc, $src2, $src1, $dst {${mask}}|",
2308 "$dst {${mask}}, $src1, $src2, $cc}"),
Simon Pilgrimaa911552017-12-05 12:14:36 +00002309 [], itins.rr>, EVEX_4V, EVEX_K, Sched<[itins.Sched]>;
Craig Topper9f4d4852015-01-20 12:15:30 +00002310 let mayLoad = 1 in
Robert Khasanov29e3b962014-08-27 09:34:37 +00002311 def rmik_alt : AVX512AIi8<opc, MRMSrcMem,
2312 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
Craig Topper7ff6ab32015-01-21 08:43:49 +00002313 u8imm:$cc),
Robert Khasanov29e3b962014-08-27 09:34:37 +00002314 !strconcat("vpcmp", Suffix,
2315 "\t{$cc, $src2, $src1, $dst {${mask}}|",
2316 "$dst {${mask}}, $src1, $src2, $cc}"),
Simon Pilgrimaa911552017-12-05 12:14:36 +00002317 [], itins.rm>, EVEX_4V, EVEX_K,
2318 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002319 }
Craig Toppera88306e2017-10-10 06:36:46 +00002320
// Load in the first operand: select the memory form with the operands swapped
// and the condition code rewritten by CommutePCMPCC.
2321 def : Pat<(OpNode (bitconvert (_.LdFrag addr:$src2)),
2322 (_.VT _.RC:$src1), imm:$cc),
2323 (!cast<Instruction>(NAME#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2324 (CommutePCMPCC imm:$cc))>;
2325
2326 def : Pat<(and _.KRCWM:$mask, (OpNode (bitconvert (_.LdFrag addr:$src2)),
2327 (_.VT _.RC:$src1), imm:$cc)),
2328 (!cast<Instruction>(NAME#_.ZSuffix#"rmik") _.KRCWM:$mask,
2329 _.RC:$src1, addr:$src2,
2330 (CommutePCMPCC imm:$cc))>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002331}
2332
// Extends avx512_icmp_cc (inheriting all of its records and patterns) with
// broadcast-memory forms:
//   rmib / rmibk         - second operand is a scalar memory operand matched
//                          as X86VBroadcast of a scalar load; tagged EVEX_B.
//                          The k form matches (and $mask, <compare>).
//   rmib_alt / rmibk_alt - assembler-only forms with an explicit u8imm
//                          immediate and empty patterns.
//   Two anonymous Pats   - match the broadcast load in the FIRST operand and
//                          select rmib / rmibk with the immediate rewritten by
//                          CommutePCMPCC.
Robert Khasanov29e3b962014-08-27 09:34:37 +00002333multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, SDNode OpNode,
Simon Pilgrimaa911552017-12-05 12:14:36 +00002334 OpndItins itins, X86VectorVTInfo _> :
2335 avx512_icmp_cc<opc, Suffix, OpNode, itins, _> {
Robert Khasanov29e3b962014-08-27 09:34:37 +00002336 def rmib : AVX512AIi8<opc, MRMSrcMem,
2337 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
Craig Topper7d3c6d32015-01-28 10:09:56 +00002338 AVX512ICC:$cc),
Robert Khasanov29e3b962014-08-27 09:34:37 +00002339 !strconcat("vpcmp${cc}", Suffix,
2340 "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
2341 "$dst, $src1, ${src2}", _.BroadcastStr, "}"),
2342 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
2343 (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
Craig Topper6e3a5822014-12-27 20:08:45 +00002344 imm:$cc))],
Simon Pilgrimaa911552017-12-05 12:14:36 +00002345 itins.rm>, EVEX_4V, EVEX_B,
2346 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Robert Khasanov29e3b962014-08-27 09:34:37 +00002347 def rmibk : AVX512AIi8<opc, MRMSrcMem,
2348 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
Craig Topper7d3c6d32015-01-28 10:09:56 +00002349 _.ScalarMemOp:$src2, AVX512ICC:$cc),
Robert Khasanov29e3b962014-08-27 09:34:37 +00002350 !strconcat("vpcmp${cc}", Suffix,
2351 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2352 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
2353 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2354 (OpNode (_.VT _.RC:$src1),
2355 (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
Craig Topper6e3a5822014-12-27 20:08:45 +00002356 imm:$cc)))],
Simon Pilgrimaa911552017-12-05 12:14:36 +00002357 itins.rm>, EVEX_4V, EVEX_K, EVEX_B,
2358 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002359
Robert Khasanov29e3b962014-08-27 09:34:37 +00002360 // Accept explicit immediate argument form instead of comparison code.
Craig Topper9f4d4852015-01-20 12:15:30 +00002361 let isAsmParserOnly = 1, hasSideEffects = 0, mayLoad = 1 in {
Robert Khasanov29e3b962014-08-27 09:34:37 +00002362 def rmib_alt : AVX512AIi8<opc, MRMSrcMem,
2363 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
Craig Topper7ff6ab32015-01-21 08:43:49 +00002364 u8imm:$cc),
Robert Khasanov29e3b962014-08-27 09:34:37 +00002365 !strconcat("vpcmp", Suffix,
2366 "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
2367 "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
Simon Pilgrimaa911552017-12-05 12:14:36 +00002368 [], itins.rm>, EVEX_4V, EVEX_B,
2369 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Robert Khasanov29e3b962014-08-27 09:34:37 +00002370 def rmibk_alt : AVX512AIi8<opc, MRMSrcMem,
2371 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
Craig Topper7ff6ab32015-01-21 08:43:49 +00002372 _.ScalarMemOp:$src2, u8imm:$cc),
Robert Khasanov29e3b962014-08-27 09:34:37 +00002373 !strconcat("vpcmp", Suffix,
2374 "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2375 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
Simon Pilgrimaa911552017-12-05 12:14:36 +00002376 [], itins.rm>, EVEX_4V, EVEX_K, EVEX_B,
2377 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Robert Khasanov29e3b962014-08-27 09:34:37 +00002378 }
Craig Toppera88306e2017-10-10 06:36:46 +00002379
// Broadcast load in the first operand: select the broadcast form with the
// operands swapped and the condition code rewritten by CommutePCMPCC.
2380 def : Pat<(OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
2381 (_.VT _.RC:$src1), imm:$cc),
2382 (!cast<Instruction>(NAME#_.ZSuffix#"rmib") _.RC:$src1, addr:$src2,
2383 (CommutePCMPCC imm:$cc))>;
2384
2385 def : Pat<(and _.KRCWM:$mask, (OpNode (X86VBroadcast
2386 (_.ScalarLdFrag addr:$src2)),
2387 (_.VT _.RC:$src1), imm:$cc)),
2388 (!cast<Instruction>(NAME#_.ZSuffix#"rmibk") _.KRCWM:$mask,
2389 _.RC:$src1, addr:$src2,
2390 (CommutePCMPCC imm:$cc))>;
Robert Khasanov29e3b962014-08-27 09:34:37 +00002391}
2392
// Instantiates avx512_icmp_cc at all three vector lengths:
//   Z    - VTInfo.info512, EVEX_V512, predicated on [prd].
//   Z256 - VTInfo.info256, EVEX_V256, predicated on [prd, HasVLX].
//   Z128 - VTInfo.info128, EVEX_V128, predicated on [prd, HasVLX].
2393multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, SDNode OpNode,
Simon Pilgrimaa911552017-12-05 12:14:36 +00002394 OpndItins itins, AVX512VLVectorVTInfo VTInfo,
2395 Predicate prd> {
Robert Khasanov29e3b962014-08-27 09:34:37 +00002396 let Predicates = [prd] in
Simon Pilgrimaa911552017-12-05 12:14:36 +00002397 defm Z : avx512_icmp_cc<opc, Suffix, OpNode, itins, VTInfo.info512>,
2398 EVEX_V512;
Robert Khasanov29e3b962014-08-27 09:34:37 +00002399
2400 let Predicates = [prd, HasVLX] in {
Simon Pilgrimaa911552017-12-05 12:14:36 +00002401 defm Z256 : avx512_icmp_cc<opc, Suffix, OpNode, itins, VTInfo.info256>,
2402 EVEX_V256;
2403 defm Z128 : avx512_icmp_cc<opc, Suffix, OpNode, itins, VTInfo.info128>,
2404 EVEX_V128;
Robert Khasanov29e3b962014-08-27 09:34:37 +00002405 }
2406}
2407
// Same vector-length fan-out as avx512_icmp_cc_vl, but instantiating
// avx512_icmp_cc_rmb so each length also gets the broadcast-memory forms.
// Z uses [prd]; Z256 and Z128 additionally require HasVLX.
2408multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, SDNode OpNode,
Simon Pilgrimaa911552017-12-05 12:14:36 +00002409 OpndItins itins, AVX512VLVectorVTInfo VTInfo,
2410 Predicate prd> {
Robert Khasanov29e3b962014-08-27 09:34:37 +00002411 let Predicates = [prd] in
Simon Pilgrimaa911552017-12-05 12:14:36 +00002412 defm Z : avx512_icmp_cc_rmb<opc, Suffix, OpNode, itins, VTInfo.info512>,
Robert Khasanov29e3b962014-08-27 09:34:37 +00002413 EVEX_V512;
2414
2415 let Predicates = [prd, HasVLX] in {
Simon Pilgrimaa911552017-12-05 12:14:36 +00002416 defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, itins, VTInfo.info256>,
Robert Khasanov29e3b962014-08-27 09:34:37 +00002417 EVEX_V256;
Simon Pilgrimaa911552017-12-05 12:14:36 +00002418 defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, itins, VTInfo.info128>,
Robert Khasanov29e3b962014-08-27 09:34:37 +00002419 EVEX_V128;
2420 }
2421}
2422
// Condition-code integer compares. The plain forms use X86cmpm and the "U"
// forms use X86cmpmu.
//   - Byte/word forms: opcodes 0x3F / 0x3E, avx512_icmp_cc_vl (no broadcast
//     forms), predicated on HasBWI; the word forms add VEX_W.
//   - Dword/qword forms: opcodes 0x1F / 0x1E, avx512_icmp_cc_rmb_vl (adds
//     broadcast forms), predicated on HasAVX512; the qword forms add VEX_W.
Simon Pilgrimaa911552017-12-05 12:14:36 +00002423// FIXME: Is there a better scheduler itinerary for VPCMP/VPCMPU?
2424defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86cmpm, SSE_ALU_F32P,
2425 avx512vl_i8_info, HasBWI>, EVEX_CD8<8, CD8VF>;
2426defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86cmpmu, SSE_ALU_F32P,
2427 avx512vl_i8_info, HasBWI>, EVEX_CD8<8, CD8VF>;
Robert Khasanov29e3b962014-08-27 09:34:37 +00002428
Simon Pilgrimaa911552017-12-05 12:14:36 +00002429defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86cmpm, SSE_ALU_F32P,
2430 avx512vl_i16_info, HasBWI>,
2431 VEX_W, EVEX_CD8<16, CD8VF>;
2432defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86cmpmu, SSE_ALU_F32P,
2433 avx512vl_i16_info, HasBWI>,
2434 VEX_W, EVEX_CD8<16, CD8VF>;
Robert Khasanov29e3b962014-08-27 09:34:37 +00002435
Simon Pilgrimaa911552017-12-05 12:14:36 +00002436defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86cmpm, SSE_ALU_F32P,
2437 avx512vl_i32_info, HasAVX512>,
2438 EVEX_CD8<32, CD8VF>;
2439defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86cmpmu, SSE_ALU_F32P,
2440 avx512vl_i32_info, HasAVX512>,
2441 EVEX_CD8<32, CD8VF>;
Robert Khasanov29e3b962014-08-27 09:34:37 +00002442
Simon Pilgrimaa911552017-12-05 12:14:36 +00002443defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86cmpm, SSE_ALU_F32P,
2444 avx512vl_i64_info, HasAVX512>,
2445 VEX_W, EVEX_CD8<64, CD8VF>;
2446defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86cmpmu, SSE_ALU_F32P,
2447 avx512vl_i64_info, HasAVX512>,
2448 VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002449
Ayman Musa721d97f2017-06-27 12:08:37 +00002450
// Packed floating-point compare (opcode 0xC2) writing a mask register.
// Built on AVX512_maskable_cmp / AVX512_maskable_cmp_alt, so the masked
// variants are generated by those helper multiclasses (the patterns below
// refer to them as "rmik" / "rmbik").
//   rri / rmi / rmbi             - condition code is an AVXCC operand printed
//                                  in the mnemonic; matched via X86cmpm with a
//                                  register, full-vector load, or broadcast
//                                  scalar load as second operand.
//   rri_alt / rmi_alt / rmbi_alt - assembler-only forms with an explicit u8imm
//                                  immediate.
//   Four anonymous Pats          - match a load / broadcast load in the FIRST
//                                  operand when the immediate satisfies
//                                  CommutableCMPCC, selecting the memory form
//                                  with the immediate passed through unchanged.
// NOTE(review): the trailing `1` on rri is presumably the IsCommutable
// argument of AVX512_maskable_cmp - confirm against its definition.
Simon Pilgrimbb791b32017-11-30 13:18:06 +00002451multiclass avx512_vcmp_common<OpndItins itins, X86VectorVTInfo _> {
Elena Demikhovsky29792e92015-05-07 11:24:42 +00002452 defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2453 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,AVXCC:$cc),
2454 "vcmp${cc}"#_.Suffix,
2455 "$src2, $src1", "$src1, $src2",
2456 (X86cmpm (_.VT _.RC:$src1),
2457 (_.VT _.RC:$src2),
Simon Pilgrimbb791b32017-11-30 13:18:06 +00002458 imm:$cc), itins.rr, 1>,
2459 Sched<[itins.Sched]>;
Elena Demikhovsky29792e92015-05-07 11:24:42 +00002460
Craig Toppere1cac152016-06-07 07:27:54 +00002461 defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2462 (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc),
2463 "vcmp${cc}"#_.Suffix,
2464 "$src2, $src1", "$src1, $src2",
2465 (X86cmpm (_.VT _.RC:$src1),
2466 (_.VT (bitconvert (_.LdFrag addr:$src2))),
Simon Pilgrimbb791b32017-11-30 13:18:06 +00002467 imm:$cc), itins.rm>,
2468 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Elena Demikhovsky29792e92015-05-07 11:24:42 +00002469
Craig Toppere1cac152016-06-07 07:27:54 +00002470 defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2471 (outs _.KRC:$dst),
2472 (ins _.RC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
2473 "vcmp${cc}"#_.Suffix,
2474 "${src2}"##_.BroadcastStr##", $src1",
2475 "$src1, ${src2}"##_.BroadcastStr,
2476 (X86cmpm (_.VT _.RC:$src1),
2477 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
Simon Pilgrimbb791b32017-11-30 13:18:06 +00002478 imm:$cc), itins.rm>,
2479 EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002480 // Accept explicit immediate argument form instead of comparison code.
Craig Topper0550ce72014-01-05 04:55:55 +00002481 let isAsmParserOnly = 1, hasSideEffects = 0 in {
Elena Demikhovsky29792e92015-05-07 11:24:42 +00002482 defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
2483 (outs _.KRC:$dst),
2484 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2485 "vcmp"#_.Suffix,
Simon Pilgrimbb791b32017-11-30 13:18:06 +00002486 "$cc, $src2, $src1", "$src1, $src2, $cc", itins.rr>,
2487 Sched<[itins.Sched]>;
Elena Demikhovsky29792e92015-05-07 11:24:42 +00002488
2489 let mayLoad = 1 in {
2490 defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
2491 (outs _.KRC:$dst),
2492 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2493 "vcmp"#_.Suffix,
Simon Pilgrimbb791b32017-11-30 13:18:06 +00002494 "$cc, $src2, $src1", "$src1, $src2, $cc", itins.rm>,
2495 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Elena Demikhovsky29792e92015-05-07 11:24:42 +00002496
2497 defm rmbi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
2498 (outs _.KRC:$dst),
2499 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2500 "vcmp"#_.Suffix,
2501 "$cc, ${src2}"##_.BroadcastStr##", $src1",
Simon Pilgrimbb791b32017-11-30 13:18:06 +00002502 "$src1, ${src2}"##_.BroadcastStr##", $cc", itins.rm>,
2503 EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Elena Demikhovsky29792e92015-05-07 11:24:42 +00002504 }
Craig Topper61956982017-09-30 17:02:39 +00002505 }
2506
2507 // Patterns for selecting with loads in other operand.
// The immediate is forwarded as-is (no swap transform), so CommutableCMPCC
// presumably admits only predicates that are unaffected by swapping the
// operands - confirm against its definition.
2508 def : Pat<(X86cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
2509 CommutableCMPCC:$cc),
2510 (!cast<Instruction>(NAME#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2511 imm:$cc)>;
2512
2513 def : Pat<(and _.KRCWM:$mask, (X86cmpm (_.LdFrag addr:$src2),
2514 (_.VT _.RC:$src1),
2515 CommutableCMPCC:$cc)),
2516 (!cast<Instruction>(NAME#_.ZSuffix#"rmik") _.KRCWM:$mask,
2517 _.RC:$src1, addr:$src2,
2518 imm:$cc)>;
2519
2520 def : Pat<(X86cmpm (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
2521 (_.VT _.RC:$src1), CommutableCMPCC:$cc),
2522 (!cast<Instruction>(NAME#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2523 imm:$cc)>;
2524
2525 def : Pat<(and _.KRCWM:$mask, (X86cmpm (X86VBroadcast
2526 (_.ScalarLdFrag addr:$src2)),
2527 (_.VT _.RC:$src1),
2528 CommutableCMPCC:$cc)),
2529 (!cast<Instruction>(NAME#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2530 _.RC:$src1, addr:$src2,
2531 imm:$cc)>;
Elena Demikhovsky29792e92015-05-07 11:24:42 +00002532}
2533
// Register-register packed floating-point compare with the "{sae}" modifier
// in the asm string (opcode 0xC2, tagged EVEX_B).
//   rrib     - condition code printed in the mnemonic; matched via X86cmpmRnd
//              with a rounding operand of (i32 FROUND_NO_EXC).
//   rrib_alt - assembler-only form taking the immediate as an explicit u8imm.
Simon Pilgrimbb791b32017-11-30 13:18:06 +00002534multiclass avx512_vcmp_sae<OpndItins itins, X86VectorVTInfo _> {
Elena Demikhovsky29792e92015-05-07 11:24:42 +00002535 // comparison code form (VCMP[EQ/LT/LE/...])
2536 defm rrib : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2537 (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
2538 "vcmp${cc}"#_.Suffix,
Craig Topperbfe13ff2016-01-11 00:44:52 +00002539 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
Elena Demikhovsky29792e92015-05-07 11:24:42 +00002540 (X86cmpmRnd (_.VT _.RC:$src1),
2541 (_.VT _.RC:$src2),
2542 imm:$cc,
Simon Pilgrimbb791b32017-11-30 13:18:06 +00002543 (i32 FROUND_NO_EXC)), itins.rr>,
2544 EVEX_B, Sched<[itins.Sched]>;
Elena Demikhovsky29792e92015-05-07 11:24:42 +00002545
2546 let isAsmParserOnly = 1, hasSideEffects = 0 in {
2547 defm rrib_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
2548 (outs _.KRC:$dst),
2549 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2550 "vcmp"#_.Suffix,
Craig Topperbfe13ff2016-01-11 00:44:52 +00002551 "$cc, {sae}, $src2, $src1",
Simon Pilgrimbb791b32017-11-30 13:18:06 +00002552 "$src1, $src2, {sae}, $cc", itins.rr>,
2553 EVEX_B, Sched<[itins.Sched]>;
Elena Demikhovsky29792e92015-05-07 11:24:42 +00002554 }
2555}
2556
// Instantiates the packed floating-point compares at each vector length:
//   Z           - 512-bit: avx512_vcmp_common plus the {sae} forms from
//                 avx512_vcmp_sae, predicated on [HasAVX512].
//   Z128 / Z256 - 128/256-bit: avx512_vcmp_common only (no {sae} forms),
//                 predicated on [HasAVX512, HasVLX].
Simon Pilgrimbb791b32017-11-30 13:18:06 +00002557multiclass avx512_vcmp<OpndItins itins, AVX512VLVectorVTInfo _> {
Elena Demikhovsky29792e92015-05-07 11:24:42 +00002558 let Predicates = [HasAVX512] in {
Simon Pilgrimbb791b32017-11-30 13:18:06 +00002559 defm Z : avx512_vcmp_common<itins, _.info512>,
2560 avx512_vcmp_sae<itins, _.info512>, EVEX_V512;
Elena Demikhovsky29792e92015-05-07 11:24:42 +00002561
2562 }
2563 let Predicates = [HasAVX512,HasVLX] in {
Simon Pilgrimbb791b32017-11-30 13:18:06 +00002564 defm Z128 : avx512_vcmp_common<itins, _.info128>, EVEX_V128;
2565 defm Z256 : avx512_vcmp_common<itins, _.info256>, EVEX_V256;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002566 }
2567}
2568
// Packed double / single precision compares. VCMPPD uses the f64 type info
// with AVX512PDIi8Base, 64-bit CD8 scaling and VEX_W; VCMPPS uses the f32 type
// info with AVX512PSIi8Base and 32-bit CD8 scaling.
Simon Pilgrimbb791b32017-11-30 13:18:06 +00002569defm VCMPPD : avx512_vcmp<SSE_ALU_F64P, avx512vl_f64_info>,
Elena Demikhovsky29792e92015-05-07 11:24:42 +00002570 AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
Simon Pilgrimbb791b32017-11-30 13:18:06 +00002571defm VCMPPS : avx512_vcmp<SSE_ALU_F32P, avx512vl_f32_info>,
Elena Demikhovsky29792e92015-05-07 11:24:42 +00002572 AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002573
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00002574
Craig Topper61956982017-09-30 17:02:39 +00002575// Patterns to select fp compares with load as first operand.
// Applies to the scalar compares (X86cmpms producing v1i1) when the immediate
// satisfies CommutableCMPCC: the register operand becomes $src1 of
// VCMPSDZrm / VCMPSSZrm, the load is folded as the memory operand, and the
// immediate is passed through unchanged.
2576let Predicates = [HasAVX512] in {
2577 def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1,
2578 CommutableCMPCC:$cc)),
2579 (VCMPSDZrm FR64X:$src1, addr:$src2, imm:$cc)>;
2580
2581 def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1,
2582 CommutableCMPCC:$cc)),
2583 (VCMPSSZrm FR32X:$src1, addr:$src2, imm:$cc)>;
2584}
2585
Asaf Badouh572bbce2015-09-20 08:46:07 +00002586// ----------------------------------------------------------------
2587// FPClass
Asaf Badouh696e8e02015-10-18 11:04:38 +00002588// Handle scalar fpclass instruction: mask = op(reg_scalar, imm)
2589// mask = op(mem_scalar, imm)
// Emits four records, all guarded by [prd] and placed in _.ExeDomain:
//   rr  / rm  - unmasked; source is a register (_.RC) or a scalar memory
//               operand (_.IntScalarMemOp matched through _.ScalarIntMemCPat).
//   rrk / rmk - additionally take $mask (tagged EVEX_K).
// The second operand is an i32u8imm immediate matched as (i32 imm).
// NOTE(review): the masked patterns combine $mask with the OpNode result
// using `or`, whereas the masked integer compare patterns in this file use
// `and`. Confirm this matches both the instruction's write-mask semantics and
// how the masked fpclass intrinsics are lowered before relying on it.
2590multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim54c60832017-12-01 16:51:48 +00002591 OpndItins itins, X86VectorVTInfo _,
2592 Predicate prd> {
Craig Topper4a638432017-11-11 06:57:44 +00002593 let Predicates = [prd], ExeDomain = _.ExeDomain in {
Craig Topper702097d2017-08-20 18:30:24 +00002594 def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
Asaf Badouh696e8e02015-10-18 11:04:38 +00002595 (ins _.RC:$src1, i32u8imm:$src2),
Craig Topper048e7002016-01-08 06:09:20 +00002596 OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
Asaf Badouh696e8e02015-10-18 11:04:38 +00002597 [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
Simon Pilgrim54c60832017-12-01 16:51:48 +00002598 (i32 imm:$src2)))], itins.rr>,
2599 Sched<[itins.Sched]>;
Asaf Badouh696e8e02015-10-18 11:04:38 +00002600 def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2601 (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2602 OpcodeStr##_.Suffix#
Craig Topper048e7002016-01-08 06:09:20 +00002603 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
Simon Pilgrimb13961d2016-06-11 14:34:10 +00002604 [(set _.KRC:$dst,(or _.KRCWM:$mask,
Asaf Badouh696e8e02015-10-18 11:04:38 +00002605 (OpNode (_.VT _.RC:$src1),
Simon Pilgrim54c60832017-12-01 16:51:48 +00002606 (i32 imm:$src2))))], itins.rr>,
2607 EVEX_K, Sched<[itins.Sched]>;
Craig Topper63801df2017-02-19 21:44:35 +00002608 def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
Craig Topperca8abed2017-11-13 06:46:48 +00002609 (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
Craig Topper63801df2017-02-19 21:44:35 +00002610 OpcodeStr##_.Suffix##
2611 "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2612 [(set _.KRC:$dst,
Craig Topperca8abed2017-11-13 06:46:48 +00002613 (OpNode _.ScalarIntMemCPat:$src1,
Simon Pilgrim54c60832017-12-01 16:51:48 +00002614 (i32 imm:$src2)))], itins.rm>,
2615 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Craig Topper63801df2017-02-19 21:44:35 +00002616 def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
Craig Topperca8abed2017-11-13 06:46:48 +00002617 (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
Craig Topper63801df2017-02-19 21:44:35 +00002618 OpcodeStr##_.Suffix##
2619 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2620 [(set _.KRC:$dst,(or _.KRCWM:$mask,
Craig Topperca8abed2017-11-13 06:46:48 +00002621 (OpNode _.ScalarIntMemCPat:$src1,
Simon Pilgrim54c60832017-12-01 16:51:48 +00002622 (i32 imm:$src2))))], itins.rm>,
2623 EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Asaf Badouh696e8e02015-10-18 11:04:38 +00002624 }
2625}
2626
// Vector VFPCLASS: classify every element of a packed FP vector according to
// the categories selected by the immediate, producing one mask bit per element.
//   rr  / rrk  : mask = fpclass(reg_vec, imm)
//   rm  / rmk  : mask = fpclass(mem_vec, imm)
//   rmb / rmbk : mask = fpclass(broadcast(eltVT from memory), imm)
//
// `mem` is appended to the mnemonic of the full-vector memory forms and
// `broadcast` to the mnemonic of the broadcast forms.
//
// The "k" forms are write-masked. A write-masked VFPCLASS clears every
// destination bit whose write-mask bit is clear, i.e. it computes
// (mask AND fpclass(...)). The selection patterns therefore have to match
// `and`; matching `or` would select the masked instruction for an unrelated
// OR and produce wrong results.
multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 OpndItins itins, X86VectorVTInfo _,
                                 string mem, string broadcast>{
  let ExeDomain = _.ExeDomain in {
  // Register source.
  def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
                                       (i32 imm:$src2)))], itins.rr>,
                      Sched<[itins.Sched]>;
  def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                      (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix#
                      "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                      [(set _.KRC:$dst,(and _.KRCWM:$mask,
                                       (OpNode (_.VT _.RC:$src1),
                                       (i32 imm:$src2))))], itins.rr>,
                      EVEX_K, Sched<[itins.Sched]>;
  // Full-vector memory source.
  def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix##mem#
                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,(OpNode
                                     (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                     (i32 imm:$src2)))], itins.rm>,
                    Sched<[itins.Sched.Folded, ReadAfterLd]>;
  def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix##mem#
                    "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                    [(set _.KRC:$dst, (and _.KRCWM:$mask, (OpNode
                                  (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                  (i32 imm:$src2))))], itins.rm>,
                    EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  // Scalar memory source broadcast to every element.
  def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
                                      _.BroadcastStr##", $dst|$dst, ${src1}"
                                                  ##_.BroadcastStr##", $src2}",
                    [(set _.KRC:$dst,(OpNode
                                     (_.VT (X86VBroadcast
                                           (_.ScalarLdFrag addr:$src1))),
                                     (i32 imm:$src2)))], itins.rm>,
                    EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
                          _.BroadcastStr##", $dst {${mask}}|$dst {${mask}}, ${src1}"##
                                                   _.BroadcastStr##", $src2}",
                    [(set _.KRC:$dst,(and _.KRCWM:$mask, (OpNode
                                     (_.VT (X86VBroadcast
                                           (_.ScalarLdFrag addr:$src1))),
                                     (i32 imm:$src2))))], itins.rm>,
                    EVEX_B, EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
2686
// Instantiates avx512_vector_fpclass for the 512-, 128- and 256-bit members
// of one AVX512VLVectorVTInfo family. The 512-bit form is guarded by `prd`
// alone; the 128/256-bit forms additionally require HasVLX. The
// "{z}" / "{x}" / "{y}" strings are passed as the `mem` argument.
multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
                                     bits<8> opc, SDNode OpNode,
                                     OpndItins itins, Predicate prd,
                                     string broadcast> {
  let Predicates = [prd] in
  defm Z : avx512_vector_fpclass<opc, OpcodeStr, OpNode, itins, _.info512,
                                 "{z}", broadcast>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, itins, _.info128,
                                      "{x}", broadcast>,
                EVEX_V128;
    defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, itins, _.info256,
                                      "{y}", broadcast>,
                EVEX_V256;
  }
}
2702
// Top-level VFPCLASS family: packed single/double (PS/PD) via
// avx512_vector_fpclass_all and scalar single/double (SS/SD) via
// avx512_scalar_fpclass. The 64-bit element variants carry VEX_W.
// FIXME: Is there a better scheduler itinerary for VFPCLASS?
multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
                                 bits<8> opcScalar, SDNode VecOpNode,
                                 SDNode ScalarOpNode, Predicate prd> {
  // Packed forms.
  defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec,
                                      VecOpNode, SSE_ALU_F32P, prd, "{l}">,
            EVEX_CD8<32, CD8VF>;
  defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec,
                                      VecOpNode, SSE_ALU_F64P, prd, "{q}">,
            EVEX_CD8<64, CD8VF>, VEX_W;

  // Scalar forms.
  defm SS : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
                                  SSE_ALU_F32S, f32x_info, prd>,
            EVEX_CD8<32, CD8VT1>;
  defm SD : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
                                  SSE_ALU_F64S, f64x_info, prd>,
            EVEX_CD8<64, CD8VT1>, VEX_W;
}
2719
// VFPCLASS{PS,PD,SS,SD}: vector opcode 0x66, scalar opcode 0x67, gated on DQI.
defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67,
                                      X86Vfpclass, X86Vfpclasss, HasDQI>,
                AVX512AIi8Base, EVEX;
Asaf Badouh572bbce2015-09-20 08:46:07 +00002722
//-----------------------------------------------------------------
// Mask register copy, including
// - copy between mask registers
// - load/store mask registers
// - copy from GPR to mask register and vice versa
//

// Mask <-> mask and mask <-> memory moves for one mask width.
//   kk : mask register to mask register (no selection pattern)
//   km : load a mask register from memory
//   mk : store a mask register to memory
multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
                           string OpcodeStr, RegisterClass KRC,
                           ValueType vvt, X86MemOperand x86memop> {
  let hasSideEffects = 0, SchedRW = [WriteMove] in
  def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
             OpcodeStr#"\t{$src, $dst|$dst, $src}",
             [], IIC_SSE_MOVDQ>,
           Sched<[WriteMove]>;

  def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
             OpcodeStr#"\t{$src, $dst|$dst, $src}",
             [(set KRC:$dst, (vvt (load addr:$src)))], IIC_SSE_MOVDQ>,
           Sched<[WriteLoad]>;

  def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
             OpcodeStr#"\t{$src, $dst|$dst, $src}",
             [(store KRC:$src, addr:$dst)], IIC_SSE_MOVDQ>,
           Sched<[WriteStore]>;
}
2745
// Moves between a general-purpose register class and a mask register class.
//   kr : GPR  -> mask
//   rk : mask -> GPR
// Neither form has a selection pattern attached here.
multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
                               string OpcodeStr,
                               RegisterClass KRC, RegisterClass GRC> {
  let hasSideEffects = 0 in {
    def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
               OpcodeStr#"\t{$src, $dst|$dst, $src}",
               [], IIC_SSE_MOVD_ToGP>,
             Sched<[WriteMove]>;

    def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
               OpcodeStr#"\t{$src, $dst|$dst, $src}",
               [], IIC_SSE_MOVD_ToGP>,
             Sched<[WriteMove]>;
  }
}
2758
// KMOVB: 8-bit masks, DQI only.
let Predicates = [HasDQI] in {
  defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
               VEX, PD;
}

// KMOVW: 16-bit masks, baseline AVX-512.
let Predicates = [HasAVX512] in {
  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
               VEX, PS;
}

// KMOVD / KMOVQ: 32- and 64-bit masks, BWI only. The mask/memory forms and
// the GPR forms use different prefix encodings, so they are instantiated
// separately.
let Predicates = [HasBWI] in {
  defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1, i32mem>,
               VEX, PD, VEX_W;
  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
               VEX, XD;
  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
               VEX, PS, VEX_W;
  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
               VEX, XD, VEX_W;
}
2779
// GR from/to mask register.
//
// 16- and 8-bit scalars are first placed in (or extracted from) a 32-bit GPR,
// and the copy to/from the mask class happens at 32 bits.

// i16 <-> v16i1
def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
          (COPY_TO_REGCLASS
            (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)),
            VK16)>;
def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)),
                          sub_16bit)>;

// i8 <-> v8i1
def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
          (COPY_TO_REGCLASS
            (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)),
            VK8)>;
def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)),
                          sub_8bit)>;

// v16i1 -> i32: zero-extension goes through KMOVW, any-extension is a plain
// register-class copy.
def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
          (KMOVWrk VK16:$src)>;
def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
          (COPY_TO_REGCLASS VK16:$src, GR32)>;

// v8i1 -> i32: zero-extension uses KMOVB and so needs DQI.
def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
          (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
          (COPY_TO_REGCLASS VK8:$src, GR32)>;

// i32 <-> v32i1 and i64 <-> v64i1 are direct register-class copies.
def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
          (COPY_TO_REGCLASS GR32:$src, VK32)>;
def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
          (COPY_TO_REGCLASS VK32:$src, GR32)>;
def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
          (COPY_TO_REGCLASS GR64:$src, VK64)>;
def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
          (COPY_TO_REGCLASS VK64:$src, GR64)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002809
// Load/store kreg

// With DQI the sub-byte mask types are stored/loaded with KMOVB after being
// re-classed to/from VK8.
let Predicates = [HasDQI] in {
  def : Pat<(store VK4:$src, addr:$dst),
            (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK4:$src, VK8))>;
  def : Pat<(store VK2:$src, addr:$dst),
            (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK2:$src, VK8))>;
  def : Pat<(store VK1:$src, addr:$dst),
            (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;

  def : Pat<(v2i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
  def : Pat<(v4i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
}

// Without DQI, masks of up to 8 bits go through a GPR: stores copy the mask
// to GR32 and write its low byte with MOV8mr; loads use MOVZX32rm8 and copy
// the result into the mask class.
let Predicates = [HasAVX512, NoDQI] in {
  def : Pat<(store VK1:$src, addr:$dst),
            (MOV8mr addr:$dst,
                    (i8 (EXTRACT_SUBREG
                          (i32 (COPY_TO_REGCLASS VK1:$src, GR32)),
                          sub_8bit)))>;
  def : Pat<(store VK2:$src, addr:$dst),
            (MOV8mr addr:$dst,
                    (i8 (EXTRACT_SUBREG
                          (i32 (COPY_TO_REGCLASS VK2:$src, GR32)),
                          sub_8bit)))>;
  def : Pat<(store VK4:$src, addr:$dst),
            (MOV8mr addr:$dst,
                    (i8 (EXTRACT_SUBREG
                          (i32 (COPY_TO_REGCLASS VK4:$src, GR32)),
                          sub_8bit)))>;
  def : Pat<(store VK8:$src, addr:$dst),
            (MOV8mr addr:$dst,
                    (i8 (EXTRACT_SUBREG
                          (i32 (COPY_TO_REGCLASS VK8:$src, GR32)),
                          sub_8bit)))>;

  def : Pat<(v8i1 (load addr:$src)),
            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
  def : Pat<(v2i1 (load addr:$src)),
            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK2)>;
  def : Pat<(v4i1 (load addr:$src)),
            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK4)>;
}

// Patterns that apply whether or not DQI is available.
let Predicates = [HasAVX512] in {
  def : Pat<(v1i1 (load addr:$src)),
            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK1)>;
  def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
}
Elena Demikhovskyc5f67262013-12-17 08:33:15 +00002856
let Predicates = [HasAVX512] in {
  // For one mask register class / mask value type pair, lower
  //   - scalar_to_vector from GR32,
  //   - extraction of element 0 (X86kextract) into an i32,
  //   - scalar_to_vector from GR8 (widened to 32 bits first)
  // to plain register-class copies.
  multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC,
                                              ValueType maskVT> {
    def : Pat<(maskVT (scalar_to_vector GR32:$src)),
              (COPY_TO_REGCLASS GR32:$src, maskRC)>;

    def : Pat<(i32 (X86kextract maskRC:$src, (iPTR 0))),
              (COPY_TO_REGCLASS maskRC:$src, GR32)>;

    def : Pat<(maskVT (scalar_to_vector GR8:$src)),
              (COPY_TO_REGCLASS
                (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
                maskRC)>;
  }

  defm : operation_gpr_mask_copy_lowering<VK1,  v1i1>;
  defm : operation_gpr_mask_copy_lowering<VK2,  v2i1>;
  defm : operation_gpr_mask_copy_lowering<VK4,  v4i1>;
  defm : operation_gpr_mask_copy_lowering<VK8,  v8i1>;
  defm : operation_gpr_mask_copy_lowering<VK16, v16i1>;
  defm : operation_gpr_mask_copy_lowering<VK32, v32i1>;
  defm : operation_gpr_mask_copy_lowering<VK64, v64i1>;

  // Inserting a single bit into an all-zero v16i1: the GR8 value is widened
  // to 32 bits, masked with 1 so only bit 0 can be set, and then moved into a
  // mask register with KMOVW.
  def : Pat<(insert_subvector (v16i1 immAllZerosV),
                              (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
            (COPY_TO_REGCLASS
              (KMOVWkr
                (AND32ri8
                  (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
                  (i32 1))),
              VK16)>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002884
// Mask unary operation
// - KNOT

// One register-to-register unary mask instruction on class KRC, selected for
// (OpNode KRC) and available under predicate `prd`.
multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
                            RegisterClass KRC, SDPatternOperator OpNode,
                            OpndItins itins, Predicate prd> {
  let Predicates = [prd] in
  def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
             OpcodeStr#"\t{$src, $dst|$dst, $src}",
             [(set KRC:$dst, (OpNode KRC:$src))], itins.rr>,
           Sched<[itins.Sched]>;
}
2896
// Instantiates a unary mask operation for all four mask widths:
//   B (VK8,  DQI), W (VK16, AVX512), D (VK32, BWI), Q (VK64, BWI).
multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNode, OpndItins itins> {
  defm B : avx512_mask_unop<opc, OpcodeStr#"b", VK8, OpNode, itins, HasDQI>,
           VEX, PD;
  defm W : avx512_mask_unop<opc, OpcodeStr#"w", VK16, OpNode, itins,
                            HasAVX512>,
           VEX, PS;
  defm D : avx512_mask_unop<opc, OpcodeStr#"d", VK32, OpNode, itins, HasBWI>,
           VEX, PD, VEX_W;
  defm Q : avx512_mask_unop<opc, OpcodeStr#"q", VK64, OpNode, itins, HasBWI>,
           VEX, PS, VEX_W;
}
2908
defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SSE_BIT_ITINS_P>;

// The byte-sized KNOT is only instantiated under DQI, so without DQI an 8-bit
// mask is promoted to 16 bits and inverted with KNOTW.
let Predicates = [HasAVX512, NoDQI] in
def : Pat<(vnot VK8:$src),
          (COPY_TO_REGCLASS
            (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;

// 4- and 2-bit masks are always promoted to 16 bits. The brace-less `let`
// above covers only the VK8 pattern, so these two carry no explicit predicate.
def : Pat<(vnot VK4:$src),
          (COPY_TO_REGCLASS
            (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
def : Pat<(vnot VK2:$src),
          (COPY_TO_REGCLASS
            (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002920
// Mask binary operation
// - KAND, KANDN, KOR, KXNOR, KXOR

// One register-register binary mask instruction on class KRC, selected for
// (OpNode KRC, KRC). `IsCommutable` is forwarded to the instruction's
// isCommutable flag.
multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
                             RegisterClass KRC, SDPatternOperator OpNode,
                             OpndItins itins, Predicate prd,
                             bit IsCommutable> {
  let Predicates = [prd], isCommutable = IsCommutable in
  def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
             OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
             [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))], itins.rr>,
           Sched<[itins.Sched]>;
}
2933
// Instantiates a binary mask operation for all four mask widths. B requires
// DQI, D and Q require BWI, and the predicate for W is `prdW` (HasAVX512
// unless the caller overrides it).
multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
                                 SDPatternOperator OpNode, OpndItins itins,
                                 bit IsCommutable,
                                 Predicate prdW = HasAVX512> {
  defm B : avx512_mask_binop<opc, OpcodeStr#"b", VK8, OpNode, itins,
                             HasDQI, IsCommutable>,
           VEX_4V, VEX_L, PD;
  defm W : avx512_mask_binop<opc, OpcodeStr#"w", VK16, OpNode, itins,
                             prdW, IsCommutable>,
           VEX_4V, VEX_L, PS;
  defm D : avx512_mask_binop<opc, OpcodeStr#"d", VK32, OpNode, itins,
                             HasBWI, IsCommutable>,
           VEX_4V, VEX_L, VEX_W, PD;
  defm Q : avx512_mask_binop<opc, OpcodeStr#"q", VK64, OpNode, itins,
                             HasBWI, IsCommutable>,
           VEX_4V, VEX_L, VEX_W, PS;
}
2946
// Scalar-style fragments built on 'not'.
def andn : PatFrag<(ops node:$i0, node:$i1),
                   (and (not node:$i0), node:$i1)>;
def xnor : PatFrag<(ops node:$i0, node:$i1),
                   (not (xor node:$i0, node:$i1))>;
// These nodes use 'vnot' instead of 'not' to support vectors.
def vandn : PatFrag<(ops node:$i0, node:$i1),
                    (and (vnot node:$i0), node:$i1)>;
def vxnor : PatFrag<(ops node:$i0, node:$i1),
                    (vnot (xor node:$i0, node:$i1))>;

// Binary mask instructions. The integer argument is the IsCommutable bit:
// KANDN is the only non-commutable one. KADD overrides the W predicate to
// HasDQI.
defm KAND  : avx512_mask_binop_all<0x41, "kand",  and,     SSE_BIT_ITINS_P, 1>;
defm KOR   : avx512_mask_binop_all<0x45, "kor",   or,      SSE_BIT_ITINS_P, 1>;
defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor,   SSE_BIT_ITINS_P, 1>;
defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor,     SSE_BIT_ITINS_P, 1>;
defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn,   SSE_BIT_ITINS_P, 0>;
defm KADD  : avx512_mask_binop_all<0x4A, "kadd",  X86kadd, SSE_BIT_ITINS_P, 1,
                                   HasDQI>;
Elena Demikhovskyb64d7e82013-12-25 10:06:40 +00002959
// Selection patterns that implement a binary mask operation on the narrow
// mask types by promoting both operands to VK16, applying the 16-bit
// instruction `Inst`, and copying the result back to the original mask class.
//   VOpNode - operator used for the vector mask types (VK2/VK4/VK8)
//   OpNode  - operator used for VK1
//   Inst    - the 16-bit (W) instruction that performs the operation
multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode,
                            Instruction Inst> {
  // With AVX512F, 8-bit mask is promoted to 16-bit mask,
  // for the DQI set, this type is legal and KxxxB instruction is used
  let Predicates = [NoDQI] in
  def : Pat<(VOpNode VK8:$src1, VK8:$src2),
            (COPY_TO_REGCLASS
              (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
                    (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;

  // All types smaller than 8 bits require conversion anyway.
  // Each result is copied back to the class of its own operands; the VK2 and
  // VK4 patterns previously named VK1 here, which did not match the type of
  // the pattern's result.
  def : Pat<(OpNode VK1:$src1, VK1:$src2),
        (COPY_TO_REGCLASS (Inst
                           (COPY_TO_REGCLASS VK1:$src1, VK16),
                           (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
  def : Pat<(VOpNode VK2:$src1, VK2:$src2),
        (COPY_TO_REGCLASS (Inst
                           (COPY_TO_REGCLASS VK2:$src1, VK16),
                           (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
  def : Pat<(VOpNode VK4:$src1, VK4:$src2),
        (COPY_TO_REGCLASS (Inst
                           (COPY_TO_REGCLASS VK4:$src1, VK16),
                           (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
}
2984
// Narrow-mask promotion patterns for each logical mask operation:
// <vector operator, VK1 operator, 16-bit instruction>.
defm : avx512_binop_pat<and,   and,  KANDWrr>;
defm : avx512_binop_pat<vandn, andn, KANDNWrr>;
defm : avx512_binop_pat<or,    or,   KORWrr>;
defm : avx512_binop_pat<vxnor, xnor, KXNORWrr>;
defm : avx512_binop_pat<xor,   xor,  KXORWrr>;
Elena Demikhovskyd1084c52015-04-27 12:57:59 +00002990
// Mask unpacking

// KUNPCK<Suffix>: defines the rr instruction on KRC plus a pattern selecting
// it for concat_vectors of two KRCSrc masks producing VT. Note that the
// pattern passes $src2 as the instruction's first source operand and $src1
// as its second.
multiclass avx512_mask_unpck<string Suffix, RegisterClass KRC, ValueType VT,
                             RegisterClass KRCSrc, OpndItins itins,
                             Predicate prd> {
  let Predicates = [prd] in {
    let hasSideEffects = 0 in
    def rr : I<0x4b, MRMSrcReg, (outs KRC:$dst),
               (ins KRC:$src1, KRC:$src2),
               "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               [], itins.rr>,
             VEX_4V, VEX_L, Sched<[itins.Sched]>;

    def : Pat<(VT (concat_vectors KRCSrc:$src1, KRCSrc:$src2)),
              (!cast<Instruction>(NAME##rr)
                (COPY_TO_REGCLASS KRCSrc:$src2, KRC),
                (COPY_TO_REGCLASS KRCSrc:$src1, KRC))>;
  }
}
3007
// 8+8 -> 16 is baseline AVX-512; 16+16 -> 32 and 32+32 -> 64 require BWI.
defm KUNPCKBW : avx512_mask_unpck<"bw", VK16, v16i1, VK8,  SSE_UNPCK,
                                  HasAVX512>, PD;
defm KUNPCKWD : avx512_mask_unpck<"wd", VK32, v32i1, VK16, SSE_UNPCK,
                                  HasBWI>, PS;
defm KUNPCKDQ : avx512_mask_unpck<"dq", VK64, v64i1, VK32, SSE_UNPCK,
                                  HasBWI>, PS, VEX_W;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003011
// Mask bit testing

// One mask test instruction on class KRC. It has no register result; it
// defines EFLAGS, which is set from (OpNode KRC, KRC).
multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                              SDNode OpNode, OpndItins itins, Predicate prd> {
  let Predicates = [prd], Defs = [EFLAGS] in
  def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
             OpcodeStr#"\t{$src2, $src1|$src1, $src2}",
             [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))], itins.rr>,
           Sched<[itins.Sched]>;
}
3021
// Instantiates a mask test operation for all four mask widths. B requires
// DQI, Q and D require BWI, and the predicate for W is `prdW` (HasAVX512
// unless the caller overrides it).
multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                OpndItins itins,
                                Predicate prdW = HasAVX512> {
  defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, itins,
                              HasDQI>,
           VEX, PD;
  defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, itins,
                              prdW>,
           VEX, PS;
  defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, itins,
                              HasBWI>,
           VEX, PS, VEX_W;
  defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, itins,
                              HasBWI>,
           VEX, PD, VEX_W;
}
3033
// KORTEST uses the default W predicate; KTEST overrides it so that the W
// form also requires DQI.
defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SSE_PTEST>;
defm KTEST   : avx512_mask_testop_w<0x99, "ktest",   X86ktest,   SSE_PTEST,
                                    HasDQI>;
Elena Demikhovsky47fc44e2013-12-16 13:52:35 +00003036
// Mask shift

// One mask shift-by-immediate instruction on class KRC, selected for
// (OpNode KRC, i8 imm). The instruction is defined under HasAVX512 here.
multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                               SDNode OpNode, OpndItins itins> {
  let Predicates = [HasAVX512] in
  def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
               OpcodeStr#"\t{$imm, $src, $dst|$dst, $src, $imm}",
               [(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))],
               itins.rr>,
           Sched<[itins.Sched]>;
}
3047
// Instantiates a mask shift for all four mask widths. W and B share opcode
// `opc1` (W carries VEX_W); Q and D share opcode `opc2` (Q carries VEX_W).
// B is wrapped in a HasDQI predicate, Q and D in HasBWI.
multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
                                 SDNode OpNode, OpndItins itins> {
  defm W : avx512_mask_shiftop<opc1, OpcodeStr#"w", VK16, OpNode, itins>,
           VEX, TAPD, VEX_W;

  let Predicates = [HasDQI] in
  defm B : avx512_mask_shiftop<opc1, OpcodeStr#"b", VK8, OpNode, itins>,
           VEX, TAPD;

  let Predicates = [HasBWI] in {
    defm Q : avx512_mask_shiftop<opc2, OpcodeStr#"q", VK64, OpNode, itins>,
             VEX, TAPD, VEX_W;
    defm D : avx512_mask_shiftop<opc2, OpcodeStr#"d", VK32, OpNode, itins>,
             VEX, TAPD;
  }
}
3062
// Mask shift left / right: <opcode for W and B, opcode for Q and D>.
defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl,
                                     SSE_PSHUF>;
defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr,
                                     SSE_PSHUF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003065
// Lowers a packed integer compare on a Narrow vector type to the Z-suffixed
// instruction named by InstStr, operating on the Wide type. Each narrow
// operand is inserted into an otherwise undefined wide register at
// Narrow.SubRegIdx, and the resulting mask is copied back to Narrow.KRC.
// A second pattern folds an `and` with a narrow mask into the write-masked
// (Zrrk) form, with the mask re-classed to Wide.KRC.
multiclass axv512_icmp_packed_no_vlx_lowering<PatFrag Frag, string InstStr,
                                              X86VectorVTInfo Narrow,
                                              X86VectorVTInfo Wide> {
  // Unmasked compare.
  def : Pat<(Narrow.KVT (Frag (Narrow.VT Narrow.RC:$src1),
                              (Narrow.VT Narrow.RC:$src2))),
            (COPY_TO_REGCLASS
              (!cast<Instruction>(InstStr#"Zrr")
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1,
                                        Narrow.SubRegIdx)),
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2,
                                        Narrow.SubRegIdx))),
              Narrow.KRC)>;

  // Compare whose result is ANDed with a mask.
  def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                             (Frag (Narrow.VT Narrow.RC:$src1),
                                   (Narrow.VT Narrow.RC:$src2)))),
            (COPY_TO_REGCLASS
              (!cast<Instruction>(InstStr#"Zrrk")
                (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1,
                                        Narrow.SubRegIdx)),
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2,
                                        Narrow.SubRegIdx))),
              Narrow.KRC)>;
}
3087
// Selects a condition-code compare on the Narrow vector type by using the
// Wide form of the instruction named by InstStr.
//   OpNode  - compare node taking two vector operands and an immediate $cc.
//   InstStr - instruction name prefix; "Zrri"/"Zrrik" is appended.
//   Narrow  - type info of the operands actually being compared.
//   Wide    - type info of the instruction that is emitted.
// Each Narrow operand is placed into an otherwise undefined (IMPLICIT_DEF)
// Wide register at Narrow.SubRegIdx, and the mask result is re-classed to
// Narrow.KRC.
//
// The instruction name is built with the documented '#' paste operator and a
// quoted suffix, matching axv512_icmp_packed_no_vlx_lowering.
multiclass axv512_icmp_packed_cc_no_vlx_lowering<SDNode OpNode, string InstStr,
                                                 X86VectorVTInfo Narrow,
                                                 X86VectorVTInfo Wide> {
  // Unmasked compare.
  def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1),
                                (Narrow.VT Narrow.RC:$src2), imm:$cc)),
            (COPY_TO_REGCLASS
              (!cast<Instruction>(InstStr # "Zrri")
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1,
                                        Narrow.SubRegIdx)),
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2,
                                        Narrow.SubRegIdx)),
                imm:$cc),
              Narrow.KRC)>;

  // Compare whose result is and'ed with a mask: fold the mask into the
  // write-masked ("Zrrik") form, re-classing the mask to Wide.KRC first.
  def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                             (OpNode (Narrow.VT Narrow.RC:$src1),
                                     (Narrow.VT Narrow.RC:$src2), imm:$cc))),
            (COPY_TO_REGCLASS
              (!cast<Instruction>(InstStr # "Zrrik")
                (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1,
                                        Narrow.SubRegIdx)),
                (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2,
                                        Narrow.SubRegIdx)),
                imm:$cc),
              Narrow.KRC)>;
}
3108
// Compare lowerings used when AVX-512 is available but VLX is not: each
// narrow type is paired with the 512-bit type info of the same element type.
let Predicates = [HasAVX512, NoVLX] in {
  // VPCMPGT/VPCMPEQ with 32-bit elements.
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTD",
                                            v8i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQD",
                                            v8i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTD",
                                            v4i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQD",
                                            v4i32x_info, v16i32_info>;

  // VPCMPGT/VPCMPEQ with 64-bit elements.
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTQ",
                                            v4i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQQ",
                                            v4i64x_info, v8i64_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTQ",
                                            v2i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQQ",
                                            v2i64x_info, v8i64_info>;

  // Condition-code compares, 32-bit elements.
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS",
                                               v8f32x_info, v16f32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPD",
                                               v8i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUD",
                                               v8i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS",
                                               v4f32x_info, v16f32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPD",
                                               v4i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUD",
                                               v4i32x_info, v16i32_info>;

  // Condition-code compares, 64-bit elements.
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPD",
                                               v4f64x_info, v8f64_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPQ",
                                               v4i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUQ",
                                               v4i64x_info, v8i64_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPD",
                                               v2f64x_info, v8f64_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPQ",
                                               v2i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUQ",
                                               v2i64x_info, v8i64_info>;
}
3138
// Byte and word compare lowerings used when BWI is available but VLX is not:
// each narrow type is paired with the 512-bit type info of the same element
// type.
let Predicates = [HasBWI, NoVLX] in {
  // VPCMPGT/VPCMPEQ with 8-bit elements.
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTB",
                                            v32i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQB",
                                            v32i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTB",
                                            v16i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQB",
                                            v16i8x_info, v64i8_info>;

  // VPCMPGT/VPCMPEQ with 16-bit elements.
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTW",
                                            v16i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQW",
                                            v16i16x_info, v32i16_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTW",
                                            v8i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQW",
                                            v8i16x_info, v32i16_info>;

  // Condition-code compares, 8-bit elements.
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPB",
                                               v32i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUB",
                                               v32i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPB",
                                               v16i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUB",
                                               v16i8x_info, v64i8_info>;

  // Condition-code compares, 16-bit elements.
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPW",
                                               v16i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUW",
                                               v16i16x_info, v32i16_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPW",
                                               v8i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUW",
                                               v8i16x_info, v32i16_info>;
}
3164
// Pseudo instruction that materializes a constant mask value.
//   KRC - mask register class of the result.
//   VT  - value type of the constant.
//   Val - the constant pattern fragment the pseudo is selected for.
// The pseudo has no operands and an empty asm string; it is marked
// rematerializable and as cheap as a move.
multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
  let Predicates = [HasAVX512], isReMaterializable = 1, isAsCheapAsAMove = 1,
      isPseudo = 1, SchedRW = [WriteZero] in
  def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
                 [(set KRC:$dst, (VT Val))]>;
}
3173
// Instantiates avx512_mask_setop for the 16-, 32- and 64-bit mask types
// (suffixes W, D and Q) with the same constant fragment Val.
multiclass avx512_mask_setop_w<PatFrag Val> {
  defm W : avx512_mask_setop<VK16, v16i1, Val>;
  defm D : avx512_mask_setop<VK32, v32i1, Val>;
  defm Q : avx512_mask_setop<VK64, v64i1, Val>;
}
3179
// All-zeros and all-ones mask constants.
defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
3182
// With AVX-512 only, masks narrower than 16 bits are promoted: the constant
// is produced by the 16-bit KSET0W/KSET1W pseudo and then re-classed to the
// narrower mask register class.
let Predicates = [HasAVX512] in {
  // All-zeros constants.
  def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
  def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
  def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
  def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
  // All-ones constants.
  def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;
  def : Pat<(v4i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK4)>;
  def : Pat<(v2i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK2)>;
  def : Pat<(v1i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK1)>;
}
Igor Bregerf1bd7612016-03-06 07:46:03 +00003194
// Mask-register insert_subvector/extract_subvector at index 0.
//   subRC/subVT - register class and type of the narrower mask.
//   RC/VT       - register class and type of the wider mask.
// Both directions are selected as a plain register-class copy.
multiclass operation_subvector_mask_lowering<RegisterClass subRC,
                                             ValueType subVT,
                                             RegisterClass RC, ValueType VT> {
  // Extract the low subVT elements of a VT mask.
  def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
            (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;

  // Insert a subVT mask into the low elements of an undef VT mask.
  def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
            (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
}
// Instantiate the index-0 subvector patterns for every (narrow, wide) pair of
// mask types from v1i1 up to v64i1.

// v1i1 sub-mask.
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK2,  v2i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK4,  v4i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK64, v64i1>;

// v2i1 sub-mask.
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK4,  v4i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK64, v64i1>;

// v4i1 sub-mask.
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK64, v64i1>;

// v8i1 sub-mask.
defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK64, v64i1>;

// v16i1 sub-mask.
defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;

// v32i1 sub-mask.
defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003230
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003231//===----------------------------------------------------------------------===//
3232// AVX-512 - Aligned and unaligned load and store
3233//
3234
Elena Demikhovsky2689d782015-03-02 12:46:21 +00003235
// Load-direction forms of an AVX-512 vector move, plus the masked-load
// selection patterns that use them.
//   opc         - opcode byte.
//   OpcodeStr   - mnemonic used to build the asm strings.
//   itins       - itineraries; .rr is used for register forms, .rm for
//                 memory forms.
//   _           - vector type info (register class, mask class, memory
//                 operand, execution domain, ...).
//   ld_frag     - load fragment matched by the rm/rmk/rmkz patterns.
//   mload       - masked-load fragment matched by the trailing Pats.
//   NoRMPattern - when set, the unmasked rm form gets an empty pattern.
//   SelectOprr  - select operator used only by the register-register masked
//                 forms (rrk/rrkz); the memory forms always use vselect.
//
// Defs produced: rr, rrkz, rm, rrk, rmk, rmkz.
//
// Asm strings and instruction names are built uniformly with the '#' paste
// operator and quoted suffixes.
multiclass avx512_load<bits<8> opc, string OpcodeStr, MoveLoadStoreItins itins,
                       X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
                       bit NoRMPattern = 0,
                       SDPatternOperator SelectOprr = vselect> {
  let hasSideEffects = 0 in {
    // Unmasked register move; no selection pattern.
    def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
                      OpcodeStr # "\t{$src, $dst|$dst, $src}", [],
                      _.ExeDomain, itins.rr>, EVEX, Sched<[WriteMove]>;

    // Zero-masked register move: select($mask, $src, zero).
    def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                        (ins _.KRCWM:$mask, _.RC:$src),
                        OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
                                    "${dst} {${mask}} {z}, $src}",
                        [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
                                                (_.VT _.RC:$src),
                                                _.ImmAllZerosV)))],
                        _.ExeDomain, itins.rr>,
                        EVEX, EVEX_KZ, Sched<[WriteMove]>;

    // Unmasked load. The flags are set explicitly because the pattern may be
    // empty when NoRMPattern is set.
    let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
    def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
                      OpcodeStr # "\t{$src, $dst|$dst, $src}",
                      !if(NoRMPattern, [],
                          [(set _.RC:$dst,
                            (_.VT (bitconvert (ld_frag addr:$src))))]),
                      _.ExeDomain, itins.rm>, EVEX, Sched<[WriteLoad]>;

    // Merge-masked forms: $src0 is tied to $dst and supplies the elements
    // whose mask bit is clear.
    let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
      def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                         (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
                         OpcodeStr # "\t{$src1, ${dst} {${mask}}|" #
                                     "${dst} {${mask}}, $src1}",
                         [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
                                                 (_.VT _.RC:$src1),
                                                 (_.VT _.RC:$src0))))],
                         _.ExeDomain, itins.rr>,
                         EVEX, EVEX_K, Sched<[WriteMove]>;
      def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                         (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
                         OpcodeStr # "\t{$src1, ${dst} {${mask}}|" #
                                     "${dst} {${mask}}, $src1}",
                         [(set _.RC:$dst, (_.VT
                             (vselect _.KRCWM:$mask,
                              (_.VT (bitconvert (ld_frag addr:$src1))),
                              (_.VT _.RC:$src0))))],
                         _.ExeDomain, itins.rm>,
                         EVEX, EVEX_K, Sched<[WriteLoad]>;
    }

    // Zero-masked load: select($mask, load, zero).
    def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                        (ins _.KRCWM:$mask, _.MemOp:$src),
                        OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
                                    "${dst} {${mask}} {z}, $src}",
                        [(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask,
                          (_.VT (bitconvert (ld_frag addr:$src))),
                          _.ImmAllZerosV)))],
                        _.ExeDomain, itins.rm>,
                        EVEX, EVEX_KZ, Sched<[WriteLoad]>;
  }

  // Masked load with an undef pass-through: use the zero-masked form.
  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(NAME # _.ZSuffix # "rmkz")
              _.KRCWM:$mask, addr:$ptr)>;

  // Masked load with an all-zeros pass-through: zero-masked form.
  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
            (!cast<Instruction>(NAME # _.ZSuffix # "rmkz")
              _.KRCWM:$mask, addr:$ptr)>;

  // Masked load with a register pass-through: merge-masked form.
  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
            (!cast<Instruction>(NAME # _.ZSuffix # "rmk")
              _.RC:$src0, _.KRCWM:$mask, addr:$ptr)>;
}
3298
// Aligned-load variants of avx512_load for all three vector lengths.
//   opc, OpcodeStr - forwarded to avx512_load.
//   _              - per-length type info (info512/info256/info128).
//   prd            - predicate required by every form.
//   NoRMPattern    - forwarded to avx512_load.
// The 512-bit form (Z) needs only prd; the 256/128-bit forms (Z256/Z128)
// additionally require HasVLX. Each length uses its own AlignedLdFrag and
// masked_load_aligned<N> fragment and the SSE_MOVA itineraries.
multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo _,
                                 Predicate prd,
                                 bit NoRMPattern = 0> {
  let Predicates = [prd] in {
    defm Z : avx512_load<opc, OpcodeStr, SSE_MOVA, _.info512,
                         _.info512.AlignedLdFrag, masked_load_aligned512,
                         NoRMPattern>, EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_load<opc, OpcodeStr, SSE_MOVA, _.info256,
                            _.info256.AlignedLdFrag, masked_load_aligned256,
                            NoRMPattern>, EVEX_V256;
    defm Z128 : avx512_load<opc, OpcodeStr, SSE_MOVA, _.info128,
                            _.info128.AlignedLdFrag, masked_load_aligned128,
                            NoRMPattern>, EVEX_V128;
  }
}
3317
// Unaligned-load variants of avx512_load for all three vector lengths.
//   opc, OpcodeStr - forwarded to avx512_load.
//   _              - per-length type info (info512/info256/info128).
//   prd            - predicate required by every form.
//   NoRMPattern    - forwarded to avx512_load.
//   SelectOprr     - forwarded to avx512_load.
// The 512-bit form (Z) needs only prd; the 256/128-bit forms (Z256/Z128)
// additionally require HasVLX. All lengths use the type's LdFrag, the
// masked_load_unaligned fragment and the SSE_MOVU itineraries.
multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
                          AVX512VLVectorVTInfo _,
                          Predicate prd,
                          bit NoRMPattern = 0,
                          SDPatternOperator SelectOprr = vselect> {
  let Predicates = [prd] in {
    defm Z : avx512_load<opc, OpcodeStr, SSE_MOVU, _.info512,
                         _.info512.LdFrag, masked_load_unaligned,
                         NoRMPattern, SelectOprr>, EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_load<opc, OpcodeStr, SSE_MOVU, _.info256,
                            _.info256.LdFrag, masked_load_unaligned,
                            NoRMPattern, SelectOprr>, EVEX_V256;
    defm Z128 : avx512_load<opc, OpcodeStr, SSE_MOVU, _.info128,
                            _.info128.LdFrag, masked_load_unaligned,
                            NoRMPattern, SelectOprr>, EVEX_V128;
  }
}
3337
// Store-direction forms of an AVX-512 vector move, plus the masked-store
// selection pattern.
//   opc         - opcode byte.
//   OpcodeStr   - mnemonic used to build the asm strings.
//   itins       - itineraries; .rr is used for register forms, .mr for
//                 memory forms.
//   _           - vector type info.
//   st_frag     - store fragment matched by the mr pattern.
//   mstore      - masked-store fragment matched by the trailing Pat.
//   Name        - instruction name prefix; Name # "rr"/"rrk"/"rrkz" is passed
//                 to FoldGenData on the matching *_REV def.
//   NoMRPattern - when set, the unmasked mr form gets an empty pattern.
//
// Defs produced: rr_REV, rrk_REV, rrkz_REV, mr, mrk.
//
// Asm strings and instruction names are built uniformly with the '#' paste
// operator and quoted suffixes.
multiclass avx512_store<bits<8> opc, string OpcodeStr, MoveLoadStoreItins itins,
                        X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
                        string Name, bit NoMRPattern = 0> {
  // Register-to-register forms using the store opcode (MRMDestReg). They
  // carry no patterns and use a ".s" mnemonic suffix.
  let hasSideEffects = 0 in {
    def rr_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
                          OpcodeStr # ".s\t{$src, $dst|$dst, $src}",
                          [], _.ExeDomain, itins.rr>,
                          EVEX, FoldGenData<Name # "rr">, Sched<[WriteMove]>;
    def rrk_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
                           (ins _.KRCWM:$mask, _.RC:$src),
                           OpcodeStr # ".s\t{$src, ${dst} {${mask}}|" #
                                       "${dst} {${mask}}, $src}",
                           [], _.ExeDomain, itins.rr>,
                           EVEX, EVEX_K, FoldGenData<Name # "rrk">,
                           Sched<[WriteMove]>;
    def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
                            (ins _.KRCWM:$mask, _.RC:$src),
                            OpcodeStr # ".s\t{$src, ${dst} {${mask}} {z}|" #
                                        "${dst} {${mask}} {z}, $src}",
                            [], _.ExeDomain, itins.rr>,
                            EVEX, EVEX_KZ, FoldGenData<Name # "rrkz">,
                            Sched<[WriteMove]>;
  }

  // Unmasked store. The flags are set explicitly because the pattern may be
  // empty when NoMRPattern is set. This 'let' covers only the mr def.
  let hasSideEffects = 0, mayStore = 1 in
  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                    OpcodeStr # "\t{$src, $dst|$dst, $src}",
                    !if(NoMRPattern, [],
                        [(st_frag (_.VT _.RC:$src), addr:$dst)]),
                    _.ExeDomain, itins.mr>, EVEX, Sched<[WriteStore]>;

  // Masked store; selected through the Pat below rather than an inline
  // pattern.
  def mrk : AVX512PI<opc, MRMDestMem, (outs),
                     (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
                     OpcodeStr #
                       "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
                     [], _.ExeDomain, itins.mr>,
                     EVEX, EVEX_K, Sched<[WriteStore]>;

  def : Pat<(mstore addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src)),
            (!cast<Instruction>(NAME # _.ZSuffix # "mrk")
              addr:$ptr, _.KRCWM:$mask, _.RC:$src)>;
}
3375
Robert Khasanov7ca7df02014-08-04 14:35:15 +00003376
// Unaligned-store variants of avx512_store for all three vector lengths.
//   opc, OpcodeStr - forwarded to avx512_store.
//   _              - per-length type info (info512/info256/info128).
//   prd            - predicate required by every form.
//   Name           - name prefix; the length suffix (Z/Z256/Z128) is pasted
//                    on before it is forwarded.
//   NoMRPattern    - forwarded to avx512_store.
// The 512-bit form needs only prd; the 256/128-bit forms additionally
// require HasVLX. All lengths use the 'store' and masked_store_unaligned
// fragments and the SSE_MOVU itineraries.
multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
                            AVX512VLVectorVTInfo _, Predicate prd,
                            string Name, bit NoMRPattern = 0> {
  let Predicates = [prd] in {
    defm Z : avx512_store<opc, OpcodeStr, SSE_MOVU, _.info512, store,
                          masked_store_unaligned, Name#Z,
                          NoMRPattern>, EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, SSE_MOVU, _.info256, store,
                             masked_store_unaligned, Name#Z256,
                             NoMRPattern>, EVEX_V256;
    defm Z128 : avx512_store<opc, OpcodeStr, SSE_MOVU, _.info128, store,
                             masked_store_unaligned, Name#Z128,
                             NoMRPattern>, EVEX_V128;
  }
}
3393
// Aligned-store variants of avx512_store for all three vector lengths.
//   opc, OpcodeStr - forwarded to avx512_store.
//   _              - per-length type info (info512/info256/info128).
//   prd            - predicate required by every form.
//   Name           - name prefix; the length suffix (Z/Z256/Z128) is pasted
//                    on before it is forwarded.
//   NoMRPattern    - forwarded to avx512_store.
// The 512-bit form needs only prd; the 256/128-bit forms additionally
// require HasVLX. Each length uses 'alignedstore', its own
// masked_store_aligned<N> fragment and the SSE_MOVA itineraries.
multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _, Predicate prd,
                                  string Name, bit NoMRPattern = 0> {
  let Predicates = [prd] in {
    defm Z : avx512_store<opc, OpcodeStr, SSE_MOVA, _.info512, alignedstore,
                          masked_store_aligned512, Name#Z,
                          NoMRPattern>, EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, SSE_MOVA, _.info256, alignedstore,
                             masked_store_aligned256, Name#Z256,
                             NoMRPattern>, EVEX_V256;
    defm Z128 : avx512_store<opc, OpcodeStr, SSE_MOVA, _.info128, alignedstore,
                             masked_store_aligned128, Name#Z128,
                             NoMRPattern>, EVEX_V128;
  }
}
3411
// Vector move instruction families. Each defm combines a load multiclass and
// a store multiclass and then applies the prefix/encoding classes.
// In the load arguments, the value after the predicate is NoRMPattern and the
// optional next one is SelectOprr; in the store arguments, the value after
// the name string is NoMRPattern.

// Aligned FP moves.
defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
                                     HasAVX512>,
               avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
                                      HasAVX512, "VMOVAPS">,
               PS, EVEX_CD8<32, CD8VF>;

defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
                                     HasAVX512>,
               avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
                                      HasAVX512, "VMOVAPD">,
               PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Unaligned FP moves.
defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
                              0, null_frag>,
               avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
                               "VMOVUPS">,
               PS, EVEX_CD8<32, CD8VF>;

defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
                              0, null_frag>,
               avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
                               "VMOVUPD">,
               PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Aligned integer moves.
defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
                                       HasAVX512, 1>,
                 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
                                        HasAVX512, "VMOVDQA32", 1>,
                 PD, EVEX_CD8<32, CD8VF>;

defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
                                       HasAVX512>,
                 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
                                        HasAVX512, "VMOVDQA64">,
                 PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Unaligned integer moves.
defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI, 1>,
                avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info,
                                HasBWI, "VMOVDQU8", 1>,
                XD, EVEX_CD8<8, CD8VF>;

defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info,
                                HasBWI, 1>,
                 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info,
                                 HasBWI, "VMOVDQU16", 1>,
                 XD, VEX_W, EVEX_CD8<16, CD8VF>;

defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
                                1, null_frag>,
                 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info,
                                 HasAVX512, "VMOVDQU32", 1>,
                 XS, EVEX_CD8<32, CD8VF>;

defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
                                0, null_frag>,
                 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info,
                                 HasAVX512, "VMOVDQU64">,
                 XS, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovsky1f3ed412013-10-22 09:19:28 +00003469
// Special load pseudos to help with spilling when we don't have VLX: the
// 128/256-bit value has to be loaded through a ZMM register instead. These
// are converted in expandPostRAPseudos.
let isPseudo = 1, mayLoad = 1, hasSideEffects = 0, canFoldAsLoad = 1,
    isReMaterializable = 1, SchedRW = [WriteLoad] in {
  def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst),
                              (ins f128mem:$src), "", [], IIC_SSE_MOVA_P_RM>;
  def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst),
                              (ins f256mem:$src), "", [], IIC_SSE_MOVA_P_RM>;
  def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst),
                              (ins f128mem:$src), "", [], IIC_SSE_MOVA_P_RM>;
  def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst),
                              (ins f256mem:$src), "", [], IIC_SSE_MOVA_P_RM>;
}
3484
// Store pseudos for 128/256-bit registers with the _NOVLX suffix. They have
// no pattern and an empty asm string.
let isPseudo = 1, mayStore = 1, hasSideEffects = 0,
    SchedRW = [WriteStore] in {
  def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs),
                              (ins f128mem:$dst, VR128X:$src),
                              "", [], IIC_SSE_MOVA_P_MR>;
  def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs),
                              (ins f256mem:$dst, VR256X:$src),
                              "", [], IIC_SSE_MOVA_P_MR>;
  def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs),
                              (ins f128mem:$dst, VR128X:$src),
                              "", [], IIC_SSE_MOVA_P_MR>;
  def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs),
                              (ins f256mem:$dst, VR256X:$src),
                              "", [], IIC_SSE_MOVA_P_MR>;
}
3495
// A 512-bit integer select whose *true* operand is all zeros is selected as a
// zero-masked move of the false operand under the inverted mask.

// v8i64: the mask is re-classed to VK16 for KNOTW and back to VK8 afterwards.
def : Pat<(v8i64 (vselect VK8WM:$mask, (bc_v8i64 (v16i32 immAllZerosV)),
                          (v8i64 VR512:$src))),
          (VMOVDQA64Zrrkz
            (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
                              VK8),
            VR512:$src)>;

// v16i32: the mask is already 16 bits wide.
def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
                           (v16i32 VR512:$src))),
          (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
Elena Demikhovskyf1de34b2014-12-04 09:40:44 +00003504
// When the select mask is itself an inversion (xor with all-ones), use the
// un-inverted mask directly with the zero-masked move. This keeps the
// preceding zero-select patterns from adding a second mask inversion.
def : Pat<(v8i64 (vselect (xor VK8:$mask, (v8i1 immAllOnesV)),
                          (bc_v8i64 (v16i32 immAllZerosV)),
                          (v8i64 VR512:$src))),
          (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
                           (v16i32 immAllZerosV),
                           (v16i32 VR512:$src))),
          (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
3515
// Selects a masked select on the Narrow vector type by using the Wide masked
// move named by InstrStr.
//   InstrStr - instruction name prefix; "rrk"/"rrkz" is appended.
//   Narrow   - type info of the select being matched.
//   Wide     - type info of the move instruction that is emitted.
// Narrow operands are inserted into an otherwise undefined (IMPLICIT_DEF)
// Wide register at Narrow.SubRegIdx, the mask is re-classed to Wide.KRCWM,
// and the Narrow result is extracted from the same sub-register.
multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
                              X86VectorVTInfo Wide> {
  // select(mask, src1, src0) -> merge-masked move with src0 as pass-through.
  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                                Narrow.RC:$src1, Narrow.RC:$src0)),
            (EXTRACT_SUBREG
              (Wide.VT
                (!cast<Instruction>(InstrStr # "rrk")
                  (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0,
                                          Narrow.SubRegIdx)),
                  (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
                  (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1,
                                          Narrow.SubRegIdx)))),
              Narrow.SubRegIdx)>;

  // select(mask, src1, zero) -> zero-masked move.
  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                                Narrow.RC:$src1, Narrow.ImmAllZerosV)),
            (EXTRACT_SUBREG
              (Wide.VT
                (!cast<Instruction>(InstrStr # "rrkz")
                  (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
                  (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1,
                                          Narrow.SubRegIdx)))),
              Narrow.SubRegIdx)>;
}
3537
// Patterns for handling masked selects of 128-bit and 256-bit vectors with
// 32-bit or 64-bit elements when VLX isn't available. Use a 512-bit
// operation and extract.
let Predicates = [HasAVX512, NoVLX] in {
  // 32-bit elements.
  defm : mask_move_lowering<"VMOVAPSZ",   v4f32x_info, v16f32_info>;
  defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
  defm : mask_move_lowering<"VMOVAPSZ",   v8f32x_info, v16f32_info>;
  defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;

  // 64-bit elements.
  defm : mask_move_lowering<"VMOVAPDZ",   v2f64x_info, v8f64_info>;
  defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
  defm : mask_move_lowering<"VMOVAPDZ",   v4f64x_info, v8f64_info>;
  defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
}
3551
// Same widening trick for byte and word element vectors: with BWI but without
// VLX, masked selects are done with the 512-bit VMOVDQU8Z/VMOVDQU16Z moves.
let Predicates = [HasBWI, NoVLX] in {
  // 8-bit elements.
  defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
  defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;

  // 16-bit elements.
  defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
  defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
}
3559
// Unmasked stores of v16i32/v32i16/v64i8 are all selected to the 64-bit
// element move: VMOVDQA64Zmr for alignedstore, VMOVDQU64Zmr for plain store.
let Predicates = [HasAVX512] in {
  // 512-bit store.
  def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v16i32 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v32i16 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v64i8 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
}
3575
// With VLX, unmasked 128-bit and 256-bit integer stores with 8/16/32-bit
// elements are selected to the 64-bit element EVEX moves: VMOVDQA64 for
// alignedstore, VMOVDQU64 for plain store.
let Predicates = [HasVLX] in {
  // 128-bit store.
  def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;

  // 256-bit store.
  def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
}
3605
// Selects a masked move for a vselect whose true operand is a (bitcast of a)
// subvector extracted at index 0 of a wider register.
//   InstrStr - prefix of the masked move; "rrk"/"rrkz" is appended.
//   From     - type info of the vector being extracted from.
//   To       - type info of the extract_subvector result.
//   Cast     - type info of the vselect (the extract is bitconverted to it).
// The extract itself becomes an EXTRACT_SUBREG using To.SubRegIdx.
multiclass masked_move_for_extract<string InstrStr, X86VectorVTInfo From,
                                   X86VectorVTInfo To, X86VectorVTInfo Cast> {
  // Merge-masking: unselected elements come from $src0.
  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                                (To.VT (extract_subvector
                                         (From.VT From.RC:$src), (iPTR 0)))),
                              To.RC:$src0)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
                       Cast.RC:$src0, Cast.KRCWM:$mask,
                       (EXTRACT_SUBREG From.RC:$src, To.SubRegIdx)))>;

  // Zero-masking: the false operand of the select is the all-zeros vector.
  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                                (To.VT (extract_subvector
                                         (From.VT From.RC:$src), (iPTR 0)))),
                              Cast.ImmAllZerosV)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
                       Cast.KRCWM:$mask,
                       (EXTRACT_SUBREG From.RC:$src, To.SubRegIdx)))>;
}
3626
3627
// Instantiations of masked_move_for_extract. Each source vector type is paired
// with both the 64-bit and the 32-bit element masked move of the destination
// width, so the select may use either mask granularity.
let Predicates = [HasVLX] in {
// A masked extract from the first 128-bits of a 256-bit vector can be
// implemented with masked move.
defm : masked_move_for_extract<"VMOVDQA64Z128", v4i64x_info,  v2i64x_info, v2i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z128", v8i32x_info,  v4i32x_info, v2i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z128", v16i16x_info, v8i16x_info, v2i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z128", v32i8x_info,  v16i8x_info, v2i64x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z128", v4i64x_info,  v2i64x_info, v4i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z128", v8i32x_info,  v4i32x_info, v4i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z128", v16i16x_info, v8i16x_info, v4i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z128", v32i8x_info,  v16i8x_info, v4i32x_info>;
defm : masked_move_for_extract<"VMOVAPDZ128",   v4f64x_info,  v2f64x_info, v2f64x_info>;
defm : masked_move_for_extract<"VMOVAPDZ128",   v8f32x_info,  v4f32x_info, v2f64x_info>;
defm : masked_move_for_extract<"VMOVAPSZ128",   v4f64x_info,  v2f64x_info, v4f32x_info>;
defm : masked_move_for_extract<"VMOVAPSZ128",   v8f32x_info,  v4f32x_info, v4f32x_info>;

// A masked extract from the first 128-bits of a 512-bit vector can be
// implemented with masked move.
defm : masked_move_for_extract<"VMOVDQA64Z128", v8i64_info,  v2i64x_info, v2i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z128", v16i32_info, v4i32x_info, v2i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z128", v32i16_info, v8i16x_info, v2i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z128", v64i8_info,  v16i8x_info, v2i64x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z128", v8i64_info,  v2i64x_info, v4i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z128", v16i32_info, v4i32x_info, v4i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z128", v32i16_info, v8i16x_info, v4i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z128", v64i8_info,  v16i8x_info, v4i32x_info>;
defm : masked_move_for_extract<"VMOVAPDZ128",   v8f64_info,  v2f64x_info, v2f64x_info>;
defm : masked_move_for_extract<"VMOVAPDZ128",   v16f32_info, v4f32x_info, v2f64x_info>;
defm : masked_move_for_extract<"VMOVAPSZ128",   v8f64_info,  v2f64x_info, v4f32x_info>;
defm : masked_move_for_extract<"VMOVAPSZ128",   v16f32_info, v4f32x_info, v4f32x_info>;

// A masked extract from the first 256-bits of a 512-bit vector can be
// implemented with masked move.
defm : masked_move_for_extract<"VMOVDQA64Z256", v8i64_info,  v4i64x_info,  v4i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z256", v16i32_info, v8i32x_info,  v4i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z256", v32i16_info, v16i16x_info, v4i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z256", v64i8_info,  v32i8x_info,  v4i64x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z256", v8i64_info,  v4i64x_info,  v8i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z256", v16i32_info, v8i32x_info,  v8i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z256", v32i16_info, v16i16x_info, v8i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z256", v64i8_info,  v32i8x_info,  v8i32x_info>;
defm : masked_move_for_extract<"VMOVAPDZ256",   v8f64_info,  v4f64x_info,  v4f64x_info>;
defm : masked_move_for_extract<"VMOVAPDZ256",   v16f32_info, v8f32x_info,  v4f64x_info>;
defm : masked_move_for_extract<"VMOVAPSZ256",   v8f64_info,  v4f64x_info,  v8f32x_info>;
defm : masked_move_for_extract<"VMOVAPSZ256",   v16f32_info, v8f32x_info,  v8f32x_info>;
}
Simon Pilgrimb2a80952017-01-08 16:45:39 +00003674
// Move Int Doubleword/Quadword to Packed Int (GR32/GR64/memory -> VR128X), and
// the codegen-only GR64 <-> FR64X bitcast moves.
//
let ExeDomain = SSEPackedInt in {
// GR32 -> low element of a v4i32.
def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg,
                      (outs VR128X:$dst), (ins GR32:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set VR128X:$dst,
                         (v4i32 (scalar_to_vector GR32:$src)))],
                      IIC_SSE_MOVDQ>,
                    EVEX, Sched<[WriteMove]>;
// 32-bit load -> low element of a v4i32.
def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem,
                      (outs VR128X:$dst), (ins i32mem:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set VR128X:$dst,
                         (v4i32 (scalar_to_vector (loadi32 addr:$src))))],
                      IIC_SSE_MOVDQ>,
                    EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteLoad]>;
// GR64 -> low element of a v2i64.
def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg,
                       (outs VR128X:$dst), (ins GR64:$src),
                       "vmovq\t{$src, $dst|$dst, $src}",
                       [(set VR128X:$dst,
                          (v2i64 (scalar_to_vector GR64:$src)))],
                       IIC_SSE_MOVDQ>,
                     EVEX, VEX_W, Sched<[WriteMove]>;
// Memory form of the 0x6E quadword move. It has no selection pattern and is
// marked isCodeGenOnly with ForceDisassemble.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem,
                       (outs VR128X:$dst), (ins i64mem:$src),
                       "vmovq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVDQ>,
                     EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteLoad]>;
// Bitcasts between i64 (GR64 or memory) and the scalar FP class FR64X.
let isCodeGenOnly = 1 in {
def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg,
                      (outs FR64X:$dst), (ins GR64:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(set FR64X:$dst, (bitconvert GR64:$src))],
                      IIC_SSE_MOVDQ>,
                    EVEX, VEX_W, Sched<[WriteMove]>;
def VMOV64toSDZrm : AVX512XSI<0x7E, MRMSrcMem,
                      (outs FR64X:$dst), (ins i64mem:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(set FR64X:$dst, (bitconvert (loadi64 addr:$src)))]>,
                    EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteLoad]>;
def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg,
                      (outs GR64:$dst), (ins FR64X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(set GR64:$dst, (bitconvert FR64X:$src))],
                      IIC_SSE_MOVDQ>,
                    EVEX, VEX_W, Sched<[WriteMove]>;
def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem,
                      (outs), (ins i64mem:$dst, FR64X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(store (i64 (bitconvert FR64X:$src)), addr:$dst)],
                      IIC_SSE_MOVDQ>,
                    EVEX, VEX_W, Sched<[WriteStore]>,
                    EVEX_CD8<64, CD8VT1>;
}
} // ExeDomain = SSEPackedInt
3718
// Move Int Doubleword to Single Scalar
//
// Codegen-only bitcasts of an i32 (GR32 or memory) into the scalar FP class
// FR32X.
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg,
                     (outs FR32X:$dst), (ins GR32:$src),
                     "vmovd\t{$src, $dst|$dst, $src}",
                     [(set FR32X:$dst, (bitconvert GR32:$src))],
                     IIC_SSE_MOVDQ>,
                   EVEX, Sched<[WriteMove]>;

def VMOVDI2SSZrm : AVX512BI<0x6E, MRMSrcMem,
                     (outs FR32X:$dst), (ins i32mem:$src),
                     "vmovd\t{$src, $dst|$dst, $src}",
                     [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))],
                     IIC_SSE_MOVDQ>,
                   EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteLoad]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3732
// Move doubleword from xmm register to r/m32
//
// Both forms match an extract of element 0 of a v4i32, either into a GR32 or
// directly into a 32-bit store.
let ExeDomain = SSEPackedInt in {
def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg,
                      (outs GR32:$dst), (ins VR128X:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
                                                   (iPTR 0)))],
                      IIC_SSE_MOVD_ToGP>,
                    EVEX, Sched<[WriteMove]>;
def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem,
                      (outs), (ins i32mem:$dst, VR128X:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(store (i32 (extractelt (v4i32 VR128X:$src),
                                               (iPTR 0))), addr:$dst)],
                      IIC_SSE_MOVDQ>,
                    EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteStore]>;
} // ExeDomain = SSEPackedInt
3748
// Move quadword from xmm1 register to r/m64
//
let ExeDomain = SSEPackedInt in {
// Element 0 of a v2i64 -> GR64 (opcode 0x7E).
def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
                                                   (iPTR 0)))],
                      IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_W, Sched<[WriteMove]>,
                      Requires<[HasAVX512, In64BitMode]>;

// Memory form of the 0x7E encoding. It has no selection pattern and is marked
// isCodeGenOnly with ForceDisassemble. The operand is a 64-bit memory
// location, so it carries the same EVEX_CD8<64, CD8VT1> as the other 64-bit
// memory forms (VMOV64toPQIZrm, VMOVSDto64Zmr, VMOVPQI2QIZmr); without it the
// compressed disp8 would not be scaled like theirs.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [], IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_W,
                      EVEX_CD8<64, CD8VT1>, Sched<[WriteStore]>,
                      Requires<[HasAVX512, In64BitMode]>;

// Store of element 0 of a v2i64 using the 0xD6 encoding.
// NOTE(review): this form has no GPR operand; confirm whether the
// In64BitMode requirement is actually needed here.
def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
                      (ins i64mem:$dst, VR128X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
                              addr:$dst)], IIC_SSE_MOVDQ>,
                      EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
                      Sched<[WriteStore]>, Requires<[HasAVX512, In64BitMode]>;

// Register-to-register form of the 0xD6 encoding; no pattern, printed with a
// ".s" suffix.
let hasSideEffects = 0 in
def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
                             (ins VR128X:$src),
                             "vmovq.s\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVDQ>,
                             EVEX, VEX_W, Sched<[WriteMove]>;
} // ExeDomain = SSEPackedInt
3779
// Move Scalar Single to Double Int
//
// Codegen-only bitcasts of an FR32X value to i32, into a GR32 or a 32-bit
// store.
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg,
                     (outs GR32:$dst), (ins FR32X:$src),
                     "vmovd\t{$src, $dst|$dst, $src}",
                     [(set GR32:$dst, (bitconvert FR32X:$src))],
                     IIC_SSE_MOVD_ToGP>,
                   EVEX, Sched<[WriteMove]>;
def VMOVSS2DIZmr : AVX512BI<0x7E, MRMDestMem,
                     (outs), (ins i32mem:$dst, FR32X:$src),
                     "vmovd\t{$src, $dst|$dst, $src}",
                     [(store (i32 (bitconvert FR32X:$src)), addr:$dst)],
                     IIC_SSE_MOVDQ>,
                   EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteStore]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3794
// Move Quadword Int to Packed Quadword Int
//
// 64-bit load into the low element of a v2i64 (scalar_to_vector of loadi64).
let ExeDomain = SSEPackedInt in {
def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem,
                      (outs VR128X:$dst), (ins i64mem:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(set VR128X:$dst,
                         (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
                    EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteLoad]>;
} // ExeDomain = SSEPackedInt
3805
// Allow "vmovd" but print "vmovq".
// Both 64-bit GPR <-> XMM register moves accept the "vmovd" spelling; the
// trailing 0 keeps the alias from being used when printing.
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOV64toPQIZrr VR128X:$dst, GR64:$src),
                0>;
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOVPQIto64Zrr GR64:$dst, VR128X:$src),
                0>;
3811
Simon Pilgrimb2a80952017-01-08 16:45:39 +00003812//===----------------------------------------------------------------------===//
3813// AVX-512 MOVSS, MOVSD
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003814//===----------------------------------------------------------------------===//
3815
// Defines the scalar move family for one element type:
//   rr / rrk / rrkz  - register forms (opcode 0x10), plain, merge-masked and
//                      zero-masked; the masked forms select via X86selects.
//   rm / rmk / rmkz  - load forms (opcode 0x10); only rm has a pattern.
//   mr / mrk         - store forms (opcode 0x11); only mr has a pattern.
// asm is the mnemonic, OpNode the DAG node matched by the register forms and
// _ the type info supplying register classes, memory operand and load
// fragment.
multiclass avx512_move_scalar<string asm, SDNode OpNode,
                              X86VectorVTInfo _> {
  // Register forms.
  def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.RC:$src2),
                    !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
                    _.ExeDomain, IIC_SSE_MOV_S_RR>,
           EVEX_4V, Sched<[WriteMove]>;
  def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
                      (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
                      !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
                                 "$dst {${mask}} {z}, $src1, $src2}"),
                      [(set _.RC:$dst,
                         (_.VT (X86selects _.KRCWM:$mask,
                                  (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                                  _.ImmAllZerosV)))],
                      _.ExeDomain, IIC_SSE_MOV_S_RR>,
             EVEX_4V, EVEX_KZ, Sched<[WriteMove]>;
  // Merge-masking ties the pass-through operand $src0 to the destination.
  let Constraints = "$src0 = $dst" in
  def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
                     (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
                     !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
                                "$dst {${mask}}, $src1, $src2}"),
                     [(set _.RC:$dst,
                        (_.VT (X86selects _.KRCWM:$mask,
                                 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                                 (_.VT _.RC:$src0))))],
                     _.ExeDomain, IIC_SSE_MOV_S_RR>,
            EVEX_4V, EVEX_K, Sched<[WriteMove]>;

  // Load forms. The unmasked load produces the scalar FP class (_.FRC).
  let canFoldAsLoad = 1, isReMaterializable = 1 in
  def rm : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst),
                    (ins _.ScalarMemOp:$src),
                    !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
                    [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
                    _.ExeDomain, IIC_SSE_MOV_S_RM>,
           EVEX, Sched<[WriteLoad]>;
  let mayLoad = 1, hasSideEffects = 0 in {
    let Constraints = "$src0 = $dst" in
    def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
                       (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
                       !strconcat(asm, "\t{$src, $dst {${mask}}|",
                                  "$dst {${mask}}, $src}"),
                       [], _.ExeDomain, IIC_SSE_MOV_S_RM>,
              EVEX, EVEX_K, Sched<[WriteLoad]>;
    def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
                        (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
                        !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
                                   "$dst {${mask}} {z}, $src}"),
                        [], _.ExeDomain, IIC_SSE_MOV_S_RM>,
               EVEX, EVEX_KZ, Sched<[WriteLoad]>;
  }

  // Store forms.
  def mr: AVX512PI<0x11, MRMDestMem, (outs),
                   (ins _.ScalarMemOp:$dst, _.FRC:$src),
                   !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
                   [(store _.FRC:$src, addr:$dst)],
                   _.ExeDomain, IIC_SSE_MOV_S_MR>,
          EVEX, Sched<[WriteStore]>;
  let mayStore = 1, hasSideEffects = 0 in
  def mrk: AVX512PI<0x11, MRMDestMem, (outs),
                    (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.FRC:$src),
                    !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
                    [], _.ExeDomain, IIC_SSE_MOV_S_MR>,
           EVEX, EVEX_K, Sched<[WriteStore]>;
}
3868
// Scalar single-precision move family (XS prefix, 32-bit disp8 scaling).
defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, f32x_info>,
               VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;

// Scalar double-precision move family (XD prefix, VEX_W, 64-bit disp8
// scaling).
defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, f64x_info>,
               VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003874
Ayman Musa46af8f92016-11-13 14:29:32 +00003875
// Folds a scalar X86selects that feeds OpNode (X86Movss/X86Movsd) into the
// masked register move InstrStr#"rrk" / InstrStr#"rrkz". The mask is the
// truncated low part of a GR32, which is copied into VK1WM.
//   ZeroFP - the floating point zero leaf matched by the zero-masking form.
multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
                                       PatLeaf ZeroFP, X86VectorVTInfo _> {

// select(mask, src1, src2): src2 becomes the tied pass-through operand.
def : Pat<(_.VT (OpNode _.RC:$src0,
                  (_.VT (scalar_to_vector
                    (_.EltVT (X86selects
                               (scalar_to_vector (i8 (trunc GR32:$mask))),
                               (_.EltVT _.FRC:$src1),
                               (_.EltVT _.FRC:$src2))))))),
          (!cast<Instruction>(InstrStr#"rrk")
             (COPY_TO_REGCLASS _.FRC:$src2, _.RC),
             (COPY_TO_REGCLASS GR32:$mask, VK1WM),
             (_.VT _.RC:$src0),
             (COPY_TO_REGCLASS _.FRC:$src1, _.RC))>;

// select(mask, src1, 0.0): use the zero-masking form.
def : Pat<(_.VT (OpNode _.RC:$src0,
                  (_.VT (scalar_to_vector
                    (_.EltVT (X86selects
                               (scalar_to_vector (i8 (trunc GR32:$mask))),
                               (_.EltVT _.FRC:$src1),
                               (_.EltVT ZeroFP))))))),
          (!cast<Instruction>(InstrStr#"rrkz")
             (COPY_TO_REGCLASS GR32:$mask, VK1WM),
             (_.VT _.RC:$src0),
             (COPY_TO_REGCLASS _.FRC:$src1, _.RC))>;
}
3900
// Selects the masked scalar store InstrStr#"mrk" for a 512-bit masked_store
// whose stored value is a 128-bit vector inserted at index 0 of undef 256-bit
// and 512-bit vectors.
//   Mask   - dag that the masked_store's mask operand must match; it is
//            expected to bind $mask.
//   MaskRC - register class of $mask; it is copied directly into VK1WM.
multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
                                        dag Mask, RegisterClass MaskRC> {

def : Pat<(masked_store addr:$dst, Mask,
            (_.info512.VT
              (insert_subvector undef,
                (_.info256.VT
                  (insert_subvector undef,
                    (_.info128.VT _.info128.RC:$src),
                    (iPTR 0))),
                (iPTR 0)))),
          (!cast<Instruction>(InstrStr#"mrk") addr:$dst,
             (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
             (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;

}
3915
// Variant of avx512_store_scalar_lowering for a mask held in a register class
// narrower than 32 bits: $mask is first inserted into an undefined i32 at
// subregister index subreg, and that i32 is then copied into VK1WM.
multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
                                               AVX512VLVectorVTInfo _,
                                               dag Mask, RegisterClass MaskRC,
                                               SubRegIndex subreg> {

def : Pat<(masked_store addr:$dst, Mask,
            (_.info512.VT
              (insert_subvector undef,
                (_.info256.VT
                  (insert_subvector undef,
                    (_.info128.VT _.info128.RC:$src),
                    (iPTR 0))),
                (iPTR 0)))),
          (!cast<Instruction>(InstrStr#"mrk") addr:$dst,
             (COPY_TO_REGCLASS
               (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)),
               VK1WM),
             (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;

}
3932
// Selects the masked scalar loads InstrStr#"rmkz" / InstrStr#"rmk" for the low
// 128 bits extracted from a 512-bit masked_load.
//   Mask   - dag that the masked_load's mask operand must match; it is
//            expected to bind $mask.
//   MaskRC - register class of $mask; it is copied directly into VK1WM.
multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
                                       dag Mask, RegisterClass MaskRC> {

// Pass-through is all zeros: zero-masking load.
def : Pat<(_.info128.VT
            (extract_subvector
              (_.info512.VT
                (masked_load addr:$srcAddr, Mask,
                  (_.info512.VT (bitconvert (v16i32 immAllZerosV))))),
              (iPTR 0))),
          (!cast<Instruction>(InstrStr#"rmkz")
             (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
             addr:$srcAddr)>;

// Pass-through is X86vzmovl of $src widened with undef: merge-masking load
// with $src as the pass-through register.
def : Pat<(_.info128.VT
            (extract_subvector
              (_.info512.VT
                (masked_load addr:$srcAddr, Mask,
                  (_.info512.VT
                    (insert_subvector undef,
                      (_.info256.VT
                        (insert_subvector undef,
                          (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                          (iPTR 0))),
                      (iPTR 0))))),
              (iPTR 0))),
          (!cast<Instruction>(InstrStr#"rmk") _.info128.RC:$src,
             (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
             addr:$srcAddr)>;

}
3958
// Variant of avx512_load_scalar_lowering for a mask held in a register class
// narrower than 32 bits: $mask is first inserted into an undefined i32 at
// subregister index subreg, and that i32 is then copied into VK1WM.
multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
                                              AVX512VLVectorVTInfo _,
                                              dag Mask, RegisterClass MaskRC,
                                              SubRegIndex subreg> {

// Pass-through is all zeros: zero-masking load.
def : Pat<(_.info128.VT
            (extract_subvector
              (_.info512.VT
                (masked_load addr:$srcAddr, Mask,
                  (_.info512.VT (bitconvert (v16i32 immAllZerosV))))),
              (iPTR 0))),
          (!cast<Instruction>(InstrStr#"rmkz")
             (COPY_TO_REGCLASS
               (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)),
               VK1WM),
             addr:$srcAddr)>;

// Pass-through is X86vzmovl of $src widened with undef: merge-masking load
// with $src as the pass-through register.
def : Pat<(_.info128.VT
            (extract_subvector
              (_.info512.VT
                (masked_load addr:$srcAddr, Mask,
                  (_.info512.VT
                    (insert_subvector undef,
                      (_.info256.VT
                        (insert_subvector undef,
                          (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                          (iPTR 0))),
                      (iPTR 0))))),
              (iPTR 0))),
          (!cast<Instruction>(InstrStr#"rmk") _.info128.RC:$src,
             (COPY_TO_REGCLASS
               (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)),
               VK1WM),
             addr:$srcAddr)>;

}
3986
// Masked scalar register moves fed by a select.
defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;

// Masked scalar stores. The mask dag is a GPR value ANDed with 1 and
// bitconverted to the vXi1 mask type; GR16/GR8 masks go through the _subreg
// variant.
defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
         (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))),
         GR32>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
         (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))),
         GR16, sub_16bit>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
         (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
         GR8, sub_8bit>;

// Masked scalar loads, using the same mask shapes as the stores.
defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
         (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))),
         GR32>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
         (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))),
         GR16, sub_16bit>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
         (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
         GR8, sub_8bit>;
Ayman Musa46af8f92016-11-13 14:29:32 +00004003
// Scalar f32/f64 X86selects lowered to the merge-masked register move: $src2
// is the tied pass-through, $src1 the value moved when the mask bit is set,
// and the first vector source operand is left undefined. The result is copied
// back to the scalar FP register class.

// f32, mask in a GR8 (widened to i32 and copied into VK1WM).
def : Pat<(f32 (X86selects (scalar_to_vector GR8:$mask),
                           (f32 FR32X:$src1), (f32 FR32X:$src2))),
          (COPY_TO_REGCLASS
            (VMOVSSZrrk
              (COPY_TO_REGCLASS FR32X:$src2, VR128X),
              (COPY_TO_REGCLASS
                (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$mask, sub_8bit)),
                VK1WM),
              (v4f32 (IMPLICIT_DEF)),
              (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
            FR32X)>;

// f32, mask already in VK1WM.
def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
          (COPY_TO_REGCLASS
            (VMOVSSZrrk
              (COPY_TO_REGCLASS FR32X:$src2, VR128X),
              VK1WM:$mask,
              (v4f32 (IMPLICIT_DEF)),
              (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
            FR32X)>;

// f64, mask in a GR8 (widened to i32 and copied into VK1WM).
def : Pat<(f64 (X86selects (scalar_to_vector GR8:$mask),
                           (f64 FR64X:$src1), (f64 FR64X:$src2))),
          (COPY_TO_REGCLASS
            (VMOVSDZrrk
              (COPY_TO_REGCLASS FR64X:$src2, VR128X),
              (COPY_TO_REGCLASS
                (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$mask, sub_8bit)),
                VK1WM),
              (v2f64 (IMPLICIT_DEF)),
              (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
            FR64X)>;

// f64, mask already in VK1WM.
def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
          (COPY_TO_REGCLASS
            (VMOVSDZrrk
              (COPY_TO_REGCLASS FR64X:$src2, VR128X),
              VK1WM:$mask,
              (v2f64 (IMPLICIT_DEF)),
              (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
            FR64X)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004031
// The mask_store_ss intrinsic maps onto the masked scalar store: the GR8 mask
// is widened to i32 and copied into VK1WM, and the source vector is copied to
// the scalar register class FR32X.
def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask),
          (VMOVSSZmrk addr:$dst,
             (COPY_TO_REGCLASS
               (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$mask, sub_8bit)),
               VK1WM),
             (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
4035
// "_REV" register-to-register VMOVSS/VMOVSD definitions using the MRMDestReg
// form of opcode 0x11, printed with a ".s" mnemonic suffix. They have no
// selection patterns; each names its non-_REV counterpart through FoldGenData.
// The "k" variants tie $src0 to $dst, the "kz" variants use EVEX_KZ.
let hasSideEffects = 0 in {
  def VMOVSSZrr_REV : AVX512<0x11, MRMDestReg,
                             (outs VR128X:$dst),
                             (ins VR128X:$src1, VR128X:$src2),
                             "vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                             [], IIC_SSE_MOV_S_RR>,
                      XS, EVEX_4V, VEX_LIG,
                      FoldGenData<"VMOVSSZrr">, Sched<[WriteMove]>;

  let Constraints = "$src0 = $dst" in {
    def VMOVSSZrrk_REV : AVX512<0x11, MRMDestReg,
                                (outs VR128X:$dst),
                                (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
                                     VR128X:$src1, VR128X:$src2),
                                "vmovss.s\t{$src2, $src1, $dst {${mask}}|" #
                                "$dst {${mask}}, $src1, $src2}",
                                [], IIC_SSE_MOV_S_RR>,
                         EVEX_K, XS, EVEX_4V, VEX_LIG,
                         FoldGenData<"VMOVSSZrrk">, Sched<[WriteMove]>;
  }

  def VMOVSSZrrkz_REV : AVX512<0x11, MRMDestReg,
                               (outs VR128X:$dst),
                               (ins f32x_info.KRCWM:$mask,
                                    VR128X:$src1, VR128X:$src2),
                               "vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|" #
                               "$dst {${mask}} {z}, $src1, $src2}",
                               [], IIC_SSE_MOV_S_RR>,
                        EVEX_KZ, XS, EVEX_4V, VEX_LIG,
                        FoldGenData<"VMOVSSZrrkz">, Sched<[WriteMove]>;

  def VMOVSDZrr_REV : AVX512<0x11, MRMDestReg,
                             (outs VR128X:$dst),
                             (ins VR128X:$src1, VR128X:$src2),
                             "vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                             [], IIC_SSE_MOV_S_RR>,
                      XD, EVEX_4V, VEX_LIG, VEX_W,
                      FoldGenData<"VMOVSDZrr">, Sched<[WriteMove]>;

  let Constraints = "$src0 = $dst" in {
    def VMOVSDZrrk_REV : AVX512<0x11, MRMDestReg,
                                (outs VR128X:$dst),
                                (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
                                     VR128X:$src1, VR128X:$src2),
                                "vmovsd.s\t{$src2, $src1, $dst {${mask}}|" #
                                "$dst {${mask}}, $src1, $src2}",
                                [], IIC_SSE_MOV_S_RR>,
                         EVEX_K, XD, EVEX_4V, VEX_LIG,
                         VEX_W, FoldGenData<"VMOVSDZrrk">, Sched<[WriteMove]>;
  }

  def VMOVSDZrrkz_REV : AVX512<0x11, MRMDestReg,
                               (outs VR128X:$dst),
                               (ins f64x_info.KRCWM:$mask,
                                    VR128X:$src1, VR128X:$src2),
                               "vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|" #
                               "$dst {${mask}} {z}, $src1, $src2}",
                               [], IIC_SSE_MOV_S_RR>,
                        EVEX_KZ, XD, EVEX_4V, VEX_LIG,
                        VEX_W, FoldGenData<"VMOVSDZrrkz">, Sched<[WriteMove]>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004082
// Selection patterns that map zero-extending moves (X86vzmovl / X86vzload),
// scalar_to_vector loads and the Movss/Movsd/Movlpd/Movlps shuffles onto the
// EVEX scalar move instructions.
let Predicates = [HasAVX512] in {
  let AddedComplexity = 15 in {
  // Keep the low element of $src and zero the rest by moving it over a zeroed
  // 128-bit vector.
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
            (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
            (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;
  // The scalar comes from FR64X, so copy it into the EVEX 128-bit class
  // VR128X, matching every other pattern in this block (this previously named
  // VR128).
  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64X:$src)))),
            (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
                       (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
  }

  // Move low f32 and clear high bits.
  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
              (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)), sub_xmm)>;
  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
              (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)), sub_xmm)>;
  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
              (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
              (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)), sub_xmm)>;

  let AddedComplexity = 20 in {
  // MOVSSrm zeros the high parts of the register, so the 128-bit forms need
  // only a COPY_TO_REGCLASS of the loaded value.
  // The AVX versions also write: DST[255:128] <- 0
  def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
  def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
  def : Pat<(v4f32 (X86vzload addr:$src)),
            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;

  // MOVSDrm zeros the high parts of the register in the same way.
  // The AVX versions also write: DST[255:128] <- 0
  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
  def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
  def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
  def : Pat<(v2f64 (X86vzload addr:$src)),
            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;

  // Represent the same patterns above but in the form they appear for
  // 256-bit types; the zeroed upper part is expressed with SUBREG_TO_REG.
  def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
                   (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
  def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
                   (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v8f32 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
                   (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
  def : Pat<(v4f64 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;

  // Represent the same patterns above but in the form they appear for
  // 512-bit types
  def : Pat<(v16i32 (X86vzmovl (insert_subvector undef,
                    (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
  def : Pat<(v16f32 (X86vzmovl (insert_subvector undef,
                    (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v16f32 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v8f64 (X86vzmovl (insert_subvector undef,
                   (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
  def : Pat<(v8f64 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
  }
  def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
                   (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;

  // Move low f64 and clear high bits.
  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
              (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)), sub_xmm)>;
  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
              (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)), sub_xmm)>;

  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
             (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)), sub_xmm)>;
  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
             (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)), sub_xmm)>;

  // Extract and store.
  def : Pat<(store (f32 (extractelt (v4f32 VR128X:$src), (iPTR 0))),
                   addr:$dst),
            (VMOVSSZmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>;

  // Shuffle with VMOVSS
  def : Pat<(v4i32 (X86Movss VR128X:$src1, VR128X:$src2)),
            (VMOVSSZrr (v4i32 VR128X:$src1), VR128X:$src2)>;

  def : Pat<(v4f32 (X86Movss VR128X:$src1, (scalar_to_vector FR32X:$src2))),
            (VMOVSSZrr VR128X:$src1,
                       (COPY_TO_REGCLASS FR32X:$src2, VR128X))>;

  // Shuffle with VMOVSD
  def : Pat<(v2i64 (X86Movsd VR128X:$src1, VR128X:$src2)),
            (VMOVSDZrr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v2f64 (X86Movsd VR128X:$src1, (scalar_to_vector FR64X:$src2))),
            (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS FR64X:$src2, VR128X))>;

  // Movlpd/Movlps on register operands are also selected as VMOVSD.
  def : Pat<(v2f64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
            (VMOVSDZrr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v4f32 (X86Movlps VR128X:$src1, VR128X:$src2)),
            (VMOVSDZrr VR128X:$src1, VR128X:$src2)>;
}
4215
// EVEX vmovq xmm, xmm: selected for a v2i64 X86vzmovl of a register source.
let ExeDomain = SSEPackedInt, SchedRW = [WriteVecLogic],
    AddedComplexity = 15 in
def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg,
                                  (outs VR128X:$dst), (ins VR128X:$src),
                                  "vmovq\t{$src, $dst|$dst, $src}",
                                  [(set VR128X:$dst,
                                        (v2i64 (X86vzmovl
                                                 (v2i64 VR128X:$src))))],
                                  IIC_SSE_MOVQ_RR>,
                        EVEX, VEX_W;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004225
// Zero-extending GPR-to-vector and load-to-vector moves selected as the EVEX
// movd/movq instructions.
let Predicates = [HasAVX512] in {
  let AddedComplexity = 15 in {
    // Scalar in a general-purpose register, upper elements zeroed.
    def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
              (VMOVDI2PDIZrr GR32:$src)>;

    def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
              (VMOV64toPQIZrr GR64:$src)>;

    // Same for 256-bit and 512-bit i64 vectors, using the 128-bit instruction
    // wrapped in SUBREG_TO_REG.
    def : Pat<(v4i64 (X86vzmovl
                       (insert_subvector undef,
                                         (v2i64 (scalar_to_vector GR64:$src)),
                                         (iPTR 0)))),
              (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;

    def : Pat<(v8i64 (X86vzmovl
                       (insert_subvector undef,
                                         (v2i64 (scalar_to_vector GR64:$src)),
                                         (iPTR 0)))),
              (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
  }

  // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
  let AddedComplexity = 20 in {
    def : Pat<(v2i64 (X86vzmovl
                       (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
              (VMOVDI2PDIZrm addr:$src)>;
    def : Pat<(v4i32 (X86vzmovl
                       (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
              (VMOVDI2PDIZrm addr:$src)>;
    def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
              (VMOVDI2PDIZrm addr:$src)>;
    def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
              (VMOVDI2PDIZrm addr:$src)>;
    def : Pat<(v4i32 (X86vzload addr:$src)),
              (VMOVDI2PDIZrm addr:$src)>;
    def : Pat<(v8i32 (X86vzload addr:$src)),
              (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
    def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
              (VMOVQI2PQIZrm addr:$src)>;
    def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
              (VMOVZPQILo2PQIZrr VR128X:$src)>;
    def : Pat<(v2i64 (X86vzload addr:$src)),
              (VMOVQI2PQIZrm addr:$src)>;
    def : Pat<(v4i64 (X86vzload addr:$src)),
              (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
  }

  // Use regular 128-bit instructions to match 256-bit and 512-bit
  // scalar_to_vec+zext.
  def : Pat<(v8i32 (X86vzmovl
                     (insert_subvector undef,
                                       (v4i32 (scalar_to_vector GR32:$src)),
                                       (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl
                      (insert_subvector undef,
                                        (v4i32 (scalar_to_vector GR32:$src)),
                                        (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>;

  // Use regular 128-bit instructions to match 512-bit zero-extending loads.
  def : Pat<(v16i32 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
  def : Pat<(v8i64 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
}
//===----------------------------------------------------------------------===//
// AVX-512 - Non-temporals
//===----------------------------------------------------------------------===//
// Non-temporal aligned loads (vmovntdqa). The instructions carry no patterns
// of their own; the 256-bit and 128-bit forms require HasVLX.
let SchedRW = [WriteLoad] in {
  def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem,
                              (outs VR512:$dst), (ins i512mem:$src),
                              "vmovntdqa\t{$src, $dst|$dst, $src}",
                              [], SSEPackedInt>,
                     EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;

  let Predicates = [HasVLX] in {
    def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem,
                                   (outs VR256X:$dst), (ins i256mem:$src),
                                   "vmovntdqa\t{$src, $dst|$dst, $src}",
                                   [], SSEPackedInt>,
                          EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;

    def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem,
                                   (outs VR128X:$dst), (ins i128mem:$src),
                                   "vmovntdqa\t{$src, $dst|$dst, $src}",
                                   [], SSEPackedInt>,
                          EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
  }
}
4303
// One non-temporal store instruction ("mr") for vector type `_`.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic, prepended to the operand string.
//   st_frag   - store fragment to match (alignednontemporalstore by default).
//   itin      - itinerary class (IIC_SSE_MOVNT by default).
multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                        PatFrag st_frag = alignednontemporalstore,
                        InstrItinClass itin = IIC_SSE_MOVNT> {
  let SchedRW = [WriteStore], AddedComplexity = 400 in {
    def mr : AVX512PI<opc, MRMDestMem,
                      (outs), (ins _.MemOp:$dst, _.RC:$src),
                      OpcodeStr # "\t{$src, $dst|$dst, $src}",
                      [(st_frag (_.VT _.RC:$src), addr:$dst)],
                      _.ExeDomain, itin>,
             EVEX, EVEX_CD8<_.EltSize, CD8VF>;
  }
}
4313
// Instantiates avx512_movnt for all three vector lengths of VTInfo: Z (512-bit)
// under HasAVX512, Z256 and Z128 under HasAVX512 + HasVLX.
multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
                           AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512>, EVEX_V512;
  }

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256>, EVEX_V256;
    defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128>, EVEX_V128;
  }
}
4324
// Non-temporal stores for i64, f64 and f32 element vectors.
defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info>,
                PD;
defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info>,
                PD, VEX_W;
defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info>,
                PS;
Robert Khasanoved882972014-08-13 10:46:00 +00004328
// 512-bit non-temporal patterns for the vector types not matched directly by
// the instruction definitions: i32/i16/i8 stores go to VMOVNTDQZmr, and
// f64/f32/i64 loads go to VMOVNTDQAZrm.
let Predicates = [HasAVX512], AddedComplexity = 400 in {
  def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;

  def : Pat<(v8f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v16f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v8i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
}
4344
// 256-bit and 128-bit counterparts of the 512-bit non-temporal patterns,
// available with HasVLX.
let Predicates = [HasVLX], AddedComplexity = 400 in {
  // 256-bit stores.
  def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;

  // 256-bit loads.
  def : Pat<(v4f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v8f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v4i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;

  // 128-bit stores.
  def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;

  // 128-bit loads.
  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
}
4374
//===----------------------------------------------------------------------===//
// AVX-512 - Integer arithmetic
//
// Register-register (rr) and register-memory (rm) forms of a binary integer
// operation on vector type `_`, both built on AVX512_maskable. IsCommutable is
// forwarded to the rr form only; the rm form matches a bitconverted
// full-vector load as the second operand.
multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86VectorVTInfo _, OpndItins itins,
                           bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _,
                            (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2),
                            OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                            itins.rr, IsCommutable>,
            AVX512BIBase, EVEX_4V,
            Sched<[itins.Sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _,
                            (outs _.RC:$dst),
                            (ins _.RC:$src1, _.MemOp:$src2),
                            OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_.VT (OpNode _.RC:$src1,
                                          (bitconvert (_.LdFrag addr:$src2)))),
                            itins.rm>,
            AVX512BIBase, EVEX_4V,
            Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
4396
// Extends avx512_binop_rm with an "rmb" form whose second operand is a scalar
// memory operand broadcast with X86VBroadcast (EVEX_B). The broadcast suffix
// in the assembly string comes from _.BroadcastStr.
multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _, OpndItins itins,
                            bit IsCommutable = 0>
    : avx512_binop_rm<opc, OpcodeStr, OpNode, _, itins, IsCommutable> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _,
                             (outs _.RC:$dst),
                             (ins _.RC:$src1, _.ScalarMemOp:$src2),
                             OpcodeStr,
                             "${src2}"##_.BroadcastStr##", $src1",
                             "$src1, ${src2}"##_.BroadcastStr,
                             (_.VT (OpNode _.RC:$src1,
                                           (X86VBroadcast
                                             (_.ScalarLdFrag addr:$src2)))),
                             itins.rm>,
             AVX512BIBase, EVEX_4V, EVEX_B,
             Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
Elena Demikhovskybb2f6b72014-03-27 09:45:08 +00004411
// Instantiates avx512_binop_rm (no broadcast form) for every vector length of
// VTInfo: Z under `prd`, Z256 and Z128 under `prd` + HasVLX.
multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              AVX512VLVectorVTInfo VTInfo, OpndItins itins,
                              Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in {
    defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512,
                             itins, IsCommutable>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
                                itins, IsCommutable>,
                EVEX_V256;
    defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
                                itins, IsCommutable>,
                EVEX_V128;
  }
}
4426
// Instantiates avx512_binop_rmb (rr, rm and broadcast rmb forms) for every
// vector length of VTInfo: Z under `prd`, Z256 and Z128 under `prd` + HasVLX.
multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               AVX512VLVectorVTInfo VTInfo, OpndItins itins,
                               Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in {
    defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512,
                              itins, IsCommutable>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
                                 itins, IsCommutable>,
                EVEX_V256;
    defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
                                 itins, IsCommutable>,
                EVEX_V128;
  }
}
4441
// i64-element binop (avx512vl_i64_info) with broadcast forms; adds VEX_W and
// EVEX_CD8<64, CD8VF>.
multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                OpndItins itins, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode,
                                  avx512vl_i64_info, itins, prd,
                                  IsCommutable>,
              VEX_W, EVEX_CD8<64, CD8VF>;
}
4449
// i32-element binop (avx512vl_i32_info) with broadcast forms; adds
// EVEX_CD8<32, CD8VF>.
multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                OpndItins itins, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode,
                                  avx512vl_i32_info, itins, prd,
                                  IsCommutable>,
              EVEX_CD8<32, CD8VF>;
}
4456
// i16-element binop (avx512vl_i16_info) without broadcast forms; adds
// EVEX_CD8<16, CD8VF> and VEX_WIG.
multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                OpndItins itins, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode,
                                 avx512vl_i16_info, itins, prd,
                                 IsCommutable>,
              EVEX_CD8<16, CD8VF>, VEX_WIG;
}
4464
// i8-element binop (avx512vl_i8_info) without broadcast forms; adds
// EVEX_CD8<8, CD8VF> and VEX_WIG.
multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                OpndItins itins, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode,
                                 avx512vl_i8_info, itins, prd,
                                 IsCommutable>,
              EVEX_CD8<8, CD8VF>, VEX_WIG;
}
4472
// Q (i64) and D (i32) variants of one operation. The mnemonic is OpcodeStr
// with "q" or "d" appended; each variant has its own opcode.
multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, OpndItins itins, Predicate prd,
                                 bit IsCommutable = 0> {
  defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode,
                                itins, prd, IsCommutable>;

  defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode,
                                itins, prd, IsCommutable>;
}
4482
// W (i16) and B (i8) variants of one operation. The mnemonic is OpcodeStr
// with "w" or "b" appended; each variant has its own opcode.
multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, OpndItins itins, Predicate prd,
                                 bit IsCommutable = 0> {
  defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode,
                                itins, prd, IsCommutable>;

  defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode,
                                itins, prd, IsCommutable>;
}
4492
// All four element widths of one operation: D/Q variants under HasAVX512 and
// B/W variants under HasBWI.
multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  OpndItins itins, bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
                                    itins, HasAVX512, IsCommutable>,
              avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
                                    itins, HasBWI, IsCommutable>;
}
4502
// Binary operation whose source and destination vector types differ.
//   _Src   - type of both source operands.
//   _Dst   - result type; also the type AVX512_maskable is instantiated on.
//   _Brdct - type used for the broadcast (rmb) form: its scalar memory
//            operand, broadcast string and scalar load fragment are used, and
//            the broadcast value is bitconverted before reaching OpNode.
// IsCommutable is forwarded to the rr form only.
multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, OpndItins itins,
                            SDNode OpNode, X86VectorVTInfo _Src,
                            X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
                            bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst,
                            (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2),
                            OpcodeStr,
                            "$src2, $src1","$src1, $src2",
                            (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                             (_Src.VT _Src.RC:$src2))),
                            itins.rr, IsCommutable>,
            AVX512BIBase, EVEX_4V, Sched<[itins.Sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst,
                            (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.MemOp:$src2),
                            OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                             (bitconvert
                                               (_Src.LdFrag addr:$src2)))),
                            itins.rm>,
            AVX512BIBase, EVEX_4V,
            Sched<[itins.Sched.Folded, ReadAfterLd]>;

  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst,
                             (outs _Dst.RC:$dst),
                             (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
                             OpcodeStr,
                             "${src2}"##_Brdct.BroadcastStr##", $src1",
                             "$src1, ${src2}"##_Brdct.BroadcastStr,
                             (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                              (bitconvert
                                                (_Brdct.VT
                                                  (X86VBroadcast
                                                    (_Brdct.ScalarLdFrag
                                                      addr:$src2)))))),
                             itins.rm>,
             AVX512BIBase, EVEX_4V, EVEX_B,
             Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
4534
// Packed integer add/sub for all element widths (opcodes given in B, W, D, Q
// order). The final template argument is IsCommutable.
defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
                                    SSE_INTALU_ITINS_P, 1>;
defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
                                    SSE_INTALU_ITINS_P, 0>;

// Byte/word add and subtract using the X86adds/X86subs/X86addus/X86subus
// nodes (HasBWI).
defm VPADDS  : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", X86adds,
                                     SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPSUBS  : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", X86subs,
                                     SSE_INTALU_ITINS_P, HasBWI, 0>;
defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", X86addus,
                                     SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", X86subus,
                                     SSE_INTALU_ITINS_P, HasBWI, 0>;

// Multiplies matched from mul for D, W and Q elements.
defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
                                    SSE_INTMUL_ITINS_P, HasAVX512, 1>,
               T8PD;
defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
                                    SSE_INTMUL_ITINS_P, HasBWI, 1>;
defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
                                    SSE_INTMUL_ITINS_P, HasDQI, 1>,
               T8PD;

// Word multiplies matched from mulhs, mulhu and X86mulhrs.
defm VPMULHW   : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs,
                                      SSE_INTMUL_ITINS_P, HasBWI, 1>;
defm VPMULHUW  : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu,
                                      SSE_INTMUL_ITINS_P, HasBWI, 1>;
defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
                                      SSE_INTMUL_ITINS_P, HasBWI, 1>,
                 T8PD;

// Byte/word operation matched from X86avg.
defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
                                   SSE_INTALU_ITINS_P, HasBWI, 1>;
4561
// Instantiates avx512_binop_rm2 at all three vector widths for a binary op
// whose source and destination vector types may differ.
//
//   opc, OpcodeStr  - opcode byte and mnemonic.
//   itins           - itinerary / scheduling information.
//   _SrcVTInfo      - per-width type info for the two source operands.
//   _DstVTInfo      - per-width type info for the result.
//   OpNode          - SelectionDAG node to match.
//   prd             - predicate gating every width.
//   IsCommutable    - forwarded unchanged to avx512_binop_rm2 (default 0).
//
// Records produced: NAME#Z (512-bit, requires prd) and NAME#Z256 / NAME#Z128
// (require HasVLX and prd). Every width is tagged VEX_W and
// EVEX_CD8<64, CD8VF>, and is handed the matching 64-bit-element type info
// (v8i64 / v4i64x / v2i64x).
// NOTE(review): that 64-bit info is presumably the broadcast type used by the
// rmb form of avx512_binop_rm2 -- its header is outside this section.
multiclass avx512_binop_all<bits<8> opc, string OpcodeStr, OpndItins itins,
                            AVX512VLVectorVTInfo _SrcVTInfo,
                            AVX512VLVectorVTInfo _DstVTInfo,
                            SDNode OpNode, Predicate prd,
                            bit IsCommutable = 0> {
  let Predicates = [prd] in {
    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode,
                                   _SrcVTInfo.info512, _DstVTInfo.info512,
                                   v8i64_info, IsCommutable>,
                  EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
  }

  let Predicates = [HasVLX, prd] in {
    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode,
                                      _SrcVTInfo.info256, _DstVTInfo.info256,
                                      v4i64x_info, IsCommutable>,
                     EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;

    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode,
                                      _SrcVTInfo.info128, _DstVTInfo.info128,
                                      v2i64x_info, IsCommutable>,
                     EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
  }
}
Elena Demikhovsky50b88dd2015-04-21 10:27:40 +00004581
// Instantiations of avx512_binop_all. The two type-info arguments are the
// source and destination types; the trailing 0/1 is IsCommutable.
defm VPMULDQ        : avx512_binop_all<0x28, "vpmuldq", SSE_INTMUL_ITINS_P,
                        avx512vl_i32_info, avx512vl_i64_info,
                        X86pmuldq, HasAVX512, 1>, T8PD;
defm VPMULUDQ       : avx512_binop_all<0xF4, "vpmuludq", SSE_INTMUL_ITINS_P,
                        avx512vl_i32_info, avx512vl_i64_info,
                        X86pmuludq, HasAVX512, 1>;
defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb",
                        SSE_INTALU_ITINS_P,
                        avx512vl_i8_info, avx512vl_i8_info,
                        X86multishift, HasVBMI, 0>, T8PD;
Elena Demikhovsky172a27c2014-01-08 10:54:22 +00004591
// Broadcast-memory ("rmb") form of a pack-style binary op.
//
// $src1 is a _Src register; $src2 is a scalar memory operand that is loaded
// with _Src.ScalarLdFrag, broadcast to _Src.VT via X86VBroadcast and
// bitconverted before being passed to OpNode. The result type is _Dst.VT.
// The instruction is tagged EVEX_B and uses _Src.EltSize for its EVEX_CD8
// setting.
multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
                            OpndItins itins> {
  defm rmb : AVX512_maskable<
                 opc, MRMSrcMem, _Dst,
                 (outs _Dst.RC:$dst),
                 (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
                 OpcodeStr,
                 // AT&T operand string, then Intel operand string.
                 "${src2}"##_Src.BroadcastStr##", $src1",
                 "$src1, ${src2}"##_Src.BroadcastStr,
                 (_Dst.VT
                   (OpNode (_Src.VT _Src.RC:$src1),
                           (bitconvert
                             (_Src.VT (X86VBroadcast
                                        (_Src.ScalarLdFrag addr:$src2)))))),
                 itins.rm>,
             EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
             Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
4606
// Register-register ("rr") and register-memory ("rm") forms of a binary op
// that reads two _Src vectors and produces a _Dst vector.
//
//   rr - both operands in _Src.RC; IsCommutable is forwarded to
//        AVX512_maskable for this form only.
//   rm - $src2 is loaded with _Src.LdFrag and bitconverted before OpNode.
//
// Both forms use _Src.EltSize for their EVEX_CD8 setting.
multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, X86VectorVTInfo _Src,
                           X86VectorVTInfo _Dst, OpndItins itins,
                           bit IsCommutable = 0> {
  defm rr : AVX512_maskable<
                opc, MRMSrcReg, _Dst,
                (outs _Dst.RC:$dst),
                (ins _Src.RC:$src1, _Src.RC:$src2),
                OpcodeStr,
                "$src2, $src1", "$src1, $src2",
                (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                 (_Src.VT _Src.RC:$src2))),
                itins.rr, IsCommutable>,
            EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[itins.Sched]>;

  defm rm : AVX512_maskable<
                opc, MRMSrcMem, _Dst,
                (outs _Dst.RC:$dst),
                (ins _Src.RC:$src1, _Src.MemOp:$src2),
                OpcodeStr,
                "$src2, $src1", "$src1, $src2",
                (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                 (bitconvert (_Src.LdFrag addr:$src2)))),
                itins.rm>,
            EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
            Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
4627
// i32 -> i16 pack at all vector widths. Each width combines avx512_packs_rm
// (rr/rm forms) with avx512_packs_rmb (broadcast form) using the SSE_PACK
// itinerary. The 512-bit form requires HasBWI; the 256/128-bit forms require
// HasBWI and HasVLX.
multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode> {
  let Predicates = [HasBWI] in {
    defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode,
                                  v16i32_info, v32i16_info, SSE_PACK>,
                  avx512_packs_rmb<opc, OpcodeStr, OpNode,
                                   v16i32_info, v32i16_info, SSE_PACK>,
                  EVEX_V512;
  }

  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode,
                                     v8i32x_info, v16i16x_info, SSE_PACK>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode,
                                      v8i32x_info, v16i16x_info, SSE_PACK>,
                     EVEX_V256;

    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode,
                                     v4i32x_info, v8i16x_info, SSE_PACK>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode,
                                      v4i32x_info, v8i16x_info, SSE_PACK>,
                     EVEX_V128;
  }
}
// i16 -> i8 pack at all vector widths. Unlike the i32 -> i16 variant, only
// avx512_packs_rm (rr/rm forms) is instantiated -- no broadcast form -- and
// every width is tagged VEX_WIG. The 512-bit form requires HasBWI; the
// 256/128-bit forms require HasBWI and HasVLX.
multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
                                   SDNode OpNode> {
  let Predicates = [HasBWI] in {
    defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode,
                                  v32i16_info, v64i8_info, SSE_PACK>,
                  EVEX_V512, VEX_WIG;
  }

  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode,
                                     v16i16x_info, v32i8x_info, SSE_PACK>,
                     EVEX_V256, VEX_WIG;

    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode,
                                     v8i16x_info, v16i8x_info, SSE_PACK>,
                     EVEX_V128, VEX_WIG;
  }
}
Igor Bregerf7fd5472015-07-21 07:11:28 +00004658
// Multiply-add style op at all vector widths, built on avx512_packs_rm with
// the SSE_PMADD itinerary. _Src / _Dst supply the per-width source and result
// type info; IsCommutable (default 0) is forwarded to every width.
// The 512-bit form requires HasBWI; the 256/128-bit forms require HasBWI and
// HasVLX.
multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
                         SDNode OpNode, AVX512VLVectorVTInfo _Src,
                         AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
  let Predicates = [HasBWI] in {
    defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode,
                                  _Src.info512, _Dst.info512,
                                  SSE_PMADD, IsCommutable>,
                  EVEX_V512;
  }

  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode,
                                     _Src.info256, _Dst.info256,
                                     SSE_PMADD, IsCommutable>,
                     EVEX_V256;

    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode,
                                     _Src.info128, _Dst.info128,
                                     SSE_PMADD, IsCommutable>,
                     EVEX_V128;
  }
}
4672
// Pack instructions.
// NOTE(review): VPACKUSDW uses AVX5128IBase (digit 8) while its siblings use
// AVX512BIBase (letter B). Both classes are defined outside this section;
// this is presumably a deliberate choice of opcode map rather than a typo --
// confirm before "fixing" it.
defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>,
                 AVX512BIBase;
defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>,
                 AVX5128IBase;
defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>,
                 AVX512BIBase;
defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>,
                 AVX512BIBase;

// Multiply-add instructions. VPMADDUBSW relies on the IsCommutable default
// of 0; VPMADDWD passes 1 explicitly.
defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
                                avx512vl_i8_info, avx512vl_i16_info>,
                  AVX512BIBase, T8PD, VEX_WIG;
defm VPMADDWD   : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
                                avx512vl_i16_info, avx512vl_i32_info, 1>,
                  AVX512BIBase, VEX_WIG;
Elena Demikhovsky2557a222015-05-04 09:14:02 +00004682
// Integer min / max instantiations, grouped by DAG node (smax, umax, smin,
// umin). Within each group the byte and word forms are gated on HasBWI and
// the combined dword/qword form on HasAVX512.
// NOTE(review): the avx512_binop_rm_vl_* helpers are defined outside this
// section; the trailing 1 is presumably IsCommutable.

// Signed max.
defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
                 SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
                 SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPMAXS  : avx512_binop_rm_vl_dq<0x3D, 0x3D, "vpmaxs", smax,
                 SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;

// Unsigned max.
defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
                 SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
                 SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
defm VPMAXU  : avx512_binop_rm_vl_dq<0x3F, 0x3F, "vpmaxu", umax,
                 SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;

// Signed min.
defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
                 SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
                 SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPMINS  : avx512_binop_rm_vl_dq<0x39, 0x39, "vpmins", smin,
                 SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;

// Unsigned min.
defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
                 SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
                 SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
defm VPMINU  : avx512_binop_rm_vl_dq<0x3B, 0x3B, "vpminu", umin,
                 SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
Craig Topperabe80cc2016-08-28 06:06:28 +00004710
// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX.
//
// With DQI but without VLX only the 512-bit VPMULLQZrr is available, so a
// v4i64 / v2i64 `mul` is widened: each source is inserted into the low
// sub_ymm / sub_xmm of an otherwise undefined (IMPLICIT_DEF) v8i64 register,
// the 512-bit multiply is issued, and the low subregister of the result is
// extracted.
let Predicates = [HasDQI, NoVLX] in {
  // 256-bit: v4i64 multiply via the 512-bit instruction.
  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;

  // 128-bit: v2i64 multiply via the 512-bit instruction.
  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
}
4744
// Widening patterns for a 64-bit-element binary node: a v4i64 / v2i64 OpNode
// is implemented with the instruction `Instr`, which is applied to v8i64
// values. Each source is placed in the low sub_ymm / sub_xmm of an
// IMPLICIT_DEF v8i64, Instr is applied, and the low subregister of the
// result is extracted. The patterns carry no predicates of their own; the
// instantiation site is expected to supply them.
multiclass avx512_min_max_lowering<Instruction Instr, SDNode OpNode> {
  // 256-bit source/result.
  def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
            (EXTRACT_SUBREG
              (Instr
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
              sub_ymm)>;

  // 128-bit source/result.
  def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
            (EXTRACT_SUBREG
              (Instr
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
              sub_xmm)>;
}
4760
// 64-bit-element min/max: without VLX, lower the 128/256-bit forms through
// the 512-bit register-register instructions.
let Predicates = [HasAVX512, NoVLX] in {
  defm : avx512_min_max_lowering<VPMAXUQZrr, umax>;
  defm : avx512_min_max_lowering<VPMINUQZrr, umin>;
  defm : avx512_min_max_lowering<VPMAXSQZrr, smax>;
  defm : avx512_min_max_lowering<VPMINSQZrr, smin>;
}
4767
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004768//===----------------------------------------------------------------------===//
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004769// AVX-512 Logical Instructions
4770//===----------------------------------------------------------------------===//
4771
// Register-register ("rr") and register-memory ("rm") forms of a bitwise
// logic op.
//
// OpNodeMsk is the OpNode to use when element size is important. OpNode will
// be set to null_frag for 32-bit elements.
//
// Two patterns are handed to AVX512_maskable_logic for each form:
//   1. OpNode applied to bitconverted operands, typed as _.i64VT.
//   2. OpNodeMsk applied to the operands, typed as _.i64VT and then
//      bitconverted to _.VT.
// NOTE(review): these are presumably the unmasked and masked patterns
// respectively -- AVX512_maskable_logic is defined outside this section.
//
// hasSideEffects / mayLoad are set explicitly on both forms.
multiclass avx512_logic_rm<bits<8> opc, string OpcodeStr,
                           SDPatternOperator OpNode,
                           SDNode OpNodeMsk, OpndItins itins,
                           X86VectorVTInfo _,
                           bit IsCommutable = 0> {
  let hasSideEffects = 0 in {
    defm rr : AVX512_maskable_logic<
                  opc, MRMSrcReg, _,
                  (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2),
                  OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
                                   (bitconvert (_.VT _.RC:$src2)))),
                  (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
                                                        _.RC:$src2)))),
                  itins.rr, IsCommutable>,
              AVX512BIBase, EVEX_4V,
              Sched<[itins.Sched]>;
  }

  let hasSideEffects = 0, mayLoad = 1 in {
    defm rm : AVX512_maskable_logic<
                  opc, MRMSrcMem, _,
                  (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2),
                  OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
                                   (bitconvert (_.LdFrag addr:$src2)))),
                  (_.VT (bitconvert
                          (_.i64VT (OpNodeMsk _.RC:$src1,
                                     (bitconvert (_.LdFrag addr:$src2)))))),
                  itins.rm>,
              AVX512BIBase, EVEX_4V,
              Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
4800
// Adds the broadcast-memory ("rmb") form on top of the rr/rm forms inherited
// from avx512_logic_rm.
//
// OpNodeMsk is the OpNode to use where element size is important. So use
// for all of the broadcast patterns.
//
// $src2 is a scalar memory operand: it is loaded with _.ScalarLdFrag,
// broadcast to _.VT with X86VBroadcast and bitconverted before being passed
// to OpNodeMsk. Both patterns given to AVX512_maskable_logic use OpNodeMsk;
// OpNode is only forwarded to the parent multiclass.
multiclass avx512_logic_rmb<bits<8> opc, string OpcodeStr,
                            SDPatternOperator OpNode,
                            SDNode OpNodeMsk, OpndItins itins,
                            X86VectorVTInfo _,
                            bit IsCommutable = 0> :
           avx512_logic_rm<opc, OpcodeStr, OpNode, OpNodeMsk, itins, _,
                           IsCommutable> {
  defm rmb : AVX512_maskable_logic<
                 opc, MRMSrcMem, _,
                 (outs _.RC:$dst),
                 (ins _.RC:$src1, _.ScalarMemOp:$src2),
                 OpcodeStr,
                 // AT&T operand string, then Intel operand string.
                 "${src2}"##_.BroadcastStr##", $src1",
                 "$src1, ${src2}"##_.BroadcastStr,
                 (_.i64VT (OpNodeMsk _.RC:$src1,
                            (bitconvert
                              (_.VT (X86VBroadcast
                                      (_.ScalarLdFrag addr:$src2)))))),
                 (_.VT (bitconvert
                         (_.i64VT (OpNodeMsk _.RC:$src1,
                                    (bitconvert
                                      (_.VT (X86VBroadcast
                                              (_.ScalarLdFrag addr:$src2)))))))),
                 itins.rm>,
             AVX512BIBase, EVEX_4V, EVEX_B,
             Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
4824
// Instantiates avx512_logic_rmb (rr, rm and rmb forms) at all three vector
// widths taken from VTInfo:
//   Z     - 512-bit, requires HasAVX512.
//   Z256  - 256-bit, requires HasAVX512 and HasVLX.
//   Z128  - 128-bit, requires HasAVX512 and HasVLX.
// OpNode, OpNodeMsk, itins and IsCommutable are forwarded unchanged.
multiclass avx512_logic_rmb_vl<bits<8> opc, string OpcodeStr,
                               SDPatternOperator OpNode,
                               SDNode OpNodeMsk, OpndItins itins,
                               AVX512VLVectorVTInfo VTInfo,
                               bit IsCommutable = 0> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, itins,
                              VTInfo.info512, IsCommutable>,
             EVEX_V512;
  }

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, itins,
                                 VTInfo.info256, IsCommutable>,
                EVEX_V256;
    defm Z128 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, itins,
                                 VTInfo.info128, IsCommutable>,
                EVEX_V128;
  }
}
4841
// Dword ("d") and qword ("q") flavours of a bitwise logic op.
//
//   Q - i64 elements: OpNode is used for both the OpNode and OpNodeMsk
//       arguments; tagged VEX_W and EVEX_CD8<64, CD8VF>.
//   D - i32 elements: null_frag is passed as OpNode and OpNode as OpNodeMsk,
//       so only the element-size-sensitive patterns exist for this form;
//       tagged EVEX_CD8<32, CD8VF>.
multiclass avx512_logic_rm_vl_dq<bits<8> opc_d, bits<8> opc_q,
                                 string OpcodeStr,
                                 SDNode OpNode, OpndItins itins,
                                 bit IsCommutable = 0> {
  defm Q : avx512_logic_rmb_vl<opc_q, OpcodeStr#"q",
                               OpNode, OpNode, itins,
                               avx512vl_i64_info, IsCommutable>,
           VEX_W, EVEX_CD8<64, CD8VF>;

  defm D : avx512_logic_rmb_vl<opc_d, OpcodeStr#"d",
                               null_frag, OpNode, itins,
                               avx512vl_i32_info, IsCommutable>,
           EVEX_CD8<32, CD8VF>;
}
4852
// Bitwise logic instructions. The d and q forms share one opcode byte.
// and/or/xor pass IsCommutable = 1; VPANDN omits the argument and therefore
// uses the multiclass default of 0.
defm VPAND  : avx512_logic_rm_vl_dq<0xDB, 0xDB, "vpand", and,
                                    SSE_BIT_ITINS_P, 1>;
defm VPOR   : avx512_logic_rm_vl_dq<0xEB, 0xEB, "vpor", or,
                                    SSE_BIT_ITINS_P, 1>;
defm VPXOR  : avx512_logic_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
                                    SSE_BIT_ITINS_P, 1>;
defm VPANDN : avx512_logic_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
                                    SSE_BIT_ITINS_P>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004857
4858//===----------------------------------------------------------------------===//
4859// AVX-512 FP arithmetic
4860//===----------------------------------------------------------------------===//
// Scalar FP binary op: intrinsic-style and plain scalar forms.
//
//   rr_Int / rm_Int - operate on the vector register class _.RC and match
//                     VecNode with an explicit (i32 FROUND_CURRENT) rounding
//                     operand. The memory form matches _.ScalarIntMemCPat.
//   rr / rm         - isCodeGenOnly forms on the scalar register class
//                     _.FRC that match OpNode; gated on HasAVX512.
//
// IsCommutable is applied only to the _.FRC `rr` record; the _Int forms do
// not receive it.
multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                            SDNode OpNode, SDNode VecNode, OpndItins itins,
                            bit IsCommutable> {
  let ExeDomain = _.ExeDomain in {
    defm rr_Int : AVX512_maskable_scalar<
                      opc, MRMSrcReg, _,
                      (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2),
                      OpcodeStr,
                      "$src2, $src1", "$src1, $src2",
                      (_.VT (VecNode _.RC:$src1, _.RC:$src2,
                                     (i32 FROUND_CURRENT))),
                      itins.rr>,
                  Sched<[itins.Sched]>;

    defm rm_Int : AVX512_maskable_scalar<
                      opc, MRMSrcMem, _,
                      (outs _.RC:$dst),
                      (ins _.RC:$src1, _.IntScalarMemOp:$src2),
                      OpcodeStr,
                      "$src2, $src1", "$src1, $src2",
                      (_.VT (VecNode _.RC:$src1,
                                     _.ScalarIntMemCPat:$src2,
                                     (i32 FROUND_CURRENT))),
                      itins.rm>,
                  Sched<[itins.Sched.Folded, ReadAfterLd]>;

    let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
      def rr : I<opc, MRMSrcReg,
                 (outs _.FRC:$dst),
                 (ins _.FRC:$src1, _.FRC:$src2),
                 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
                 itins.rr>,
               Sched<[itins.Sched]> {
        let isCommutable = IsCommutable;
      }

      def rm : I<opc, MRMSrcMem,
                 (outs _.FRC:$dst),
                 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                 [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                           (_.ScalarLdFrag addr:$src2)))],
                 itins.rm>,
               Sched<[itins.Sched.Folded, ReadAfterLd]>;
    }
  }
}
4896
// Scalar FP binary op with an explicit rounding-control operand.
//
// rrb_Int takes an extra AVX512RC:$rc input and matches VecNode with
// (i32 imm:$rc) as its rounding operand. The record is tagged EVEX_B and
// EVEX_RC, and IsCommutable (default 0) is forwarded to
// AVX512_maskable_scalar.
multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,
                                  X86VectorVTInfo _,
                                  SDNode VecNode, OpndItins itins,
                                  bit IsCommutable = 0> {
  let ExeDomain = _.ExeDomain in {
    defm rrb_Int : AVX512_maskable_scalar<
                       opc, MRMSrcReg, _,
                       (outs _.RC:$dst),
                       (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc),
                       OpcodeStr,
                       "$rc, $src2, $src1", "$src1, $src2, $rc",
                       (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                (i32 imm:$rc)),
                       itins.rr, IsCommutable>,
                   EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
  }
}
// Scalar FP binary op with a suppress-all-exceptions ({sae}) form.
//
//   rr_Int / rm_Int - operate on _.RC and match VecNode (two operands, no
//                     rounding operand).
//   rr / rm         - isCodeGenOnly forms on _.FRC that match OpNode; gated
//                     on HasAVX512. IsCommutable applies to `rr` only.
//   rrb_Int         - "{sae}" form: matches SaeNode with
//                     (i32 FROUND_NO_EXC) and is tagged EVEX_B.
multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,
                                X86VectorVTInfo _,
                                SDNode OpNode, SDNode VecNode, SDNode SaeNode,
                                OpndItins itins, bit IsCommutable> {
  let ExeDomain = _.ExeDomain in {
    defm rr_Int : AVX512_maskable_scalar<
                      opc, MRMSrcReg, _,
                      (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2),
                      OpcodeStr,
                      "$src2, $src1", "$src1, $src2",
                      (_.VT (VecNode _.RC:$src1, _.RC:$src2)),
                      itins.rr>,
                  Sched<[itins.Sched]>;

    defm rm_Int : AVX512_maskable_scalar<
                      opc, MRMSrcMem, _,
                      (outs _.RC:$dst),
                      (ins _.RC:$src1, _.IntScalarMemOp:$src2),
                      OpcodeStr,
                      "$src2, $src1", "$src1, $src2",
                      (_.VT (VecNode _.RC:$src1,
                                     _.ScalarIntMemCPat:$src2)),
                      itins.rm>,
                  Sched<[itins.Sched.Folded, ReadAfterLd]>;

    let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
      def rr : I<opc, MRMSrcReg,
                 (outs _.FRC:$dst),
                 (ins _.FRC:$src1, _.FRC:$src2),
                 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
                 itins.rr>,
               Sched<[itins.Sched]> {
        let isCommutable = IsCommutable;
      }

      def rm : I<opc, MRMSrcMem,
                 (outs _.FRC:$dst),
                 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                 [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                           (_.ScalarLdFrag addr:$src2)))],
                 itins.rm>,
               Sched<[itins.Sched.Folded, ReadAfterLd]>;
    }

    defm rrb_Int : AVX512_maskable_scalar<
                       opc, MRMSrcReg, _,
                       (outs _.RC:$dst),
                       (ins _.RC:$src1, _.RC:$src2),
                       OpcodeStr,
                       "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                       (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                (i32 FROUND_NO_EXC)),
                       itins.rr>,
                   EVEX_B, Sched<[itins.Sched]>;
  }
}
4948
// Single- and double-precision scalar FP binary op with rounding control.
//
//   SSZ - "ss" form on f32x_info using itins.s; XS prefix, CD8 size 32.
//   SDZ - "sd" form on f64x_info using itins.d; XD prefix, VEX_W, CD8 size 64.
//
// Each combines avx512_fp_scalar (rr/rm and _Int forms) with
// avx512_fp_scalar_round (rrb_Int form).
multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                SDNode VecNode,
                                SizeItins itins, bit IsCommutable> {
  defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info,
                              OpNode, VecNode, itins.s, IsCommutable>,
             avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info,
                                    VecNode, itins.s, IsCommutable>,
             XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;

  defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info,
                              OpNode, VecNode, itins.d, IsCommutable>,
             avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info,
                                    VecNode, itins.d, IsCommutable>,
             XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
4963
// Single- and double-precision scalar FP binary op with an {sae} form.
//
//   SSZ - "ss" form on f32x_info using itins.s; XS prefix, CD8 size 32.
//   SDZ - "sd" form on f64x_info using itins.d; XD prefix, VEX_W, CD8 size 64.
//
// Both are built from avx512_fp_scalar_sae.
multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode VecNode, SDNode SaeNode,
                              SizeItins itins, bit IsCommutable> {
  defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info,
                                  OpNode, VecNode, SaeNode,
                                  itins.s, IsCommutable>,
             XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;

  defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info,
                                  OpNode, VecNode, SaeNode,
                                  itins.d, IsCommutable>,
             XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
// Scalar FP arithmetic. add/mul/sub/div get rounding-control forms; min/max
// get {sae} forms. The trailing argument is IsCommutable: 1 for add and mul,
// 0 for everything else (including min/max, whose commutable variants are
// defined separately).
defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnds,
                                 SSE_ALU_ITINS_S, 1>;
defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnds,
                                 SSE_MUL_ITINS_S, 1>;
defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnds,
                                 SSE_ALU_ITINS_S, 0>;
defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnds,
                                 SSE_DIV_ITINS_S, 0>;
defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminRnds,
                               SSE_ALU_ITINS_S, 0>;
defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxRnds,
                               SSE_ALU_ITINS_S, 0>;
Elena Demikhovskyd84f3372016-07-11 06:08:06 +00004982
// MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
// X86fminc and X86fmaxc instead of X86fmin and X86fmax.
//
// Defines isCodeGenOnly scalar rr/rm records on _.FRC that match OpNode;
// the rr record is unconditionally marked isCommutable. Gated on HasAVX512.
multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
                                    X86VectorVTInfo _, SDNode OpNode,
                                    OpndItins itins> {
  let isCodeGenOnly = 1, Predicates = [HasAVX512],
      ExeDomain = _.ExeDomain in {
    def rr : I<opc, MRMSrcReg,
               (outs _.FRC:$dst),
               (ins _.FRC:$src1, _.FRC:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
               itins.rr>,
             Sched<[itins.Sched]> {
      let isCommutable = 1;
    }

    def rm : I<opc, MRMSrcMem,
               (outs _.FRC:$dst),
               (ins _.FRC:$src1, _.ScalarMemOp:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                         (_.ScalarLdFrag addr:$src2)))],
               itins.rm>,
             Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
// Commutable scalar min/max records (X86fminc / X86fmaxc). They reuse the
// vminss/vminsd/vmaxss/vmaxsd mnemonics and opcodes 0x5D / 0x5F.
defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
                                         SSE_ALU_ITINS_S.s>,
                XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;

defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
                                         SSE_ALU_ITINS_S.d>,
                XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;

defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
                                         SSE_ALU_ITINS_S.s>,
                XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;

defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
                                         SSE_ALU_ITINS_S.d>,
                XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
Elena Demikhovsky02ffd262015-03-01 07:44:04 +00005018
// Packed FP binary op: rr, rm and broadcast-memory (rmb) forms.
//
// The mnemonic is OpcodeStr with _.Suffix appended. OpNode is an
// SDPatternOperator, and hasSideEffects / mayLoad are set explicitly on the
// records rather than left to be inferred from the patterns.
// IsCommutable is forwarded to the rr form only.
//
//   rr  - both operands in _.RC.
//   rm  - $src2 loaded with _.LdFrag.
//   rmb - $src2 is a scalar loaded with _.ScalarLdFrag and broadcast to _.VT
//         via X86VBroadcast; tagged EVEX_B.
multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr,
                            SDPatternOperator OpNode,
                            X86VectorVTInfo _, OpndItins itins,
                            bit IsCommutable> {
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
    defm rr : AVX512_maskable<
                  opc, MRMSrcReg, _,
                  (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2),
                  OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                  itins.rr, IsCommutable>,
              EVEX_4V, Sched<[itins.Sched]>;

    let mayLoad = 1 in {
      defm rm : AVX512_maskable<
                    opc, MRMSrcMem, _,
                    (outs _.RC:$dst),
                    (ins _.RC:$src1, _.MemOp:$src2),
                    OpcodeStr##_.Suffix,
                    "$src2, $src1", "$src1, $src2",
                    (OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
                    itins.rm>,
                EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;

      defm rmb : AVX512_maskable<
                     opc, MRMSrcMem, _,
                     (outs _.RC:$dst),
                     (ins _.RC:$src1, _.ScalarMemOp:$src2),
                     OpcodeStr##_.Suffix,
                     // AT&T operand string, then Intel operand string.
                     "${src2}"##_.BroadcastStr##", $src1",
                     "$src1, ${src2}"##_.BroadcastStr,
                     (OpNode _.RC:$src1,
                             (_.VT (X86VBroadcast
                                     (_.ScalarLdFrag addr:$src2)))),
                     itins.rm>,
                 EVEX_4V, EVEX_B,
                 Sched<[itins.Sched.Folded, ReadAfterLd]>;
    }
  }
}
Elena Demikhovskyf7c1b162014-03-06 08:45:30 +00005045
// Register-register packed FP operation that carries an explicit rounding
// operand: AVX512RC:$rc is passed to OpNodeRnd as an i32 immediate and the
// instruction is tagged EVEX_B + EVEX_RC.
multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
                                  SDPatternOperator OpNodeRnd,
                                  OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                               (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc),
                               OpcodeStr##_.Suffix,
                               "$rc, $src2, $src1", "$src1, $src2, $rc",
                               (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2,
                                                (i32 imm:$rc))),
                               itins.rr>,
               EVEX_4V, EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
  }
}
5055
// Register-register packed FP operation printed with "{sae}". There is no
// rounding operand; the pattern passes the fixed FROUND_NO_EXC value to
// OpNodeRnd and the instruction is tagged EVEX_B.
multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNodeRnd,
                                OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                               (ins _.RC:$src1, _.RC:$src2),
                               OpcodeStr##_.Suffix,
                               "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                               (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2,
                                                (i32 FROUND_NO_EXC))),
                               itins.rr>,
               EVEX_4V, EVEX_B, Sched<[itins.Sched]>;
  }
}
5065
// Instantiates avx512_fp_packed for every packed FP width.
// The 512-bit PS/PD forms require only `prd`; the 128/256-bit forms also
// require HasVLX. PS forms use itins.s and 32-bit CD8 scaling, PD forms use
// itins.d, VEX_W and 64-bit CD8 scaling.
multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr,
                             SDPatternOperator OpNode, Predicate prd,
                             SizeItins itins, bit IsCommutable = 0> {
  let Predicates = [prd] in {
    defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info,
                                itins.s, IsCommutable>,
               EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
    defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info,
                                itins.d, IsCommutable>,
               EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
  }

  // Define only if AVX512VL feature is present.
  let Predicates = [prd, HasVLX] in {
    defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info,
                                   itins.s, IsCommutable>,
                  EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info,
                                   itins.s, IsCommutable>,
                  EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info,
                                   itins.d, IsCommutable>,
                  EVEX_V128, PD, VEX_W, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info,
                                   itins.d, IsCommutable>,
                  EVEX_V256, PD, VEX_W, EVEX_CD8<64, CD8VF>;
  }
}
5094
// 512-bit PS/PD instantiations of avx512_fp_round_packed (explicit rounding
// operand). Only the v16f32 and v8f64 types are instantiated here.
multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr,
                                   SDNode OpNodeRnd, SizeItins itins> {
  defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, itins.s,
                                    v16f32_info>,
             EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, itins.d,
                                    v8f64_info>,
             EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
}
5102
// 512-bit PS/PD instantiations of avx512_fp_sae_packed ("{sae}" forms).
// Only the v16f32 and v8f64 types are instantiated here.
multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr,
                                 SDNode OpNodeRnd, SizeItins itins> {
  defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, itins.s,
                                  v16f32_info>,
             EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, itins.d,
                                  v8f64_info>,
             EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
}
5110
// Packed FP arithmetic. Each defm combines the plain packed forms with either
// the explicit-rounding forms (add/mul/sub/div) or the {sae} forms (min/max).
// The trailing 1/0 argument is IsCommutable.
defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512,
                              SSE_ALU_ITINS_P, 1>,
            avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SSE_ALU_ITINS_P>;
defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512,
                              SSE_MUL_ITINS_P, 1>,
            avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SSE_MUL_ITINS_P>;
defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512, SSE_ALU_ITINS_P>,
            avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SSE_ALU_ITINS_P>;
defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512, SSE_DIV_ITINS_P>,
            avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SSE_DIV_ITINS_P>;
defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512,
                              SSE_ALU_ITINS_P, 0>,
            avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd, SSE_ALU_ITINS_P>;
defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512,
                              SSE_ALU_ITINS_P, 0>,
            avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd, SSE_ALU_ITINS_P>;

// Commutable min/max variants: same opcodes and mnemonics as VMIN/VMAX but
// selected from X86fminc/X86fmaxc and marked isCodeGenOnly.
let isCodeGenOnly = 1 in {
  defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512,
                                 SSE_ALU_ITINS_P, 1>;
  defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512,
                                 SSE_ALU_ITINS_P, 1>;
}

// FP-domain logical operations. These pass null_frag as the operator and are
// predicated on HasDQI.
defm VAND  : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI,
                               SSE_ALU_ITINS_P, 1>;
defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI,
                               SSE_ALU_ITINS_P, 0>;
defm VOR   : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
                               SSE_ALU_ITINS_P, 1>;
defm VXOR  : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
                               SSE_ALU_ITINS_P, 1>;
Elena Demikhovsky52e4a0e2014-01-05 10:46:09 +00005141
// Patterns catch floating point selects with bitcasted integer logic ops.
//
// InstrStr names the instruction family the patterns select; the
// rrk/rrkz/rmk/rmkz/rmb/rmbk/rmbkz suffixes are pasted onto it. Each pattern
// matches an i64VT logic node (OpNode) whose result is bitconverted to the FP
// type _.VT, optionally under a vselect with either a pass-through register
// ($src0) or an all-zeros vector.
multiclass avx512_fp_logical_lowering<string InstrStr, SDNode OpNode,
                                      X86VectorVTInfo _, Predicate prd> {
  let Predicates = [prd] in {
    // Masked register-register logical operations.
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                             (bitconvert
                              (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
                             _.RC:$src0)),
              (!cast<Instruction>(InstrStr#rrk)
                 _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2)>;
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                             (bitconvert
                              (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
                             _.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr#rrkz)
                 _.KRCWM:$mask, _.RC:$src1, _.RC:$src2)>;

    // Masked register-memory logical operations.
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                             (bitconvert
                              (_.i64VT (OpNode _.RC:$src1,
                                               (load addr:$src2)))),
                             _.RC:$src0)),
              (!cast<Instruction>(InstrStr#rmk)
                 _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, addr:$src2)>;
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                             (bitconvert
                              (_.i64VT (OpNode _.RC:$src1,
                                               (load addr:$src2)))),
                             _.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr#rmkz)
                 _.KRCWM:$mask, _.RC:$src1, addr:$src2)>;

    // Register-broadcast logical operations.
    def : Pat<(_.i64VT (OpNode _.RC:$src1,
                               (bitconvert
                                (_.VT (X86VBroadcast
                                       (_.ScalarLdFrag addr:$src2)))))),
              (!cast<Instruction>(InstrStr#rmb) _.RC:$src1, addr:$src2)>;
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                             (bitconvert
                              (_.i64VT (OpNode _.RC:$src1,
                                               (bitconvert
                                                (_.VT (X86VBroadcast
                                                       (_.ScalarLdFrag
                                                        addr:$src2))))))),
                             _.RC:$src0)),
              (!cast<Instruction>(InstrStr#rmbk)
                 _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, addr:$src2)>;
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                             (bitconvert
                              (_.i64VT (OpNode _.RC:$src1,
                                               (bitconvert
                                                (_.VT (X86VBroadcast
                                                       (_.ScalarLdFrag
                                                        addr:$src2))))))),
                             _.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr#rmbkz)
                 _.KRCWM:$mask, _.RC:$src1, addr:$src2)>;
  }
}
5194
// Applies avx512_fp_logical_lowering to every FP vector type. The D* names are
// paired with f32 vectors and the Q* names with f64 vectors; the 128/256-bit
// entries require HasVLX, the 512-bit entries require HasAVX512.
multiclass avx512_fp_logical_lowering_sizes<string InstrStr, SDNode OpNode> {
  defm : avx512_fp_logical_lowering<InstrStr#DZ128, OpNode, v4f32x_info,
                                    HasVLX>;
  defm : avx512_fp_logical_lowering<InstrStr#QZ128, OpNode, v2f64x_info,
                                    HasVLX>;
  defm : avx512_fp_logical_lowering<InstrStr#DZ256, OpNode, v8f32x_info,
                                    HasVLX>;
  defm : avx512_fp_logical_lowering<InstrStr#QZ256, OpNode, v4f64x_info,
                                    HasVLX>;
  defm : avx512_fp_logical_lowering<InstrStr#DZ, OpNode, v16f32_info,
                                    HasAVX512>;
  defm : avx512_fp_logical_lowering<InstrStr#QZ, OpNode, v8f64_info,
                                    HasAVX512>;
}
5203
// Map each integer logic node onto the matching VP* instruction family.
defm : avx512_fp_logical_lowering_sizes<"VPAND",  and>;
defm : avx512_fp_logical_lowering_sizes<"VPOR",   or>;
defm : avx512_fp_logical_lowering_sizes<"VPXOR",  xor>;
defm : avx512_fp_logical_lowering_sizes<"VPANDN", X86andnp>;
5208
let Predicates = [HasVLX,HasDQI] in {
  // Use packed logical operations for scalar ops.
  // Each scalar operand is copied into VR128X, the 128-bit packed instruction
  // is applied, and the result is copied back to the scalar register class.

  // f64 operands (FR64X) use the PD instructions.
  def : Pat<(f64 (X86fand FR64X:$src1, FR64X:$src2)),
            (COPY_TO_REGCLASS
               (VANDPDZ128rr (COPY_TO_REGCLASS FR64X:$src1, VR128X),
                             (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
               FR64X)>;
  def : Pat<(f64 (X86for FR64X:$src1, FR64X:$src2)),
            (COPY_TO_REGCLASS
               (VORPDZ128rr (COPY_TO_REGCLASS FR64X:$src1, VR128X),
                            (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
               FR64X)>;
  def : Pat<(f64 (X86fxor FR64X:$src1, FR64X:$src2)),
            (COPY_TO_REGCLASS
               (VXORPDZ128rr (COPY_TO_REGCLASS FR64X:$src1, VR128X),
                             (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
               FR64X)>;
  def : Pat<(f64 (X86fandn FR64X:$src1, FR64X:$src2)),
            (COPY_TO_REGCLASS
               (VANDNPDZ128rr (COPY_TO_REGCLASS FR64X:$src1, VR128X),
                              (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
               FR64X)>;

  // f32 operands (FR32X) use the PS instructions.
  def : Pat<(f32 (X86fand FR32X:$src1, FR32X:$src2)),
            (COPY_TO_REGCLASS
               (VANDPSZ128rr (COPY_TO_REGCLASS FR32X:$src1, VR128X),
                             (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
               FR32X)>;
  def : Pat<(f32 (X86for FR32X:$src1, FR32X:$src2)),
            (COPY_TO_REGCLASS
               (VORPSZ128rr (COPY_TO_REGCLASS FR32X:$src1, VR128X),
                            (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
               FR32X)>;
  def : Pat<(f32 (X86fxor FR32X:$src1, FR32X:$src2)),
            (COPY_TO_REGCLASS
               (VXORPSZ128rr (COPY_TO_REGCLASS FR32X:$src1, VR128X),
                             (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
               FR32X)>;
  def : Pat<(f32 (X86fandn FR32X:$src1, FR32X:$src2)),
            (COPY_TO_REGCLASS
               (VANDNPSZ128rr (COPY_TO_REGCLASS FR32X:$src1, VR128X),
                              (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
               FR32X)>;
}
5245
// Packed scalef-style operation for one vector type `_`: rr, rm and rmb
// (broadcast) maskable forms. Every pattern passes FROUND_CURRENT as the
// third operand of OpNode.
multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                              (ins _.RC:$src1, _.RC:$src2),
                              OpcodeStr##_.Suffix,
                              "$src2, $src1", "$src1, $src2",
                              (_.VT (OpNode _.RC:$src1, _.RC:$src2,
                                            (i32 FROUND_CURRENT))),
                              itins.rr>,
              EVEX_4V, Sched<[itins.Sched]>;

    defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                              (ins _.RC:$src1, _.MemOp:$src2),
                              OpcodeStr##_.Suffix,
                              "$src2, $src1", "$src1, $src2",
                              (OpNode _.RC:$src1, (_.LdFrag addr:$src2),
                                      (i32 FROUND_CURRENT)),
                              itins.rm>,
              EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;

    defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                               (ins _.RC:$src1, _.ScalarMemOp:$src2),
                               OpcodeStr##_.Suffix,
                               "${src2}"##_.BroadcastStr##", $src1",
                               "$src1, ${src2}"##_.BroadcastStr,
                               (OpNode _.RC:$src1,
                                       (_.VT (X86VBroadcast
                                              (_.ScalarLdFrag addr:$src2))),
                                       (i32 FROUND_CURRENT)),
                               itins.rm>,
               EVEX_4V, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
5269
// Scalar scalef-style operation built on AVX512_maskable_scalar: a
// register-register form and a memory form whose second operand is matched by
// _.ScalarIntMemCPat. Both patterns pass FROUND_CURRENT to OpNode.
multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr,
                                   SDNode OpNode, OpndItins itins,
                                   X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                     (ins _.RC:$src1, _.RC:$src2),
                                     OpcodeStr##_.Suffix,
                                     "$src2, $src1", "$src1, $src2",
                                     (_.VT (OpNode _.RC:$src1, _.RC:$src2,
                                                   (i32 FROUND_CURRENT))),
                                     itins.rr>,
              Sched<[itins.Sched]>;

    defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                     (ins _.RC:$src1, _.IntScalarMemOp:$src2),
                                     OpcodeStr##_.Suffix,
                                     "$src2, $src1", "$src1, $src2",
                                     (OpNode _.RC:$src1,
                                             _.ScalarIntMemCPat:$src2,
                                             (i32 FROUND_CURRENT)),
                                     itins.rm>,
              Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
5286
// All scalef-style forms:
//   PSZ / PDZ       - 512-bit packed forms plus their explicit-rounding forms
//   SSZ128 / SDZ128 - scalar forms (opcode opcScaler, node OpNodeScal) plus
//                     avx512_fp_scalar_round forms
//   P?Z128 / P?Z256 - 128/256-bit packed forms, defined only under HasVLX
multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler,
                                string OpcodeStr, SDNode OpNode,
                                SDNode OpNodeScal> {
  defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F32P,
                                v16f32_info>,
             avx512_fp_round_packed<opc, OpcodeStr, OpNode, SSE_ALU_F32P,
                                    v16f32_info>,
             EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F64P,
                                v8f64_info>,
             avx512_fp_round_packed<opc, OpcodeStr, OpNode, SSE_ALU_F64P,
                                    v8f64_info>,
             EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

  defm SSZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal,
                                        SSE_ALU_F32S, f32x_info>,
                avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info,
                                       OpNodeScal, SSE_ALU_ITINS_S.s>,
                EVEX_4V, EVEX_CD8<32, CD8VT1>;
  defm SDZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal,
                                        SSE_ALU_F64S, f64x_info>,
                avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info,
                                       OpNodeScal, SSE_ALU_ITINS_S.d>,
                EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;

  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F32P,
                                     v4f32x_info>,
                  EVEX_V128, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F32P,
                                     v8f32x_info>,
                  EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F64P,
                                     v2f64x_info>,
                  EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F64P,
                                     v4f64x_info>,
                  EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
  }
}
// VSCALEF: packed forms use opcode 0x2C, scalar forms use opcode 0x2D.
defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef",
                                    X86scalef, X86scalefs>,
               T8PD;
Asaf Badouh7ec4b7a2015-06-28 14:30:39 +00005314
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00005315//===----------------------------------------------------------------------===//
5316// AVX-512 VPTESTM instructions
5317//===----------------------------------------------------------------------===//
5318
// Register and full-vector memory forms of a vptest-style compare for one
// vector type `_`. The instruction patterns match
//   OpNode(bitconvert(and(src1, src2) as _.i64VT), all-zeros)
// and write a mask register (_.KRC). Suffix is used only to build the
// instruction names referenced by the extra patterns at the end.
multiclass avx512_vptest<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                         OpndItins itins, X86VectorVTInfo _, string Suffix> {
  let ExeDomain = _.ExeDomain in {
    // Only the register-register form is marked commutable.
    let isCommutable = 1 in {
      defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
                                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                                    "$src2, $src1", "$src1, $src2",
                                    (OpNode (bitconvert
                                             (_.i64VT (and _.RC:$src1,
                                                           _.RC:$src2))),
                                            _.ImmAllZerosV),
                                    itins.rr>,
                EVEX_4V, Sched<[itins.Sched]>;
    }

    defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
                                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                                  "$src2, $src1", "$src1, $src2",
                                  (OpNode (bitconvert
                                           (_.i64VT
                                            (and _.RC:$src1,
                                                 (bitconvert
                                                  (_.LdFrag addr:$src2))))),
                                          _.ImmAllZerosV),
                                  itins.rm>,
              EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
              Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }

  // Patterns for compare with 0 that just use the same source twice.
  def : Pat<(_.KVT (OpNode _.RC:$src, _.ImmAllZerosV)),
            (_.KVT (!cast<Instruction>(NAME # Suffix # _.ZSuffix # "rr")
                      _.RC:$src, _.RC:$src))>;

  // Same, with the result and-ed with a mask: use the masked (rrk) form.
  def : Pat<(_.KVT (and _.KRC:$mask, (OpNode _.RC:$src, _.ImmAllZerosV))),
            (_.KVT (!cast<Instruction>(NAME # Suffix # _.ZSuffix # "rrk")
                      _.KRC:$mask, _.RC:$src, _.RC:$src))>;
}
5349
// Broadcast-memory form of a vptest-style compare: the second operand is a
// scalar load broadcast with X86VBroadcast, and-ed with $src1 and compared
// against all-zeros through OpNode. Tagged EVEX_B.
multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                            OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
                                   (ins _.RC:$src1, _.ScalarMemOp:$src2),
                                   OpcodeStr,
                                   "${src2}"##_.BroadcastStr##", $src1",
                                   "$src1, ${src2}"##_.BroadcastStr,
                                   (OpNode (and _.RC:$src1,
                                                (X86VBroadcast
                                                 (_.ScalarLdFrag addr:$src2))),
                                           _.ImmAllZerosV),
                                   itins.rm>,
               EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
               Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
Igor Bregerfca0a342016-01-28 13:19:25 +00005364
// Use 512bit version to implement 128/256 bit in case NoVLX.
//
// `_` is the narrow type being matched and ExtendInfo the wide type whose
// "Z" instruction is used. Sources are placed into an IMPLICIT_DEF wide
// register with INSERT_SUBREG, and the mask result is copied back to _.KRC.
multiclass avx512_vptest_lowering<PatFrag OpNode, X86VectorVTInfo ExtendInfo,
                                  X86VectorVTInfo _, string Suffix> {
  // Unmasked test of (src1 & src2).
  def : Pat<(_.KVT (OpNode (bitconvert
                            (_.i64VT (and _.RC:$src1, _.RC:$src2))),
                           _.ImmAllZerosV)),
            (_.KVT (COPY_TO_REGCLASS
                      (!cast<Instruction>(NAME # Suffix # "Zrr")
                         (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                        _.RC:$src1, _.SubRegIdx),
                         (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                        _.RC:$src2, _.SubRegIdx)),
                      _.KRC))>;

  // Masked test of (src1 & src2); the mask is copied to the wide mask class.
  def : Pat<(_.KVT (and _.KRC:$mask,
                        (OpNode (bitconvert
                                 (_.i64VT (and _.RC:$src1, _.RC:$src2))),
                                _.ImmAllZerosV))),
            (COPY_TO_REGCLASS
               (!cast<Instruction>(NAME # Suffix # "Zrrk")
                  (COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC),
                  (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                 _.RC:$src1, _.SubRegIdx),
                  (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                 _.RC:$src2, _.SubRegIdx)),
               _.KRC)>;

  // Unmasked compare of a single source with zero: same source used twice.
  def : Pat<(_.KVT (OpNode _.RC:$src, _.ImmAllZerosV)),
            (_.KVT (COPY_TO_REGCLASS
                      (!cast<Instruction>(NAME # Suffix # "Zrr")
                         (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                        _.RC:$src, _.SubRegIdx),
                         (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                        _.RC:$src, _.SubRegIdx)),
                      _.KRC))>;

  // Masked compare of a single source with zero.
  def : Pat<(_.KVT (and _.KRC:$mask, (OpNode _.RC:$src, _.ImmAllZerosV))),
            (COPY_TO_REGCLASS
               (!cast<Instruction>(NAME # Suffix # "Zrrk")
                  (COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC),
                  (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                 _.RC:$src, _.SubRegIdx),
                  (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                 _.RC:$src, _.SubRegIdx)),
               _.KRC)>;
}
5409
// Instantiates the vptest forms (register, memory and broadcast) for the
// 512/256/128-bit members of the type family `_`. The narrow instructions
// require HasVLX; under NoVLX the narrow types get lowering patterns that
// reuse the 512-bit instruction instead.
multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
                                  PatFrag OpNode, OpndItins itins,
                                  AVX512VLVectorVTInfo _, string Suffix> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info512, Suffix>,
             avx512_vptest_mb<opc, OpcodeStr, OpNode, itins, _.info512>,
             EVEX_V512;
  }

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info256,
                              Suffix>,
                avx512_vptest_mb<opc, OpcodeStr, OpNode, itins, _.info256>,
                EVEX_V256;
    defm Z128 : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info128,
                              Suffix>,
                avx512_vptest_mb<opc, OpcodeStr, OpNode, itins, _.info128>,
                EVEX_V128;
  }

  let Predicates = [HasAVX512, NoVLX] in {
    defm Z256_Alt : avx512_vptest_lowering<OpNode, _.info512, _.info256,
                                           Suffix>;
    defm Z128_Alt : avx512_vptest_lowering<OpNode, _.info512, _.info128,
                                           Suffix>;
  }
}
5428
// Dword ("d") and qword ("q") variants of a vptest-style instruction; the
// qword variant additionally carries VEX_W.
multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                            OpndItins itins> {
  defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", OpNode, itins,
                                  avx512vl_i32_info, "D">;
  defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", OpNode, itins,
                                  avx512vl_i64_info, "Q">,
           VEX_W;
}
5436
// Word ("w") and byte ("b") variants of a vptest-style instruction.
// 512-bit forms require HasBWI; 128/256-bit forms require HasVLX and HasBWI.
// Word forms carry VEX_W. Under NoVLX the narrow types get lowering patterns
// that reuse the 512-bit instruction.
multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
                            PatFrag OpNode, OpndItins itins> {
  let Predicates = [HasBWI] in {
    defm WZ : avx512_vptest<opc, OpcodeStr#"w", OpNode, itins,
                            v32i16_info, "W">,
              EVEX_V512, VEX_W;
    defm BZ : avx512_vptest<opc, OpcodeStr#"b", OpNode, itins,
                            v64i8_info, "B">,
              EVEX_V512;
  }

  let Predicates = [HasVLX, HasBWI] in {
    defm WZ256 : avx512_vptest<opc, OpcodeStr#"w", OpNode, itins,
                               v16i16x_info, "W">,
                 EVEX_V256, VEX_W;
    defm WZ128 : avx512_vptest<opc, OpcodeStr#"w", OpNode, itins,
                               v8i16x_info, "W">,
                 EVEX_V128, VEX_W;
    defm BZ256 : avx512_vptest<opc, OpcodeStr#"b", OpNode, itins,
                               v32i8x_info, "B">,
                 EVEX_V256;
    defm BZ128 : avx512_vptest<opc, OpcodeStr#"b", OpNode, itins,
                               v16i8x_info, "B">,
                 EVEX_V128;
  }

  let Predicates = [HasAVX512, NoVLX] in {
    defm BZ256_Alt : avx512_vptest_lowering<OpNode, v64i8_info,
                                            v32i8x_info, "B">;
    defm BZ128_Alt : avx512_vptest_lowering<OpNode, v64i8_info,
                                            v16i8x_info, "B">;
    defm WZ256_Alt : avx512_vptest_lowering<OpNode, v32i16_info,
                                            v16i16x_info, "W">;
    defm WZ128_Alt : avx512_vptest_lowering<OpNode, v32i16_info,
                                            v8i16x_info, "W">;
  }
}
5464
// Fragments used to match vptestm/vptestnm. Each wraps X86cmpm with a fixed
// i8 condition operand: 0 for X86pcmpeqm and 4 for X86pcmpnem.
// pcmpeqm is not treated as commutable here because all-zeros vectors have
// already been canonicalized to the RHS during lowering.
def X86pcmpeqm : PatFrag<(ops node:$src1, node:$src2),
                         (X86cmpm node:$src1, node:$src2, (i8 0))>;
def X86pcmpnem : PatFrag<(ops node:$src1, node:$src2),
                         (X86cmpm node:$src1, node:$src2, (i8 4))>;
5472
// Convenience multiclass: byte/word forms (opcode opc_wb) together with
// dword/qword forms (opcode opc_dq) of one vptest-style instruction.
multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq,
                                   string OpcodeStr, PatFrag OpNode,
                                   OpndItins itins> :
    avx512_vptest_wb<opc_wb, OpcodeStr, OpNode, itins>,
    avx512_vptest_dq<opc_dq, OpcodeStr, OpNode, itins>;
Elena Demikhovsky431b81e2015-04-21 13:13:46 +00005477
// VPTESTM and VPTESTNM share opcodes 0x26 (b/w) and 0x27 (d/q). They differ in
// prefix (T8PD vs. T8XS) and in the compare fragment matched (X86pcmpnem vs.
// X86pcmpeqm).
defm VPTESTM  : avx512_vptest_all_forms<0x26, 0x27, "vptestm", X86pcmpnem,
                                        SSE_BIT_ITINS_P>,
                T8PD;
defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86pcmpeqm,
                                        SSE_BIT_ITINS_P>,
                T8XS;
Elena Demikhovskya30e4372014-02-05 07:05:03 +00005482
Cameron McInally9b7c15a2014-11-25 20:41:51 +00005483
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00005484//===----------------------------------------------------------------------===//
5485// AVX-512 Shift instructions
5486//===----------------------------------------------------------------------===//
// Shift by an 8-bit immediate for one vector type `_`.
//   ri - source in a register (format ImmFormR)
//   mi - source loaded from memory through _.LdFrag (format ImmFormM)
multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
                            string OpcodeStr, SDNode OpNode, OpndItins itins,
                            X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
                              (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
                              "$src2, $src1", "$src1, $src2",
                              (_.VT (OpNode _.RC:$src1, (i8 imm:$src2))),
                              itins.rr>,
              Sched<[itins.Sched]>;

    defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
                              (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
                              "$src2, $src1", "$src1, $src2",
                              (_.VT (OpNode
                                     (_.VT (bitconvert
                                            (_.LdFrag addr:$src1))),
                                     (i8 imm:$src2))),
                              itins.rm>,
              Sched<[itins.Sched.Folded]>;
  }
}
5504
// Shift-by-immediate, broadcast-memory form for a single vector type `_`.
//   mbi - a scalar is loaded through _.ScalarLdFrag and broadcast with
//         X86VBroadcast; the count is an 8-bit immediate. The assembly string
//         appends _.BroadcastStr to the memory operand and the def is tagged
//         EVEX_B.
// The inputs are a memory operand and an immediate only, so there is no
// register source that could be read after the load. The scheduling list
// therefore carries just the folded write, matching the `mi` form in
// avx512_shift_rmi (no ReadAfterLd).
multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
                             string OpcodeStr, SDNode OpNode, OpndItins itins,
                             X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
                   (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
      "$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2",
     (_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src1)), (i8 imm:$src2))),
     itins.rm>, EVEX_B, Sched<[itins.Sched.Folded]>;
}

// Shift forms whose count comes from a 128-bit operand, for one vector
// type `_`.
//   rr - count in a VR128X register, typed as SrcVT.
//   rm - count loaded from an i128mem operand with loadv2i64 and wrapped in
//        bc_frag.
// The shifted value ($src1) always uses the register class of `_`.
multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            OpndItins itins, ValueType SrcVT, PatFrag bc_frag,
                            X86VectorVTInfo _> {
  // src2 is always 128-bit
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
                      "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2))),
                   itins.rr>, AVX512BIBase, EVEX_4V, Sched<[itins.Sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
                       "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (bc_frag (loadv2i64 addr:$src2)))),
                   itins.rm>, AVX512BIBase,
                   EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}

// Instantiates avx512_shift_rrm at 512, 256 and 128 bits.
//   Z    - guarded by [prd].
//   Z256 - guarded by [prd, HasVLX].
//   Z128 - guarded by [prd, HasVLX].
// The EVEX_CD8 tuple kind changes with the vector width (CD8VQ, CD8VH, CD8VF);
// presumably this is because the count operand stays 128 bits wide while the
// vector grows -- confirm against the EVEX_CD8 definition.
multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              OpndItins itins, ValueType SrcVT, PatFrag bc_frag,
                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
  defm Z    : avx512_shift_rrm<opc, OpcodeStr, OpNode, itins, SrcVT, bc_frag,
                               VTInfo.info512>, EVEX_V512,
                               EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, itins, SrcVT, bc_frag,
                               VTInfo.info256>, EVEX_V256,
                               EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
  defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, itins, SrcVT, bc_frag,
                               VTInfo.info128>, EVEX_V128,
                               EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
  }
}

// Instantiates avx512_shift_sizes for the three element widths, appending
// "d", "q" or "w" to OpcodeStr.
//   D - opcode opcd, count type v4i32, requires HasAVX512.
//   Q - opcode opcq, count type v2i64, requires HasAVX512, tagged VEX_W.
//   W - opcode opcw, count type v8i16, requires HasBWI.
// NOTE(review): D pairs its v4i32 count type with bc_v4i32, whereas W pairs
// v8i16 with bc_v2i64 -- confirm whether bc_v8i16 was intended for the W
// memory form.
multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
                              string OpcodeStr, SDNode OpNode,
                              OpndItins itins> {
  defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, itins, v4i32,
                              bc_v4i32, avx512vl_i32_info, HasAVX512>;
  defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, itins, v2i64,
                              bc_v2i64, avx512vl_i64_info, HasAVX512>, VEX_W;
  defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, itins, v8i16,
                              bc_v2i64, avx512vl_i16_info, HasBWI>;
}

// Immediate-count shift forms at all three vector widths. Each width combines
// avx512_shift_rmi (ri, mi) with avx512_shift_rmbi (mbi).
//   Z    - 512-bit, guarded by [HasAVX512].
//   Z256 - 256-bit, guarded by [HasAVX512, HasVLX].
//   Z128 - 128-bit, guarded by [HasAVX512, HasVLX].
multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
                                  string OpcodeStr, SDNode OpNode,
                                  OpndItins itins, AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in
  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, itins,
                              VTInfo.info512>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, itins,
                               VTInfo.info512>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, itins,
                              VTInfo.info256>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, itins,
                               VTInfo.info256>, EVEX_V256;
  defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              itins, VTInfo.info128>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, itins,
                               VTInfo.info128>, EVEX_V128;
  }
}

// Immediate-count shift forms for 16-bit elements. Only avx512_shift_rmi
// (ri, mi) is instantiated; no broadcast form is defined here.
//   WZ    - v32i16, guarded by [HasBWI].
//   WZ256 - v16i16, guarded by [HasVLX, HasBWI].
//   WZ128 - v8i16,  guarded by [HasVLX, HasBWI].
// Every def is tagged VEX_WIG.
multiclass avx512_shift_rmi_w<bits<8> opcw,
                              Format ImmFormR, Format ImmFormM,
                              string OpcodeStr, SDNode OpNode,
                              OpndItins itins> {
  let Predicates = [HasBWI] in
  defm WZ:    avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                               itins, v32i16_info>, EVEX_V512, VEX_WIG;
  let Predicates = [HasVLX, HasBWI] in {
  defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                               itins, v16i16x_info>, EVEX_V256, VEX_WIG;
  defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                               itins, v8i16x_info>, EVEX_V128, VEX_WIG;
  }
}

// Immediate-count shift forms for 32- and 64-bit elements, appending "d" or
// "q" to OpcodeStr.
//   D - opcode opcd, avx512vl_i32_info, EVEX_CD8<32, CD8VF>.
//   Q - opcode opcq, avx512vl_i64_info, EVEX_CD8<64, CD8VF>, tagged VEX_W.
multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
                               Format ImmFormR, Format ImmFormM,
                               string OpcodeStr, SDNode OpNode, OpndItins itins> {
  defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
                                 itins, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
  defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
                                 itins, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
}

// Immediate-count shifts. Each combines the D/Q forms (avx512_shift_rmi_dq)
// with the W forms (avx512_shift_rmi_w). The operation is selected by the
// MRMnr/MRMnm format pair: 2 = logical right, 6 = left, 4 = arithmetic right.
defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
                                 SSE_INTSHIFT_P>,
             avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
                                SSE_INTSHIFT_P>, AVX512BIi8Base, EVEX_4V;

defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
                                 SSE_INTSHIFT_P>,
             avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
                                SSE_INTSHIFT_P>, AVX512BIi8Base, EVEX_4V;

// VPSRA, VPROR and VPROL pass 0x72 for both the D and the Q opcode; the Q
// forms are distinguished by the VEX_W that avx512_shift_rmi_dq puts on Q.
defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
                                 SSE_INTSHIFT_P>,
             avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
                                SSE_INTSHIFT_P>, AVX512BIi8Base, EVEX_4V;

// Immediate-count rotates: D and Q forms only, no W form.
defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
                                 SSE_INTSHIFT_P>, AVX512BIi8Base, EVEX_4V;
defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
                                 SSE_INTSHIFT_P>, AVX512BIi8Base, EVEX_4V;

// Shifts whose count comes from a 128-bit register or memory operand.
// Opcode arguments are given in D, Q, W order.
defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl, SSE_INTSHIFT_P>;
defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra, SSE_INTSHIFT_P>;
defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl, SSE_INTSHIFT_P>;

// Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX.
// Each pattern inserts the narrow source into an undefined v8i64 register,
// runs the 512-bit instruction, and extracts the matching low subregister.
// The 128-bit count operand of the register-count forms is passed unchanged.
let Predicates = [HasAVX512, NoVLX] in {
  // Register-count forms (X86vsra).
  def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                 VR128X:$src2)), sub_ymm)>;

  def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                 VR128X:$src2)), sub_xmm)>;

  // Immediate-count forms (X86vsrai).
  def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                 imm:$src2)), sub_ymm)>;

  def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                 imm:$src2)), sub_xmm)>;
}

//===-------------------------------------------------------------------===//
// Variable Bit Shifts
//===-------------------------------------------------------------------===//

// Two-source vector forms for one vector type `_`, where the second source is
// a full vector of the same type.
//   rr - second source in a register.
//   rm - second source loaded through _.LdFrag and bitconverted to _.VT;
//        carries EVEX_CD8<_.EltSize, CD8VF>.
// This multiclass is also reused by the VPERM* multiclasses later in the file.
multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                      "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2))),
                   itins.rr>, AVX5128IBase, EVEX_4V,
                   Sched<[itins.Sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                       "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1,
                   (_.VT (bitconvert (_.LdFrag addr:$src2))))),
                   itins.rm>, AVX5128IBase, EVEX_4V,
                   EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}

// Broadcast-memory companion of avx512_var_shift for one vector type `_`.
//   rmb - second source is a scalar loaded through _.ScalarLdFrag and
//         broadcast with X86VBroadcast. The assembly string appends
//         _.BroadcastStr to $src2 and the def is tagged EVEX_B.
multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                    "${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr,
                    (_.VT (OpNode _.RC:$src1, (_.VT (X86VBroadcast
                                                (_.ScalarLdFrag addr:$src2))))),
                    itins.rm>, AVX5128IBase, EVEX_B,
                    EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[itins.Sched.Folded, ReadAfterLd]>;
}

// Variable-shift forms at all three vector widths. Each width combines
// avx512_var_shift (rr, rm) with avx512_var_shift_mb (rmb).
//   Z    - 512-bit, guarded by [HasAVX512].
//   Z256 - 256-bit, guarded by [HasAVX512, HasVLX].
//   Z128 - 128-bit, guarded by [HasAVX512, HasVLX].
multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  OpndItins itins, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info512>,
           avx512_var_shift_mb<opc, OpcodeStr, OpNode, itins, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info256>,
              avx512_var_shift_mb<opc, OpcodeStr, OpNode, itins, _.info256>, EVEX_V256;
  defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info128>,
              avx512_var_shift_mb<opc, OpcodeStr, OpNode, itins, _.info128>, EVEX_V128;
  }
}

// Variable-shift forms for 32- and 64-bit elements, appending "d" or "q" to
// OpcodeStr. Both use the same opcode; the Q instantiation is tagged VEX_W.
multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode, OpndItins itins> {
  defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, itins,
                                  avx512vl_i32_info>;
  defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, itins,
                                  avx512vl_i64_info>, VEX_W;
}

// Use 512bit version to implement 128/256 bit in case NoVLX.
// For OpNode on the 256-bit and 128-bit types of `_`, both sources are
// inserted into undefined 512-bit registers, the instruction named
// OpcodeStr#"Zrr" is applied, and the matching low subregister is extracted.
// `p` is the predicate list under which the two patterns are active.
multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
                                     SDNode OpNode, list<Predicate> p> {
  let Predicates = p in {
  def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
                                  (_.info256.VT _.info256.RC:$src2))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(OpcodeStr#"Zrr")
                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;

  def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
                                  (_.info128.VT _.info128.RC:$src2))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(OpcodeStr#"Zrr")
                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
  }
}
// Variable-shift forms for 16-bit elements. Only avx512_var_shift (rr, rm) is
// instantiated; no broadcast form is defined here.
//   WZ    - v32i16, guarded by [HasBWI].
//   WZ256 - v16i16, guarded by [HasVLX, HasBWI].
//   WZ128 - v8i16,  guarded by [HasVLX, HasBWI].
// Every def is tagged VEX_W.
multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
                              SDNode OpNode, OpndItins itins> {
  let Predicates = [HasBWI] in
  defm WZ:    avx512_var_shift<opc, OpcodeStr, OpNode, itins, v32i16_info>,
              EVEX_V512, VEX_W;
  let Predicates = [HasVLX, HasBWI] in {

  defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, itins, v16i16x_info>,
              EVEX_V256, VEX_W;
  defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, itins, v8i16x_info>,
              EVEX_V128, VEX_W;
  }
}

// Per-element variable shifts: D/Q forms from avx512_var_shift_types plus
// W forms from avx512_var_shift_w. They are matched against the generic
// shl / sra / srl nodes.
defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl, SSE_INTSHIFT_P>,
              avx512_var_shift_w<0x12, "vpsllvw", shl, SSE_INTSHIFT_P>;

defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra, SSE_INTSHIFT_P>,
              avx512_var_shift_w<0x11, "vpsravw", sra, SSE_INTSHIFT_P>;

defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl, SSE_INTSHIFT_P>,
              avx512_var_shift_w<0x10, "vpsrlvw", srl, SSE_INTSHIFT_P>;

// Per-element variable rotates: D and Q forms only.
defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SSE_INTSHIFT_P>;
defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SSE_INTSHIFT_P>;

// Without VLX, implement the 128/256-bit cases with the 512-bit instructions.
defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", sra, [HasAVX512, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", shl, [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", sra, [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", srl, [HasBWI, NoVLX]>;

// Special handling for VPSRAV intrinsics.
// Maps the X86vsrav node onto the instruction family named
// InstrStr # _.ZSuffix for one vector type `_`, under predicate list `p`:
//   rr   / rm   - unmasked, register / full-vector memory second source.
//   rrk  / rmk  - vselect on $mask with pass-through $src0.
//   rrkz / rmkz - vselect on $mask with _.ImmAllZerosV as the false value.
// The masked source patterns take the mask as _.KRCWM while the output
// instructions are given it as _.KRC.
multiclass avx512_var_shift_int_lowering<string InstrStr, X86VectorVTInfo _,
                                         list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(_.VT (X86vsrav _.RC:$src1, _.RC:$src2)),
              (!cast<Instruction>(InstrStr#_.ZSuffix#rr) _.RC:$src1,
               _.RC:$src2)>;
    def : Pat<(_.VT (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2)))),
              (!cast<Instruction>(InstrStr#_.ZSuffix##rm)
               _.RC:$src1, addr:$src2)>;
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                     (X86vsrav _.RC:$src1, _.RC:$src2), _.RC:$src0)),
              (!cast<Instruction>(InstrStr#_.ZSuffix#rrk) _.RC:$src0,
               _.KRC:$mask, _.RC:$src1, _.RC:$src2)>;
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                     (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))),
                     _.RC:$src0)),
              (!cast<Instruction>(InstrStr#_.ZSuffix##rmk) _.RC:$src0,
               _.KRC:$mask, _.RC:$src1, addr:$src2)>;
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                     (X86vsrav _.RC:$src1, _.RC:$src2), _.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr#_.ZSuffix#rrkz) _.KRC:$mask,
               _.RC:$src1, _.RC:$src2)>;
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                     (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))),
                     _.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr#_.ZSuffix##rmkz) _.KRC:$mask,
               _.RC:$src1, addr:$src2)>;
  }
}

// Extends avx512_var_shift_int_lowering (inherited, so its six patterns are
// emitted as well) with the broadcast-memory cases of X86vsrav:
//   rmb   - unmasked.
//   rmbk  - vselect on $mask with pass-through $src0.
//   rmbkz - vselect on $mask with _.ImmAllZerosV as the false value.
// The second source is a scalar loaded through _.ScalarLdFrag and broadcast
// with X86VBroadcast.
multiclass avx512_var_shift_int_lowering_mb<string InstrStr, X86VectorVTInfo _,
                                            list<Predicate> p> :
           avx512_var_shift_int_lowering<InstrStr, _, p> {
  let Predicates = p in {
    def : Pat<(_.VT (X86vsrav _.RC:$src1,
                     (X86VBroadcast (_.ScalarLdFrag addr:$src2)))),
              (!cast<Instruction>(InstrStr#_.ZSuffix##rmb)
               _.RC:$src1, addr:$src2)>;
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                     (X86vsrav _.RC:$src1,
                      (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
                     _.RC:$src0)),
              (!cast<Instruction>(InstrStr#_.ZSuffix##rmbk) _.RC:$src0,
               _.KRC:$mask, _.RC:$src1, addr:$src2)>;
    def : Pat<(_.VT (vselect _.KRCWM:$mask,
                     (X86vsrav _.RC:$src1,
                      (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
                     _.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr#_.ZSuffix##rmbkz) _.KRC:$mask,
               _.RC:$src1, addr:$src2)>;
  }
}

// X86vsrav lowering for every VPSRAV width. The W forms use the variant
// without broadcast patterns, matching avx512_var_shift_w, which defines no
// rmb form. The D and Q forms use the _mb variant.
defm : avx512_var_shift_int_lowering<"VPSRAVW", v8i16x_info, [HasVLX, HasBWI]>;
defm : avx512_var_shift_int_lowering<"VPSRAVW", v16i16x_info, [HasVLX, HasBWI]>;
defm : avx512_var_shift_int_lowering<"VPSRAVW", v32i16_info, [HasBWI]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v4i32x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v8i32x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v16i32_info, [HasAVX512]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v2i64x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v4i64x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v8i64_info, [HasAVX512]>;


// Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
// Each pattern widens its vector source(s) into undefined 512-bit registers
// with INSERT_SUBREG, applies the 512-bit instruction, and extracts the
// matching low subregister.
let Predicates = [HasAVX512, NoVLX] in {
  // Variable rotate-left, 64-bit elements.
  def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
                       sub_xmm)>;
  def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
                       sub_ymm)>;

  // Variable rotate-left, 32-bit elements.
  def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
                        sub_xmm)>;
  def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
                        sub_ymm)>;

  // Immediate rotate-left, 64-bit elements.
  def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                        imm:$src2)), sub_xmm)>;
  def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                       imm:$src2)), sub_ymm)>;

  // Immediate rotate-left, 32-bit elements.
  def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                        imm:$src2)), sub_xmm)>;
  def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                        imm:$src2)), sub_ymm)>;
}

// Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
// Each pattern widens its vector source(s) into undefined 512-bit registers
// with INSERT_SUBREG, applies the 512-bit instruction, and extracts the
// matching low subregister.
let Predicates = [HasAVX512, NoVLX] in {
  // Variable rotate-right, 64-bit elements.
  def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
                       sub_xmm)>;
  def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
                       sub_ymm)>;

  // Variable rotate-right, 32-bit elements.
  def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
                        sub_xmm)>;
  def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
                        sub_ymm)>;

  // Immediate rotate-right, 64-bit elements.
  def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                        imm:$src2)), sub_xmm)>;
  def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                       imm:$src2)), sub_ymm)>;

  // Immediate rotate-right, 32-bit elements.
  def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                        imm:$src2)), sub_xmm)>;
  def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                        imm:$src2)), sub_ymm)>;
}

//===-------------------------------------------------------------------===//
// 1-src variable permutation VPERMW/D/Q
//===-------------------------------------------------------------------===//

// Vector-index permute forms, built by reusing avx512_var_shift (rr, rm) and
// avx512_var_shift_mb (rmb).
//   Z    - 512-bit, guarded by [HasAVX512].
//   Z256 - 256-bit, guarded by [HasAVX512, HasVLX].
// No 128-bit (Z128) form is defined by this multiclass.
multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 OpndItins itins, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info512>,
           avx512_var_shift_mb<opc, OpcodeStr, OpNode, itins, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in
  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info256>,
              avx512_var_shift_mb<opc, OpcodeStr, OpNode, itins, _.info256>, EVEX_V256;
}

// Immediate-index permute forms, built by reusing avx512_shift_rmi (ri, mi)
// and avx512_shift_rmbi (mbi).
//   Z    - 512-bit, guarded by [HasAVX512].
//   Z256 - 256-bit, guarded by [HasAVX512, HasVLX].
// No 128-bit (Z128) form is defined by this multiclass.
multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
                                  string OpcodeStr, SDNode OpNode,
                                  OpndItins itins, AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in
  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              itins, VTInfo.info512>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                               itins, VTInfo.info512>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in
  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              itins, VTInfo.info256>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                               itins, VTInfo.info256>, EVEX_V256;
}

// Vector-index permute forms for byte/word element types, built from
// avx512_var_shift only (rr, rm; no broadcast form). `prd` is the feature
// predicate that gates the element type.
//   Z    - 512-bit, guarded by [prd].
//   Z256 - 256-bit, guarded by [HasVLX, prd].
//   Z128 - 128-bit, guarded by [HasVLX, prd].
multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
                           Predicate prd, SDNode OpNode,
                           OpndItins itins, AVX512VLVectorVTInfo _> {
  let Predicates = [prd] in
  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info512>,
             EVEX_V512 ;
  let Predicates = [HasVLX, prd] in {
  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info256>,
             EVEX_V256 ;
  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info128>,
             EVEX_V128 ;
  }
}

// Byte/word permutes. Both use opcode 0x8D; VPERMW is tagged VEX_W and gated
// on HasBWI, VPERMB is gated on HasVBMI.
defm VPERMW  : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
                               AVX2_PERMV_I, avx512vl_i16_info>, VEX_W;
defm VPERMB  : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
                               AVX2_PERMV_I, avx512vl_i8_info>;

// Dword/qword and float/double permutes with a vector index (X86VPermv).
// Each D/Q or PS/PD pair shares one opcode; the 64-bit variant adds VEX_W.
defm VPERMD  : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
                                     AVX2_PERMV_I, avx512vl_i32_info>;
defm VPERMQ  : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
                                     AVX2_PERMV_I, avx512vl_i64_info>, VEX_W;
defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
                                     AVX2_PERMV_F, avx512vl_f32_info>;
defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
                                     AVX2_PERMV_F, avx512vl_f64_info>, VEX_W;

// Qword/double permutes with an immediate index (X86VPermi). These reuse the
// VPERMQ / VPERMPD prefixes; the generated defs differ by suffix (ri/mi/mbi
// here versus rr/rm/rmb for the vector-index forms).
defm VPERMQ  : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
                                      X86VPermi, AVX2_PERMV_I, avx512vl_i64_info>,
                                      EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
                                      X86VPermi, AVX2_PERMV_F, avx512vl_f64_info>,
                                      EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
//===----------------------------------------------------------------------===//
// AVX-512 - VPERMIL
//===----------------------------------------------------------------------===//

// Variable-control VPERMIL forms for one data type `_` and a separate control
// vector type `Ctrl`.
//   rr  - control vector in a Ctrl.RC register.
//   rm  - control vector loaded through Ctrl.LdFrag and bitconverted to
//         Ctrl.VT.
//   rmb - control scalar loaded through Ctrl.ScalarLdFrag and broadcast with
//         X86VBroadcast; tagged EVEX_B. The instruction operand and the
//         broadcast string come from `_` (_.ScalarMemOp, _.BroadcastStr).
// The memory forms use EVEX_CD8<_.EltSize, CD8VF>.
multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
                             OpndItins itins, X86VectorVTInfo _,
                             X86VectorVTInfo Ctrl> {
  defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1,
                               (Ctrl.VT Ctrl.RC:$src2))), itins.rr>,
                  T8PD, EVEX_4V, Sched<[itins.Sched]>;
  defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode
                           _.RC:$src1,
                           (Ctrl.VT (bitconvert(Ctrl.LdFrag addr:$src2))))),
                  itins.rm>, T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                  Sched<[itins.Sched.Folded, ReadAfterLd]>;
  defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                   "${src2}"##_.BroadcastStr##", $src1",
                   "$src1, ${src2}"##_.BroadcastStr,
                   (_.VT (OpNode
                            _.RC:$src1,
                            (Ctrl.VT (X86VBroadcast
                                       (Ctrl.ScalarLdFrag addr:$src2))))),
                   itins.rm>, T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[itins.Sched.Folded, ReadAfterLd]>;
}

// Instantiates avx512_permil_vec (always with the X86VPermilpv node) for the
// three vector widths.  The 512-bit form (Z) requires HasAVX512; the 128- and
// 256-bit forms (Z128 / Z256) additionally require HasVLX.
//
//   _    - per-width type info for the data operand.
//   Ctrl - per-width type info for the control operand.
multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
                                    OpndItins itins, AVX512VLVectorVTInfo _,
                                    AVX512VLVectorVTInfo Ctrl> {
  let Predicates = [HasAVX512] in {
    defm Z    : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, itins,
                                  _.info512, Ctrl.info512>, EVEX_V512;
  }
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, itins,
                                  _.info128, Ctrl.info128>, EVEX_V128;
    defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, itins,
                                  _.info256, Ctrl.info256>, EVEX_V256;
  }
}
6046
// Top-level VPERMIL multiclass: emits both the variable-control forms
// (opcode OpcVar, via avx512_permil_vec_common) and the immediate-control
// forms (opcode OpcImm, via avx512_shift_rmi_sizes with X86VPermilpi) under
// the same NAME prefix.  Both use the AVX_VPERMILV itinerary.
multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
                         AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
  defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, AVX_VPERMILV, _, Ctrl>;
  defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
                                    X86VPermilpi, AVX_VPERMILV, _>,
                    EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
6054
// VPERMILPS / VPERMILPD.  The data operand uses the floating-point type info
// and the control operand uses the same-width integer type info.  Each defm
// is tagged with the matching packed-FP execution domain; the double form
// also mixes in VEX_W.
let ExeDomain = SSEPackedSingle in
defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
                               avx512vl_i32_info>;
let ExeDomain = SSEPackedDouble in
defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
                               avx512vl_i64_info>, VEX_W;
Simon Pilgrim1401a752017-11-29 14:58:34 +00006061
//===----------------------------------------------------------------------===//
// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
//===----------------------------------------------------------------------===//
6065
// Immediate-controlled shuffles.  All three pass opcode 0x70 and differ in
// the encoding base class they mix in (AVX512BIi8Base / AVX512XSIi8Base /
// AVX512XDIi8Base).  Note that the record prefixes are VPSHUFH / VPSHUFL
// while the mnemonics are "vpshufhw" / "vpshuflw".
defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
                             X86PShufd, SSE_PSHUF, avx512vl_i32_info>,
                             EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
                                  X86PShufhw, SSE_PSHUF>, EVEX, AVX512XSIi8Base;
defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
                                  X86PShuflw, SSE_PSHUF>, EVEX, AVX512XDIi8Base;
Michael Liao66233b72015-08-06 09:06:20 +00006073
// Byte-shuffle instantiation for the three vector widths, reusing the
// avx512_var_shift multiclass.  The 512-bit form (Z) requires HasBWI; the
// 256- and 128-bit forms (Z256 / Z128) require both HasVLX and HasBWI.
multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               OpndItins itins> {
  let Predicates = [HasBWI] in
  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, itins, v64i8_info>, EVEX_V512;

  let Predicates = [HasVLX, HasBWI] in {
  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, itins, v32i8x_info>, EVEX_V256;
  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, itins, v16i8x_info>, EVEX_V128;
  }
}
6084
// VPSHUFB: opcode 0x00, X86pshufb node, SSE_PSHUFB itinerary, VEX_WIG.
defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb, SSE_PSHUFB>, VEX_WIG;
Elena Demikhovsky55a99742015-06-22 13:00:42 +00006086
//===----------------------------------------------------------------------===//
// Move Low to High and High to Low packed FP Instructions
//===----------------------------------------------------------------------===//
// EVEX-encoded register-register VMOVLHPS / VMOVHLPS on 128-bit registers
// (VR128X).  Each is selected from the corresponding X86Movlhps / X86Movhlps
// node with a v4f32 result and is scheduled as WriteFShuffle.
def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
          (ins VR128X:$src1, VR128X:$src2),
          "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))],
          IIC_SSE_MOV_LH>, Sched<[WriteFShuffle]>, EVEX_4V;
def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
          (ins VR128X:$src1, VR128X:$src2),
          "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))],
          IIC_SSE_MOV_LH>, Sched<[WriteFShuffle]>, EVEX_4V;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006100
//===----------------------------------------------------------------------===//
// VMOVHPS/PD VMOVLPS Instructions
// All patterns were taken from the SSE implementation.
//===----------------------------------------------------------------------===//
// Load form ("rm") shared by VMOVHPS/VMOVHPD/VMOVLPS/VMOVLPD.
//
//   opc       - opcode byte.
//   OpcodeStr - assembly mnemonic.
//   OpNode    - two-operand SDNode combining $src1 with the loaded value.
//   _         - vector type info (register class, VT, execution domain).
//
// The memory operand is always f64mem.  The pattern loads an f64, turns it
// into a v2f64 with scalar_to_vector, and bitconverts that to _.VT before
// handing it to OpNode.
multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
                  (ins _.RC:$src1, f64mem:$src2),
                  !strconcat(OpcodeStr,
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set _.RC:$dst,
                     (OpNode _.RC:$src1,
                       (_.VT (bitconvert
                         (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))],
                  IIC_SSE_MOV_LH>, Sched<[WriteFShuffleLd, ReadAfterLd]>, EVEX_4V;
}
6118
// 128-bit load forms.  The high-half moves pass opcode 0x16 and the low-half
// moves pass 0x12.  The PS forms use EVEX_CD8<32, CD8VT2>; the PD forms use
// EVEX_CD8<64, CD8VT1> and add VEX_W.
defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", X86Movlhps,
                                  v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
                                  v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", X86Movlps,
                                  v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movlpd,
                                  v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
6127
// Extra selection patterns that fold alternative load shapes into the rm
// instructions produced by avx512_mov_hilo_packed (integer-typed 64-bit
// loads, X86vzload, plain vector loads, and X86Movsd of a scalar load).
let Predicates = [HasAVX512] in {
  // VMOVHPS patterns
  def : Pat<(X86Movlhps VR128X:$src1,
               (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
          (VMOVHPSZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(X86Movlhps VR128X:$src1,
               (bc_v4f32 (v2i64 (X86vzload addr:$src2)))),
          (VMOVHPSZ128rm VR128X:$src1, addr:$src2)>;
  // VMOVHPD patterns
  def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
                    (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
           (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
  // VMOVLPS patterns
  def : Pat<(v4f32 (X86Movlps VR128X:$src1, (load addr:$src2))),
          (VMOVLPSZ128rm VR128X:$src1, addr:$src2)>;
  // VMOVLPD patterns
  def : Pat<(v2f64 (X86Movlpd VR128X:$src1, (load addr:$src2))),
          (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(v2f64 (X86Movsd VR128X:$src1,
                           (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
          (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
}
6150
// Store forms ("mr") of VMOVHPS/VMOVHPD/VMOVLPS/VMOVLPD, all scheduled as
// WriteStore.  Every pattern stores element 0 of a v2f64 as an f64:
//   - the high-half stores first apply X86Unpckh of the source with itself;
//   - the low-half stores extract directly from the source.
// The PS variants view the v4f32 source as v2f64 via bc_v2f64.
let SchedRW = [WriteStore] in {
def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
                       (ins f64mem:$dst, VR128X:$src),
                       "vmovhps\t{$src, $dst|$dst, $src}",
                       [(store (f64 (extractelt
                                     (X86Unpckh (bc_v2f64 (v4f32 VR128X:$src)),
                                                (bc_v2f64 (v4f32 VR128X:$src))),
                                     (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>,
                       EVEX, EVEX_CD8<32, CD8VT2>;
def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
                       (ins f64mem:$dst, VR128X:$src),
                       "vmovhpd\t{$src, $dst|$dst, $src}",
                       [(store (f64 (extractelt
                                     (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
                                     (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>,
                       EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
                       (ins f64mem:$dst, VR128X:$src),
                       "vmovlps\t{$src, $dst|$dst, $src}",
                       [(store (f64 (extractelt (bc_v2f64 (v4f32 VR128X:$src)),
                                     (iPTR 0))), addr:$dst)],
                                     IIC_SSE_MOV_LH>,
                       EVEX, EVEX_CD8<32, CD8VT2>;
def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
                       (ins f64mem:$dst, VR128X:$src),
                       "vmovlpd\t{$src, $dst|$dst, $src}",
                       [(store (f64 (extractelt (v2f64 VR128X:$src),
                                     (iPTR 0))), addr:$dst)],
                                     IIC_SSE_MOV_LH>,
                       EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
} // SchedRW
Craig Toppere1cac152016-06-07 07:27:54 +00006182
// Extra selection patterns for the mr (store) instructions:
//   - a store of element 0 of X86VPermilpi(src, 1) selects VMOVHPDZ128mr;
//   - a load / X86Movlps-or-X86Movlpd / store sequence to the same address
//     selects the corresponding low-half store.
let Predicates = [HasAVX512] in {
  // VMOVHPD patterns
  def : Pat<(store (f64 (extractelt
                           (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
                           (iPTR 0))), addr:$dst),
           (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
  // VMOVLPS patterns
  def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128X:$src2)),
                   addr:$src1),
            (VMOVLPSZ128mr addr:$src1, VR128X:$src2)>;
  // VMOVLPD patterns
  def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128X:$src2)),
                   addr:$src1),
            (VMOVLPDZ128mr addr:$src1, VR128X:$src2)>;
}
//===----------------------------------------------------------------------===//
// FMA - Fused Multiply Operations
//
Adam Nemet26371ce2014-10-24 00:02:55 +00006201
// Packed FMA, "213" operand form, for one vector width.
//
// $src1 is tied to $dst.  Every pattern calls OpNode with the operands in
// the order ($src2, $src1, $src3).  Three maskable 3-source forms are
// produced:
//   r  - $src3 in a register.
//   m  - $src3 loaded from memory (full-vector load).
//   mb - $src3 broadcast from a scalar memory operand (EVEX_B).
//
// The Suff parameter is accepted but not referenced in this body.
multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), NoItinerary, 1, 1>,
          AVX512FMA3Base, Sched<[WriteFMA]>;

  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))),
          NoItinerary, 1, 0>, AVX512FMA3Base, Sched<[WriteFMALd, ReadAfterLd]>;

  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
            (ins _.RC:$src2, _.ScalarMemOp:$src3),
            OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
            !strconcat("$src2, ${src3}", _.BroadcastStr ),
            (OpNode _.RC:$src2,
             _.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))),
            NoItinerary, 1, 0>, AVX512FMA3Base, EVEX_B,
            Sched<[WriteFMALd, ReadAfterLd]>;
  }
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006227
// Packed FMA, "213" operand form, register form with an explicit rounding
// control operand ($rc, EVEX_B + EVEX_RC).  OpNode here is the rounding
// variant of the node and receives (i32 imm:$rc) as its last operand.
//
// The Suff parameter is accepted but not referenced in this body.
multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
          (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 imm:$rc))),
          NoItinerary, 1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[WriteFMA]>;
}
Elena Demikhovsky7b0dd392015-01-28 10:21:27 +00006237
// Instantiates the "213" packed FMA forms for all three vector widths.
// The 512-bit record set (Z, HasAVX512) combines the r/m/mb forms with the
// rounding-control form; the 256- and 128-bit sets (Z256 / Z128, HasVLX +
// HasAVX512) get only the r/m/mb forms.
multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   SDNode OpNodeRnd, AVX512VLVectorVTInfo _,
                                   string Suff> {
  let Predicates = [HasAVX512] in {
    defm Z    : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info512, Suff>,
                avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, _.info512,
                                      Suff>, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info256, Suff>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info128, Suff>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}
6253
// Emits the single-precision (PS) and double-precision (PD) variants of a
// "213" packed FMA.  The mnemonic gets a "ps" / "pd" suffix and the PD
// variant mixes in VEX_W.
multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode OpNodeRnd > {
  defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
                                    avx512vl_f32_info, "PS">;
  defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
                                    avx512vl_f64_info, "PD">, VEX_W;
}
6261
// "213" packed FMA instruction families: opcode, mnemonic stem, plain node,
// rounding node.
defm VFMADD213    : avx512_fma3p_213_f<0xA8, "vfmadd213", X86Fmadd, X86FmaddRnd>;
defm VFMSUB213    : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD213   : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB213   : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>;
6268
Elena Demikhovskyfcea06a2014-12-23 10:30:39 +00006269
// Packed FMA, "231" operand form, for one vector width.
//
// $src1 is tied to $dst.  Every pattern calls OpNode with the operands in
// the order ($src2, $src3, $src1).  Three maskable 3-source forms are
// produced:
//   r  - $src3 in a register (also passes the extra "vselect, 1" arguments
//        to AVX512_maskable_3src).
//   m  - $src3 loaded from memory (full-vector load).
//   mb - $src3 broadcast from a scalar memory operand (EVEX_B).
//
// The Suff parameter is accepted but not referenced in this body.
multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), NoItinerary, 1, 1,
          vselect, 1>, AVX512FMA3Base, Sched<[WriteFMA]>;

  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
          NoItinerary, 1, 0>, AVX512FMA3Base, Sched<[WriteFMALd, ReadAfterLd]>;

  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
         (ins _.RC:$src2, _.ScalarMemOp:$src3),
         OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
         "$src2, ${src3}"##_.BroadcastStr,
         (_.VT (OpNode _.RC:$src2,
                      (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
                      _.RC:$src1)), NoItinerary, 1, 0>, AVX512FMA3Base, EVEX_B,
         Sched<[WriteFMALd, ReadAfterLd]>;
  }
}
6295
// Packed FMA, "231" operand form, register form with an explicit rounding
// control operand ($rc, EVEX_B + EVEX_RC).  OpNode is the rounding variant
// of the node and is called as ($src2, $src3, $src1, rc).
//
// The Suff parameter is accepted but not referenced in this body.
multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
          (_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 imm:$rc))),
          NoItinerary, 1, 1, vselect, 1>,
          AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[WriteFMA]>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006306
// Instantiates the "231" packed FMA forms for all three vector widths.
// The 512-bit record set (Z, HasAVX512) combines the r/m/mb forms with the
// rounding-control form; the 256- and 128-bit sets (Z256 / Z128, HasVLX +
// HasAVX512) get only the r/m/mb forms.
multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   SDNode OpNodeRnd, AVX512VLVectorVTInfo _,
                                   string Suff> {
  let Predicates = [HasAVX512] in {
    defm Z    : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info512, Suff>,
                avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, _.info512,
                                      Suff>, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info256, Suff>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info128, Suff>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}
6322
// Emits the single-precision (PS) and double-precision (PD) variants of a
// "231" packed FMA.  The mnemonic gets a "ps" / "pd" suffix and the PD
// variant mixes in VEX_W.
multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode OpNodeRnd > {
  defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
                                    avx512vl_f32_info, "PS">;
  defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
                                    avx512vl_f64_info, "PD">, VEX_W;
}
6330
// "231" packed FMA instruction families: opcode, mnemonic stem, plain node,
// rounding node.
defm VFMADD231    : avx512_fma3p_231_f<0xB8, "vfmadd231", X86Fmadd, X86FmaddRnd>;
defm VFMSUB231    : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD231   : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB231   : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>;
6337
// Packed FMA, "132" operand form, for one vector width.
//
// $src1 is tied to $dst.  The register pattern calls OpNode as
// ($src1, $src3, $src2); the two memory patterns instead use the order
// ($src3, $src1, $src2) for the reason given in the comments below.
// Three maskable 3-source forms are produced:
//   r  - $src3 in a register.
//   m  - $src3 loaded from memory (full-vector load).
//   mb - $src3 broadcast from a scalar memory operand (EVEX_B).
//
// The Suff parameter is accepted but not referenced in this body.
multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), NoItinerary,
          1, 1, vselect, 1>, AVX512FMA3Base, Sched<[WriteFMA]>;

  // Pattern is in 312 order so that the load is in a different place from
  // the 213 and 231 patterns; this helps tablegen's duplicate pattern
  // detection.
  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)),
          NoItinerary, 1, 0>, AVX512FMA3Base, Sched<[WriteFMALd, ReadAfterLd]>;

  // Pattern is in 312 order so that the load is in a different place from
  // the 213 and 231 patterns; this helps tablegen's duplicate pattern
  // detection.
  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
         (ins _.RC:$src2, _.ScalarMemOp:$src3),
         OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
         "$src2, ${src3}"##_.BroadcastStr,
         (_.VT (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
                       _.RC:$src1, _.RC:$src2)), NoItinerary, 1, 0>,
         AVX512FMA3Base, EVEX_B, Sched<[WriteFMALd, ReadAfterLd]>;
  }
}
6366
// Packed FMA, "132" operand form, register form with an explicit rounding
// control operand ($rc, EVEX_B + EVEX_RC).  OpNode is the rounding variant
// of the node and is called as ($src1, $src3, $src2, rc).
//
// The Suff parameter is accepted but not referenced in this body.
multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
          (_.VT ( OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 imm:$rc))),
          NoItinerary, 1, 1, vselect, 1>,
          AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[WriteFMA]>;
}
Igor Bregera7a8e9a2015-06-29 09:10:00 +00006377
// Instantiates the "132" packed FMA forms for all three vector widths.
// The 512-bit record set (Z, HasAVX512) combines the r/m/mb forms with the
// rounding-control form; the 256- and 128-bit sets (Z256 / Z128, HasVLX +
// HasAVX512) get only the r/m/mb forms.
multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   SDNode OpNodeRnd, AVX512VLVectorVTInfo _,
                                   string Suff> {
  let Predicates = [HasAVX512] in {
    defm Z    : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info512, Suff>,
                avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, _.info512,
                                      Suff>, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info256, Suff>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info128, Suff>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}
6393
// Emits the single-precision (PS) and double-precision (PD) variants of a
// "132" packed FMA.  The mnemonic gets a "ps" / "pd" suffix and the PD
// variant mixes in VEX_W.
multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode OpNodeRnd > {
  defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
                                    avx512vl_f32_info, "PS">;
  defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
                                    avx512vl_f64_info, "PD">, VEX_W;
}
6401
// "132" packed FMA instruction families: opcode, mnemonic stem, plain node,
// rounding node.
defm VFMADD132    : avx512_fma3p_132_f<0x98, "vfmadd132", X86Fmadd, X86FmaddRnd>;
defm VFMSUB132    : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD132   : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB132   : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86Fnmsub, X86FnmsubRnd>;
Elena Demikhovskyfcea06a2014-12-23 10:30:39 +00006408
// Scalar FMA
// Shared body for one scalar FMA operand form.  The caller supplies the
// selection DAGs; this multiclass only wires them to instruction records.
//
//   RHS_VEC_r  - pattern for r_Int  (vector register class, register $src3).
//   RHS_VEC_m  - pattern for m_Int  (vector register class, memory $src3).
//   RHS_VEC_rb - pattern for rb_Int (register form with rounding control).
//   RHS_r      - full pattern for the FRC register form "r".
//   RHS_m      - full pattern for the FRC memory form "m".
//   MaskOnlyReg - when set, "r" is emitted with an empty pattern list.
//
// $src1 is tied to $dst for every record.  The FRC forms "r" and "m" are
// isCodeGenOnly and isCommutable.
multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                               dag RHS_VEC_r, dag RHS_VEC_m, dag RHS_VEC_rb,
                               dag RHS_r, dag RHS_m, bit MaskOnlyReg> {
let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
  defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
          "$src3, $src2", "$src2, $src3", RHS_VEC_r, NoItinerary, 1, 1>,
          AVX512FMA3Base, Sched<[WriteFMA]>;

  defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
          "$src3, $src2", "$src2, $src3", RHS_VEC_m, NoItinerary, 1, 1>,
          AVX512FMA3Base, Sched<[WriteFMALd, ReadAfterLd]>;

  defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
         (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
         OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", RHS_VEC_rb,
         NoItinerary, 1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC,
         Sched<[WriteFMA]>;

  let isCodeGenOnly = 1, isCommutable = 1 in {
    def r     : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
                     (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
                     !strconcat(OpcodeStr,
                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                     !if(MaskOnlyReg, [], [RHS_r])>, Sched<[WriteFMA]>;
    def m     : AVX512FMA3S<opc, MRMSrcMem, (outs _.FRC:$dst),
                    (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
                    !strconcat(OpcodeStr,
                               "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                    [RHS_m]>, Sched<[WriteFMALd, ReadAfterLd]>;
  }// isCodeGenOnly = 1
}// Constraints = "$src1 = $dst"
}
Igor Breger15820b02015-07-01 13:24:28 +00006444
// Defines the 213, 231 and 132 operand-order variants of one scalar FMA3
// operation for a single element type by instantiating avx512_fma3s_common
// once per opcode.
//   opc213/opc231/opc132    - opcode byte for each operand-order variant.
//   OpcodeStr               - mnemonic stem; the variant digits and _.Suffix
//                             are appended to it.
//   OpNode                  - node used in the _.FRC (scalar register)
//                             patterns.
//   OpNodes1/OpNodes3       - nodes used in the _.RC (vector register)
//                             patterns without a rounding operand.
//   OpNodeRnds1/OpNodeRnds3 - nodes used in the patterns that take
//                             (i32 imm:$rc).
//   _                       - X86VectorVTInfo describing the element type.
//   SUFF                    - string pasted into the record name
//                             (NAME#213#SUFF#Z and so on).
// NOTE(review): the trailing 0/1 argument of each instantiation is presumably
// the MaskOnlyReg flag of avx512_fma3s_common - confirm against its header.
6445multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
Craig Topper07dac552017-11-06 05:48:25 +00006446 string OpcodeStr, SDNode OpNode, SDNode OpNodes1,
6447 SDNode OpNodeRnds1, SDNode OpNodes3,
6448 SDNode OpNodeRnds3, X86VectorVTInfo _,
6449 string SUFF> {
Craig Topper2caa97c2017-02-25 19:36:28 +00006450 let ExeDomain = _.ExeDomain in {
Craig Topperb16598d2017-09-01 07:58:16 +00006451 defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
Craig Toppera55b4832016-12-09 06:42:28 +00006452 // Operands for intrinsic are in 123 order to preserve passthru
6453 // semantics.
Craig Topper07dac552017-11-06 05:48:25 +00006454 (_.VT (OpNodes1 _.RC:$src1, _.RC:$src2, _.RC:$src3)),
6455 (_.VT (OpNodes1 _.RC:$src1, _.RC:$src2,
6456 _.ScalarIntMemCPat:$src3)),
Craig Toppera55b4832016-12-09 06:42:28 +00006457 (_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src2, _.RC:$src3,
Igor Breger15820b02015-07-01 13:24:28 +00006458 (i32 imm:$rc))),
6459 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6460 _.FRC:$src3))),
6461 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
Craig Topper69e22782017-09-04 07:35:05 +00006462 (_.ScalarLdFrag addr:$src3)))), 0>;
Igor Breger15820b02015-07-01 13:24:28 +00006463
// 231 form: $src1 is passed as the last operand of every node.
Craig Topperb16598d2017-09-01 07:58:16 +00006464 defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
Craig Topper07dac552017-11-06 05:48:25 +00006465 (_.VT (OpNodes3 _.RC:$src2, _.RC:$src3, _.RC:$src1)),
6466 (_.VT (OpNodes3 _.RC:$src2, _.ScalarIntMemCPat:$src3,
6467 _.RC:$src1)),
Craig Toppera55b4832016-12-09 06:42:28 +00006468 (_.VT ( OpNodeRnds3 _.RC:$src2, _.RC:$src3, _.RC:$src1,
Igor Breger15820b02015-07-01 13:24:28 +00006469 (i32 imm:$rc))),
6470 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
6471 _.FRC:$src1))),
6472 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
Craig Topper69e22782017-09-04 07:35:05 +00006473 (_.ScalarLdFrag addr:$src3), _.FRC:$src1))), 1>;
Igor Breger15820b02015-07-01 13:24:28 +00006474
Craig Toppereec768b2017-09-06 03:35:58 +00006475 // One pattern is in 312 order so that the load is in a different place from the
6476 // 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
// The 132 form passes null_frag for the vector register and rounding patterns;
// only its vector memory pattern and its two _.FRC patterns are real.
Craig Topperb16598d2017-09-01 07:58:16 +00006477 defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
Craig Topper69e22782017-09-04 07:35:05 +00006478 (null_frag),
Craig Topper07dac552017-11-06 05:48:25 +00006479 (_.VT (OpNodes1 _.RC:$src1, _.ScalarIntMemCPat:$src3,
6480 _.RC:$src2)),
Craig Topper69e22782017-09-04 07:35:05 +00006481 (null_frag),
Igor Breger15820b02015-07-01 13:24:28 +00006482 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
6483 _.FRC:$src2))),
Craig Toppereec768b2017-09-06 03:35:58 +00006484 (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
6485 _.FRC:$src1, _.FRC:$src2))), 1>;
Craig Topper2caa97c2017-02-25 19:36:28 +00006486 }
Igor Breger15820b02015-07-01 13:24:28 +00006487}
6488
// Instantiates avx512_fma3s_all twice under Predicates = [HasAVX512]:
//   - with f32x_info and the "SS" suffix (EVEX_CD8<32, CD8VT1>, VEX_LIG);
//   - with f64x_info and the "SD" suffix (EVEX_CD8<64, CD8VT1>, VEX_LIG,
//     VEX_W).
// The opcodes, mnemonic stem and all five nodes are forwarded unchanged.
6489multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
Craig Topper07dac552017-11-06 05:48:25 +00006490 string OpcodeStr, SDNode OpNode, SDNode OpNodes1,
6491 SDNode OpNodeRnds1, SDNode OpNodes3,
Craig Toppera55b4832016-12-09 06:42:28 +00006492 SDNode OpNodeRnds3> {
Igor Breger15820b02015-07-01 13:24:28 +00006493 let Predicates = [HasAVX512] in {
6494 defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
Craig Topper07dac552017-11-06 05:48:25 +00006495 OpNodes1, OpNodeRnds1, OpNodes3, OpNodeRnds3,
6496 f32x_info, "SS">,
Craig Toppera55b4832016-12-09 06:42:28 +00006497 EVEX_CD8<32, CD8VT1>, VEX_LIG;
Igor Breger15820b02015-07-01 13:24:28 +00006498 defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
Craig Topper07dac552017-11-06 05:48:25 +00006499 OpNodes1, OpNodeRnds1, OpNodes3, OpNodeRnds3,
6500 f64x_info, "SD">,
Craig Toppera55b4832016-12-09 06:42:28 +00006501 EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
Igor Breger15820b02015-07-01 13:24:28 +00006502 }
6503}
6504
// Scalar FMA3 families. Following the avx512_fma3s parameter order, the three
// opcode bytes are the 213, 231 and 132 forms, followed by the mnemonic stem,
// the node for the scalar register patterns, and the s1/Rnds1/s3/Rnds3 nodes.
Craig Topper07dac552017-11-06 05:48:25 +00006505defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86Fmadds1,
6506 X86FmaddRnds1, X86Fmadds3, X86FmaddRnds3>;
6507defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86Fmsubs1,
6508 X86FmsubRnds1, X86Fmsubs3, X86FmsubRnds3>;
6509defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86Fnmadds1,
6510 X86FnmaddRnds1, X86Fnmadds3, X86FnmaddRnds3>;
6511defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86Fnmsubs1,
6512 X86FnmsubRnds1, X86Fnmsubs3, X86FnmsubRnds3>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006513
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006513
6514//===----------------------------------------------------------------------===//
Asaf Badouh655822a2016-01-25 11:14:24 +00006515// AVX-512 Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
6516//===----------------------------------------------------------------------===//
// Register (r), full-width memory (m) and broadcast-memory (mb) forms of a
// three-source instruction built on AVX512_maskable_3src. $src1 is tied to
// $dst by the enclosing Constraints and is always passed to OpNode as the last
// operand; $src2 and $src3 (or the loaded/broadcast value) come first.
//   opc       - opcode byte shared by all three forms.
//   OpcodeStr - mnemonic.
//   OpNode    - node matched by all three patterns.
//   itins     - supplies itins.rr / itins.rm and the Sched classes.
//   _         - X86VectorVTInfo for the vector width being defined.
6517let Constraints = "$src1 = $dst" in {
6518multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim4d08aed2017-12-05 14:59:40 +00006519 OpndItins itins, X86VectorVTInfo _> {
Craig Topper47e14ea2017-09-24 19:30:55 +00006520 // NOTE: The SDNode has the multiply operands first and the add operand last.
6521 // This enables commuted load patterns to be autogenerated by tablegen.
Craig Topper6bf9b802017-02-26 06:45:45 +00006522 let ExeDomain = _.ExeDomain in {
Asaf Badouh655822a2016-01-25 11:14:24 +00006523 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6524 (ins _.RC:$src2, _.RC:$src3),
6525 OpcodeStr, "$src3, $src2", "$src2, $src3",
Simon Pilgrim4d08aed2017-12-05 14:59:40 +00006526 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), itins.rr, 1, 1>,
6527 AVX512FMA3Base, Sched<[itins.Sched]>;
Asaf Badouh655822a2016-01-25 11:14:24 +00006528
// Full-width load of $src3 via _.LdFrag.
Craig Toppere1cac152016-06-07 07:27:54 +00006529 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6530 (ins _.RC:$src2, _.MemOp:$src3),
6531 OpcodeStr, "$src3, $src2", "$src2, $src3",
Simon Pilgrim4d08aed2017-12-05 14:59:40 +00006532 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
6533 itins.rm>, AVX512FMA3Base, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Asaf Badouh655822a2016-01-25 11:14:24 +00006534
// Scalar load of $src3 wrapped in X86VBroadcast; marked EVEX_B and printed
// with _.BroadcastStr appended to the memory operand.
Craig Toppere1cac152016-06-07 07:27:54 +00006535 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6536 (ins _.RC:$src2, _.ScalarMemOp:$src3),
6537 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
6538 !strconcat("$src2, ${src3}", _.BroadcastStr ),
Craig Topper47e14ea2017-09-24 19:30:55 +00006539 (OpNode _.RC:$src2,
6540 (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
Simon Pilgrim4d08aed2017-12-05 14:59:40 +00006541 _.RC:$src1), itins.rm>,
6542 AVX512FMA3Base, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Craig Topper6bf9b802017-02-26 06:45:45 +00006543 }
Asaf Badouh655822a2016-01-25 11:14:24 +00006544}
6545} // Constraints = "$src1 = $dst"
6546
// Instantiates avx512_pmadd52_rm for each vector width described by the
// AVX512VLVectorVTInfo argument:
//   Z    - _.info512, EVEX_V512, requires HasIFMA.
//   Z256 - _.info256, EVEX_V256, requires HasVLX and HasIFMA.
//   Z128 - _.info128, EVEX_V128, requires HasVLX and HasIFMA.
// Each width uses EVEX_CD8 with its own EltSize and CD8VF.
6547multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim4d08aed2017-12-05 14:59:40 +00006548 OpndItins itins, AVX512VLVectorVTInfo _> {
Asaf Badouh655822a2016-01-25 11:14:24 +00006549 let Predicates = [HasIFMA] in {
Simon Pilgrim4d08aed2017-12-05 14:59:40 +00006550 defm Z : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, itins, _.info512>,
Asaf Badouh655822a2016-01-25 11:14:24 +00006551 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6552 }
6553 let Predicates = [HasVLX, HasIFMA] in {
Simon Pilgrim4d08aed2017-12-05 14:59:40 +00006554 defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, itins, _.info256>,
Asaf Badouh655822a2016-01-25 11:14:24 +00006555 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
Simon Pilgrim4d08aed2017-12-05 14:59:40 +00006556 defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, itins, _.info128>,
Asaf Badouh655822a2016-01-25 11:14:24 +00006557 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6558 }
6559}
6560
// The two IFMA instructions: opcode 0xb4 with x86vpmadd52l and opcode 0xb5
// with x86vpmadd52h, both on avx512vl_i64_info with SSE_PMADD itineraries and
// VEX_W.
6561defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
Simon Pilgrim4d08aed2017-12-05 14:59:40 +00006562 SSE_PMADD, avx512vl_i64_info>, VEX_W;
Asaf Badouh655822a2016-01-25 11:14:24 +00006563defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
Simon Pilgrim4d08aed2017-12-05 14:59:40 +00006564 SSE_PMADD, avx512vl_i64_info>, VEX_W;
Asaf Badouh655822a2016-01-25 11:14:24 +00006565
6566//===----------------------------------------------------------------------===//
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006567// AVX-512 Scalar convert from signed integer to float/double
6568//===----------------------------------------------------------------------===//
6569
// Integer-to-scalar-FP conversion forms that take no explicit rounding
// operand.
//   rr / rm         - operate on DstVT.FRC, carry an empty pattern list and
//                     are marked hasSideEffects = 0 (rm is also mayLoad = 1).
//   rr_Int / rm_Int - isCodeGenOnly forms on DstVT.RC; they match OpNode with
//                     (i32 FROUND_CURRENT) as the last operand. rm_Int loads
//                     its source through ld_frag.
// Parameters: opc is the opcode byte, SrcRC the integer register class,
// x86memop the memory operand class and asm the mnemonic.
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006570multiclass avx512_vcvtsi<bits<8> opc, SDNode OpNode, OpndItins itins,
6571 RegisterClass SrcRC, X86VectorVTInfo DstVT,
6572 X86MemOperand x86memop, PatFrag ld_frag, string asm> {
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006573 let hasSideEffects = 0 in {
6574 def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
6575 (ins DstVT.FRC:$src1, SrcRC:$src),
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006576 !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), [],
6577 itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006578 let mayLoad = 1 in
6579 def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
6580 (ins DstVT.FRC:$src1, x86memop:$src),
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006581 !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), [],
6582 itins.rm>, EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006583 } // hasSideEffects = 0
6584 let isCodeGenOnly = 1 in {
6585 def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
6586 (ins DstVT.RC:$src1, SrcRC:$src2),
6587 !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6588 [(set DstVT.RC:$dst,
6589 (OpNode (DstVT.VT DstVT.RC:$src1),
6590 SrcRC:$src2,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006591 (i32 FROUND_CURRENT)))], itins.rr>,
6592 EVEX_4V, Sched<[itins.Sched]>;
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006593
6594 def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
6595 (ins DstVT.RC:$src1, x86memop:$src2),
6596 !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6597 [(set DstVT.RC:$dst,
6598 (OpNode (DstVT.VT DstVT.RC:$src1),
6599 (ld_frag addr:$src2),
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006600 (i32 FROUND_CURRENT)))], itins.rm>,
6601 EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006602 }//isCodeGenOnly = 1
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006603}
Elena Demikhovskyd8fda622015-03-30 09:29:28 +00006604
// Defines rrb_Int, the register form with an explicit AVX512RC:$rc operand.
// Its pattern matches OpNode on (DstVT.VT DstVT.RC:$src1), SrcRC:$src2 and
// (i32 imm:$rc); the record is marked EVEX_4V, EVEX_B and EVEX_RC.
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006605multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode, OpndItins itins,
6606 RegisterClass SrcRC, X86VectorVTInfo DstVT, string asm> {
Igor Bregerabe4a792015-06-14 12:44:55 +00006607 def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
6608 (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006609 !strconcat(asm,
6610 "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
Igor Bregerabe4a792015-06-14 12:44:55 +00006611 [(set DstVT.RC:$dst,
6612 (OpNode (DstVT.VT DstVT.RC:$src1),
6613 SrcRC:$src2,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006614 (i32 imm:$rc)))], itins.rr>,
6615 EVEX_4V, EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
Igor Bregerabe4a792015-06-14 12:44:55 +00006616}
6617
// Combines avx512_vcvtsi_round (rrb_Int) and avx512_vcvtsi (rr, rm, rr_Int,
// rm_Int) under a single NAME and applies VEX_LIG to the resulting records.
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006618multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, OpndItins itins,
6619 RegisterClass SrcRC, X86VectorVTInfo DstVT,
6620 X86MemOperand x86memop, PatFrag ld_frag, string asm> {
6621 defm NAME : avx512_vcvtsi_round<opc, OpNode, itins, SrcRC, DstVT, asm>,
6622 avx512_vcvtsi<opc, OpNode, itins, SrcRC, DstVT, x86memop,
6623 ld_frag, asm>, VEX_LIG;
Igor Bregerabe4a792015-06-14 12:44:55 +00006624}
6625
// Signed and unsigned integer to scalar FP conversions, their suffix-less
// assembler aliases and the sint_to_fp / uint_to_fp selection patterns, all
// guarded by HasAVX512.
Andrew Trick15a47742013-10-09 05:11:10 +00006626let Predicates = [HasAVX512] in {
// Signed conversions (opcode 0x2A) from GR32/GR64 to f32/f64.
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006627defm VCVTSI2SSZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, SSE_CVT_SI2SS, GR32,
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006628 v4f32x_info, i32mem, loadi32, "cvtsi2ss{l}">,
6629 XS, EVEX_CD8<32, CD8VT1>;
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006630defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, SSE_CVT_SI2SS, GR64,
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006631 v4f32x_info, i64mem, loadi64, "cvtsi2ss{q}">,
6632 XS, VEX_W, EVEX_CD8<64, CD8VT1>;
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006633defm VCVTSI2SDZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, SSE_CVT_SI2SD, GR32,
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006634 v2f64x_info, i32mem, loadi32, "cvtsi2sd{l}">,
6635 XD, EVEX_CD8<32, CD8VT1>;
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006636defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, SSE_CVT_SI2SD, GR64,
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006637 v2f64x_info, i64mem, loadi64, "cvtsi2sd{q}">,
6638 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006639
// Map the suffix-less mnemonics with a memory operand to the i32mem forms.
Craig Topper8f85ad12016-11-14 02:46:58 +00006640def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
6641 (VCVTSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0>;
6642def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
6643 (VCVTSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0>;
6644
// sint_to_fp from memory: the $src1 operand is fed an IMPLICIT_DEF.
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006645def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
6646 (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
6647def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006648 (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006649def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
6650 (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
6651def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006652 (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006653
// sint_to_fp from a general-purpose register.
6654def : Pat<(f32 (sint_to_fp GR32:$src)),
6655 (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
6656def : Pat<(f32 (sint_to_fp GR64:$src)),
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006657 (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006658def : Pat<(f64 (sint_to_fp GR32:$src)),
6659 (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
6660def : Pat<(f64 (sint_to_fp GR64:$src)),
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006661 (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
6662
// Unsigned conversions (opcode 0x7B). VCVTUSI2SDZ instantiates avx512_vcvtsi
// directly, so it has no rrb_Int form, and it adds VEX_LIG itself.
// NOTE(review): presumably because a 32-bit unsigned value converts to f64
// exactly and needs no rounding control - confirm.
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006663defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, SSE_CVT_SI2SS, GR32,
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006664 v4f32x_info, i32mem, loadi32,
6665 "cvtusi2ss{l}">, XS, EVEX_CD8<32, CD8VT1>;
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006666defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, SSE_CVT_SI2SS, GR64,
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006667 v4f32x_info, i64mem, loadi64, "cvtusi2ss{q}">,
6668 XS, VEX_W, EVEX_CD8<64, CD8VT1>;
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006669defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, X86UintToFpRnd, SSE_CVT_SI2SD, GR32, v2f64x_info,
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006670 i32mem, loadi32, "cvtusi2sd{l}">,
6671 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006672defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, SSE_CVT_SI2SD, GR64,
Igor Bregerdfcc3d32015-06-17 07:23:57 +00006673 v2f64x_info, i64mem, loadi64, "cvtusi2sd{q}">,
6674 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006675
// Map the suffix-less unsigned mnemonics with a memory operand to the i32mem
// forms.
Craig Topper8f85ad12016-11-14 02:46:58 +00006676def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
6677 (VCVTUSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0>;
6678def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
6679 (VCVTUSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0>;
6680
// uint_to_fp from memory.
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006681def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
6682 (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
6683def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))),
6684 (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
6685def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))),
6686 (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
6687def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))),
6688 (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
6689
// uint_to_fp from a general-purpose register.
6690def : Pat<(f32 (uint_to_fp GR32:$src)),
6691 (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
6692def : Pat<(f32 (uint_to_fp GR64:$src)),
6693 (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
6694def : Pat<(f64 (uint_to_fp GR32:$src)),
6695 (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
6696def : Pat<(f64 (uint_to_fp GR64:$src)),
6697 (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
Andrew Trick15a47742013-10-09 05:11:10 +00006698}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006699
6700//===----------------------------------------------------------------------===//
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006701// AVX-512 Scalar convert from float/double to integer
6702//===----------------------------------------------------------------------===//
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006703
// Scalar FP to integer conversion forms that take the source as a vector
// register (SrcVT.RC) or an intrinsic scalar memory operand, all under
// Predicates = [HasAVX512]:
//   rr_Int  - matches OpNode with (i32 FROUND_CURRENT).
//   rrb_Int - takes AVX512RC:$rc and matches OpNode with (i32 imm:$rc);
//             marked EVEX_B and EVEX_RC.
//   rm_Int  - memory form; isCodeGenOnly and ForceDisassemble are both set
//             from the CodeGenOnly parameter (default 1).
// Two InstAliases spelled "v" # asm # aliasStr are added for rr_Int and
// rrb_Int.
6704multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
6705 X86VectorVTInfo DstVT, SDNode OpNode,
Craig Toppera49c3542018-01-06 19:20:33 +00006706 OpndItins itins, string asm,
6707 string aliasStr,
6708 bit CodeGenOnly = 1> {
Craig Toppere1cac152016-06-07 07:27:54 +00006709 let Predicates = [HasAVX512] in {
Craig Toppera0be5a02017-12-10 19:47:56 +00006710 def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
Asaf Badouh2744d212015-09-20 14:31:19 +00006711 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006712 [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 FROUND_CURRENT)))],
6713 itins.rr>, EVEX, VEX_LIG, Sched<[itins.Sched]>;
Craig Toppera0be5a02017-12-10 19:47:56 +00006714 def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
Craig Topper1de942b2017-12-10 17:42:44 +00006715 !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
6716 [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 imm:$rc)))],
6717 itins.rr>, EVEX, VEX_LIG, EVEX_B, EVEX_RC,
6718 Sched<[itins.Sched]>;
Craig Toppera49c3542018-01-06 19:20:33 +00006719 let isCodeGenOnly = CodeGenOnly, ForceDisassemble = CodeGenOnly in
Craig Toppera0be5a02017-12-10 19:47:56 +00006720 def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
Asaf Badouhad5c3fc2016-02-07 14:59:13 +00006721 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
Simon Pilgrimb13961d2016-06-11 14:34:10 +00006722 [(set DstVT.RC:$dst, (OpNode
Craig Topper5a63ca22017-03-13 03:59:06 +00006723 (SrcVT.VT SrcVT.ScalarIntMemCPat:$src),
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006724 (i32 FROUND_CURRENT)))], itins.rm>,
6725 EVEX, VEX_LIG, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Craig Toppere2659d82018-01-05 23:13:54 +00006726
6727 def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
6728 (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0>;
6729 def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
6730 (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0>;
Craig Toppera49c3542018-01-06 19:20:33 +00006731 } // Predicates = [HasAVX512]
6732}
6733
// Inherits avx512_cvt_s_int_round with CodeGenOnly = 0, so rm_Int gets
// isCodeGenOnly = 0 and ForceDisassemble = 0, and adds one more InstAlias
// spelled "v" # asm # aliasStr that targets rm_Int with an IntScalarMemOp
// source.
6734multiclass avx512_cvt_s_int_round_aliases<bits<8> opc, X86VectorVTInfo SrcVT,
6735 X86VectorVTInfo DstVT, SDNode OpNode,
6736 OpndItins itins, string asm,
6737 string aliasStr> :
6738 avx512_cvt_s_int_round<opc, SrcVT, DstVT, OpNode, itins, asm, aliasStr, 0> {
6739 let Predicates = [HasAVX512] in {
Craig Toppere2659d82018-01-05 23:13:54 +00006740 def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
6741 (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
6742 SrcVT.IntScalarMemOp:$src), 0>;
Simon Pilgrimb13961d2016-06-11 14:34:10 +00006743 } // Predicates = [HasAVX512]
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006744}
Asaf Badouh2744d212015-09-20 14:31:19 +00006745
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006746// Convert float/double to signed/unsigned int 32/64
// Signed forms (opcode 0x2D, X86cvts2si) instantiate avx512_cvt_s_int_round;
// unsigned forms (opcode 0x79, X86cvts2usi) instantiate
// avx512_cvt_s_int_round_aliases. The 64-bit destination variants add VEX_W.
// EVEX_CD8 is 32 for the f32 sources and 64 for the f64 sources.
Simon Pilgrimb13961d2016-06-11 14:34:10 +00006747defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,
Craig Toppere2659d82018-01-05 23:13:54 +00006748 X86cvts2si, SSE_CVT_SS2SI_32, "cvtss2si", "{l}">,
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006749 XS, EVEX_CD8<32, CD8VT1>;
Simon Pilgrimb13961d2016-06-11 14:34:10 +00006750defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info,
Craig Toppere2659d82018-01-05 23:13:54 +00006751 X86cvts2si, SSE_CVT_SS2SI_64, "cvtss2si", "{q}">,
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006752 XS, VEX_W, EVEX_CD8<32, CD8VT1>;
Craig Toppera49c3542018-01-06 19:20:33 +00006753defm VCVTSS2USIZ: avx512_cvt_s_int_round_aliases<0x79, f32x_info, i32x_info,
Craig Toppere2659d82018-01-05 23:13:54 +00006754 X86cvts2usi, SSE_CVT_SS2SI_32, "cvtss2usi", "{l}">,
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006755 XS, EVEX_CD8<32, CD8VT1>;
Craig Toppera49c3542018-01-06 19:20:33 +00006756defm VCVTSS2USI64Z: avx512_cvt_s_int_round_aliases<0x79, f32x_info, i64x_info,
Craig Toppere2659d82018-01-05 23:13:54 +00006757 X86cvts2usi, SSE_CVT_SS2SI_64, "cvtss2usi", "{q}">,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006758 XS, VEX_W, EVEX_CD8<32, CD8VT1>;
Simon Pilgrimb13961d2016-06-11 14:34:10 +00006759defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info,
Craig Toppere2659d82018-01-05 23:13:54 +00006760 X86cvts2si, SSE_CVT_SD2SI, "cvtsd2si", "{l}">,
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006761 XD, EVEX_CD8<64, CD8VT1>;
Asaf Badouhad5c3fc2016-02-07 14:59:13 +00006762defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info,
Craig Toppere2659d82018-01-05 23:13:54 +00006763 X86cvts2si, SSE_CVT_SD2SI, "cvtsd2si", "{q}">,
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006764 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
Craig Toppera49c3542018-01-06 19:20:33 +00006765defm VCVTSD2USIZ: avx512_cvt_s_int_round_aliases<0x79, f64x_info, i32x_info,
Craig Toppere2659d82018-01-05 23:13:54 +00006766 X86cvts2usi, SSE_CVT_SD2SI, "cvtsd2usi", "{l}">,
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006767 XD, EVEX_CD8<64, CD8VT1>;
Craig Toppera49c3542018-01-06 19:20:33 +00006768defm VCVTSD2USI64Z: avx512_cvt_s_int_round_aliases<0x79, f64x_info, i64x_info,
Craig Toppere2659d82018-01-05 23:13:54 +00006769 X86cvts2usi, SSE_CVT_SD2SI, "cvtsd2usi", "{q}">,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006770 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006771
Asaf Badouhad5c3fc2016-02-07 14:59:13 +00006772// The SSE versions of these instructions are disabled for AVX512.
6773// Therefore, the SSE intrinsics are mapped to the AVX512 instructions.
// Select the SSE/SSE2 cvtss2si / cvtsd2si intrinsics (32- and 64-bit results)
// to the rr_Int forms for a VR128X source and to the rm_Int forms for an
// sse_load_f32 / sse_load_f64 source.
6774let Predicates = [HasAVX512] in {
6775 def : Pat<(i32 (int_x86_sse_cvtss2si (v4f32 VR128X:$src))),
Craig Toppera0be5a02017-12-10 19:47:56 +00006776 (VCVTSS2SIZrr_Int VR128X:$src)>;
Craig Topper5a63ca22017-03-13 03:59:06 +00006777 def : Pat<(i32 (int_x86_sse_cvtss2si sse_load_f32:$src)),
Craig Toppera0be5a02017-12-10 19:47:56 +00006778 (VCVTSS2SIZrm_Int sse_load_f32:$src)>;
Asaf Badouhad5c3fc2016-02-07 14:59:13 +00006779 def : Pat<(i64 (int_x86_sse_cvtss2si64 (v4f32 VR128X:$src))),
Craig Toppera0be5a02017-12-10 19:47:56 +00006780 (VCVTSS2SI64Zrr_Int VR128X:$src)>;
Craig Topper5a63ca22017-03-13 03:59:06 +00006781 def : Pat<(i64 (int_x86_sse_cvtss2si64 sse_load_f32:$src)),
Craig Toppera0be5a02017-12-10 19:47:56 +00006782 (VCVTSS2SI64Zrm_Int sse_load_f32:$src)>;
Asaf Badouhad5c3fc2016-02-07 14:59:13 +00006783 def : Pat<(i32 (int_x86_sse2_cvtsd2si (v2f64 VR128X:$src))),
Craig Toppera0be5a02017-12-10 19:47:56 +00006784 (VCVTSD2SIZrr_Int VR128X:$src)>;
Craig Topper5a63ca22017-03-13 03:59:06 +00006785 def : Pat<(i32 (int_x86_sse2_cvtsd2si sse_load_f64:$src)),
Craig Toppera0be5a02017-12-10 19:47:56 +00006786 (VCVTSD2SIZrm_Int sse_load_f64:$src)>;
Asaf Badouhad5c3fc2016-02-07 14:59:13 +00006787 def : Pat<(i64 (int_x86_sse2_cvtsd2si64 (v2f64 VR128X:$src))),
Craig Toppera0be5a02017-12-10 19:47:56 +00006788 (VCVTSD2SI64Zrr_Int VR128X:$src)>;
Craig Topper5a63ca22017-03-13 03:59:06 +00006789 def : Pat<(i64 (int_x86_sse2_cvtsd2si64 sse_load_f64:$src)),
Craig Toppera0be5a02017-12-10 19:47:56 +00006790 (VCVTSD2SI64Zrm_Int sse_load_f64:$src)>;
Asaf Badouhad5c3fc2016-02-07 14:59:13 +00006791} // HasAVX512
6792
// Select the SSE/SSE2 cvtsi2ss / cvtsi2sd intrinsics (32- and 64-bit sources)
// and the AVX-512 cvtusi2sd intrinsic to the rr_Int forms for a register
// source and to the rm_Int forms for a loadi32 / loadi64 source.
Craig Topperac941b92016-09-25 16:33:53 +00006793let Predicates = [HasAVX512] in {
6794 def : Pat<(int_x86_sse_cvtsi2ss VR128X:$src1, GR32:$src2),
6795 (VCVTSI2SSZrr_Int VR128X:$src1, GR32:$src2)>;
6796 def : Pat<(int_x86_sse_cvtsi2ss VR128X:$src1, (loadi32 addr:$src2)),
6797 (VCVTSI2SSZrm_Int VR128X:$src1, addr:$src2)>;
6798 def : Pat<(int_x86_sse_cvtsi642ss VR128X:$src1, GR64:$src2),
6799 (VCVTSI642SSZrr_Int VR128X:$src1, GR64:$src2)>;
6800 def : Pat<(int_x86_sse_cvtsi642ss VR128X:$src1, (loadi64 addr:$src2)),
6801 (VCVTSI642SSZrm_Int VR128X:$src1, addr:$src2)>;
6802 def : Pat<(int_x86_sse2_cvtsi2sd VR128X:$src1, GR32:$src2),
6803 (VCVTSI2SDZrr_Int VR128X:$src1, GR32:$src2)>;
6804 def : Pat<(int_x86_sse2_cvtsi2sd VR128X:$src1, (loadi32 addr:$src2)),
6805 (VCVTSI2SDZrm_Int VR128X:$src1, addr:$src2)>;
6806 def : Pat<(int_x86_sse2_cvtsi642sd VR128X:$src1, GR64:$src2),
6807 (VCVTSI642SDZrr_Int VR128X:$src1, GR64:$src2)>;
6808 def : Pat<(int_x86_sse2_cvtsi642sd VR128X:$src1, (loadi64 addr:$src2)),
6809 (VCVTSI642SDZrm_Int VR128X:$src1, addr:$src2)>;
6810 def : Pat<(int_x86_avx512_cvtusi2sd VR128X:$src1, GR32:$src2),
6811 (VCVTUSI2SDZrr_Int VR128X:$src1, GR32:$src2)>;
6812 def : Pat<(int_x86_avx512_cvtusi2sd VR128X:$src1, (loadi32 addr:$src2)),
6813 (VCVTUSI2SDZrm_Int VR128X:$src1, addr:$src2)>;
6814} // Predicates = [HasAVX512]
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006815
Elad Cohen0c260102017-01-11 09:11:48 +00006816// Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
6817// which produce unnecessary vmovs{s,d} instructions
// Each pattern folds X86Movss / X86Movsd of a scalar_to_vector'ed sint_to_fp
// result into $dst as a single rr_Int conversion, with $dst as the first
// source operand and the integer register as the second.
6818let Predicates = [HasAVX512] in {
6819def : Pat<(v4f32 (X86Movss
6820 (v4f32 VR128X:$dst),
6821 (v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
6822 (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
6823
6824def : Pat<(v4f32 (X86Movss
6825 (v4f32 VR128X:$dst),
6826 (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
6827 (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
6828
6829def : Pat<(v2f64 (X86Movsd
6830 (v2f64 VR128X:$dst),
6831 (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
6832 (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
6833
6834def : Pat<(v2f64 (X86Movsd
6835 (v2f64 VR128X:$dst),
6836 (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
6837 (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
6838} // Predicates = [HasAVX512]
6839
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006840// Convert float/double to signed/unsigned int 32/64 with truncation
// Truncating scalar FP to integer conversion forms, all under
// Predicates = [HasAVX512]:
//   rr / rm - isCodeGenOnly forms on _SrcRC.FRC / a scalar load; they match
//             OpNode directly.
//   rr_Int  - vector-register source; matches OpNodeRnd with
//             (i32 FROUND_CURRENT).
//   rrb_Int - printed with {sae}; matches OpNodeRnd with (i32 FROUND_NO_EXC)
//             and is marked EVEX_B.
//   rm_Int  - intrinsic memory form; isCodeGenOnly and ForceDisassemble are
//             both set from the CodeGenOnly parameter (default 1).
// Two InstAliases spelled asm # aliasStr are added for rr_Int and rrb_Int.
// No "v" is prepended here, unlike avx512_cvt_s_int_round.
Simon Pilgrim18bcf932016-02-03 09:41:59 +00006841multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
6842 X86VectorVTInfo _DstRC, SDNode OpNode,
Craig Topper61d8a602018-01-06 21:27:25 +00006843 SDNode OpNodeRnd, OpndItins itins, string aliasStr,
6844 bit CodeGenOnly = 1>{
Asaf Badouh2744d212015-09-20 14:31:19 +00006845let Predicates = [HasAVX512] in {
Craig Topper90353a92018-01-06 21:02:22 +00006846 let isCodeGenOnly = 1 in {
Igor Bregerc59b3a22016-08-03 10:58:05 +00006847 def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
Asaf Badouh2744d212015-09-20 14:31:19 +00006848 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006849 [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))], itins.rr>,
6850 EVEX, Sched<[itins.Sched]>;
Igor Bregerc59b3a22016-08-03 10:58:05 +00006851 def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
Asaf Badouh2744d212015-09-20 14:31:19 +00006852 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006853 [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))],
6854 itins.rm>, EVEX, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Craig Topper90353a92018-01-06 21:02:22 +00006855 }
6856
6857 def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
6858 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
6859 [(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src),
6860 (i32 FROUND_CURRENT)))], itins.rr>,
6861 EVEX, VEX_LIG, Sched<[itins.Sched]>;
6862 def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
6863 !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
6864 [(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src),
6865 (i32 FROUND_NO_EXC)))], itins.rr>,
6866 EVEX,VEX_LIG , EVEX_B, Sched<[itins.Sched]>;
Craig Topper61d8a602018-01-06 21:27:25 +00006867 let isCodeGenOnly = CodeGenOnly, ForceDisassemble = CodeGenOnly in
Craig Topper0f4ccb72018-01-06 21:02:26 +00006868 def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
6869 (ins _SrcRC.IntScalarMemOp:$src),
6870 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
6871 [(set _DstRC.RC:$dst, (OpNodeRnd
6872 (_SrcRC.VT _SrcRC.ScalarIntMemCPat:$src),
6873 (i32 FROUND_CURRENT)))], itins.rm>,
6874 EVEX, VEX_LIG, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Simon Pilgrim916485c2016-08-18 11:22:22 +00006875
Igor Bregerc59b3a22016-08-03 10:58:05 +00006876 def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
Craig Topper90353a92018-01-06 21:02:22 +00006877 (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0>;
Craig Toppere2659d82018-01-05 23:13:54 +00006878 def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
Craig Topper90353a92018-01-06 21:02:22 +00006879 (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0>;
Asaf Badouh2744d212015-09-20 14:31:19 +00006880} //HasAVX512
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006881}
6882
// Inherits avx512_cvt_s_all with CodeGenOnly = 0, so rm_Int gets
// isCodeGenOnly = 0 and ForceDisassemble = 0, and adds one more InstAlias
// spelled asm # aliasStr that targets rm_Int with an IntScalarMemOp source.
Craig Topper61d8a602018-01-06 21:27:25 +00006883multiclass avx512_cvt_s_all_unsigned<bits<8> opc, string asm,
6884 X86VectorVTInfo _SrcRC,
6885 X86VectorVTInfo _DstRC, SDNode OpNode,
6886 SDNode OpNodeRnd, OpndItins itins,
6887 string aliasStr> :
6888 avx512_cvt_s_all<opc, asm, _SrcRC, _DstRC, OpNode, OpNodeRnd, itins,
6889 aliasStr, 0> {
6890let Predicates = [HasAVX512] in {
6891 def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
6892 (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
6893 _SrcRC.IntScalarMemOp:$src), 0>;
6894}
6895}
Asaf Badouh2744d212015-09-20 14:31:19 +00006896
// Signed truncating forms (opcode 0x2C, fp_to_sint / X86cvtts2IntRnd)
// instantiate avx512_cvt_s_all; unsigned truncating forms (opcode 0x78,
// fp_to_uint / X86cvtts2UIntRnd) instantiate avx512_cvt_s_all_unsigned.
// The 64-bit destination variants add VEX_W.
Igor Bregerc59b3a22016-08-03 10:58:05 +00006897defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006898 fp_to_sint, X86cvtts2IntRnd, SSE_CVT_SS2SI_32, "{l}">,
Asaf Badouh2744d212015-09-20 14:31:19 +00006899 XS, EVEX_CD8<32, CD8VT1>;
Igor Bregerc59b3a22016-08-03 10:58:05 +00006900defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006901 fp_to_sint, X86cvtts2IntRnd, SSE_CVT_SS2SI_64, "{q}">,
Asaf Badouh2744d212015-09-20 14:31:19 +00006902 VEX_W, XS, EVEX_CD8<32, CD8VT1>;
Igor Bregerc59b3a22016-08-03 10:58:05 +00006903defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006904 fp_to_sint, X86cvtts2IntRnd, SSE_CVT_SD2SI, "{l}">,
Asaf Badouh2744d212015-09-20 14:31:19 +00006905 XD, EVEX_CD8<64, CD8VT1>;
Igor Bregerc59b3a22016-08-03 10:58:05 +00006906defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006907 fp_to_sint, X86cvtts2IntRnd, SSE_CVT_SD2SI, "{q}">,
Asaf Badouh2744d212015-09-20 14:31:19 +00006908 VEX_W, XD, EVEX_CD8<64, CD8VT1>;
6909
Craig Topper61d8a602018-01-06 21:27:25 +00006910defm VCVTTSS2USIZ: avx512_cvt_s_all_unsigned<0x78, "vcvttss2usi", f32x_info, i32x_info,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006911 fp_to_uint, X86cvtts2UIntRnd, SSE_CVT_SS2SI_32, "{l}">,
Asaf Badouh2744d212015-09-20 14:31:19 +00006912 XS, EVEX_CD8<32, CD8VT1>;
Craig Topper61d8a602018-01-06 21:27:25 +00006913defm VCVTTSS2USI64Z: avx512_cvt_s_all_unsigned<0x78, "vcvttss2usi", f32x_info, i64x_info,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006914 fp_to_uint, X86cvtts2UIntRnd, SSE_CVT_SS2SI_64, "{q}">,
Asaf Badouh2744d212015-09-20 14:31:19 +00006915 XS,VEX_W, EVEX_CD8<32, CD8VT1>;
Craig Topper61d8a602018-01-06 21:27:25 +00006916defm VCVTTSD2USIZ: avx512_cvt_s_all_unsigned<0x78, "vcvttsd2usi", f64x_info, i32x_info,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006917 fp_to_uint, X86cvtts2UIntRnd, SSE_CVT_SD2SI, "{l}">,
Asaf Badouh2744d212015-09-20 14:31:19 +00006918 XD, EVEX_CD8<64, CD8VT1>;
Craig Topper61d8a602018-01-06 21:27:25 +00006919defm VCVTTSD2USI64Z: avx512_cvt_s_all_unsigned<0x78, "vcvttsd2usi", f64x_info, i64x_info,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006920 fp_to_uint, X86cvtts2UIntRnd, SSE_CVT_SD2SI, "{q}">,
Asaf Badouh2744d212015-09-20 14:31:19 +00006921 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
Craig Topper61d8a602018-01-06 21:27:25 +00006922
// Select the SSE/SSE2 truncating-convert intrinsics (cvttss2si, cvttss2si64,
// cvttsd2si, cvttsd2si64) onto the EVEX "_Int" instructions defined above.
// Each intrinsic gets two patterns: a VR128X register source (rr_Int) and a
// folded scalar load matched by sse_load_f32 / sse_load_f64 (rm_Int).
Asaf Badouh2744d212015-09-20 14:31:19 +00006923let Predicates = [HasAVX512] in {
6924 def : Pat<(i32 (int_x86_sse_cvttss2si (v4f32 VR128X:$src))),
Craig Topper8c252bc2016-09-18 18:59:33 +00006925 (VCVTTSS2SIZrr_Int VR128X:$src)>;
Ayman Musaf77219e2017-02-13 09:55:48 +00006926 def : Pat<(i32 (int_x86_sse_cvttss2si sse_load_f32:$src)),
6927 (VCVTTSS2SIZrm_Int ssmem:$src)>;
Asaf Badouh2744d212015-09-20 14:31:19 +00006928 def : Pat<(i64 (int_x86_sse_cvttss2si64 (v4f32 VR128X:$src))),
Craig Topper8c252bc2016-09-18 18:59:33 +00006929 (VCVTTSS2SI64Zrr_Int VR128X:$src)>;
Ayman Musaf77219e2017-02-13 09:55:48 +00006930 def : Pat<(i64 (int_x86_sse_cvttss2si64 sse_load_f32:$src)),
6931 (VCVTTSS2SI64Zrm_Int ssmem:$src)>;
Asaf Badouh2744d212015-09-20 14:31:19 +00006932 def : Pat<(i32 (int_x86_sse2_cvttsd2si (v2f64 VR128X:$src))),
Craig Topper8c252bc2016-09-18 18:59:33 +00006933 (VCVTTSD2SIZrr_Int VR128X:$src)>;
Ayman Musaf77219e2017-02-13 09:55:48 +00006934 def : Pat<(i32 (int_x86_sse2_cvttsd2si sse_load_f64:$src)),
6935 (VCVTTSD2SIZrm_Int sdmem:$src)>;
Asaf Badouh2744d212015-09-20 14:31:19 +00006936 def : Pat<(i64 (int_x86_sse2_cvttsd2si64 (v2f64 VR128X:$src))),
Craig Topper8c252bc2016-09-18 18:59:33 +00006937 (VCVTTSD2SI64Zrr_Int VR128X:$src)>;
Ayman Musaf77219e2017-02-13 09:55:48 +00006938 def : Pat<(i64 (int_x86_sse2_cvttsd2si64 sse_load_f64:$src)),
6939 (VCVTTSD2SI64Zrm_Int sdmem:$src)>;
Elena Demikhovskycf088092013-12-11 14:31:04 +00006940} // HasAVX512
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006941
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00006942//===----------------------------------------------------------------------===//
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006943// AVX-512 Convert from float to double and back
6944//===----------------------------------------------------------------------===//
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006945
// Scalar FP <-> FP conversion, current-rounding-mode forms.
//   _    - X86VectorVTInfo of the destination (and of the pass-through $src1).
//   _Src - X86VectorVTInfo of the converted operand $src2.
// Defines four instructions:
//   rr_Int / rm_Int - masked (AVX512_maskable_scalar) forms on the vector
//                     register class, matched via OpNode with FROUND_CURRENT;
//   rr / rm         - isCodeGenOnly forms on the scalar FRC register classes
//                     with an empty pattern list (hasSideEffects = 0; rm is
//                     also mayLoad = 1). They are selected by standalone Pat
//                     definitions rather than by an inline pattern.
Asaf Badouh2744d212015-09-20 14:31:19 +00006946multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006947 X86VectorVTInfo _Src, SDNode OpNode, OpndItins itins> {
Ayman Musa6e670cf2017-02-23 07:24:21 +00006948 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
Craig Toppera58abd12016-05-09 05:34:12 +00006949 (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
Asaf Badouh2744d212015-09-20 14:31:19 +00006950 "$src2, $src1", "$src1, $src2",
Craig Toppera58abd12016-05-09 05:34:12 +00006951 (_.VT (OpNode (_.VT _.RC:$src1),
Craig Toppera02e3942016-09-23 06:24:43 +00006952 (_Src.VT _Src.RC:$src2),
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006953 (i32 FROUND_CURRENT))), itins.rr>,
6954 EVEX_4V, VEX_LIG, Sched<[itins.Sched]>;
Ayman Musa6e670cf2017-02-23 07:24:21 +00006955 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
Craig Topper08b413a2017-03-13 05:14:44 +00006956 (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
Asaf Badouh2744d212015-09-20 14:31:19 +00006957 "$src2, $src1", "$src1, $src2",
Craig Toppera58abd12016-05-09 05:34:12 +00006958 (_.VT (OpNode (_.VT _.RC:$src1),
Craig Topper08b413a2017-03-13 05:14:44 +00006959 (_Src.VT _Src.ScalarIntMemCPat:$src2),
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006960 (i32 FROUND_CURRENT))), itins.rm>,
6961 EVEX_4V, VEX_LIG,
6962 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Ayman Musa6e670cf2017-02-23 07:24:21 +00006963
Craig Topperd2011e32017-02-25 18:43:42 +00006964 let isCodeGenOnly = 1, hasSideEffects = 0 in {
6965 def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
6966 (ins _.FRC:$src1, _Src.FRC:$src2),
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006967 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
6968 itins.rr>, EVEX_4V, VEX_LIG, Sched<[itins.Sched]>;
Craig Topperd2011e32017-02-25 18:43:42 +00006969 let mayLoad = 1 in
6970 def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
6971 (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006972 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
6973 itins.rm>, EVEX_4V, VEX_LIG,
6974 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Craig Topperd2011e32017-02-25 18:43:42 +00006975 }
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006976}
6977
Asaf Badouh2744d212015-09-20 14:31:19 +00006978// Scalar Conversion with SAE - suppress all exceptions
// Adds a single register-register form, rrb_Int, printed with a "{sae}"
// operand. It is matched via OpNodeRnd with FROUND_NO_EXC and tagged EVEX_B.
// No memory form is defined here.
6979multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006980 X86VectorVTInfo _Src, SDNode OpNodeRnd, OpndItins itins> {
Ayman Musa6e670cf2017-02-23 07:24:21 +00006981 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
Craig Toppera58abd12016-05-09 05:34:12 +00006982 (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
Asaf Badouh2744d212015-09-20 14:31:19 +00006983 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
Craig Toppera58abd12016-05-09 05:34:12 +00006984 (_.VT (OpNodeRnd (_.VT _.RC:$src1),
Asaf Badouh2744d212015-09-20 14:31:19 +00006985 (_Src.VT _Src.RC:$src2),
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006986 (i32 FROUND_NO_EXC))), itins.rr>,
6987 EVEX_4V, VEX_LIG, EVEX_B, Sched<[itins.Sched]>;
Asaf Badouh2744d212015-09-20 14:31:19 +00006988}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00006989
Asaf Badouh2744d212015-09-20 14:31:19 +00006990// Scalar Conversion with rounding control (RC)
// Adds a single register-register form, rrb_Int, that takes an extra
// AVX512RC:$rc operand. The rounding mode is passed to OpNodeRnd as the
// immediate (i32 imm:$rc) and the instruction is tagged EVEX_B and EVEX_RC.
6991multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006992 X86VectorVTInfo _Src, SDNode OpNodeRnd, OpndItins itins> {
Ayman Musa6e670cf2017-02-23 07:24:21 +00006993 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
Craig Toppera58abd12016-05-09 05:34:12 +00006994 (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
Asaf Badouh2744d212015-09-20 14:31:19 +00006995 "$rc, $src2, $src1", "$src1, $src2, $rc",
Craig Toppera58abd12016-05-09 05:34:12 +00006996 (_.VT (OpNodeRnd (_.VT _.RC:$src1),
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00006997 (_Src.VT _Src.RC:$src2), (i32 imm:$rc))),
Craig Toppera2f55282017-12-10 03:16:36 +00006998 itins.rr>,
6999 EVEX_4V, VEX_LIG, Sched<[itins.Sched]>,
Asaf Badouh2744d212015-09-20 14:31:19 +00007000 EVEX_B, EVEX_RC;
7001}
// Double -> single scalar conversion wrapper. Under HasAVX512 it emits the
// "Z" family by combining avx512_cvt_fp_scalar (current rounding mode) with
// avx512_cvt_fp_rc_scalar (explicit rounding control). The same OpNodeRnd is
// passed to both. Encoding: VEX_W, EVEX_CD8<64, CD8VT1>, XD.
Craig Toppera02e3942016-09-23 06:24:43 +00007002multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00007003 SDNode OpNodeRnd, OpndItins itins,
7004 X86VectorVTInfo _src, X86VectorVTInfo _dst> {
Asaf Badouh2744d212015-09-20 14:31:19 +00007005 let Predicates = [HasAVX512] in {
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00007006 defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, itins>,
Asaf Badouh2744d212015-09-20 14:31:19 +00007007 avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00007008 OpNodeRnd, itins>, VEX_W, EVEX_CD8<64, CD8VT1>, XD;
Asaf Badouh2744d212015-09-20 14:31:19 +00007009 }
7010}
7011
// Single -> double scalar conversion wrapper. Under HasAVX512 it emits the
// "Z" family by combining avx512_cvt_fp_scalar (current rounding mode) with
// avx512_cvt_fp_sae_scalar ({sae} form); no rounding-control form is added.
// Encoding: EVEX_CD8<32, CD8VT1>, XS.
Craig Toppera02e3942016-09-23 06:24:43 +00007012multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00007013 SDNode OpNodeRnd, OpndItins itins,
7014 X86VectorVTInfo _src, X86VectorVTInfo _dst> {
Asaf Badouh2744d212015-09-20 14:31:19 +00007015 let Predicates = [HasAVX512] in {
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00007016 defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, itins>,
7017 avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, itins>,
Michael Zuckerman4b88a772016-12-18 14:29:00 +00007018 EVEX_CD8<32, CD8VT1>, XS;
Asaf Badouh2744d212015-09-20 14:31:19 +00007019 }
7020}
// Instantiate the scalar double<->single conversions. Both share opcode 0x5A:
// vcvtsd2ss is matched through X86froundRnd, vcvtss2sd through X86fpextRnd.
// Both are marked NotMemoryFoldable.
Craig Toppera02e3942016-09-23 06:24:43 +00007021defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss",
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00007022 X86froundRnd, SSE_CVT_SD2SS, f64x_info,
7023 f32x_info>, NotMemoryFoldable;
Craig Toppera02e3942016-09-23 06:24:43 +00007024defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd",
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00007025 X86fpextRnd, SSE_CVT_SS2SD, f32x_info,
7026 f64x_info>, NotMemoryFoldable;
Asaf Badouh2744d212015-09-20 14:31:19 +00007027
// Selection patterns for the scalar conversions above.
// The FRC forms (VCVTSS2SDZrr/rm, VCVTSD2SSZrr) take a pass-through first
// operand; these patterns supply IMPLICIT_DEF for it.
Michael Kuperstein2bc3d4d2016-08-18 20:08:15 +00007028def : Pat<(f64 (fpextend FR32X:$src)),
Craig Topperafc3c822017-11-07 04:44:22 +00007029 (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
Asaf Badouh2744d212015-09-20 14:31:19 +00007030 Requires<[HasAVX512]>;
Michael Kuperstein2bc3d4d2016-08-18 20:08:15 +00007031def : Pat<(f64 (fpextend (loadf32 addr:$src))),
Ayman Musa6e670cf2017-02-23 07:24:21 +00007032 (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
Asaf Badouh2744d212015-09-20 14:31:19 +00007033 Requires<[HasAVX512]>;
7034
// extloadf32 is selected differently depending on the optimization goal:
// with OptForSize the load is folded into the convert (rm form); with
// OptForSpeed the value is loaded with VMOVSSZrm and converted with rr.
7035def : Pat<(f64 (extloadf32 addr:$src)),
Ayman Musa6e670cf2017-02-23 07:24:21 +00007036 (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007037 Requires<[HasAVX512, OptForSize]>;
7038
Asaf Badouh2744d212015-09-20 14:31:19 +00007039def : Pat<(f64 (extloadf32 addr:$src)),
Ayman Musa6e670cf2017-02-23 07:24:21 +00007040 (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>,
Asaf Badouh2744d212015-09-20 14:31:19 +00007041 Requires<[HasAVX512, OptForSpeed]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007042
Michael Kuperstein2bc3d4d2016-08-18 20:08:15 +00007043def : Pat<(f32 (fpround FR64X:$src)),
Craig Topperafc3c822017-11-07 04:44:22 +00007044 (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007045 Requires<[HasAVX512]>;
Elad Cohen0c260102017-01-11 09:11:48 +00007046
// Vector-level idioms: converting element 0 of $src and inserting the result
// into $dst via X86Movss / X86Movsd maps onto the two-operand _Int forms,
// with $dst as the pass-through operand.
7047def : Pat<(v4f32 (X86Movss
7048 (v4f32 VR128X:$dst),
7049 (v4f32 (scalar_to_vector
7050 (f32 (fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
Ayman Musa6e670cf2017-02-23 07:24:21 +00007051 (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
Elad Cohen0c260102017-01-11 09:11:48 +00007052 Requires<[HasAVX512]>;
7053
7054def : Pat<(v2f64 (X86Movsd
7055 (v2f64 VR128X:$dst),
7056 (v2f64 (scalar_to_vector
7057 (f64 (fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
Ayman Musa6e670cf2017-02-23 07:24:21 +00007058 (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
Elad Cohen0c260102017-01-11 09:11:48 +00007059 Requires<[HasAVX512]>;
7060
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007061//===----------------------------------------------------------------------===//
7062// AVX-512 Vector convert from signed/unsigned integer to float/double
7063// and from float/double to signed/unsigned integer
7064//===----------------------------------------------------------------------===//
7065
// Shared building block for the packed conversions below. Emits three masked
// (AVX512_maskable) forms that convert _Src.VT to _.VT through OpNode:
//   rr  - register source;
//   rm  - full-vector load through MemOp; the mnemonic is OpcodeStr#Alias;
//   rmb - scalar load broadcast with X86VBroadcast, printed with the
//         Broadcast suffix and tagged EVEX_B.
// Defaults: Broadcast = _.BroadcastStr, Alias = "", MemOp = _Src.MemOp.
7066multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007067 X86VectorVTInfo _Src, SDNode OpNode, OpndItins itins,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007068 string Broadcast = _.BroadcastStr,
Coby Tayree97e9cf62016-11-20 17:09:56 +00007069 string Alias = "", X86MemOperand MemOp = _Src.MemOp> {
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007070
7071 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7072 (ins _Src.RC:$src), OpcodeStr, "$src", "$src",
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007073 (_.VT (OpNode (_Src.VT _Src.RC:$src))), itins.rr>,
7074 EVEX, Sched<[itins.Sched]>;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007075
7076 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
Coby Tayree97e9cf62016-11-20 17:09:56 +00007077 (ins MemOp:$src), OpcodeStr#Alias, "$src", "$src",
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007078 (_.VT (OpNode (_Src.VT
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007079 (bitconvert (_Src.LdFrag addr:$src))))), itins.rm>,
7080 EVEX, Sched<[itins.Sched.Folded]>;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007081
7082 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
Igor Breger4511e762016-02-22 11:48:27 +00007083 (ins _Src.ScalarMemOp:$src), OpcodeStr,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007084 "${src}"##Broadcast, "${src}"##Broadcast,
7085 (_.VT (OpNode (_Src.VT
7086 (X86VBroadcast (_Src.ScalarLdFrag addr:$src)))
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007087 )), itins.rm>, EVEX, EVEX_B,
7088 Sched<[itins.Sched.Folded]>;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007089}
7090// Conversion with SAE - suppress all exceptions
// Adds one register form, rrb, printed with "{sae}". It is matched via
// OpNodeRnd with FROUND_NO_EXC and tagged EVEX_B.
7091multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007092 X86VectorVTInfo _Src, SDNode OpNodeRnd,
7093 OpndItins itins> {
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007094 defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7095 (ins _Src.RC:$src), OpcodeStr,
7096 "{sae}, $src", "$src, {sae}",
7097 (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src),
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007098 (i32 FROUND_NO_EXC))), itins.rr>,
7099 EVEX, EVEX_B, Sched<[itins.Sched]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007100}
7101
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007102// Conversion with rounding control (RC)
// Adds one register form, rrb, with an extra AVX512RC:$rc operand. The
// rounding mode is passed to OpNodeRnd as (i32 imm:$rc) and the instruction
// is tagged EVEX_B and EVEX_RC.
7103multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007104 X86VectorVTInfo _Src, SDNode OpNodeRnd,
7105 OpndItins itins> {
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007106 defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7107 (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
7108 "$rc, $src", "$src, $rc",
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007109 (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 imm:$rc))),
7110 itins.rr>, EVEX, EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00007111}
7112
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007113// Extend Float to Double
// Z (HasAVX512): v8f32 -> v8f64 via fpextend, plus an {sae} form matched
// through X86vfpextRnd.
// Z128/Z256 (HasVLX): the 128-bit form reads a v4f32 source, so it uses
// X86vfpext, an explicit "{1to2}" broadcast string and f64mem as the memory
// operand; the 256-bit form uses plain fpextend with the defaults.
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007114multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
7115 OpndItins itins> {
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007116 let Predicates = [HasAVX512] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007117 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8f32x_info,
7118 fpextend, itins>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007119 avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007120 X86vfpextRnd, itins>, EVEX_V512;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007121 }
7122 let Predicates = [HasVLX] in {
7123 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4f32x_info,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007124 X86vfpext, itins, "{1to2}", "", f64mem>, EVEX_V128;
7125 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4f32x_info, fpextend,
7126 itins>, EVEX_V256;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007127 }
7128}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007129
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007130// Truncate Double to Float
// Z (HasAVX512): v8f64 -> v8f32 via fpround, plus a rounding-control form
// matched through X86vfproundRnd.
// Z128/Z256 (HasVLX): both produce a v4f32 result, so their memory forms get
// the "{x}" / "{y}" mnemonic suffix and explicit "{1to2}" / "{1to4}" broadcast
// strings. The InstAliases additionally map the suffixed mnemonics onto the
// rr and rm forms; they are created with a final argument of 0.
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007131multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, OpndItins itins> {
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007132 let Predicates = [HasAVX512] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007133 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, fpround, itins>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007134 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007135 X86vfproundRnd, itins>, EVEX_V512;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007136 }
7137 let Predicates = [HasVLX] in {
7138 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007139 X86vfpround, itins, "{1to2}", "{x}">, EVEX_V128;
Michael Kuperstein2bc3d4d2016-08-18 20:08:15 +00007140 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, fpround,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007141 itins, "{1to4}", "{y}">, EVEX_V256;
Craig Topperb8596e42016-11-14 01:53:29 +00007142
7143 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7144 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
7145 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7146 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, f128mem:$src), 0>;
7147 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7148 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
7149 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7150 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0>;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007151 }
7152}
7153
// Instantiate the packed double<->single conversions. Both share opcode 0x5A
// and differ in prefix and tuple type: vcvtpd2ps uses VEX_W, PD and
// EVEX_CD8<64, CD8VF>; vcvtps2pd uses PS and EVEX_CD8<32, CD8VH>.
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007154defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SSE_CVT_PD2PS>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007155 VEX_W, PD, EVEX_CD8<64, CD8VF>;
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007156defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SSE_CVT_PS2PD>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007157 PS, EVEX_CD8<32, CD8VH>;
7158
// Extending vector loads fold into the memory form of vcvtps2pd.
// NOTE(review): this 512-bit pattern has no Predicates/Requires guard within
// this excerpt, unlike the HasVLX patterns below - confirm whether HasAVX512
// is applied elsewhere or should be added.
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007159def : Pat<(v8f64 (extloadv8f32 addr:$src)),
7160 (VCVTPS2PDZrm addr:$src)>;
Michael Liao5bf95782014-12-04 05:20:33 +00007161
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007162let Predicates = [HasVLX] in {
// An X86vzmovl wrapped around the 128-bit X86vfpround result is matched
// directly onto VCVTPD2PSZ128 with no extra instruction; AddedComplexity = 15
// raises the priority of these two patterns.
Craig Topperee277e12017-10-14 05:55:42 +00007163 let AddedComplexity = 15 in {
7164 def : Pat<(X86vzmovl (v2f64 (bitconvert
7165 (v4f32 (X86vfpround (v2f64 VR128X:$src)))))),
7166 (VCVTPD2PSZ128rr VR128X:$src)>;
7167 def : Pat<(X86vzmovl (v2f64 (bitconvert
7168 (v4f32 (X86vfpround (loadv2f64 addr:$src)))))),
7169 (VCVTPD2PSZ128rm addr:$src)>;
7170 }
Craig Topper5471fc22016-11-06 04:12:52 +00007171 def : Pat<(v2f64 (extloadv2f32 addr:$src)),
7172 (VCVTPS2PDZ128rm addr:$src)>;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007173 def : Pat<(v4f64 (extloadv4f32 addr:$src)),
7174 (VCVTPS2PDZ256rm addr:$src)>;
7175}
Elena Demikhovsky3629b4a2014-01-06 08:45:54 +00007176
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007177// Convert Signed/Unsigned Doubleword to Double
// OpNode is used for the 512- and 256-bit forms. OpNode128 is used for the
// 128-bit form, which reads a v4i32 source and therefore also passes an
// explicit "{1to2}" broadcast string and i64mem as the memory operand.
7178multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007179 SDNode OpNode128, OpndItins itins> {
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007180 // No rounding in this op
7181 let Predicates = [HasAVX512] in
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007182 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
7183 itins>, EVEX_V512;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007184
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007185 let Predicates = [HasVLX] in {
7186 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007187 OpNode128, itins, "{1to2}", "", i64mem>, EVEX_V128;
7188 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
7189 itins>, EVEX_V256;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007190 }
7191}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007192
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007193// Convert Signed/Unsigned Doubleword to Float
// Z (HasAVX512): v16i32 -> v16f32 via OpNode, plus a rounding-control form
// via OpNodeRnd. Z128/Z256 (HasVLX): same-width conversions via OpNode only.
7194multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007195 SDNode OpNodeRnd, OpndItins itins> {
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007196 let Predicates = [HasAVX512] in
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007197 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
7198 itins>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007199 avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007200 OpNodeRnd, itins>, EVEX_V512;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007201
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007202 let Predicates = [HasVLX] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007203 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
7204 itins>, EVEX_V128;
7205 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
7206 itins>, EVEX_V256;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007207 }
7208}
7209
7210// Convert Float to Signed/Unsigned Doubleword with truncation
// Z (HasAVX512): v16f32 -> v16i32 via OpNode, plus an {sae} form via
// OpNodeRnd. Z128/Z256 (HasVLX): same-width conversions via OpNode only.
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007211multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7212 SDNode OpNodeRnd, OpndItins itins> {
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007213 let Predicates = [HasAVX512] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007214 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
7215 itins>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007216 avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007217 OpNodeRnd, itins>, EVEX_V512;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007218 }
7219 let Predicates = [HasVLX] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007220 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
7221 itins>, EVEX_V128;
7222 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
7223 itins>, EVEX_V256;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007224 }
7225}
7226
7227// Convert Float to Signed/Unsigned Doubleword
// Z (HasAVX512): v16f32 -> v16i32 via OpNode, plus a rounding-control form
// via OpNodeRnd. Z128/Z256 (HasVLX): same-width conversions via OpNode only.
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007228multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7229 SDNode OpNodeRnd, OpndItins itins> {
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007230 let Predicates = [HasAVX512] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007231 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
7232 itins>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007233 avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007234 OpNodeRnd, itins>, EVEX_V512;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007235 }
7236 let Predicates = [HasVLX] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007237 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
7238 itins>, EVEX_V128;
7239 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
7240 itins>, EVEX_V256;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007241 }
7242}
7243
7244// Convert Double to Signed/Unsigned Doubleword with truncation
// Z (HasAVX512): v8f64 -> v8i32 via OpNode, plus an {sae} form via OpNodeRnd.
// Z128 uses OpNode128; Z256 uses OpNode. The trailing InstAliases map the
// "x"/"y"-suffixed mnemonics onto the rr and rm forms (final argument 0).
// NOTE(review): the rm aliases here use i128mem/i256mem, whereas the otherwise
// parallel aliases in avx512_cvtpd2dq use f128mem/f256mem - confirm whether
// the difference is intentional.
Craig Topper731bf9c2016-11-09 07:31:32 +00007245multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007246 SDNode OpNode128, SDNode OpNodeRnd,
7247 OpndItins itins> {
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007248 let Predicates = [HasAVX512] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007249 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
7250 itins>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007251 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007252 OpNodeRnd, itins>, EVEX_V512;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007253 }
7254 let Predicates = [HasVLX] in {
7255 // we need "x"/"y" suffixes in order to distinguish between 128 and 256
Craig Topper731bf9c2016-11-09 07:31:32 +00007256 // memory forms of these instructions in Asm Parser. They have the same
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007257 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
7258 // due to the same reason.
Craig Topper731bf9c2016-11-09 07:31:32 +00007259 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007260 OpNode128, itins, "{1to2}", "{x}">, EVEX_V128;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007261 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007262 itins, "{1to4}", "{y}">, EVEX_V256;
Craig Topperb8596e42016-11-14 01:53:29 +00007263
7264 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7265 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
7266 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7267 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, i128mem:$src), 0>;
7268 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7269 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
7270 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7271 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, i256mem:$src), 0>;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007272 }
7273}
7274
7275// Convert Double to Signed/Unsigned Doubleword
// Z (HasAVX512): v8f64 -> v8i32 via OpNode, plus a rounding-control form via
// OpNodeRnd. Z128 and Z256 both use OpNode. The trailing InstAliases map the
// "x"/"y"-suffixed mnemonics onto the rr and rm forms (final argument 0).
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007276multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7277 SDNode OpNodeRnd, OpndItins itins> {
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007278 let Predicates = [HasAVX512] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007279 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
7280 itins>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007281 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007282 OpNodeRnd, itins>, EVEX_V512;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007283 }
7284 let Predicates = [HasVLX] in {
7285 // we need "x"/"y" suffixes in order to distinguish between 128 and 256
7286 // memory forms of these instructions in Asm Parser. They have the same
7287 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
7288 // due to the same reason.
7289 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info, OpNode,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007290 itins, "{1to2}", "{x}">, EVEX_V128;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007291 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007292 itins, "{1to4}", "{y}">, EVEX_V256;
Craig Topperb8596e42016-11-14 01:53:29 +00007293
7294 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7295 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
7296 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7297 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, f128mem:$src), 0>;
7298 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7299 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
7300 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7301 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0>;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007302 }
7303}
7304
7305// Convert Double to Signed/Unsigned Quadword
// Z (HasDQI): v8f64 -> v8i64 via OpNode, plus a rounding-control form via
// OpNodeRnd. Z128/Z256 (HasDQI + HasVLX): same-width conversions via OpNode.
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007306multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7307 SDNode OpNodeRnd, OpndItins itins> {
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007308 let Predicates = [HasDQI] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007309 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
7310 itins>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007311 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007312 OpNodeRnd,itins>, EVEX_V512;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007313 }
7314 let Predicates = [HasDQI, HasVLX] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007315 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
7316 itins>, EVEX_V128;
7317 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
7318 itins>, EVEX_V256;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007319 }
7320}
7321
7322// Convert Double to Signed/Unsigned Quadword with truncation
// Z (HasDQI): v8f64 -> v8i64 via OpNode, plus an {sae} form via OpNodeRnd.
// Z128/Z256 (HasDQI + HasVLX): same-width conversions via OpNode.
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007323multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7324 SDNode OpNodeRnd, OpndItins itins> {
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007325 let Predicates = [HasDQI] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007326 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
7327 itins>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007328 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007329 OpNodeRnd, itins>, EVEX_V512;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007330 }
7331 let Predicates = [HasDQI, HasVLX] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007332 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
7333 itins>, EVEX_V128;
7334 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
7335 itins>, EVEX_V256;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007336 }
7337}
7338
7339// Convert Signed/Unsigned Quadword to Double
// Z (HasDQI): v8i64 -> v8f64 via OpNode, plus a rounding-control form via
// OpNodeRnd. Z128/Z256 (HasDQI + HasVLX): same-width conversions via OpNode.
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007340multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
7341 SDNode OpNodeRnd, OpndItins itins> {
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007342 let Predicates = [HasDQI] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007343 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
7344 itins>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007345 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007346 OpNodeRnd, itins>, EVEX_V512;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007347 }
7348 let Predicates = [HasDQI, HasVLX] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007349 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
7350 itins>, EVEX_V128;
7351 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
7352 itins>, EVEX_V256;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007353 }
7354}
7355
7356// Convert Float to Signed/Unsigned Quadword
// Z (HasDQI): v8f32 -> v8i64 via OpNode, plus a rounding-control form via
// OpNodeRnd. Under HasDQI + HasVLX, Z128 reads a v4f32 source and so passes
// f64mem as its memory operand; Z256 uses the defaults.
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007357multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7358 SDNode OpNodeRnd, OpndItins itins> {
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007359 let Predicates = [HasDQI] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007360 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
7361 itins>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007362 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007363 OpNodeRnd, itins>, EVEX_V512;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007364 }
7365 let Predicates = [HasDQI, HasVLX] in {
7366 // Explicitly specified broadcast string, since we take only 2 elements
7367 // from v4f32x_info source
7368 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007369 itins, "{1to2}", "", f64mem>, EVEX_V128;
7370 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
7371 itins>, EVEX_V256;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007372 }
7373}
7374
7375// Convert Float to Signed/Unsigned Quardword with truncation
// avx512_cvttps2qq - same three-width layout as the non-truncating variant,
// with two differences visible here:
//   * the Z variant is paired with avx512_vcvt_fp_sae (not avx512_vcvt_fp_rc),
//     which receives OpNodeRnd;
//   * the Z128 variant uses the separate OpNode128 node, while Z and Z256
//     use OpNode.
// Z is gated on HasDQI; Z128 and Z256 are gated on HasDQI and HasVLX.
Craig Toppera39b6502016-12-10 06:02:48 +00007376multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007377 SDNode OpNode128, SDNode OpNodeRnd, OpndItins itins> {
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007378 let Predicates = [HasDQI] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007379 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
7380 itins>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007381 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007382 OpNodeRnd, itins>, EVEX_V512;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007383 }
7384 let Predicates = [HasDQI, HasVLX] in {
7385 // Explicitly specified broadcast string, since we take only 2 elements
7386 // from v4f32x_info source
Craig Toppera39b6502016-12-10 06:02:48 +00007387 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode128,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007388 itins, "{1to2}", "", f64mem>, EVEX_V128;
7389 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
7390 itins>, EVEX_V256;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007391 }
7392}
7393
7394// Convert Signed/Unsigned Quadword to Float
// avx512_cvtqq2ps - instantiates avx512_vcvt_fp for the 512/256/128-bit
// quadword -> float conversions.
//   OpNode    : node used by the Z and Z256 variants.
//   OpNode128 : node used by the Z128 variant.
//   OpNodeRnd : node forwarded to avx512_vcvt_fp_rc for the Z variant.
// Z is gated on HasDQI; Z128, Z256 and the four InstAlias records are gated
// on HasDQI and HasVLX.
Simon Pilgrima3af7962016-11-24 12:13:46 +00007395multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007396 SDNode OpNode128, SDNode OpNodeRnd, OpndItins itins> {
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007397 let Predicates = [HasDQI] in {
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007398 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode,
7399 itins>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007400 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007401 OpNodeRnd, itins>, EVEX_V512;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007402 }
7403 let Predicates = [HasDQI, HasVLX] in {
7404 // we need "x"/"y" suffixes in order to distinguish between 128 and 256
7405 // memory forms of these instructions in Asm Parser. They have the same
7406 // dest type - 'v4f32x_info'. We also specify the broadcast string explicitly
7407 // due to the same reason.
Simon Pilgrima3af7962016-11-24 12:13:46 +00007408 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, OpNode128,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007409 itins, "{1to2}", "{x}">, EVEX_V128;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007410 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007411 itins, "{1to4}", "{y}">, EVEX_V256;
Craig Topperb8596e42016-11-14 01:53:29 +00007412
// Aliases that spell the mnemonic with a plain "x" / "y" suffix (no braces)
// for the rr and rm forms of the Z128 and Z256 variants.
7413 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7414 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
7415 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7416 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, i128mem:$src), 0>;
7417 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7418 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
7419 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7420 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, i256mem:$src), 0>;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007421 }
7422}
7423
// Instantiations of the packed integer <-> floating-point conversion
// multiclasses. Each defm passes, in order: the opcode byte, the mnemonic,
// the selection node(s) and the itinerary. The names after the closing '>'
// (PS / PD / XS / XD, VEX_W, EVEX_CD8<...>) are additional classes applied to
// every record the multiclass produces.
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007424defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", sint_to_fp, X86VSintToFP,
7425 SSE_CVT_I2PD>, XS, EVEX_CD8<32, CD8VH>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007426
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007427defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", sint_to_fp,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007428 X86VSintToFpRnd, SSE_CVT_I2PS>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007429 PS, EVEX_CD8<32, CD8VF>;
7430
7431defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", fp_to_sint,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007432 X86cvttp2siRnd, SSE_CVT_PS2I>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007433 XS, EVEX_CD8<32, CD8VF>;
7434
Simon Pilgrima3af7962016-11-24 12:13:46 +00007435defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", fp_to_sint, X86cvttp2si,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007436 X86cvttp2siRnd, SSE_CVT_PD2I>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007437 PD, VEX_W, EVEX_CD8<64, CD8VF>;
7438
7439defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", fp_to_uint,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007440 X86cvttp2uiRnd, SSE_CVT_PS2I>, PS,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007441 EVEX_CD8<32, CD8VF>;
7442
Craig Topperf334ac192016-11-09 07:48:51 +00007443defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", fp_to_uint,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007444 X86cvttp2ui, X86cvttp2uiRnd, SSE_CVT_PD2I>,
7445 PS, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007446
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007447defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp,
7448 X86VUintToFP, SSE_CVT_I2PD>, XS,
7449 EVEX_CD8<32, CD8VH>;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007450
7451defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", uint_to_fp,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007452 X86VUintToFpRnd, SSE_CVT_I2PS>, XD,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007453 EVEX_CD8<32, CD8VF>;
7454
Craig Topper19e04b62016-05-19 06:13:58 +00007455defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007456 X86cvtp2IntRnd, SSE_CVT_PS2I>, PD,
7457 EVEX_CD8<32, CD8VF>;
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00007458
Craig Topper19e04b62016-05-19 06:13:58 +00007459defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007460 X86cvtp2IntRnd, SSE_CVT_PD2I>, XD,
7461 VEX_W, EVEX_CD8<64, CD8VF>;
Michael Liao5bf95782014-12-04 05:20:33 +00007462
Craig Topper19e04b62016-05-19 06:13:58 +00007463defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007464 X86cvtp2UIntRnd, SSE_CVT_PS2I>,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007465 PS, EVEX_CD8<32, CD8VF>;
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007466
Craig Topper19e04b62016-05-19 06:13:58 +00007467defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007468 X86cvtp2UIntRnd, SSE_CVT_PD2I>, VEX_W,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007469 PS, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00007470
Craig Topper19e04b62016-05-19 06:13:58 +00007471defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007472 X86cvtp2IntRnd, SSE_CVT_PD2I>, VEX_W,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007473 PD, EVEX_CD8<64, CD8VF>;
Michael Liao5bf95782014-12-04 05:20:33 +00007474
Craig Topper19e04b62016-05-19 06:13:58 +00007475defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007476 X86cvtp2IntRnd, SSE_CVT_PS2I>, PD,
7477 EVEX_CD8<32, CD8VH>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007478
Craig Topper19e04b62016-05-19 06:13:58 +00007479defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007480 X86cvtp2UIntRnd, SSE_CVT_PD2I>, VEX_W,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007481 PD, EVEX_CD8<64, CD8VF>;
7482
Craig Topper19e04b62016-05-19 06:13:58 +00007483defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007484 X86cvtp2UIntRnd, SSE_CVT_PS2I>, PD,
7485 EVEX_CD8<32, CD8VH>;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007486
7487defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", fp_to_sint,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007488 X86cvttp2siRnd, SSE_CVT_PD2I>, VEX_W,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007489 PD, EVEX_CD8<64, CD8VF>;
7490
Craig Toppera39b6502016-12-10 06:02:48 +00007491defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", fp_to_sint, X86cvttp2si,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007492 X86cvttp2siRnd, SSE_CVT_PS2I>, PD,
7493 EVEX_CD8<32, CD8VH>;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007494
7495defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", fp_to_uint,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007496 X86cvttp2uiRnd, SSE_CVT_PD2I>, VEX_W,
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007497 PD, EVEX_CD8<64, CD8VF>;
7498
Craig Toppera39b6502016-12-10 06:02:48 +00007499defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", fp_to_uint, X86cvttp2ui,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007500 X86cvttp2uiRnd, SSE_CVT_PS2I>, PD,
7501 EVEX_CD8<32, CD8VH>;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007502
7503defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", sint_to_fp,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007504 X86VSintToFpRnd, SSE_CVT_I2PD>, VEX_W, XS,
7505 EVEX_CD8<64, CD8VF>;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007506
7507defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", uint_to_fp,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007508 X86VUintToFpRnd, SSE_CVT_I2PD>, VEX_W, XS,
7509 EVEX_CD8<64, CD8VF>;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007510
Simon Pilgrima3af7962016-11-24 12:13:46 +00007511defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp, X86VSintToFP,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007512 X86VSintToFpRnd, SSE_CVT_I2PS>, VEX_W, PS,
7513 EVEX_CD8<64, CD8VF>;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007514
Simon Pilgrima3af7962016-11-24 12:13:46 +00007515defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp, X86VUintToFP,
Simon Pilgrim465a88b2017-12-03 21:16:12 +00007516 X86VUintToFpRnd, SSE_CVT_I2PS>, VEX_W, XD,
7517 EVEX_CD8<64, CD8VF>;
Elena Demikhovsky0f370932015-07-13 13:26:20 +00007518
// Selection patterns used when AVX512 is available but VLX is not. Each
// 128/256-bit conversion is selected by widening: the source is placed in the
// low subregister of an otherwise undefined (IMPLICIT_DEF) 512-bit value, the
// 512-bit "Z...rr" instruction is applied, and the result is read back from
// the low xmm/ymm subregister.
Craig Toppere38c57a2015-11-27 05:44:02 +00007519let Predicates = [HasAVX512, NoVLX] in {
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007520def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
Michael Liao5bf95782014-12-04 05:20:33 +00007521 (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
Craig Topper61403202016-09-19 02:53:43 +00007522 (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
7523 VR256X:$src1, sub_ymm)))), sub_ymm)>;
Michael Liao5bf95782014-12-04 05:20:33 +00007524
Elena Demikhovsky3dcfbdf2014-04-08 07:24:02 +00007525def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))),
7526 (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
Craig Topper61403202016-09-19 02:53:43 +00007527 (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
7528 VR128X:$src1, sub_xmm)))), sub_xmm)>;
Elena Demikhovsky3dcfbdf2014-04-08 07:24:02 +00007529
Elena Demikhovsky95629ca2016-03-29 06:33:41 +00007530def : Pat<(v4i32 (fp_to_uint (v4f64 VR256X:$src1))),
7531 (EXTRACT_SUBREG (v8i32 (VCVTTPD2UDQZrr
Craig Topper61403202016-09-19 02:53:43 +00007532 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
7533 VR256X:$src1, sub_ymm)))), sub_xmm)>;
Elena Demikhovsky95629ca2016-03-29 06:33:41 +00007534
Elena Demikhovsky3dcfbdf2014-04-08 07:24:02 +00007535def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
7536 (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
Craig Topper61403202016-09-19 02:53:43 +00007537 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
7538 VR256X:$src1, sub_ymm)))), sub_ymm)>;
Michael Liao5bf95782014-12-04 05:20:33 +00007539
Elena Demikhovsky3dcfbdf2014-04-08 07:24:02 +00007540def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
7541 (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
Craig Topper61403202016-09-19 02:53:43 +00007542 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
7543 VR128X:$src1, sub_xmm)))), sub_xmm)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007544
Cameron McInallyf10a7c92014-06-18 14:04:37 +00007545def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
7546 (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
Craig Topper61403202016-09-19 02:53:43 +00007547 (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
7548 VR128X:$src1, sub_xmm)))), sub_ymm)>;
Simon Pilgrim096b6d42016-11-20 14:03:23 +00007549
Simon Pilgrima3af7962016-11-24 12:13:46 +00007550def : Pat<(v2f64 (X86VUintToFP (v4i32 VR128X:$src1))),
Simon Pilgrim096b6d42016-11-20 14:03:23 +00007551 (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
7552 (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
7553 VR128X:$src1, sub_xmm)))), sub_xmm)>;
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00007554}
7555
// Selection patterns for the 128-bit conversion instructions when both
// AVX512 and VLX are available.
//  * The AddedComplexity = 15 group matches an X86vzmovl applied to the
//    (bitcast to v2i64) v4i32 result of a pd -> dq/udq conversion and selects
//    the plain Z128 instruction, in both rr and (where listed) rm forms.
//    NOTE(review): presumably valid because the instruction itself already
//    zeroes the upper result elements - confirm against the ISA reference.
//  * The remaining patterns fold a 64-bit scalar load (scalar_to_vector of
//    loadi64, or X86vzload) feeding X86VSintToFP / X86VUintToFP into the
//    Z128rm form.
Simon Pilgrim4ddc92b2016-10-18 07:42:15 +00007556let Predicates = [HasAVX512, HasVLX] in {
Simon Pilgrim3ce6a542016-11-23 22:35:06 +00007557 let AddedComplexity = 15 in {
7558 def : Pat<(X86vzmovl (v2i64 (bitconvert
7559 (v4i32 (X86cvtp2Int (v2f64 VR128X:$src)))))),
Craig Topper5ef13ba2016-12-26 07:26:07 +00007560 (VCVTPD2DQZ128rr VR128X:$src)>;
Craig Topper009f0aa2017-10-14 04:18:10 +00007561 def : Pat<(X86vzmovl (v2i64 (bitconvert
Craig Topperf7e77772017-10-14 07:04:48 +00007562 (v4i32 (X86cvtp2Int (loadv2f64 addr:$src)))))),
7563 (VCVTPD2DQZ128rm addr:$src)>;
7564 def : Pat<(X86vzmovl (v2i64 (bitconvert
Craig Topper009f0aa2017-10-14 04:18:10 +00007565 (v4i32 (X86cvtp2UInt (v2f64 VR128X:$src)))))),
Craig Topper5ef13ba2016-12-26 07:26:07 +00007566 (VCVTPD2UDQZ128rr VR128X:$src)>;
Simon Pilgrim3ce6a542016-11-23 22:35:06 +00007567 def : Pat<(X86vzmovl (v2i64 (bitconvert
Simon Pilgrima3af7962016-11-24 12:13:46 +00007568 (v4i32 (X86cvttp2si (v2f64 VR128X:$src)))))),
Craig Topper5ef13ba2016-12-26 07:26:07 +00007569 (VCVTTPD2DQZ128rr VR128X:$src)>;
Craig Topper009f0aa2017-10-14 04:18:10 +00007570 def : Pat<(X86vzmovl (v2i64 (bitconvert
Craig Topperf7e77772017-10-14 07:04:48 +00007571 (v4i32 (X86cvttp2si (loadv2f64 addr:$src)))))),
7572 (VCVTTPD2DQZ128rm addr:$src)>;
7573 def : Pat<(X86vzmovl (v2i64 (bitconvert
Craig Topper009f0aa2017-10-14 04:18:10 +00007574 (v4i32 (X86cvttp2ui (v2f64 VR128X:$src)))))),
Craig Topper5ef13ba2016-12-26 07:26:07 +00007575 (VCVTTPD2UDQZ128rr VR128X:$src)>;
Simon Pilgrim3ce6a542016-11-23 22:35:06 +00007576 }
Craig Topperd7467472017-10-14 04:18:09 +00007577
7578 def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
7579 (VCVTDQ2PDZ128rm addr:$src)>;
7580 def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
7581 (VCVTDQ2PDZ128rm addr:$src)>;
7582
7583 def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
7584 (VCVTUDQ2PDZ128rm addr:$src)>;
7585 def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
7586 (VCVTUDQ2PDZ128rm addr:$src)>;
Simon Pilgrim4ddc92b2016-10-18 07:42:15 +00007587}
7588
// Load-folding patterns for the 512-bit double <-> float conversions:
// fpround of a loaded v8f64 selects VCVTPD2PSZrm, and an extending load of
// v8f32 producing v8f64 selects VCVTPS2PDZrm.
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007589let Predicates = [HasAVX512] in {
Michael Kuperstein2bc3d4d2016-08-18 20:08:15 +00007590 def : Pat<(v8f32 (fpround (loadv8f64 addr:$src))),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007591 (VCVTPD2PSZrm addr:$src)>;
7592 def : Pat<(v8f64 (extloadv8f32 addr:$src)),
7593 (VCVTPS2PDZrm addr:$src)>;
7594}
7595
// With DQI and VLX: an X86vzmovl applied to the (bitcast to v2f64) v4f32
// result of a v2i64 -> float conversion is selected as the plain Z128rr
// instruction, for both the signed and the unsigned node.
Simon Pilgrim7c26a6f2016-11-24 14:02:30 +00007596let Predicates = [HasDQI, HasVLX] in {
7597 let AddedComplexity = 15 in {
7598 def : Pat<(X86vzmovl (v2f64 (bitconvert
7599 (v4f32 (X86VSintToFP (v2i64 VR128X:$src)))))),
Craig Topper5ef13ba2016-12-26 07:26:07 +00007600 (VCVTQQ2PSZ128rr VR128X:$src)>;
Simon Pilgrim7c26a6f2016-11-24 14:02:30 +00007601 def : Pat<(X86vzmovl (v2f64 (bitconvert
7602 (v4f32 (X86VUintToFP (v2i64 VR128X:$src)))))),
Craig Topper5ef13ba2016-12-26 07:26:07 +00007603 (VCVTUQQ2PSZ128rr VR128X:$src)>;
Simon Pilgrim7c26a6f2016-11-24 14:02:30 +00007604 }
7605}
7606
// Selection patterns used when DQI is available but VLX is not. The 128/256-bit
// quadword conversions are selected by widening: the source is inserted into
// the low subregister of an undefined (IMPLICIT_DEF) 512-bit value, the
// 512-bit "Z...rr" instruction is applied, and the low xmm/ymm subregister of
// the result is extracted.
Simon Pilgrim4e9b9cb2016-11-23 14:01:18 +00007607let Predicates = [HasDQI, NoVLX] in {
Simon Pilgrim841d7ca2016-11-24 14:46:55 +00007608def : Pat<(v2i64 (fp_to_sint (v2f64 VR128X:$src1))),
7609 (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
7610 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
7611 VR128X:$src1, sub_xmm)))), sub_xmm)>;
7612
Simon Pilgrim4e9b9cb2016-11-23 14:01:18 +00007613def : Pat<(v4i64 (fp_to_sint (v4f32 VR128X:$src1))),
7614 (EXTRACT_SUBREG (v8i64 (VCVTTPS2QQZrr
7615 (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
7616 VR128X:$src1, sub_xmm)))), sub_ymm)>;
7617
7618def : Pat<(v4i64 (fp_to_sint (v4f64 VR256X:$src1))),
7619 (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
7620 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
7621 VR256X:$src1, sub_ymm)))), sub_ymm)>;
7622
Simon Pilgrim841d7ca2016-11-24 14:46:55 +00007623def : Pat<(v2i64 (fp_to_uint (v2f64 VR128X:$src1))),
7624 (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
7625 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
7626 VR128X:$src1, sub_xmm)))), sub_xmm)>;
7627
Simon Pilgrim4e9b9cb2016-11-23 14:01:18 +00007628def : Pat<(v4i64 (fp_to_uint (v4f32 VR128X:$src1))),
7629 (EXTRACT_SUBREG (v8i64 (VCVTTPS2UQQZrr
7630 (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
7631 VR128X:$src1, sub_xmm)))), sub_ymm)>;
7632
7633def : Pat<(v4i64 (fp_to_uint (v4f64 VR256X:$src1))),
7634 (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
7635 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
7636 VR256X:$src1, sub_ymm)))), sub_ymm)>;
7637
7638def : Pat<(v4f32 (sint_to_fp (v4i64 VR256X:$src1))),
7639 (EXTRACT_SUBREG (v8f32 (VCVTQQ2PSZrr
7640 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
7641 VR256X:$src1, sub_ymm)))), sub_xmm)>;
7642
Simon Pilgrim841d7ca2016-11-24 14:46:55 +00007643def : Pat<(v2f64 (sint_to_fp (v2i64 VR128X:$src1))),
7644 (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
7645 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
7646 VR128X:$src1, sub_xmm)))), sub_xmm)>;
7647
Simon Pilgrim4e9b9cb2016-11-23 14:01:18 +00007648def : Pat<(v4f64 (sint_to_fp (v4i64 VR256X:$src1))),
7649 (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
7650 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
7651 VR256X:$src1, sub_ymm)))), sub_ymm)>;
7652
7653def : Pat<(v4f32 (uint_to_fp (v4i64 VR256X:$src1))),
7654 (EXTRACT_SUBREG (v8f32 (VCVTUQQ2PSZrr
7655 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
7656 VR256X:$src1, sub_ymm)))), sub_xmm)>;
7657
Simon Pilgrim841d7ca2016-11-24 14:46:55 +00007658def : Pat<(v2f64 (uint_to_fp (v2i64 VR128X:$src1))),
7659 (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
7660 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
7661 VR128X:$src1, sub_xmm)))), sub_xmm)>;
7662
Simon Pilgrim4e9b9cb2016-11-23 14:01:18 +00007663def : Pat<(v4f64 (uint_to_fp (v4i64 VR256X:$src1))),
7664 (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
7665 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
7666 VR256X:$src1, sub_ymm)))), sub_ymm)>;
7667}
7668
Elena Demikhovskydd0794e2013-10-24 07:16:35 +00007669//===----------------------------------------------------------------------===//
7670// Half precision conversion instructions
7671//===----------------------------------------------------------------------===//
Simon Pilgrim569e53b2017-12-03 21:43:54 +00007672
// avx512_cvtph2ps - defines the maskable "vcvtph2ps" forms (opcode 0x13).
//   _dest / _src : vector type info for the result and the source operand.
//   x86memop     : memory operand used by the rm form.
//   ld_frag      : load fragment whose result is bitconverted to _src.VT.
//   itins        : supplies the rr / rm itineraries and the Sched classes.
// rr takes the source in a register; rm takes it from memory and is scheduled
// with itins.Sched.Folded.
Simon Pilgrim18bcf932016-02-03 09:41:59 +00007673multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
Simon Pilgrim569e53b2017-12-03 21:43:54 +00007674 X86MemOperand x86memop, PatFrag ld_frag,
7675 OpndItins itins> {
Craig Toppercf8e6d02017-11-07 07:13:03 +00007676 defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
7677 (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
Simon Pilgrim569e53b2017-12-03 21:43:54 +00007678 (X86cvtph2ps (_src.VT _src.RC:$src)),itins.rr>,
7679 T8PD, Sched<[itins.Sched]>;
Craig Toppercf8e6d02017-11-07 07:13:03 +00007680 defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
7681 (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
7682 (X86cvtph2ps (_src.VT
7683 (bitconvert
Simon Pilgrim569e53b2017-12-03 21:43:54 +00007684 (ld_frag addr:$src)))), itins.rm>,
7685 T8PD, Sched<[itins.Sched.Folded]>;
Asaf Badouh7c522452015-10-22 14:01:16 +00007686}
7687
// avx512_cvtph2ps_sae - defines the register-only "rrb" form of vcvtph2ps
// whose assembly string carries "{sae}". It is matched from X86cvtph2psRnd
// with the FROUND_NO_EXC operand and is tagged EVEX_B.
Simon Pilgrim569e53b2017-12-03 21:43:54 +00007688multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
7689 OpndItins itins> {
Craig Topperc89e2822017-12-10 09:14:38 +00007690 defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
7691 (ins _src.RC:$src), "vcvtph2ps",
7692 "{sae}, $src", "$src, {sae}",
7693 (X86cvtph2psRnd (_src.VT _src.RC:$src),
7694 (i32 FROUND_NO_EXC)), itins.rr>,
7695 T8PD, EVEX_B, Sched<[itins.Sched]>;
Asaf Badouh7c522452015-10-22 14:01:16 +00007696}
7697
// 512-bit vcvtph2ps: v16i16x source, v16f32 result, f256mem memory operand
// loaded through loadv4i64. Combines the regular rr/rm forms with the {sae}
// rrb form. Gated on HasAVX512.
Craig Toppere7fb3002017-11-07 07:13:07 +00007698let Predicates = [HasAVX512] in
Simon Pilgrim569e53b2017-12-03 21:43:54 +00007699 defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, loadv4i64,
7700 SSE_CVT_PH2PS>,
7701 avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, SSE_CVT_PH2PS>,
Asaf Badouh7c522452015-10-22 14:01:16 +00007702 EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
Craig Toppere7fb3002017-11-07 07:13:07 +00007703
// 256-bit and 128-bit vcvtph2ps, gated on HasVLX. Both take a v8i16x source
// loaded through loadv2i64; the 256-bit form uses f128mem and the 128-bit form
// uses f64mem. The trailing patterns additionally select the 128-bit rm form
// for three other spellings of a 64-bit scalar load as the source.
7704let Predicates = [HasVLX] in {
7705 defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
Simon Pilgrim569e53b2017-12-03 21:43:54 +00007706 loadv2i64, SSE_CVT_PH2PS>, EVEX, EVEX_V256,
7707 EVEX_CD8<32, CD8VH>;
Craig Toppere7fb3002017-11-07 07:13:07 +00007708 defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
Simon Pilgrim569e53b2017-12-03 21:43:54 +00007709 loadv2i64, SSE_CVT_PH2PS>, EVEX, EVEX_V128,
7710 EVEX_CD8<32, CD8VH>;
Craig Toppere7fb3002017-11-07 07:13:07 +00007711
7712 // Pattern match vcvtph2ps of a scalar i64 load.
7713 def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzmovl_v2i64 addr:$src)))),
7714 (VCVTPH2PSZ128rm addr:$src)>;
7715 def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzload_v2i64 addr:$src)))),
7716 (VCVTPH2PSZ128rm addr:$src)>;
7717 def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert
7718 (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
7719 (VCVTPH2PSZ128rm addr:$src)>;
Elena Demikhovskydd0794e2013-10-24 07:16:35 +00007720}
7721
// avx512_cvtps2ph - defines the "vcvtps2ph" forms (opcode 0x1D).
//   _dest / _src : vector type info for the result and the source operand.
//   x86memop     : memory operand that the store forms write to.
//   itins        : supplies the rr / rm itineraries and the Sched classes.
// rr  : maskable register form, matched from X86cvtps2ph with an i32 immediate.
// mr  : store-to-memory form; declared with an empty pattern list and marked
//       mayStore = 1, hasSideEffects = 0.
// mrk : as mr, plus a _dest.KRCWM write-mask operand and EVEX_K.
Simon Pilgrim18bcf932016-02-03 09:41:59 +00007722multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
Simon Pilgrim569e53b2017-12-03 21:43:54 +00007723 X86MemOperand x86memop, OpndItins itins> {
Asaf Badouhc7cb8802015-10-27 15:37:17 +00007724 defm rr : AVX512_maskable<0x1D, MRMDestReg, _dest ,(outs _dest.RC:$dst),
Igor Breger73ee8ba2016-05-31 08:04:21 +00007725 (ins _src.RC:$src1, i32u8imm:$src2),
7726 "vcvtps2ph", "$src2, $src1", "$src1, $src2",
Asaf Badouhc7cb8802015-10-27 15:37:17 +00007727 (X86cvtps2ph (_src.VT _src.RC:$src1),
Craig Topperd8688702016-09-21 03:58:44 +00007728 (i32 imm:$src2)),
Simon Pilgrim569e53b2017-12-03 21:43:54 +00007729 itins.rr, 0, 0>, AVX512AIi8Base, Sched<[itins.Sched]>;
Craig Topper65e6d0b2017-11-08 04:00:31 +00007730 let hasSideEffects = 0, mayStore = 1 in {
7731 def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
7732 (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
7733 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
Simon Pilgrim569e53b2017-12-03 21:43:54 +00007734 [], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Craig Topper65e6d0b2017-11-08 04:00:31 +00007735 def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
7736 (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
7737 "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
Simon Pilgrim569e53b2017-12-03 21:43:54 +00007738 [], itins.rm>, EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>;
Craig Topper65e6d0b2017-11-08 04:00:31 +00007739 }
Elena Demikhovskydd0794e2013-10-24 07:16:35 +00007740}
Simon Pilgrim569e53b2017-12-03 21:43:54 +00007741
7742multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
7743 OpndItins itins> {
Craig Topperd8688702016-09-21 03:58:44 +00007744 let hasSideEffects = 0 in
Craig Topper1de942b2017-12-10 17:42:44 +00007745 defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
Craig Topperd8688702016-09-21 03:58:44 +00007746 (outs _dest.RC:$dst),
Igor Breger73ee8ba2016-05-31 08:04:21 +00007747 (ins _src.RC:$src1, i32u8imm:$src2),
7748 "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2",
Simon Pilgrim569e53b2017-12-03 21:43:54 +00007749 [], itins.rr>, EVEX_B, AVX512AIi8Base, Sched<[itins.Sched]>;
Asaf Badouhc7cb8802015-10-27 15:37:17 +00007750}
Simon Pilgrim569e53b2017-12-03 21:43:54 +00007751
// vcvtps2ph instantiations and store-folding patterns.
//  * VCVTPS2PHZ (512-bit, with the {sae} form) is gated on HasAVX512.
//  * VCVTPS2PHZ256 / VCVTPS2PHZ128 sit in a nested let that replaces the
//    predicate list with [HasVLX].
//  * The trailing patterns select the "mr" store forms, which are declared
//    without patterns of their own, for a store of the conversion result.
Asaf Badouhc7cb8802015-10-27 15:37:17 +00007752let Predicates = [HasAVX512] in {
Simon Pilgrim569e53b2017-12-03 21:43:54 +00007753 defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
7754 SSE_CVT_PS2PH>,
7755 avx512_cvtps2ph_sae<v16i16x_info, v16f32_info,
7756 SSE_CVT_PS2PH>, EVEX, EVEX_V512,
7757 EVEX_CD8<32, CD8VH>;
Asaf Badouhc7cb8802015-10-27 15:37:17 +00007758 let Predicates = [HasVLX] in {
Simon Pilgrim569e53b2017-12-03 21:43:54 +00007759 defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
7760 SSE_CVT_PS2PH>, EVEX, EVEX_V256,
7761 EVEX_CD8<32, CD8VH>;
7762 defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
7763 SSE_CVT_PS2PH>, EVEX, EVEX_V128,
7764 EVEX_CD8<32, CD8VH>;
Asaf Badouhc7cb8802015-10-27 15:37:17 +00007765 }
Craig Topper65e6d0b2017-11-08 04:00:31 +00007766
// NOTE(review): the first three patterns below select the Z128 / Z256 store
// instructions but sit outside the nested HasVLX scope, so they are gated on
// HasAVX512 only - confirm that this is intended.
7767 def : Pat<(store (f64 (extractelt
7768 (bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
7769 (iPTR 0))), addr:$dst),
7770 (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
7771 def : Pat<(store (i64 (extractelt
7772 (bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
7773 (iPTR 0))), addr:$dst),
7774 (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
7775 def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, i32:$src2)), addr:$dst),
7776 (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, imm:$src2)>;
7777 def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, i32:$src2)), addr:$dst),
7778 (VCVTPS2PHZmr addr:$dst, VR512:$src1, imm:$src2)>;
Asaf Badouhc7cb8802015-10-27 15:37:17 +00007779}
Asaf Badouh2489f352015-12-02 08:17:51 +00007780
Craig Topper9820e342016-09-20 05:44:47 +00007781// Patterns for matching conversions from float to half-float and vice versa.
// Scalar f32 <-> f16 conversions, selected through the 128-bit vector
// instructions with register-class copies around them. Gated on HasVLX.
//   fp_to_f16           : copy FR32X to VR128X, VCVTPS2PHZ128rr with immediate
//                         4, move to a GPR, take the 16-bit subregister.
//   f16_to_fp           : MOVSX32rr16 the GR16 source, copy to VR128X,
//                         VCVTPH2PSZ128rr, copy back to FR32X.
//   f16_to_fp(fp_to_f16): both conversions chained without leaving the
//                         vector register.
Craig Topperb3b50332016-09-19 02:53:37 +00007782let Predicates = [HasVLX] in {
7783 // Use MXCSR.RC for rounding instead of explicitly specifying the default
7784 // rounding mode (Nearest-Even, encoded as 0). Both are equivalent in the
7785 // configurations we support (the default). However, falling back to MXCSR is
7786 // more consistent with other instructions, which are always controlled by it.
7787 // It's encoded as 0b100.
7788 def : Pat<(fp_to_f16 FR32X:$src),
7789 (i16 (EXTRACT_SUBREG (VMOVPDI2DIZrr (VCVTPS2PHZ128rr
7790 (COPY_TO_REGCLASS FR32X:$src, VR128X), 4)), sub_16bit))>;
7791
7792 def : Pat<(f16_to_fp GR16:$src),
7793 (f32 (COPY_TO_REGCLASS (VCVTPH2PSZ128rr
7794 (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128X)), FR32X)) >;
7795
7796 def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32X:$src))),
7797 (f32 (COPY_TO_REGCLASS (VCVTPH2PSZ128rr
7798 (VCVTPS2PHZ128rr (COPY_TO_REGCLASS FR32X:$src, VR128X), 4)), FR32X)) >;
7799}
7800
Asaf Badouh2489f352015-12-02 08:17:51 +00007801// Unordered/Ordered scalar fp compare with SAE and set EFLAGS
// avx512_ord_cmp_sae - defines the "rrb" register-register compare form whose
// assembly string carries "{sae}".
//   opc       : opcode byte.
//   _         : vector type info supplying the register class of both operands.
//   OpcodeStr : mnemonic prepended to the operand string.
//   itins     : supplies the rr itinerary and the Sched class.
// The record has no outputs and an empty pattern list, and is marked
// hasSideEffects = 0.
Craig Topper7e664da2016-09-24 21:42:43 +00007802multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00007803 string OpcodeStr, OpndItins itins> {
Craig Topper07a7d562017-07-23 03:59:39 +00007804 let hasSideEffects = 0 in
Craig Topperc89e2822017-12-10 09:14:38 +00007805 def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
7806 !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"),
7807 [], itins.rr>, EVEX, EVEX_B, VEX_LIG, EVEX_V128,
7808 Sched<[itins.Sched]>;
Asaf Badouh2489f352015-12-02 08:17:51 +00007809}
7810
// {sae} forms of the four scalar compares (opcode 0x2E for the vucomis*
// mnemonics, 0x2F for vcomis*). Every record in this scope is declared with
// Defs = [EFLAGS] and gated on HasAVX512.
7811let Defs = [EFLAGS], Predicates = [HasAVX512] in {
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00007812 defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSE_COMIS>,
Asaf Badouh2489f352015-12-02 08:17:51 +00007813 AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00007814 defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSE_COMIS>,
Asaf Badouh2489f352015-12-02 08:17:51 +00007815 AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00007816 defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSE_COMIS>,
Asaf Badouh2489f352015-12-02 08:17:51 +00007817 AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00007818 defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSE_COMIS>,
Asaf Badouh2489f352015-12-02 08:17:51 +00007819 AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
7820}
7821
// Scalar compare instantiations built on sse12_ord_cmp / sse12_ord_cmp_int.
// Every record in this scope is declared with Defs = [EFLAGS] and gated on
// HasAVX512.
//  * VUCOMISSZ / VUCOMISDZ use X86cmp on FR32X / FR64X.
//  * VCOMISSZ / VCOMISDZ pass "undef" as the node and are wrapped in
//    "let Pattern = []<dag>", so they carry no selection pattern.
//  * The isCodeGenOnly = 1 group re-instantiates all four names through
//    sse12_ord_cmp_int on VR128X with X86ucomi / X86comi.
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007822let Defs = [EFLAGS], Predicates = [HasAVX512] in {
7823 defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00007824 "ucomiss", SSE_COMIS>, PS, EVEX, VEX_LIG,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007825 EVEX_CD8<32, CD8VT1>;
7826 defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00007827 "ucomisd", SSE_COMIS>, PD, EVEX,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007828 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
7829 let Pattern = []<dag> in {
Marina Yatsina7a4e1ba2015-08-20 11:21:36 +00007830 defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, undef, f32, f32mem, loadf32,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00007831 "comiss", SSE_COMIS>, PS, EVEX, VEX_LIG,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007832 EVEX_CD8<32, CD8VT1>;
Marina Yatsina7a4e1ba2015-08-20 11:21:36 +00007833 defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, undef, f64, f64mem, loadf64,
Simon Pilgrimfd3a2632017-12-05 13:49:44 +00007834 "comisd", SSE_COMIS>, PD, EVEX,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007835 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
7836 }
Craig Topper9dd48c82014-01-02 17:28:14 +00007837 let isCodeGenOnly = 1 in {
Craig Topper00265772018-01-23 21:37:51 +00007838 defm VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
7839 sse_load_f32, "ucomiss", SSE_COMIS>, PS, EVEX, VEX_LIG,
7840 EVEX_CD8<32, CD8VT1>;
7841 defm VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
7842 sse_load_f64, "ucomisd", SSE_COMIS>, PD, EVEX,
7843 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007844
Craig Topper00265772018-01-23 21:37:51 +00007845 defm VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
7846 sse_load_f32, "comiss", SSE_COMIS>, PS, EVEX, VEX_LIG,
7847 EVEX_CD8<32, CD8VT1>;
7848 defm VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
7849 sse_load_f64, "comisd", SSE_COMIS>, PD, EVEX,
7850 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
Craig Topper9dd48c82014-01-02 17:28:14 +00007851 }
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007852}
Michael Liao5bf95782014-12-04 05:20:33 +00007853
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00007854/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
// avx512_fp14_s - defines the two-source maskable scalar forms.
//   opc / OpcodeStr : opcode byte and mnemonic.
//   OpNode          : node applied to ($src1, $src2).
//   itins           : supplies the rr / rm itineraries and the Sched classes.
//   _               : type info supplying RC, VT, IntScalarMemOp,
//                     ScalarIntMemCPat and ExeDomain.
// rr takes both sources in registers; rm takes $src2 from memory through
// _.IntScalarMemOp / _.ScalarIntMemCPat. Both are gated on HasAVX512.
Asaf Badouheaf2da12015-09-21 10:23:53 +00007855multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrim3e5987c2017-11-30 10:48:47 +00007856 OpndItins itins, X86VectorVTInfo _> {
Craig Topper176f3312017-02-25 19:18:11 +00007857 let Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
Asaf Badouheaf2da12015-09-21 10:23:53 +00007858 defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7859 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
7860 "$src2, $src1", "$src1, $src2",
Simon Pilgrim3e5987c2017-11-30 10:48:47 +00007861 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)), itins.rr>,
7862 EVEX_4V, Sched<[itins.Sched]>;
Asaf Badouheaf2da12015-09-21 10:23:53 +00007863 defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
Craig Topper75d71542017-11-13 08:07:33 +00007864 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
Asaf Badouheaf2da12015-09-21 10:23:53 +00007865 "$src2, $src1", "$src1, $src2",
7866 (OpNode (_.VT _.RC:$src1),
Simon Pilgrim3e5987c2017-11-30 10:48:47 +00007867 _.ScalarIntMemCPat:$src2), itins.rm>, EVEX_4V,
7868 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00007869}
7870}
7871
// Scalar rcp14 / rsqrt14 instantiations. Opcode 0x4D with X86rcp14s is used
// for vrcp14ss/sd and opcode 0x4F with X86rsqrt14s for vrsqrt14ss/sd; the SD
// variants add VEX_W and use 64-bit EVEX_CD8. All four are tagged
// NotMemoryFoldable.
Simon Pilgrim3e5987c2017-11-30 10:48:47 +00007872defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SSE_RCPS, f32x_info>,
Ayman Musa5fc6dc52017-10-08 08:32:56 +00007873 EVEX_CD8<32, CD8VT1>, T8PD, NotMemoryFoldable;
Simon Pilgrim3e5987c2017-11-30 10:48:47 +00007874defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SSE_RCPS, f64x_info>,
Ayman Musa5fc6dc52017-10-08 08:32:56 +00007875 VEX_W, EVEX_CD8<64, CD8VT1>, T8PD, NotMemoryFoldable;
Simon Pilgrim3e5987c2017-11-30 10:48:47 +00007876defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s, SSE_RSQRTSS, f32x_info>,
Ayman Musa5fc6dc52017-10-08 08:32:56 +00007877 EVEX_CD8<32, CD8VT1>, T8PD, NotMemoryFoldable;
Simon Pilgrim3e5987c2017-11-30 10:48:47 +00007878defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s, SSE_RSQRTSS, f64x_info>,
Ayman Musa5fc6dc52017-10-08 08:32:56 +00007879 VEX_W, EVEX_CD8<64, CD8VT1>, T8PD, NotMemoryFoldable;
Elena Demikhovskya3a71402013-10-09 08:16:14 +00007880
/// avx512_fp14_p - packed forms instantiated below as rcp14ps, rcp14pd,
/// rsqrt14ps and rsqrt14pd. Emits three maskable variants of a unary op:
///   r  - source in a register.
///   m  - source is a full-width vector load.
///   mb - source is a scalar load broadcast to the vector (EVEX_B).
multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                             (ins _.RC:$src),
                             OpcodeStr, "$src", "$src",
                             (_.FloatVT (OpNode _.RC:$src)),
                             itins.rr>,
             EVEX, T8PD, Sched<[itins.Sched]>;

    defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                             (ins _.MemOp:$src),
                             OpcodeStr, "$src", "$src",
                             (OpNode (_.FloatVT
                                       (bitconvert (_.LdFrag addr:$src)))),
                             itins.rm>,
             EVEX, T8PD, Sched<[itins.Sched.Folded, ReadAfterLd]>;

    // The assembly operand string gets the broadcast suffix appended.
    defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                              (ins _.ScalarMemOp:$src),
                              OpcodeStr,
                              "${src}"##_.BroadcastStr,
                              "${src}"##_.BroadcastStr,
                              (OpNode (_.FloatVT
                                        (X86VBroadcast
                                          (_.ScalarLdFrag addr:$src)))),
                              itins.rm>,
              EVEX, T8PD, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
Robert Khasanov3e534c92014-10-28 16:37:13 +00007902
// Instantiates avx512_fp14_p for every packed width: the 512-bit PS/PD forms
// unconditionally, and the 128/256-bit forms under HasVLX. "ps"/"pd" is
// appended to OpcodeStr; itins.s feeds the PS forms and itins.d the PD forms.
multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                SizeItins itins> {
  defm PSZ : avx512_fp14_p<opc, OpcodeStr#"ps", OpNode, itins.s, v16f32_info>,
             EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp14_p<opc, OpcodeStr#"pd", OpNode, itins.d, v8f64_info>,
             EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

  // The narrower vector lengths are only defined when AVX512VL is available.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp14_p<opc, OpcodeStr#"ps", OpNode, itins.s,
                                v4f32x_info>,
                  EVEX_V128, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp14_p<opc, OpcodeStr#"ps", OpNode, itins.s,
                                v8f32x_info>,
                  EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp14_p<opc, OpcodeStr#"pd", OpNode, itins.d,
                                v2f64x_info>,
                  EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp14_p<opc, OpcodeStr#"pd", OpNode, itins.d,
                                v4f64x_info>,
                  EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
  }
}
7926
// Packed rsqrt14 / rcp14 at all vector lengths.
defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14,
                                     SSE_RSQRT_P>;
defm VRCP14   : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14,
                                     SSE_RCP_P>;
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00007929
/// avx512_fp28_s - scalar forms used for rcp28ss, rcp28sd, rsqrt28ss and
/// rsqrt28sd (and for the scalar getexp instantiation further below).
/// OpNode takes two vector operands plus an i32 rounding/exception-control
/// operand.
///   r  - register-register, FROUND_CURRENT.
///   rb - register-register, printed with {sae}, FROUND_NO_EXC (EVEX_B).
///   m  - register-memory, FROUND_CURRENT.
multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                         SDNode OpNode, OpndItins itins> {
  let ExeDomain = _.ExeDomain in {
    defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                                    "$src2, $src1", "$src1, $src2",
                                    (OpNode (_.VT _.RC:$src1),
                                            (_.VT _.RC:$src2),
                                            (i32 FROUND_CURRENT)),
                                    itins.rr>,
             Sched<[itins.Sched]>;

    // This is a register-register form (MRMSrcReg, unfolded scheduling
    // class), so it takes the rr itinerary like the other {sae} register
    // forms in this file.
    defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                     (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                                     "{sae}, $src2, $src1",
                                     "$src1, $src2, {sae}",
                                     (OpNode (_.VT _.RC:$src1),
                                             (_.VT _.RC:$src2),
                                             (i32 FROUND_NO_EXC)),
                                     itins.rr>,
              EVEX_B, Sched<[itins.Sched]>;

    defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                    (ins _.RC:$src1, _.IntScalarMemOp:$src2),
                                    OpcodeStr,
                                    "$src2, $src1", "$src1, $src2",
                                    (OpNode (_.VT _.RC:$src1),
                                            _.ScalarIntMemCPat:$src2,
                                            (i32 FROUND_CURRENT)),
                                    itins.rm>,
             Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
7956
// Instantiates avx512_fp28_s once for f32 ("ss" suffix, itins.s) and once for
// f64 ("sd" suffix, itins.d, VEX_W).
multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                        SizeItins itins> {
  defm SS : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, itins.s>,
            EVEX_CD8<32, CD8VT1>;
  defm SD : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, itins.d>,
            EVEX_CD8<64, CD8VT1>, VEX_W;
}
Elena Demikhovskya3a71402013-10-09 08:16:14 +00007964
// Scalar rcp28 / rsqrt28 are only defined under the HasERI predicate.
let Predicates = [HasERI] in {
  defm VRCP28   : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, SSE_RCP_S>,
                  T8PD, EVEX_4V;
  defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, SSE_RSQRT_S>,
                  T8PD, EVEX_4V;
}

// Scalar getexp reuses the same multiclass but sits outside the HasERI block.
defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds, SSE_ALU_ITINS_S>,
               T8PD, EVEX_4V;
/// avx512_fp28_p - packed forms used for rcp28ps, rcp28pd, rsqrt28ps and
/// rsqrt28pd. OpNode takes the source vector plus an i32 rounding operand;
/// all three variants here pass FROUND_CURRENT.
///   r  - source in a register.
///   m  - source is a full-width vector load.
///   mb - source is a broadcast scalar load (EVEX_B).

multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         SDNode OpNode, OpndItins itins> {
  let ExeDomain = _.ExeDomain in {
    defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                             (ins _.RC:$src),
                             OpcodeStr, "$src", "$src",
                             (OpNode (_.VT _.RC:$src),
                                     (i32 FROUND_CURRENT)),
                             itins.rr>,
             Sched<[itins.Sched]>;

    defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                             (ins _.MemOp:$src),
                             OpcodeStr, "$src", "$src",
                             (OpNode (_.FloatVT
                                       (bitconvert (_.LdFrag addr:$src))),
                                     (i32 FROUND_CURRENT)),
                             itins.rm>,
             Sched<[itins.Sched.Folded, ReadAfterLd]>;

    defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                              (ins _.ScalarMemOp:$src),
                              OpcodeStr,
                              "${src}"##_.BroadcastStr,
                              "${src}"##_.BroadcastStr,
                              (OpNode (_.FloatVT
                                        (X86VBroadcast
                                          (_.ScalarLdFrag addr:$src))),
                                      (i32 FROUND_CURRENT)),
                              itins.rm>,
              EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
// Adds the {sae} register form of a packed fp28-style op: same operands as
// the plain register form, but OpNode receives FROUND_NO_EXC and the
// instruction carries EVEX_B.
multiclass avx512_fp28_p_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                               SDNode OpNode, OpndItins itins> {
  let ExeDomain = _.ExeDomain in {
    defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                              (ins _.RC:$src),
                              OpcodeStr,
                              "{sae}, $src", "$src, {sae}",
                              (OpNode (_.VT _.RC:$src),
                                      (i32 FROUND_NO_EXC)),
                              itins.rr>,
              EVEX_B, Sched<[itins.Sched]>;
  }
}
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00008009
// 512-bit PS and PD instantiations that combine the regular forms
// (avx512_fp28_p) with the {sae} form (avx512_fp28_p_round).
multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
                      SizeItins itins> {
  defm PS : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, itins.s>,
            avx512_fp28_p_round<opc, OpcodeStr#"ps", v16f32_info, OpNode,
                                itins.s>,
            T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PD : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, itins.d>,
            avx512_fp28_p_round<opc, OpcodeStr#"pd", v8f64_info, OpNode,
                                itins.d>,
            T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00008019
// 128-bit and 256-bit instantiations of avx512_fp28_p (no {sae} form),
// available only under HasVLX.
multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode, SizeItins itins> {
  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode,
                                itins.s>,
                  EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode,
                                itins.s>,
                  EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode,
                                itins.d>,
                  EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode,
                                itins.d>,
                  EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
  }
}
// Packed rsqrt28 / rcp28 / exp2 are only defined under the HasERI predicate.
let Predicates = [HasERI] in {
  defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, SSE_RSQRT_P>, EVEX;
  defm VRCP28   : avx512_eri<0xCA, "vrcp28", X86rcp28, SSE_RCP_P>, EVEX;
  defm VEXP2    : avx512_eri<0xC8, "vexp2", X86exp2, SSE_ALU_ITINS_P>, EVEX;
}

// Packed getexp sits outside the HasERI block: the 512-bit forms come from
// avx512_eri and the 128/256-bit forms from avx512_fp_unaryop_packed.
defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd, SSE_ALU_ITINS_P>,
               avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd,
                                        SSE_ALU_ITINS_P>,
               EVEX;
Asaf Badouh402ebb32015-06-03 13:41:48 +00008043
// Packed sqrt register form with an explicit rounding-control operand ($rc),
// matched through X86fsqrtRnd and encoded with EVEX_B + EVEX_RC.
multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
                                    OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                              (ins _.RC:$src, AVX512RC:$rc),
                              OpcodeStr,
                              "$rc, $src", "$src, $rc",
                              (_.VT (X86fsqrtRnd _.RC:$src, (i32 imm:$rc))),
                              itins.rr>,
              EVEX, EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
  }
}
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00008052
// Packed sqrt matched on the generic fsqrt node:
//   r  - source in a register.
//   m  - source is a full-width vector load.
//   mb - source is a broadcast scalar load (EVEX_B).
multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr, OpndItins itins,
                              X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                             (ins _.RC:$src),
                             OpcodeStr, "$src", "$src",
                             (_.FloatVT (fsqrt _.RC:$src)),
                             itins.rr>,
             EVEX, Sched<[itins.Sched]>;

    defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                             (ins _.MemOp:$src),
                             OpcodeStr, "$src", "$src",
                             (fsqrt (_.FloatVT
                                      (bitconvert (_.LdFrag addr:$src)))),
                             itins.rm>,
             EVEX, Sched<[itins.Sched.Folded, ReadAfterLd]>;

    defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                              (ins _.ScalarMemOp:$src),
                              OpcodeStr,
                              "${src}"##_.BroadcastStr,
                              "${src}"##_.BroadcastStr,
                              (fsqrt (_.FloatVT
                                       (X86VBroadcast
                                         (_.ScalarLdFrag addr:$src)))),
                              itins.rm>,
              EVEX, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
8073
// Instantiates avx512_sqrt_packed at every vector length. The 512-bit forms
// are unconditional; the 128/256-bit forms require HasVLX. PS forms use
// SSE_SQRTPS and the PS prefix; PD forms use SSE_SQRTPD, the PD prefix and
// VEX_W.
multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr> {
  defm PSZ : avx512_sqrt_packed<opc, OpcodeStr#"ps", SSE_SQRTPS, v16f32_info>,
             EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_sqrt_packed<opc, OpcodeStr#"pd", SSE_SQRTPD, v8f64_info>,
             EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;

  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_sqrt_packed<opc, OpcodeStr#"ps", SSE_SQRTPS,
                                     v4f32x_info>,
                  EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_sqrt_packed<opc, OpcodeStr#"ps", SSE_SQRTPS,
                                     v8f32x_info>,
                  EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_sqrt_packed<opc, OpcodeStr#"pd", SSE_SQRTPD,
                                     v2f64x_info>,
                  EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_sqrt_packed<opc, OpcodeStr#"pd", SSE_SQRTPD,
                                     v4f64x_info>,
                  EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
  }
}
8095
// Rounding-control sqrt forms; only the 512-bit PS and PD variants are
// instantiated here.
multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr> {
  defm PSZ : avx512_sqrt_packed_round<opc, OpcodeStr#"ps", SSE_SQRTPS,
                                      v16f32_info>,
             EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_sqrt_packed_round<opc, OpcodeStr#"pd", SSE_SQRTPD,
                                      v8f64_info>,
             EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
}
8102
// Scalar sqrt. Defines two families of instructions plus selection patterns:
//   r_Int / m_Int / rb_Int - maskable forms on the vector register class,
//                            matched through X86fsqrtRnds.
//   r / m                  - isCodeGenOnly forms on the scalar FP register
//                            class, carrying no pattern of their own ([]).
// SUFF is pasted between NAME and the "Z..." record suffix to name the
// instructions referenced by the patterns; Intr is the intrinsic mapped onto
// the _Int forms.
multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, OpndItins itins,
                              X86VectorVTInfo _, string SUFF, Intrinsic Intr> {
  let ExeDomain = _.ExeDomain in {
    defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                        (ins _.RC:$src1, _.RC:$src2),
                                        OpcodeStr,
                                        "$src2, $src1", "$src1, $src2",
                                        (X86fsqrtRnds (_.VT _.RC:$src1),
                                                      (_.VT _.RC:$src2),
                                                      (i32 FROUND_CURRENT)),
                                        itins.rr>,
                 Sched<[itins.Sched]>;

    defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                        (ins _.RC:$src1,
                                             _.IntScalarMemOp:$src2),
                                        OpcodeStr,
                                        "$src2, $src1", "$src1, $src2",
                                        (X86fsqrtRnds (_.VT _.RC:$src1),
                                                      _.ScalarIntMemCPat:$src2,
                                                      (i32 FROUND_CURRENT)),
                                        itins.rm>,
                 Sched<[itins.Sched.Folded, ReadAfterLd]>;

    // Explicit rounding-control form (EVEX_B + EVEX_RC).
    defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                         (ins _.RC:$src1, _.RC:$src2,
                                              AVX512RC:$rc),
                                         OpcodeStr,
                                         "$rc, $src2, $src1",
                                         "$src1, $src2, $rc",
                                         (X86fsqrtRnds (_.VT _.RC:$src1),
                                                       (_.VT _.RC:$src2),
                                                       (i32 imm:$rc)),
                                         itins.rr>,
                  EVEX_B, EVEX_RC, Sched<[itins.Sched]>;

    let isCodeGenOnly = 1, hasSideEffects = 0, Predicates=[HasAVX512] in {
      def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
                (ins _.FRC:$src1, _.FRC:$src2),
                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
                itins.rr>,
              Sched<[itins.Sched]>;

      let mayLoad = 1 in {
        def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
                  (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
                  itins.rm>,
                Sched<[itins.Sched.Folded, ReadAfterLd]>;
      }
    }
  }

  let Predicates = [HasAVX512] in {
    // Scalar fsqrt: the first source operand is an IMPLICIT_DEF.
    def : Pat<(_.EltVT (fsqrt _.FRC:$src)),
              (!cast<Instruction>(NAME#SUFF#Zr)
                 (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;

    // Intrinsic on a register: the same register feeds both source operands.
    def : Pat<(Intr VR128X:$src),
              (!cast<Instruction>(NAME#SUFF#Zr_Int) VR128X:$src,
                                                    VR128X:$src)>;
  }

  // The load-folding patterns are only enabled together with OptForSize.
  let Predicates = [HasAVX512, OptForSize] in {
    def : Pat<(_.EltVT (fsqrt (load addr:$src))),
              (!cast<Instruction>(NAME#SUFF#Zm)
                 (_.EltVT (IMPLICIT_DEF)), addr:$src)>;

    def : Pat<(Intr _.ScalarIntMemCPat:$src2),
              (!cast<Instruction>(NAME#SUFF#Zm_Int)
                 (_.VT (IMPLICIT_DEF)), addr:$src2)>;
  }
}
Igor Breger4c4cd782015-09-20 09:13:41 +00008161
// Instantiates the scalar sqrt multiclass for f32 ("ss", XS prefix) and f64
// ("sd", XD prefix, VEX_W). Both are tagged NotMemoryFoldable.
//
// These are scalar instructions, so they take the scalar sqrt itineraries
// (SSE_SQRTSS / SSE_SQRTSD) rather than the packed ones used by
// avx512_sqrt_packed_all.
// NOTE(review): SSE_SQRTSS / SSE_SQRTSD are expected to be defined next to
// SSE_SQRTPS / SSE_SQRTPD in the SSE instruction definitions - confirm.
multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr> {
  defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", SSE_SQRTSS, f32x_info,
                                "SS", int_x86_sse_sqrt_ss>,
             EVEX_CD8<32, CD8VT1>, EVEX_4V, XS, NotMemoryFoldable;
  defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", SSE_SQRTSD, f64x_info,
                                "SD", int_x86_sse2_sqrt_sd>,
             EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W,
             NotMemoryFoldable;
}
8171
// Packed vsqrt: regular forms at all vector lengths plus the rounding-control
// forms.
defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt">,
             avx512_sqrt_packed_all_round<0x51, "vsqrt">;

// Scalar vsqrt (VEX_LIG).
defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt">, VEX_LIG;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008176
// Scalar rndscale. Defines:
//   r_Int / rb_Int / m_Int - maskable forms on the vector register class;
//                            rb_Int is the {sae} variant (FROUND_NO_EXC).
//   r / m                  - isCodeGenOnly forms on the scalar FP register
//                            class, carrying no pattern of their own ([]).
// followed by patterns that map the generic rounding nodes onto the r / m
// forms with a fixed immediate:
//   ffloor -> 0x9, fceil -> 0xa, ftrunc -> 0xb, frint -> 0x4,
//   fnearbyint -> 0xc.
multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
                                  OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                        (ins _.RC:$src1, _.RC:$src2,
                                             i32u8imm:$src3),
                                        OpcodeStr,
                                        "$src3, $src2, $src1",
                                        "$src1, $src2, $src3",
                                        (_.VT (X86RndScales (_.VT _.RC:$src1),
                                                            (_.VT _.RC:$src2),
                                                            (i32 imm:$src3))),
                                        itins.rr>,
                 Sched<[itins.Sched]>;

    defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                         (ins _.RC:$src1, _.RC:$src2,
                                              i32u8imm:$src3),
                                         OpcodeStr,
                                         "$src3, {sae}, $src2, $src1",
                                         "$src1, $src2, {sae}, $src3",
                                         (_.VT (X86RndScalesRnd
                                                 (_.VT _.RC:$src1),
                                                 (_.VT _.RC:$src2),
                                                 (i32 imm:$src3),
                                                 (i32 FROUND_NO_EXC))),
                                         itins.rr>,
                  EVEX_B, Sched<[itins.Sched]>;

    defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                        (ins _.RC:$src1,
                                             _.IntScalarMemOp:$src2,
                                             i32u8imm:$src3),
                                        OpcodeStr,
                                        "$src3, $src2, $src1",
                                        "$src1, $src2, $src3",
                                        (_.VT (X86RndScales
                                                _.RC:$src1,
                                                _.ScalarIntMemCPat:$src2,
                                                (i32 imm:$src3))),
                                        itins.rm>,
                 Sched<[itins.Sched.Folded, ReadAfterLd]>;

    let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
      def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
                (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
                OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                [], itins.rr>,
              Sched<[itins.Sched]>;

      let mayLoad = 1 in {
        def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
                  (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                  OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                  [], itins.rm>,
                Sched<[itins.Sched.Folded, ReadAfterLd]>;
      }
    }
  }

  // Register-source patterns; the first source operand is an IMPLICIT_DEF.
  let Predicates = [HasAVX512] in {
    def : Pat<(ffloor _.FRC:$src),
              (_.EltVT (!cast<Instruction>(NAME##r)
                         (_.EltVT (IMPLICIT_DEF)), _.FRC:$src, (i32 0x9)))>;
    def : Pat<(fceil _.FRC:$src),
              (_.EltVT (!cast<Instruction>(NAME##r)
                         (_.EltVT (IMPLICIT_DEF)), _.FRC:$src, (i32 0xa)))>;
    def : Pat<(ftrunc _.FRC:$src),
              (_.EltVT (!cast<Instruction>(NAME##r)
                         (_.EltVT (IMPLICIT_DEF)), _.FRC:$src, (i32 0xb)))>;
    def : Pat<(frint _.FRC:$src),
              (_.EltVT (!cast<Instruction>(NAME##r)
                         (_.EltVT (IMPLICIT_DEF)), _.FRC:$src, (i32 0x4)))>;
    def : Pat<(fnearbyint _.FRC:$src),
              (_.EltVT (!cast<Instruction>(NAME##r)
                         (_.EltVT (IMPLICIT_DEF)), _.FRC:$src, (i32 0xc)))>;
  }

  // Load-folding patterns, only enabled together with OptForSize.
  let Predicates = [HasAVX512, OptForSize] in {
    def : Pat<(ffloor (_.ScalarLdFrag addr:$src)),
              (_.EltVT (!cast<Instruction>(NAME##m)
                         (_.EltVT (IMPLICIT_DEF)), addr:$src, (i32 0x9)))>;
    def : Pat<(fceil (_.ScalarLdFrag addr:$src)),
              (_.EltVT (!cast<Instruction>(NAME##m)
                         (_.EltVT (IMPLICIT_DEF)), addr:$src, (i32 0xa)))>;
    def : Pat<(ftrunc (_.ScalarLdFrag addr:$src)),
              (_.EltVT (!cast<Instruction>(NAME##m)
                         (_.EltVT (IMPLICIT_DEF)), addr:$src, (i32 0xb)))>;
    def : Pat<(frint (_.ScalarLdFrag addr:$src)),
              (_.EltVT (!cast<Instruction>(NAME##m)
                         (_.EltVT (IMPLICIT_DEF)), addr:$src, (i32 0x4)))>;
    def : Pat<(fnearbyint (_.ScalarLdFrag addr:$src)),
              (_.EltVT (!cast<Instruction>(NAME##m)
                         (_.EltVT (IMPLICIT_DEF)), addr:$src, (i32 0xc)))>;
  }
}
8252
// Scalar rndscale instantiations: f32 uses opcode 0x0A, f64 uses opcode 0x0B
// and adds VEX_W.
defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", SSE_ALU_F32S,
                                          f32x_info>,
                   AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VT1>;

defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", SSE_ALU_F64S,
                                          f64x_info>,
                   VEX_W, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VT1>;
Eric Christopher0d94fa92015-02-20 00:45:28 +00008259
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008260//-------------------------------------------------
8261// Integer truncate and extend operations
8262//-------------------------------------------------
8263
// Itinerary pair for the extend instructions: PSHUF register/memory
// itineraries with the WriteShuffle256 scheduling class.
let Sched = WriteShuffle256 in {
  def AVX512_EXTEND : OpndItins<IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI>;
}
8268
// Itinerary pair for the truncate instructions: PSHUF register/memory
// itineraries with the WriteShuffle256 scheduling class.
let Sched = WriteShuffle256 in {
  def AVX512_TRUNCATE : OpndItins<IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI>;
}
8273
// Truncate instructions for one source/destination type pair:
//   rr  - maskable register-to-register truncate, matched on OpNode.
//   mr  - truncating store to x86memop; no pattern attached here ([]).
//   mrk - write-masked truncating store; no pattern attached here ([]).
// Selection patterns for mr / mrk are provided by avx512_trunc_mr_lowering.
multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               OpndItins itins, X86VectorVTInfo SrcInfo,
                               X86VectorVTInfo DestInfo,
                               X86MemOperand x86memop> {
  let ExeDomain = DestInfo.ExeDomain in {
    defm rr : AVX512_maskable<opc, MRMDestReg, DestInfo,
                              (outs DestInfo.RC:$dst),
                              (ins SrcInfo.RC:$src1),
                              OpcodeStr, "$src1", "$src1",
                              (DestInfo.VT
                                (OpNode (SrcInfo.VT SrcInfo.RC:$src1))),
                              itins.rr>,
              EVEX, T8XS, Sched<[itins.Sched]>;
  }

  // The store forms carry no pattern, so their memory flags are set by hand.
  let mayStore = 1, mayLoad = 1, hasSideEffects = 0,
      ExeDomain = DestInfo.ExeDomain in {
    def mr : AVX512XS8I<opc, MRMDestMem, (outs),
                        (ins x86memop:$dst, SrcInfo.RC:$src),
                        OpcodeStr # "\t{$src, $dst|$dst, $src}",
                        [], itins.rm>,
             EVEX, Sched<[itins.Sched.Folded]>;

    def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
                         (ins x86memop:$dst, SrcInfo.KRCWM:$mask,
                              SrcInfo.RC:$src),
                         OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                         [], itins.rm>,
              EVEX, EVEX_K, Sched<[itins.Sched.Folded]>;
  } // mayStore = 1, mayLoad = 1, hasSideEffects = 0
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008296
// Selection patterns for the truncating-store instructions. The instruction
// record is looked up by name: NAME, then the source type's ZSuffix, then
// "mr" (plain store) or "mrk" (masked store).
// DestInfo is not referenced by either pattern.
multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
                                    X86VectorVTInfo DestInfo,
                                    PatFrag truncFrag, PatFrag mtruncFrag> {
  // Unmasked truncating store.
  def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
            (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mr)
               addr:$dst, SrcInfo.RC:$src)>;

  // Masked truncating store.
  def : Pat<(mtruncFrag addr:$dst, SrcInfo.KRCWM:$mask,
                        (SrcInfo.VT SrcInfo.RC:$src)),
            (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mrk)
               addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
}
8310
// Instantiates the truncate instructions and their store-lowering patterns
// for all three source vector lengths. Each length gets its own SDNode,
// destination type info and memory operand. The 128/256-bit variants require
// HasVLX in addition to prd; the 512-bit variant requires only prd.
multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
                        SDNode OpNode256, SDNode OpNode512, OpndItins itins,
                        AVX512VLVectorVTInfo VTSrcInfo,
                        X86VectorVTInfo DestInfoZ128,
                        X86VectorVTInfo DestInfoZ256,
                        X86VectorVTInfo DestInfoZ,
                        X86MemOperand x86memopZ128,
                        X86MemOperand x86memopZ256,
                        X86MemOperand x86memopZ, PatFrag truncFrag,
                        PatFrag mtruncFrag, Predicate prd = HasAVX512> {

  let Predicates = [HasVLX, prd] in {
    defm Z128 : avx512_trunc_common<opc, OpcodeStr, OpNode128, itins,
                                    VTSrcInfo.info128, DestInfoZ128,
                                    x86memopZ128>,
                avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
                                         truncFrag, mtruncFrag>,
                EVEX_V128;

    defm Z256 : avx512_trunc_common<opc, OpcodeStr, OpNode256, itins,
                                    VTSrcInfo.info256, DestInfoZ256,
                                    x86memopZ256>,
                avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
                                         truncFrag, mtruncFrag>,
                EVEX_V256;
  }

  let Predicates = [prd] in {
    defm Z : avx512_trunc_common<opc, OpcodeStr, OpNode512, itins,
                                 VTSrcInfo.info512, DestInfoZ, x86memopZ>,
             avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
                                      truncFrag, mtruncFrag>,
             EVEX_V512;
  }
}
8337
// i64 -> i8 truncation. The destination type info is v16i8 at every source
// length and all three lengths are matched with InVecNode (OpNode only serves
// as InVecNode's default). Memory operands are 16/32/64 bits wide.
multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           OpndItins itins, PatFrag StoreNode,
                           PatFrag MaskedStoreNode,
                           SDNode InVecNode = OpNode> {
  defm NAME : avx512_trunc<opc, OpcodeStr,
                           InVecNode, InVecNode, InVecNode, itins,
                           avx512vl_i64_info,
                           v16i8x_info, v16i8x_info, v16i8x_info,
                           i16mem, i32mem, i64mem,
                           StoreNode, MaskedStoreNode>,
              EVEX_CD8<8, CD8VO>;
}
8346
// Truncation with avx512vl_i64_info sources and v8i16x_info destinations at
// every vector length. InVecNode is used for Z128 and Z256, OpNode for Z;
// memory operands are i32mem / i64mem / i128mem.
multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           OpndItins itins, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode, itins,
                          avx512vl_i64_info, v8i16x_info, v8i16x_info,
                          v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
}
8355
// Truncation with avx512vl_i64_info sources. Destinations are v4i32x_info for
// Z128 and Z256 and v8i32x_info for Z. InVecNode is used for Z128 only; Z256
// and Z use OpNode. Memory operands are i64mem / i128mem / i256mem.
multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           OpndItins itins, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, itins,
                          avx512vl_i64_info, v4i32x_info, v4i32x_info,
                          v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
}
8364
// Truncation with avx512vl_i32_info sources and v16i8x_info destinations at
// every vector length. InVecNode is used for Z128 and Z256, OpNode for Z;
// memory operands are i32mem / i64mem / i128mem.
multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           OpndItins itins, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode, itins,
                          avx512vl_i32_info, v16i8x_info, v16i8x_info,
                          v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
}
8373
// Truncation with avx512vl_i32_info sources. Destinations are v8i16x_info for
// Z128 and Z256 and v16i16x_info for Z. InVecNode is used for Z128 only; Z256
// and Z use OpNode. Memory operands are i64mem / i128mem / i256mem.
multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           OpndItins itins, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, itins,
                          avx512vl_i32_info, v8i16x_info, v8i16x_info,
                          v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
}
8382
// Truncation with avx512vl_i16_info sources. Destinations are v16i8x_info for
// Z128 and Z256 and v32i8x_info for Z. InVecNode is used for Z128 only; Z256
// and Z use OpNode. Memory operands are i64mem / i128mem / i256mem.
// Unlike the other avx512_trunc_* wrappers, the base predicate is HasBWI.
multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           OpndItins itins, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
                          itins, avx512vl_i16_info, v16i8x_info, v16i8x_info,
                          v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
}
8391
// Truncating-move instantiations. Each source/destination width pair comes in
// three flavours: the plain form built on `trunc` (with X86vtrunc as the
// InVecNode), the "s" form built on X86vtruncs and the "us" form built on
// X86vtruncus. The last two rely on the default InVecNode = OpNode.

// 64-bit elements -> 8-bit elements.
defm VPMOVQB    : avx512_trunc_qb<0x32, "vpmovqb",   trunc, AVX512_TRUNCATE,
                                  truncstorevi8, masked_truncstorevi8, X86vtrunc>;
defm VPMOVSQB   : avx512_trunc_qb<0x22, "vpmovsqb",  X86vtruncs, AVX512_TRUNCATE,
                                  truncstore_s_vi8, masked_truncstore_s_vi8>;
defm VPMOVUSQB  : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus, AVX512_TRUNCATE,
                                  truncstore_us_vi8, masked_truncstore_us_vi8>;

// 64-bit elements -> 16-bit elements.
defm VPMOVQW    : avx512_trunc_qw<0x34, "vpmovqw",   trunc, AVX512_TRUNCATE,
                                  truncstorevi16, masked_truncstorevi16, X86vtrunc>;
defm VPMOVSQW   : avx512_trunc_qw<0x24, "vpmovsqw",  X86vtruncs, AVX512_TRUNCATE,
                                  truncstore_s_vi16, masked_truncstore_s_vi16>;
defm VPMOVUSQW  : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus, AVX512_TRUNCATE,
                                  truncstore_us_vi16, masked_truncstore_us_vi16>;

// 64-bit elements -> 32-bit elements.
defm VPMOVQD    : avx512_trunc_qd<0x35, "vpmovqd",   trunc, AVX512_TRUNCATE,
                                  truncstorevi32, masked_truncstorevi32, X86vtrunc>;
defm VPMOVSQD   : avx512_trunc_qd<0x25, "vpmovsqd",  X86vtruncs, AVX512_TRUNCATE,
                                  truncstore_s_vi32, masked_truncstore_s_vi32>;
defm VPMOVUSQD  : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus, AVX512_TRUNCATE,
                                  truncstore_us_vi32, masked_truncstore_us_vi32>;

// 32-bit elements -> 8-bit elements.
defm VPMOVDB    : avx512_trunc_db<0x31, "vpmovdb",   trunc, AVX512_TRUNCATE,
                                  truncstorevi8, masked_truncstorevi8, X86vtrunc>;
defm VPMOVSDB   : avx512_trunc_db<0x21, "vpmovsdb",  X86vtruncs, AVX512_TRUNCATE,
                                  truncstore_s_vi8, masked_truncstore_s_vi8>;
defm VPMOVUSDB  : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus, AVX512_TRUNCATE,
                                  truncstore_us_vi8, masked_truncstore_us_vi8>;

// 32-bit elements -> 16-bit elements.
defm VPMOVDW    : avx512_trunc_dw<0x33, "vpmovdw",   trunc, AVX512_TRUNCATE,
                                  truncstorevi16, masked_truncstorevi16, X86vtrunc>;
defm VPMOVSDW   : avx512_trunc_dw<0x23, "vpmovsdw",  X86vtruncs, AVX512_TRUNCATE,
                                  truncstore_s_vi16, masked_truncstore_s_vi16>;
defm VPMOVUSDW  : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus, AVX512_TRUNCATE,
                                  truncstore_us_vi16, masked_truncstore_us_vi16>;

// 16-bit elements -> 8-bit elements.
defm VPMOVWB    : avx512_trunc_wb<0x30, "vpmovwb",   trunc, AVX512_TRUNCATE,
                                  truncstorevi8, masked_truncstorevi8, X86vtrunc>;
defm VPMOVSWB   : avx512_trunc_wb<0x20, "vpmovswb",  X86vtruncs, AVX512_TRUNCATE,
                                  truncstore_s_vi8, masked_truncstore_s_vi8>;
defm VPMOVUSWB  : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus, AVX512_TRUNCATE,
                                  truncstore_us_vi8, masked_truncstore_us_vi8>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008433
// Without VLX, select 256-bit truncations through the 512-bit instruction:
// place the ymm source in the low half of an otherwise undefined zmm
// (INSERT_SUBREG into IMPLICIT_DEF), run the Z-form truncate, then take the
// low xmm of the result.
let Predicates = [HasAVX512, NoVLX] in {
def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
         (v8i16 (EXTRACT_SUBREG
                 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                          VR256X:$src, sub_ymm)))), sub_xmm))>;
def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
         (v4i32 (EXTRACT_SUBREG
                 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                           VR256X:$src, sub_ymm)))), sub_xmm))>;
}
8444
// Without VLX but with BWI, select the v16i16 -> v16i8 truncation through the
// 512-bit VPMOVWBZrr: widen the ymm source into an otherwise undefined zmm and
// extract the low xmm of the result.
let Predicates = [HasBWI, NoVLX] in {
def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
         (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
                                            VR256X:$src, sub_ymm))), sub_xmm))>;
}
8450
// Register (rr) and memory (rm) forms of an extending move, both built on
// AVX512_maskable and executed in DestInfo's domain.
//
//   DestInfo / SrcInfo - destination and source vector info.
//   x86memop           - memory operand used by the rm form.
//   LdFrag             - load fragment matched by the rm form.
//   OpNode             - node matched by the rr form.
multiclass avx512_extend_common<bits<8> opc, string OpcodeStr, OpndItins itins,
              X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
              X86MemOperand x86memop, PatFrag LdFrag, SDPatternOperator OpNode>{
  let ExeDomain = DestInfo.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                    (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
                    (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))), itins.rr>,
                  EVEX, Sched<[itins.Sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                  (ins x86memop:$src), OpcodeStr ,"$src", "$src",
                  (DestInfo.VT (LdFrag addr:$src)), itins.rm>,
                EVEX, Sched<[itins.Sched.Folded]>;
  }
}
8466
// Extension from 8-bit to 16-bit elements at all three vector lengths.
// Z128 uses InVecNode; Z256 and Z use OpNode. The default load fragment is
// looked up by name as ExtTy#"extloadvi8". The 128/256-bit forms require
// HasVLX and HasBWI; the 512-bit form requires HasBWI.
multiclass avx512_extend_BW<bits<8> opc, string OpcodeStr,
          SDPatternOperator OpNode, SDPatternOperator InVecNode, string ExtTy,
          OpndItins itins, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasBWI] in {
    defm Z128:  avx512_extend_common<opc, OpcodeStr, itins, v8i16x_info,
                    v16i8x_info, i64mem, LdFrag, InVecNode>,
                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256:  avx512_extend_common<opc, OpcodeStr, itins, v16i16x_info,
                    v16i8x_info, i128mem, LdFrag, OpNode>,
                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasBWI] in {
    defm Z   :  avx512_extend_common<opc, OpcodeStr, itins, v32i16_info,
                    v32i8x_info, i256mem, LdFrag, OpNode>,
                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
  }
}
8485
// Extension from 8-bit to 32-bit elements at all three vector lengths.
// Z128 uses InVecNode; Z256 and Z use OpNode. The default load fragment is
// looked up by name as ExtTy#"extloadvi8". The 128/256-bit forms require
// HasVLX and HasAVX512; the 512-bit form requires HasAVX512.
multiclass avx512_extend_BD<bits<8> opc, string OpcodeStr,
          SDPatternOperator OpNode, SDPatternOperator InVecNode, string ExtTy,
          OpndItins itins, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128:  avx512_extend_common<opc, OpcodeStr, itins, v4i32x_info,
                   v16i8x_info, i32mem, LdFrag, InVecNode>,
                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256:  avx512_extend_common<opc, OpcodeStr, itins, v8i32x_info,
                   v16i8x_info, i64mem, LdFrag, OpNode>,
                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   :  avx512_extend_common<opc, OpcodeStr, itins, v16i32_info,
                   v16i8x_info, i128mem, LdFrag, OpNode>,
                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
  }
}
8504
// Extension from 8-bit to 64-bit elements at all three vector lengths.
// Z128 uses InVecNode; Z256 and Z use OpNode. The default load fragment is
// looked up by name as ExtTy#"extloadvi8". The 128/256-bit forms require
// HasVLX and HasAVX512; the 512-bit form requires HasAVX512.
multiclass avx512_extend_BQ<bits<8> opc, string OpcodeStr,
          SDPatternOperator OpNode, SDPatternOperator InVecNode, string ExtTy,
          OpndItins itins, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128:  avx512_extend_common<opc, OpcodeStr, itins, v2i64x_info,
                   v16i8x_info, i16mem, LdFrag, InVecNode>,
                     EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256:  avx512_extend_common<opc, OpcodeStr, itins, v4i64x_info,
                   v16i8x_info, i32mem, LdFrag, OpNode>,
                     EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   :  avx512_extend_common<opc, OpcodeStr, itins, v8i64_info,
                   v16i8x_info, i64mem, LdFrag, OpNode>,
                     EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
  }
}
8523
// Extension from 16-bit to 32-bit elements at all three vector lengths.
// Z128 uses InVecNode; Z256 and Z use OpNode. The default load fragment is
// looked up by name as ExtTy#"extloadvi16". The 128/256-bit forms require
// HasVLX and HasAVX512; the 512-bit form requires HasAVX512.
multiclass avx512_extend_WD<bits<8> opc, string OpcodeStr,
         SDPatternOperator OpNode, SDPatternOperator InVecNode, string ExtTy,
         OpndItins itins, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128:  avx512_extend_common<opc, OpcodeStr, itins, v4i32x_info,
                   v8i16x_info, i64mem, LdFrag, InVecNode>,
                     EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256:  avx512_extend_common<opc, OpcodeStr, itins, v8i32x_info,
                   v8i16x_info, i128mem, LdFrag, OpNode>,
                     EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   :  avx512_extend_common<opc, OpcodeStr, itins, v16i32_info,
                   v16i16x_info, i256mem, LdFrag, OpNode>,
                     EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
  }
}
8542
// Extension from 16-bit to 64-bit elements at all three vector lengths.
// Z128 uses InVecNode; Z256 and Z use OpNode. The default load fragment is
// looked up by name as ExtTy#"extloadvi16". The 128/256-bit forms require
// HasVLX and HasAVX512; the 512-bit form requires HasAVX512.
multiclass avx512_extend_WQ<bits<8> opc, string OpcodeStr,
         SDPatternOperator OpNode, SDPatternOperator InVecNode, string ExtTy,
         OpndItins itins, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128:  avx512_extend_common<opc, OpcodeStr, itins, v2i64x_info,
                   v8i16x_info, i32mem, LdFrag, InVecNode>,
                     EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256:  avx512_extend_common<opc, OpcodeStr, itins, v4i64x_info,
                   v8i16x_info, i64mem, LdFrag, OpNode>,
                     EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   :  avx512_extend_common<opc, OpcodeStr, itins, v8i64_info,
                   v8i16x_info, i128mem, LdFrag, OpNode>,
                     EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
  }
}
8561
// Extension from 32-bit to 64-bit elements at all three vector lengths.
// Z128 uses InVecNode; Z256 and Z use OpNode. The default load fragment is
// looked up by name as ExtTy#"extloadvi32". The 128/256-bit forms require
// HasVLX and HasAVX512; the 512-bit form requires HasAVX512.
// Unlike the other avx512_extend_* multiclasses, these are not tagged VEX_WIG.
multiclass avx512_extend_DQ<bits<8> opc, string OpcodeStr,
         SDPatternOperator OpNode, SDPatternOperator InVecNode, string ExtTy,
         OpndItins itins, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {

  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128:  avx512_extend_common<opc, OpcodeStr, itins, v2i64x_info,
                   v4i32x_info, i64mem, LdFrag, InVecNode>,
                     EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;

    defm Z256:  avx512_extend_common<opc, OpcodeStr, itins, v4i64x_info,
                   v4i32x_info, i128mem, LdFrag, OpNode>,
                     EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
  }
  let Predicates = [HasAVX512] in {
    defm Z   :  avx512_extend_common<opc, OpcodeStr, itins, v8i64_info,
                   v8i32x_info, i256mem, LdFrag, OpNode>,
                     EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
  }
}
8581
// Zero-extending moves: X86vzext / zext_invec, with ExtTy "z" selecting the
// "zextloadvi*" load fragments.
defm VPMOVZXBW : avx512_extend_BW<0x30, "vpmovzxbw", X86vzext, zext_invec, "z",
                                  AVX512_EXTEND>;
defm VPMOVZXBD : avx512_extend_BD<0x31, "vpmovzxbd", X86vzext, zext_invec, "z",
                                  AVX512_EXTEND>;
defm VPMOVZXBQ : avx512_extend_BQ<0x32, "vpmovzxbq", X86vzext, zext_invec, "z",
                                  AVX512_EXTEND>;
defm VPMOVZXWD : avx512_extend_WD<0x33, "vpmovzxwd", X86vzext, zext_invec, "z",
                                  AVX512_EXTEND>;
defm VPMOVZXWQ : avx512_extend_WQ<0x34, "vpmovzxwq", X86vzext, zext_invec, "z",
                                  AVX512_EXTEND>;
defm VPMOVZXDQ : avx512_extend_DQ<0x35, "vpmovzxdq", X86vzext, zext_invec, "z",
                                  AVX512_EXTEND>;

// Sign-extending moves: X86vsext / sext_invec, with ExtTy "s" selecting the
// "sextloadvi*" load fragments.
defm VPMOVSXBW: avx512_extend_BW<0x20, "vpmovsxbw", X86vsext, sext_invec, "s",
                                 AVX512_EXTEND>;
defm VPMOVSXBD: avx512_extend_BD<0x21, "vpmovsxbd", X86vsext, sext_invec, "s",
                                 AVX512_EXTEND>;
defm VPMOVSXBQ: avx512_extend_BQ<0x22, "vpmovsxbq", X86vsext, sext_invec, "s",
                                 AVX512_EXTEND>;
defm VPMOVSXWD: avx512_extend_WD<0x23, "vpmovsxwd", X86vsext, sext_invec, "s",
                                 AVX512_EXTEND>;
defm VPMOVSXWQ: avx512_extend_WQ<0x24, "vpmovsxwq", X86vsext, sext_invec, "s",
                                 AVX512_EXTEND>;
defm VPMOVSXDQ: avx512_extend_DQ<0x25, "vpmovsxdq", X86vsext, sext_invec, "s",
                                 AVX512_EXTEND>;
Elena Demikhovsky3948c592015-05-27 08:15:19 +00008595
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008596
// Extra load-folding patterns for the extending moves. Each pattern maps an
// extension of some load shape onto the "rm" form of the instruction named
// OpcPrefix#<BW|BD|BQ|WD|WQ|DQ>Z[128|256]rm.
//
//   OpcPrefix - instruction name prefix ("VPMOVSX" / "VPMOVZX" at the
//               instantiation sites).
//   ExtOp     - extension node matched for 256-bit and 512-bit results.
//   InVecOp   - extension node matched for 128-bit results.
//   ExtLoad16 - scalar load fragment used by the 128-bit BQ
//               scalar_to_vector pattern.
//
// NOTE(review): the 256-bit groups fold vzmovl_v2i64 / vzload_v2i64 sources
// into instructions that take a v16i8 / v8i16 / v4i32 source; presumably the
// narrower load is intended to be folded as-is -- confirm against the
// definitions of those fragments.
multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
                                 SDNode InVecOp, PatFrag ExtLoad16> {
  // 128-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (v16i8 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (ExtLoad16 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;

  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (v8i16 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (v8i16 (vzmovl_v4i32 addr:$src)))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (v4i32 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (v4i32 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  }
  // 256-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
  def : Pat<(v16i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
  def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
  def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
  def : Pat<(v8i32 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;

  def : Pat<(v4i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;

  def : Pat<(v8i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;

  def : Pat<(v4i64 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;

  def : Pat<(v4i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
  }
  // 512-bit patterns
  let Predicates = [HasBWI] in {
  def : Pat<(v32i16 (ExtOp (bc_v32i8 (loadv4i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
  }
  let Predicates = [HasAVX512] in {
  def : Pat<(v16i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;

  def : Pat<(v8i64 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  def : Pat<(v8i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;

  def : Pat<(v16i32 (ExtOp (bc_v16i16 (loadv4i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;

  def : Pat<(v8i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;

  def : Pat<(v8i64 (ExtOp (bc_v8i32 (loadv4i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
  }
}
8737
// Load-folding patterns for the sign- and zero-extending families. The last
// argument is the 16-bit scalar load fragment used by the 128-bit BQ pattern.
defm : AVX512_pmovx_patterns<"VPMOVSX", X86vsext, sext_invec, extloadi32i16>;
defm : AVX512_pmovx_patterns<"VPMOVZX", X86vzext, zext_invec, loadi16_anyext>;
Craig Topper64378f42016-10-09 23:08:39 +00008740
//===----------------------------------------------------------------------===//
// GATHER - SCATTER Operations
8743
// FIXME: Improve scheduling of gather/scatter instructions.
//
// Single masked gather instruction ("rm" form).
//
//   _          - vector info for the gathered data; _.Suffix is appended to
//                the mnemonic and _.EltSize feeds EVEX_CD8.
//   memop      - memory operand carrying the vector address.
//   GatherNode - fragment matched by the selection pattern.
//   MaskRC     - mask register class (defaults to _.KRCWM).
//
// The instruction produces both the destination and a written-back mask.
// Constraints tie $src1 to $dst and $mask to $mask_wb, and mark $dst
// early-clobber.
multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         X86MemOperand memop, PatFrag GatherNode,
                         RegisterClass MaskRC = _.KRCWM> {
  let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
      ExeDomain = _.ExeDomain in
  def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
            (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
            !strconcat(OpcodeStr#_.Suffix,
            "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
            [(set _.RC:$dst, MaskRC:$mask_wb,
              (GatherNode  (_.VT _.RC:$src1), MaskRC:$mask,
                     vectoraddr:$src2))]>, EVEX, EVEX_K,
             EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteLoad]>;
}
Cameron McInally45325962014-03-26 13:50:50 +00008759
// Gather instantiations that are all tagged VEX_W. For each vector length
// there is a "d" form (opcode dopc) and a "q" form (opcode qopc); record names
// are NAME # D|Q # SUFF # Z[256|128]. The 512-bit forms carry no extra
// predicate here; the 256-bit and 128-bit forms require HasVLX.
multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512,
                                      vy512mem,  mgatherv8i32>, EVEX_V512, VEX_W;
  defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info512,
                                      vz512mem,  mgatherv8i64>, EVEX_V512, VEX_W;
let Predicates = [HasVLX] in {
  defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
                              vx256xmem, mgatherv4i32>, EVEX_V256, VEX_W;
  defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info256,
                              vy256xmem, mgatherv4i64>, EVEX_V256, VEX_W;
  defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
                              vx128xmem, mgatherv4i32>, EVEX_V128, VEX_W;
  defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
                              vx128xmem, mgatherv2i64>, EVEX_V128, VEX_W;
}
}
8777
// Gather instantiations without VEX_W. Same naming scheme as
// avx512_gather_q_pd. Note that the "q" records use a narrower VT info than
// their EVEX vector-length class (info256 under EVEX_V512, info128 under
// EVEX_V256), and the 128-bit "q" record overrides the mask class with VK2WM
// instead of the default _.KRCWM.
// NOTE(review): presumably because 64-bit indices address half as many lanes
// as the data type has per register -- confirm against the ISA reference.
multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
                       AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512, vz512mem,
                                       mgatherv16i32>, EVEX_V512;
  defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info256, vz256xmem,
                                       mgatherv8i64>, EVEX_V512;
  let Predicates = [HasVLX] in {
    defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
                                            vy256xmem, mgatherv8i32>, EVEX_V256;
    defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info128,
                                            vy128xmem, mgatherv4i64>, EVEX_V256;
    defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
                                            vx128xmem, mgatherv4i32>, EVEX_V128;
    defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
                                            vx64xmem, mgatherv2i64, VK2WM>,
                                            EVEX_V128;
  }
}
Michael Liao5bf95782014-12-04 05:20:33 +00008796
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008797
// Floating-point gathers: opcodes 0x92 ("d" form) / 0x93 ("q" form).
defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
               avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;

// Integer gathers: opcodes 0x90 ("d" form) / 0x91 ("q" form).
defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
                avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008803
// Masked vector scatter, memory <- register form (`mr`).
//   opc         - opcode byte.
//   OpcodeStr   - mnemonic stem; _.Suffix is appended to form the asm name.
//   _           - vector type info describing the stored vector.
//   memop       - memory operand class used for $dst.
//   ScatterNode - PatFrag matched by the selection pattern.
//   MaskRC      - write-mask register class; defaults to _.KRCWM.
// $mask is tied to $mask_wb, the record's only register result. mayStore is
// set explicitly.
multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86MemOperand memop, PatFrag ScatterNode,
                          RegisterClass MaskRC = _.KRCWM> {

  let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain in

  def mr : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
            (ins memop:$dst, MaskRC:$mask, _.RC:$src),
            !strconcat(OpcodeStr#_.Suffix,
            "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
            [(set MaskRC:$mask_wb, (ScatterNode (_.VT _.RC:$src),
                                    MaskRC:$mask, vectoraddr:$dst))]>,
            EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
            Sched<[WriteStore]>;
}
8819
// Scatter instantiations that all carry VEX_W. For each vector width a
// "d"-suffixed record (opcode dopc) and a "q"-suffixed record (opcode qopc)
// are produced. Record names are NAME # {D,Q} # SUFF # {Z,Z256,Z128}.
// The 256/128-bit records require HasVLX.
multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512,
                                        vy512mem, mscatterv8i32>, EVEX_V512, VEX_W;
  defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info512,
                                        vz512mem, mscatterv8i64>, EVEX_V512, VEX_W;
  let Predicates = [HasVLX] in {
    defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
                                vx256xmem, mscatterv4i32>, EVEX_V256, VEX_W;
    defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info256,
                                vy256xmem, mscatterv4i64>, EVEX_V256, VEX_W;
    defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
                                vx128xmem, mscatterv4i32>, EVEX_V128, VEX_W;
    defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
                                vx128xmem, mscatterv2i64>, EVEX_V128, VEX_W;
  }
}
8837
// Scatter instantiations without VEX_W. Same naming scheme as
// avx512_scatter_q_pd. The "q" records use a narrower VT info than their EVEX
// vector-length class (info256 under EVEX_V512, info128 under EVEX_V256), and
// the 128-bit "q" record overrides the mask class with VK2WM instead of the
// default _.KRCWM.
multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
                       AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512, vz512mem,
                                        mscatterv16i32>, EVEX_V512;
  defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info256, vz256xmem,
                                        mscatterv8i64>, EVEX_V512;
  let Predicates = [HasVLX] in {
    defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
                                             vy256xmem, mscatterv8i32>, EVEX_V256;
    defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
                                             vy128xmem, mscatterv4i64>, EVEX_V256;
    defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
                                             vx128xmem, mscatterv4i32>, EVEX_V128;
    defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
                                             vx64xmem, mscatterv2i64, VK2WM>,
                                             EVEX_V128;
  }
}
8856
// Floating-point scatters: opcodes 0xA2 ("d" form) / 0xA3 ("q" form).
defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
                avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;

// Integer scatters: opcodes 0xA0 ("d" form) / 0xA1 ("q" form).
defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
                 avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008862
// prefetch
// Masked gather/scatter prefetch, memory-only form (`m`).
//   opc       - opcode byte.
//   F         - instruction Format (the MRMnm form selects the variant).
//   OpcodeStr - full mnemonic.
//   KRC       - mask register class for $mask.
//   memop     - memory operand class for $src.
// The record has no results and no selection pattern; it is guarded by HasPFI
// and marked hasSideEffects = 1.
multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
                       RegisterClass KRC, X86MemOperand memop> {
  let Predicates = [HasPFI], hasSideEffects = 1 in
  def m  : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
            !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"),
            [], IIC_SSE_PREFETCH>, EVEX, EVEX_K, Sched<[WriteLoad]>;
}
8871
// Prefetch instantiations. Opcode 0xC6 is used for the "d" forms and 0xC7 for
// the "q" forms; the Format selects the operation:
//   MRM1m - vgatherpf0*    MRM2m - vgatherpf1*
//   MRM5m - vscatterpf0*   MRM6m - vscatterpf1*
// The *PD forms additionally carry VEX_W.
defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
                     VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
                     VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
                     VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
                     VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
                     VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
                     VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
                     VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
                     VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00008919
// Mask register -> vector register conversion (`rr`) for one vector width.
// Selected for a sext of the mask ($src, class Vec.KRC) to Vec.VT. The asm
// name is OpcodeStr with Vec.Suffix appended.
multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
  def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
                      !strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
                      [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))],
                      IIC_SSE_MOV_S_RR>, EVEX, Sched<[WriteMove]>;
}
Michael Liao5bf95782014-12-04 05:20:33 +00008926
// Instantiates cvt_by_vec_width for all three vector widths of VTInfo.
// The 512-bit record (Z) requires `prd`; the 256/128-bit records (Z256/Z128)
// additionally require HasVLX.
multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
                                 string OpcodeStr, Predicate prd> {
  let Predicates = [prd] in
    defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
    defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
  }
}
8937
// vpmovm2{b,w,d,q}: byte/word forms use opcode 0x28 and require HasBWI;
// dword/qword forms use opcode 0x38 and require HasDQI. The W and Q forms
// carry VEX_W.
defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , VEX_W;
defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , VEX_W;
Elena Demikhovsky908dbf42014-12-11 15:02:24 +00008942
// Vector register -> mask register conversion (`rr`) for one vector width.
// Selected for X86pcmpgtm with an all-zeros first operand and $src as the
// second operand, producing a value in _.KRC.
multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
  def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                      [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))],
                      IIC_SSE_MOV_S_RR>, EVEX, Sched<[WriteMove]>;
}
8949
// Use 512bit version to implement 128/256 bit in case NoVLX.
// The narrow source register is inserted into an IMPLICIT_DEF of the wide
// type (ExtendInfo.VT) at _.SubRegIdx, the NAME#"Zrr" instruction is applied
// to it, and the result is copied into the narrow mask class _.KRC.
multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
                                           X86VectorVTInfo _> {

  def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
            (_.KVT (COPY_TO_REGCLASS
                     (!cast<Instruction>(NAME#"Zrr")
                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                      _.RC:$src, _.SubRegIdx)),
                   _.KRC))>;
}
8961
// Vector -> mask conversion for all three vector widths of VTInfo.
//   [prd]          - 512-bit instruction (Z).
//   [prd, HasVLX]  - native 256/128-bit instructions (Z256, Z128).
//   [prd, NoVLX]   - pattern-only fallbacks (Z256_Alt, Z128_Alt) that widen
//                    the operand and reuse the 512-bit instruction.
multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
                                   AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
    defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
                                            EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
                                              EVEX_V256;
    defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
                                              EVEX_V128;
  }
  let Predicates = [prd, NoVLX] in {
    defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256>;
    defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128>;
  }
}
8979
// vpmov{b,w,d,q}2m: byte/word forms use opcode 0x29 and require HasBWI;
// dword/qword forms use opcode 0x39 and require HasDQI. The W and Q forms
// carry VEX_W.
defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
                                              avx512vl_i8_info, HasBWI>;
defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
                                              avx512vl_i16_info, HasBWI>, VEX_W;
defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
                                              avx512vl_i32_info, HasDQI>;
defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
                                              avx512vl_i64_info, HasDQI>, VEX_W;
8988
// Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
// is available, but BWI is not. We can't handle this in lowering because
// a target independent DAG combine likes to combine sext and trunc.
// Both patterns first materialize the mask as v16i32 with VPMOVM2DZrr and
// then narrow it with VPMOVDBZrr / VPMOVDWZrr.
let Predicates = [HasDQI, NoBWI] in {
  def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
            (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
  def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
            (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
}
8998
//===----------------------------------------------------------------------===//
// AVX-512 - COMPRESS and EXPAND
//

// Operand itineraries shared by the compress and expand multiclasses below.
// Both use the same rr/rm itinerary classes and the WriteShuffle256
// scheduling class.
// FIXME: Is there a better scheduler itinerary for VPCOMPRESS/VPEXPAND?
let Sched = WriteShuffle256 in {
def AVX512_COMPRESS : OpndItins<
  IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
>;
def AVX512_EXPAND : OpndItins<
  IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
>;
}
9012
// Compress instructions for one vector width:
//   rr  - register form (via AVX512_maskable), selected for X86compress.
//   mr  - unmasked store form, no selection pattern.
//   mrk - masked store form, no selection pattern here.
// NOTE(review): the brace-less `let mayStore = 1, hasSideEffects = 0 in`
// applies only to `mr`; `mrk` gets neither flag explicitly. Confirm this is
// intended before changing it.
multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
                                 string OpcodeStr, OpndItins itins> {
  defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
              (_.VT (X86compress _.RC:$src1)), itins.rr>, AVX5128IBase,
              Sched<[itins.Sched]>;

  let mayStore = 1, hasSideEffects = 0 in
  def mr : AVX5128I<opc, MRMDestMem, (outs),
              (ins _.MemOp:$dst, _.RC:$src),
              OpcodeStr # "\t{$src, $dst|$dst, $src}",
              []>, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[itins.Sched.Folded]>;

  def mrk : AVX5128I<opc, MRMDestMem, (outs),
              (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
              OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
              []>,
              EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[itins.Sched.Folded]>;
}
9034
// Selects X86mCompressingStore to the masked store record whose name is
// NAME # _.ZSuffix # mrk.
multiclass compress_by_vec_width_lowering<X86VectorVTInfo _ > {
  def : Pat<(X86mCompressingStore addr:$dst, _.KRCWM:$mask,
                                  (_.VT _.RC:$src)),
            (!cast<Instruction>(NAME#_.ZSuffix##mrk)
                                addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
}
9041
// Compress instructions plus compressing-store patterns for all three vector
// widths of VTInfo. The 512-bit records (Z) require Pred (default HasAVX512);
// the 256/128-bit records (Z256/Z128) additionally require HasVLX.
multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
                                 OpndItins itins,
                                 AVX512VLVectorVTInfo VTInfo,
                                 Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in
    defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, itins>,
             compress_by_vec_width_lowering<VTInfo.info512>, EVEX_V512;

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, itins>,
                compress_by_vec_width_lowering<VTInfo.info256>, EVEX_V256;
    defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, itins>,
                compress_by_vec_width_lowering<VTInfo.info128>, EVEX_V128;
  }
}
9057
// Integer compress uses opcode 0x8B, floating-point compress uses 0x8A.
// The 64-bit element forms carry VEX_W.
defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", AVX512_COMPRESS,
                                          avx512vl_i32_info>, EVEX;
defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", AVX512_COMPRESS,
                                          avx512vl_i64_info>, EVEX, VEX_W;
defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", AVX512_COMPRESS,
                                          avx512vl_f32_info>, EVEX;
defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", AVX512_COMPRESS,
                                          avx512vl_f64_info>, EVEX, VEX_W;
Elena Demikhovsky908dbf42014-12-11 15:02:24 +00009066
// expand
// Expand instructions for one vector width, both built with AVX512_maskable:
//   rr - register source, selected for X86expand of $src1.
//   rm - memory source, selected for X86expand of a bitconverted _.LdFrag
//        load from $src1.
multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
                               string OpcodeStr, OpndItins itins> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
              (_.VT (X86expand _.RC:$src1)), itins.rr>, AVX5128IBase,
              Sched<[itins.Sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
              (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
              (_.VT (X86expand (_.VT (bitconvert
                                      (_.LdFrag addr:$src1))))), itins.rm>,
              AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
9082
// Selects X86mExpandingLoad to the masked memory forms of the expand
// instruction:
//   undef pass-through   -> NAME # _.ZSuffix # rmkz
//   $src0 pass-through   -> NAME # _.ZSuffix # rmk (with $src0 as first operand)
multiclass expand_by_vec_width_lowering<X86VectorVTInfo _ > {

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(NAME#_.ZSuffix##rmkz)
                                _.KRCWM:$mask, addr:$src)>;

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
                                     (_.VT _.RC:$src0))),
            (!cast<Instruction>(NAME#_.ZSuffix##rmk)
                                _.RC:$src0, _.KRCWM:$mask, addr:$src)>;
}
9094
// Expand instructions plus expanding-load patterns for all three vector
// widths of VTInfo. The 512-bit records (Z) require Pred (default HasAVX512);
// the 256/128-bit records (Z256/Z128) additionally require HasVLX.
multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
                               OpndItins itins,
                               AVX512VLVectorVTInfo VTInfo,
                               Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in
    defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, itins>,
             expand_by_vec_width_lowering<VTInfo.info512>, EVEX_V512;

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, itins>,
                expand_by_vec_width_lowering<VTInfo.info256>, EVEX_V256;
    defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, itins>,
                expand_by_vec_width_lowering<VTInfo.info128>, EVEX_V128;
  }
}
9110
// Integer expand uses opcode 0x89, floating-point expand uses 0x88.
// The 64-bit element forms carry VEX_W.
defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", AVX512_EXPAND,
                                      avx512vl_i32_info>, EVEX;
defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", AVX512_EXPAND,
                                      avx512vl_i64_info>, EVEX, VEX_W;
defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", AVX512_EXPAND,
                                      avx512vl_f32_info>, EVEX;
defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", AVX512_EXPAND,
                                      avx512vl_f64_info>, EVEX, VEX_W;
Elena Demikhovsky42c96d92015-06-01 06:50:49 +00009119
// Handle instructions of the form
//   reg_vec1 = op(reg_vec, imm)
//              op(mem_vec, imm)
//              op(broadcast(eltVt), imm)
// All instructions are created with FROUND_CURRENT.
// Records produced (each via AVX512_maskable):
//   rri  - register source.
//   rmi  - full-vector memory source (bitconverted _.LdFrag load).
//   rmbi - broadcast scalar memory source; carries EVEX_B.
// The asm name is OpcodeStr with _.Suffix appended.
multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                      OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
                      (OpNode (_.VT _.RC:$src1),
                              (i32 imm:$src2)), itins.rr>, Sched<[itins.Sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
                    (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                            (i32 imm:$src2)), itins.rm>,
                    Sched<[itins.Sched.Folded, ReadAfterLd]>;
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr,
                    "${src1}"##_.BroadcastStr##", $src2",
                    (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src1))),
                            (i32 imm:$src2)), itins.rm>, EVEX_B,
                    Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
9147
// Handle instructions of the form
//   reg_vec1 = op(reg_vec, imm), {sae}
// Produces a single register-source record (rrib) whose pattern passes
// FROUND_NO_EXC as a third operand to OpNode; it carries EVEX_B and prints
// "{sae}" in the asm string.
multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                          SDNode OpNode, OpndItins itins,
                                          X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix, "$src2, {sae}, $src1",
                      "$src1, {sae}, $src2",
                      (OpNode (_.VT _.RC:$src1),
                              (i32 imm:$src2),
                              (i32 FROUND_NO_EXC)), itins.rr>,
                      EVEX_B, Sched<[itins.Sched]>;
}
9162
// Unary packed FP-with-immediate instructions for all three vector widths.
//   OpNode    - node used for the plain rri/rmi/rmbi forms.
//   OpNodeRnd - node used for the {sae} form.
// Only the 512-bit instantiation (Z) also gets the {sae} form; the 128/256-bit
// instantiations additionally require HasVLX.
multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
            AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
            SDNode OpNodeRnd, OpndItins itins, Predicate prd>{
  let Predicates = [prd] in {
    defm Z    : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, itins,
                                           _.info512>,
                avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd,
                                               itins, _.info512>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, itins,
                                           _.info128>, EVEX_V128;
    defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, itins,
                                           _.info256>, EVEX_V256;
  }
}
9179
// Handle instructions of the form
//   reg_vec1 = op(reg_vec2, reg_vec3, imm)
//              op(reg_vec2, mem_vec, imm)
//              op(reg_vec2, broadcast(eltVt), imm)
// All instructions are created with FROUND_CURRENT.
// Records produced (each via AVX512_maskable):
//   rri  - second source in a register.
//   rmi  - second source is a full-vector load (bitconverted _.LdFrag).
//   rmbi - second source is a broadcast scalar load; carries EVEX_B.
// The immediate is an i32u8imm operand matched as (i32 imm).
multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                OpndItins itins, X86VectorVTInfo _>{
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 imm:$src3)), itins.rr>,
                      Sched<[itins.Sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (bitconvert (_.LdFrag addr:$src2))),
                            (i32 imm:$src3)), itins.rm>,
                    Sched<[itins.Sched.Folded, ReadAfterLd]>;
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr##", $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
                            (i32 imm:$src3)), itins.rm>, EVEX_B,
                    Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
9211
// Handle instructions of the form
//   reg_vec1 = op(reg_vec2, reg_vec3, imm)
//              op(reg_vec2, mem_vec, imm)
// The destination and sources may use different vector type infos
// (DestInfo / SrcInfo). The immediate is a u8imm operand matched as (i8 imm).
// Records produced (each via AVX512_maskable on DestInfo): rri and rmi.
multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              OpndItins itins, X86VectorVTInfo DestInfo,
                              X86VectorVTInfo SrcInfo>{
  let ExeDomain = DestInfo.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                  (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                               (SrcInfo.VT SrcInfo.RC:$src2),
                               (i8 imm:$src3))), itins.rr>,
                  Sched<[itins.Sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                             (SrcInfo.VT (bitconvert
                                                (SrcInfo.LdFrag addr:$src2))),
                             (i8 imm:$src3))), itins.rm>,
                Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
9235
// Handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
//                               op(reg_vec2,mem_vec,imm)
//                               op(reg_vec2,broadcast(eltVt),imm)
//
// Inherits the rri/rmi forms from avx512_3Op_rm_imm8 (using the same type
// info for destination and sources) and adds rmbi, where the second source
// is a scalar load broadcast to the full vector (marked EVEX_B).
multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           OpndItins itins, X86VectorVTInfo _>:
  avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, itins, _, _>{

  let ExeDomain = _.ExeDomain in
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                    OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr##", $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
                            (i8 imm:$src3)), itins.rm>, EVEX_B,
                    Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
9253
// Handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
//                                      op(reg_vec2,mem_scalar,imm)
//
// Defines two scalar maskable forms:
//   rri - both sources in registers.
//   rmi - second source is a scalar load wrapped in scalar_to_vector.
// The immediate is declared as i32u8imm and matched as (i32 imm).
multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 imm:$src3)), itins.rr>,
                      Sched<[itins.Sched]>;
  defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (scalar_to_vector
                                      (_.ScalarLdFrag addr:$src2))),
                            (i32 imm:$src3)), itins.rm>,
                    Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
9276
// Handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
//
// Defines the single register-register form rrib. Its pattern passes
// FROUND_NO_EXC as an extra operand to OpNode, its assembly string carries
// the "{sae}" marker, and the record is tagged EVEX_B.
multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode, OpndItins itins,
                                    X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, {sae}, $src2, $src1",
                      "$src1, $src2, {sae}, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 imm:$src3),
                              (i32 FROUND_NO_EXC)), itins.rr>,
                      EVEX_B, Sched<[itins.Sched]>;
}
Simon Pilgrimd1a7d0c2017-11-30 12:01:52 +00009292
// Handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
//
// Scalar counterpart of avx512_fp_sae_packed_imm: defines NAME#rrib through
// AVX512_maskable_scalar, passing FROUND_NO_EXC as the final OpNode operand
// and tagging the record EVEX_B.
multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                    OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, {sae}, $src2, $src1",
                      "$src1, $src2, {sae}, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 imm:$src3),
                              (i32 FROUND_NO_EXC)), itins.rr>,
                      EVEX_B, Sched<[itins.Sched]>;
}
Elena Demikhovsky42c96d92015-06-01 06:50:49 +00009307
// Instantiates a packed FP "two sources + immediate" instruction at all three
// vector widths.
//   Z    (512-bit) - guarded by prd; combines avx512_fp_packed_imm (OpNode)
//                    with the {sae} form from avx512_fp_sae_packed_imm
//                    (OpNodeRnd).
//   Z128 / Z256    - guarded by prd and HasVLX; no {sae} form is added.
multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
            AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
            SDNode OpNodeRnd, OpndItins itins, Predicate prd>{
  let Predicates = [prd] in {
    defm Z    : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, itins, _.info512>,
                avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd, itins, _.info512>,
                                  EVEX_V512;

  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, itins, _.info128>,
                                  EVEX_V128;
    defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, itins, _.info256>,
                                  EVEX_V256;
  }
}
9324
// Instantiates avx512_3Op_rm_imm8 at 512/128/256 bits with separate
// destination and source type infos. Every width is tagged AVX512AIi8Base
// and EVEX_4V. The 512-bit form requires Pred (HasBWI by default); the
// 128/256-bit forms additionally require HasVLX.
multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
                        OpndItins itins, AVX512VLVectorVTInfo DestInfo,
                        AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
  let Predicates = [Pred] in {
    defm Z    : avx512_3Op_rm_imm8<opc, OpStr, OpNode, itins, DestInfo.info512,
                           SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
  }
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, itins, DestInfo.info128,
                           SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
    defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, itins, DestInfo.info256,
                           SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
  }
}
9339
// Instantiates avx512_3Op_imm8 (reg, mem and broadcast forms) at 512/128/256
// bits. The 512-bit form requires Pred (HasAVX512 by default); the
// 128/256-bit forms additionally require HasVLX.
multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
                                  bits<8> opc, SDNode OpNode, OpndItins itins,
                                  Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in {
    defm Z    : avx512_3Op_imm8<opc, OpcodeStr, OpNode, itins, _.info512>, EVEX_V512;
  }
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, itins, _.info128>, EVEX_V128;
    defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, itins, _.info256>, EVEX_V256;
  }
}
9351
// Instantiates a scalar FP "two sources + immediate" instruction under the
// Z128 name, combining the plain forms (avx512_fp_scalar_imm, using OpNode)
// with the {sae} form (avx512_fp_sae_scalar_imm, using OpNodeRnd). Only the
// predicate prd is required.
multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
                  X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
                  SDNode OpNodeRnd, OpndItins itins, Predicate prd>{
  let Predicates = [prd] in {
     defm Z128 : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, itins, _>,
                 avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeRnd, itins, _>;
  }
}
9360
// Instantiates a unary packed FP instruction with immediate for both element
// types:
//   PS - avx512vl_f32_info, opcode opcPs, itins.s, EVEX_CD8<32, CD8VF>.
//   PD - avx512vl_f64_info, opcode opcPd, itins.d, EVEX_CD8<64, CD8VF>, VEX_W.
multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
                    bits<8> opcPs, bits<8> opcPd, SDNode OpNode,
                    SDNode OpNodeRnd, SizeItins itins, Predicate prd>{
  defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
                            opcPs, OpNode, OpNodeRnd, itins.s, prd>,
                            EVEX_CD8<32, CD8VF>;
  defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
                            opcPd, OpNode, OpNodeRnd, itins.d, prd>,
                            EVEX_CD8<64, CD8VF>, VEX_W;
}
9371
// Unary packed FP instructions with an immediate operand. Each defm expands
// to PS and PD variants; the two opcode arguments are (PS opcode, PD opcode).
// VREDUCE requires HasDQI, the other two only HasAVX512.
defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
                              X86VReduce, X86VReduceRnd, SSE_ALU_ITINS_P, HasDQI>,
                              AVX512AIi8Base, EVEX;
defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
                              X86VRndScale, X86VRndScaleRnd, SSE_ALU_ITINS_P, HasAVX512>,
                              AVX512AIi8Base, EVEX;
defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
                              X86VGetMant, X86VGetMantRnd, SSE_ALU_ITINS_P, HasAVX512>,
                              AVX512AIi8Base, EVEX;
Igor Breger1e58e8a2015-09-02 11:18:55 +00009381
// Packed VRANGE (opcode 0x50, requires HasDQI). The PD form carries VEX_W and
// a 64-bit EVEX_CD8 element size; the PS form uses a 32-bit element size.
defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
                                                0x50, X86VRange, X86VRangeRnd,
                                                SSE_ALU_F64P, HasDQI>,
      AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
                                                0x50, X86VRange, X86VRangeRnd,
                                                SSE_ALU_F32P, HasDQI>,
      AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
9390
// Scalar "two sources + immediate" FP instructions. All are VEX_LIG, EVEX_4V
// and use the CD8VT1 tuple; the SD forms additionally carry VEX_W.
//   VRANGES*   - opcode 0x51, requires HasDQI.
//   VREDUCES*  - opcode 0x57, requires HasDQI.
//   VGETMANTS* - opcode 0x27, requires HasAVX512.
defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
      f64x_info, 0x51, X86Ranges, X86RangesRnd, SSE_ALU_F64S, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
      0x51, X86Ranges, X86RangesRnd, SSE_ALU_F32S, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;

defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
      0x57, X86Reduces, X86ReducesRnd, SSE_ALU_F64S, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
      0x57, X86Reduces, X86ReducesRnd, SSE_ALU_F32S, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;

defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
      0x27, X86GetMants, X86GetMantsRnd, SSE_ALU_F64S, HasAVX512>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
      0x27, X86GetMants, X86GetMantsRnd, SSE_ALU_F32S, HasAVX512>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
9411
// Select the generic FP rounding nodes on 512-bit vectors to VRNDSCALE with a
// fixed immediate:
//   ffloor -> 0x9, fnearbyint -> 0xC, fceil -> 0xA, frint -> 0x4, ftrunc -> 0xB
// NOTE(review): the immediates presumably follow the ROUNDPS-style imm8
// layout (low bits = rounding mode, bit 2 = use MXCSR mode, bit 3 = suppress
// precision exception) -- confirm against the instruction reference.
let Predicates = [HasAVX512] in {
def : Pat<(v16f32 (ffloor VR512:$src)),
          (VRNDSCALEPSZrri VR512:$src, (i32 0x9))>;
def : Pat<(v16f32 (fnearbyint VR512:$src)),
          (VRNDSCALEPSZrri VR512:$src, (i32 0xC))>;
def : Pat<(v16f32 (fceil VR512:$src)),
          (VRNDSCALEPSZrri VR512:$src, (i32 0xA))>;
def : Pat<(v16f32 (frint VR512:$src)),
          (VRNDSCALEPSZrri VR512:$src, (i32 0x4))>;
def : Pat<(v16f32 (ftrunc VR512:$src)),
          (VRNDSCALEPSZrri VR512:$src, (i32 0xB))>;

def : Pat<(v8f64 (ffloor VR512:$src)),
          (VRNDSCALEPDZrri VR512:$src, (i32 0x9))>;
def : Pat<(v8f64 (fnearbyint VR512:$src)),
          (VRNDSCALEPDZrri VR512:$src, (i32 0xC))>;
def : Pat<(v8f64 (fceil VR512:$src)),
          (VRNDSCALEPDZrri VR512:$src, (i32 0xA))>;
def : Pat<(v8f64 (frint VR512:$src)),
          (VRNDSCALEPDZrri VR512:$src, (i32 0x4))>;
def : Pat<(v8f64 (ftrunc VR512:$src)),
          (VRNDSCALEPDZrri VR512:$src, (i32 0xB))>;
}
Elena Demikhovsky9e380862015-06-03 10:56:40 +00009435
// Select the generic FP rounding nodes on 128/256-bit vectors to the VLX
// encodings of VRNDSCALE, using the same node-to-immediate mapping as the
// 512-bit patterns:
//   ffloor -> 0x9, fnearbyint -> 0xC, fceil -> 0xA, frint -> 0x4, ftrunc -> 0xB
let Predicates = [HasVLX] in {
def : Pat<(v4f32 (ffloor VR128X:$src)),
          (VRNDSCALEPSZ128rri VR128X:$src, (i32 0x9))>;
def : Pat<(v4f32 (fnearbyint VR128X:$src)),
          (VRNDSCALEPSZ128rri VR128X:$src, (i32 0xC))>;
def : Pat<(v4f32 (fceil VR128X:$src)),
          (VRNDSCALEPSZ128rri VR128X:$src, (i32 0xA))>;
def : Pat<(v4f32 (frint VR128X:$src)),
          (VRNDSCALEPSZ128rri VR128X:$src, (i32 0x4))>;
def : Pat<(v4f32 (ftrunc VR128X:$src)),
          (VRNDSCALEPSZ128rri VR128X:$src, (i32 0xB))>;

def : Pat<(v2f64 (ffloor VR128X:$src)),
          (VRNDSCALEPDZ128rri VR128X:$src, (i32 0x9))>;
def : Pat<(v2f64 (fnearbyint VR128X:$src)),
          (VRNDSCALEPDZ128rri VR128X:$src, (i32 0xC))>;
def : Pat<(v2f64 (fceil VR128X:$src)),
          (VRNDSCALEPDZ128rri VR128X:$src, (i32 0xA))>;
def : Pat<(v2f64 (frint VR128X:$src)),
          (VRNDSCALEPDZ128rri VR128X:$src, (i32 0x4))>;
def : Pat<(v2f64 (ftrunc VR128X:$src)),
          (VRNDSCALEPDZ128rri VR128X:$src, (i32 0xB))>;

def : Pat<(v8f32 (ffloor VR256X:$src)),
          (VRNDSCALEPSZ256rri VR256X:$src, (i32 0x9))>;
def : Pat<(v8f32 (fnearbyint VR256X:$src)),
          (VRNDSCALEPSZ256rri VR256X:$src, (i32 0xC))>;
def : Pat<(v8f32 (fceil VR256X:$src)),
          (VRNDSCALEPSZ256rri VR256X:$src, (i32 0xA))>;
def : Pat<(v8f32 (frint VR256X:$src)),
          (VRNDSCALEPSZ256rri VR256X:$src, (i32 0x4))>;
def : Pat<(v8f32 (ftrunc VR256X:$src)),
          (VRNDSCALEPSZ256rri VR256X:$src, (i32 0xB))>;

def : Pat<(v4f64 (ffloor VR256X:$src)),
          (VRNDSCALEPDZ256rri VR256X:$src, (i32 0x9))>;
def : Pat<(v4f64 (fnearbyint VR256X:$src)),
          (VRNDSCALEPDZ256rri VR256X:$src, (i32 0xC))>;
def : Pat<(v4f64 (fceil VR256X:$src)),
          (VRNDSCALEPDZ256rri VR256X:$src, (i32 0xA))>;
def : Pat<(v4f64 (frint VR256X:$src)),
          (VRNDSCALEPDZ256rri VR256X:$src, (i32 0x4))>;
def : Pat<(v4f64 (ftrunc VR256X:$src)),
          (VRNDSCALEPDZ256rri VR256X:$src, (i32 0xB))>;
}
9481
// Defines the reg (rri), mem (rmi) and broadcast-mem (rmbi) forms of a
// shuffle instruction matched through X86Shuf128. The X86Shuf128 node is
// typed with CastInfo.VT and the result is bitconverted to _.VT, so the
// instruction's own element type (_) may differ from the type the shuffle
// node is expressed in (CastInfo).
multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
                                          OpndItins itins, X86VectorVTInfo _,
                                          X86VectorVTInfo CastInfo> {
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (_.VT (bitconvert
                         (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
                                                  (i8 imm:$src3))))),
                  itins.rr>, Sched<[itins.Sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (_.VT
                 (bitconvert
                  (CastInfo.VT (X86Shuf128 _.RC:$src1,
                                           (bitconvert (_.LdFrag addr:$src2)),
                                           (i8 imm:$src3))))), itins.rm>,
                Sched<[itins.Sched.Folded, ReadAfterLd]>;
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                    OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr##", $src3",
                    (_.VT
                     (bitconvert
                      (CastInfo.VT
                       (X86Shuf128 _.RC:$src1,
                                   (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
                                   (i8 imm:$src3))))), itins.rm>, EVEX_B,
                    Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
9515
// Instantiates avx512_shuff_packed_128_common for the 512-bit (Z, HasAVX512)
// and 256-bit (Z256, HasAVX512 + HasVLX) widths. No 128-bit form is defined.
multiclass avx512_shuff_packed_128<string OpcodeStr, OpndItins itins,
                                   AVX512VLVectorVTInfo _,
                                   AVX512VLVectorVTInfo CastInfo, bits<8> opc>{
  let Predicates = [HasAVX512] in
  defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, itins,
                                          _.info512, CastInfo.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in
  defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, itins,
                                             _.info256, CastInfo.info256>, EVEX_V256;
}
9527
// 128-bit-lane shuffles. The FP forms use opcode 0x23 and the integer forms
// 0x43. The 32-bit-element variants express the X86Shuf128 node in the
// matching 64-bit-element type (second type-info argument); the 64-bit
// variants also carry VEX_W.
defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", SSE_SHUFP,
      avx512vl_f32_info, avx512vl_f64_info, 0x23>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", SSE_SHUFP,
      avx512vl_f64_info, avx512vl_f64_info, 0x23>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", SSE_SHUFP,
      avx512vl_i32_info, avx512vl_i64_info, 0x43>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", SSE_SHUFP,
      avx512vl_i64_info, avx512vl_i64_info, 0x43>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
Igor Breger00d9f842015-06-08 14:03:17 +00009536
let Predicates = [HasAVX512] in {
// Provide fallback in case the load node that is used in the broadcast
// patterns above is used by additional users, which prevents the pattern
// selection.
//
// Each pattern places the 128-bit register source into the low xmm
// subregister of an otherwise undefined 512-bit value, feeds that value as
// both shuffle sources, and uses immediate 0.
// NOTE(review): immediate 0 presumably selects the low 128-bit lane for
// every destination lane -- confirm against the VSHUF*X* definition.
// The v32i16 and v64i8 cases reuse VSHUFI32X4.
def : Pat<(v8f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
          (VSHUFF64X2Zrri (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;
def : Pat<(v8i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
          (VSHUFI64X2Zrri (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;

def : Pat<(v16f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
          (VSHUFF32X4Zrri (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;
def : Pat<(v16i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
          (VSHUFI32X4Zrri (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;

def : Pat<(v32i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
          (VSHUFI32X4Zrri (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;

def : Pat<(v64i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
          (VSHUFI32X4Zrri (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;
}
9569
// Thin wrapper that instantiates avx512_common_3Op_imm8 for VALIGN: fixed
// opcode 0x03, node X86VAlign, tagged AVX512AIi8Base and EVEX_4V. The
// predicate is left at avx512_common_3Op_imm8's default.
multiclass avx512_valign<string OpcodeStr, OpndItins itins,
                         AVX512VLVectorVTInfo VTInfo_I> {
  defm NAME:       avx512_common_3Op_imm8<OpcodeStr, VTInfo_I, 0x03, X86VAlign, itins>,
                           AVX512AIi8Base, EVEX_4V;
}
9575
// Element-granular align instructions (dword and qword element sizes).
defm VALIGND: avx512_valign<"valignd", SSE_PALIGN, avx512vl_i32_info>,
                                                  EVEX_CD8<32, CD8VF>;
defm VALIGNQ: avx512_valign<"valignq", SSE_PALIGN, avx512vl_i64_info>,
                                                  EVEX_CD8<64, CD8VF>, VEX_W;

// Byte-granular align; uses avx512_common_3Op_rm_imm8's default predicate
// and i8 vectors for both destination and sources.
defm VPALIGNR:   avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr", SSE_PALIGN,
                                          avx512vl_i8_info, avx512vl_i8_info>,
                EVEX_CD8<8, CD8VF>;
9584
// Fragments to help convert valignq into masked valignd. Or valignq/valignd
// into vpalignr.
// Each transform rescales the immediate by a constant factor:
//   ValignqImm32XForm - multiplies by 2
//   ValignqImm8XForm  - multiplies by 8
//   ValigndImm8XForm  - multiplies by 4
// The scaled value is emitted through getI8Imm.
def ValignqImm32XForm : SDNodeXForm<imm, [{
  return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
}]>;
def ValignqImm8XForm : SDNodeXForm<imm, [{
  return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
}]>;
def ValigndImm8XForm : SDNodeXForm<imm, [{
  return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
}]>;
9596
// Patterns that select a masked align expressed in one element type (From)
// to an instruction operating on another element type (To). The source DAG
// is a vselect over To.KRCWM whose true operand is OpNode in From.VT,
// bitconverted to To.VT. ImmXForm rescales the immediate for the To
// instruction. Four cases are covered:
//   rrik  - register source, merge with $src0
//   rrikz - register source, zero masking
//   rmik  - memory source,   merge with $src0
//   rmikz - memory source,   zero masking
// OpcodeStr is the instruction name prefix; the form suffix is appended.
multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
                                        X86VectorVTInfo From, X86VectorVTInfo To,
                                        SDNodeXForm ImmXForm> {
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                              imm:$src3))),
                            To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
                                                  To.RC:$src1, To.RC:$src2,
                                                  (ImmXForm imm:$src3))>;

  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                              imm:$src3))),
                            To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
                                                   To.RC:$src1, To.RC:$src2,
                                                   (ImmXForm imm:$src3))>;

  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1,
                                      (bitconvert (To.LdFrag addr:$src2)),
                                      imm:$src3))),
                            To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
                                                  To.RC:$src1, addr:$src2,
                                                  (ImmXForm imm:$src3))>;

  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1,
                                      (bitconvert (To.LdFrag addr:$src2)),
                                      imm:$src3))),
                            To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
                                                   To.RC:$src1, addr:$src2,
                                                   (ImmXForm imm:$src3))>;
}
9638
// Extends avx512_vpalign_mask_lowering with patterns whose second source is
// a scalar load broadcast in the To element type:
//   rmbi   - unmasked
//   rmbik  - merge with $src0
//   rmbikz - zero masking
multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
                                           X86VectorVTInfo From,
                                           X86VectorVTInfo To,
                                           SDNodeXForm ImmXForm> :
      avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
  def : Pat<(From.VT (OpNode From.RC:$src1,
                             (bitconvert (To.VT (X86VBroadcast
                                                (To.ScalarLdFrag addr:$src2)))),
                             imm:$src3)),
            (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
                                                  (ImmXForm imm:$src3))>;

  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1,
                                      (bitconvert
                                       (To.VT (X86VBroadcast
                                               (To.ScalarLdFrag addr:$src2)))),
                                      imm:$src3))),
                            To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
                                                   To.RC:$src1, addr:$src2,
                                                   (ImmXForm imm:$src3))>;

  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1,
                                      (bitconvert
                                       (To.VT (X86VBroadcast
                                               (To.ScalarLdFrag addr:$src2)))),
                                      imm:$src3))),
                            To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
                                                    To.RC:$src1, addr:$src2,
                                                    (ImmXForm imm:$src3))>;
}
9675
let Predicates = [HasAVX512] in {
  // For 512-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  // ValignqImm32XForm doubles the immediate for the dword instruction.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
                                         v16i32_info, ValignqImm32XForm>;
}
9682
let Predicates = [HasVLX] in {
  // For 128-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
                                         v4i32x_info, ValignqImm32XForm>;
  // For 256-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
                                         v8i32x_info, ValignqImm32XForm>;
}
9693
let Predicates = [HasVLX, HasBWI] in {
  // We can turn 128-bit VALIGND/VALIGNQ into VPALIGNR. Only the 128-bit
  // forms are instantiated here; no 256-bit lowering is defined.
  // The immediate is rescaled from qword (x8) or dword (x4) units.
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
                                      v16i8x_info, ValignqImm8XForm>;
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
                                      v16i8x_info, ValigndImm8XForm>;
}
9701
// VDBPSADBW: opcode 0x42, i16 destination vectors and i8 source vectors.
// Uses avx512_common_3Op_rm_imm8's default predicate.
defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
                SSE_INTMUL_ITINS_P, avx512vl_i16_info, avx512vl_i8_info>,
                EVEX_CD8<8, CD8VF>;
Igor Bregerf3ded812015-08-31 13:09:30 +00009705
// Unary vector instruction with a register form (rr) and a full-vector
// memory form (rm). Both are maskable and tagged EVEX and AVX5128IBase; the
// memory form additionally sets EVEX_CD8 from the element size.
multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src1), OpcodeStr,
                    "$src1", "$src1",
                    (_.VT (OpNode _.RC:$src1)), itins.rr>, EVEX, AVX5128IBase,
                    Sched<[itins.Sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.MemOp:$src1), OpcodeStr,
                  "$src1", "$src1",
                  (_.VT (OpNode (bitconvert (_.LdFrag addr:$src1)))), itins.rm>,
            EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
            Sched<[itins.Sched.Folded]>;
  }
}
9723
// Extends avx512_unary_rm with a broadcast-memory form (rmb): the operand is
// a scalar load broadcast to the full vector, and the record is tagged
// EVEX_B.
multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            OpndItins itins, X86VectorVTInfo _> :
           avx512_unary_rm<opc, OpcodeStr, OpNode, itins, _> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.ScalarMemOp:$src1), OpcodeStr,
                  "${src1}"##_.BroadcastStr,
                  "${src1}"##_.BroadcastStr,
                  (_.VT (OpNode (X86VBroadcast
                                    (_.ScalarLdFrag addr:$src1)))), itins.rm>,
             EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[itins.Sched.Folded]>;
}
9736
// Instantiates avx512_unary_rm (no broadcast form) at 512/256/128 bits.
// The 512-bit form requires prd; the narrower forms also require HasVLX.
multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              OpndItins itins, AVX512VLVectorVTInfo VTInfo,
                              Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, itins, VTInfo.info512>,
                             EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, itins, VTInfo.info256>,
                              EVEX_V256;
    defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, itins, VTInfo.info128>,
                              EVEX_V128;
  }
}
9751
// Instantiates avx512_unary_rmb (including the broadcast form) at
// 512/256/128 bits. The 512-bit form requires prd; the narrower forms also
// require HasVLX.
multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               OpndItins itins, AVX512VLVectorVTInfo VTInfo,
                               Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info512>,
                              EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info256>,
                                 EVEX_V256;
    defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info128>,
                                 EVEX_V128;
  }
}
9766
// Dword/qword variants of a unary instruction, both with broadcast forms.
// The mnemonic gets a "q" or "d" suffix; the Q variant also carries VEX_W.
multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, OpndItins itins, Predicate prd> {
  defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, itins,
                               avx512vl_i64_info, prd>, VEX_W;
  defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, itins,
                               avx512vl_i32_info, prd>;
}
9774
// Word/byte variants of a unary instruction. These use avx512_unary_rm_vl,
// so no broadcast form is defined. The mnemonic gets a "w" or "b" suffix and
// both variants are VEX_WIG.
multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, OpndItins itins, Predicate prd> {
  defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, itins,
                              avx512vl_i16_info, prd>, VEX_WIG;
  defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, itins,
                              avx512vl_i8_info, prd>, VEX_WIG;
}
9782
// All four element widths of a unary operation under one NAME:
//   d/q variants are predicated on HasAVX512,
//   b/w variants are predicated on HasBWI.
multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  OpndItins itins> {
  defm NAME
      : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, itins,
                              HasAVX512>,
        avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, itins,
                              HasBWI>;
}
9792
// VPABS{B,W,D,Q}: opcodes 0x1C (b), 0x1D (w), 0x1E (d), 0x1F (q).
defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
                                    SSE_PABS>;

// VPABS: Use 512bit version to implement 128/256 bit in case NoVLX.
// The narrow source is inserted into an undefined v8i64, VPABSQZrr is applied,
// and the matching sub-register is extracted from the result.
let Predicates = [HasAVX512, NoVLX] in {
  // 256-bit: v4i64 via sub_ymm.
  def : Pat<(v4i64 (abs VR256X:$src)),
            (EXTRACT_SUBREG
              (VPABSQZrr
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
              sub_ymm)>;

  // 128-bit: v2i64 via sub_xmm.
  def : Pat<(v2i64 (abs VR128X:$src)),
            (EXTRACT_SUBREG
              (VPABSQZrr
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
              sub_xmm)>;
}
9808
// Use 512bit version to implement 128/256 bit.
// Under [prd, NoVLX], a 256- or 128-bit OpNode is selected as the instruction
// named InstrStr#"Zrr": the source is inserted into an undefined info512
// vector and the result is extracted through the same sub-register index.
//   InstrStr - instruction name prefix; "Zrr" is appended.
//   OpNode   - unary node to match.
//   _        - vector infos for the 128/256/512-bit types.
//   prd      - feature predicate required in addition to NoVLX.
multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
                                 AVX512VLVectorVTInfo _, Predicate prd> {
  let Predicates = [prd, NoVLX] in {
    // 256-bit source.
    def : Pat<(_.info256.VT (OpNode _.info256.RC:$src1)),
              (EXTRACT_SUBREG
                (!cast<Instruction>(!strconcat(InstrStr, "Zrr"))
                  (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)),
                                 _.info256.RC:$src1,
                                 _.info256.SubRegIdx)),
                _.info256.SubRegIdx)>;

    // 128-bit source.
    def : Pat<(_.info128.VT (OpNode _.info128.RC:$src1)),
              (EXTRACT_SUBREG
                (!cast<Instruction>(!strconcat(InstrStr, "Zrr"))
                  (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)),
                                 _.info128.RC:$src1,
                                 _.info128.SubRegIdx)),
                _.info128.SubRegIdx)>;
  }
}
9830
// FIXME: Is there a better scheduler itinerary for VPLZCNT?
// VPLZCNT{D,Q}: both widths use opcode 0x44 and require HasCDI.
defm VPLZCNT : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
                                     SSE_INTALU_ITINS_P, HasCDI>;

// FIXME: Is there a better scheduler itinerary for VPCONFLICT?
// VPCONFLICT{D,Q}: both widths use opcode 0xC4 and require HasCDI.
defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
                                        SSE_INTALU_ITINS_P, HasCDI>;

// VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;
Simon Pilgrimc89aa0b2017-05-05 12:20:34 +00009842
Igor Breger24cab0f2015-11-16 07:22:00 +00009843//===---------------------------------------------------------------------===//
Oren Ben Simhon7bf27f02017-05-25 13:45:23 +00009844// Counts number of ones - VPOPCNTD and VPOPCNTQ
9845//===---------------------------------------------------------------------===//
9846
// FIXME: Is there a better scheduler itinerary for VPOPCNTD/VPOPCNTQ?
// VPOPCNT{D,Q}: both widths use opcode 0x55 and require HasVPOPCNTDQ.
defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
                                     SSE_INTALU_ITINS_P, HasVPOPCNTDQ>;

// Without VLX, select 128/256-bit ctpop through the 512-bit instructions.
defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info,
                             HasVPOPCNTDQ>;
defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info,
                             HasVPOPCNTDQ>;
Oren Ben Simhon7bf27f02017-05-25 13:45:23 +00009853
9854//===---------------------------------------------------------------------===//
Igor Breger24cab0f2015-11-16 07:22:00 +00009855// Replicate Single FP - MOVSHDUP and MOVSLDUP
9856//===---------------------------------------------------------------------===//
// Single-precision replicate instruction: avx512_unary_rm_vl over the f32
// vector infos, predicated on HasAVX512 and tagged XS.
multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            OpndItins itins> {
  defm NAME : avx512_unary_rm_vl<opc, OpcodeStr, OpNode, itins,
                                 avx512vl_f32_info, HasAVX512>,
              XS;
}
9862
// Both replicate forms use the SSE_MOVDDUP itineraries.
defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
                                  SSE_MOVDDUP>;
defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
                                  SSE_MOVDDUP>;
Igor Breger1f782962015-11-19 08:26:56 +00009865
9866//===----------------------------------------------------------------------===//
9867// AVX-512 - MOVDDUP
9868//===----------------------------------------------------------------------===//
9869
// 128-bit MOVDDUP forms, built on AVX512_maskable.
//   rr - register source; matches (OpNode $src).
//   rm - scalar memory source (_.ScalarMemOp); matches OpNode applied to a
//        scalar_to_vector of the scalar load, with EVEX_CD8<EltSize, CD8VH>.
multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              OpndItins itins, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm rr : AVX512_maskable<opc, MRMSrcReg, _,
                              (outs _.RC:$dst), (ins _.RC:$src),
                              OpcodeStr, "$src", "$src",
                              (_.VT (OpNode (_.VT _.RC:$src))),
                              itins.rr>,
              EVEX, Sched<[itins.Sched]>;

    defm rm : AVX512_maskable<opc, MRMSrcMem, _,
                              (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
                              OpcodeStr, "$src", "$src",
                              (_.VT (OpNode
                                      (_.VT (scalar_to_vector
                                              (_.ScalarLdFrag addr:$src))))),
                              itins.rm>,
              EVEX, EVEX_CD8<_.EltSize, CD8VH>,
              Sched<[itins.Sched.Folded]>;
  }
}
9885
// MOVDDUP at all three vector lengths.
//   opc, OpcodeStr, itins - forwarded to the per-width multiclasses.
//   OpNode - node matched by the 512- and 256-bit forms. It was previously
//            accepted but ignored in favour of a hard-coded X86Movddup; it is
//            now honoured. The caller in this file passes X86Movddup, so the
//            generated records are unchanged.
//   VTInfo - per-width vector type descriptors.
// The 128-bit form always matches X86VBroadcast via avx512_movddup_128,
// independent of OpNode.
multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 OpndItins itins, AVX512VLVectorVTInfo VTInfo> {

  defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, itins, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, itins,
                                VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_movddup_128<opc, OpcodeStr, X86VBroadcast, itins,
                                   VTInfo.info128>,
                EVEX_V128;
  }
}
9898
// MOVDDUP over the f64 vector infos, tagged XD and VEX_W.
multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          OpndItins itins> {
  defm NAME : avx512_movddup_common<opc, OpcodeStr, OpNode, itins,
                                    avx512vl_f64_info>,
              XD, VEX_W;
}
9904
// VMOVDDUP: opcode 0x12, matched as X86Movddup, SSE_MOVDDUP itineraries.
defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SSE_MOVDDUP>;
Igor Breger1f782962015-11-19 08:26:56 +00009906
// Select v2f64 X86VBroadcast as the 128-bit VMOVDDUP when VLX is available.
let Predicates = [HasVLX] in {
  // Unmasked: scalar load, scalar register, and full v2f64 load sources.
  def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
            (VMOVDDUPZ128rm addr:$src)>;
  def : Pat<(v2f64 (X86VBroadcast f64:$src)),
            (VMOVDDUPZ128rr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
  def : Pat<(v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
            (VMOVDDUPZ128rm addr:$src)>;

  // Masked, scalar register source: merge into $src0, then zeroing.
  def : Pat<(vselect (v2i1 VK2WM:$mask),
                     (v2f64 (X86VBroadcast f64:$src)),
                     (v2f64 VR128X:$src0)),
            (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
                             (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
  def : Pat<(vselect (v2i1 VK2WM:$mask),
                     (v2f64 (X86VBroadcast f64:$src)),
                     (bitconvert (v4i32 immAllZerosV))),
            (VMOVDDUPZ128rrkz VK2WM:$mask,
                              (COPY_TO_REGCLASS FR64X:$src, VR128X))>;

  // Masked, scalar f64 load source: merge into $src0, then zeroing.
  def : Pat<(vselect (v2i1 VK2WM:$mask),
                     (v2f64 (X86VBroadcast (loadf64 addr:$src))),
                     (v2f64 VR128X:$src0)),
            (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(vselect (v2i1 VK2WM:$mask),
                     (v2f64 (X86VBroadcast (loadf64 addr:$src))),
                     (bitconvert (v4i32 immAllZerosV))),
            (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;

  // Masked, v2f64 load source: merge into $src0, then zeroing.
  def : Pat<(vselect (v2i1 VK2WM:$mask),
                     (v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
                     (v2f64 VR128X:$src0)),
            (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(vselect (v2i1 VK2WM:$mask),
                     (v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
                     (bitconvert (v4i32 immAllZerosV))),
            (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
}
Igor Breger1f782962015-11-19 08:26:56 +00009937
Igor Bregerf2460112015-07-26 14:41:44 +00009938//===----------------------------------------------------------------------===//
9939// AVX-512 - Unpack Instructions
9940//===----------------------------------------------------------------------===//
// Floating-point unpack high/low (HasAVX512).
defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512,
                                 SSE_ALU_ITINS_S>;
defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512,
                                 SSE_ALU_ITINS_S>;

// Integer byte/word unpacks (HasBWI).
defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
                                       SSE_INTALU_ITINS_P, HasBWI>;
defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
                                       SSE_INTALU_ITINS_P, HasBWI>;
defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
                                       SSE_INTALU_ITINS_P, HasBWI>;
defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
                                       SSE_INTALU_ITINS_P, HasBWI>;

// Integer dword/qword unpacks (HasAVX512).
defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
                                       SSE_INTALU_ITINS_P, HasAVX512>;
defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
                                       SSE_INTALU_ITINS_P, HasAVX512>;
defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
                                        SSE_INTALU_ITINS_P, HasAVX512>;
defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
                                        SSE_INTALU_ITINS_P, HasAVX512>;
Igor Bregerdefab3c2015-10-08 12:55:01 +00009963
9964//===----------------------------------------------------------------------===//
9965// AVX-512 - Extract & Insert Integer Instructions
9966//===----------------------------------------------------------------------===//
9967
// Store form ("mr") shared by the byte and word element extracts: matches a
// store of the OpNode result truncated to the element type.
multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   X86VectorVTInfo _> {
  def mr : AVX512Ii8<opc, MRMDestMem, (outs),
                     (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
                     !strconcat(OpcodeStr,
                       "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                     [(store
                        (_.EltVT (trunc (OpNode (_.VT _.RC:$src1),
                                                imm:$src2))),
                        addr:$dst)]>,
           EVEX, EVEX_CD8<_.EltSize, CD8VT1>,
           Sched<[WriteShuffleLd, WriteRMW]>;
}
9977
// Byte element extract (HasBWI): register form "rr" matching X86pextrb into
// GR32orGR64, plus the shared store form. Both use opcode 0x14 and TAPD.
multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       !strconcat(OpcodeStr,
                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                       [(set GR32orGR64:$dst,
                             (X86pextrb (_.VT _.RC:$src1), imm:$src2))]>,
             EVEX, TAPD, Sched<[WriteShuffle]>;

    defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
  }
}
9990
// Word element extract (HasBWI).
//   rr     - opcode 0xC5, MRMSrcReg, PD; matches X86pextrw into GR32orGR64.
//   rr_REV - opcode 0x15, MRMDestReg, TAPD; no selection pattern, printed
//            with a ".s" suffix, tied to rr through FoldGenData.
//   mr     - shared store form with opcode 0x15, TAPD.
multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       !strconcat(OpcodeStr,
                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                       [(set GR32orGR64:$dst,
                             (X86pextrw (_.VT _.RC:$src1), imm:$src2))],
                       IIC_SSE_PEXTRW>,
             EVEX, PD, Sched<[WriteShuffle]>;

    let hasSideEffects = 0 in {
      def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
                             (ins _.RC:$src1, u8imm:$src2),
                             !strconcat(OpcodeStr,
                               ".s\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                             [], IIC_SSE_PEXTRW>,
                   EVEX, TAPD, FoldGenData<NAME#rr>, Sched<[WriteShuffle]>;
    }

    defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
  }
}
10010
// Dword/qword element extract (HasDQI), opcode 0x16, TAPD.
//   rr - extractelt into a GRC register.
//   mr - extractelt stored directly to memory.
multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
                                 RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       !strconcat(OpcodeStr,
                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                       [(set GRC:$dst,
                             (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
             EVEX, TAPD, Sched<[WriteShuffle]>;

    def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
                       (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
                       !strconcat(OpcodeStr,
                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                       [(store (extractelt (_.VT _.RC:$src1), imm:$src2),
                               addr:$dst)]>,
             EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
             Sched<[WriteShuffleLd, WriteRMW]>;
  }
}
10030
// Element extracts from 128-bit vectors. Byte/word forms are VEX_WIG; the
// qword form is VEX_W and writes GR64, the dword form writes GR32.
defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG;
defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG;
defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;
10035
// Memory-source element insert ("rm"): inserts the value loaded through
// LdFrag from $src2 into $src1 at immediate index $src3.
multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86VectorVTInfo _, PatFrag LdFrag> {
  def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
                     (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                     !strconcat(OpcodeStr,
       "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                     [(set _.RC:$dst,
                           (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2),
                                         imm:$src3)))]>,
           EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
           Sched<[WriteShuffleLd, ReadAfterLd]>;
}
10045
// Byte/word element insert (HasBWI): register form taking the scalar in
// GR32orGR64, plus the memory form from avx512_insert_elt_m.
multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86VectorVTInfo _, PatFrag LdFrag> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
                       (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
                       !strconcat(OpcodeStr,
         "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                       [(set _.RC:$dst,
                             (OpNode _.RC:$src1, GR32orGR64:$src2,
                                     imm:$src3))]>,
             EVEX_4V, Sched<[WriteShuffle]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag>;
  }
}
10059
// Dword/qword element insert (HasDQI), TAPD: register form taking the scalar
// in GRC and matching insertelt, plus the memory form using _.ScalarLdFrag.
multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
                                X86VectorVTInfo _, RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
                       (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
                       !strconcat(OpcodeStr,
         "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                       [(set _.RC:$dst,
                             (_.VT (insertelt _.RC:$src1, GRC:$src2,
                                              imm:$src3)))]>,
             EVEX_4V, TAPD, Sched<[WriteShuffle]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
                                    _.ScalarLdFrag>,
                TAPD;
  }
}
10074
// Element inserts into 128-bit vectors. Byte/word forms load through
// extloadi8/extloadi16 and are VEX_WIG; dword/qword share opcode 0x22, with
// the qword form tagged VEX_W.
defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
                                     extloadi8>,
                TAPD, VEX_WIG;
defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
                                     extloadi16>,
                PD, VEX_WIG;
defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>,
                VEX_W;
Simon Pilgrim36be8522017-11-29 18:52:20 +000010081
Igor Bregera6297c72015-09-02 10:50:58 +000010082//===----------------------------------------------------------------------===//
10083// VSHUFPS - VSHUFPD Operations
10084//===----------------------------------------------------------------------===//
Simon Pilgrim36be8522017-11-29 18:52:20 +000010085
// VSHUFP{S,D}: opcode 0xC6, matched as X86Shufp on the floating-point vector
// infos. VTInfo_I is accepted but not referenced in this body.
multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
                        AVX512VLVectorVTInfo VTInfo_FP> {
  defm NAME : avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
                                     SSE_SHUFP>,
              EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
              AVX512AIi8Base, EVEX_4V;
}
10092
// Single-precision form is PS; double-precision form is PD with VEX_W.
defm VSHUFPS : avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>,
               PS;
defm VSHUFPD : avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>,
               PD, VEX_W;
Simon Pilgrim36be8522017-11-29 18:52:20 +000010095
Asaf Badouhd2c35992015-09-02 14:21:54 +000010096//===----------------------------------------------------------------------===//
10097// AVX-512 - Byte shift Left/Right
10098//===----------------------------------------------------------------------===//
10099
// Itineraries for the byte-shift instructions: IIC_SSE_INTSHDQ_P_RI for both
// the register and memory forms, scheduled as WriteVecShift.
def AVX512_BYTESHIFT : OpndItins<IIC_SSE_INTSHDQ_P_RI, IIC_SSE_INTSHDQ_P_RI> {
  let Sched = WriteVecShift;
}
10104
// Whole-register shift by an 8-bit immediate for one vector type.
//   rr - register source, format MRMr.
//   rm - memory source (_.MemOp), format MRMm; the load is bitconverted to
//        _.VT before OpNode is applied.
multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
                               Format MRMm, string OpcodeStr,
                               OpndItins itins, X86VectorVTInfo _> {
  def rr : AVX512<opc, MRMr,
                  (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set _.RC:$dst,
                        (_.VT (OpNode _.RC:$src1, (i8 imm:$src2))))],
                  itins.rr>,
           Sched<[itins.Sched]>;

  def rm : AVX512<opc, MRMm,
                  (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set _.RC:$dst,
                        (_.VT (OpNode
                                (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                (i8 imm:$src2))))],
                  itins.rm>,
           Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
10121
// avx512_shift_packed at all three vector lengths on byte vectors
// (v64i8 / v32i8x / v16i8x). The 512-bit form requires prd; the 256/128-bit
// forms additionally require HasVLX.
multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
                                   Format MRMm, string OpcodeStr,
                                   OpndItins itins, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr, itins,
                                 v64i8_info>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr, itins,
                                    v32i8x_info>,
                EVEX_V256;
    defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr, itins,
                                    v16i8x_info>,
                EVEX_V128;
  }
}
// Both byte shifts share opcode 0x73 and differ only in the ModRM format:
// MRM7r/MRM7m for the left shift, MRM3r/MRM3m for the right shift.
defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m,
                                       "vpslldq", AVX512_BYTESHIFT, HasBWI>,
               AVX512PDIi8Base, EVEX_4V, VEX_WIG;
defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m,
                                       "vpsrldq", AVX512_BYTESHIFT, HasBWI>,
               AVX512PDIi8Base, EVEX_4V, VEX_WIG;
Asaf Badouhd2c35992015-09-02 14:21:54 +000010141
10142
// Two-source operation whose result type (_dst) differs from its source
// type (_src).
//   rr - both sources in registers.
//   rm - second source from memory (_src.MemOp), bitconverted to _src.VT.
multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
                                string OpcodeStr, OpndItins itins,
                                X86VectorVTInfo _dst, X86VectorVTInfo _src> {
  def rr : AVX512BI<opc, MRMSrcReg,
                    (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
                    OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _dst.RC:$dst,
                          (_dst.VT (OpNode (_src.VT _src.RC:$src1),
                                           (_src.VT _src.RC:$src2))))],
                    itins.rr>,
           Sched<[itins.Sched]>;

  def rm : AVX512BI<opc, MRMSrcMem,
                    (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
                    OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _dst.RC:$dst,
                          (_dst.VT (OpNode (_src.VT _src.RC:$src1),
                                           (_src.VT (bitconvert
                                             (_src.LdFrag addr:$src2))))))],
                    itins.rm>,
           Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
10162
// avx512_psadbw_packed at all three vector lengths: i64 result vectors paired
// with i8 source vectors of the same width. The 512-bit form requires prd;
// the 256/128-bit forms additionally require HasVLX.
multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
                                    string OpcodeStr, OpndItins itins,
                                    Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, itins,
                                  v8i64_info, v64i8_info>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, itins,
                                     v4i64x_info, v32i8x_info>,
                EVEX_V256;
    defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, itins,
                                     v2i64x_info, v16i8x_info>,
                EVEX_V128;
  }
}
10176
// VPSADBW: opcode 0xf6, matched as X86psadbw, requires HasBWI.
defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
                                        SSE_MPSADBW_ITINS, HasBWI>,
               EVEX_4V, VEX_WIG;
Igor Bregerb4bb1902015-10-15 12:33:24 +000010179
// Transforms to swizzle an immediate to enable better matching when
// memory operand isn't in the right place.
def VPTERNLOG321_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
  uint8_t Imm = N->getZExtValue();
  // Bits 0, 2, 5 and 7 stay in place; bits 1/4 and 3/6 trade places, which
  // is a shift by three positions in either direction.
  uint8_t NewImm = (Imm & 0xa5) | ((Imm & 0x0a) << 3) | ((Imm & 0x50) >> 3);
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG213_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
  // Immediate bit i is the result for i = (op0 << 2) | (op1 << 1) | op2, so
  // exchanging op0 and op1 exchanges indices 2 (010) / 4 (100) and
  // 3 (011) / 5 (101); indices 0, 1, 6 and 7 are unaffected.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 2/4 and 3/5.
  uint8_t NewImm = Imm & 0xc3;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x20) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG132_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
  uint8_t Imm = N->getZExtValue();
  // Bits 0, 3, 4 and 7 stay in place; bits 1/2 and 5/6 trade places, which
  // is a shift by one position in either direction.
  uint8_t NewImm = (Imm & 0x99) | ((Imm & 0x22) << 1) | ((Imm & 0x44) >> 1);
  return getI8Imm(NewImm, SDLoc(N));
}]>;
// Immediate bit i of VPTERNLOG is the result for
// i = (op0 << 2) | (op1 << 1) | op2. The two transforms below handle the
// cyclic operand rotations; unlike the pairwise swaps they are not their own
// inverse, so the direction of each permutation matters. The bodies were
// previously interchanged: e.g. for a node (src2, src3, src1) with immediate
// 0xF0 ("select operand 0", i.e. src2) the 231 transform returned 0xAA
// ("select src3") instead of 0xCC ("select src2").
def VPTERNLOG231_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate written for node operand order
  // (src2, src3, src1) into the immediate for instruction operand order
  // (src1, src2, src3): NewImm[(a<<2)|(b<<1)|c] = Imm[(b<<2)|(c<<1)|a].
  uint8_t Imm = N->getZExtValue();
  // Bits 0 and 7 are fixed.
  // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG312_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate written for node operand order
  // (src3, src1, src2) into the immediate for instruction operand order
  // (src1, src2, src3): NewImm[(a<<2)|(b<<1)|c] = Imm[(c<<2)|(a<<1)|b].
  uint8_t Imm = N->getZExtValue();
  // Bits 0 and 7 are fixed.
  // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x08;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
Craig Topper4e794c72017-02-19 19:36:58 +000010241
Igor Bregerb4bb1902015-10-15 12:33:24 +000010242multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
Simon Pilgrimbb791b32017-11-30 13:18:06 +000010243 OpndItins itins, X86VectorVTInfo _>{
Craig Topper05948fb2016-08-02 05:11:15 +000010244 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
Igor Bregerb4bb1902015-10-15 12:33:24 +000010245 defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
10246 (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
Igor Breger252c2d92016-02-22 12:37:41 +000010247 OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
Igor Bregerb4bb1902015-10-15 12:33:24 +000010248 (OpNode (_.VT _.RC:$src1),
10249 (_.VT _.RC:$src2),
10250 (_.VT _.RC:$src3),
Simon Pilgrimbb791b32017-11-30 13:18:06 +000010251 (i8 imm:$src4)), itins.rr, 1, 1>,
10252 AVX512AIi8Base, EVEX_4V, Sched<[itins.Sched]>;
Craig Toppere1cac152016-06-07 07:27:54 +000010253 defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
10254 (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
10255 OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
10256 (OpNode (_.VT _.RC:$src1),
10257 (_.VT _.RC:$src2),
10258 (_.VT (bitconvert (_.LdFrag addr:$src3))),
Simon Pilgrimbb791b32017-11-30 13:18:06 +000010259 (i8 imm:$src4)), itins.rm, 1, 0>,
10260 AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
10261 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Craig Toppere1cac152016-06-07 07:27:54 +000010262 defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
10263 (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
10264 OpcodeStr, "$src4, ${src3}"##_.BroadcastStr##", $src2",
10265 "$src2, ${src3}"##_.BroadcastStr##", $src4",
10266 (OpNode (_.VT _.RC:$src1),
10267 (_.VT _.RC:$src2),
10268 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
Simon Pilgrimbb791b32017-11-30 13:18:06 +000010269 (i8 imm:$src4)), itins.rm, 1, 0>, EVEX_B,
10270 AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
10271 Sched<[itins.Sched.Folded, ReadAfterLd]>;
Igor Bregerb4bb1902015-10-15 12:33:24 +000010272 }// Constraints = "$src1 = $dst"
Craig Topper4e794c72017-02-19 19:36:58 +000010273
10274 // Additional patterns for matching passthru operand in other positions.
Craig Topper4e794c72017-02-19 19:36:58 +000010275 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10276 (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
10277 _.RC:$src1)),
10278 (!cast<Instruction>(NAME#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
10279 _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
10280 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10281 (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 imm:$src4)),
10282 _.RC:$src1)),
10283 (!cast<Instruction>(NAME#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
10284 _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
Craig Topper48905772017-02-19 21:32:15 +000010285
10286 // Additional patterns for matching loads in other positions.
10287 def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)),
10288 _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
10289 (!cast<Instruction>(NAME#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
10290 addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
10291 def : Pat<(_.VT (OpNode _.RC:$src1,
10292 (bitconvert (_.LdFrag addr:$src3)),
10293 _.RC:$src2, (i8 imm:$src4))),
10294 (!cast<Instruction>(NAME#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
10295 addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
10296
10297 // Additional patterns for matching zero masking with loads in other
10298 // positions.
Craig Topper48905772017-02-19 21:32:15 +000010299 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10300 (OpNode (bitconvert (_.LdFrag addr:$src3)),
10301 _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
10302 _.ImmAllZerosV)),
10303 (!cast<Instruction>(NAME#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
10304 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
10305 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10306 (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
10307 _.RC:$src2, (i8 imm:$src4)),
10308 _.ImmAllZerosV)),
10309 (!cast<Instruction>(NAME#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
10310 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
Craig Topper48905772017-02-19 21:32:15 +000010311
10312 // Additional patterns for matching masked loads with different
10313 // operand orders.
Craig Topper48905772017-02-19 21:32:15 +000010314 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10315 (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
10316 _.RC:$src2, (i8 imm:$src4)),
10317 _.RC:$src1)),
10318 (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
10319 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
Craig Topperc6c68f52017-02-20 07:00:40 +000010320 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10321 (OpNode (bitconvert (_.LdFrag addr:$src3)),
10322 _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
10323 _.RC:$src1)),
10324 (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
10325 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
10326 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10327 (OpNode _.RC:$src2, _.RC:$src1,
10328 (bitconvert (_.LdFrag addr:$src3)), (i8 imm:$src4)),
10329 _.RC:$src1)),
10330 (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
10331 _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
10332 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10333 (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
10334 _.RC:$src1, (i8 imm:$src4)),
10335 _.RC:$src1)),
10336 (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
10337 _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
10338 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10339 (OpNode (bitconvert (_.LdFrag addr:$src3)),
10340 _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
10341 _.RC:$src1)),
10342 (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
10343 _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
Craig Topper5b4e36a2017-02-20 02:47:42 +000010344
10345 // Additional patterns for matching broadcasts in other positions.
10346 def : Pat<(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10347 _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
10348 (!cast<Instruction>(NAME#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
10349 addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
10350 def : Pat<(_.VT (OpNode _.RC:$src1,
10351 (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10352 _.RC:$src2, (i8 imm:$src4))),
10353 (!cast<Instruction>(NAME#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
10354 addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
10355
10356 // Additional patterns for matching zero masking with broadcasts in other
10357 // positions.
10358 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10359 (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10360 _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
10361 _.ImmAllZerosV)),
10362 (!cast<Instruction>(NAME#_.ZSuffix#rmbikz) _.RC:$src1,
10363 _.KRCWM:$mask, _.RC:$src2, addr:$src3,
10364 (VPTERNLOG321_imm8 imm:$src4))>;
10365 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10366 (OpNode _.RC:$src1,
10367 (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10368 _.RC:$src2, (i8 imm:$src4)),
10369 _.ImmAllZerosV)),
10370 (!cast<Instruction>(NAME#_.ZSuffix#rmbikz) _.RC:$src1,
10371 _.KRCWM:$mask, _.RC:$src2, addr:$src3,
10372 (VPTERNLOG132_imm8 imm:$src4))>;
10373
10374 // Additional patterns for matching masked broadcasts with different
10375 // operand orders.
10376 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10377 (OpNode _.RC:$src1,
10378 (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10379 _.RC:$src2, (i8 imm:$src4)),
10380 _.RC:$src1)),
10381 (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
10382 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
Craig Topper2012dda2017-02-20 17:44:09 +000010383 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10384 (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10385 _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
10386 _.RC:$src1)),
Cameron McInally9d641012017-10-06 22:31:29 +000010387 (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
Craig Topper2012dda2017-02-20 17:44:09 +000010388 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
10389 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10390 (OpNode _.RC:$src2, _.RC:$src1,
10391 (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10392 (i8 imm:$src4)), _.RC:$src1)),
Cameron McInally9d641012017-10-06 22:31:29 +000010393 (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
Craig Topper2012dda2017-02-20 17:44:09 +000010394 _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
10395 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10396 (OpNode _.RC:$src2,
10397 (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10398 _.RC:$src1, (i8 imm:$src4)),
10399 _.RC:$src1)),
Cameron McInally9d641012017-10-06 22:31:29 +000010400 (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
Craig Topper2012dda2017-02-20 17:44:09 +000010401 _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
10402 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10403 (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10404 _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
10405 _.RC:$src1)),
Cameron McInally9d641012017-10-06 22:31:29 +000010406 (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
Craig Topper2012dda2017-02-20 17:44:09 +000010407 _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
Igor Bregerb4bb1902015-10-15 12:33:24 +000010408}
10409
// Instantiates avx512_ternlog (opcode 0x25, node X86vpternlog) for the three
// vector widths described by the AVX512VLVectorVTInfo argument.  The 512-bit
// form requires HasAVX512; the 128/256-bit forms additionally require HasVLX.
multiclass avx512_common_ternlog<string OpcodeStr, OpndItins itins,
                                 AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, itins,
                            _.info512>,
             EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, itins,
                               _.info128>,
                EVEX_V128;
    defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, itins,
                               _.info256>,
                EVEX_V256;
  }
}
10419
// Dword and qword element flavours of VPTERNLOG; the qword flavour sets VEX_W.
defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SSE_INTALU_ITINS_P,
                                        avx512vl_i32_info>;
defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SSE_INTALU_ITINS_P,
                                        avx512vl_i64_info>,
                  VEX_W;
Igor Bregerb4bb1902015-10-15 12:33:24 +000010424
Craig Topper8a444ee2018-01-26 22:17:40 +000010425
// Select vector NOT (xor with an all-ones vector) as a single VPTERNLOGQ
// instead of first materializing all-ones (with pcmpeq or vpternlog) and then
// xoring with it.  The immediate 15 is chosen so that the result depends only
// on src0; the same register is still passed for all three sources to avoid
// creating a false dependency.
// TODO: We should maybe have a more generalized algorithm for folding to
// vpternlog.

// 512-bit: use the ZMM instruction directly.
let Predicates = [HasAVX512] in {
  def : Pat<(v8i64 (xor VR512:$src, (bc_v8i64 (v16i32 immAllOnesV)))),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
}

// 128/256-bit without VLX: place the source in a 512-bit register with
// INSERT_SUBREG over IMPLICIT_DEF, run the ZMM instruction, then take the
// matching subregister of the result.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v2i64 (xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV)))),
            (EXTRACT_SUBREG
              (VPTERNLOGQZrri
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                (i8 15)),
              sub_xmm)>;
  def : Pat<(v4i64 (xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV)))),
            (EXTRACT_SUBREG
              (VPTERNLOGQZrri
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                (i8 15)),
              sub_ymm)>;
}

// 128/256-bit with VLX: use the native-width instructions.
let Predicates = [HasVLX] in {
  def : Pat<(v2i64 (xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV)))),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src,
                               (i8 15))>;
  def : Pat<(v4i64 (xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV)))),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src,
                               (i8 15))>;
}
10460
Asaf Badouhd4a0d9a2016-01-19 14:21:39 +000010461//===----------------------------------------------------------------------===//
10462// AVX-512 - FixupImm
10463//===----------------------------------------------------------------------===//
10464
// Packed VFIXUPIMM forms using the current rounding mode (FROUND_CURRENT).
// $src1 is tied to $dst.  Three variants are defined, differing only in how
// $src3 (typed as _.IntVT in the pattern) is supplied:
//   rri  - register operand
//   rmi  - full-width vector load
//   rmbi - broadcast of a scalar load (EVEX_B)
multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  OpndItins itins, X86VectorVTInfo _>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
    // Register source.
    defm rri : AVX512_maskable_3src<
                 opc, MRMSrcReg, _, (outs _.RC:$dst),
                 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                 OpcodeStr##_.Suffix,
                 "$src4, $src3, $src2", "$src2, $src3, $src4",
                 (OpNode (_.VT _.RC:$src1),
                         (_.VT _.RC:$src2),
                         (_.IntVT _.RC:$src3),
                         (i32 imm:$src4),
                         (i32 FROUND_CURRENT)),
                 itins.rr>,
               Sched<[itins.Sched]>;

    // Folded full-vector load.
    defm rmi : AVX512_maskable_3src<
                 opc, MRMSrcMem, _, (outs _.RC:$dst),
                 (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
                 OpcodeStr##_.Suffix,
                 "$src4, $src3, $src2", "$src2, $src3, $src4",
                 (OpNode (_.VT _.RC:$src1),
                         (_.VT _.RC:$src2),
                         (_.IntVT (bitconvert (_.LdFrag addr:$src3))),
                         (i32 imm:$src4),
                         (i32 FROUND_CURRENT)),
                 itins.rm>,
               Sched<[itins.Sched.Folded, ReadAfterLd]>;

    // Folded broadcast load.
    defm rmbi : AVX512_maskable_3src<
                  opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                  OpcodeStr##_.Suffix,
                  "$src4, ${src3}"##_.BroadcastStr##", $src2",
                  "$src2, ${src3}"##_.BroadcastStr##", $src4",
                  (OpNode (_.VT _.RC:$src1),
                          (_.VT _.RC:$src2),
                          (_.IntVT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
                          (i32 imm:$src4),
                          (i32 FROUND_CURRENT)),
                  itins.rm>,
                EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  } // Constraints = "$src1 = $dst"
}
10497
// Packed VFIXUPIMM register form with {sae}: same operands as the rri form
// but the pattern passes FROUND_NO_EXC and the encoding sets EVEX_B.
// $src1 is tied to $dst.
multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
                                      SDNode OpNode, OpndItins itins,
                                      X86VectorVTInfo _>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
    defm rrib : AVX512_maskable_3src<
                  opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                  OpcodeStr##_.Suffix,
                  "$src4, {sae}, $src3, $src2",
                  "$src2, $src3, {sae}, $src4",
                  (OpNode (_.VT _.RC:$src1),
                          (_.VT _.RC:$src2),
                          (_.IntVT _.RC:$src3),
                          (i32 imm:$src4),
                          (i32 FROUND_NO_EXC)),
                  itins.rr>,
                EVEX_B, Sched<[itins.Sched]>;
  }
}
10514
// Scalar VFIXUPIMM forms (HasAVX512 only).  $src1 is tied to $dst.  The type
// and register class of $src3 in the register patterns come from _src3VT
// rather than from the main type info `_`.
//   rri  - register $src3, FROUND_CURRENT
//   rrib - register $src3, {sae} (FROUND_NO_EXC, EVEX_B)
//   rmi  - $src3 is a scalar load wrapped in scalar_to_vector
multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  OpndItins itins, X86VectorVTInfo _,
                                  X86VectorVTInfo _src3VT> {
  let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
      ExeDomain = _.ExeDomain in {
    defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_src3VT.VT _src3VT.RC:$src3),
                              (i32 imm:$src4),
                              (i32 FROUND_CURRENT)), itins.rr>, Sched<[itins.Sched]>;

    // rrib is a register-only form (MRMSrcReg, no memory operand), so it takes
    // the register itinerary and the unfolded scheduling class, matching rri
    // above and the packed rrib form.  It previously used itins.rm with
    // Sched<[itins.Sched.Folded, ReadAfterLd]>, which models a load that this
    // instruction does not perform.
    defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
                      "$src2, $src3, {sae}, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_src3VT.VT _src3VT.RC:$src3),
                              (i32 imm:$src4),
                              (i32 FROUND_NO_EXC)), itins.rr>,
                      EVEX_B, Sched<[itins.Sched]>;

    defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_src3VT.VT (scalar_to_vector
                                        (_src3VT.ScalarLdFrag addr:$src3))),
                              (i32 imm:$src4),
                              (i32 FROUND_CURRENT)), itins.rm>,
                      Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
10550
// Instantiates the packed VFIXUPIMM forms (opcode 0x54) for all three vector
// widths.  Only the 512-bit instantiation also pulls in the {sae} variant;
// the 128/256-bit instantiations additionally require HasVLX.
multiclass avx512_fixupimm_packed_all<OpndItins itins,
                                      AVX512VLVectorVTInfo _Vec> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, itins,
                                    _Vec.info512>,
             avx512_fixupimm_packed_sae<0x54, "vfixupimm", X86VFixupimm, itins,
                                        _Vec.info512>,
             AVX512AIi8Base, EVEX_4V, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, itins,
                                       _Vec.info128>,
                AVX512AIi8Base, EVEX_4V, EVEX_V128;
    defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, itins,
                                       _Vec.info256>,
                AVX512AIi8Base, EVEX_4V, EVEX_V256;
  }
}
10564
// Scalar VFIXUPIMM (opcode 0x55): single and double precision.  The third
// source uses the integer vector info (v4i32x_info / v2i64x_info).
defm VFIXUPIMMSS : avx512_fixupimm_scalar<0x55, "vfixupimm",
                                          X86VFixupimmScalar, SSE_ALU_F32S,
                                          f32x_info, v4i32x_info>,
                   AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VFIXUPIMMSD : avx512_fixupimm_scalar<0x55, "vfixupimm",
                                          X86VFixupimmScalar, SSE_ALU_F64S,
                                          f64x_info, v2i64x_info>,
                   AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>,
                   VEX_W;

// Packed VFIXUPIMM: single and double precision, all vector widths.
defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SSE_ALU_F32P, avx512vl_f32_info>,
                   EVEX_CD8<32, CD8VF>;
defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SSE_ALU_F64P, avx512vl_f64_info>,
                   EVEX_CD8<64, CD8VF>, VEX_W;
Craig Topper5625d242016-07-29 06:06:00 +000010575
10576
10577
10578// Patterns used to select SSE scalar fp arithmetic instructions from
10579// either:
10580//
10581// (1) a scalar fp operation followed by a blend
10582//
10583// The effect is that the backend no longer emits unnecessary vector
10584// insert instructions immediately after SSE scalar fp instructions
10585// like addss or mulss.
10586//
10587// For example, given the following code:
10588// __m128 foo(__m128 A, __m128 B) {
10589// A[0] += B[0];
10590// return A;
10591// }
10592//
10593// Previously we generated:
10594// addss %xmm0, %xmm1
10595// movss %xmm1, %xmm0
10596//
10597// We now generate:
10598// addss %xmm1, %xmm0
10599//
10600// (2) a vector packed single/double fp operation followed by a vector insert
10601//
10602// The effect is that the backend converts the packed fp instruction
10603// followed by a vector insert into a single SSE scalar fp instruction.
10604//
10605// For example, given the following code:
10606// __m128 foo(__m128 A, __m128 B) {
10607// __m128 C = A + B;
//     return (__m128) {C[0], A[1], A[2], A[3]};
10609// }
10610//
10611// Previously we generated:
10612// addps %xmm0, %xmm1
10613// movss %xmm1, %xmm0
10614//
10615// We now generate:
10616// addss %xmm1, %xmm0
10617
10618// TODO: Some canonicalization in lowering would simplify the number of
10619// patterns we have to try to match.
// Selection patterns that fold an f32 math op followed by an X86Movss insert
// into the EVEX "V<OpcPrefix>SSZrr_Int" instruction (or its masked "k" form).
// Op is the arithmetic node; OpcPrefix is the mnemonic stem, e.g. "ADD".
multiclass AVX512_scalar_math_f32_patterns<SDNode Op, string OpcPrefix> {
  let Predicates = [HasAVX512] in {
    // Scalar op on element 0 of $dst, result re-inserted with movss.
    def : Pat<(v4f32 (X86Movss
                       (v4f32 VR128X:$dst),
                       (v4f32 (scalar_to_vector
                                (Op (f32 (extractelt (v4f32 VR128X:$dst),
                                                     (iPTR 0))),
                                    FR32X:$src))))),
              (!cast<I>("V"#OpcPrefix#SSZrr_Int)
                 v4f32:$dst,
                 (COPY_TO_REGCLASS FR32X:$src, VR128X))>;

    // Full vector op whose low element is inserted into $dst with movss.
    def : Pat<(v4f32 (X86Movss
                       (v4f32 VR128X:$dst),
                       (Op (v4f32 VR128X:$dst), (v4f32 VR128X:$src)))),
              (!cast<I>("V"#OpcPrefix#SSZrr_Int) v4f32:$dst, v4f32:$src)>;

    // Masked scalar op (X86selects between the op result and $src0),
    // re-inserted with movss; $src0 becomes the pass-through operand.
    def : Pat<(X86Movss
                (v4f32 VR128X:$src1),
                (scalar_to_vector
                  (X86selects VK1WM:$mask,
                              (Op (f32 (extractelt (v4f32 VR128X:$src1),
                                                   (iPTR 0))),
                                  FR32X:$src2),
                              FR32X:$src0))),
              (!cast<I>("V"#OpcPrefix#SSZrr_Intk)
                 (COPY_TO_REGCLASS FR32X:$src0, VR128X),
                 VK1WM:$mask, v4f32:$src1,
                 (COPY_TO_REGCLASS FR32X:$src2, VR128X))>;
  }
}
10646
// f32 scalar-math folding patterns for the four basic arithmetic ops.
defm : AVX512_scalar_math_f32_patterns<fadd, "ADD">;
defm : AVX512_scalar_math_f32_patterns<fsub, "SUB">;
defm : AVX512_scalar_math_f32_patterns<fmul, "MUL">;
defm : AVX512_scalar_math_f32_patterns<fdiv, "DIV">;
10651
// Selection patterns that fold an f64 math op followed by an X86Movsd insert
// into the EVEX "V<OpcPrefix>SDZrr_Int" instruction (or its masked "k" form).
// Op is the arithmetic node; OpcPrefix is the mnemonic stem, e.g. "ADD".
multiclass AVX512_scalar_math_f64_patterns<SDNode Op, string OpcPrefix> {
  let Predicates = [HasAVX512] in {
    // Scalar op on element 0 of $dst, result re-inserted with movsd.
    def : Pat<(v2f64 (X86Movsd
                       (v2f64 VR128X:$dst),
                       (v2f64 (scalar_to_vector
                                (Op (f64 (extractelt (v2f64 VR128X:$dst),
                                                     (iPTR 0))),
                                    FR64X:$src))))),
              (!cast<I>("V"#OpcPrefix#SDZrr_Int)
                 v2f64:$dst,
                 (COPY_TO_REGCLASS FR64X:$src, VR128X))>;

    // Full vector op whose low element is inserted into $dst with movsd.
    def : Pat<(v2f64 (X86Movsd
                       (v2f64 VR128X:$dst),
                       (Op (v2f64 VR128X:$dst), (v2f64 VR128X:$src)))),
              (!cast<I>("V"#OpcPrefix#SDZrr_Int) v2f64:$dst, v2f64:$src)>;

    // Masked scalar op (X86selects between the op result and $src0),
    // re-inserted with movsd; $src0 becomes the pass-through operand.
    def : Pat<(X86Movsd
                (v2f64 VR128X:$src1),
                (scalar_to_vector
                  (X86selects VK1WM:$mask,
                              (Op (f64 (extractelt (v2f64 VR128X:$src1),
                                                   (iPTR 0))),
                                  FR64X:$src2),
                              FR64X:$src0))),
              (!cast<I>("V"#OpcPrefix#SDZrr_Intk)
                 (COPY_TO_REGCLASS FR64X:$src0, VR128X),
                 VK1WM:$mask, v2f64:$src1,
                 (COPY_TO_REGCLASS FR64X:$src2, VR128X))>;
  }
}
10678
// f64 scalar-math folding patterns for the four basic arithmetic ops.
defm : AVX512_scalar_math_f64_patterns<fadd, "ADD">;
defm : AVX512_scalar_math_f64_patterns<fsub, "SUB">;
defm : AVX512_scalar_math_f64_patterns<fmul, "MUL">;
defm : AVX512_scalar_math_f64_patterns<fdiv, "DIV">;
Coby Tayree2a1c02f2017-11-21 09:11:41 +000010683
10684//===----------------------------------------------------------------------===//
10685// AES instructions
10686//===----------------------------------------------------------------------===//
Coby Tayree7ca5e5872017-11-21 09:30:33 +000010687
// EVEX-encoded AES round instructions for 128/256/512-bit vectors.
// IntPrefix is the name of the 128-bit intrinsic; the wider intrinsics are
// looked up by appending "_256" / "_512".  The 128/256-bit forms require
// HasVLX and HasVAES; the 512-bit form requires HasAVX512 and HasVAES.
multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
  let Predicates = [HasVLX, HasVAES] in {
    defm Z128 : AESI_binop_rm_int<Op, OpStr, !cast<Intrinsic>(IntPrefix),
                                  loadv2i64, 0, VR128X, i128mem>,
                EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
    defm Z256 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix##"_256"),
                                  loadv4i64, 0, VR256X, i256mem>,
                EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
  }

  let Predicates = [HasAVX512, HasVAES] in
    defm Z : AESI_binop_rm_int<Op, OpStr,
                               !cast<Intrinsic>(IntPrefix##"_512"),
                               loadv8i64, 0, VR512, i512mem>,
             EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
}
10705
// The four AES round operations, opcodes 0xDC-0xDF.
defm VAESENC     : avx512_vaes<0xDC, "vaesenc",
                               "int_x86_aesni_aesenc">;
defm VAESENCLAST : avx512_vaes<0xDD, "vaesenclast",
                               "int_x86_aesni_aesenclast">;
defm VAESDEC     : avx512_vaes<0xDE, "vaesdec",
                               "int_x86_aesni_aesdec">;
defm VAESDECLAST : avx512_vaes<0xDF, "vaesdeclast",
                               "int_x86_aesni_aesdeclast">;
10710
Coby Tayree7ca5e5872017-11-21 09:30:33 +000010711//===----------------------------------------------------------------------===//
10712// PCLMUL instructions - Carry less multiplication
10713//===----------------------------------------------------------------------===//
10714
// 512-bit EVEX carry-less multiply.
let Predicates = [HasAVX512, HasVPCLMULQDQ] in
  defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64,
                                int_x86_pclmulqdq_512>,
                     EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;

// 128/256-bit EVEX carry-less multiply (requires VLX).
let Predicates = [HasVLX, HasVPCLMULQDQ] in {
  defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64,
                                   int_x86_pclmulqdq>,
                        EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;

  defm VPCLMULQDQZ256 : vpclmulqdq<VR256X, i256mem, loadv4i64,
                                   int_x86_pclmulqdq_256>,
                        EVEX_4V, EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_WIG;
}

// Aliases
defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
10732
Coby Tayree71e37cc2017-11-21 09:48:44 +000010733//===----------------------------------------------------------------------===//
10734// VBMI2
10735//===----------------------------------------------------------------------===//
10736
// Three-source maskable VBMI2 variable shift with $src1 tied to $dst.
//   r - $src3 in a register
//   m - $src3 loaded as a full vector
multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
                              OpndItins itins, X86VectorVTInfo VTI> {
  let Constraints = "$src1 = $dst", ExeDomain = VTI.ExeDomain in {
    defm r : AVX512_maskable_3src<
               Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
               (ins VTI.RC:$src2, VTI.RC:$src3),
               OpStr, "$src3, $src2", "$src2, $src3",
               (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3)),
               itins.rr>,
             AVX512FMA3Base, Sched<[itins.Sched]>;

    defm m : AVX512_maskable_3src<
               Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
               (ins VTI.RC:$src2, VTI.MemOp:$src3),
               OpStr, "$src3, $src2", "$src2, $src3",
               (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                               (VTI.VT (bitconvert
                                         (VTI.LdFrag addr:$src3))))),
               itins.rm>,
             AVX512FMA3Base, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
10755
// Extends VBMI2_shift_var_rm with an "mb" form whose $src3 is a broadcast of
// a scalar load (EVEX_B).  $src1 is tied to $dst.
multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                               OpndItins itins, X86VectorVTInfo VTI>
    : VBMI2_shift_var_rm<Op, OpStr, OpNode, itins, VTI> {
  let Constraints = "$src1 = $dst", ExeDomain = VTI.ExeDomain in
    defm mb : AVX512_maskable_3src<
                Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
                OpStr,
                "${src3}"##VTI.BroadcastStr##", $src2",
                "$src2, ${src3}"##VTI.BroadcastStr,
                (OpNode VTI.RC:$src1, VTI.RC:$src2,
                        (VTI.VT (X86VBroadcast
                                  (VTI.ScalarLdFrag addr:$src3)))),
                itins.rm>,
              AVX512FMA3Base, EVEX_B,
              Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
10770
// Instantiates VBMI2_shift_var_rm (register and full-load forms only) for the
// 512-, 256- and 128-bit widths.  The narrower widths also require HasVLX.
multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     OpndItins itins,
                                     AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasVBMI2] in
    defm Z : VBMI2_shift_var_rm<Op, OpStr, OpNode, itins, VTI.info512>,
             EVEX_V512;

  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, itins, VTI.info256>,
                EVEX_V256;
    defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, itins, VTI.info128>,
                EVEX_V128;
  }
}
10780
// Instantiates VBMI2_shift_var_rmb (register, full-load and broadcast forms)
// for the 512-, 256- and 128-bit widths.  The narrower widths also require
// HasVLX.
multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
                                      OpndItins itins,
                                      AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasVBMI2] in
    defm Z : VBMI2_shift_var_rmb<Op, OpStr, OpNode, itins, VTI.info512>,
             EVEX_V512;

  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, itins, VTI.info256>,
                EVEX_V256;
    defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, itins, VTI.info128>,
                EVEX_V128;
  }
}
// Word/dword/qword flavours of a VBMI2 variable shift.  The word flavour uses
// opcode wOp and has no broadcast form; dword and qword share opcode dqOp and
// include the broadcast form.  Word and qword set VEX_W.
multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, OpndItins itins> {
  defm W : VBMI2_shift_var_rm_common<wOp, Prefix##"w", OpNode, itins,
                                     avx512vl_i16_info>,
           VEX_W, EVEX_CD8<16, CD8VF>;
  defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix##"d", OpNode, itins,
                                      avx512vl_i32_info>,
           EVEX_CD8<32, CD8VF>;
  defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix##"q", OpNode, itins,
                                      avx512vl_i64_info>,
           VEX_W, EVEX_CD8<64, CD8VF>;
}
10799
// Word/dword/qword flavours of a VBMI2 immediate shift.  The word flavour is
// built from avx512_common_3Op_rm_imm8 with opcode wOp; dword and qword are
// built from avx512_common_3Op_imm8 with opcode dqOp.  All require HasVBMI2.
multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, OpndItins itins> {
  defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix##"w", itins,
                                     avx512vl_i16_info, avx512vl_i16_info,
                                     HasVBMI2>,
           VEX_W, EVEX_CD8<16, CD8VF>;
  defm D : avx512_common_3Op_imm8<Prefix##"d", avx512vl_i32_info, dqOp,
                                  OpNode, itins, HasVBMI2>,
           AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
  defm Q : avx512_common_3Op_imm8<Prefix##"q", avx512vl_i64_info, dqOp,
                                  OpNode, itins, HasVBMI2>,
           AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
}
10810
// Concat & Shift
defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv,
                               SSE_INTMUL_ITINS_P>;
defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv,
                               SSE_INTMUL_ITINS_P>;
defm VPSHLD  : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld,
                               SSE_INTMUL_ITINS_P>;
defm VPSHRD  : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd,
                               SSE_INTMUL_ITINS_P>;

// Compress (byte and word elements; the word form sets VEX_W)
defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", AVX512_COMPRESS,
                                         avx512vl_i8_info, HasVBMI2>,
                   EVEX;
defm VPCOMPRESSW : compress_by_elt_width<0x63, "vpcompressw", AVX512_COMPRESS,
                                         avx512vl_i16_info, HasVBMI2>,
                   EVEX, VEX_W;

// Expand (byte and word elements; the word form sets VEX_W)
defm VPEXPANDB : expand_by_elt_width<0x62, "vpexpandb", AVX512_EXPAND,
                                     avx512vl_i8_info, HasVBMI2>,
                 EVEX;
defm VPEXPANDW : expand_by_elt_width<0x62, "vpexpandw", AVX512_EXPAND,
                                     avx512vl_i16_info, HasVBMI2>,
                 EVEX, VEX_W;
Coby Tayree71e37cc2017-11-21 09:48:44 +000010827
Coby Tayree3880f2a2017-11-21 10:04:28 +000010828//===----------------------------------------------------------------------===//
10829// VNNI
10830//===----------------------------------------------------------------------===//
10831
// Three-source maskable VNNI instruction with $src1 tied to $dst.
//   r  - $src3 in a register
//   m  - $src3 loaded as a full vector
//   mb - $src3 is a broadcast of a scalar load (EVEX_B)
let Constraints = "$src1 = $dst" in
multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                    OpndItins itins, X86VectorVTInfo VTI> {
  defm r : AVX512_maskable_3src<
             Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
             (ins VTI.RC:$src2, VTI.RC:$src3),
             OpStr, "$src3, $src2", "$src2, $src3",
             (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3)),
             itins.rr>,
           EVEX_4V, T8PD, Sched<[itins.Sched]>;

  defm m : AVX512_maskable_3src<
             Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
             (ins VTI.RC:$src2, VTI.MemOp:$src3),
             OpStr, "$src3, $src2", "$src2, $src3",
             (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                             (VTI.VT (bitconvert
                                       (VTI.LdFrag addr:$src3))))),
             itins.rm>,
           EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
           Sched<[itins.Sched.Folded, ReadAfterLd]>;

  defm mb : AVX512_maskable_3src<
              Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
              (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
              OpStr,
              "${src3}"##VTI.BroadcastStr##", $src2",
              "$src2, ${src3}"##VTI.BroadcastStr,
              (OpNode VTI.RC:$src1, VTI.RC:$src2,
                      (VTI.VT (X86VBroadcast
                                (VTI.ScalarLdFrag addr:$src3)))),
              itins.rm>,
            EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B, T8PD,
            Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
10859
// Instantiates VNNI_rmb on 32-bit integer vectors for the 512-, 256- and
// 128-bit widths.  The narrower widths also require HasVLX.
multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
                       OpndItins itins> {
  let Predicates = [HasVNNI] in
    defm Z : VNNI_rmb<Op, OpStr, OpNode, itins, v16i32_info>, EVEX_V512;

  let Predicates = [HasVNNI, HasVLX] in {
    defm Z256 : VNNI_rmb<Op, OpStr, OpNode, itins, v8i32x_info>, EVEX_V256;
    defm Z128 : VNNI_rmb<Op, OpStr, OpNode, itins, v4i32x_info>, EVEX_V128;
  }
}
10868
// The four VNNI dot-product instructions, opcodes 0x50-0x53.
// FIXME: Is there a better scheduler itinerary for VPDP?
defm VPDPBUSD  : VNNI_common<0x50, "vpdpbusd",  X86Vpdpbusd,  SSE_PMADD>;
defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SSE_PMADD>;
defm VPDPWSSD  : VNNI_common<0x52, "vpdpwssd",  X86Vpdpwssd,  SSE_PMADD>;
defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SSE_PMADD>;
Coby Tayree3880f2a2017-11-21 10:04:28 +000010874
Coby Tayree5c7fe5d2017-11-21 10:32:42 +000010875//===----------------------------------------------------------------------===//
10876// Bit Algorithms
10877//===----------------------------------------------------------------------===//
10878
// Byte and word population count, selected from ctpop and gated on HasBITALG.
// The word form sets VEX_W.
// FIXME: Is there a better scheduler itinerary for VPOPCNTB/VPOPCNTW?
defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop,
                                   SSE_INTALU_ITINS_P, avx512vl_i8_info,
                                   HasBITALG>;
defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop,
                                   SSE_INTALU_ITINS_P, avx512vl_i16_info,
                                   HasBITALG>,
                VEX_W;

// Extra ctpop lowering patterns for the two instructions above.
defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
Coby Tayree5c7fe5d2017-11-21 10:32:42 +000010887
// Register-register and register-memory forms of vpshufbitqmb for a single
// vector type. Both use opcode 0x8F, write a mask register (VTI.KRC) and are
// built through AVX512_maskable_cmp.
//   itins - OpndItins bundle supplying the rr/rm itineraries and the Sched
//           write class.
//   VTI   - vector type info for the two source operands.
multiclass VPSHUFBITQMB_rm<OpndItins itins, X86VectorVTInfo VTI> {
  // rr: both sources are vector registers.
  defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI,
                                (outs VTI.KRC:$dst),
                                (ins VTI.RC:$src1, VTI.RC:$src2),
                                "vpshufbitqmb",
                                "$src2, $src1", "$src1, $src2",
                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
                                                 (VTI.VT VTI.RC:$src2)),
                                itins.rr>,
            EVEX_4V, T8PD, Sched<[itins.Sched]>;

  // rm: second source is loaded through VTI.LdFrag and bitconverted to
  // VTI.VT; uses the folded scheduling class plus ReadAfterLd.
  defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI,
                                (outs VTI.KRC:$dst),
                                (ins VTI.RC:$src1, VTI.MemOp:$src2),
                                "vpshufbitqmb",
                                "$src2, $src1", "$src1, $src2",
                                (X86Vpshufbitqmb
                                  (VTI.VT VTI.RC:$src1),
                                  (VTI.VT (bitconvert
                                            (VTI.LdFrag addr:$src2)))),
                                itins.rm>,
            EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
            Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
10905
// Expands VPSHUFBITQMB_rm across the three widths carried by an
// AVX512VLVectorVTInfo bundle.
//   itins - OpndItins bundle forwarded unchanged to every width.
//   VTI   - bundle providing info512 / info256 / info128.
multiclass VPSHUFBITQMB_common<OpndItins itins, AVX512VLVectorVTInfo VTI> {
  // 512-bit form: gated on HasBITALG only.
  let Predicates = [HasBITALG] in {
    defm Z : VPSHUFBITQMB_rm<itins, VTI.info512>, EVEX_V512;
  }

  // 256-bit and 128-bit forms: additionally gated on HasVLX.
  let Predicates = [HasBITALG, HasVLX] in {
    defm Z256 : VPSHUFBITQMB_rm<itins, VTI.info256>, EVEX_V256;
    defm Z128 : VPSHUFBITQMB_rm<itins, VTI.info128>, EVEX_V128;
  }
}
10914
// Instantiate vpshufbitqmb over the i8 vector type family, using the
// SSE_INTMUL_ITINS_P itinerary as a stand-in.
// FIXME: Is there a better scheduler itinerary for VPSHUFBITQMB?
defm VPSHUFBITQMB : VPSHUFBITQMB_common<SSE_INTMUL_ITINS_P,
                                        avx512vl_i8_info>;
Coby Tayreee8bdd382017-11-23 11:15:50 +000010917
//===----------------------------------------------------------------------===//
// GFNI (instructions gated on HasGFNI)
//===----------------------------------------------------------------------===//
10921
// Instantiates avx512_binop_rm (defined outside this section) for the three
// byte-vector widths.
//   Op     - 8-bit opcode forwarded to avx512_binop_rm.
//   OpStr  - mnemonic string forwarded to avx512_binop_rm.
//   OpNode - SelectionDAG node forwarded to avx512_binop_rm.
//   itins  - OpndItins bundle forwarded to every width. Defaults to
//            SSE_INTALU_ITINS_P (the value that used to be hard-coded here),
//            so existing three-argument instantiations are unaffected; this
//            mirrors the itins parameter of GF2P8AFFINE_avx512_common.
// The trailing `1` passed to avx512_binop_rm is kept as-is; it is presumably
// the commutable flag -- confirm against the avx512_binop_rm definition.
multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                   OpndItins itins = SSE_INTALU_ITINS_P> {
  // 512-bit form over v64i8.
  let Predicates = [HasGFNI, HasAVX512, HasBWI] in
  defm Z      : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info,
                                itins, 1>, EVEX_V512;
  // 256-bit and 128-bit forms over v32i8/v16i8 require HasVLX.
  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
    defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info,
                                itins, 1>, EVEX_V256;
    defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info,
                                itins, 1>, EVEX_V128;
  }
}
10933
// vgf2p8mulb: opcode 0xCF, selected from X86GF2P8mulb; the encoding classes
// EVEX_CD8<8, CD8VF> and T8PD are applied to every width.
defm VGF2P8MULB
    : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb>,
      EVEX_CD8<8, CD8VF>, T8PD;
Coby Tayreed8b17be2017-11-26 09:36:41 +000010936
// Extends avx512_3Op_rm_imm8 (instantiated with VTI for both type slots) with
// an extra `rmbi` variant whose second source is a broadcast memory operand.
//   Op, OpStr, OpNode, itins - forwarded to avx512_3Op_rm_imm8 and reused
//                              for the rmbi definition.
//   VTI     - vector type info of the destination and first source.
//   BcstVTI - vector type info of the broadcast: supplies BroadcastStr for
//             the asm strings and the VT given to X86VBroadcast.
multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
                                      OpndItins itins, X86VectorVTInfo VTI,
                                      X86VectorVTInfo BcstVTI>
    : avx512_3Op_rm_imm8<Op, OpStr, OpNode, itins, VTI, VTI> {
  let ExeDomain = VTI.ExeDomain in {
    // The scalar memory operand is read with loadi64, broadcast to
    // BcstVTI.VT and bitconverted before being handed to OpNode; $src3 is
    // the 8-bit immediate.
    defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI,
                  (outs VTI.RC:$dst),
                  (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
                  OpStr,
                  "$src3, ${src2}"##BcstVTI.BroadcastStr##", $src1",
                  "$src1, ${src2}"##BcstVTI.BroadcastStr##", $src3",
                  (OpNode (VTI.VT VTI.RC:$src1),
                          (bitconvert
                            (BcstVTI.VT
                              (X86VBroadcast (loadi64 addr:$src2)))),
                          (i8 imm:$src3)),
                  itins.rm>,
                EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
10951
// Instantiates GF2P8AFFINE_avx512_rmb_imm for the three vector widths. Each
// width pairs a byte-vector type (the instruction's operand type) with the
// same-width i64-vector type used for the broadcast form.
//   Op, OpStr, OpNode, itins - forwarded unchanged to every width.
multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     OpndItins itins> {
  // 512-bit form: v64i8 operands, v8i64 broadcast.
  let Predicates = [HasGFNI, HasAVX512, HasBWI] in {
    defm Z : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, itins,
                                        v64i8_info, v8i64_info>,
             EVEX_V512;
  }

  // 256-bit and 128-bit forms require HasVLX.
  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
    defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, itins,
                                           v32i8x_info, v4i64x_info>,
                EVEX_V256;
    defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, itins,
                                           v16i8x_info, v2i64x_info>,
                EVEX_V128;
  }
}
10964
// The two GF2P8 affine instructions. They differ only in opcode (0xCF vs
// 0xCE), mnemonic and DAG node; both use SSE_INTMUL_ITINS_P and the same
// encoding classes.
defm VGF2P8AFFINEINVQB
    : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
                                X86GF2P8affineinvqb, SSE_INTMUL_ITINS_P>,
      EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
defm VGF2P8AFFINEQB
    : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
                                X86GF2P8affineqb, SSE_INTMUL_ITINS_P>,
      EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
Coby Tayreed8b17be2017-11-26 09:36:41 +000010971