//===- NVPTXVector.td - NVPTX Vector Specific Instruction defs -*- tblgen-*-==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

//-----------------------------------
// Vector Specific
//-----------------------------------

//
// All vector instructions derive from NVPTXVecInst
//

// Base class for the vector instruction records.
//   outs, ins, asmstr, pattern - forwarded unchanged to NVPTXInst.
//   sInst                      - scalar instruction stored in scalarInst
//                                (defaults to NOP).
class NVPTXVecInst<dag outs, dag ins, string asmstr, list<dag> pattern,
                   NVPTXInst sInst=NOP>
  : NVPTXInst<outs, ins, asmstr, pattern> {
  // NOTE(review): presumably the per-element scalar equivalent used when a
  // vector instruction is split into scalar ones -- confirm against users.
  NVPTXInst scalarInst=sInst;
}

// Element extraction. Each record matches (extractelt vec, imm) and prints a
// single mov of the element selected by $c (via the vecelem operand modifier)
// into the scalar $dst. The i8 element variants print mov.u16 as well.
let isAsCheapAsAMove=1, VecInstType=isVecExtract.Value in {
// Extract v2i16
def V2i16Extract : NVPTXVecInst<(outs Int16Regs:$dst),
                                (ins V2I16Regs:$src, i8imm:$c),
                                "mov.u16 \t$dst, $src${c:vecelem};",
                                [(set Int16Regs:$dst, (extractelt
                                  (v2i16 V2I16Regs:$src), imm:$c))],
                                IMOV16rr>;

// Extract v4i16
def V4i16Extract : NVPTXVecInst<(outs Int16Regs:$dst),
                                (ins V4I16Regs:$src, i8imm:$c),
                                "mov.u16 \t$dst, $src${c:vecelem};",
                                [(set Int16Regs:$dst, (extractelt
                                  (v4i16 V4I16Regs:$src), imm:$c))],
                                IMOV16rr>;

// Extract v2i8
def V2i8Extract : NVPTXVecInst<(outs Int8Regs:$dst),
                               (ins V2I8Regs:$src, i8imm:$c),
                               "mov.u16 \t$dst, $src${c:vecelem};",
                               [(set Int8Regs:$dst, (extractelt
                                 (v2i8 V2I8Regs:$src), imm:$c))],
                               IMOV8rr>;

// Extract v4i8
def V4i8Extract : NVPTXVecInst<(outs Int8Regs:$dst),
                               (ins V4I8Regs:$src, i8imm:$c),
                               "mov.u16 \t$dst, $src${c:vecelem};",
                               [(set Int8Regs:$dst, (extractelt
                                 (v4i8 V4I8Regs:$src), imm:$c))],
                               IMOV8rr>;

// Extract v2i32
def V2i32Extract : NVPTXVecInst<(outs Int32Regs:$dst),
                                (ins V2I32Regs:$src, i8imm:$c),
                                "mov.u32 \t$dst, $src${c:vecelem};",
                                [(set Int32Regs:$dst, (extractelt
                                  (v2i32 V2I32Regs:$src), imm:$c))],
                                IMOV32rr>;

// Extract v2f32
def V2f32Extract : NVPTXVecInst<(outs Float32Regs:$dst),
                                (ins V2F32Regs:$src, i8imm:$c),
                                "mov.f32 \t$dst, $src${c:vecelem};",
                                [(set Float32Regs:$dst, (extractelt
                                  (v2f32 V2F32Regs:$src), imm:$c))],
                                FMOV32rr>;

// Extract v2i64
def V2i64Extract : NVPTXVecInst<(outs Int64Regs:$dst),
                                (ins V2I64Regs:$src, i8imm:$c),
                                "mov.u64 \t$dst, $src${c:vecelem};",
                                [(set Int64Regs:$dst, (extractelt
                                  (v2i64 V2I64Regs:$src), imm:$c))],
                                IMOV64rr>;

// Extract v2f64
def V2f64Extract : NVPTXVecInst<(outs Float64Regs:$dst),
                                (ins V2F64Regs:$src, i8imm:$c),
                                "mov.f64 \t$dst, $src${c:vecelem};",
                                [(set Float64Regs:$dst, (extractelt
                                  (v2f64 V2F64Regs:$src), imm:$c))],
                                FMOV64rr>;

// Extract v4i32
def V4i32Extract : NVPTXVecInst<(outs Int32Regs:$dst),
                                (ins V4I32Regs:$src, i8imm:$c),
                                "mov.u32 \t$dst, $src${c:vecelem};",
                                [(set Int32Regs:$dst, (extractelt
                                  (v4i32 V4I32Regs:$src), imm:$c))],
                                IMOV32rr>;

// Extract v4f32
def V4f32Extract : NVPTXVecInst<(outs Float32Regs:$dst),
                                (ins V4F32Regs:$src, i8imm:$c),
                                "mov.f32 \t$dst, $src${c:vecelem};",
                                [(set Float32Regs:$dst, (extractelt
                                  (v4f32 V4F32Regs:$src), imm:$c))],
                                FMOV32rr>;
}

// Element insertion. Each record matches (insertelt vec, val, imm) and prints
// two moves: a full-vector mov of $src into $dst (vecfull modifier), then a
// scalar mov of $val into the element of $dst selected by $c (vecelem
// modifier). The i8 element variants print u16 moves.
let isAsCheapAsAMove=1, VecInstType=isVecInsert.Value in {
// Insert v2i8
def V2i8Insert : NVPTXVecInst<(outs V2I8Regs:$dst),
                              (ins V2I8Regs:$src, Int8Regs:$val, i8imm:$c),
                              "mov.v2.u16 \t${dst:vecfull}, ${src:vecfull};"
                              "\n\tmov.u16 \t$dst${c:vecelem}, $val;",
       [(set V2I8Regs:$dst,
         (insertelt V2I8Regs:$src, Int8Regs:$val, imm:$c))], IMOV8rr>;

// Insert v4i8
def V4i8Insert : NVPTXVecInst<(outs V4I8Regs:$dst),
                              (ins V4I8Regs:$src, Int8Regs:$val, i8imm:$c),
                              "mov.v4.u16 \t${dst:vecfull}, ${src:vecfull};"
                              "\n\tmov.u16 \t$dst${c:vecelem}, $val;",
       [(set V4I8Regs:$dst,
         (insertelt V4I8Regs:$src, Int8Regs:$val, imm:$c))], IMOV8rr>;

// Insert v2i16
def V2i16Insert : NVPTXVecInst<(outs V2I16Regs:$dst),
                               (ins V2I16Regs:$src, Int16Regs:$val, i8imm:$c),
                               "mov.v2.u16 \t${dst:vecfull}, ${src:vecfull};"
                               "\n\tmov.u16 \t$dst${c:vecelem}, $val;",
       [(set V2I16Regs:$dst,
         (insertelt V2I16Regs:$src, Int16Regs:$val, imm:$c))],
       IMOV16rr>;

// Insert v4i16
def V4i16Insert : NVPTXVecInst<(outs V4I16Regs:$dst),
                               (ins V4I16Regs:$src, Int16Regs:$val, i8imm:$c),
                               "mov.v4.u16 \t${dst:vecfull}, ${src:vecfull};"
                               "\n\tmov.u16 \t$dst${c:vecelem}, $val;",
       [(set V4I16Regs:$dst,
         (insertelt V4I16Regs:$src, Int16Regs:$val, imm:$c))],
       IMOV16rr>;

// Insert v2i32
def V2i32Insert : NVPTXVecInst<(outs V2I32Regs:$dst),
                               (ins V2I32Regs:$src, Int32Regs:$val, i8imm:$c),
                               "mov.v2.u32 \t${dst:vecfull}, ${src:vecfull};"
                               "\n\tmov.u32 \t$dst${c:vecelem}, $val;",
       [(set V2I32Regs:$dst,
         (insertelt V2I32Regs:$src, Int32Regs:$val, imm:$c))],
       IMOV32rr>;

// Insert v2f32
def V2f32Insert : NVPTXVecInst<(outs V2F32Regs:$dst),
                               (ins V2F32Regs:$src, Float32Regs:$val, i8imm:$c),
                               "mov.v2.f32 \t${dst:vecfull}, ${src:vecfull};"
                               "\n\tmov.f32 \t$dst${c:vecelem}, $val;",
       [(set V2F32Regs:$dst,
         (insertelt V2F32Regs:$src, Float32Regs:$val, imm:$c))],
       FMOV32rr>;

// Insert v2i64
def V2i64Insert : NVPTXVecInst<(outs V2I64Regs:$dst),
                               (ins V2I64Regs:$src, Int64Regs:$val, i8imm:$c),
                               "mov.v2.u64 \t${dst:vecfull}, ${src:vecfull};"
                               "\n\tmov.u64 \t$dst${c:vecelem}, $val;",
       [(set V2I64Regs:$dst,
         (insertelt V2I64Regs:$src, Int64Regs:$val, imm:$c))],
       IMOV64rr>;

// Insert v2f64
def V2f64Insert : NVPTXVecInst<(outs V2F64Regs:$dst),
                               (ins V2F64Regs:$src, Float64Regs:$val, i8imm:$c),
                               "mov.v2.f64 \t${dst:vecfull}, ${src:vecfull};"
                               "\n\tmov.f64 \t$dst${c:vecelem}, $val;",
       [(set V2F64Regs:$dst,
         (insertelt V2F64Regs:$src, Float64Regs:$val, imm:$c))],
       FMOV64rr>;

// Insert v4i32
def V4i32Insert : NVPTXVecInst<(outs V4I32Regs:$dst),
                               (ins V4I32Regs:$src, Int32Regs:$val, i8imm:$c),
                               "mov.v4.u32 \t${dst:vecfull}, ${src:vecfull};"
                               "\n\tmov.u32 \t$dst${c:vecelem}, $val;",
       [(set V4I32Regs:$dst,
         (insertelt V4I32Regs:$src, Int32Regs:$val, imm:$c))],
       IMOV32rr>;

// Insert v4f32
def V4f32Insert : NVPTXVecInst<(outs V4F32Regs:$dst),
                               (ins V4F32Regs:$src, Float32Regs:$val, i8imm:$c),
                               "mov.v4.f32 \t${dst:vecfull}, ${src:vecfull};"
                               "\n\tmov.f32 \t$dst${c:vecelem}, $val;",
       [(set V4F32Regs:$dst,
         (insertelt V4F32Regs:$src, Float32Regs:$val, imm:$c))],
       FMOV32rr>;
}

// Wrapper that carries an assembly string in field `s`. The V*AsmStr,
// V*MADStr and V*UnaryStr classes derive from it so they can be passed as a
// single typed template argument.
class BinOpAsmString<string c> {
  string s = c;
}

// Assembly text for a 4-element binary op: `opcode` repeated four times, once
// per element suffix _0.._3 of $dst, $a and $b, separated by "\n\t".
class V4AsmStr<string opcode> : BinOpAsmString<
  !strconcat(opcode, " \t${dst}_0, ${a}_0, ${b}_0;\n\t",
             opcode, " \t${dst}_1, ${a}_1, ${b}_1;\n\t",
             opcode, " \t${dst}_2, ${a}_2, ${b}_2;\n\t",
             opcode, " \t${dst}_3, ${a}_3, ${b}_3;")>;

// Assembly text for a 2-element binary op: `opcode` repeated twice, once per
// element suffix _0 and _1 of $dst, $a and $b, separated by "\n\t".
class V2AsmStr<string opcode> : BinOpAsmString<
  !strconcat(opcode, " \t${dst}_0, ${a}_0, ${b}_0;\n\t",
             opcode, " \t${dst}_1, ${a}_1, ${b}_1;")>;

// Assembly text for a 4-element three-operand op (operands $a, $b, $c):
// `opcode` repeated once per element suffix _0.._3, separated by "\n\t".
class V4MADStr<string opcode> : BinOpAsmString<
  !strconcat(opcode, " \t${dst}_0, ${a}_0, ${b}_0, ${c}_0;\n\t",
             opcode, " \t${dst}_1, ${a}_1, ${b}_1, ${c}_1;\n\t",
             opcode, " \t${dst}_2, ${a}_2, ${b}_2, ${c}_2;\n\t",
             opcode, " \t${dst}_3, ${a}_3, ${b}_3, ${c}_3;")>;

// Assembly text for a 2-element three-operand op (operands $a, $b, $c):
// `opcode` repeated for element suffixes _0 and _1, separated by "\n\t".
class V2MADStr<string opcode> : BinOpAsmString<
  !strconcat(opcode, " \t${dst}_0, ${a}_0, ${b}_0, ${c}_0;\n\t",
             opcode, " \t${dst}_1, ${a}_1, ${b}_1, ${c}_1;")>;

// Assembly text for a 4-element unary op: `opcode` repeated once per element
// suffix _0.._3 of $dst and $a, separated by "\n\t".
class V4UnaryStr<string opcode> : BinOpAsmString<
  !strconcat(opcode, " \t${dst}_0, ${a}_0;\n\t",
             opcode, " \t${dst}_1, ${a}_1;\n\t",
             opcode, " \t${dst}_2, ${a}_2;\n\t",
             opcode, " \t${dst}_3, ${a}_3;")>;

// Assembly text for a 2-element unary op: `opcode` repeated for element
// suffixes _0 and _1 of $dst and $a, separated by "\n\t".
class V2UnaryStr<string opcode> : BinOpAsmString<
  !strconcat(opcode, " \t${dst}_0, ${a}_0;\n\t",
             opcode, " \t${dst}_1, ${a}_1;")>;

// Vector binary instruction whose result and both operands share `regclass`.
//   asmstr - wrapper whose `s` field is the assembly text.
//   OpNode - DAG node matched as (OpNode $a, $b).
//   sInst  - scalar instruction forwarded to NVPTXVecInst (defaults to NOP).
class VecBinaryOp<BinOpAsmString asmstr, SDNode OpNode, NVPTXRegClass regclass,
                  NVPTXInst sInst=NOP> :
  NVPTXVecInst<(outs regclass:$dst), (ins regclass:$a, regclass:$b),
               asmstr.s,
               [(set regclass:$dst, (OpNode regclass:$a, regclass:$b))],
               sInst>;

// Vector shift instruction. The value operand $a and the result use
// `regclass1`; the shift-amount operand $b uses `regclass2`.
//   asmstr - wrapper whose `s` field is the assembly text.
//   OpNode - DAG node matched as (OpNode $a, $b).
//   sInst  - scalar instruction forwarded to NVPTXVecInst (defaults to NOP).
class VecShiftOp<BinOpAsmString asmstr, SDNode OpNode, NVPTXRegClass regclass1,
                 NVPTXRegClass regclass2, NVPTXInst sInst=NOP> :
  NVPTXVecInst<(outs regclass1:$dst), (ins regclass1:$a, regclass2:$b),
               asmstr.s,
               [(set regclass1:$dst, (OpNode regclass1:$a, regclass2:$b))],
               sInst>;

// Vector unary instruction whose result and operand share `regclass`.
//   asmstr - wrapper whose `s` field is the assembly text.
//   OpNode - pattern fragment matched as (OpNode $a).
//   sInst  - scalar instruction forwarded to NVPTXVecInst (defaults to NOP).
class VecUnaryOp<BinOpAsmString asmstr, PatFrag OpNode, NVPTXRegClass regclass,
                 NVPTXInst sInst=NOP> :
  NVPTXVecInst<(outs regclass:$dst), (ins regclass:$a),
               asmstr.s,
               [(set regclass:$dst, (OpNode regclass:$a))], sInst>;

// Instantiates an integer binary op for every integer vector register class.
// `asmstr` is the opcode without its width; the width is appended here.
// Note that the i8 vector variants append "16", the same as the i16 ones.
//   i64op/i32op/i16op/i8op - scalar instruction per element width.
multiclass IntBinVOp<string asmstr, SDNode OpNode,
                     NVPTXInst i64op=NOP, NVPTXInst i32op=NOP, NVPTXInst
                     i16op=NOP, NVPTXInst i8op=NOP> {
  def V2I64 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "64")>, OpNode, V2I64Regs,
                          i64op>;
  def V4I32 : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "32")>, OpNode, V4I32Regs,
                          i32op>;
  def V2I32 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "32")>, OpNode, V2I32Regs,
                          i32op>;
  def V4I16 : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "16")>, OpNode, V4I16Regs,
                          i16op>;
  def V2I16 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "16")>, OpNode, V2I16Regs,
                          i16op>;
  def V4I8 : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "16")>, OpNode, V4I8Regs,
                         i8op>;
  def V2I8 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "16")>, OpNode, V2I8Regs,
                         i8op>;
}

// Instantiates a floating-point binary op for the f64 and f32 vector register
// classes. `asmstr` is the opcode prefix; the type suffix ("f64", "f32" or
// "ftz.f32") is appended here. The _ftz variants require doF32FTZ.
//   f64/f32/f32_ftz - scalar instruction for each variant.
multiclass FloatBinVOp<string asmstr, SDNode OpNode,
                       NVPTXInst f64=NOP, NVPTXInst f32=NOP,
                       NVPTXInst f32_ftz=NOP> {
  def V2F64 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "f64")>, OpNode,
                          V2F64Regs, f64>;
  def V4F32_ftz : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "ftz.f32")>, OpNode,
                              V4F32Regs, f32_ftz>, Requires<[doF32FTZ]>;
  def V2F32_ftz : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "ftz.f32")>, OpNode,
                              V2F32Regs, f32_ftz>, Requires<[doF32FTZ]>;
  def V4F32 : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "f32")>, OpNode,
                          V4F32Regs, f32>;
  def V2F32 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "f32")>, OpNode,
                          V2F32Regs, f32>;
}

// Instantiates an integer unary op for every integer vector register class.
// `asmstr` is the opcode without its width; the width is appended here.
// Note that the i8 vector variants append "16", the same as the i16 ones.
//   i64op/i32op/i16op/i8op - scalar instruction per element width.
multiclass IntUnaryVOp<string asmstr, PatFrag OpNode,
                       NVPTXInst i64op=NOP, NVPTXInst i32op=NOP,
                       NVPTXInst i16op=NOP, NVPTXInst i8op=NOP> {
  def V2I64 : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "64")>, OpNode,
                         V2I64Regs, i64op>;
  def V4I32 : VecUnaryOp<V4UnaryStr<!strconcat(asmstr, "32")>, OpNode,
                         V4I32Regs, i32op>;
  def V2I32 : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "32")>, OpNode,
                         V2I32Regs, i32op>;
  def V4I16 : VecUnaryOp<V4UnaryStr<!strconcat(asmstr, "16")>, OpNode,
                         V4I16Regs, i16op>;
  def V2I16 : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "16")>, OpNode,
                         V2I16Regs, i16op>;
  def V4I8 : VecUnaryOp<V4UnaryStr<!strconcat(asmstr, "16")>, OpNode,
                        V4I8Regs, i8op>;
  def V2I8 : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "16")>, OpNode,
                        V2I8Regs, i8op>;
}


// Integer Arithmetic
// Vector add/sub, the carry-producing/consuming add and sub forms (v2i32 and
// v4i32 only), and vector shift-left. Every shift-left record takes its
// shift-amount operand in an i32 vector register class.
let VecInstType=isVecOther.Value in {
defm VAdd : IntBinVOp<"add.s", add, ADDi64rr, ADDi32rr, ADDi16rr, ADDi8rr>;
defm VSub : IntBinVOp<"sub.s", sub, SUBi64rr, SUBi32rr, SUBi16rr, SUBi8rr>;

def AddCCV4I32 : VecBinaryOp<V4AsmStr<"add.cc.s32">, addc, V4I32Regs,
                             ADDCCi32rr>;
def AddCCV2I32 : VecBinaryOp<V2AsmStr<"add.cc.s32">, addc, V2I32Regs,
                             ADDCCi32rr>;
def SubCCV4I32 : VecBinaryOp<V4AsmStr<"sub.cc.s32">, subc, V4I32Regs,
                             SUBCCi32rr>;
def SubCCV2I32 : VecBinaryOp<V2AsmStr<"sub.cc.s32">, subc, V2I32Regs,
                             SUBCCi32rr>;
def AddCCCV4I32 : VecBinaryOp<V4AsmStr<"addc.cc.s32">, adde, V4I32Regs,
                              ADDCCCi32rr>;
def AddCCCV2I32 : VecBinaryOp<V2AsmStr<"addc.cc.s32">, adde, V2I32Regs,
                              ADDCCCi32rr>;
def SubCCCV4I32 : VecBinaryOp<V4AsmStr<"subc.cc.s32">, sube, V4I32Regs,
                              SUBCCCi32rr>;
def SubCCCV2I32 : VecBinaryOp<V2AsmStr<"subc.cc.s32">, sube, V2I32Regs,
                              SUBCCCi32rr>;

def ShiftLV2I64 : VecShiftOp<V2AsmStr<"shl.b64">, shl, V2I64Regs, V2I32Regs,
                             SHLi64rr>;
def ShiftLV2I32 : VecShiftOp<V2AsmStr<"shl.b32">, shl, V2I32Regs, V2I32Regs,
                             SHLi32rr>;
def ShiftLV4I32 : VecShiftOp<V4AsmStr<"shl.b32">, shl, V4I32Regs, V4I32Regs,
                             SHLi32rr>;
def ShiftLV2I16 : VecShiftOp<V2AsmStr<"shl.b16">, shl, V2I16Regs, V2I32Regs,
                             SHLi16rr>;
def ShiftLV4I16 : VecShiftOp<V4AsmStr<"shl.b16">, shl, V4I16Regs, V4I32Regs,
                             SHLi16rr>;
def ShiftLV2I8 : VecShiftOp<V2AsmStr<"shl.b16">, shl, V2I8Regs, V2I32Regs,
                            SHLi8rr>;
def ShiftLV4I8 : VecShiftOp<V4AsmStr<"shl.b16">, shl, V4I8Regs, V4I32Regs,
                            SHLi8rr>;
}

// cvt to v*i32, helpers for shift
// Pattern-less conversion instruction from `inclass` ($s) to `outclass` ($d).
// It has an empty pattern list, so it is only selected when named explicitly
// in a Pat result.
class CVTtoVeci32<NVPTXRegClass inclass, NVPTXRegClass outclass, string asmstr,
                  NVPTXInst sInst=NOP> :
  NVPTXVecInst<(outs outclass:$d), (ins inclass:$s), asmstr, [], sInst>;

// Builds one conversion line in field `s`: "<op>\t<dest>, <src>;".
class VecCVTStrHelper<string op, string dest, string src> {
  string s=!strconcat(op, "\t", dest, ", ", src, ";");
}

// Conversion text for a 2-element vector: one VecCVTStrHelper line for each
// of the element suffixes _0 and _1 of $d and $s, joined by "\n\t".
class Vec2CVTStr<string op> {
  string s=!strconcat(VecCVTStrHelper<op, "${d}_0", "${s}_0">.s,
                      "\n\t",
                      VecCVTStrHelper<op, "${d}_1", "${s}_1">.s);
}

// Conversion text for a 4-element vector: one VecCVTStrHelper line for each
// of the element suffixes _0.._3 of $d and $s, joined by "\n\t".
class Vec4CVTStr<string op> {
  string s=!strconcat(VecCVTStrHelper<op, "${d}_0", "${s}_0">.s,
                      "\n\t",
                      VecCVTStrHelper<op, "${d}_1", "${s}_1">.s,
                      "\n\t",
                      VecCVTStrHelper<op, "${d}_2", "${s}_2">.s,
                      "\n\t",
                      VecCVTStrHelper<op, "${d}_3", "${s}_3">.s);
}

// Conversions of i8/i16/i64 vectors to i32 vectors. Both the i8 and the i16
// sources print cvt.u32.u16; the i64 source prints cvt.u32.u64.
let VecInstType=isVecOther.Value in {
def CVTv2i8tov2i32 : CVTtoVeci32<V2I8Regs, V2I32Regs,
                                 Vec2CVTStr<"cvt.u32.u16">.s, Zint_extendext8to32>;
def CVTv2i16tov2i32 : CVTtoVeci32<V2I16Regs, V2I32Regs,
                                  Vec2CVTStr<"cvt.u32.u16">.s, Zint_extendext16to32>;
def CVTv4i8tov4i32 : CVTtoVeci32<V4I8Regs, V4I32Regs,
                                 Vec4CVTStr<"cvt.u32.u16">.s, Zint_extendext8to32>;
def CVTv4i16tov4i32 : CVTtoVeci32<V4I16Regs, V4I32Regs,
                                  Vec4CVTStr<"cvt.u32.u16">.s, Zint_extendext16to32>;
def CVTv2i64tov2i32 : CVTtoVeci32<V2I64Regs, V2I32Regs,
                                  Vec2CVTStr<"cvt.u32.u64">.s, TRUNC_64to32>;
}

// shl where the shift-amount vector has the same (non-i32) element type as
// the value: convert the shift amounts to an i32 vector first, then use the
// ShiftL* instruction that takes i32 shift amounts.
def : Pat<(shl V2I16Regs:$src1, V2I16Regs:$src2),
          (ShiftLV2I16 V2I16Regs:$src1, (CVTv2i16tov2i32 V2I16Regs:$src2))>;
def : Pat<(shl V2I8Regs:$src1, V2I8Regs:$src2),
          (ShiftLV2I8 V2I8Regs:$src1, (CVTv2i8tov2i32 V2I8Regs:$src2))>;
def : Pat<(shl V2I64Regs:$src1, V2I64Regs:$src2),
          (ShiftLV2I64 V2I64Regs:$src1, (CVTv2i64tov2i32 V2I64Regs:$src2))>;

def : Pat<(shl V4I16Regs:$src1, V4I16Regs:$src2),
          (ShiftLV4I16 V4I16Regs:$src1, (CVTv4i16tov4i32 V4I16Regs:$src2))>;
def : Pat<(shl V4I8Regs:$src1, V4I8Regs:$src2),
          (ShiftLV4I8 V4I8Regs:$src1, (CVTv4i8tov4i32 V4I8Regs:$src2))>;

// Arithmetic (sra -> shr.s*) and logical (srl -> shr.u*) vector right shifts,
// followed by vector multiply, multiply-high, divide and remainder. Every
// shift record takes its shift-amount operand in an i32 vector register
// class; the i8 element variants print 16-bit opcodes.
let VecInstType=isVecOther.Value in {
def ShiftRAV2I64 : VecShiftOp<V2AsmStr<"shr.s64">, sra, V2I64Regs, V2I32Regs,
                              SRAi64rr>;
def ShiftRAV2I32 : VecShiftOp<V2AsmStr<"shr.s32">, sra, V2I32Regs, V2I32Regs,
                              SRAi32rr>;
def ShiftRAV4I32 : VecShiftOp<V4AsmStr<"shr.s32">, sra, V4I32Regs, V4I32Regs,
                              SRAi32rr>;
def ShiftRAV2I16 : VecShiftOp<V2AsmStr<"shr.s16">, sra, V2I16Regs, V2I32Regs,
                              SRAi16rr>;
def ShiftRAV4I16 : VecShiftOp<V4AsmStr<"shr.s16">, sra, V4I16Regs, V4I32Regs,
                              SRAi16rr>;
def ShiftRAV2I8 : VecShiftOp<V2AsmStr<"shr.s16">, sra, V2I8Regs, V2I32Regs,
                             SRAi8rr>;
def ShiftRAV4I8 : VecShiftOp<V4AsmStr<"shr.s16">, sra, V4I8Regs, V4I32Regs,
                             SRAi8rr>;

def ShiftRLV2I64 : VecShiftOp<V2AsmStr<"shr.u64">, srl, V2I64Regs, V2I32Regs,
                              SRLi64rr>;
def ShiftRLV2I32 : VecShiftOp<V2AsmStr<"shr.u32">, srl, V2I32Regs, V2I32Regs,
                              SRLi32rr>;
def ShiftRLV4I32 : VecShiftOp<V4AsmStr<"shr.u32">, srl, V4I32Regs, V4I32Regs,
                              SRLi32rr>;
def ShiftRLV2I16 : VecShiftOp<V2AsmStr<"shr.u16">, srl, V2I16Regs, V2I32Regs,
                              SRLi16rr>;
def ShiftRLV4I16 : VecShiftOp<V4AsmStr<"shr.u16">, srl, V4I16Regs, V4I32Regs,
                              SRLi16rr>;
def ShiftRLV2I8 : VecShiftOp<V2AsmStr<"shr.u16">, srl, V2I8Regs, V2I32Regs,
                             SRLi8rr>;
def ShiftRLV4I8 : VecShiftOp<V4AsmStr<"shr.u16">, srl, V4I8Regs, V4I32Regs,
                             SRLi8rr>;

defm VMult : IntBinVOp<"mul.lo.s", mul, MULTi64rr, MULTi32rr, MULTi16rr,
                       MULTi8rr>;
defm VMultHS : IntBinVOp<"mul.hi.s", mulhs, MULTHSi64rr, MULTHSi32rr,
                         MULTHSi16rr,
                         MULTHSi8rr>;
defm VMultHU : IntBinVOp<"mul.hi.u", mulhu, MULTHUi64rr, MULTHUi32rr,
                         MULTHUi16rr,
                         MULTHUi8rr>;
defm VSDiv : IntBinVOp<"div.s", sdiv, SDIVi64rr, SDIVi32rr, SDIVi16rr,
                       SDIVi8rr>;
defm VUDiv : IntBinVOp<"div.u", udiv, UDIVi64rr, UDIVi32rr, UDIVi16rr,
                       UDIVi8rr>;
defm VSRem : IntBinVOp<"rem.s", srem, SREMi64rr, SREMi32rr, SREMi16rr,
                       SREMi8rr>;
defm VURem : IntBinVOp<"rem.u", urem, UREMi64rr, UREMi32rr, UREMi16rr,
                       UREMi8rr>;
}

// sra/srl where the shift-amount vector has the same (non-i32) element type
// as the value: convert the shift amounts to an i32 vector first, then use
// the ShiftRA*/ShiftRL* instruction that takes i32 shift amounts.
def : Pat<(sra V2I16Regs:$src1, V2I16Regs:$src2),
          (ShiftRAV2I16 V2I16Regs:$src1, (CVTv2i16tov2i32 V2I16Regs:$src2))>;
def : Pat<(sra V2I8Regs:$src1, V2I8Regs:$src2),
          (ShiftRAV2I8 V2I8Regs:$src1, (CVTv2i8tov2i32 V2I8Regs:$src2))>;
def : Pat<(sra V2I64Regs:$src1, V2I64Regs:$src2),
          (ShiftRAV2I64 V2I64Regs:$src1, (CVTv2i64tov2i32 V2I64Regs:$src2))>;

def : Pat<(sra V4I16Regs:$src1, V4I16Regs:$src2),
          (ShiftRAV4I16 V4I16Regs:$src1, (CVTv4i16tov4i32 V4I16Regs:$src2))>;
def : Pat<(sra V4I8Regs:$src1, V4I8Regs:$src2),
          (ShiftRAV4I8 V4I8Regs:$src1, (CVTv4i8tov4i32 V4I8Regs:$src2))>;

def : Pat<(srl V2I16Regs:$src1, V2I16Regs:$src2),
          (ShiftRLV2I16 V2I16Regs:$src1, (CVTv2i16tov2i32 V2I16Regs:$src2))>;
def : Pat<(srl V2I8Regs:$src1, V2I8Regs:$src2),
          (ShiftRLV2I8 V2I8Regs:$src1, (CVTv2i8tov2i32 V2I8Regs:$src2))>;
def : Pat<(srl V2I64Regs:$src1, V2I64Regs:$src2),
          (ShiftRLV2I64 V2I64Regs:$src1, (CVTv2i64tov2i32 V2I64Regs:$src2))>;

def : Pat<(srl V4I16Regs:$src1, V4I16Regs:$src2),
          (ShiftRLV4I16 V4I16Regs:$src1, (CVTv4i16tov4i32 V4I16Regs:$src2))>;
def : Pat<(srl V4I8Regs:$src1, V4I8Regs:$src2),
          (ShiftRLV4I8 V4I8Regs:$src1, (CVTv4i8tov4i32 V4I8Regs:$src2))>;

// Multiply-add for a 4-element (V4) and a 2-element (V2) register class.
// Each record matches (an (mn $a, $b), $c) and is guarded by Requires<[Pred]>.
//   asmstr     - full opcode, repeated per element by V4MADStr/V2MADStr.
//   regclassv4 - register class of the V4 record.
//   regclassv2 - register class of the V2 record.
//   an, mn     - outer (add-like) and inner (mul-like) DAG nodes.
//   sop        - scalar instruction forwarded to NVPTXVecInst.
//   Pred       - predicate; it has no default, so every use must supply all
//                seven arguments.
multiclass VMAD<string asmstr, NVPTXRegClass regclassv4,
                NVPTXRegClass regclassv2,
                SDNode an=add, SDNode mn=mul, NVPTXInst sop=NOP,
                Predicate Pred> {
  def V4 : NVPTXVecInst<(outs regclassv4:$dst),
                        (ins regclassv4:$a, regclassv4:$b, regclassv4:$c),
                        V4MADStr<asmstr>.s,
                        [(set regclassv4:$dst,
                          (an (mn regclassv4:$a, regclassv4:$b), regclassv4:$c))],
                        sop>,
           Requires<[Pred]>;
  def V2 : NVPTXVecInst<(outs regclassv2:$dst),
                        (ins regclassv2:$a, regclassv2:$b, regclassv2:$c),
                        V2MADStr<asmstr>.s,
                        [(set regclassv2:$dst,
                          (an (mn regclassv2:$a, regclassv2:$b), regclassv2:$c))],
                        sop>,
           Requires<[Pred]>;
}

// Integer multiply-add with only a 2-element (V2) record. Matches
// (add (mul $a, $b), $c) on `regclass`, guarded by Requires<[Pred]>.
//   sop  - scalar instruction forwarded to NVPTXVecInst.
//   Pred - predicate; it has no default and must always be supplied.
multiclass VMADV2Only<string asmstr, NVPTXRegClass regclass, NVPTXInst sop=NOP,
                      Predicate Pred> {
  def V2 : NVPTXVecInst<(outs regclass:$dst),
                        (ins regclass:$a, regclass:$b, regclass:$c),
                        V2MADStr<asmstr>.s,
                        [(set regclass:$dst, (add
                          (mul regclass:$a, regclass:$b), regclass:$c))], sop>,
           Requires<[Pred]>;
}
// Floating-point multiply-add with only a 2-element (V2) record. Matches
// (fadd (fmul $a, $b), $c) on `regclass`, guarded by Requires<[Pred]>.
//   sop  - scalar instruction forwarded to NVPTXVecInst.
//   Pred - predicate; it has no default and must always be supplied.
multiclass VFMADV2Only<string asmstr, NVPTXRegClass regclass, NVPTXInst sop=NOP,
                       Predicate Pred> {
  def V2 : NVPTXVecInst<(outs regclass:$dst),
                        (ins regclass:$a, regclass:$b, regclass:$c),
                        V2MADStr<asmstr>.s,
                        [(set regclass:$dst, (fadd
                          (fmul regclass:$a, regclass:$b), regclass:$c))], sop>,
           Requires<[Pred]>;
}

// Integer multiply-add (guarded by the predicate named `true`), integer
// negate, float add/sub/mul, and float mad/fma records. The i8 multiply-add
// prints the same mad.lo.s16 opcode as the i16 one.
let VecInstType=isVecOther.Value in {
defm I8MAD : VMAD<"mad.lo.s16", V4I8Regs, V2I8Regs, add, mul, MAD8rrr, true>;
defm I16MAD : VMAD<"mad.lo.s16", V4I16Regs, V2I16Regs, add, mul, MAD16rrr,
                   true>;
defm I32MAD : VMAD<"mad.lo.s32", V4I32Regs, V2I32Regs, add, mul, MAD32rrr,
                   true>;
defm I64MAD : VMADV2Only<"mad.lo.s64", V2I64Regs, MAD64rrr, true>;

defm VNeg : IntUnaryVOp<"neg.s", ineg, INEG64, INEG32, INEG16, INEG8>;

defm VAddf : FloatBinVOp<"add.", fadd, FADDf64rr, FADDf32rr, FADDf32rr_ftz>;
defm VSubf : FloatBinVOp<"sub.", fsub, FSUBf64rr, FSUBf32rr, FSUBf32rr_ftz>;
defm VMulf : FloatBinVOp<"mul.", fmul, FMULf64rr, FMULf32rr, FMULf32rr_ftz>;

defm F32MAD_ftz : VMAD<"mad.ftz.f32", V4F32Regs, V2F32Regs, fadd, fmul,
                       FMAD32_ftzrrr, doFMADF32_ftz>;
defm F32FMA_ftz : VMAD<"fma.rn.ftz.f32", V4F32Regs, V2F32Regs, fadd, fmul,
                       FMA32_ftzrrr, doFMAF32_ftz>;
defm F32MAD : VMAD<"mad.f32", V4F32Regs, V2F32Regs, fadd, fmul, FMAD32rrr,
                   doFMADF32>;
defm F32FMA : VMAD<"fma.rn.f32", V4F32Regs, V2F32Regs, fadd, fmul, FMA32rrr,
                   doFMAF32>;
defm F64FMA : VFMADV2Only<"fma.rn.f64", V2F64Regs, FMA64rrr, doFMAF64>;
}

// Floating-point vector division. The f32 records come in div.rn (guarded by
// reqPTX20) and div.full forms, each with an ftz variant guarded by doF32FTZ;
// f64 has a single div.rn record.
let VecInstType=isVecOther.Value in {
def V4F32Div_prec_ftz : VecBinaryOp<V4AsmStr<"div.rn.ftz.f32">, fdiv, V4F32Regs,
                                    FDIV32rr_prec_ftz>, Requires<[doF32FTZ, reqPTX20]>;
def V2F32Div_prec_ftz : VecBinaryOp<V2AsmStr<"div.rn.ftz.f32">, fdiv, V2F32Regs,
                                    FDIV32rr_prec_ftz>, Requires<[doF32FTZ, reqPTX20]>;
def V4F32Div_prec : VecBinaryOp<V4AsmStr<"div.rn.f32">, fdiv, V4F32Regs,
                                FDIV32rr_prec>, Requires<[reqPTX20]>;
def V2F32Div_prec : VecBinaryOp<V2AsmStr<"div.rn.f32">, fdiv, V2F32Regs,
                                FDIV32rr_prec>, Requires<[reqPTX20]>;
def V2F32Div_ftz : VecBinaryOp<V2AsmStr<"div.full.ftz.f32">, fdiv, V2F32Regs,
                               FDIV32rr_ftz>, Requires<[doF32FTZ]>;
def V4F32Div_ftz : VecBinaryOp<V4AsmStr<"div.full.ftz.f32">, fdiv, V4F32Regs,
                               FDIV32rr_ftz>, Requires<[doF32FTZ]>;
def V2F32Div : VecBinaryOp<V2AsmStr<"div.full.f32">, fdiv, V2F32Regs, FDIV32rr>;
def V4F32Div : VecBinaryOp<V4AsmStr<"div.full.f32">, fdiv, V4F32Regs, FDIV32rr>;
def V2F64Div : VecBinaryOp<V2AsmStr<"div.rn.f64">, fdiv, V2F64Regs, FDIV64rr>;
}

// PatFrag wrapper around fneg, so it can be passed where VecUnaryOp expects a
// PatFrag rather than an SDNode.
def fnegpat : PatFrag<(ops node:$in), (fneg node:$in)>;

// Floating-point vector negate (ftz variants guarded by doF32FTZ), followed
// by the bitwise vector operations.
let VecInstType=isVecOther.Value in {
def VNegv2f32_ftz : VecUnaryOp<V2UnaryStr<"neg.ftz.f32">, fnegpat, V2F32Regs,
                               FNEGf32_ftz>, Requires<[doF32FTZ]>;
def VNegv4f32_ftz : VecUnaryOp<V4UnaryStr<"neg.ftz.f32">, fnegpat, V4F32Regs,
                               FNEGf32_ftz>, Requires<[doF32FTZ]>;
def VNegv2f32 : VecUnaryOp<V2UnaryStr<"neg.f32">, fnegpat, V2F32Regs, FNEGf32>;
def VNegv4f32 : VecUnaryOp<V4UnaryStr<"neg.f32">, fnegpat, V4F32Regs, FNEGf32>;
def VNegv2f64 : VecUnaryOp<V2UnaryStr<"neg.f64">, fnegpat, V2F64Regs, FNEGf64>;

// Logical Arithmetic
defm VAnd : IntBinVOp<"and.b", and, ANDb64rr, ANDb32rr, ANDb16rr, ANDb8rr>;
defm VOr : IntBinVOp<"or.b", or, ORb64rr, ORb32rr, ORb16rr, ORb8rr>;
defm VXor : IntBinVOp<"xor.b", xor, XORb64rr, XORb32rr, XORb16rr, XORb8rr>;

defm VNot : IntUnaryVOp<"not.b", not, NOT64, NOT32, NOT16, NOT8>;
}


// Folds an fsub involving an fmul into the three-operand instruction `Inst`
// for v2f32, guarded by Requires<[Pred]>:
//   a - (b * c)  ->  Inst(-b, c, a)
//   (a * b) - c  ->  Inst(a, b, -c)
// The negation is emitted as VNegv2f32.
multiclass V2FPCONTRACT32_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
  def : Pat<(fsub V2F32Regs:$a, (fmul V2F32Regs:$b, V2F32Regs:$c)),
            (Inst (VNegv2f32 V2F32Regs:$b), V2F32Regs:$c, V2F32Regs:$a)>,
        Requires<[Pred]>;

  def : Pat<(fsub (fmul V2F32Regs:$a, V2F32Regs:$b), V2F32Regs:$c),
            (Inst V2F32Regs:$a, V2F32Regs:$b, (VNegv2f32 V2F32Regs:$c))>,
        Requires<[Pred]>;
}

// v2f32 fsub/fmul folding for each fma/mad variant and its predicate.
defm V2FMAF32ext_ftz : V2FPCONTRACT32_SUB_PAT<F32FMA_ftzV2, doFMAF32AGG_ftz>;
defm V2FMADF32ext_ftz : V2FPCONTRACT32_SUB_PAT<F32MAD_ftzV2, doFMADF32_ftz>;
defm V2FMAF32ext : V2FPCONTRACT32_SUB_PAT<F32FMAV2, doFMAF32AGG>;
defm V2FMADF32ext : V2FPCONTRACT32_SUB_PAT<F32MADV2, doFMADF32>;

// Folds an fsub involving an fmul into the three-operand instruction `Inst`
// for v4f32, guarded by Requires<[Pred]>:
//   a - (b * c)  ->  Inst(-b, c, a)
//   (a * b) - c  ->  Inst(a, b, -c)
// The negation is emitted as VNegv4f32.
multiclass V4FPCONTRACT32_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
  def : Pat<(fsub V4F32Regs:$a, (fmul V4F32Regs:$b, V4F32Regs:$c)),
            (Inst (VNegv4f32 V4F32Regs:$b), V4F32Regs:$c, V4F32Regs:$a)>,
        Requires<[Pred]>;

  def : Pat<(fsub (fmul V4F32Regs:$a, V4F32Regs:$b), V4F32Regs:$c),
            (Inst V4F32Regs:$a, V4F32Regs:$b, (VNegv4f32 V4F32Regs:$c))>,
        Requires<[Pred]>;
}

// v4f32 fsub/fmul folding for each fma/mad variant and its predicate.
defm V4FMAF32ext_ftz : V4FPCONTRACT32_SUB_PAT<F32FMA_ftzV4, doFMAF32AGG_ftz>;
defm V4FMADF32ext_ftz : V4FPCONTRACT32_SUB_PAT<F32MAD_ftzV4, doFMADF32_ftz>;
defm V4FMAF32ext : V4FPCONTRACT32_SUB_PAT<F32FMAV4, doFMAF32AGG>;
defm V4FMADF32ext : V4FPCONTRACT32_SUB_PAT<F32MADV4, doFMADF32>;

// Folds an fsub involving an fmul into the three-operand instruction `Inst`
// for v2f64, guarded by Requires<[Pred]>:
//   a - (b * c)  ->  Inst(-b, c, a)
//   (a * b) - c  ->  Inst(a, b, -c)
// The negation is emitted as VNegv2f64.
multiclass V2FPCONTRACT64_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
  def : Pat<(fsub V2F64Regs:$a, (fmul V2F64Regs:$b, V2F64Regs:$c)),
            (Inst (VNegv2f64 V2F64Regs:$b), V2F64Regs:$c, V2F64Regs:$a)>,
        Requires<[Pred]>;

  def : Pat<(fsub (fmul V2F64Regs:$a, V2F64Regs:$b), V2F64Regs:$c),
            (Inst V2F64Regs:$a, V2F64Regs:$b, (VNegv2f64 V2F64Regs:$c))>,
        Requires<[Pred]>;
}

// v2f64 fsub/fmul folding onto the f64 fma record.
defm V2FMAF64ext : V2FPCONTRACT64_SUB_PAT<F64FMAV2, doFMAF64AGG>;

// Builds an operand reference with a modifier in field `s`:
//   "${c" + elem + ":vecv" + vecsize + extra + l + "}"
// e.g. VecModStr<"4", "0", "comm", "1">.s is "${c0:vecv4comm1}".
// NOTE(review): the vecv* modifiers are presumably interpreted by the asm
// printer -- their meaning is not visible in this file.
class VecModStr<string vecsize, string elem, string extra, string l="">
{
  string t1 = !strconcat("${c", elem);
  string t2 = !strconcat(t1, ":vecv");
  string t3 = !strconcat(t2, vecsize);
  string t4 = !strconcat(t3, extra);
  string t5 = !strconcat(t4, l);
  string s = !strconcat(t5, "}");
}
// Assembly text for one destination element of a shuffle, left in field `s`.
// It is two mov lines writing ${dst}_<elem>, one reading $src1 and one
// reading $src2:
//   <comm1>mov.<type> \t${dst}_<elem>, $src1<pos>;\n\t
//   <comm2>mov.<type> \t${dst}_<elem>, $src2<pos>;
// where <comm1>, <comm2> and <pos> are VecModStr references to operand
// $c<elem> with the "comm"+"1", "comm"+"2" and "pos" modifiers.
class ShuffleOneLine<string vecsize, string elem, string type>
{
  string t1 = VecModStr<vecsize, elem, "comm", "1">.s;
  string t2 = !strconcat(t1, "mov.");
  string t3 = !strconcat(t2, type);
  string t4 = !strconcat(t3, " \t${dst}_");
  string t5 = !strconcat(t4, elem);
  string t6 = !strconcat(t5, ", $src1");
  string t7 = !strconcat(t6, VecModStr<vecsize, elem, "pos">.s);
  string t8 = !strconcat(t7, ";\n\t");
  string t9 = !strconcat(t8, VecModStr<vecsize, elem, "comm", "2">.s);
  string t10 = !strconcat(t9, "mov.");
  string t11 = !strconcat(t10, type);
  string t12 = !strconcat(t11, " \t${dst}_");
  string t13 = !strconcat(t12, elem);
  string t14 = !strconcat(t13, ", $src2");
  string t15 = !strconcat(t14, VecModStr<vecsize, elem, "pos">.s);
  string s = !strconcat(t15, ";");
}
// Shuffle assembly text for a 2-element vector: the ShuffleOneLine strings
// for elements "0" and "1", joined by "\n\t".
class ShuffleAsmStr2<string type>
{
  string t1 = ShuffleOneLine<"2", "0", type>.s;
  string t2 = !strconcat(t1, "\n\t");
  string s = !strconcat(t2, ShuffleOneLine<"2", "1", type>.s);
}
// Shuffle assembly text for a 4-element vector: the ShuffleOneLine strings
// for elements "0".."3", joined by "\n\t".
class ShuffleAsmStr4<string type>
{
  string t1 = ShuffleOneLine<"4", "0", type>.s;
  string t2 = !strconcat(t1, "\n\t");
  string t3 = !strconcat(t2, ShuffleOneLine<"4", "1", type>.s);
  string t4 = !strconcat(t3, "\n\t");
  string t5 = !strconcat(t4, ShuffleOneLine<"4", "2", type>.s);
  string t6 = !strconcat(t5, "\n\t");
  string s = !strconcat(t6, ShuffleOneLine<"4", "3", type>.s);
}

// Vector shuffle pseudo-instructions. Each takes two source vectors and one
// i8 immediate per destination element ($c0..$c1 or $c0..$c3). None has a
// selection pattern of its own, so they are only selected when named in a
// Pat result. The printed text starts with a "//Mov ..." comment line
// listing the operands, followed by the ShuffleAsmStr2/4 expansion. The i8
// element variants print u16 moves.
let hasSideEffects=0, VecInstType=isVecShuffle.Value in {
def VecShuffle_v4f32 : NVPTXVecInst<(outs V4F32Regs:$dst),
                       (ins V4F32Regs:$src1, V4F32Regs:$src2,
                            i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3),
   !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t",
              ShuffleAsmStr4<"f32">.s),
                       [], FMOV32rr>;

def VecShuffle_v4i32 : NVPTXVecInst<(outs V4I32Regs:$dst),
                       (ins V4I32Regs:$src1, V4I32Regs:$src2,
                            i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3),
   !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t",
              ShuffleAsmStr4<"u32">.s),
                       [], IMOV32rr>;

def VecShuffle_v4i16 : NVPTXVecInst<(outs V4I16Regs:$dst),
                       (ins V4I16Regs:$src1, V4I16Regs:$src2,
                            i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3),
   !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t",
              ShuffleAsmStr4<"u16">.s),
                       [], IMOV16rr>;

def VecShuffle_v4i8 : NVPTXVecInst<(outs V4I8Regs:$dst),
                       (ins V4I8Regs:$src1, V4I8Regs:$src2,
                            i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3),
   !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t",
              ShuffleAsmStr4<"u16">.s),
                       [], IMOV8rr>;

def VecShuffle_v2f32 : NVPTXVecInst<(outs V2F32Regs:$dst),
                       (ins V2F32Regs:$src1, V2F32Regs:$src2,
                            i8imm:$c0, i8imm:$c1),
   !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
              ShuffleAsmStr2<"f32">.s),
                       [], FMOV32rr>;

def VecShuffle_v2i32 : NVPTXVecInst<(outs V2I32Regs:$dst),
                       (ins V2I32Regs:$src1, V2I32Regs:$src2,
                            i8imm:$c0, i8imm:$c1),
   !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
              ShuffleAsmStr2<"u32">.s),
                       [], IMOV32rr>;

def VecShuffle_v2i8 : NVPTXVecInst<(outs V2I8Regs:$dst),
                       (ins V2I8Regs:$src1, V2I8Regs:$src2,
                            i8imm:$c0, i8imm:$c1),
   !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
              ShuffleAsmStr2<"u16">.s),
                       [], IMOV8rr>;

def VecShuffle_v2i16 : NVPTXVecInst<(outs V2I16Regs:$dst),
                       (ins V2I16Regs:$src1, V2I16Regs:$src2,
                            i8imm:$c0, i8imm:$c1),
   !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
              ShuffleAsmStr2<"u16">.s),
                       [], IMOV16rr>;

def VecShuffle_v2f64 : NVPTXVecInst<(outs V2F64Regs:$dst),
                       (ins V2F64Regs:$src1, V2F64Regs:$src2,
                            i8imm:$c0, i8imm:$c1),
   !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
              ShuffleAsmStr2<"f64">.s),
                       [], FMOV64rr>;

def VecShuffle_v2i64 : NVPTXVecInst<(outs V2I64Regs:$dst),
                       (ins V2I64Regs:$src1, V2I64Regs:$src2,
                            i8imm:$c0, i8imm:$c1),
   !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
              ShuffleAsmStr2<"u64">.s),
                       [], IMOV64rr>;
}

// Transform on a vector_shuffle node: returns mask element 0 as an i32
// target constant.
def ShuffleMask0 : SDNodeXForm<vector_shuffle, [{
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
  return CurDAG->getTargetConstant(SVOp->getMaskElt(0), SDLoc(N), MVT::i32);
}]>;
// Transform on a vector_shuffle node: returns mask element 1 as an i32
// target constant.
def ShuffleMask1 : SDNodeXForm<vector_shuffle, [{
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
  return CurDAG->getTargetConstant(SVOp->getMaskElt(1), SDLoc(N), MVT::i32);
}]>;
// Transform on a vector_shuffle node: returns mask element 2 as an i32
// target constant.
def ShuffleMask2 : SDNodeXForm<vector_shuffle, [{
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
  return CurDAG->getTargetConstant(SVOp->getMaskElt(2), SDLoc(N), MVT::i32);
}]>;
746def ShuffleMask3 : SDNodeXForm<vector_shuffle, [{
747 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000748 return CurDAG->getTargetConstant(SVOp->getMaskElt(3), SDLoc(N), MVT::i32);
Justin Holewinskiae556d32012-05-04 20:18:50 +0000749}]>;
750
// Pattern fragment matching any vector_shuffle of $lhs and $rhs.  The
// predicate always returns true; the otherwise pointless getGluedNode() call
// only references N so that the compiler does not warn about it being
// unused.
def vec_shuf
  : PatFrag<(ops node:$lhs, node:$rhs),
            (vector_shuffle node:$lhs, node:$rhs),
            [{
              N->getGluedNode();
              return true;
            }]>;
756
// Selection patterns for vector_shuffle.  The matched shuffle node is bound
// to $op so the ShuffleMask<i> transforms can extract the mask elements and
// pass them to the shuffle instruction as trailing operands.

// Two-element form: mask elements 0 and 1.
class VecShuffle2Pat<ValueType vt, NVPTXRegClass rc, NVPTXInst shuf>
  : Pat<(vt (vec_shuf:$op rc:$src1, rc:$src2)),
        (shuf rc:$src1, rc:$src2,
              (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;

// Four-element form: mask elements 0 through 3.
class VecShuffle4Pat<ValueType vt, NVPTXRegClass rc, NVPTXInst shuf>
  : Pat<(vt (vec_shuf:$op rc:$src1, rc:$src2)),
        (shuf rc:$src1, rc:$src2,
              (ShuffleMask0 node:$op), (ShuffleMask1 node:$op),
              (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>;

def : VecShuffle2Pat<v2f64, V2F64Regs, VecShuffle_v2f64>;
def : VecShuffle4Pat<v4f32, V4F32Regs, VecShuffle_v4f32>;
def : VecShuffle2Pat<v2f32, V2F32Regs, VecShuffle_v2f32>;
def : VecShuffle2Pat<v2i64, V2I64Regs, VecShuffle_v2i64>;
def : VecShuffle4Pat<v4i32, V4I32Regs, VecShuffle_v4i32>;
def : VecShuffle2Pat<v2i32, V2I32Regs, VecShuffle_v2i32>;
def : VecShuffle4Pat<v4i16, V4I16Regs, VecShuffle_v4i16>;
def : VecShuffle2Pat<v2i16, V2I16Regs, VecShuffle_v2i16>;
def : VecShuffle4Pat<v4i8,  V4I8Regs,  VecShuffle_v4i8>;
def : VecShuffle2Pat<v2i8,  V2I8Regs,  VecShuffle_v2i8>;
800
// build_vector from two scalars of class sclass into a vector of class
// vclass.  asmstr is the mnemonic placed before the operands; si is stored
// as the instruction's scalar counterpart.
class Build_Vector2<string asmstr, NVPTXRegClass vclass, NVPTXRegClass sclass,
                    NVPTXInst si>
  : NVPTXVecInst<
      (outs vclass:$dst),
      (ins sclass:$a1, sclass:$a2),
      !strconcat(asmstr, "\t${dst:vecfull}, {{$a1, $a2}};"),
      [(set vclass:$dst, (build_vector sclass:$a1, sclass:$a2))],
      si>;

// Same as Build_Vector2, but with four scalar inputs.
class Build_Vector4<string asmstr, NVPTXRegClass vclass, NVPTXRegClass sclass,
                    NVPTXInst si>
  : NVPTXVecInst<
      (outs vclass:$dst),
      (ins sclass:$a1, sclass:$a2, sclass:$a3, sclass:$a4),
      !strconcat(asmstr, "\t${dst:vecfull}, {{$a1, $a2, $a3, $a4}};"),
      [(set vclass:$dst,
            (build_vector sclass:$a1, sclass:$a2, sclass:$a3, sclass:$a4))],
      si>;
816
// Concrete build_vector instructions, all flagged as cheap as a move and
// tagged with the isVecBuild instruction type.  The i8 variants use the
// ".u16" mnemonics.
let isAsCheapAsAMove = 1, VecInstType = isVecBuild.Value in {
  // Two-element vectors.
  def Build_Vector2_f32
    : Build_Vector2<"mov.v2.f32", V2F32Regs, Float32Regs, FMOV32rr>;
  def Build_Vector2_f64
    : Build_Vector2<"mov.v2.f64", V2F64Regs, Float64Regs, FMOV64rr>;

  def Build_Vector2_i32
    : Build_Vector2<"mov.v2.u32", V2I32Regs, Int32Regs, IMOV32rr>;
  def Build_Vector2_i64
    : Build_Vector2<"mov.v2.u64", V2I64Regs, Int64Regs, IMOV64rr>;
  def Build_Vector2_i16
    : Build_Vector2<"mov.v2.u16", V2I16Regs, Int16Regs, IMOV16rr>;
  def Build_Vector2_i8
    : Build_Vector2<"mov.v2.u16", V2I8Regs, Int8Regs, IMOV8rr>;

  // Four-element vectors.
  def Build_Vector4_f32
    : Build_Vector4<"mov.v4.f32", V4F32Regs, Float32Regs, FMOV32rr>;

  def Build_Vector4_i32
    : Build_Vector4<"mov.v4.u32", V4I32Regs, Int32Regs, IMOV32rr>;
  def Build_Vector4_i16
    : Build_Vector4<"mov.v4.u16", V4I16Regs, Int16Regs, IMOV16rr>;
  def Build_Vector4_i8
    : Build_Vector4<"mov.v4.u16", V4I8Regs, Int8Regs, IMOV8rr>;
}
842
// Whole-vector register copy within one register class.  There is no
// selection pattern; sop (NOP by default) is stored as the scalar
// counterpart.
class Vec_Move<string asmstr, NVPTXRegClass vclass, NVPTXInst sop=NOP>
  : NVPTXVecInst<
      (outs vclass:$dst),
      (ins vclass:$src),
      !strconcat(asmstr, "\t${dst:vecfull}, ${src:vecfull};"),
      [],
      sop>;
847
// Vector move instructions: cheap as a move, no side effects, marked as
// simple moves, and tagged isVecOther.  The i8 vectors use the ".u16"
// mnemonics.
let isAsCheapAsAMove = 1, hasSideEffects = 0, IsSimpleMove = 1,
    VecInstType = isVecOther.Value in {
  def V4f32Mov : Vec_Move<"mov.v4.f32", V4F32Regs, FMOV32rr>;
  def V2f32Mov : Vec_Move<"mov.v2.f32", V2F32Regs, FMOV32rr>;

  def V4i32Mov : Vec_Move<"mov.v4.u32", V4I32Regs, IMOV32rr>;
  def V2i32Mov : Vec_Move<"mov.v2.u32", V2I32Regs, IMOV32rr>;

  def V4i16Mov : Vec_Move<"mov.v4.u16", V4I16Regs, IMOV16rr>;
  def V2i16Mov : Vec_Move<"mov.v2.u16", V2I16Regs, IMOV16rr>;

  def V4i8Mov  : Vec_Move<"mov.v4.u16", V4I8Regs, IMOV8rr>;
  def V2i8Mov  : Vec_Move<"mov.v2.u16", V2I8Regs, IMOV8rr>;

  def V2f64Mov : Vec_Move<"mov.v2.f64", V2F64Regs, FMOV64rr>;
  def V2i64Mov : Vec_Move<"mov.v2.u64", V2I64Regs, IMOV64rr>;
}
865
866// extract subvector patterns
// DAG node for ISD::EXTRACT_SUBVECTOR.  The type profile declares one result
// and two operands, with the SDTCisPtrTy constraint applied to value #2.
def extract_subvec
  : SDNode<"ISD::EXTRACT_SUBVECTOR", SDTypeProfile<1, 2, [SDTCisPtrTy<2>]>>;
869
// Extracting a two-element half of a four-element vector: pull out the two
// elements at idx0 and idx1 individually and rebuild a two-element vector.
// The subvector start index matched in the source pattern is idx0.
class ExtractSubvec2Pat<ValueType vt, NVPTXRegClass rc, NVPTXInst buildInst,
                        NVPTXInst extInst, int idx0, int idx1>
  : Pat<(vt (extract_subvec rc:$src, idx0)),
        (buildInst (extInst rc:$src, idx0), (extInst rc:$src, idx1))>;

def : ExtractSubvec2Pat<v2f32, V4F32Regs, Build_Vector2_f32, V4f32Extract,
                        0, 1>;
def : ExtractSubvec2Pat<v2f32, V4F32Regs, Build_Vector2_f32, V4f32Extract,
                        2, 3>;
def : ExtractSubvec2Pat<v2i32, V4I32Regs, Build_Vector2_i32, V4i32Extract,
                        0, 1>;
def : ExtractSubvec2Pat<v2i32, V4I32Regs, Build_Vector2_i32, V4i32Extract,
                        2, 3>;
def : ExtractSubvec2Pat<v2i16, V4I16Regs, Build_Vector2_i16, V4i16Extract,
                        0, 1>;
def : ExtractSubvec2Pat<v2i16, V4I16Regs, Build_Vector2_i16, V4i16Extract,
                        2, 3>;
def : ExtractSubvec2Pat<v2i8, V4I8Regs, Build_Vector2_i8, V4i8Extract,
                        0, 1>;
def : ExtractSubvec2Pat<v2i8, V4I8Regs, Build_Vector2_i8, V4i8Extract,
                        2, 3>;
894
895// Select instructions
// Builds, in field s, the asm text of one "selp" for a single element:
//   selp.<type> \t${dst}_<pos>, ${src1}_<pos>, ${src2}_<pos>, $p;
class Select_OneLine<string type, string pos> {
  string s = !strconcat("selp.", type,
                        " \t${dst}_", pos,
                        ", ${src1}_", pos,
                        ", ${src2}_", pos,
                        ", $p;");
}
906
// Field s holds the select text for elements "0" and "1", separated by a
// newline and a tab.
class Select_Str2<string type> {
  string s = !strconcat(Select_OneLine<type, "0">.s, "\n\t",
                        Select_OneLine<type, "1">.s);
}
912
// Field s holds the select text for elements "0" through "3", each pair of
// lines separated by a newline and a tab.
class Select_Str4<string type> {
  string s = !strconcat(Select_OneLine<type, "0">.s, "\n\t",
                        Select_OneLine<type, "1">.s, "\n\t",
                        Select_OneLine<type, "2">.s, "\n\t",
                        Select_OneLine<type, "3">.s);
}
923
// Vector select controlled by a single predicate register $p: matches
// (select $p, $src1, $src2) on vectors of class vclass.  asmstr is used
// verbatim as the asm text and sop is stored as the scalar counterpart.
class Vec_Select<NVPTXRegClass vclass, string asmstr, NVPTXInst sop>
  : NVPTXVecInst<
      (outs vclass:$dst),
      (ins vclass:$src1, vclass:$src2, Int1Regs:$p),
      asmstr,
      [(set vclass:$dst, (select Int1Regs:$p, vclass:$src1, vclass:$src2))],
      sop>;
931
// Vector select instructions.  Integer vectors use the untyped ".b<N>"
// suffixes (".b16" for i8 elements); floating-point vectors use ".f<N>".
let VecInstType = isVecOther.Value in {
  def V2I64_Select
    : Vec_Select<V2I64Regs, Select_Str2<"b64">.s, SELECTi64rr>;
  def V4I32_Select
    : Vec_Select<V4I32Regs, Select_Str4<"b32">.s, SELECTi32rr>;
  def V2I32_Select
    : Vec_Select<V2I32Regs, Select_Str2<"b32">.s, SELECTi32rr>;
  def V4I16_Select
    : Vec_Select<V4I16Regs, Select_Str4<"b16">.s, SELECTi16rr>;
  def V2I16_Select
    : Vec_Select<V2I16Regs, Select_Str2<"b16">.s, SELECTi16rr>;
  def V4I8_Select
    : Vec_Select<V4I8Regs, Select_Str4<"b16">.s, SELECTi8rr>;
  def V2I8_Select
    : Vec_Select<V2I8Regs, Select_Str2<"b16">.s, SELECTi8rr>;

  def V2F64_Select
    : Vec_Select<V2F64Regs, Select_Str2<"f64">.s, SELECTf64rr>;
  def V4F32_Select
    : Vec_Select<V4F32Regs, Select_Str4<"f32">.s, SELECTf32rr>;
  def V2F32_Select
    : Vec_Select<V2F32Regs, Select_Str2<"f32">.s, SELECTf32rr>;
}
945
946// Comparison instructions
947
948// setcc convenience fragments.
// A setcc of $lhs and $rhs with a fixed condition code.
class VecSetCCFrag<CondCode cc>
  : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, cc)>;

// SETO* condition codes, plus SETO / SETUO.
def vsetoeq : VecSetCCFrag<SETOEQ>;
def vsetogt : VecSetCCFrag<SETOGT>;
def vsetoge : VecSetCCFrag<SETOGE>;
def vsetolt : VecSetCCFrag<SETOLT>;
def vsetole : VecSetCCFrag<SETOLE>;
def vsetone : VecSetCCFrag<SETONE>;
def vseto   : VecSetCCFrag<SETO>;
def vsetuo  : VecSetCCFrag<SETUO>;

// SETU* condition codes.
def vsetueq : VecSetCCFrag<SETUEQ>;
def vsetugt : VecSetCCFrag<SETUGT>;
def vsetuge : VecSetCCFrag<SETUGE>;
def vsetult : VecSetCCFrag<SETULT>;
def vsetule : VecSetCCFrag<SETULE>;
def vsetune : VecSetCCFrag<SETUNE>;

// Condition codes without an O/U prefix.
def vseteq  : VecSetCCFrag<SETEQ>;
def vsetgt  : VecSetCCFrag<SETGT>;
def vsetge  : VecSetCCFrag<SETGE>;
def vsetlt  : VecSetCCFrag<SETLT>;
def vsetle  : VecSetCCFrag<SETLE>;
def vsetne  : VecSetCCFrag<SETNE>;
989
// Vector comparison: applies fragment op to two inputs of class inrclass and
// produces a result of class outrclass.  The asm text is the fixed
// placeholder "Unsupported"; sop is stored as the scalar counterpart.
class Vec_Compare<PatFrag op, NVPTXRegClass outrclass, NVPTXRegClass inrclass,
                  NVPTXInst sop>
  : NVPTXVecInst<
      (outs outrclass:$dst),
      (ins inrclass:$a, inrclass:$b),
      "Unsupported",
      [(set outrclass:$dst, (op inrclass:$a, inrclass:$b))],
      sop>;
997
// Instantiates one integer vector comparison per vector type.  Input and
// result share a register class; inst8/16/32/64 are the scalar counterparts
// for the matching element width.
multiclass Vec_Compare_All<PatFrag op, NVPTXInst inst8, NVPTXInst inst16,
                           NVPTXInst inst32, NVPTXInst inst64> {
  // 8-bit elements.
  def V2I8  : Vec_Compare<op, V2I8Regs,  V2I8Regs,  inst8>;
  def V4I8  : Vec_Compare<op, V4I8Regs,  V4I8Regs,  inst8>;
  // 16-bit elements.
  def V2I16 : Vec_Compare<op, V2I16Regs, V2I16Regs, inst16>;
  def V4I16 : Vec_Compare<op, V4I16Regs, V4I16Regs, inst16>;
  // 32-bit elements.
  def V2I32 : Vec_Compare<op, V2I32Regs, V2I32Regs, inst32>;
  def V4I32 : Vec_Compare<op, V4I32Regs, V4I32Regs, inst32>;
  // 64-bit elements.
  def V2I64 : Vec_Compare<op, V2I64Regs, V2I64Regs, inst64>;
}
1012
// Integer vector comparisons, one defm per condition.  Each lists the
// scalar set instructions for i8, i16, i32 and i64 elements, in that order.
let VecInstType = isVecOther.Value in {
  defm VecSGT : Vec_Compare_All<vsetgt,
                                ISetSGTi8rr_toi8,   ISetSGTi16rr_toi16,
                                ISetSGTi32rr_toi32, ISetSGTi64rr_toi64>;
  defm VecUGT : Vec_Compare_All<vsetugt,
                                ISetUGTi8rr_toi8,   ISetUGTi16rr_toi16,
                                ISetUGTi32rr_toi32, ISetUGTi64rr_toi64>;
  defm VecSLT : Vec_Compare_All<vsetlt,
                                ISetSLTi8rr_toi8,   ISetSLTi16rr_toi16,
                                ISetSLTi32rr_toi32, ISetSLTi64rr_toi64>;
  defm VecULT : Vec_Compare_All<vsetult,
                                ISetULTi8rr_toi8,   ISetULTi16rr_toi16,
                                ISetULTi32rr_toi32, ISetULTi64rr_toi64>;
  defm VecSGE : Vec_Compare_All<vsetge,
                                ISetSGEi8rr_toi8,   ISetSGEi16rr_toi16,
                                ISetSGEi32rr_toi32, ISetSGEi64rr_toi64>;
  defm VecUGE : Vec_Compare_All<vsetuge,
                                ISetUGEi8rr_toi8,   ISetUGEi16rr_toi16,
                                ISetUGEi32rr_toi32, ISetUGEi64rr_toi64>;
  defm VecSLE : Vec_Compare_All<vsetle,
                                ISetSLEi8rr_toi8,   ISetSLEi16rr_toi16,
                                ISetSLEi32rr_toi32, ISetSLEi64rr_toi64>;
  defm VecULE : Vec_Compare_All<vsetule,
                                ISetULEi8rr_toi8,   ISetULEi16rr_toi16,
                                ISetULEi32rr_toi32, ISetULEi64rr_toi64>;
  defm VecSEQ : Vec_Compare_All<vseteq,
                                ISetSEQi8rr_toi8,   ISetSEQi16rr_toi16,
                                ISetSEQi32rr_toi32, ISetSEQi64rr_toi64>;
  defm VecUEQ : Vec_Compare_All<vsetueq,
                                ISetUEQi8rr_toi8,   ISetUEQi16rr_toi16,
                                ISetUEQi32rr_toi32, ISetUEQi64rr_toi64>;
  defm VecSNE : Vec_Compare_All<vsetne,
                                ISetSNEi8rr_toi8,   ISetSNEi16rr_toi16,
                                ISetSNEi32rr_toi32, ISetSNEi64rr_toi64>;
  defm VecUNE : Vec_Compare_All<vsetune,
                                ISetUNEi8rr_toi8,   ISetUNEi16rr_toi16,
                                ISetUNEi32rr_toi32, ISetUNEi64rr_toi64>;
}
1039
// Instantiates one floating-point vector comparison per fp vector type.  The
// result lives in the integer vector class with the same element count and
// width as the fp inputs.
multiclass FVec_Compare_All<PatFrag op, NVPTXInst instf32, NVPTXInst instf64> {
  def V2F32 : Vec_Compare<op, V2I32Regs, V2F32Regs, instf32>;
  def V4F32 : Vec_Compare<op, V4I32Regs, V4F32Regs, instf32>;
  def V2F64 : Vec_Compare<op, V2I64Regs, V2F64Regs, instf64>;
}
1048
// Floating-point vector comparisons.  Each defm names the f32 and f64 scalar
// set instructions for its condition.
let VecInstType = isVecOther.Value in {
  // Built on the vseto* fragments.
  defm FVecGT
    : FVec_Compare_All<vsetogt, FSetGTf32rr_toi32, FSetGTf64rr_toi64>;
  defm FVecLT
    : FVec_Compare_All<vsetolt, FSetLTf32rr_toi32, FSetLTf64rr_toi64>;
  defm FVecGE
    : FVec_Compare_All<vsetoge, FSetGEf32rr_toi32, FSetGEf64rr_toi64>;
  defm FVecLE
    : FVec_Compare_All<vsetole, FSetLEf32rr_toi32, FSetLEf64rr_toi64>;
  defm FVecEQ
    : FVec_Compare_All<vsetoeq, FSetEQf32rr_toi32, FSetEQf64rr_toi64>;
  defm FVecNE
    : FVec_Compare_All<vsetone, FSetNEf32rr_toi32, FSetNEf64rr_toi64>;

  // Built on the vsetu* fragments.
  defm FVecUGT
    : FVec_Compare_All<vsetugt, FSetUGTf32rr_toi32, FSetUGTf64rr_toi64>;
  defm FVecULT
    : FVec_Compare_All<vsetult, FSetULTf32rr_toi32, FSetULTf64rr_toi64>;
  defm FVecUGE
    : FVec_Compare_All<vsetuge, FSetUGEf32rr_toi32, FSetUGEf64rr_toi64>;
  defm FVecULE
    : FVec_Compare_All<vsetule, FSetULEf32rr_toi32, FSetULEf64rr_toi64>;
  defm FVecUEQ
    : FVec_Compare_All<vsetueq, FSetUEQf32rr_toi32, FSetUEQf64rr_toi64>;
  defm FVecUNE
    : FVec_Compare_All<vsetune, FSetUNEf32rr_toi32, FSetUNEf64rr_toi64>;

  // SETO / SETUO.
  defm FVecNUM
    : FVec_Compare_All<vseto, FSetNUMf32rr_toi32, FSetNUMf64rr_toi64>;
  defm FVecNAN
    : FVec_Compare_All<vsetuo, FSetNANf32rr_toi32, FSetNANf64rr_toi64>;
}
1081
// Scalarized "ld.param<opstr>" that reads four values from [retval0+$b] into
// four separate registers.  Operand $a is declared but does not appear in
// the asm text.  No selection pattern.
class LoadParamScalar4Inst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<
      (outs regclass:$d1, regclass:$d2, regclass:$d3, regclass:$d4),
      (ins i32imm:$a, i32imm:$b),
      !strconcat("ld.param", opstr,
                 "\t{{$d1, $d2, $d3, $d4}}, [retval0+$b];"),
      []>;

// Two-register variant of LoadParamScalar4Inst.
class LoadParamScalar2Inst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<
      (outs regclass:$d1, regclass:$d2),
      (ins i32imm:$a, i32imm:$b),
      !strconcat("ld.param", opstr,
                 "\t{{$d1, $d2}}, [retval0+$b];"),
      []>;
1093
1094
// Scalarized "st.param<opstr>" that writes four registers to [param$a+$b].
// No selection pattern.
class StoreParamScalar4Inst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<
      (outs),
      (ins regclass:$s1, regclass:$s2, regclass:$s3, regclass:$s4,
           i32imm:$a, i32imm:$b),
      !strconcat("st.param", opstr,
                 "\t[param$a+$b], {{$s1, $s2, $s3, $s4}};"),
      []>;

// Two-register variant of StoreParamScalar4Inst.
class StoreParamScalar2Inst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<
      (outs),
      (ins regclass:$s1, regclass:$s2, i32imm:$a, i32imm:$b),
      !strconcat("st.param", opstr,
                 "\t[param$a+$b], {{$s1, $s2}};"),
      []>;
1107
// Scalarized "st.param<opstr>" that writes four registers to
// [func_retval+$a].  No selection pattern.
class StoreRetvalScalar4Inst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<
      (outs),
      (ins regclass:$s1, regclass:$s2, regclass:$s3, regclass:$s4,
           i32imm:$a),
      !strconcat("st.param", opstr,
                 "\t[func_retval+$a], {{$s1, $s2, $s3, $s4}};"),
      []>;

// Two-register variant of StoreRetvalScalar4Inst.
class StoreRetvalScalar2Inst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<
      (outs),
      (ins regclass:$s1, regclass:$s2, i32imm:$a),
      !strconcat("st.param", opstr,
                 "\t[func_retval+$a], {{$s1, $s2}};"),
      []>;
1120
// Scalarized parameter loads.  The destination register class of each
// instruction matches the element width named in its type suffix, in the
// same way as the StoreParamScalar* and StoreRetvalScalar* definitions.

// Four-element integer loads.
def LoadParamScalar4I32 : LoadParamScalar4Inst<Int32Regs, ".v4.b32">;
def LoadParamScalar4I16 : LoadParamScalar4Inst<Int16Regs, ".v4.b16">;
def LoadParamScalar4I8  : LoadParamScalar4Inst<Int8Regs, ".v4.b8">;

// Two-element integer loads.  The 64-, 16- and 8-bit variants previously
// declared Int32Regs destinations, which disagreed with their ".b64",
// ".b16" and ".b8" suffixes and with the corresponding store definitions.
def LoadParamScalar2I64 : LoadParamScalar2Inst<Int64Regs, ".v2.b64">;
def LoadParamScalar2I32 : LoadParamScalar2Inst<Int32Regs, ".v2.b32">;
def LoadParamScalar2I16 : LoadParamScalar2Inst<Int16Regs, ".v2.b16">;
def LoadParamScalar2I8  : LoadParamScalar2Inst<Int8Regs, ".v2.b8">;

// Floating-point loads.
def LoadParamScalar4F32 : LoadParamScalar4Inst<Float32Regs, ".v4.f32">;
def LoadParamScalar2F32 : LoadParamScalar2Inst<Float32Regs, ".v2.f32">;
def LoadParamScalar2F64 : LoadParamScalar2Inst<Float64Regs, ".v2.f64">;
1133
// Scalarized parameter stores.  Each source register class matches the
// element width named in the type suffix.
def StoreParamScalar4I32
  : StoreParamScalar4Inst<Int32Regs, ".v4.b32">;
def StoreParamScalar4I16
  : StoreParamScalar4Inst<Int16Regs, ".v4.b16">;
def StoreParamScalar4I8
  : StoreParamScalar4Inst<Int8Regs, ".v4.b8">;

def StoreParamScalar2I64
  : StoreParamScalar2Inst<Int64Regs, ".v2.b64">;
def StoreParamScalar2I32
  : StoreParamScalar2Inst<Int32Regs, ".v2.b32">;
def StoreParamScalar2I16
  : StoreParamScalar2Inst<Int16Regs, ".v2.b16">;
def StoreParamScalar2I8
  : StoreParamScalar2Inst<Int8Regs, ".v2.b8">;

def StoreParamScalar4F32
  : StoreParamScalar4Inst<Float32Regs, ".v4.f32">;
def StoreParamScalar2F32
  : StoreParamScalar2Inst<Float32Regs, ".v2.f32">;
def StoreParamScalar2F64
  : StoreParamScalar2Inst<Float64Regs, ".v2.f64">;

// Scalarized return-value stores, laid out the same way.
def StoreRetvalScalar4I32
  : StoreRetvalScalar4Inst<Int32Regs, ".v4.b32">;
def StoreRetvalScalar4I16
  : StoreRetvalScalar4Inst<Int16Regs, ".v4.b16">;
def StoreRetvalScalar4I8
  : StoreRetvalScalar4Inst<Int8Regs, ".v4.b8">;

def StoreRetvalScalar2I64
  : StoreRetvalScalar2Inst<Int64Regs, ".v2.b64">;
def StoreRetvalScalar2I32
  : StoreRetvalScalar2Inst<Int32Regs, ".v2.b32">;
def StoreRetvalScalar2I16
  : StoreRetvalScalar2Inst<Int16Regs, ".v2.b16">;
def StoreRetvalScalar2I8
  : StoreRetvalScalar2Inst<Int8Regs, ".v2.b8">;

def StoreRetvalScalar4F32
  : StoreRetvalScalar4Inst<Float32Regs, ".v4.f32">;
def StoreRetvalScalar2F32
  : StoreRetvalScalar2Inst<Float32Regs, ".v2.f32">;
def StoreRetvalScalar2F64
  : StoreRetvalScalar2Inst<Float64Regs, ".v2.f64">;
1159
// Vector-typed parameter load matching the LoadParam node with two i32
// immediates.  The asm text is a fixed descriptive string; the opstr
// argument is accepted but not used in it.  sop is stored as the scalar
// counterpart.
class LoadParamVecInst<NVPTXRegClass regclass, string opstr, NVPTXInst sop=NOP>
  : NVPTXVecInst<
      (outs regclass:$dst),
      (ins i32imm:$a, i32imm:$b),
      "loadparam : $dst <- [$a, $b]",
      [(set regclass:$dst, (LoadParam (i32 imm:$a), (i32 imm:$b)))],
      sop>;

// Vector-typed parameter store matching the StoreParam node.  As above,
// opstr does not appear in the asm text.
class StoreParamVecInst<NVPTXRegClass regclass, string opstr, NVPTXInst sop=NOP>
  : NVPTXVecInst<
      (outs),
      (ins regclass:$val, i32imm:$a, i32imm:$b),
      "storeparam : [$a, $b] <- $val",
      [(StoreParam (i32 imm:$a), (i32 imm:$b), regclass:$val)],
      sop>;

// Vector-typed return-value store matching the StoreRetval node.  As above,
// opstr does not appear in the asm text.
class StoreRetvalVecInst<NVPTXRegClass regclass, string opstr,
                         NVPTXInst sop=NOP>
  : NVPTXVecInst<
      (outs),
      (ins regclass:$val, i32imm:$a),
      "storeretval : retval[$a] <- $val",
      [(StoreRetval (i32 imm:$a), regclass:$val)],
      sop>;
1176
// Vector parameter loads, tagged isVecLD.  Each names the scalarized load
// with the same element count and type as its scalar counterpart.
let VecInstType = isVecLD.Value in {
  def LoadParamV4I32
    : LoadParamVecInst<V4I32Regs, ".v4.b32", LoadParamScalar4I32>;
  def LoadParamV4I16
    : LoadParamVecInst<V4I16Regs, ".v4.b16", LoadParamScalar4I16>;
  def LoadParamV4I8
    : LoadParamVecInst<V4I8Regs, ".v4.b8", LoadParamScalar4I8>;

  def LoadParamV2I64
    : LoadParamVecInst<V2I64Regs, ".v2.b64", LoadParamScalar2I64>;
  def LoadParamV2I32
    : LoadParamVecInst<V2I32Regs, ".v2.b32", LoadParamScalar2I32>;
  def LoadParamV2I16
    : LoadParamVecInst<V2I16Regs, ".v2.b16", LoadParamScalar2I16>;
  def LoadParamV2I8
    : LoadParamVecInst<V2I8Regs, ".v2.b8", LoadParamScalar2I8>;

  def LoadParamV4F32
    : LoadParamVecInst<V4F32Regs, ".v4.f32", LoadParamScalar4F32>;
  def LoadParamV2F32
    : LoadParamVecInst<V2F32Regs, ".v2.f32", LoadParamScalar2F32>;
  def LoadParamV2F64
    : LoadParamVecInst<V2F64Regs, ".v2.f64", LoadParamScalar2F64>;
}
1201
// Vector parameter and return-value stores, tagged isVecST.  Each names the
// scalarized store with the same element count and type as its scalar
// counterpart.
let VecInstType = isVecST.Value in {
  // Parameter stores.
  def StoreParamV4I32
    : StoreParamVecInst<V4I32Regs, ".v4.b32", StoreParamScalar4I32>;
  def StoreParamV4I16
    : StoreParamVecInst<V4I16Regs, ".v4.b16", StoreParamScalar4I16>;
  def StoreParamV4I8
    : StoreParamVecInst<V4I8Regs, ".v4.b8", StoreParamScalar4I8>;

  def StoreParamV2I64
    : StoreParamVecInst<V2I64Regs, ".v2.b64", StoreParamScalar2I64>;
  def StoreParamV2I32
    : StoreParamVecInst<V2I32Regs, ".v2.b32", StoreParamScalar2I32>;
  def StoreParamV2I16
    : StoreParamVecInst<V2I16Regs, ".v2.b16", StoreParamScalar2I16>;
  def StoreParamV2I8
    : StoreParamVecInst<V2I8Regs, ".v2.b8", StoreParamScalar2I8>;

  def StoreParamV4F32
    : StoreParamVecInst<V4F32Regs, ".v4.f32", StoreParamScalar4F32>;
  def StoreParamV2F32
    : StoreParamVecInst<V2F32Regs, ".v2.f32", StoreParamScalar2F32>;
  def StoreParamV2F64
    : StoreParamVecInst<V2F64Regs, ".v2.f64", StoreParamScalar2F64>;

  // Return-value stores.
  def StoreRetvalV4I32
    : StoreRetvalVecInst<V4I32Regs, ".v4.b32", StoreRetvalScalar4I32>;
  def StoreRetvalV4I16
    : StoreRetvalVecInst<V4I16Regs, ".v4.b16", StoreRetvalScalar4I16>;
  def StoreRetvalV4I8
    : StoreRetvalVecInst<V4I8Regs, ".v4.b8", StoreRetvalScalar4I8>;

  def StoreRetvalV2I64
    : StoreRetvalVecInst<V2I64Regs, ".v2.b64", StoreRetvalScalar2I64>;
  def StoreRetvalV2I32
    : StoreRetvalVecInst<V2I32Regs, ".v2.b32", StoreRetvalScalar2I32>;
  def StoreRetvalV2I16
    : StoreRetvalVecInst<V2I16Regs, ".v2.b16", StoreRetvalScalar2I16>;
  def StoreRetvalV2I8
    : StoreRetvalVecInst<V2I8Regs, ".v2.b8", StoreRetvalScalar2I8>;

  def StoreRetvalV4F32
    : StoreRetvalVecInst<V4F32Regs, ".v4.f32", StoreRetvalScalar4F32>;
  def StoreRetvalV2F32
    : StoreRetvalVecInst<V2F32Regs, ".v2.f32", StoreRetvalScalar2F32>;
  def StoreRetvalV2F64
    : StoreRetvalVecInst<V2F64Regs, ".v2.f64", StoreRetvalScalar2F64>;
}
1250
1251
// Integer vector -> integer scalar bitconvert.  Every element is extracted
// and the pieces are handed, in element order, to the matching V*to*
// packing instruction.

// v4i8 -> i32
def : Pat<(i32 (bitconvert V4I8Regs:$s)),
          (V4I8toI32 (V4i8Extract V4I8Regs:$s, 0),
                     (V4i8Extract V4I8Regs:$s, 1),
                     (V4i8Extract V4I8Regs:$s, 2),
                     (V4i8Extract V4I8Regs:$s, 3))>;

// v4i16 -> i64
def : Pat<(i64 (bitconvert V4I16Regs:$s)),
          (V4I16toI64 (V4i16Extract V4I16Regs:$s, 0),
                      (V4i16Extract V4I16Regs:$s, 1),
                      (V4i16Extract V4I16Regs:$s, 2),
                      (V4i16Extract V4I16Regs:$s, 3))>;

// v2i8 -> i16
def : Pat<(i16 (bitconvert V2I8Regs:$s)),
          (V2I8toI16 (V2i8Extract V2I8Regs:$s, 0),
                     (V2i8Extract V2I8Regs:$s, 1))>;

// v2i16 -> i32
def : Pat<(i32 (bitconvert V2I16Regs:$s)),
          (V2I16toI32 (V2i16Extract V2I16Regs:$s, 0),
                      (V2i16Extract V2I16Regs:$s, 1))>;

// v2i32 -> i64
def : Pat<(i64 (bitconvert V2I32Regs:$s)),
          (V2I32toI64 (V2i32Extract V2I32Regs:$s, 0),
                      (V2i32Extract V2I32Regs:$s, 1))>;
1274
// Integer scalar -> integer vector bitconvert.  These are vector
// instructions tagged isVecDest whose asm text is the placeholder "Error!";
// each records the scalar unpacking instruction as its scalar counterpart.
let VecInstType = isVecDest.Value in {
  // i32 -> v4i8
  def VecI32toV4I8
    : NVPTXVecInst<(outs V4I8Regs:$d), (ins Int32Regs:$s),
                   "Error!",
                   [(set V4I8Regs:$d, (bitconvert Int32Regs:$s))],
                   I32toV4I8>;

  // i64 -> v4i16
  def VecI64toV4I16
    : NVPTXVecInst<(outs V4I16Regs:$d), (ins Int64Regs:$s),
                   "Error!",
                   [(set V4I16Regs:$d, (bitconvert Int64Regs:$s))],
                   I64toV4I16>;

  // i16 -> v2i8
  def VecI16toV2I8
    : NVPTXVecInst<(outs V2I8Regs:$d), (ins Int16Regs:$s),
                   "Error!",
                   [(set V2I8Regs:$d, (bitconvert Int16Regs:$s))],
                   I16toV2I8>;

  // i32 -> v2i16
  def VecI32toV2I16
    : NVPTXVecInst<(outs V2I16Regs:$d), (ins Int32Regs:$s),
                   "Error!",
                   [(set V2I16Regs:$d, (bitconvert Int32Regs:$s))],
                   I32toV2I16>;

  // i64 -> v2i32
  def VecI64toV2I32
    : NVPTXVecInst<(outs V2I32Regs:$d), (ins Int64Regs:$s),
                   "Error!",
                   [(set V2I32Regs:$d, (bitconvert Int64Regs:$s))],
                   I64toV2I32>;
}
1303
// Integer vector -> integer vector bitconvert.  Each pattern packs the
// source elements into a scalar and then unpacks that scalar into the
// destination vector; the 128-bit cases work one 64-bit half at a time.

// v4i8 -> v2i16
def : Pat<(v2i16 (bitconvert V4I8Regs:$s)),
          (VecI32toV2I16
            (V4I8toI32 (V4i8Extract V4I8Regs:$s, 0),
                       (V4i8Extract V4I8Regs:$s, 1),
                       (V4i8Extract V4I8Regs:$s, 2),
                       (V4i8Extract V4I8Regs:$s, 3)))>;

// v4i16 -> v2i32
def : Pat<(v2i32 (bitconvert V4I16Regs:$s)),
          (VecI64toV2I32
            (V4I16toI64 (V4i16Extract V4I16Regs:$s, 0),
                        (V4i16Extract V4I16Regs:$s, 1),
                        (V4i16Extract V4I16Regs:$s, 2),
                        (V4i16Extract V4I16Regs:$s, 3)))>;

// v2i16 -> v4i8
def : Pat<(v4i8 (bitconvert V2I16Regs:$s)),
          (VecI32toV4I8
            (V2I16toI32 (V2i16Extract V2I16Regs:$s, 0),
                        (V2i16Extract V2I16Regs:$s, 1)))>;

// v2i32 -> v4i16
def : Pat<(v4i16 (bitconvert V2I32Regs:$s)),
          (VecI64toV4I16
            (V2I32toI64 (V2i32Extract V2I32Regs:$s, 0),
                        (V2i32Extract V2I32Regs:$s, 1)))>;

// v2i64 -> v4i32
def : Pat<(v4i32 (bitconvert V2I64Regs:$s)),
          (Build_Vector4_i32
            (V2i32Extract
              (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 0)), 0),
            (V2i32Extract
              (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 0)), 1),
            (V2i32Extract
              (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 1)), 0),
            (V2i32Extract
              (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 1)), 1))>;

// v4i32 -> v2i64
def : Pat<(v2i64 (bitconvert V4I32Regs:$s)),
          (Build_Vector2_i64
            (V2I32toI64 (V4i32Extract V4I32Regs:$s, 0),
                        (V4i32Extract V4I32Regs:$s, 1)),
            (V2I32toI64 (V4i32Extract V4I32Regs:$s, 2),
                        (V4i32Extract V4I32Regs:$s, 3)))>;
1335
// Floating-point scalar -> floating-point vector bitconvert.
// f64 -> v2f32: a vector instruction tagged isVecDest with placeholder asm
// text, recording F64toV2F32 as its scalar counterpart.
let VecInstType = isVecDest.Value in {
  def VecF64toV2F32
    : NVPTXVecInst<(outs V2F32Regs:$d), (ins Float64Regs:$s),
                   "Error!",
                   [(set V2F32Regs:$d, (bitconvert Float64Regs:$s))],
                   F64toV2F32>;
}

// Floating-point vector -> floating-point scalar bitconvert.
// v2f32 -> f64: extract both elements and pack them with V2F32toF64.
def : Pat<(f64 (bitconvert V2F32Regs:$s)),
          (V2F32toF64 (V2f32Extract V2F32Regs:$s, 0),
                      (V2f32Extract V2F32Regs:$s, 1))>;
1349
// Floating-point scalar -> integer vector bitconvert: reinterpret the value
// as an integer of the same width, then unpack it into the vector.

// f32 -> v4i8
def : Pat<(v4i8 (bitconvert Float32Regs:$s)),
          (VecI32toV4I8 (BITCONVERT_32_F2I Float32Regs:$s))>;

// f32 -> v2i16
def : Pat<(v2i16 (bitconvert Float32Regs:$s)),
          (VecI32toV2I16 (BITCONVERT_32_F2I Float32Regs:$s))>;

// f64 -> v4i16
def : Pat<(v4i16 (bitconvert Float64Regs:$s)),
          (VecI64toV4I16 (BITCONVERT_64_F2I Float64Regs:$s))>;

// f64 -> v2i32
def : Pat<(v2i32 (bitconvert Float64Regs:$s)),
          (VecI64toV2I32 (BITCONVERT_64_F2I Float64Regs:$s))>;
1363
// Integer vector -> floating-point scalar bitconvert: pack the elements
// into an integer scalar, then reinterpret it as floating point.

// v4i8 -> f32
def : Pat<(f32 (bitconvert V4I8Regs:$s)),
          (BITCONVERT_32_I2F
            (V4I8toI32 (V4i8Extract V4I8Regs:$s, 0),
                       (V4i8Extract V4I8Regs:$s, 1),
                       (V4i8Extract V4I8Regs:$s, 2),
                       (V4i8Extract V4I8Regs:$s, 3)))>;

// v4i16 -> f64
def : Pat<(f64 (bitconvert V4I16Regs:$s)),
          (BITCONVERT_64_I2F
            (V4I16toI64 (V4i16Extract V4I16Regs:$s, 0),
                        (V4i16Extract V4I16Regs:$s, 1),
                        (V4i16Extract V4I16Regs:$s, 2),
                        (V4i16Extract V4I16Regs:$s, 3)))>;

// v2i16 -> f32
def : Pat<(f32 (bitconvert V2I16Regs:$s)),
          (BITCONVERT_32_I2F
            (V2I16toI32 (V2i16Extract V2I16Regs:$s, 0),
                        (V2i16Extract V2I16Regs:$s, 1)))>;

// v2i32 -> f64
def : Pat<(f64 (bitconvert V2I32Regs:$s)),
          (BITCONVERT_64_I2F
            (V2I32toI64 (V2i32Extract V2I32Regs:$s, 0),
                        (V2i32Extract V2I32Regs:$s, 1)))>;
1383
// Integer scalar -> floating-point vector bitconvert.
// i64 -> v2f32: reinterpret as f64, then unpack into two f32 elements.
def : Pat<(v2f32 (bitconvert Int64Regs:$s)),
          (VecF64toV2F32 (BITCONVERT_64_I2F Int64Regs:$s))>;

// Floating-point vector -> integer scalar bitconvert.
// v2f32 -> i64: pack both elements into an f64, then reinterpret as i64.
def : Pat<(i64 (bitconvert V2F32Regs:$s)),
          (BITCONVERT_64_F2I
            (V2F32toF64 (V2f32Extract V2F32Regs:$s, 0),
                        (V2f32Extract V2F32Regs:$s, 1)))>;
1394
// Integer vector -> floating-point vector bitconvert.

// v2i64 -> v4f32: split each i64 element into two i32 halves and
// reinterpret every half as f32.
def : Pat<(v4f32 (bitconvert V2I64Regs:$s)),
          (Build_Vector4_f32
            (BITCONVERT_32_I2F
              (V2i32Extract
                (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 0)), 0)),
            (BITCONVERT_32_I2F
              (V2i32Extract
                (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 0)), 1)),
            (BITCONVERT_32_I2F
              (V2i32Extract
                (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 1)), 0)),
            (BITCONVERT_32_I2F
              (V2i32Extract
                (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 1)), 1)))>;

// v2i64 -> v2f64: element-wise reinterpretation.
def : Pat<(v2f64 (bitconvert V2I64Regs:$s)),
          (Build_Vector2_f64
            (BITCONVERT_64_I2F (V2i64Extract V2I64Regs:$s, 0)),
            (BITCONVERT_64_I2F (V2i64Extract V2I64Regs:$s, 1)))>;

// v2i32 -> v2f32: element-wise reinterpretation.
def : Pat<(v2f32 (bitconvert V2I32Regs:$s)),
          (Build_Vector2_f32
            (BITCONVERT_32_I2F (V2i32Extract V2I32Regs:$s, 0)),
            (BITCONVERT_32_I2F (V2i32Extract V2I32Regs:$s, 1)))>;

// v4i32 -> v2f64: pack element pairs (0,1) and (2,3) into i64 values and
// reinterpret each as f64.
def : Pat<(v2f64 (bitconvert V4I32Regs:$s)),
          (Build_Vector2_f64
            (BITCONVERT_64_I2F
              (V2I32toI64 (V4i32Extract V4I32Regs:$s, 0),
                          (V4i32Extract V4I32Regs:$s, 1))),
            (BITCONVERT_64_I2F
              (V2I32toI64 (V4i32Extract V4I32Regs:$s, 2),
                          (V4i32Extract V4I32Regs:$s, 3))))>;

// v4i32 -> v4f32: element-wise reinterpretation.
def : Pat<(v4f32 (bitconvert V4I32Regs:$s)),
          (Build_Vector4_f32
            (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s, 0)),
            (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s, 1)),
            (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s, 2)),
            (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s, 3)))>;

// v4i16 -> v2f32: pack into an i64, reinterpret as f64, then unpack.
def : Pat<(v2f32 (bitconvert V4I16Regs:$s)),
          (VecF64toV2F32
            (BITCONVERT_64_I2F
              (V4I16toI64 (V4i16Extract V4I16Regs:$s, 0),
                          (V4i16Extract V4I16Regs:$s, 1),
                          (V4i16Extract V4I16Regs:$s, 2),
                          (V4i16Extract V4I16Regs:$s, 3))))>;
1438
// Floating-point vector -> integer vector bitconvert.

// v2i64 <- v4f32: pack element pairs (0,1) and (2,3) into f64 values and
// reinterpret each as i64.
def : Pat<(v2i64 (bitconvert V4F32Regs:$s)),
          (Build_Vector2_i64
            (BITCONVERT_64_F2I
              (V2F32toF64 (V4f32Extract V4F32Regs:$s, 0),
                          (V4f32Extract V4F32Regs:$s, 1))),
            (BITCONVERT_64_F2I
              (V2F32toF64 (V4f32Extract V4F32Regs:$s, 2),
                          (V4f32Extract V4F32Regs:$s, 3))))>;

// v2i64 <- v2f64: element-wise reinterpretation.
def : Pat<(v2i64 (bitconvert V2F64Regs:$s)),
          (Build_Vector2_i64
            (BITCONVERT_64_F2I (V2f64Extract V2F64Regs:$s, 0)),
            (BITCONVERT_64_F2I (V2f64Extract V2F64Regs:$s, 1)))>;

// v2i32 <- v2f32: element-wise reinterpretation.
def : Pat<(v2i32 (bitconvert V2F32Regs:$s)),
          (Build_Vector2_i32
            (BITCONVERT_32_F2I (V2f32Extract V2F32Regs:$s, 0)),
            (BITCONVERT_32_F2I (V2f32Extract V2F32Regs:$s, 1)))>;

// v4i32 <- v2f64: split each f64 element into two f32 halves and
// reinterpret every half as i32.
def : Pat<(v4i32 (bitconvert V2F64Regs:$s)),
          (Build_Vector4_i32
            (BITCONVERT_32_F2I
              (V2f32Extract
                (VecF64toV2F32 (V2f64Extract V2F64Regs:$s, 0)), 0)),
            (BITCONVERT_32_F2I
              (V2f32Extract
                (VecF64toV2F32 (V2f64Extract V2F64Regs:$s, 0)), 1)),
            (BITCONVERT_32_F2I
              (V2f32Extract
                (VecF64toV2F32 (V2f64Extract V2F64Regs:$s, 1)), 0)),
            (BITCONVERT_32_F2I
              (V2f32Extract
                (VecF64toV2F32 (V2f64Extract V2F64Regs:$s, 1)), 1)))>;

// v4i32 <- v4f32: element-wise reinterpretation.
def : Pat<(v4i32 (bitconvert V4F32Regs:$s)),
          (Build_Vector4_i32
            (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s, 0)),
            (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s, 1)),
            (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s, 2)),
            (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s, 3)))>;

// v4i16 <- v2f32: pack into an f64, reinterpret as i64, then unpack.
def : Pat<(v4i16 (bitconvert V2F32Regs:$s)),
          (VecI64toV4I16
            (BITCONVERT_64_F2I
              (V2F32toF64 (V2f32Extract V2F32Regs:$s, 0),
                          (V2f32Extract V2F32Regs:$s, 1))))>;