Justin Holewinski | ae556d3 | 2012-05-04 20:18:50 +0000 | [diff] [blame] | 1 | //===- NVPTXVector.td - NVPTX Vector Specific Instruction defs -*- tblgen-*-==// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | |
| 10 | //----------------------------------- |
| 11 | // Vector Specific |
| 12 | //----------------------------------- |
| 13 | |
| 14 | // |
| 15 | // All vector instructions derive from NVPTXVecInst |
| 16 | // |
| 17 | |
// Common base for the vector instructions in this file.  The operand lists,
// assembly string and selection pattern are forwarded unchanged to NVPTXInst;
// the instruction passed as `sInst` (NOP when omitted) is recorded in the
// `scalarInst` field.
class NVPTXVecInst<dag outs, dag ins, string asmstr, list<dag> pattern,
                   NVPTXInst sInst = NOP>
  : NVPTXInst<outs, ins, asmstr, pattern> {
  NVPTXInst scalarInst = sInst;
}
| 23 | |
// Shared shape of every "extract element" instruction below.
//
//   elemRC  - register class of the scalar result ($dst)
//   vecRC   - register class of the vector source ($src)
//   vecVT   - value type used to annotate $src in the selection pattern
//   movType - type suffix appended to "mov." in the assembly string
//   sInst   - instruction recorded in NVPTXVecInst::scalarInst
//
// The element index is the immediate operand $c; it is printed through the
// `vecelem` operand modifier directly after $src.
class VecExtractInst<RegisterClass elemRC, RegisterClass vecRC,
                     ValueType vecVT, string movType, NVPTXInst sInst>
  : NVPTXVecInst<(outs elemRC:$dst),
                 (ins vecRC:$src, i8imm:$c),
                 !strconcat("mov.", movType, " \t$dst, $src${c:vecelem};"),
                 [(set elemRC:$dst,
                       (extractelt (vecVT vecRC:$src), imm:$c))],
                 sInst>;

// Every def in this scope is marked as cheap as a move and tagged with
// isVecExtract.Value.  Note that the 8-bit element variants are printed with
// a u16 move, exactly like the 16-bit ones.
let isAsCheapAsAMove=1, VecInstType=isVecExtract.Value in {
  // Extract v2i16
  def V2i16Extract
    : VecExtractInst<Int16Regs, V2I16Regs, v2i16, "u16", IMOV16rr>;

  // Extract v4i16
  def V4i16Extract
    : VecExtractInst<Int16Regs, V4I16Regs, v4i16, "u16", IMOV16rr>;

  // Extract v2i8
  def V2i8Extract
    : VecExtractInst<Int8Regs, V2I8Regs, v2i8, "u16", IMOV8rr>;

  // Extract v4i8
  def V4i8Extract
    : VecExtractInst<Int8Regs, V4I8Regs, v4i8, "u16", IMOV8rr>;

  // Extract v2i32
  def V2i32Extract
    : VecExtractInst<Int32Regs, V2I32Regs, v2i32, "u32", IMOV32rr>;

  // Extract v2f32
  def V2f32Extract
    : VecExtractInst<Float32Regs, V2F32Regs, v2f32, "f32", FMOV32rr>;

  // Extract v2i64
  def V2i64Extract
    : VecExtractInst<Int64Regs, V2I64Regs, v2i64, "u64", IMOV64rr>;

  // Extract v2f64
  def V2f64Extract
    : VecExtractInst<Float64Regs, V2F64Regs, v2f64, "f64", FMOV64rr>;

  // Extract v4i32
  def V4i32Extract
    : VecExtractInst<Int32Regs, V4I32Regs, v4i32, "u32", IMOV32rr>;

  // Extract v4f32
  def V4f32Extract
    : VecExtractInst<Float32Regs, V4F32Regs, v4f32, "f32", FMOV32rr>;
}
| 105 | |
// Shared shape of every "insert element" instruction below.
//
//   elemRC  - register class of the scalar being inserted ($val)
//   vecRC   - register class of the vector source ($src) and result ($dst)
//   vecSize - element count printed after "mov.v" ("2" or "4")
//   movType - type suffix used by both emitted mov lines
//   sInst   - instruction recorded in NVPTXVecInst::scalarInst
//
// The assembly string has two lines: a full-vector mov from $src to $dst
// (both printed with the `vecfull` modifier), followed by a scalar mov of
// $val into the element of $dst selected by the immediate $c (printed with
// the `vecelem` modifier).
class VecInsertInst<RegisterClass elemRC, RegisterClass vecRC,
                    string vecSize, string movType, NVPTXInst sInst>
  : NVPTXVecInst<(outs vecRC:$dst),
                 (ins vecRC:$src, elemRC:$val, i8imm:$c),
                 !strconcat("mov.v", vecSize, ".", movType,
                            " \t${dst:vecfull}, ${src:vecfull};",
                            "\n\tmov.", movType,
                            " \t$dst${c:vecelem}, $val;"),
                 [(set vecRC:$dst,
                       (insertelt vecRC:$src, elemRC:$val, imm:$c))],
                 sInst>;

// Every def in this scope is marked as cheap as a move and tagged with
// isVecInsert.Value.  The 8-bit element variants are printed with u16 moves,
// exactly like the 16-bit ones.
let isAsCheapAsAMove=1, VecInstType=isVecInsert.Value in {
  // Insert v2i8
  def V2i8Insert
    : VecInsertInst<Int8Regs, V2I8Regs, "2", "u16", IMOV8rr>;

  // Insert v4i8
  def V4i8Insert
    : VecInsertInst<Int8Regs, V4I8Regs, "4", "u16", IMOV8rr>;

  // Insert v2i16
  def V2i16Insert
    : VecInsertInst<Int16Regs, V2I16Regs, "2", "u16", IMOV16rr>;

  // Insert v4i16
  def V4i16Insert
    : VecInsertInst<Int16Regs, V4I16Regs, "4", "u16", IMOV16rr>;

  // Insert v2i32
  def V2i32Insert
    : VecInsertInst<Int32Regs, V2I32Regs, "2", "u32", IMOV32rr>;

  // Insert v2f32
  def V2f32Insert
    : VecInsertInst<Float32Regs, V2F32Regs, "2", "f32", FMOV32rr>;

  // Insert v2i64
  def V2i64Insert
    : VecInsertInst<Int64Regs, V2I64Regs, "2", "u64", IMOV64rr>;

  // Insert v2f64
  def V2f64Insert
    : VecInsertInst<Float64Regs, V2F64Regs, "2", "f64", FMOV64rr>;

  // Insert v4i32
  def V4i32Insert
    : VecInsertInst<Int32Regs, V4I32Regs, "4", "u32", IMOV32rr>;

  // Insert v4f32
  def V4f32Insert
    : VecInsertInst<Float32Regs, V4F32Regs, "4", "f32", FMOV32rr>;
}
| 195 | |
// Wrapper that carries an assembly string in its `s` field so that it can be
// passed around as a typed template argument.
class BinOpAsmString<string c> {
  string s = c;
}

// The classes below build multi-line assembly strings by repeating `opcode`
// once per vector element.  Element k is addressed by appending "_k" to each
// operand name; lines are separated by "\n\t".  Each string is assembled with
// a single variadic !strconcat.

// Four lines of the form "<opcode> \t${dst}_k, ${a}_k, ${b}_k;".
class V4AsmStr<string opcode> : BinOpAsmString<
  !strconcat(opcode, " \t${dst}_0, ${a}_0, ${b}_0;\n\t",
             opcode, " \t${dst}_1, ${a}_1, ${b}_1;\n\t",
             opcode, " \t${dst}_2, ${a}_2, ${b}_2;\n\t",
             opcode, " \t${dst}_3, ${a}_3, ${b}_3;")>;

// Two lines of the form "<opcode> \t${dst}_k, ${a}_k, ${b}_k;".
class V2AsmStr<string opcode> : BinOpAsmString<
  !strconcat(opcode, " \t${dst}_0, ${a}_0, ${b}_0;\n\t",
             opcode, " \t${dst}_1, ${a}_1, ${b}_1;")>;

// Four lines of the form "<opcode> \t${dst}_k, ${a}_k, ${b}_k, ${c}_k;".
class V4MADStr<string opcode> : BinOpAsmString<
  !strconcat(opcode, " \t${dst}_0, ${a}_0, ${b}_0, ${c}_0;\n\t",
             opcode, " \t${dst}_1, ${a}_1, ${b}_1, ${c}_1;\n\t",
             opcode, " \t${dst}_2, ${a}_2, ${b}_2, ${c}_2;\n\t",
             opcode, " \t${dst}_3, ${a}_3, ${b}_3, ${c}_3;")>;

// Two lines of the form "<opcode> \t${dst}_k, ${a}_k, ${b}_k, ${c}_k;".
class V2MADStr<string opcode> : BinOpAsmString<
  !strconcat(opcode, " \t${dst}_0, ${a}_0, ${b}_0, ${c}_0;\n\t",
             opcode, " \t${dst}_1, ${a}_1, ${b}_1, ${c}_1;")>;

// Four lines of the form "<opcode> \t${dst}_k, ${a}_k;".
class V4UnaryStr<string opcode> : BinOpAsmString<
  !strconcat(opcode, " \t${dst}_0, ${a}_0;\n\t",
             opcode, " \t${dst}_1, ${a}_1;\n\t",
             opcode, " \t${dst}_2, ${a}_2;\n\t",
             opcode, " \t${dst}_3, ${a}_3;")>;

// Two lines of the form "<opcode> \t${dst}_k, ${a}_k;".
class V2UnaryStr<string opcode> : BinOpAsmString<
  !strconcat(opcode, " \t${dst}_0, ${a}_0;\n\t",
             opcode, " \t${dst}_1, ${a}_1;")>;
| 238 | |
// Vector instruction with two sources.  $a, $b and $dst all use `regclass`;
// the selection pattern applies `OpNode` to ($a, $b).  The assembly text is
// taken from the `s` field of `asmstr`.
class VecBinaryOp<BinOpAsmString asmstr, SDNode OpNode, NVPTXRegClass regclass,
                  NVPTXInst sInst = NOP>
  : NVPTXVecInst<(outs regclass:$dst),
                 (ins regclass:$a, regclass:$b),
                 asmstr.s,
                 [(set regclass:$dst,
                       (OpNode regclass:$a, regclass:$b))],
                 sInst>;
| 245 | |
// Vector instruction with two sources drawn from different register classes:
// $a and $dst use `regclass1`, while $b uses `regclass2`.  The selection
// pattern applies `OpNode` to ($a, $b).
class VecShiftOp<BinOpAsmString asmstr, SDNode OpNode, NVPTXRegClass regclass1,
                 NVPTXRegClass regclass2, NVPTXInst sInst = NOP>
  : NVPTXVecInst<(outs regclass1:$dst),
                 (ins regclass1:$a, regclass2:$b),
                 asmstr.s,
                 [(set regclass1:$dst,
                       (OpNode regclass1:$a, regclass2:$b))],
                 sInst>;
| 252 | |
// Vector instruction with a single source.  $a and $dst use `regclass`; the
// selection pattern applies the pattern fragment `OpNode` to $a.
class VecUnaryOp<BinOpAsmString asmstr, PatFrag OpNode, NVPTXRegClass regclass,
                 NVPTXInst sInst = NOP>
  : NVPTXVecInst<(outs regclass:$dst),
                 (ins regclass:$a),
                 asmstr.s,
                 [(set regclass:$dst, (OpNode regclass:$a))],
                 sInst>;
| 258 | |
// Instantiates one VecBinaryOp per integer vector register class.
// `asmstr` is the opcode without its width; the width is appended per def.
// The V4I8/V2I8 variants append "16", the same suffix as the 16-bit variants.
// The iNNop arguments are forwarded as each def's scalar instruction.
multiclass IntBinVOp<string asmstr, SDNode OpNode,
                     NVPTXInst i64op = NOP, NVPTXInst i32op = NOP,
                     NVPTXInst i16op = NOP, NVPTXInst i8op = NOP> {
  def V2I64 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "64")>,
                          OpNode, V2I64Regs, i64op>;
  def V4I32 : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "32")>,
                          OpNode, V4I32Regs, i32op>;
  def V2I32 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "32")>,
                          OpNode, V2I32Regs, i32op>;
  def V4I16 : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "16")>,
                          OpNode, V4I16Regs, i16op>;
  def V2I16 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "16")>,
                          OpNode, V2I16Regs, i16op>;
  def V4I8  : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "16")>,
                          OpNode, V4I8Regs, i8op>;
  def V2I8  : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "16")>,
                          OpNode, V2I8Regs, i8op>;
}
| 277 | |
// Instantiates one VecBinaryOp per floating-point vector register class.
// `asmstr` is the opcode up to and including its trailing "."; the type
// suffix ("f64", "f32" or "ftz.f32") is appended per def.  The *_ftz defs are
// additionally guarded by the doF32FTZ predicate.
multiclass FloatBinVOp<string asmstr, SDNode OpNode,
                       NVPTXInst f64 = NOP, NVPTXInst f32 = NOP,
                       NVPTXInst f32_ftz = NOP> {
  def V2F64     : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "f64")>,
                              OpNode, V2F64Regs, f64>;
  def V4F32_ftz : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "ftz.f32")>,
                              OpNode, V4F32Regs, f32_ftz>,
                  Requires<[doF32FTZ]>;
  def V2F32_ftz : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "ftz.f32")>,
                              OpNode, V2F32Regs, f32_ftz>,
                  Requires<[doF32FTZ]>;
  def V4F32     : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "f32")>,
                              OpNode, V4F32Regs, f32>;
  def V2F32     : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "f32")>,
                              OpNode, V2F32Regs, f32>;
}
| 292 | |
// Instantiates one VecUnaryOp per integer vector register class.
// `asmstr` is the opcode without its width; the width is appended per def.
// The V4I8/V2I8 variants append "16", the same suffix as the 16-bit variants.
multiclass IntUnaryVOp<string asmstr, PatFrag OpNode,
                       NVPTXInst i64op = NOP, NVPTXInst i32op = NOP,
                       NVPTXInst i16op = NOP, NVPTXInst i8op = NOP> {
  def V2I64 : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "64")>,
                         OpNode, V2I64Regs, i64op>;
  def V4I32 : VecUnaryOp<V4UnaryStr<!strconcat(asmstr, "32")>,
                         OpNode, V4I32Regs, i32op>;
  def V2I32 : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "32")>,
                         OpNode, V2I32Regs, i32op>;
  def V4I16 : VecUnaryOp<V4UnaryStr<!strconcat(asmstr, "16")>,
                         OpNode, V4I16Regs, i16op>;
  def V2I16 : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "16")>,
                         OpNode, V2I16Regs, i16op>;
  def V4I8  : VecUnaryOp<V4UnaryStr<!strconcat(asmstr, "16")>,
                         OpNode, V4I8Regs, i8op>;
  def V2I8  : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "16")>,
                         OpNode, V2I8Regs, i8op>;
}
| 311 | |
| 312 | |
| 313 | // Integer Arithmetic |
// Add, subtract, carry-chain and shift-left vector instructions.  Every def
// in this scope is tagged with isVecOther.Value.
let VecInstType=isVecOther.Value in {
  // Plain add / subtract for all integer vector classes.
  defm VAdd : IntBinVOp<"add.s", add, ADDi64rr, ADDi32rr, ADDi16rr, ADDi8rr>;
  defm VSub : IntBinVOp<"sub.s", sub, SUBi64rr, SUBi32rr, SUBi16rr, SUBi8rr>;

  // addc / subc nodes (32-bit element vectors only).
  def AddCCV4I32
    : VecBinaryOp<V4AsmStr<"add.cc.s32">, addc, V4I32Regs, ADDCCi32rr>;
  def AddCCV2I32
    : VecBinaryOp<V2AsmStr<"add.cc.s32">, addc, V2I32Regs, ADDCCi32rr>;
  def SubCCV4I32
    : VecBinaryOp<V4AsmStr<"sub.cc.s32">, subc, V4I32Regs, SUBCCi32rr>;
  def SubCCV2I32
    : VecBinaryOp<V2AsmStr<"sub.cc.s32">, subc, V2I32Regs, SUBCCi32rr>;

  // adde / sube nodes (32-bit element vectors only).
  def AddCCCV4I32
    : VecBinaryOp<V4AsmStr<"addc.cc.s32">, adde, V4I32Regs, ADDCCCi32rr>;
  def AddCCCV2I32
    : VecBinaryOp<V2AsmStr<"addc.cc.s32">, adde, V2I32Regs, ADDCCCi32rr>;
  def SubCCCV4I32
    : VecBinaryOp<V4AsmStr<"subc.cc.s32">, sube, V4I32Regs, SUBCCCi32rr>;
  def SubCCCV2I32
    : VecBinaryOp<V2AsmStr<"subc.cc.s32">, sube, V2I32Regs, SUBCCCi32rr>;

  // Shift left.  The second operand always comes from V2I32Regs/V4I32Regs,
  // whatever the class of the value being shifted.  The 8-bit variants use
  // the same "shl.b16" opcode as the 16-bit ones.
  def ShiftLV2I64
    : VecShiftOp<V2AsmStr<"shl.b64">, shl, V2I64Regs, V2I32Regs, SHLi64rr>;
  def ShiftLV2I32
    : VecShiftOp<V2AsmStr<"shl.b32">, shl, V2I32Regs, V2I32Regs, SHLi32rr>;
  def ShiftLV4I32
    : VecShiftOp<V4AsmStr<"shl.b32">, shl, V4I32Regs, V4I32Regs, SHLi32rr>;
  def ShiftLV2I16
    : VecShiftOp<V2AsmStr<"shl.b16">, shl, V2I16Regs, V2I32Regs, SHLi16rr>;
  def ShiftLV4I16
    : VecShiftOp<V4AsmStr<"shl.b16">, shl, V4I16Regs, V4I32Regs, SHLi16rr>;
  def ShiftLV2I8
    : VecShiftOp<V2AsmStr<"shl.b16">, shl, V2I8Regs, V2I32Regs, SHLi8rr>;
  def ShiftLV4I8
    : VecShiftOp<V4AsmStr<"shl.b16">, shl, V4I8Regs, V4I32Regs, SHLi8rr>;
}
| 350 | |
| 351 | // cvt to v*i32, helpers for shift |
// Conversion instruction from `inclass` ($s) to `outclass` ($d).  It carries
// no selection pattern of its own (the pattern list is empty), so it is only
// reachable through explicit Pat<> output dags.
class CVTtoVeci32<NVPTXRegClass inclass, NVPTXRegClass outclass, string asmstr,
                  NVPTXInst sInst = NOP>
  : NVPTXVecInst<(outs outclass:$d),
                 (ins inclass:$s),
                 asmstr,
                 [],
                 sInst>;
| 355 | |
// Builds one assembly line of the form "<op>\t<dest>, <src>;".
class VecCVTStrHelper<string op, string dest, string src> {
  string s = !strconcat(op, "\t", dest, ", ", src, ";");
}

// Two VecCVTStrHelper lines, for elements _0 and _1 of ${d} / ${s},
// separated by "\n\t".
class Vec2CVTStr<string op> {
  string s = !strconcat(VecCVTStrHelper<op, "${d}_0", "${s}_0">.s, "\n\t",
                        VecCVTStrHelper<op, "${d}_1", "${s}_1">.s);
}

// Four VecCVTStrHelper lines, for elements _0 .. _3 of ${d} / ${s},
// separated by "\n\t".
class Vec4CVTStr<string op> {
  string s = !strconcat(VecCVTStrHelper<op, "${d}_0", "${s}_0">.s, "\n\t",
                        VecCVTStrHelper<op, "${d}_1", "${s}_1">.s, "\n\t",
                        VecCVTStrHelper<op, "${d}_2", "${s}_2">.s, "\n\t",
                        VecCVTStrHelper<op, "${d}_3", "${s}_3">.s);
}
| 374 | |
// Conversions of narrower / wider integer vectors to i32 vectors.  The 8-bit
// and 16-bit sources both print "cvt.u32.u16"; the 64-bit source prints
// "cvt.u32.u64".  Every def is tagged with isVecOther.Value.
let VecInstType=isVecOther.Value in {
  def CVTv2i8tov2i32
    : CVTtoVeci32<V2I8Regs, V2I32Regs,
                  Vec2CVTStr<"cvt.u32.u16">.s, Zint_extendext8to32>;
  def CVTv2i16tov2i32
    : CVTtoVeci32<V2I16Regs, V2I32Regs,
                  Vec2CVTStr<"cvt.u32.u16">.s, Zint_extendext16to32>;
  def CVTv4i8tov4i32
    : CVTtoVeci32<V4I8Regs, V4I32Regs,
                  Vec4CVTStr<"cvt.u32.u16">.s, Zint_extendext8to32>;
  def CVTv4i16tov4i32
    : CVTtoVeci32<V4I16Regs, V4I32Regs,
                  Vec4CVTStr<"cvt.u32.u16">.s, Zint_extendext16to32>;
  def CVTv2i64tov2i32
    : CVTtoVeci32<V2I64Regs, V2I32Regs,
                  Vec2CVTStr<"cvt.u32.u64">.s, TRUNC_64to32>;
}
| 387 | |
// shl where the shift amount has the same class as the shifted value: the
// amount ($src2) is first converted to the matching i32 vector class and the
// result is fed to the corresponding ShiftL* instruction.

// Two-element vectors.
def : Pat<(shl V2I16Regs:$src1, V2I16Regs:$src2),
          (ShiftLV2I16 V2I16Regs:$src1,
                       (CVTv2i16tov2i32 V2I16Regs:$src2))>;
def : Pat<(shl V2I8Regs:$src1, V2I8Regs:$src2),
          (ShiftLV2I8 V2I8Regs:$src1,
                      (CVTv2i8tov2i32 V2I8Regs:$src2))>;
def : Pat<(shl V2I64Regs:$src1, V2I64Regs:$src2),
          (ShiftLV2I64 V2I64Regs:$src1,
                       (CVTv2i64tov2i32 V2I64Regs:$src2))>;

// Four-element vectors.
def : Pat<(shl V4I16Regs:$src1, V4I16Regs:$src2),
          (ShiftLV4I16 V4I16Regs:$src1,
                       (CVTv4i16tov4i32 V4I16Regs:$src2))>;
def : Pat<(shl V4I8Regs:$src1, V4I8Regs:$src2),
          (ShiftLV4I8 V4I8Regs:$src1,
                      (CVTv4i8tov4i32 V4I8Regs:$src2))>;
| 399 | |
// Right shifts, multiplies, divides and remainders.  Every def in this scope
// is tagged with isVecOther.Value.
let VecInstType=isVecOther.Value in {
  // Arithmetic shift right (sra).  The second operand always comes from
  // V2I32Regs/V4I32Regs; 8-bit variants print the 16-bit opcode.
  def ShiftRAV2I64
    : VecShiftOp<V2AsmStr<"shr.s64">, sra, V2I64Regs, V2I32Regs, SRAi64rr>;
  def ShiftRAV2I32
    : VecShiftOp<V2AsmStr<"shr.s32">, sra, V2I32Regs, V2I32Regs, SRAi32rr>;
  def ShiftRAV4I32
    : VecShiftOp<V4AsmStr<"shr.s32">, sra, V4I32Regs, V4I32Regs, SRAi32rr>;
  def ShiftRAV2I16
    : VecShiftOp<V2AsmStr<"shr.s16">, sra, V2I16Regs, V2I32Regs, SRAi16rr>;
  def ShiftRAV4I16
    : VecShiftOp<V4AsmStr<"shr.s16">, sra, V4I16Regs, V4I32Regs, SRAi16rr>;
  def ShiftRAV2I8
    : VecShiftOp<V2AsmStr<"shr.s16">, sra, V2I8Regs, V2I32Regs, SRAi8rr>;
  def ShiftRAV4I8
    : VecShiftOp<V4AsmStr<"shr.s16">, sra, V4I8Regs, V4I32Regs, SRAi8rr>;

  // Logical shift right (srl), same operand layout as above.
  def ShiftRLV2I64
    : VecShiftOp<V2AsmStr<"shr.u64">, srl, V2I64Regs, V2I32Regs, SRLi64rr>;
  def ShiftRLV2I32
    : VecShiftOp<V2AsmStr<"shr.u32">, srl, V2I32Regs, V2I32Regs, SRLi32rr>;
  def ShiftRLV4I32
    : VecShiftOp<V4AsmStr<"shr.u32">, srl, V4I32Regs, V4I32Regs, SRLi32rr>;
  def ShiftRLV2I16
    : VecShiftOp<V2AsmStr<"shr.u16">, srl, V2I16Regs, V2I32Regs, SRLi16rr>;
  def ShiftRLV4I16
    : VecShiftOp<V4AsmStr<"shr.u16">, srl, V4I16Regs, V4I32Regs, SRLi16rr>;
  def ShiftRLV2I8
    : VecShiftOp<V2AsmStr<"shr.u16">, srl, V2I8Regs, V2I32Regs, SRLi8rr>;
  def ShiftRLV4I8
    : VecShiftOp<V4AsmStr<"shr.u16">, srl, V4I8Regs, V4I32Regs, SRLi8rr>;

  // Multiply (low half, signed high half, unsigned high half).
  defm VMult   : IntBinVOp<"mul.lo.s", mul,
                           MULTi64rr, MULTi32rr, MULTi16rr, MULTi8rr>;
  defm VMultHS : IntBinVOp<"mul.hi.s", mulhs,
                           MULTHSi64rr, MULTHSi32rr, MULTHSi16rr, MULTHSi8rr>;
  defm VMultHU : IntBinVOp<"mul.hi.u", mulhu,
                           MULTHUi64rr, MULTHUi32rr, MULTHUi16rr, MULTHUi8rr>;

  // Signed / unsigned divide and remainder.
  defm VSDiv : IntBinVOp<"div.s", sdiv,
                         SDIVi64rr, SDIVi32rr, SDIVi16rr, SDIVi8rr>;
  defm VUDiv : IntBinVOp<"div.u", udiv,
                         UDIVi64rr, UDIVi32rr, UDIVi16rr, UDIVi8rr>;
  defm VSRem : IntBinVOp<"rem.s", srem,
                         SREMi64rr, SREMi32rr, SREMi16rr, SREMi8rr>;
  defm VURem : IntBinVOp<"rem.u", urem,
                         UREMi64rr, UREMi32rr, UREMi16rr, UREMi8rr>;
}
| 448 | |
// sra / srl where the shift amount has the same class as the shifted value:
// the amount ($src2) is first converted to the matching i32 vector class and
// the result is fed to the corresponding ShiftRA* / ShiftRL* instruction.

// Arithmetic shift right, two-element vectors.
def : Pat<(sra V2I16Regs:$src1, V2I16Regs:$src2),
          (ShiftRAV2I16 V2I16Regs:$src1,
                        (CVTv2i16tov2i32 V2I16Regs:$src2))>;
def : Pat<(sra V2I8Regs:$src1, V2I8Regs:$src2),
          (ShiftRAV2I8 V2I8Regs:$src1,
                       (CVTv2i8tov2i32 V2I8Regs:$src2))>;
def : Pat<(sra V2I64Regs:$src1, V2I64Regs:$src2),
          (ShiftRAV2I64 V2I64Regs:$src1,
                        (CVTv2i64tov2i32 V2I64Regs:$src2))>;

// Arithmetic shift right, four-element vectors.
def : Pat<(sra V4I16Regs:$src1, V4I16Regs:$src2),
          (ShiftRAV4I16 V4I16Regs:$src1,
                        (CVTv4i16tov4i32 V4I16Regs:$src2))>;
def : Pat<(sra V4I8Regs:$src1, V4I8Regs:$src2),
          (ShiftRAV4I8 V4I8Regs:$src1,
                       (CVTv4i8tov4i32 V4I8Regs:$src2))>;

// Logical shift right, two-element vectors.
def : Pat<(srl V2I16Regs:$src1, V2I16Regs:$src2),
          (ShiftRLV2I16 V2I16Regs:$src1,
                        (CVTv2i16tov2i32 V2I16Regs:$src2))>;
def : Pat<(srl V2I8Regs:$src1, V2I8Regs:$src2),
          (ShiftRLV2I8 V2I8Regs:$src1,
                       (CVTv2i8tov2i32 V2I8Regs:$src2))>;
def : Pat<(srl V2I64Regs:$src1, V2I64Regs:$src2),
          (ShiftRLV2I64 V2I64Regs:$src1,
                        (CVTv2i64tov2i32 V2I64Regs:$src2))>;

// Logical shift right, four-element vectors.
def : Pat<(srl V4I16Regs:$src1, V4I16Regs:$src2),
          (ShiftRLV4I16 V4I16Regs:$src1,
                        (CVTv4i16tov4i32 V4I16Regs:$src2))>;
def : Pat<(srl V4I8Regs:$src1, V4I8Regs:$src2),
          (ShiftRLV4I8 V4I8Regs:$src1,
                       (CVTv4i8tov4i32 V4I8Regs:$src2))>;
| 472 | |
// Three-operand multiply-add instructions for a four-element and a
// two-element register class.  The selection pattern is
// (an (mn $a, $b), $c), where `an` / `mn` default to add / mul.  Both defs
// are guarded by `Pred` and record `sop` as their scalar instruction.
multiclass VMAD<string asmstr, NVPTXRegClass regclassv4,
                NVPTXRegClass regclassv2,
                SDNode an=add, SDNode mn=mul, NVPTXInst sop=NOP,
                Predicate Pred> {
  def V4 : NVPTXVecInst<
               (outs regclassv4:$dst),
               (ins regclassv4:$a, regclassv4:$b, regclassv4:$c),
               V4MADStr<asmstr>.s,
               [(set regclassv4:$dst,
                     (an (mn regclassv4:$a, regclassv4:$b),
                         regclassv4:$c))],
               sop>,
           Requires<[Pred]>;

  def V2 : NVPTXVecInst<
               (outs regclassv2:$dst),
               (ins regclassv2:$a, regclassv2:$b, regclassv2:$c),
               V2MADStr<asmstr>.s,
               [(set regclassv2:$dst,
                     (an (mn regclassv2:$a, regclassv2:$b),
                         regclassv2:$c))],
               sop>,
           Requires<[Pred]>;
}
| 492 | |
// Two-element-only multiply-add with the fixed integer pattern
// (add (mul $a, $b), $c).  The def is guarded by `Pred`.
multiclass VMADV2Only<string asmstr, NVPTXRegClass regclass, NVPTXInst sop=NOP,
                      Predicate Pred> {
  def V2 : NVPTXVecInst<
               (outs regclass:$dst),
               (ins regclass:$a, regclass:$b, regclass:$c),
               V2MADStr<asmstr>.s,
               [(set regclass:$dst,
                     (add (mul regclass:$a, regclass:$b), regclass:$c))],
               sop>,
           Requires<[Pred]>;
}
// Two-element-only multiply-add with the fixed floating-point pattern
// (fadd (fmul $a, $b), $c).  The def is guarded by `Pred`.
multiclass VFMADV2Only<string asmstr, NVPTXRegClass regclass, NVPTXInst sop=NOP,
                       Predicate Pred> {
  def V2 : NVPTXVecInst<
               (outs regclass:$dst),
               (ins regclass:$a, regclass:$b, regclass:$c),
               V2MADStr<asmstr>.s,
               [(set regclass:$dst,
                     (fadd (fmul regclass:$a, regclass:$b), regclass:$c))],
               sop>,
           Requires<[Pred]>;
}
| 511 | |
// Integer multiply-add, integer negate, float add/sub/mul and float
// multiply-add.  Every def in this scope is tagged with isVecOther.Value.
let VecInstType=isVecOther.Value in {
  // Integer multiply-add.  The 8-bit variant prints the same "mad.lo.s16"
  // opcode as the 16-bit one; the 64-bit form only exists for two elements.
  defm I8MAD  : VMAD<"mad.lo.s16", V4I8Regs, V2I8Regs,
                     add, mul, MAD8rrr, true>;
  defm I16MAD : VMAD<"mad.lo.s16", V4I16Regs, V2I16Regs,
                     add, mul, MAD16rrr, true>;
  defm I32MAD : VMAD<"mad.lo.s32", V4I32Regs, V2I32Regs,
                     add, mul, MAD32rrr, true>;
  defm I64MAD : VMADV2Only<"mad.lo.s64", V2I64Regs, MAD64rrr, true>;

  // Integer negate.
  defm VNeg : IntUnaryVOp<"neg.s", ineg, INEG64, INEG32, INEG16, INEG8>;

  // Floating-point add / subtract / multiply.
  defm VAddf : FloatBinVOp<"add.", fadd,
                           FADDf64rr, FADDf32rr, FADDf32rr_ftz>;
  defm VSubf : FloatBinVOp<"sub.", fsub,
                           FSUBf64rr, FSUBf32rr, FSUBf32rr_ftz>;
  defm VMulf : FloatBinVOp<"mul.", fmul,
                           FMULf64rr, FMULf32rr, FMULf32rr_ftz>;

  // Floating-point multiply-add, each variant guarded by its own predicate.
  defm F32MAD_ftz : VMAD<"mad.ftz.f32", V4F32Regs, V2F32Regs,
                         fadd, fmul, FMAD32_ftzrrr, doFMADF32_ftz>;
  defm F32FMA_ftz : VMAD<"fma.rn.ftz.f32", V4F32Regs, V2F32Regs,
                         fadd, fmul, FMA32_ftzrrr, doFMAF32_ftz>;
  defm F32MAD     : VMAD<"mad.f32", V4F32Regs, V2F32Regs,
                         fadd, fmul, FMAD32rrr, doFMADF32>;
  defm F32FMA     : VMAD<"fma.rn.f32", V4F32Regs, V2F32Regs,
                         fadd, fmul, FMA32rrr, doFMAF32>;
  defm F64FMA     : VFMADV2Only<"fma.rn.f64", V2F64Regs, FMA64rrr, doFMAF64>;
}
| 536 | |
// Floating-point divide.  Every def in this scope is tagged with
// isVecOther.Value; the f32 variants differ in opcode ("div.rn" vs
// "div.full", with or without ".ftz") and in the predicates they require.
let VecInstType=isVecOther.Value in {
  // "div.rn" f32 variants; these require reqPTX20.
  def V4F32Div_prec_ftz
    : VecBinaryOp<V4AsmStr<"div.rn.ftz.f32">, fdiv, V4F32Regs,
                  FDIV32rr_prec_ftz>,
      Requires<[doF32FTZ, reqPTX20]>;
  def V2F32Div_prec_ftz
    : VecBinaryOp<V2AsmStr<"div.rn.ftz.f32">, fdiv, V2F32Regs,
                  FDIV32rr_prec_ftz>,
      Requires<[doF32FTZ, reqPTX20]>;
  def V4F32Div_prec
    : VecBinaryOp<V4AsmStr<"div.rn.f32">, fdiv, V4F32Regs,
                  FDIV32rr_prec>,
      Requires<[reqPTX20]>;
  def V2F32Div_prec
    : VecBinaryOp<V2AsmStr<"div.rn.f32">, fdiv, V2F32Regs,
                  FDIV32rr_prec>,
      Requires<[reqPTX20]>;

  // "div.full" f32 variants.
  def V2F32Div_ftz
    : VecBinaryOp<V2AsmStr<"div.full.ftz.f32">, fdiv, V2F32Regs,
                  FDIV32rr_ftz>,
      Requires<[doF32FTZ]>;
  def V4F32Div_ftz
    : VecBinaryOp<V4AsmStr<"div.full.ftz.f32">, fdiv, V4F32Regs,
                  FDIV32rr_ftz>,
      Requires<[doF32FTZ]>;
  def V2F32Div
    : VecBinaryOp<V2AsmStr<"div.full.f32">, fdiv, V2F32Regs, FDIV32rr>;
  def V4F32Div
    : VecBinaryOp<V4AsmStr<"div.full.f32">, fdiv, V4F32Regs, FDIV32rr>;

  // f64 divide.
  def V2F64Div
    : VecBinaryOp<V2AsmStr<"div.rn.f64">, fdiv, V2F64Regs, FDIV64rr>;
}
| 554 | |
// PatFrag wrapper around fneg, so that it can be passed where VecUnaryOp
// expects a PatFrag operator.
def fnegpat : PatFrag<(ops node:$in),
                      (fneg node:$in)>;
| 556 | |
// Floating-point negate and bitwise logic.  Every def in this scope is tagged
// with isVecOther.Value.
let VecInstType=isVecOther.Value in {
  // Floating-point negate; the .ftz forms require doF32FTZ.
  def VNegv2f32_ftz
    : VecUnaryOp<V2UnaryStr<"neg.ftz.f32">, fnegpat, V2F32Regs, FNEGf32_ftz>,
      Requires<[doF32FTZ]>;
  def VNegv4f32_ftz
    : VecUnaryOp<V4UnaryStr<"neg.ftz.f32">, fnegpat, V4F32Regs, FNEGf32_ftz>,
      Requires<[doF32FTZ]>;
  def VNegv2f32
    : VecUnaryOp<V2UnaryStr<"neg.f32">, fnegpat, V2F32Regs, FNEGf32>;
  def VNegv4f32
    : VecUnaryOp<V4UnaryStr<"neg.f32">, fnegpat, V4F32Regs, FNEGf32>;
  def VNegv2f64
    : VecUnaryOp<V2UnaryStr<"neg.f64">, fnegpat, V2F64Regs, FNEGf64>;

  // Logical Arithmetic
  defm VAnd : IntBinVOp<"and.b", and, ANDb64rr, ANDb32rr, ANDb16rr, ANDb8rr>;
  defm VOr  : IntBinVOp<"or.b", or, ORb64rr, ORb32rr, ORb16rr, ORb8rr>;
  defm VXor : IntBinVOp<"xor.b", xor, XORb64rr, XORb32rr, XORb16rr, XORb8rr>;

  defm VNot : IntUnaryVOp<"not.b", not, NOT64, NOT32, NOT16, NOT8>;
}
| 573 | |
| 574 | |
// Folds an fsub whose operand is an fmul into the three-operand instruction
// `Inst` for v2f32, by negating one operand with VNegv2f32:
//   a - (b * c)  ->  Inst(neg(b), c, a)
//   (a * b) - c  ->  Inst(a, b, neg(c))
// Both patterns are guarded by `Pred`.
multiclass V2FPCONTRACT32_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
  def : Pat<(fsub V2F32Regs:$a,
                  (fmul V2F32Regs:$b, V2F32Regs:$c)),
            (Inst (VNegv2f32 V2F32Regs:$b),
                  V2F32Regs:$c,
                  V2F32Regs:$a)>,
        Requires<[Pred]>;

  def : Pat<(fsub (fmul V2F32Regs:$a, V2F32Regs:$b),
                  V2F32Regs:$c),
            (Inst V2F32Regs:$a,
                  V2F32Regs:$b,
                  (VNegv2f32 V2F32Regs:$c))>,
        Requires<[Pred]>;
}

// Instantiations for the v2f32 FMA / MAD instructions.
defm V2FMAF32ext_ftz
  : V2FPCONTRACT32_SUB_PAT<F32FMA_ftzV2, doFMAF32AGG_ftz>;
defm V2FMADF32ext_ftz
  : V2FPCONTRACT32_SUB_PAT<F32MAD_ftzV2, doFMADF32_ftz>;
defm V2FMAF32ext
  : V2FPCONTRACT32_SUB_PAT<F32FMAV2, doFMAF32AGG>;
defm V2FMADF32ext
  : V2FPCONTRACT32_SUB_PAT<F32MADV2, doFMADF32>;
| 589 | |
// Folds an fsub whose operand is an fmul into the three-operand instruction
// `Inst` for v4f32, by negating one operand with VNegv4f32:
//   a - (b * c)  ->  Inst(neg(b), c, a)
//   (a * b) - c  ->  Inst(a, b, neg(c))
// Both patterns are guarded by `Pred`.
multiclass V4FPCONTRACT32_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
  def : Pat<(fsub V4F32Regs:$a,
                  (fmul V4F32Regs:$b, V4F32Regs:$c)),
            (Inst (VNegv4f32 V4F32Regs:$b),
                  V4F32Regs:$c,
                  V4F32Regs:$a)>,
        Requires<[Pred]>;

  def : Pat<(fsub (fmul V4F32Regs:$a, V4F32Regs:$b),
                  V4F32Regs:$c),
            (Inst V4F32Regs:$a,
                  V4F32Regs:$b,
                  (VNegv4f32 V4F32Regs:$c))>,
        Requires<[Pred]>;
}

// Instantiations for the v4f32 FMA / MAD instructions.
defm V4FMAF32ext_ftz
  : V4FPCONTRACT32_SUB_PAT<F32FMA_ftzV4, doFMAF32AGG_ftz>;
defm V4FMADF32ext_ftz
  : V4FPCONTRACT32_SUB_PAT<F32MAD_ftzV4, doFMADF32_ftz>;
defm V4FMAF32ext
  : V4FPCONTRACT32_SUB_PAT<F32FMAV4, doFMAF32AGG>;
defm V4FMADF32ext
  : V4FPCONTRACT32_SUB_PAT<F32MADV4, doFMADF32>;
| 604 | |
// Folds an fsub whose operand is an fmul into the three-operand instruction
// `Inst` for v2f64, by negating one operand with VNegv2f64:
//   a - (b * c)  ->  Inst(neg(b), c, a)
//   (a * b) - c  ->  Inst(a, b, neg(c))
// Both patterns are guarded by `Pred`.
multiclass V2FPCONTRACT64_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
  def : Pat<(fsub V2F64Regs:$a,
                  (fmul V2F64Regs:$b, V2F64Regs:$c)),
            (Inst (VNegv2f64 V2F64Regs:$b),
                  V2F64Regs:$c,
                  V2F64Regs:$a)>,
        Requires<[Pred]>;

  def : Pat<(fsub (fmul V2F64Regs:$a, V2F64Regs:$b),
                  V2F64Regs:$c),
            (Inst V2F64Regs:$a,
                  V2F64Regs:$b,
                  (VNegv2f64 V2F64Regs:$c))>,
        Requires<[Pred]>;
}

// Instantiation for the v2f64 FMA instruction.
defm V2FMAF64ext
  : V2FPCONTRACT64_SUB_PAT<F64FMAV2, doFMAF64AGG>;
| 616 | |
// Builds the operand-modifier string
//   "${c<elem>:vecv<vecsize><extra><l>}"
// and exposes it as field `s`.  `l` is optional and defaults to "".
class VecModStr<string vecsize, string elem, string extra, string l="">
{
  string s = !strconcat("${c", elem, ":vecv", vecsize, extra, l, "}");
}
// Builds, in field `s`, the two asm lines that fill destination lane `elem`
// of a shuffle: one `mov.<type>` reading from $src1 and one reading from
// $src2.  Each line is prefixed by a "comm" modifier (variants "1" and "2")
// and its source operand is suffixed by the "pos" modifier, all keyed on the
// immediate operand $c<elem>.
// NOTE(review): presumably the operand printer uses "comm" to comment out
// whichever line does not apply and "pos" to pick the source lane -- confirm
// in the NVPTX asm printer.
class ShuffleOneLine<string vecsize, string elem, string type>
{
  // "mov.<type> \t${dst}_<elem>", shared by both lines.
  string movDst = !strconcat("mov.", type, " \t${dst}_", elem);
  // Modifier appended to the source operand on both lines.
  string srcPos = VecModStr<vecsize, elem, "pos">.s;

  string s = !strconcat(VecModStr<vecsize, elem, "comm", "1">.s,
                        movDst, ", $src1", srcPos, ";\n\t",
                        VecModStr<vecsize, elem, "comm", "2">.s,
                        movDst, ", $src2", srcPos, ";");
}
// Asm text for a two-lane shuffle: the ShuffleOneLine strings for lanes 0 and
// 1, joined by "\n\t".  The result is field `s`.
class ShuffleAsmStr2<string type>
{
  string s = !strconcat(ShuffleOneLine<"2", "0", type>.s, "\n\t",
                        ShuffleOneLine<"2", "1", type>.s);
}
// Asm text for a four-lane shuffle: the ShuffleOneLine strings for lanes 0
// through 3, joined by "\n\t".  The result is field `s`.
class ShuffleAsmStr4<string type>
{
  string s = !strconcat(ShuffleOneLine<"4", "0", type>.s, "\n\t",
                        ShuffleOneLine<"4", "1", type>.s, "\n\t",
                        ShuffleOneLine<"4", "2", type>.s, "\n\t",
                        ShuffleOneLine<"4", "3", type>.s);
}
| 661 | |
// Vector shuffle instructions.  Each takes two source vectors plus one i8
// immediate ($c0..$cN) per destination lane.  The asm string is a "//Mov ..."
// comment line listing the operands, followed by the per-lane text from
// ShuffleAsmStr2/ShuffleAsmStr4.  None has a selection pattern of its own.
// The i8 vector variants use the "u16" type string, as the i16 ones do.
let hasSideEffects=0, VecInstType=isVecShuffle.Value in {
  // ---- four-lane shuffles ----
  def VecShuffle_v4f32
    : NVPTXVecInst<(outs V4F32Regs:$dst),
                   (ins V4F32Regs:$src1, V4F32Regs:$src2,
                        i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3),
                   !strconcat(
                     "//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t",
                     ShuffleAsmStr4<"f32">.s),
                   [], FMOV32rr>;

  def VecShuffle_v4i32
    : NVPTXVecInst<(outs V4I32Regs:$dst),
                   (ins V4I32Regs:$src1, V4I32Regs:$src2,
                        i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3),
                   !strconcat(
                     "//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t",
                     ShuffleAsmStr4<"u32">.s),
                   [], IMOV32rr>;

  def VecShuffle_v4i16
    : NVPTXVecInst<(outs V4I16Regs:$dst),
                   (ins V4I16Regs:$src1, V4I16Regs:$src2,
                        i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3),
                   !strconcat(
                     "//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t",
                     ShuffleAsmStr4<"u16">.s),
                   [], IMOV16rr>;

  def VecShuffle_v4i8
    : NVPTXVecInst<(outs V4I8Regs:$dst),
                   (ins V4I8Regs:$src1, V4I8Regs:$src2,
                        i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3),
                   !strconcat(
                     "//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t",
                     ShuffleAsmStr4<"u16">.s),
                   [], IMOV8rr>;

  // ---- two-lane shuffles ----
  def VecShuffle_v2f32
    : NVPTXVecInst<(outs V2F32Regs:$dst),
                   (ins V2F32Regs:$src1, V2F32Regs:$src2,
                        i8imm:$c0, i8imm:$c1),
                   !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
                              ShuffleAsmStr2<"f32">.s),
                   [], FMOV32rr>;

  def VecShuffle_v2i32
    : NVPTXVecInst<(outs V2I32Regs:$dst),
                   (ins V2I32Regs:$src1, V2I32Regs:$src2,
                        i8imm:$c0, i8imm:$c1),
                   !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
                              ShuffleAsmStr2<"u32">.s),
                   [], IMOV32rr>;

  def VecShuffle_v2i8
    : NVPTXVecInst<(outs V2I8Regs:$dst),
                   (ins V2I8Regs:$src1, V2I8Regs:$src2,
                        i8imm:$c0, i8imm:$c1),
                   !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
                              ShuffleAsmStr2<"u16">.s),
                   [], IMOV8rr>;

  def VecShuffle_v2i16
    : NVPTXVecInst<(outs V2I16Regs:$dst),
                   (ins V2I16Regs:$src1, V2I16Regs:$src2,
                        i8imm:$c0, i8imm:$c1),
                   !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
                              ShuffleAsmStr2<"u16">.s),
                   [], IMOV16rr>;

  def VecShuffle_v2f64
    : NVPTXVecInst<(outs V2F64Regs:$dst),
                   (ins V2F64Regs:$src1, V2F64Regs:$src2,
                        i8imm:$c0, i8imm:$c1),
                   !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
                              ShuffleAsmStr2<"f64">.s),
                   [], FMOV64rr>;

  def VecShuffle_v2i64
    : NVPTXVecInst<(outs V2I64Regs:$dst),
                   (ins V2I64Regs:$src1, V2I64Regs:$src2,
                        i8imm:$c0, i8imm:$c1),
                   !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
                              ShuffleAsmStr2<"u64">.s),
                   [], IMOV64rr>;
}
| 733 | |
// ShuffleMask<k>: transforms a vector_shuffle node into an i32 target
// constant holding mask element k of that shuffle.
def ShuffleMask0 : SDNodeXForm<vector_shuffle, [{
  return CurDAG->getTargetConstant(
      cast<ShuffleVectorSDNode>(N)->getMaskElt(0), SDLoc(N), MVT::i32);
}]>;
def ShuffleMask1 : SDNodeXForm<vector_shuffle, [{
  return CurDAG->getTargetConstant(
      cast<ShuffleVectorSDNode>(N)->getMaskElt(1), SDLoc(N), MVT::i32);
}]>;
def ShuffleMask2 : SDNodeXForm<vector_shuffle, [{
  return CurDAG->getTargetConstant(
      cast<ShuffleVectorSDNode>(N)->getMaskElt(2), SDLoc(N), MVT::i32);
}]>;
def ShuffleMask3 : SDNodeXForm<vector_shuffle, [{
  return CurDAG->getTargetConstant(
      cast<ShuffleVectorSDNode>(N)->getMaskElt(3), SDLoc(N), MVT::i32);
}]>;
| 750 | |
// Fragment matching any two-operand vector_shuffle; the predicate always
// accepts.  The getGluedNode() call has no purpose other than referencing N,
// which silences a compiler warning about N being unused.
def vec_shuf
  : PatFrag<(ops node:$lhs, node:$rhs),
            (vector_shuffle node:$lhs, node:$rhs),
            [{ N->getGluedNode(); return true; }]>;
| 756 | |
// Selection patterns mapping vec_shuf onto the VecShuffle_* instructions.
// The matched shuffle node is bound to $op so that ShuffleMask<k> can turn
// each mask element into the corresponding immediate operand.

// -- floating-point vectors --
def : Pat<(v2f64 (vec_shuf:$op V2F64Regs:$src1, V2F64Regs:$src2)),
          (VecShuffle_v2f64 V2F64Regs:$src1, V2F64Regs:$src2,
             (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;

def : Pat<(v4f32 (vec_shuf:$op V4F32Regs:$src1, V4F32Regs:$src2)),
          (VecShuffle_v4f32 V4F32Regs:$src1, V4F32Regs:$src2,
             (ShuffleMask0 node:$op), (ShuffleMask1 node:$op),
             (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>;

def : Pat<(v2f32 (vec_shuf:$op V2F32Regs:$src1, V2F32Regs:$src2)),
          (VecShuffle_v2f32 V2F32Regs:$src1, V2F32Regs:$src2,
             (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;

// -- integer vectors --
def : Pat<(v2i64 (vec_shuf:$op V2I64Regs:$src1, V2I64Regs:$src2)),
          (VecShuffle_v2i64 V2I64Regs:$src1, V2I64Regs:$src2,
             (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;

def : Pat<(v4i32 (vec_shuf:$op V4I32Regs:$src1, V4I32Regs:$src2)),
          (VecShuffle_v4i32 V4I32Regs:$src1, V4I32Regs:$src2,
             (ShuffleMask0 node:$op), (ShuffleMask1 node:$op),
             (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>;

def : Pat<(v2i32 (vec_shuf:$op V2I32Regs:$src1, V2I32Regs:$src2)),
          (VecShuffle_v2i32 V2I32Regs:$src1, V2I32Regs:$src2,
             (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;

def : Pat<(v4i16 (vec_shuf:$op V4I16Regs:$src1, V4I16Regs:$src2)),
          (VecShuffle_v4i16 V4I16Regs:$src1, V4I16Regs:$src2,
             (ShuffleMask0 node:$op), (ShuffleMask1 node:$op),
             (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>;

def : Pat<(v2i16 (vec_shuf:$op V2I16Regs:$src1, V2I16Regs:$src2)),
          (VecShuffle_v2i16 V2I16Regs:$src1, V2I16Regs:$src2,
             (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;

def : Pat<(v4i8 (vec_shuf:$op V4I8Regs:$src1, V4I8Regs:$src2)),
          (VecShuffle_v4i8 V4I8Regs:$src1, V4I8Regs:$src2,
             (ShuffleMask0 node:$op), (ShuffleMask1 node:$op),
             (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>;

def : Pat<(v2i8 (vec_shuf:$op V2I8Regs:$src1, V2I8Regs:$src2)),
          (VecShuffle_v2i8 V2I8Regs:$src1, V2I8Regs:$src2,
             (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;
| 800 | |
// Two-element build_vector: assembles a `vclass` vector from two `sclass`
// scalars.  `asmstr` is the mnemonic placed in front of the operand text;
// `si` is recorded as the instruction's scalar counterpart.
class Build_Vector2<string asmstr, NVPTXRegClass vclass, NVPTXRegClass sclass,
                    NVPTXInst si>
  : NVPTXVecInst<
      (outs vclass:$dst),
      (ins sclass:$a1, sclass:$a2),
      !strconcat(asmstr, "\t${dst:vecfull}, {{$a1, $a2}};"),
      [(set vclass:$dst, (build_vector sclass:$a1, sclass:$a2))],
      si>;
// Four-element build_vector: assembles a `vclass` vector from four `sclass`
// scalars.  `asmstr` is the mnemonic placed in front of the operand text;
// `si` is recorded as the instruction's scalar counterpart.
class Build_Vector4<string asmstr, NVPTXRegClass vclass, NVPTXRegClass sclass,
                    NVPTXInst si>
  : NVPTXVecInst<
      (outs vclass:$dst),
      (ins sclass:$a1, sclass:$a2, sclass:$a3, sclass:$a4),
      !strconcat(asmstr, "\t${dst:vecfull}, {{$a1, $a2, $a3, $a4}};"),
      [(set vclass:$dst, (build_vector sclass:$a1, sclass:$a2,
                                       sclass:$a3, sclass:$a4))],
      si>;
| 816 | |
// Concrete build_vector instructions.  The i8 variants use the u16 mnemonic,
// the same one as the i16 variants.
let isAsCheapAsAMove=1, VecInstType=isVecBuild.Value in {
  // Two-element vectors.
  def Build_Vector2_f32
    : Build_Vector2<"mov.v2.f32", V2F32Regs, Float32Regs, FMOV32rr>;
  def Build_Vector2_f64
    : Build_Vector2<"mov.v2.f64", V2F64Regs, Float64Regs, FMOV64rr>;

  def Build_Vector2_i32
    : Build_Vector2<"mov.v2.u32", V2I32Regs, Int32Regs, IMOV32rr>;
  def Build_Vector2_i64
    : Build_Vector2<"mov.v2.u64", V2I64Regs, Int64Regs, IMOV64rr>;
  def Build_Vector2_i16
    : Build_Vector2<"mov.v2.u16", V2I16Regs, Int16Regs, IMOV16rr>;
  def Build_Vector2_i8
    : Build_Vector2<"mov.v2.u16", V2I8Regs, Int8Regs, IMOV8rr>;

  // Four-element vectors.
  def Build_Vector4_f32
    : Build_Vector4<"mov.v4.f32", V4F32Regs, Float32Regs, FMOV32rr>;

  def Build_Vector4_i32
    : Build_Vector4<"mov.v4.u32", V4I32Regs, Int32Regs, IMOV32rr>;
  def Build_Vector4_i16
    : Build_Vector4<"mov.v4.u16", V4I16Regs, Int16Regs, IMOV16rr>;
  def Build_Vector4_i8
    : Build_Vector4<"mov.v4.u16", V4I8Regs, Int8Regs, IMOV8rr>;
}
| 842 | |
// Whole-vector register move within `vclass`.  It has no selection pattern;
// `sop` (NOP by default) is recorded as the scalar counterpart.
class Vec_Move<string asmstr, NVPTXRegClass vclass, NVPTXInst sop=NOP>
  : NVPTXVecInst<
      (outs vclass:$dst),
      (ins vclass:$src),
      !strconcat(asmstr, "\t${dst:vecfull}, ${src:vecfull};"),
      [],
      sop>;
| 847 | |
// One move instruction per vector register class.  The i8 vector moves use
// the u16 mnemonics, the same ones as the i16 vector moves.
let isAsCheapAsAMove=1, hasSideEffects=0, IsSimpleMove=1,
    VecInstType=isVecOther.Value in {
  // 32-bit elements
  def V4f32Mov : Vec_Move<"mov.v4.f32", V4F32Regs, FMOV32rr>;
  def V2f32Mov : Vec_Move<"mov.v2.f32", V2F32Regs, FMOV32rr>;

  def V4i32Mov : Vec_Move<"mov.v4.u32", V4I32Regs, IMOV32rr>;
  def V2i32Mov : Vec_Move<"mov.v2.u32", V2I32Regs, IMOV32rr>;

  // 16-bit and 8-bit elements
  def V4i16Mov : Vec_Move<"mov.v4.u16", V4I16Regs, IMOV16rr>;
  def V2i16Mov : Vec_Move<"mov.v2.u16", V2I16Regs, IMOV16rr>;

  def V4i8Mov  : Vec_Move<"mov.v4.u16", V4I8Regs, IMOV8rr>;
  def V2i8Mov  : Vec_Move<"mov.v2.u16", V2I8Regs, IMOV8rr>;

  // 64-bit elements
  def V2f64Mov : Vec_Move<"mov.v2.f64", V2F64Regs, FMOV64rr>;
  def V2i64Mov : Vec_Move<"mov.v2.u64", V2I64Regs, IMOV64rr>;
}
| 865 | |
// Subvector extraction.
// `extract_subvec` wraps ISD::EXTRACT_SUBVECTOR: one result, two operands,
// with operand 2 constrained to the pointer type.
def extract_subvec
  : SDNode<"ISD::EXTRACT_SUBVECTOR",
           SDTypeProfile<1, 2, [SDTCisPtrTy<2>]>>;

// A two-element half of a four-element vector is rebuilt from two scalar
// extracts: start index 0 takes lanes 0 and 1, start index 2 takes lanes 2
// and 3.

// v4f32 -> v2f32
def : Pat<(v2f32 (extract_subvec V4F32Regs:$src, 0)),
          (Build_Vector2_f32 (V4f32Extract V4F32Regs:$src, 0),
                             (V4f32Extract V4F32Regs:$src, 1))>;
def : Pat<(v2f32 (extract_subvec V4F32Regs:$src, 2)),
          (Build_Vector2_f32 (V4f32Extract V4F32Regs:$src, 2),
                             (V4f32Extract V4F32Regs:$src, 3))>;

// v4i32 -> v2i32
def : Pat<(v2i32 (extract_subvec V4I32Regs:$src, 0)),
          (Build_Vector2_i32 (V4i32Extract V4I32Regs:$src, 0),
                             (V4i32Extract V4I32Regs:$src, 1))>;
def : Pat<(v2i32 (extract_subvec V4I32Regs:$src, 2)),
          (Build_Vector2_i32 (V4i32Extract V4I32Regs:$src, 2),
                             (V4i32Extract V4I32Regs:$src, 3))>;

// v4i16 -> v2i16
def : Pat<(v2i16 (extract_subvec V4I16Regs:$src, 0)),
          (Build_Vector2_i16 (V4i16Extract V4I16Regs:$src, 0),
                             (V4i16Extract V4I16Regs:$src, 1))>;
def : Pat<(v2i16 (extract_subvec V4I16Regs:$src, 2)),
          (Build_Vector2_i16 (V4i16Extract V4I16Regs:$src, 2),
                             (V4i16Extract V4I16Regs:$src, 3))>;

// v4i8 -> v2i8
def : Pat<(v2i8 (extract_subvec V4I8Regs:$src, 0)),
          (Build_Vector2_i8 (V4i8Extract V4I8Regs:$src, 0),
                            (V4i8Extract V4I8Regs:$src, 1))>;
def : Pat<(v2i8 (extract_subvec V4I8Regs:$src, 2)),
          (Build_Vector2_i8 (V4i8Extract V4I8Regs:$src, 2),
                            (V4i8Extract V4I8Regs:$src, 3))>;
| 894 | |
// Select instructions

// Builds, in field `s`, the asm text for one lane of a vector select:
//   "selp.<type> \t${dst}_<pos>, ${src1}_<pos>, ${src2}_<pos>, $p;"
class Select_OneLine<string type, string pos> {
  string s = !strconcat("selp.", type,
                        " \t${dst}_", pos,
                        ", ${src1}_", pos,
                        ", ${src2}_", pos,
                        ", $p;");
}
| 906 | |
// Asm text for a two-lane select: the Select_OneLine strings for lanes 0 and
// 1, joined by "\n\t".  The result is field `s`.
class Select_Str2<string type> {
  string s = !strconcat(Select_OneLine<type, "0">.s, "\n\t",
                        Select_OneLine<type, "1">.s);
}
| 912 | |
// Asm text for a four-lane select: the Select_OneLine strings for lanes 0
// through 3, joined by "\n\t".  The result is field `s`.
class Select_Str4<string type> {
  string s = !strconcat(Select_OneLine<type, "0">.s, "\n\t",
                        Select_OneLine<type, "1">.s, "\n\t",
                        Select_OneLine<type, "2">.s, "\n\t",
                        Select_OneLine<type, "3">.s);
}
| 923 | |
// Vector select on a single i1 predicate: $dst = select($p, $src1, $src2),
// with all three vectors in `vclass`.  `asmstr` is used verbatim as the asm
// string and `sop` is recorded as the scalar counterpart.
class Vec_Select<NVPTXRegClass vclass, string asmstr, NVPTXInst sop>
  : NVPTXVecInst<
      (outs vclass:$dst),
      (ins vclass:$src1, vclass:$src2, Int1Regs:$p),
      asmstr,
      [(set vclass:$dst,
            (select Int1Regs:$p, vclass:$src1, vclass:$src2))],
      sop>;
| 931 | |
// One select instruction per vector register class.  The i8 vector variants
// use the "b16" type string, the same one as the i16 variants.
let VecInstType=isVecOther.Value in {
  // Integer vectors.
  def V2I64_Select
    : Vec_Select<V2I64Regs, Select_Str2<"b64">.s, SELECTi64rr>;
  def V4I32_Select
    : Vec_Select<V4I32Regs, Select_Str4<"b32">.s, SELECTi32rr>;
  def V2I32_Select
    : Vec_Select<V2I32Regs, Select_Str2<"b32">.s, SELECTi32rr>;
  def V4I16_Select
    : Vec_Select<V4I16Regs, Select_Str4<"b16">.s, SELECTi16rr>;
  def V2I16_Select
    : Vec_Select<V2I16Regs, Select_Str2<"b16">.s, SELECTi16rr>;
  def V4I8_Select
    : Vec_Select<V4I8Regs, Select_Str4<"b16">.s, SELECTi8rr>;
  def V2I8_Select
    : Vec_Select<V2I8Regs, Select_Str2<"b16">.s, SELECTi8rr>;

  // Floating-point vectors.
  def V2F64_Select
    : Vec_Select<V2F64Regs, Select_Str2<"f64">.s, SELECTf64rr>;
  def V4F32_Select
    : Vec_Select<V4F32Regs, Select_Str4<"f32">.s, SELECTf32rr>;
  def V2F32_Select
    : Vec_Select<V2F32Regs, Select_Str2<"f32">.s, SELECTf32rr>;
}
| 945 | |
// Comparison instructions

// setcc convenience fragments: each vset<cc> is a two-operand setcc with the
// condition code fixed to SET<CC>.

// SETO* condition codes.
def vsetoeq : PatFrag<(ops node:$x, node:$y), (setcc node:$x, node:$y, SETOEQ)>;
def vsetogt : PatFrag<(ops node:$x, node:$y), (setcc node:$x, node:$y, SETOGT)>;
def vsetoge : PatFrag<(ops node:$x, node:$y), (setcc node:$x, node:$y, SETOGE)>;
def vsetolt : PatFrag<(ops node:$x, node:$y), (setcc node:$x, node:$y, SETOLT)>;
def vsetole : PatFrag<(ops node:$x, node:$y), (setcc node:$x, node:$y, SETOLE)>;
def vsetone : PatFrag<(ops node:$x, node:$y), (setcc node:$x, node:$y, SETONE)>;
def vseto   : PatFrag<(ops node:$x, node:$y), (setcc node:$x, node:$y, SETO)>;

// SETU* condition codes.
def vsetuo  : PatFrag<(ops node:$x, node:$y), (setcc node:$x, node:$y, SETUO)>;
def vsetueq : PatFrag<(ops node:$x, node:$y), (setcc node:$x, node:$y, SETUEQ)>;
def vsetugt : PatFrag<(ops node:$x, node:$y), (setcc node:$x, node:$y, SETUGT)>;
def vsetuge : PatFrag<(ops node:$x, node:$y), (setcc node:$x, node:$y, SETUGE)>;
def vsetult : PatFrag<(ops node:$x, node:$y), (setcc node:$x, node:$y, SETULT)>;
def vsetule : PatFrag<(ops node:$x, node:$y), (setcc node:$x, node:$y, SETULE)>;
def vsetune : PatFrag<(ops node:$x, node:$y), (setcc node:$x, node:$y, SETUNE)>;

// Unprefixed condition codes.
def vseteq  : PatFrag<(ops node:$x, node:$y), (setcc node:$x, node:$y, SETEQ)>;
def vsetgt  : PatFrag<(ops node:$x, node:$y), (setcc node:$x, node:$y, SETGT)>;
def vsetge  : PatFrag<(ops node:$x, node:$y), (setcc node:$x, node:$y, SETGE)>;
def vsetlt  : PatFrag<(ops node:$x, node:$y), (setcc node:$x, node:$y, SETLT)>;
def vsetle  : PatFrag<(ops node:$x, node:$y), (setcc node:$x, node:$y, SETLE)>;
def vsetne  : PatFrag<(ops node:$x, node:$y), (setcc node:$x, node:$y, SETNE)>;
| 989 | |
// Vector compare: applies fragment `op` to two `inrclass` vectors and yields
// an `outrclass` vector.  The asm string is the literal "Unsupported"; `sop`
// is recorded as the scalar counterpart.
// NOTE(review): presumably these are always replaced by `sop` before
// emission -- confirm in the pass that consumes scalarInst.
class Vec_Compare<PatFrag op, NVPTXRegClass outrclass, NVPTXRegClass inrclass,
                  NVPTXInst sop>
  : NVPTXVecInst<
      (outs outrclass:$dst),
      (ins inrclass:$a, inrclass:$b),
      "Unsupported",
      [(set outrclass:$dst, (op inrclass:$a, inrclass:$b))],
      sop>;
| 997 | |
// Instantiates Vec_Compare for every integer vector register class.  Input
// and result share the same class, and the scalar instruction is chosen by
// element width (inst8 / inst16 / inst32 / inst64).
multiclass Vec_Compare_All<PatFrag op,
                           NVPTXInst inst8, NVPTXInst inst16,
                           NVPTXInst inst32, NVPTXInst inst64>
{
  def V2I8  : Vec_Compare<op, V2I8Regs,  V2I8Regs,  inst8>;
  def V4I8  : Vec_Compare<op, V4I8Regs,  V4I8Regs,  inst8>;
  def V2I16 : Vec_Compare<op, V2I16Regs, V2I16Regs, inst16>;
  def V4I16 : Vec_Compare<op, V4I16Regs, V4I16Regs, inst16>;
  def V2I32 : Vec_Compare<op, V2I32Regs, V2I32Regs, inst32>;
  def V4I32 : Vec_Compare<op, V4I32Regs, V4I32Regs, inst32>;
  def V2I64 : Vec_Compare<op, V2I64Regs, V2I64Regs, inst64>;
}
| 1012 | |
// Integer vector compares: one Vec_Compare_All expansion per condition, each
// paired with the ISet<COND>i<N>rr_toi<N> scalar instructions.
let VecInstType=isVecOther.Value in {
  defm VecSGT : Vec_Compare_All<vsetgt,
                                ISetSGTi8rr_toi8, ISetSGTi16rr_toi16,
                                ISetSGTi32rr_toi32, ISetSGTi64rr_toi64>;
  defm VecUGT : Vec_Compare_All<vsetugt,
                                ISetUGTi8rr_toi8, ISetUGTi16rr_toi16,
                                ISetUGTi32rr_toi32, ISetUGTi64rr_toi64>;
  defm VecSLT : Vec_Compare_All<vsetlt,
                                ISetSLTi8rr_toi8, ISetSLTi16rr_toi16,
                                ISetSLTi32rr_toi32, ISetSLTi64rr_toi64>;
  defm VecULT : Vec_Compare_All<vsetult,
                                ISetULTi8rr_toi8, ISetULTi16rr_toi16,
                                ISetULTi32rr_toi32, ISetULTi64rr_toi64>;
  defm VecSGE : Vec_Compare_All<vsetge,
                                ISetSGEi8rr_toi8, ISetSGEi16rr_toi16,
                                ISetSGEi32rr_toi32, ISetSGEi64rr_toi64>;
  defm VecUGE : Vec_Compare_All<vsetuge,
                                ISetUGEi8rr_toi8, ISetUGEi16rr_toi16,
                                ISetUGEi32rr_toi32, ISetUGEi64rr_toi64>;
  defm VecSLE : Vec_Compare_All<vsetle,
                                ISetSLEi8rr_toi8, ISetSLEi16rr_toi16,
                                ISetSLEi32rr_toi32, ISetSLEi64rr_toi64>;
  defm VecULE : Vec_Compare_All<vsetule,
                                ISetULEi8rr_toi8, ISetULEi16rr_toi16,
                                ISetULEi32rr_toi32, ISetULEi64rr_toi64>;
  defm VecSEQ : Vec_Compare_All<vseteq,
                                ISetSEQi8rr_toi8, ISetSEQi16rr_toi16,
                                ISetSEQi32rr_toi32, ISetSEQi64rr_toi64>;
  defm VecUEQ : Vec_Compare_All<vsetueq,
                                ISetUEQi8rr_toi8, ISetUEQi16rr_toi16,
                                ISetUEQi32rr_toi32, ISetUEQi64rr_toi64>;
  defm VecSNE : Vec_Compare_All<vsetne,
                                ISetSNEi8rr_toi8, ISetSNEi16rr_toi16,
                                ISetSNEi32rr_toi32, ISetSNEi64rr_toi64>;
  defm VecUNE : Vec_Compare_All<vsetune,
                                ISetUNEi8rr_toi8, ISetUNEi16rr_toi16,
                                ISetUNEi32rr_toi32, ISetUNEi64rr_toi64>;
}
| 1039 | |
// Instantiates Vec_Compare for the floating-point vector classes.  The result
// is the integer vector class with the same lane count and element width
// (v2f32 -> v2i32, v4f32 -> v4i32, v2f64 -> v2i64).
multiclass FVec_Compare_All<PatFrag op, NVPTXInst instf32, NVPTXInst instf64>
{
  def V2F32 : Vec_Compare<op, V2I32Regs, V2F32Regs, instf32>;
  def V4F32 : Vec_Compare<op, V4I32Regs, V4F32Regs, instf32>;
  def V2F64 : Vec_Compare<op, V2I64Regs, V2F64Regs, instf64>;
}
| 1048 | |
// Floating-point vector compares: one FVec_Compare_All expansion per
// condition, each paired with the FSet<COND>f<N>rr_toi<N> scalar instructions.
let VecInstType=isVecOther.Value in {
  // Matched through the vseto?? fragments.
  defm FVecGT
    : FVec_Compare_All<vsetogt, FSetGTf32rr_toi32, FSetGTf64rr_toi64>;
  defm FVecLT
    : FVec_Compare_All<vsetolt, FSetLTf32rr_toi32, FSetLTf64rr_toi64>;
  defm FVecGE
    : FVec_Compare_All<vsetoge, FSetGEf32rr_toi32, FSetGEf64rr_toi64>;
  defm FVecLE
    : FVec_Compare_All<vsetole, FSetLEf32rr_toi32, FSetLEf64rr_toi64>;
  defm FVecEQ
    : FVec_Compare_All<vsetoeq, FSetEQf32rr_toi32, FSetEQf64rr_toi64>;
  defm FVecNE
    : FVec_Compare_All<vsetone, FSetNEf32rr_toi32, FSetNEf64rr_toi64>;

  // Matched through the vsetu?? fragments.
  defm FVecUGT
    : FVec_Compare_All<vsetugt, FSetUGTf32rr_toi32, FSetUGTf64rr_toi64>;
  defm FVecULT
    : FVec_Compare_All<vsetult, FSetULTf32rr_toi32, FSetULTf64rr_toi64>;
  defm FVecUGE
    : FVec_Compare_All<vsetuge, FSetUGEf32rr_toi32, FSetUGEf64rr_toi64>;
  defm FVecULE
    : FVec_Compare_All<vsetule, FSetULEf32rr_toi32, FSetULEf64rr_toi64>;
  defm FVecUEQ
    : FVec_Compare_All<vsetueq, FSetUEQf32rr_toi32, FSetUEQf64rr_toi64>;
  defm FVecUNE
    : FVec_Compare_All<vsetune, FSetUNEf32rr_toi32, FSetUNEf64rr_toi64>;

  // Matched through vseto / vsetuo.
  defm FVecNUM
    : FVec_Compare_All<vseto, FSetNUMf32rr_toi32, FSetNUMf64rr_toi64>;
  defm FVecNAN
    : FVec_Compare_All<vsetuo, FSetNANf32rr_toi32, FSetNANf64rr_toi64>;
}
| 1081 | |
// Vector ld.param into four separate scalar registers.  The mnemonic is
// "ld.param" followed by `opstr`; the address is printed as [retval0+$b].
// Operand $a is declared but does not appear in the asm string.
class LoadParamScalar4Inst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs regclass:$d1, regclass:$d2, regclass:$d3, regclass:$d4),
              (ins i32imm:$a, i32imm:$b),
              !strconcat("ld.param", opstr,
                         "\t{{$d1, $d2, $d3, $d4}}, [retval0+$b];"),
              []>;

// Two-register form of the above.
class LoadParamScalar2Inst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs regclass:$d1, regclass:$d2),
              (ins i32imm:$a, i32imm:$b),
              !strconcat("ld.param", opstr,
                         "\t{{$d1, $d2}}, [retval0+$b];"),
              []>;
| 1093 | |
| 1094 | |
// Vector st.param from four separate scalar registers.  The mnemonic is
// "st.param" followed by `opstr`; the address is printed as [param$a+$b].
class StoreParamScalar4Inst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs),
              (ins regclass:$s1, regclass:$s2, regclass:$s3, regclass:$s4,
                   i32imm:$a, i32imm:$b),
              !strconcat("st.param", opstr,
                         "\t[param$a+$b], {{$s1, $s2, $s3, $s4}};"),
              []>;

// Two-register form of the above.
class StoreParamScalar2Inst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs),
              (ins regclass:$s1, regclass:$s2, i32imm:$a, i32imm:$b),
              !strconcat("st.param", opstr,
                         "\t[param$a+$b], {{$s1, $s2}};"),
              []>;
| 1107 | |
// Vector st.param of a return value from four separate scalar registers.
// The mnemonic is "st.param" followed by `opstr`; the address is printed as
// [func_retval+$a].
class StoreRetvalScalar4Inst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs),
              (ins regclass:$s1, regclass:$s2, regclass:$s3, regclass:$s4,
                   i32imm:$a),
              !strconcat("st.param", opstr,
                         "\t[func_retval+$a], {{$s1, $s2, $s3, $s4}};"),
              []>;

// Two-register form of the above.
class StoreRetvalScalar2Inst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs),
              (ins regclass:$s1, regclass:$s2, i32imm:$a),
              !strconcat("st.param", opstr,
                         "\t[func_retval+$a], {{$s1, $s2}};"),
              []>;
| 1120 | |
// Scalarized ld.param instructions, one per vector type.  The destination
// register class must match the element width named in the opcode suffix.
def LoadParamScalar4I32 : LoadParamScalar4Inst<Int32Regs, ".v4.b32">;
def LoadParamScalar4I16 : LoadParamScalar4Inst<Int16Regs, ".v4.b16">;
def LoadParamScalar4I8  : LoadParamScalar4Inst<Int8Regs, ".v4.b8">;

// The I64, I16 and I8 two-element loads previously all used Int32Regs,
// disagreeing with their .b64/.b16/.b8 suffixes, with the four-element loads
// above, and with the matching StoreParamScalar2* definitions.  They now use
// the register class of their element type.
def LoadParamScalar2I64 : LoadParamScalar2Inst<Int64Regs, ".v2.b64">;
def LoadParamScalar2I32 : LoadParamScalar2Inst<Int32Regs, ".v2.b32">;
def LoadParamScalar2I16 : LoadParamScalar2Inst<Int16Regs, ".v2.b16">;
def LoadParamScalar2I8  : LoadParamScalar2Inst<Int8Regs, ".v2.b8">;

def LoadParamScalar4F32 : LoadParamScalar4Inst<Float32Regs, ".v4.f32">;
def LoadParamScalar2F32 : LoadParamScalar2Inst<Float32Regs, ".v2.f32">;
def LoadParamScalar2F64 : LoadParamScalar2Inst<Float64Regs, ".v2.f64">;
| 1133 | |
// Scalarized st.param instructions for outgoing call parameters, one per
// vector type.
def StoreParamScalar4I32
  : StoreParamScalar4Inst<Int32Regs, ".v4.b32">;
def StoreParamScalar4I16
  : StoreParamScalar4Inst<Int16Regs, ".v4.b16">;
def StoreParamScalar4I8
  : StoreParamScalar4Inst<Int8Regs, ".v4.b8">;

def StoreParamScalar2I64
  : StoreParamScalar2Inst<Int64Regs, ".v2.b64">;
def StoreParamScalar2I32
  : StoreParamScalar2Inst<Int32Regs, ".v2.b32">;
def StoreParamScalar2I16
  : StoreParamScalar2Inst<Int16Regs, ".v2.b16">;
def StoreParamScalar2I8
  : StoreParamScalar2Inst<Int8Regs, ".v2.b8">;

def StoreParamScalar4F32
  : StoreParamScalar4Inst<Float32Regs, ".v4.f32">;
def StoreParamScalar2F32
  : StoreParamScalar2Inst<Float32Regs, ".v2.f32">;
def StoreParamScalar2F64
  : StoreParamScalar2Inst<Float64Regs, ".v2.f64">;

// Scalarized st.param instructions for function return values, one per
// vector type.
def StoreRetvalScalar4I32
  : StoreRetvalScalar4Inst<Int32Regs, ".v4.b32">;
def StoreRetvalScalar4I16
  : StoreRetvalScalar4Inst<Int16Regs, ".v4.b16">;
def StoreRetvalScalar4I8
  : StoreRetvalScalar4Inst<Int8Regs, ".v4.b8">;

def StoreRetvalScalar2I64
  : StoreRetvalScalar2Inst<Int64Regs, ".v2.b64">;
def StoreRetvalScalar2I32
  : StoreRetvalScalar2Inst<Int32Regs, ".v2.b32">;
def StoreRetvalScalar2I16
  : StoreRetvalScalar2Inst<Int16Regs, ".v2.b16">;
def StoreRetvalScalar2I8
  : StoreRetvalScalar2Inst<Int8Regs, ".v2.b8">;

def StoreRetvalScalar4F32
  : StoreRetvalScalar4Inst<Float32Regs, ".v4.f32">;
def StoreRetvalScalar2F32
  : StoreRetvalScalar2Inst<Float32Regs, ".v2.f32">;
def StoreRetvalScalar2F64
  : StoreRetvalScalar2Inst<Float64Regs, ".v2.f64">;
| 1159 | |
// Whole-vector LoadParam.  The asm string is a fixed descriptive placeholder
// and `opstr` is not used in it; `sop` (NOP by default) is recorded as the
// scalar counterpart.
class LoadParamVecInst<NVPTXRegClass regclass, string opstr,
                       NVPTXInst sop=NOP>
  : NVPTXVecInst<
      (outs regclass:$dst),
      (ins i32imm:$a, i32imm:$b),
      "loadparam : $dst <- [$a, $b]",
      [(set regclass:$dst, (LoadParam (i32 imm:$a), (i32 imm:$b)))],
      sop>;

// Whole-vector StoreParam; same conventions as LoadParamVecInst.
class StoreParamVecInst<NVPTXRegClass regclass, string opstr,
                        NVPTXInst sop=NOP>
  : NVPTXVecInst<
      (outs),
      (ins regclass:$val, i32imm:$a, i32imm:$b),
      "storeparam : [$a, $b] <- $val",
      [(StoreParam (i32 imm:$a), (i32 imm:$b), regclass:$val)],
      sop>;

// Whole-vector StoreRetval; same conventions as LoadParamVecInst.
class StoreRetvalVecInst<NVPTXRegClass regclass, string opstr,
                         NVPTXInst sop=NOP>
  : NVPTXVecInst<
      (outs),
      (ins regclass:$val, i32imm:$a),
      "storeretval : retval[$a] <- $val",
      [(StoreRetval (i32 imm:$a), regclass:$val)],
      sop>;
| 1176 | |
// Whole-vector parameter loads, each paired with its LoadParamScalar*
// counterpart.
let VecInstType=isVecLD.Value in {
  def LoadParamV4I32
    : LoadParamVecInst<V4I32Regs, ".v4.b32", LoadParamScalar4I32>;
  def LoadParamV4I16
    : LoadParamVecInst<V4I16Regs, ".v4.b16", LoadParamScalar4I16>;
  def LoadParamV4I8
    : LoadParamVecInst<V4I8Regs, ".v4.b8", LoadParamScalar4I8>;

  def LoadParamV2I64
    : LoadParamVecInst<V2I64Regs, ".v2.b64", LoadParamScalar2I64>;
  def LoadParamV2I32
    : LoadParamVecInst<V2I32Regs, ".v2.b32", LoadParamScalar2I32>;
  def LoadParamV2I16
    : LoadParamVecInst<V2I16Regs, ".v2.b16", LoadParamScalar2I16>;
  def LoadParamV2I8
    : LoadParamVecInst<V2I8Regs, ".v2.b8", LoadParamScalar2I8>;

  def LoadParamV4F32
    : LoadParamVecInst<V4F32Regs, ".v4.f32", LoadParamScalar4F32>;
  def LoadParamV2F32
    : LoadParamVecInst<V2F32Regs, ".v2.f32", LoadParamScalar2F32>;
  def LoadParamV2F64
    : LoadParamVecInst<V2F64Regs, ".v2.f64", LoadParamScalar2F64>;
}
| 1201 | |
// Whole-vector parameter and return-value stores, each paired with its
// StoreParamScalar* / StoreRetvalScalar* counterpart.
let VecInstType=isVecST.Value in {
  // ---- outgoing call parameters ----
  def StoreParamV4I32
    : StoreParamVecInst<V4I32Regs, ".v4.b32", StoreParamScalar4I32>;
  def StoreParamV4I16
    : StoreParamVecInst<V4I16Regs, ".v4.b16", StoreParamScalar4I16>;
  def StoreParamV4I8
    : StoreParamVecInst<V4I8Regs, ".v4.b8", StoreParamScalar4I8>;

  def StoreParamV2I64
    : StoreParamVecInst<V2I64Regs, ".v2.b64", StoreParamScalar2I64>;
  def StoreParamV2I32
    : StoreParamVecInst<V2I32Regs, ".v2.b32", StoreParamScalar2I32>;
  def StoreParamV2I16
    : StoreParamVecInst<V2I16Regs, ".v2.b16", StoreParamScalar2I16>;
  def StoreParamV2I8
    : StoreParamVecInst<V2I8Regs, ".v2.b8", StoreParamScalar2I8>;

  def StoreParamV4F32
    : StoreParamVecInst<V4F32Regs, ".v4.f32", StoreParamScalar4F32>;
  def StoreParamV2F32
    : StoreParamVecInst<V2F32Regs, ".v2.f32", StoreParamScalar2F32>;
  def StoreParamV2F64
    : StoreParamVecInst<V2F64Regs, ".v2.f64", StoreParamScalar2F64>;

  // ---- function return values ----
  def StoreRetvalV4I32
    : StoreRetvalVecInst<V4I32Regs, ".v4.b32", StoreRetvalScalar4I32>;
  def StoreRetvalV4I16
    : StoreRetvalVecInst<V4I16Regs, ".v4.b16", StoreRetvalScalar4I16>;
  def StoreRetvalV4I8
    : StoreRetvalVecInst<V4I8Regs, ".v4.b8", StoreRetvalScalar4I8>;

  def StoreRetvalV2I64
    : StoreRetvalVecInst<V2I64Regs, ".v2.b64", StoreRetvalScalar2I64>;
  def StoreRetvalV2I32
    : StoreRetvalVecInst<V2I32Regs, ".v2.b32", StoreRetvalScalar2I32>;
  def StoreRetvalV2I16
    : StoreRetvalVecInst<V2I16Regs, ".v2.b16", StoreRetvalScalar2I16>;
  def StoreRetvalV2I8
    : StoreRetvalVecInst<V2I8Regs, ".v2.b8", StoreRetvalScalar2I8>;

  def StoreRetvalV4F32
    : StoreRetvalVecInst<V4F32Regs, ".v4.f32", StoreRetvalScalar4F32>;
  def StoreRetvalV2F32
    : StoreRetvalVecInst<V2F32Regs, ".v2.f32", StoreRetvalScalar2F32>;
  def StoreRetvalV2F64
    : StoreRetvalVecInst<V2F64Regs, ".v2.f64", StoreRetvalScalar2F64>;
}
| 1250 | |
| 1251 | |
// Int vector to int scalar bit convert.
// Each pattern extracts every lane of the source vector in index order and
// passes the lanes to the matching V<n>I<w>toI<m> packing instruction.

// v4i8 -> i32
def : Pat<(i32 (bitconvert V4I8Regs:$s)),
          (V4I8toI32 (V4i8Extract V4I8Regs:$s, 0),
                     (V4i8Extract V4I8Regs:$s, 1),
                     (V4i8Extract V4I8Regs:$s, 2),
                     (V4i8Extract V4I8Regs:$s, 3))>;

// v4i16 -> i64
def : Pat<(i64 (bitconvert V4I16Regs:$s)),
          (V4I16toI64 (V4i16Extract V4I16Regs:$s, 0),
                      (V4i16Extract V4I16Regs:$s, 1),
                      (V4i16Extract V4I16Regs:$s, 2),
                      (V4i16Extract V4I16Regs:$s, 3))>;

// v2i8 -> i16
def : Pat<(i16 (bitconvert V2I8Regs:$s)),
          (V2I8toI16 (V2i8Extract V2I8Regs:$s, 0),
                     (V2i8Extract V2I8Regs:$s, 1))>;

// v2i16 -> i32
def : Pat<(i32 (bitconvert V2I16Regs:$s)),
          (V2I16toI32 (V2i16Extract V2I16Regs:$s, 0),
                      (V2i16Extract V2I16Regs:$s, 1))>;

// v2i32 -> i64
def : Pat<(i64 (bitconvert V2I32Regs:$s)),
          (V2I32toI64 (V2i32Extract V2I32Regs:$s, 0),
                      (V2i32Extract V2I32Regs:$s, 1))>;
| 1274 | |
// Integer scalar -> integer vector bitconvert.
// All five records are tagged VecInstType = isVecDest.Value and carry the
// literal asm string "Error!". The last template argument is NVPTXVecInst's
// sInst parameter, so it becomes the record's scalarInst field.
// NOTE(review): "Error!" suggests these records are never meant to be
// printed as-is and are rewritten to scalarInst first -- confirm.
let VecInstType = isVecDest.Value in {
  // i32 -> v4i8
  def VecI32toV4I8
      : NVPTXVecInst<(outs V4I8Regs:$d), (ins Int32Regs:$s),
                     "Error!",
                     [(set V4I8Regs:$d, (bitconvert Int32Regs:$s))],
                     I32toV4I8>;

  // i64 -> v4i16
  def VecI64toV4I16
      : NVPTXVecInst<(outs V4I16Regs:$d), (ins Int64Regs:$s),
                     "Error!",
                     [(set V4I16Regs:$d, (bitconvert Int64Regs:$s))],
                     I64toV4I16>;

  // i16 -> v2i8
  def VecI16toV2I8
      : NVPTXVecInst<(outs V2I8Regs:$d), (ins Int16Regs:$s),
                     "Error!",
                     [(set V2I8Regs:$d, (bitconvert Int16Regs:$s))],
                     I16toV2I8>;

  // i32 -> v2i16
  def VecI32toV2I16
      : NVPTXVecInst<(outs V2I16Regs:$d), (ins Int32Regs:$s),
                     "Error!",
                     [(set V2I16Regs:$d, (bitconvert Int32Regs:$s))],
                     I32toV2I16>;

  // i64 -> v2i32
  def VecI64toV2I32
      : NVPTXVecInst<(outs V2I32Regs:$d), (ins Int64Regs:$s),
                     "Error!",
                     [(set V2I32Regs:$d, (bitconvert Int64Regs:$s))],
                     I64toV2I32>;
}
| 1303 | |
// Integer vector -> integer vector bitconvert.
// Same-width conversions go through an integer scalar: the source elements
// are packed with a V*to* record and the scalar is re-split with a Vec*to*
// record. The 128-bit conversions (v2i64 <-> v4i32) are done per 64-bit half
// and reassembled with Build_Vector*.

// v4i8 -> v2i16 (via i32)
def : Pat<(v2i16 (bitconvert V4I8Regs:$s)),
          (VecI32toV2I16
            (V4I8toI32 (V4i8Extract V4I8Regs:$s, 0),
                       (V4i8Extract V4I8Regs:$s, 1),
                       (V4i8Extract V4I8Regs:$s, 2),
                       (V4i8Extract V4I8Regs:$s, 3)))>;

// v4i16 -> v2i32 (via i64)
def : Pat<(v2i32 (bitconvert V4I16Regs:$s)),
          (VecI64toV2I32
            (V4I16toI64 (V4i16Extract V4I16Regs:$s, 0),
                        (V4i16Extract V4I16Regs:$s, 1),
                        (V4i16Extract V4I16Regs:$s, 2),
                        (V4i16Extract V4I16Regs:$s, 3)))>;

// v2i16 -> v4i8 (via i32)
def : Pat<(v4i8 (bitconvert V2I16Regs:$s)),
          (VecI32toV4I8
            (V2I16toI32 (V2i16Extract V2I16Regs:$s, 0),
                        (V2i16Extract V2I16Regs:$s, 1)))>;

// v2i32 -> v4i16 (via i64)
def : Pat<(v4i16 (bitconvert V2I32Regs:$s)),
          (VecI64toV4I16
            (V2I32toI64 (V2i32Extract V2I32Regs:$s, 0),
                        (V2i32Extract V2I32Regs:$s, 1)))>;

// v2i64 -> v4i32: each i64 element is split into a v2i32, and the four
// resulting i32 values are gathered in element order.
def : Pat<(v4i32 (bitconvert V2I64Regs:$s)),
          (Build_Vector4_i32
            (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 0)), 0),
            (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 0)), 1),
            (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 1)), 0),
            (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 1)), 1))>;

// v4i32 -> v2i64: elements {0,1} form the first i64, {2,3} the second.
def : Pat<(v2i64 (bitconvert V4I32Regs:$s)),
          (Build_Vector2_i64
            (V2I32toI64 (V4i32Extract V4I32Regs:$s, 0),
                        (V4i32Extract V4I32Regs:$s, 1)),
            (V2I32toI64 (V4i32Extract V4I32Regs:$s, 2),
                        (V4i32Extract V4I32Regs:$s, 3)))>;
| 1335 | |
// Fp scalar -> fp vector bitconvert.
// f64 -> v2f32
// Tagged VecInstType = isVecDest.Value. The asm string is the literal
// "Error!", and F64toV2F32 is passed as NVPTXVecInst's sInst argument, so it
// becomes this record's scalarInst field.
let VecInstType = isVecDest.Value in
def VecF64toV2F32
    : NVPTXVecInst<(outs V2F32Regs:$d), (ins Float64Regs:$s),
                   "Error!",
                   [(set V2F32Regs:$d, (bitconvert Float64Regs:$s))],
                   F64toV2F32>;
| 1344 | |
// Fp vector -> fp scalar bitconvert.
// v2f32 -> f64: elements 0 and 1 are extracted and passed, in that order, to
// V2F32toF64.
def : Pat<(f64 (bitconvert V2F32Regs:$s)),
          (V2F32toF64 (V2f32Extract V2F32Regs:$s, 0),
                      (V2f32Extract V2F32Regs:$s, 1))>;
| 1349 | |
// Fp scalar -> integer vector bitconvert.
// The float is first converted to an integer scalar of the same width with
// BITCONVERT_*_F2I, then split with the matching VecI*toV* record.

// f32 -> v4i8
def : Pat<(v4i8 (bitconvert Float32Regs:$s)),
          (VecI32toV4I8
            (BITCONVERT_32_F2I Float32Regs:$s))>;

// f32 -> v2i16
def : Pat<(v2i16 (bitconvert Float32Regs:$s)),
          (VecI32toV2I16
            (BITCONVERT_32_F2I Float32Regs:$s))>;

// f64 -> v4i16
def : Pat<(v4i16 (bitconvert Float64Regs:$s)),
          (VecI64toV4I16
            (BITCONVERT_64_F2I Float64Regs:$s))>;

// f64 -> v2i32
def : Pat<(v2i32 (bitconvert Float64Regs:$s)),
          (VecI64toV2I32
            (BITCONVERT_64_F2I Float64Regs:$s))>;
| 1363 | |
// Integer vector -> fp scalar bitconvert.
// The vector elements are packed into an integer scalar with a V*to* record
// and that scalar is then passed through BITCONVERT_*_I2F.

// v4i8 -> f32
def : Pat<(f32 (bitconvert V4I8Regs:$s)),
          (BITCONVERT_32_I2F
            (V4I8toI32 (V4i8Extract V4I8Regs:$s, 0),
                       (V4i8Extract V4I8Regs:$s, 1),
                       (V4i8Extract V4I8Regs:$s, 2),
                       (V4i8Extract V4I8Regs:$s, 3)))>;

// v4i16 -> f64
def : Pat<(f64 (bitconvert V4I16Regs:$s)),
          (BITCONVERT_64_I2F
            (V4I16toI64 (V4i16Extract V4I16Regs:$s, 0),
                        (V4i16Extract V4I16Regs:$s, 1),
                        (V4i16Extract V4I16Regs:$s, 2),
                        (V4i16Extract V4I16Regs:$s, 3)))>;

// v2i16 -> f32
def : Pat<(f32 (bitconvert V2I16Regs:$s)),
          (BITCONVERT_32_I2F
            (V2I16toI32 (V2i16Extract V2I16Regs:$s, 0),
                        (V2i16Extract V2I16Regs:$s, 1)))>;

// v2i32 -> f64
def : Pat<(f64 (bitconvert V2I32Regs:$s)),
          (BITCONVERT_64_I2F
            (V2I32toI64 (V2i32Extract V2I32Regs:$s, 0),
                        (V2i32Extract V2I32Regs:$s, 1)))>;
| 1383 | |
// Integer scalar -> fp vector bitconvert.
// i64 -> v2f32: the i64 goes through BITCONVERT_64_I2F and the result is
// split with VecF64toV2F32.
def : Pat<(v2f32 (bitconvert Int64Regs:$s)),
          (VecF64toV2F32
            (BITCONVERT_64_I2F Int64Regs:$s))>;
| 1388 | |
// Fp vector -> integer scalar bitconvert.
// v2f32 -> i64: elements 0 and 1 are packed with V2F32toF64 and the result
// is passed through BITCONVERT_64_F2I.
def : Pat<(i64 (bitconvert V2F32Regs:$s)),
          (BITCONVERT_64_F2I
            (V2F32toF64 (V2f32Extract V2F32Regs:$s, 0),
                        (V2f32Extract V2F32Regs:$s, 1)))>;
| 1394 | |
// Integer vector -> fp vector bitconvert.
// Results are assembled with Build_Vector*_f* from per-element
// BITCONVERT_*_I2F conversions, except v4i16 -> v2f32, which goes through a
// single 64-bit scalar.

// v2i64 -> v4f32: each i64 element is split into a v2i32 and every i32 half
// is converted separately.
def : Pat<(v4f32 (bitconvert V2I64Regs:$s)),
          (Build_Vector4_f32
            (BITCONVERT_32_I2F
              (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 0)),
                            0)),
            (BITCONVERT_32_I2F
              (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 0)),
                            1)),
            (BITCONVERT_32_I2F
              (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 1)),
                            0)),
            (BITCONVERT_32_I2F
              (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 1)),
                            1)))>;

// v2i64 -> v2f64: element-wise.
def : Pat<(v2f64 (bitconvert V2I64Regs:$s)),
          (Build_Vector2_f64
            (BITCONVERT_64_I2F (V2i64Extract V2I64Regs:$s, 0)),
            (BITCONVERT_64_I2F (V2i64Extract V2I64Regs:$s, 1)))>;

// v2i32 -> v2f32: element-wise.
def : Pat<(v2f32 (bitconvert V2I32Regs:$s)),
          (Build_Vector2_f32
            (BITCONVERT_32_I2F (V2i32Extract V2I32Regs:$s, 0)),
            (BITCONVERT_32_I2F (V2i32Extract V2I32Regs:$s, 1)))>;

// v4i32 -> v2f64: elements {0,1} and {2,3} are each packed into an i64
// before conversion.
def : Pat<(v2f64 (bitconvert V4I32Regs:$s)),
          (Build_Vector2_f64
            (BITCONVERT_64_I2F
              (V2I32toI64 (V4i32Extract V4I32Regs:$s, 0),
                          (V4i32Extract V4I32Regs:$s, 1))),
            (BITCONVERT_64_I2F
              (V2I32toI64 (V4i32Extract V4I32Regs:$s, 2),
                          (V4i32Extract V4I32Regs:$s, 3))))>;

// v4i32 -> v4f32: element-wise.
def : Pat<(v4f32 (bitconvert V4I32Regs:$s)),
          (Build_Vector4_f32
            (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s, 0)),
            (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s, 1)),
            (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s, 2)),
            (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s, 3)))>;

// v4i16 -> v2f32: pack all four elements into an i64, convert to f64, then
// split with VecF64toV2F32.
def : Pat<(v2f32 (bitconvert V4I16Regs:$s)),
          (VecF64toV2F32
            (BITCONVERT_64_I2F
              (V4I16toI64 (V4i16Extract V4I16Regs:$s, 0),
                          (V4i16Extract V4I16Regs:$s, 1),
                          (V4i16Extract V4I16Regs:$s, 2),
                          (V4i16Extract V4I16Regs:$s, 3))))>;
| 1438 | |
// Fp vector -> integer vector bitconvert.
// Results are assembled with Build_Vector*_i* from per-element
// BITCONVERT_*_F2I conversions, except v2f32 -> v4i16, which goes through a
// single 64-bit scalar.

// v4f32 -> v2i64: elements {0,1} and {2,3} are each packed into an f64
// before conversion.
def : Pat<(v2i64 (bitconvert V4F32Regs:$s)),
          (Build_Vector2_i64
            (BITCONVERT_64_F2I
              (V2F32toF64 (V4f32Extract V4F32Regs:$s, 0),
                          (V4f32Extract V4F32Regs:$s, 1))),
            (BITCONVERT_64_F2I
              (V2F32toF64 (V4f32Extract V4F32Regs:$s, 2),
                          (V4f32Extract V4F32Regs:$s, 3))))>;

// v2f64 -> v2i64: element-wise.
def : Pat<(v2i64 (bitconvert V2F64Regs:$s)),
          (Build_Vector2_i64
            (BITCONVERT_64_F2I (V2f64Extract V2F64Regs:$s, 0)),
            (BITCONVERT_64_F2I (V2f64Extract V2F64Regs:$s, 1)))>;

// v2f32 -> v2i32: element-wise.
def : Pat<(v2i32 (bitconvert V2F32Regs:$s)),
          (Build_Vector2_i32
            (BITCONVERT_32_F2I (V2f32Extract V2F32Regs:$s, 0)),
            (BITCONVERT_32_F2I (V2f32Extract V2F32Regs:$s, 1)))>;

// v2f64 -> v4i32: each f64 element is split into a v2f32 and every f32 half
// is converted separately.
def : Pat<(v4i32 (bitconvert V2F64Regs:$s)),
          (Build_Vector4_i32
            (BITCONVERT_32_F2I
              (V2f32Extract (VecF64toV2F32 (V2f64Extract V2F64Regs:$s, 0)),
                            0)),
            (BITCONVERT_32_F2I
              (V2f32Extract (VecF64toV2F32 (V2f64Extract V2F64Regs:$s, 0)),
                            1)),
            (BITCONVERT_32_F2I
              (V2f32Extract (VecF64toV2F32 (V2f64Extract V2F64Regs:$s, 1)),
                            0)),
            (BITCONVERT_32_F2I
              (V2f32Extract (VecF64toV2F32 (V2f64Extract V2F64Regs:$s, 1)),
                            1)))>;

// v4f32 -> v4i32: element-wise.
def : Pat<(v4i32 (bitconvert V4F32Regs:$s)),
          (Build_Vector4_i32
            (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s, 0)),
            (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s, 1)),
            (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s, 2)),
            (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s, 3)))>;

// v2f32 -> v4i16: pack both elements into an f64, convert to i64, then split
// with VecI64toV4I16.
def : Pat<(v4i16 (bitconvert V2F32Regs:$s)),
          (VecI64toV4I16
            (BITCONVERT_64_F2I
              (V2F32toF64 (V2f32Extract V2F32Regs:$s, 0),
                          (V2f32Extract V2F32Regs:$s, 1))))>;