Justin Holewinski | ae556d3 | 2012-05-04 20:18:50 +0000 | [diff] [blame] | 1 | //===- NVPTXVector.td - NVPTX Vector Specific Instruction defs -*- tblgen-*-==// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | |
| 10 | //----------------------------------- |
| 11 | // Vector Specific |
| 12 | //----------------------------------- |
| 13 | |
| 14 | // |
| 15 | // All vector instructions derive from NVPTXVecInst |
| 16 | // |
| 17 | |
// Base record for the vector instructions in this file.  The operand lists,
// assembly string and selection pattern are forwarded unchanged to NVPTXInst;
// the extra sInst argument (NOP when omitted) is stored in scalarInst.
// NOTE(review): scalarInst is presumably the scalar counterpart used when the
// vector form is split - confirm against the consumers of this field.
class NVPTXVecInst<dag outs, dag ins, string asmstr, list<dag> pattern,
                   NVPTXInst sInst = NOP>
  : NVPTXInst<outs, ins, asmstr, pattern> {
  NVPTXInst scalarInst = sInst;
}
| 23 | |
// Shared shape of every vector-element extract below.
//   sclass : register class of the scalar result ($dst)
//   vclass : register class of the vector source ($src)
//   vt     : value type the source is annotated with in the pattern
//   ty     : PTX type suffix of the emitted "mov" (e.g. "u16", "f32")
//   sInst  : instruction recorded as scalarInst
// The element index arrives as the immediate $c and is printed through the
// "vecelem" operand modifier.
class NVPTXVecExtractInst<NVPTXRegClass sclass, NVPTXRegClass vclass,
                          ValueType vt, string ty, NVPTXInst sInst>
  : NVPTXVecInst<(outs sclass:$dst),
                 (ins vclass:$src, i8imm:$c),
                 !strconcat("mov.", ty, " \t$dst, $src${c:vecelem};"),
                 [(set sclass:$dst,
                       (vector_extract (vt vclass:$src), imm:$c))],
                 sInst>;

let isAsCheapAsAMove = 1, VecInstType = isVecExtract.Value in {
  // 16-bit integer elements.
  def V2i16Extract : NVPTXVecExtractInst<Int16Regs, V2I16Regs, v2i16, "u16",
                                         IMOV16rr>;
  def V4i16Extract : NVPTXVecExtractInst<Int16Regs, V4I16Regs, v4i16, "u16",
                                         IMOV16rr>;

  // 8-bit integer elements; note that the move is still emitted as "mov.u16".
  def V2i8Extract  : NVPTXVecExtractInst<Int8Regs, V2I8Regs, v2i8, "u16",
                                         IMOV8rr>;
  def V4i8Extract  : NVPTXVecExtractInst<Int8Regs, V4I8Regs, v4i8, "u16",
                                         IMOV8rr>;

  // Two-element 32-bit vectors.
  def V2i32Extract : NVPTXVecExtractInst<Int32Regs, V2I32Regs, v2i32, "u32",
                                         IMOV32rr>;
  def V2f32Extract : NVPTXVecExtractInst<Float32Regs, V2F32Regs, v2f32, "f32",
                                         FMOV32rr>;

  // Two-element 64-bit vectors.
  def V2i64Extract : NVPTXVecExtractInst<Int64Regs, V2I64Regs, v2i64, "u64",
                                         IMOV64rr>;
  def V2f64Extract : NVPTXVecExtractInst<Float64Regs, V2F64Regs, v2f64, "f64",
                                         FMOV64rr>;

  // Four-element 32-bit vectors.
  def V4i32Extract : NVPTXVecExtractInst<Int32Regs, V4I32Regs, v4i32, "u32",
                                         IMOV32rr>;
  def V4f32Extract : NVPTXVecExtractInst<Float32Regs, V4F32Regs, v4f32, "f32",
                                         FMOV32rr>;
}
| 105 | |
// Shared shape of every vector-element insert below.
//   vclass  : register class of the vector source/result ($src, $dst)
//   sclass  : register class of the scalar being inserted ($val)
//   vecsize : element count used in the "mov.vN" form ("2" or "4")
//   ty      : PTX type suffix used by both emitted moves
//   sInst   : instruction recorded as scalarInst
// Two moves are printed: the whole source vector is first copied to the
// destination, then the element selected by the immediate $c is overwritten
// with $val.
class NVPTXVecInsertInst<NVPTXRegClass vclass, NVPTXRegClass sclass,
                         string vecsize, string ty, NVPTXInst sInst>
  : NVPTXVecInst<(outs vclass:$dst),
                 (ins vclass:$src, sclass:$val, i8imm:$c),
                 !strconcat("mov.v", vecsize, ".", ty,
                            " \t${dst:vecfull}, ${src:vecfull};",
                            "\n\tmov.", ty, " \t$dst${c:vecelem}, $val;"),
                 [(set vclass:$dst,
                       (vector_insert vclass:$src, sclass:$val, imm:$c))],
                 sInst>;

let isAsCheapAsAMove = 1, VecInstType = isVecInsert.Value in {
  // 8-bit integer elements; the moves are still emitted with the u16 type.
  def V2i8Insert  : NVPTXVecInsertInst<V2I8Regs, Int8Regs, "2", "u16",
                                       IMOV8rr>;
  def V4i8Insert  : NVPTXVecInsertInst<V4I8Regs, Int8Regs, "4", "u16",
                                       IMOV8rr>;

  // 16-bit integer elements.
  def V2i16Insert : NVPTXVecInsertInst<V2I16Regs, Int16Regs, "2", "u16",
                                       IMOV16rr>;
  def V4i16Insert : NVPTXVecInsertInst<V4I16Regs, Int16Regs, "4", "u16",
                                       IMOV16rr>;

  // Two-element 32-bit vectors.
  def V2i32Insert : NVPTXVecInsertInst<V2I32Regs, Int32Regs, "2", "u32",
                                       IMOV32rr>;
  def V2f32Insert : NVPTXVecInsertInst<V2F32Regs, Float32Regs, "2", "f32",
                                       FMOV32rr>;

  // Two-element 64-bit vectors.
  def V2i64Insert : NVPTXVecInsertInst<V2I64Regs, Int64Regs, "2", "u64",
                                       IMOV64rr>;
  def V2f64Insert : NVPTXVecInsertInst<V2F64Regs, Float64Regs, "2", "f64",
                                       FMOV64rr>;

  // Four-element 32-bit vectors.
  def V4i32Insert : NVPTXVecInsertInst<V4I32Regs, Int32Regs, "4", "u32",
                                       IMOV32rr>;
  def V4f32Insert : NVPTXVecInsertInst<V4F32Regs, Float32Regs, "4", "f32",
                                       FMOV32rr>;
}
| 197 | |
// Thin wrapper that carries an assembly string in its field `s`, so the
// string-building classes below can be passed around as a single typed value.
class BinOpAsmString<string c> {
  string s = c;
}
| 201 | |
// Assembly text for a four-element binary operation: `opcode` is printed once
// per element, each time on the _0 .. _3 suffixed forms of $dst, $a and $b.
class V4AsmStr<string opcode> : BinOpAsmString<
  !strconcat(opcode, " \t${dst}_0, ${a}_0, ${b}_0;\n\t",
             opcode, " \t${dst}_1, ${a}_1, ${b}_1;\n\t",
             opcode, " \t${dst}_2, ${a}_2, ${b}_2;\n\t",
             opcode, " \t${dst}_3, ${a}_3, ${b}_3;")>;
| 209 | |
// Assembly text for a two-element binary operation: `opcode` is printed once
// for the _0 and once for the _1 suffixed forms of $dst, $a and $b.
class V2AsmStr<string opcode> : BinOpAsmString<
  !strconcat(opcode, " \t${dst}_0, ${a}_0, ${b}_0;\n\t",
             opcode, " \t${dst}_1, ${a}_1, ${b}_1;")>;
| 214 | |
// Assembly text for a four-element three-source operation: `opcode` is
// printed once per element on the _0 .. _3 suffixed forms of $dst, $a, $b
// and $c.
class V4MADStr<string opcode> : BinOpAsmString<
  !strconcat(opcode, " \t${dst}_0, ${a}_0, ${b}_0, ${c}_0;\n\t",
             opcode, " \t${dst}_1, ${a}_1, ${b}_1, ${c}_1;\n\t",
             opcode, " \t${dst}_2, ${a}_2, ${b}_2, ${c}_2;\n\t",
             opcode, " \t${dst}_3, ${a}_3, ${b}_3, ${c}_3;")>;
| 222 | |
// Assembly text for a two-element three-source operation: `opcode` is printed
// on the _0 and _1 suffixed forms of $dst, $a, $b and $c.
class V2MADStr<string opcode> : BinOpAsmString<
  !strconcat(opcode, " \t${dst}_0, ${a}_0, ${b}_0, ${c}_0;\n\t",
             opcode, " \t${dst}_1, ${a}_1, ${b}_1, ${c}_1;")>;
| 227 | |
// Assembly text for a four-element unary operation: `opcode` is printed once
// per element on the _0 .. _3 suffixed forms of $dst and $a.
class V4UnaryStr<string opcode> : BinOpAsmString<
  !strconcat(opcode, " \t${dst}_0, ${a}_0;\n\t",
             opcode, " \t${dst}_1, ${a}_1;\n\t",
             opcode, " \t${dst}_2, ${a}_2;\n\t",
             opcode, " \t${dst}_3, ${a}_3;")>;
| 235 | |
// Assembly text for a two-element unary operation: `opcode` is printed on the
// _0 and _1 suffixed forms of $dst and $a.
class V2UnaryStr<string opcode> : BinOpAsmString<
  !strconcat(opcode, " \t${dst}_0, ${a}_0;\n\t",
             opcode, " \t${dst}_1, ${a}_1;")>;
| 240 | |
// Vector instruction with two sources and a result that all live in the same
// register class.  The pattern selects `OpNode` applied to ($a, $b); the
// assembly text comes from `asmstr.s` and `sInst` is recorded as scalarInst.
class VecBinaryOp<BinOpAsmString asmstr, SDNode OpNode, NVPTXRegClass regclass,
                  NVPTXInst sInst = NOP>
  : NVPTXVecInst<(outs regclass:$dst),
                 (ins regclass:$a, regclass:$b),
                 asmstr.s,
                 [(set regclass:$dst, (OpNode regclass:$a, regclass:$b))],
                 sInst>;
| 247 | |
// Vector shift: the value being shifted ($a) and the result use `regclass1`,
// while the shift amounts ($b) come from a possibly different `regclass2`.
// The pattern selects `OpNode` applied to ($a, $b).
class VecShiftOp<BinOpAsmString asmstr, SDNode OpNode, NVPTXRegClass regclass1,
                 NVPTXRegClass regclass2, NVPTXInst sInst = NOP>
  : NVPTXVecInst<(outs regclass1:$dst),
                 (ins regclass1:$a, regclass2:$b),
                 asmstr.s,
                 [(set regclass1:$dst, (OpNode regclass1:$a, regclass2:$b))],
                 sInst>;
| 254 | |
// Vector instruction with a single source.  Unlike the binary form, the
// operator is supplied as a PatFrag; the pattern selects it applied to $a.
class VecUnaryOp<BinOpAsmString asmstr, PatFrag OpNode, NVPTXRegClass regclass,
                 NVPTXInst sInst = NOP>
  : NVPTXVecInst<(outs regclass:$dst),
                 (ins regclass:$a),
                 asmstr.s,
                 [(set regclass:$dst, (OpNode regclass:$a))],
                 sInst>;
| 260 | |
// Instantiates one VecBinaryOp per integer vector type.  `asmstr` is the
// opcode without its width; the width ("64", "32" or "16") is appended here.
// The i8 vector variants append "16", i.e. they are printed with the same
// opcode as the i16 variants.  The i64op/i32op/i16op/i8op arguments are
// passed through as the scalarInst of the matching element width.
multiclass IntBinVOp<string asmstr, SDNode OpNode,
                     NVPTXInst i64op = NOP, NVPTXInst i32op = NOP,
                     NVPTXInst i16op = NOP, NVPTXInst i8op = NOP> {
  def V2I64 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "64")>, OpNode,
                          V2I64Regs, i64op>;
  def V4I32 : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "32")>, OpNode,
                          V4I32Regs, i32op>;
  def V2I32 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "32")>, OpNode,
                          V2I32Regs, i32op>;
  def V4I16 : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "16")>, OpNode,
                          V4I16Regs, i16op>;
  def V2I16 : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "16")>, OpNode,
                          V2I16Regs, i16op>;
  def V4I8  : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "16")>, OpNode,
                          V4I8Regs, i8op>;
  def V2I8  : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "16")>, OpNode,
                          V2I8Regs, i8op>;
}
| 279 | |
// Instantiates one VecBinaryOp per floating-point vector type.  `asmstr` is
// the opcode up to and including the trailing '.'; the type suffix ("f64",
// "ftz.f32" or "f32") is appended here.  The *_ftz variants are additionally
// guarded by the doF32FTZ predicate and are defined before the plain f32
// variants.
multiclass FloatBinVOp<string asmstr, SDNode OpNode,
                       NVPTXInst f64 = NOP, NVPTXInst f32 = NOP,
                       NVPTXInst f32_ftz = NOP> {
  def V2F64     : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "f64")>, OpNode,
                              V2F64Regs, f64>;
  def V4F32_ftz : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "ftz.f32")>, OpNode,
                              V4F32Regs, f32_ftz>,
                  Requires<[doF32FTZ]>;
  def V2F32_ftz : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "ftz.f32")>, OpNode,
                              V2F32Regs, f32_ftz>,
                  Requires<[doF32FTZ]>;
  def V4F32     : VecBinaryOp<V4AsmStr<!strconcat(asmstr, "f32")>, OpNode,
                              V4F32Regs, f32>;
  def V2F32     : VecBinaryOp<V2AsmStr<!strconcat(asmstr, "f32")>, OpNode,
                              V2F32Regs, f32>;
}
| 294 | |
// Instantiates one VecUnaryOp per integer vector type.  `asmstr` is the opcode
// without its width; the width ("64", "32" or "16") is appended here, with the
// i8 vector variants using "16" just like the i16 ones.
multiclass IntUnaryVOp<string asmstr, PatFrag OpNode,
                       NVPTXInst i64op = NOP, NVPTXInst i32op = NOP,
                       NVPTXInst i16op = NOP, NVPTXInst i8op = NOP> {
  def V2I64 : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "64")>, OpNode,
                         V2I64Regs, i64op>;
  def V4I32 : VecUnaryOp<V4UnaryStr<!strconcat(asmstr, "32")>, OpNode,
                         V4I32Regs, i32op>;
  def V2I32 : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "32")>, OpNode,
                         V2I32Regs, i32op>;
  def V4I16 : VecUnaryOp<V4UnaryStr<!strconcat(asmstr, "16")>, OpNode,
                         V4I16Regs, i16op>;
  def V2I16 : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "16")>, OpNode,
                         V2I16Regs, i16op>;
  def V4I8  : VecUnaryOp<V4UnaryStr<!strconcat(asmstr, "16")>, OpNode,
                         V4I8Regs, i8op>;
  def V2I8  : VecUnaryOp<V2UnaryStr<!strconcat(asmstr, "16")>, OpNode,
                         V2I8Regs, i8op>;
}
| 313 | |
| 314 | |
// Integer Arithmetic
let VecInstType = isVecOther.Value in {
  // Element-wise add and subtract for every integer vector type.
  defm VAdd : IntBinVOp<"add.s", add, ADDi64rr, ADDi32rr, ADDi16rr, ADDi8rr>;
  defm VSub : IntBinVOp<"sub.s", sub, SUBi64rr, SUBi32rr, SUBi16rr, SUBi8rr>;

  // Carry-producing (addc/subc) forms, defined for 32-bit elements only.
  def AddCCV4I32  : VecBinaryOp<V4AsmStr<"add.cc.s32">, addc, V4I32Regs,
                                ADDCCi32rr>;
  def AddCCV2I32  : VecBinaryOp<V2AsmStr<"add.cc.s32">, addc, V2I32Regs,
                                ADDCCi32rr>;
  def SubCCV4I32  : VecBinaryOp<V4AsmStr<"sub.cc.s32">, subc, V4I32Regs,
                                SUBCCi32rr>;
  def SubCCV2I32  : VecBinaryOp<V2AsmStr<"sub.cc.s32">, subc, V2I32Regs,
                                SUBCCi32rr>;

  // Carry-consuming (adde/sube) forms, defined for 32-bit elements only.
  def AddCCCV4I32 : VecBinaryOp<V4AsmStr<"addc.cc.s32">, adde, V4I32Regs,
                                ADDCCCi32rr>;
  def AddCCCV2I32 : VecBinaryOp<V2AsmStr<"addc.cc.s32">, adde, V2I32Regs,
                                ADDCCCi32rr>;
  def SubCCCV4I32 : VecBinaryOp<V4AsmStr<"subc.cc.s32">, sube, V4I32Regs,
                                SUBCCCi32rr>;
  def SubCCCV2I32 : VecBinaryOp<V2AsmStr<"subc.cc.s32">, sube, V2I32Regs,
                                SUBCCCi32rr>;

  // Left shifts.  The shift-amount operand is always a vector of i32 with the
  // same element count as the value being shifted; the i8 variants are
  // printed as "shl.b16".
  def ShiftLV2I64 : VecShiftOp<V2AsmStr<"shl.b64">, shl, V2I64Regs, V2I32Regs,
                               SHLi64rr>;
  def ShiftLV2I32 : VecShiftOp<V2AsmStr<"shl.b32">, shl, V2I32Regs, V2I32Regs,
                               SHLi32rr>;
  def ShiftLV4I32 : VecShiftOp<V4AsmStr<"shl.b32">, shl, V4I32Regs, V4I32Regs,
                               SHLi32rr>;
  def ShiftLV2I16 : VecShiftOp<V2AsmStr<"shl.b16">, shl, V2I16Regs, V2I32Regs,
                               SHLi16rr>;
  def ShiftLV4I16 : VecShiftOp<V4AsmStr<"shl.b16">, shl, V4I16Regs, V4I32Regs,
                               SHLi16rr>;
  def ShiftLV2I8  : VecShiftOp<V2AsmStr<"shl.b16">, shl, V2I8Regs, V2I32Regs,
                               SHLi8rr>;
  def ShiftLV4I8  : VecShiftOp<V4AsmStr<"shl.b16">, shl, V4I8Regs, V4I32Regs,
                               SHLi8rr>;
}
| 352 | |
// cvt to v*i32, helpers for shift
//
// Pattern-less conversion from `inclass` ($s) to `outclass` ($d).  Because the
// pattern list is empty, these instructions are only reachable when named
// explicitly in the result of a Pat.
class CVTtoVeci32<NVPTXRegClass inclass, NVPTXRegClass outclass, string asmstr,
                  NVPTXInst sInst = NOP>
  : NVPTXVecInst<(outs outclass:$d), (ins inclass:$s), asmstr, [], sInst>;
| 357 | |
// Builds one conversion line of the form "<op>\t<dest>, <src>;" in field `s`.
class VecCVTStrHelper<string op, string dest, string src> {
  string s = !strconcat(op, "\t", dest, ", ", src, ";");
}
| 362 | |
// Two-element conversion text: one VecCVTStrHelper line for the _0 element
// and one for the _1 element of $d / $s, separated by "\n\t".
class Vec2CVTStr<string op> {
  string s = !strconcat(VecCVTStrHelper<op, "${d}_0", "${s}_0">.s,
                        "\n\t",
                        VecCVTStrHelper<op, "${d}_1", "${s}_1">.s);
}
| 367 | |
// Four-element conversion text: one VecCVTStrHelper line per element
// (_0 .. _3) of $d / $s, separated by "\n\t".
class Vec4CVTStr<string op> {
  string s = !strconcat(VecCVTStrHelper<op, "${d}_0", "${s}_0">.s,
                        "\n\t",
                        VecCVTStrHelper<op, "${d}_1", "${s}_1">.s,
                        "\n\t",
                        VecCVTStrHelper<op, "${d}_2", "${s}_2">.s,
                        "\n\t",
                        VecCVTStrHelper<op, "${d}_3", "${s}_3">.s);
}
| 376 | |
// Conversions of shift-amount vectors to i32 elements.  The i8 and i16
// sources are both printed as "cvt.u32.u16"; the i64 source is printed as
// "cvt.u32.u64".
let VecInstType = isVecOther.Value in {
  def CVTv2i8tov2i32  : CVTtoVeci32<V2I8Regs, V2I32Regs,
                                    Vec2CVTStr<"cvt.u32.u16">.s,
                                    Zint_extendext8to32>;
  def CVTv2i16tov2i32 : CVTtoVeci32<V2I16Regs, V2I32Regs,
                                    Vec2CVTStr<"cvt.u32.u16">.s,
                                    Zint_extendext16to32>;
  def CVTv4i8tov4i32  : CVTtoVeci32<V4I8Regs, V4I32Regs,
                                    Vec4CVTStr<"cvt.u32.u16">.s,
                                    Zint_extendext8to32>;
  def CVTv4i16tov4i32 : CVTtoVeci32<V4I16Regs, V4I32Regs,
                                    Vec4CVTStr<"cvt.u32.u16">.s,
                                    Zint_extendext16to32>;
  def CVTv2i64tov2i32 : CVTtoVeci32<V2I64Regs, V2I32Regs,
                                    Vec2CVTStr<"cvt.u32.u64">.s,
                                    TRUNC_64to32>;
}
| 389 | |
// Left shifts whose shift-amount vector has the same element type as the
// shifted value: the amount is first converted to an i32 vector with the
// matching CVT* instruction, then fed to the ShiftL* instruction.

// Two-element vectors.
def : Pat<(shl V2I16Regs:$src1, V2I16Regs:$src2),
          (ShiftLV2I16 V2I16Regs:$src1,
                       (CVTv2i16tov2i32 V2I16Regs:$src2))>;
def : Pat<(shl V2I8Regs:$src1, V2I8Regs:$src2),
          (ShiftLV2I8 V2I8Regs:$src1,
                      (CVTv2i8tov2i32 V2I8Regs:$src2))>;
def : Pat<(shl V2I64Regs:$src1, V2I64Regs:$src2),
          (ShiftLV2I64 V2I64Regs:$src1,
                       (CVTv2i64tov2i32 V2I64Regs:$src2))>;

// Four-element vectors.
def : Pat<(shl V4I16Regs:$src1, V4I16Regs:$src2),
          (ShiftLV4I16 V4I16Regs:$src1,
                       (CVTv4i16tov4i32 V4I16Regs:$src2))>;
def : Pat<(shl V4I8Regs:$src1, V4I8Regs:$src2),
          (ShiftLV4I8 V4I8Regs:$src1,
                      (CVTv4i8tov4i32 V4I8Regs:$src2))>;
| 401 | |
let VecInstType = isVecOther.Value in {
  // Arithmetic right shifts ("shr.s*").  The shift amounts are always an i32
  // vector; the i8 variants are printed with the 16-bit opcode.
  def ShiftRAV2I64 : VecShiftOp<V2AsmStr<"shr.s64">, sra, V2I64Regs, V2I32Regs,
                                SRAi64rr>;
  def ShiftRAV2I32 : VecShiftOp<V2AsmStr<"shr.s32">, sra, V2I32Regs, V2I32Regs,
                                SRAi32rr>;
  def ShiftRAV4I32 : VecShiftOp<V4AsmStr<"shr.s32">, sra, V4I32Regs, V4I32Regs,
                                SRAi32rr>;
  def ShiftRAV2I16 : VecShiftOp<V2AsmStr<"shr.s16">, sra, V2I16Regs, V2I32Regs,
                                SRAi16rr>;
  def ShiftRAV4I16 : VecShiftOp<V4AsmStr<"shr.s16">, sra, V4I16Regs, V4I32Regs,
                                SRAi16rr>;
  def ShiftRAV2I8  : VecShiftOp<V2AsmStr<"shr.s16">, sra, V2I8Regs, V2I32Regs,
                                SRAi8rr>;
  def ShiftRAV4I8  : VecShiftOp<V4AsmStr<"shr.s16">, sra, V4I8Regs, V4I32Regs,
                                SRAi8rr>;

  // Logical right shifts ("shr.u*"), same operand layout as above.
  def ShiftRLV2I64 : VecShiftOp<V2AsmStr<"shr.u64">, srl, V2I64Regs, V2I32Regs,
                                SRLi64rr>;
  def ShiftRLV2I32 : VecShiftOp<V2AsmStr<"shr.u32">, srl, V2I32Regs, V2I32Regs,
                                SRLi32rr>;
  def ShiftRLV4I32 : VecShiftOp<V4AsmStr<"shr.u32">, srl, V4I32Regs, V4I32Regs,
                                SRLi32rr>;
  def ShiftRLV2I16 : VecShiftOp<V2AsmStr<"shr.u16">, srl, V2I16Regs, V2I32Regs,
                                SRLi16rr>;
  def ShiftRLV4I16 : VecShiftOp<V4AsmStr<"shr.u16">, srl, V4I16Regs, V4I32Regs,
                                SRLi16rr>;
  def ShiftRLV2I8  : VecShiftOp<V2AsmStr<"shr.u16">, srl, V2I8Regs, V2I32Regs,
                                SRLi8rr>;
  def ShiftRLV4I8  : VecShiftOp<V4AsmStr<"shr.u16">, srl, V4I8Regs, V4I32Regs,
                                SRLi8rr>;

  // Multiplication: low half, signed high half, unsigned high half.
  defm VMult   : IntBinVOp<"mul.lo.s", mul,
                           MULTi64rr, MULTi32rr, MULTi16rr, MULTi8rr>;
  defm VMultHS : IntBinVOp<"mul.hi.s", mulhs,
                           MULTHSi64rr, MULTHSi32rr, MULTHSi16rr, MULTHSi8rr>;
  defm VMultHU : IntBinVOp<"mul.hi.u", mulhu,
                           MULTHUi64rr, MULTHUi32rr, MULTHUi16rr, MULTHUi8rr>;

  // Signed / unsigned division and remainder.
  defm VSDiv : IntBinVOp<"div.s", sdiv,
                         SDIVi64rr, SDIVi32rr, SDIVi16rr, SDIVi8rr>;
  defm VUDiv : IntBinVOp<"div.u", udiv,
                         UDIVi64rr, UDIVi32rr, UDIVi16rr, UDIVi8rr>;
  defm VSRem : IntBinVOp<"rem.s", srem,
                         SREMi64rr, SREMi32rr, SREMi16rr, SREMi8rr>;
  defm VURem : IntBinVOp<"rem.u", urem,
                         UREMi64rr, UREMi32rr, UREMi16rr, UREMi8rr>;
}
| 450 | |
// Right shifts whose shift-amount vector has the same element type as the
// shifted value: the amount is first converted to an i32 vector with the
// matching CVT* instruction, then fed to the ShiftRA* / ShiftRL* instruction.

// Arithmetic right shift, two-element vectors.
def : Pat<(sra V2I16Regs:$src1, V2I16Regs:$src2),
          (ShiftRAV2I16 V2I16Regs:$src1,
                        (CVTv2i16tov2i32 V2I16Regs:$src2))>;
def : Pat<(sra V2I8Regs:$src1, V2I8Regs:$src2),
          (ShiftRAV2I8 V2I8Regs:$src1,
                       (CVTv2i8tov2i32 V2I8Regs:$src2))>;
def : Pat<(sra V2I64Regs:$src1, V2I64Regs:$src2),
          (ShiftRAV2I64 V2I64Regs:$src1,
                        (CVTv2i64tov2i32 V2I64Regs:$src2))>;

// Arithmetic right shift, four-element vectors.
def : Pat<(sra V4I16Regs:$src1, V4I16Regs:$src2),
          (ShiftRAV4I16 V4I16Regs:$src1,
                        (CVTv4i16tov4i32 V4I16Regs:$src2))>;
def : Pat<(sra V4I8Regs:$src1, V4I8Regs:$src2),
          (ShiftRAV4I8 V4I8Regs:$src1,
                       (CVTv4i8tov4i32 V4I8Regs:$src2))>;

// Logical right shift, two-element vectors.
def : Pat<(srl V2I16Regs:$src1, V2I16Regs:$src2),
          (ShiftRLV2I16 V2I16Regs:$src1,
                        (CVTv2i16tov2i32 V2I16Regs:$src2))>;
def : Pat<(srl V2I8Regs:$src1, V2I8Regs:$src2),
          (ShiftRLV2I8 V2I8Regs:$src1,
                       (CVTv2i8tov2i32 V2I8Regs:$src2))>;
def : Pat<(srl V2I64Regs:$src1, V2I64Regs:$src2),
          (ShiftRLV2I64 V2I64Regs:$src1,
                        (CVTv2i64tov2i32 V2I64Regs:$src2))>;

// Logical right shift, four-element vectors.
def : Pat<(srl V4I16Regs:$src1, V4I16Regs:$src2),
          (ShiftRLV4I16 V4I16Regs:$src1,
                        (CVTv4i16tov4i32 V4I16Regs:$src2))>;
def : Pat<(srl V4I8Regs:$src1, V4I8Regs:$src2),
          (ShiftRLV4I8 V4I8Regs:$src1,
                       (CVTv4i8tov4i32 V4I8Regs:$src2))>;
| 474 | |
// Multiply-add on four- and two-element vectors.  Each variant matches
// (an (mn $a, $b), $c), where `an` / `mn` default to add / mul, prints the
// opcode once per element via V4MADStr / V2MADStr, records `sop` as the
// scalarInst and is guarded by the predicate `Pred`.
multiclass VMAD<string asmstr, NVPTXRegClass regclassv4,
                NVPTXRegClass regclassv2,
                SDNode an = add, SDNode mn = mul, NVPTXInst sop = NOP,
                Predicate Pred> {
  def V4 : NVPTXVecInst<(outs regclassv4:$dst),
                        (ins regclassv4:$a, regclassv4:$b, regclassv4:$c),
                        V4MADStr<asmstr>.s,
                        [(set regclassv4:$dst,
                              (an (mn regclassv4:$a, regclassv4:$b),
                                  regclassv4:$c))],
                        sop>,
           Requires<[Pred]>;

  def V2 : NVPTXVecInst<(outs regclassv2:$dst),
                        (ins regclassv2:$a, regclassv2:$b, regclassv2:$c),
                        V2MADStr<asmstr>.s,
                        [(set regclassv2:$dst,
                              (an (mn regclassv2:$a, regclassv2:$b),
                                  regclassv2:$c))],
                        sop>,
           Requires<[Pred]>;
}
| 494 | |
// Integer multiply-add for types that only have a two-element vector form.
// Matches (add (mul $a, $b), $c) and is guarded by the predicate `Pred`.
multiclass VMADV2Only<string asmstr, NVPTXRegClass regclass,
                      NVPTXInst sop = NOP, Predicate Pred> {
  def V2 : NVPTXVecInst<(outs regclass:$dst),
                        (ins regclass:$a, regclass:$b, regclass:$c),
                        V2MADStr<asmstr>.s,
                        [(set regclass:$dst,
                              (add (mul regclass:$a, regclass:$b),
                                   regclass:$c))],
                        sop>,
           Requires<[Pred]>;
}
// Floating-point multiply-add for types that only have a two-element vector
// form.  Matches (fadd (fmul $a, $b), $c) and is guarded by the predicate
// `Pred`.
multiclass VFMADV2Only<string asmstr, NVPTXRegClass regclass,
                       NVPTXInst sop = NOP, Predicate Pred> {
  def V2 : NVPTXVecInst<(outs regclass:$dst),
                        (ins regclass:$a, regclass:$b, regclass:$c),
                        V2MADStr<asmstr>.s,
                        [(set regclass:$dst,
                              (fadd (fmul regclass:$a, regclass:$b),
                                    regclass:$c))],
                        sop>,
           Requires<[Pred]>;
}
| 513 | |
let VecInstType = isVecOther.Value in {
  // Integer multiply-add.  The i8 variant is printed with the 16-bit opcode;
  // i64 only exists in the two-element form.
  defm I8MAD  : VMAD<"mad.lo.s16", V4I8Regs, V2I8Regs, add, mul,
                     MAD8rrr, true>;
  defm I16MAD : VMAD<"mad.lo.s16", V4I16Regs, V2I16Regs, add, mul,
                     MAD16rrr, true>;
  defm I32MAD : VMAD<"mad.lo.s32", V4I32Regs, V2I32Regs, add, mul,
                     MAD32rrr, true>;
  defm I64MAD : VMADV2Only<"mad.lo.s64", V2I64Regs, MAD64rrr, true>;

  // Integer negate.
  defm VNeg : IntUnaryVOp<"neg.s", ineg, INEG64, INEG32, INEG16, INEG8>;

  // Floating-point add / subtract / multiply.
  defm VAddf : FloatBinVOp<"add.", fadd, FADDf64rr, FADDf32rr, FADDf32rr_ftz>;
  defm VSubf : FloatBinVOp<"sub.", fsub, FSUBf64rr, FSUBf32rr, FSUBf32rr_ftz>;
  defm VMulf : FloatBinVOp<"mul.", fmul, FMULf64rr, FMULf32rr, FMULf32rr_ftz>;

  // Floating-point multiply-add, each variant guarded by its own predicate.
  // The ftz forms are defined ahead of the non-ftz forms.
  defm F32MAD_ftz : VMAD<"mad.ftz.f32", V4F32Regs, V2F32Regs, fadd, fmul,
                         FMAD32_ftzrrr, doFMADF32_ftz>;
  defm F32FMA_ftz : VMAD<"fma.rn.ftz.f32", V4F32Regs, V2F32Regs, fadd, fmul,
                         FMA32_ftzrrr, doFMAF32_ftz>;
  defm F32MAD     : VMAD<"mad.f32", V4F32Regs, V2F32Regs, fadd, fmul,
                         FMAD32rrr, doFMADF32>;
  defm F32FMA     : VMAD<"fma.rn.f32", V4F32Regs, V2F32Regs, fadd, fmul,
                         FMA32rrr, doFMAF32>;
  defm F64FMA     : VFMADV2Only<"fma.rn.f64", V2F64Regs, FMA64rrr, doFMAF64>;
}
| 538 | |
// Floating-point division.  Variants are listed from the most to the least
// constrained: "div.rn" forms require reqPTX20 (plus doF32FTZ for the ftz
// flavour), "div.full.ftz" requires doF32FTZ, and the remaining forms carry
// no predicate.
let VecInstType = isVecOther.Value in {
  def V4F32Div_prec_ftz : VecBinaryOp<V4AsmStr<"div.rn.ftz.f32">, fdiv,
                                      V4F32Regs, FDIV32rr_prec_ftz>,
                          Requires<[doF32FTZ, reqPTX20]>;
  def V2F32Div_prec_ftz : VecBinaryOp<V2AsmStr<"div.rn.ftz.f32">, fdiv,
                                      V2F32Regs, FDIV32rr_prec_ftz>,
                          Requires<[doF32FTZ, reqPTX20]>;

  def V4F32Div_prec : VecBinaryOp<V4AsmStr<"div.rn.f32">, fdiv,
                                  V4F32Regs, FDIV32rr_prec>,
                      Requires<[reqPTX20]>;
  def V2F32Div_prec : VecBinaryOp<V2AsmStr<"div.rn.f32">, fdiv,
                                  V2F32Regs, FDIV32rr_prec>,
                      Requires<[reqPTX20]>;

  def V2F32Div_ftz : VecBinaryOp<V2AsmStr<"div.full.ftz.f32">, fdiv,
                                 V2F32Regs, FDIV32rr_ftz>,
                     Requires<[doF32FTZ]>;
  def V4F32Div_ftz : VecBinaryOp<V4AsmStr<"div.full.ftz.f32">, fdiv,
                                 V4F32Regs, FDIV32rr_ftz>,
                     Requires<[doF32FTZ]>;

  def V2F32Div : VecBinaryOp<V2AsmStr<"div.full.f32">, fdiv,
                             V2F32Regs, FDIV32rr>;
  def V4F32Div : VecBinaryOp<V4AsmStr<"div.full.f32">, fdiv,
                             V4F32Regs, FDIV32rr>;
  def V2F64Div : VecBinaryOp<V2AsmStr<"div.rn.f64">, fdiv,
                             V2F64Regs, FDIV64rr>;
}
| 556 | |
// PatFrag wrapper around fneg, so floating-point negation can be handed to
// VecUnaryOp, which takes its operator as a PatFrag.
def fnegpat : PatFrag<(ops node:$in),
                      (fneg node:$in)>;
| 558 | |
let VecInstType = isVecOther.Value in {
  // Floating-point negate.  The ftz flavours require doF32FTZ and are defined
  // ahead of the unpredicated ones.
  def VNegv2f32_ftz : VecUnaryOp<V2UnaryStr<"neg.ftz.f32">, fnegpat,
                                 V2F32Regs, FNEGf32_ftz>,
                      Requires<[doF32FTZ]>;
  def VNegv4f32_ftz : VecUnaryOp<V4UnaryStr<"neg.ftz.f32">, fnegpat,
                                 V4F32Regs, FNEGf32_ftz>,
                      Requires<[doF32FTZ]>;
  def VNegv2f32 : VecUnaryOp<V2UnaryStr<"neg.f32">, fnegpat,
                             V2F32Regs, FNEGf32>;
  def VNegv4f32 : VecUnaryOp<V4UnaryStr<"neg.f32">, fnegpat,
                             V4F32Regs, FNEGf32>;
  def VNegv2f64 : VecUnaryOp<V2UnaryStr<"neg.f64">, fnegpat,
                             V2F64Regs, FNEGf64>;

  // Logical Arithmetic
  defm VAnd : IntBinVOp<"and.b", and, ANDb64rr, ANDb32rr, ANDb16rr, ANDb8rr>;
  defm VOr  : IntBinVOp<"or.b", or, ORb64rr, ORb32rr, ORb16rr, ORb8rr>;
  defm VXor : IntBinVOp<"xor.b", xor, XORb64rr, XORb32rr, XORb16rr, XORb8rr>;

  defm VNot : IntUnaryVOp<"not.b", not, NOT64, NOT32, NOT16, NOT8>;
}
| 575 | |
| 576 | |
// Folds an fsub with an fmul operand into the three-operand instruction
// `Inst` for v2f32, guarded by `Pred`:
//   a - (b * c)  ->  Inst(-b, c, a)
//   (a * b) - c  ->  Inst(a, b, -c)
// The negation is materialised with VNegv2f32.
multiclass V2FPCONTRACT32_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
  def : Pat<(fsub V2F32Regs:$a, (fmul V2F32Regs:$b, V2F32Regs:$c)),
            (Inst (VNegv2f32 V2F32Regs:$b), V2F32Regs:$c, V2F32Regs:$a)>,
        Requires<[Pred]>;

  def : Pat<(fsub (fmul V2F32Regs:$a, V2F32Regs:$b), V2F32Regs:$c),
            (Inst V2F32Regs:$a, V2F32Regs:$b, (VNegv2f32 V2F32Regs:$c))>,
        Requires<[Pred]>;
}

defm V2FMAF32ext_ftz  : V2FPCONTRACT32_SUB_PAT<F32FMA_ftzV2, doFMAF32AGG_ftz>;
defm V2FMADF32ext_ftz : V2FPCONTRACT32_SUB_PAT<F32MAD_ftzV2, doFMADF32_ftz>;
defm V2FMAF32ext      : V2FPCONTRACT32_SUB_PAT<F32FMAV2, doFMAF32AGG>;
defm V2FMADF32ext     : V2FPCONTRACT32_SUB_PAT<F32MADV2, doFMADF32>;
| 591 | |
// Folds an fsub with an fmul operand into the three-operand instruction
// `Inst` for v4f32, guarded by `Pred`:
//   a - (b * c)  ->  Inst(-b, c, a)
//   (a * b) - c  ->  Inst(a, b, -c)
// The negation is materialised with VNegv4f32.
multiclass V4FPCONTRACT32_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
  def : Pat<(fsub V4F32Regs:$a, (fmul V4F32Regs:$b, V4F32Regs:$c)),
            (Inst (VNegv4f32 V4F32Regs:$b), V4F32Regs:$c, V4F32Regs:$a)>,
        Requires<[Pred]>;

  def : Pat<(fsub (fmul V4F32Regs:$a, V4F32Regs:$b), V4F32Regs:$c),
            (Inst V4F32Regs:$a, V4F32Regs:$b, (VNegv4f32 V4F32Regs:$c))>,
        Requires<[Pred]>;
}

defm V4FMAF32ext_ftz  : V4FPCONTRACT32_SUB_PAT<F32FMA_ftzV4, doFMAF32AGG_ftz>;
defm V4FMADF32ext_ftz : V4FPCONTRACT32_SUB_PAT<F32MAD_ftzV4, doFMADF32_ftz>;
defm V4FMAF32ext      : V4FPCONTRACT32_SUB_PAT<F32FMAV4, doFMAF32AGG>;
defm V4FMADF32ext     : V4FPCONTRACT32_SUB_PAT<F32MADV4, doFMADF32>;
| 606 | |
// Folds an fsub with an fmul operand into the three-operand instruction
// `Inst` for v2f64, guarded by `Pred`:
//   a - (b * c)  ->  Inst(-b, c, a)
//   (a * b) - c  ->  Inst(a, b, -c)
// The negation is materialised with VNegv2f64.
multiclass V2FPCONTRACT64_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
  def : Pat<(fsub V2F64Regs:$a, (fmul V2F64Regs:$b, V2F64Regs:$c)),
            (Inst (VNegv2f64 V2F64Regs:$b), V2F64Regs:$c, V2F64Regs:$a)>,
        Requires<[Pred]>;

  def : Pat<(fsub (fmul V2F64Regs:$a, V2F64Regs:$b), V2F64Regs:$c),
            (Inst V2F64Regs:$a, V2F64Regs:$b, (VNegv2f64 V2F64Regs:$c))>,
        Requires<[Pred]>;
}

defm V2FMAF64ext : V2FPCONTRACT64_SUB_PAT<F64FMAV2, doFMAF64AGG>;
| 618 | |
// Builds the operand reference "${c<elem>:vecv<vecsize><extra><l>}" in field
// `s`, i.e. the shuffle-mask operand $c<elem> printed through the modifier
// "vecv<vecsize><extra><l>".  `l` is an optional trailing tag and defaults to
// the empty string.
class VecModStr<string vecsize, string elem, string extra, string l = ""> {
  string s = !strconcat("${c", elem, ":vecv", vecsize, extra, l, "}");
}
// Assembly text that fills destination element `elem` of a shuffle.  Two
// "mov.<type>" lines are produced, one reading from $src1 and one from $src2.
// Each line is prefixed by a "comm" modifier on the mask operand (tagged "1"
// or "2" respectively) and has a "pos" modifier appended to its source.
// NOTE(review): the "comm" modifier presumably comments out the line whose
// source is not the one selected by the mask - confirm in the asm printer.
class ShuffleOneLine<string vecsize, string elem, string type> {
  string s = !strconcat(
    // Candidate move from the first source vector.
    VecModStr<vecsize, elem, "comm", "1">.s,
    "mov.", type, " \t${dst}_", elem, ", $src1",
    VecModStr<vecsize, elem, "pos">.s,
    ";\n\t",
    // Candidate move from the second source vector.
    VecModStr<vecsize, elem, "comm", "2">.s,
    "mov.", type, " \t${dst}_", elem, ", $src2",
    VecModStr<vecsize, elem, "pos">.s,
    ";");
}
// Shuffle text for a two-element vector: the ShuffleOneLine output for
// elements 0 and 1, separated by "\n\t".
class ShuffleAsmStr2<string type> {
  string s = !strconcat(ShuffleOneLine<"2", "0", type>.s,
                        "\n\t",
                        ShuffleOneLine<"2", "1", type>.s);
}
// Shuffle text for a four-element vector: the ShuffleOneLine output for
// elements 0 through 3, separated by "\n\t".
class ShuffleAsmStr4<string type> {
  string s = !strconcat(ShuffleOneLine<"4", "0", type>.s,
                        "\n\t",
                        ShuffleOneLine<"4", "1", type>.s,
                        "\n\t",
                        ShuffleOneLine<"4", "2", type>.s,
                        "\n\t",
                        ShuffleOneLine<"4", "3", type>.s);
}
| 663 | |
// Pattern-less four-element shuffle of $src1 / $src2 controlled by the
// immediates $c0 .. $c3.  The emitted text starts with a "//Mov ..." comment
// line listing all operands, followed by the ShuffleAsmStr4 body for `ty`.
class NVPTXVecShuffle4Inst<NVPTXRegClass vclass, string ty, NVPTXInst sInst>
  : NVPTXVecInst<(outs vclass:$dst),
                 (ins vclass:$src1, vclass:$src2,
                      i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3),
                 !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t",
                            ShuffleAsmStr4<ty>.s),
                 [], sInst>;

// Pattern-less two-element shuffle of $src1 / $src2 controlled by the
// immediates $c0 and $c1; same layout as the four-element form.
class NVPTXVecShuffle2Inst<NVPTXRegClass vclass, string ty, NVPTXInst sInst>
  : NVPTXVecInst<(outs vclass:$dst),
                 (ins vclass:$src1, vclass:$src2,
                      i8imm:$c0, i8imm:$c1),
                 !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t",
                            ShuffleAsmStr2<ty>.s),
                 [], sInst>;

let neverHasSideEffects = 1, VecInstType = isVecShuffle.Value in {
  // Four-element shuffles; the i8 variant is printed with the u16 type.
  def VecShuffle_v4f32 : NVPTXVecShuffle4Inst<V4F32Regs, "f32", FMOV32rr>;
  def VecShuffle_v4i32 : NVPTXVecShuffle4Inst<V4I32Regs, "u32", IMOV32rr>;
  def VecShuffle_v4i16 : NVPTXVecShuffle4Inst<V4I16Regs, "u16", IMOV16rr>;
  def VecShuffle_v4i8  : NVPTXVecShuffle4Inst<V4I8Regs, "u16", IMOV8rr>;

  // Two-element shuffles; the i8 variant is printed with the u16 type.
  def VecShuffle_v2f32 : NVPTXVecShuffle2Inst<V2F32Regs, "f32", FMOV32rr>;
  def VecShuffle_v2i32 : NVPTXVecShuffle2Inst<V2I32Regs, "u32", IMOV32rr>;
  def VecShuffle_v2i8  : NVPTXVecShuffle2Inst<V2I8Regs, "u16", IMOV8rr>;
  def VecShuffle_v2i16 : NVPTXVecShuffle2Inst<V2I16Regs, "u16", IMOV16rr>;
  def VecShuffle_v2f64 : NVPTXVecShuffle2Inst<V2F64Regs, "f64", FMOV64rr>;
  def VecShuffle_v2i64 : NVPTXVecShuffle2Inst<V2I64Regs, "u64", IMOV64rr>;
}
| 735 | |
// Node transform: given a vector_shuffle node N, produce mask element 0 as an
// MVT::i32 target constant.
def ShuffleMask0 : SDNodeXForm<vector_shuffle, [{
  return CurDAG->getTargetConstant(
      cast<ShuffleVectorSDNode>(N)->getMaskElt(0), MVT::i32);
}]>;
// Node transform: given a vector_shuffle node N, produce mask element 1 as an
// MVT::i32 target constant.
def ShuffleMask1 : SDNodeXForm<vector_shuffle, [{
  return CurDAG->getTargetConstant(
      cast<ShuffleVectorSDNode>(N)->getMaskElt(1), MVT::i32);
}]>;
// Node transform: given a vector_shuffle node N, produce mask element 2 as an
// MVT::i32 target constant.
def ShuffleMask2 : SDNodeXForm<vector_shuffle, [{
  return CurDAG->getTargetConstant(
      cast<ShuffleVectorSDNode>(N)->getMaskElt(2), MVT::i32);
}]>;
// Node transform: given a vector_shuffle node N, produce mask element 3 as an
// MVT::i32 target constant.
def ShuffleMask3 : SDNodeXForm<vector_shuffle, [{
  return CurDAG->getTargetConstant(
      cast<ShuffleVectorSDNode>(N)->getMaskElt(3), MVT::i32);
}]>;
| 752 | |
// Pattern fragment wrapping vector_shuffle with a predicate that always
// accepts.  The predicate body calls N->getGluedNode() and discards the
// result; the call exists only so that N is referenced, which keeps the
// compiler from warning about an unused N.
def vec_shuf
  : PatFrag<(ops node:$lhs, node:$rhs),
            (vector_shuffle node:$lhs, node:$rhs),
            [{
              N->getGluedNode();
              return true;
            }]>;
| 758 | |
// Selection patterns for vec_shuf.
//
// Each pattern binds the matched shuffle node as $op and selects the
// VecShuffle_* instruction for the result type.  The instruction receives the
// two source vectors followed by ShuffleMask<i> applied to $op for each result
// element (two for 2-wide types, four for 4-wide types).

// v2f64
def : Pat<(v2f64 (vec_shuf:$op V2F64Regs:$src1, V2F64Regs:$src2)),
          (VecShuffle_v2f64 V2F64Regs:$src1, V2F64Regs:$src2,
                            (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;

// v4f32
def : Pat<(v4f32 (vec_shuf:$op V4F32Regs:$src1, V4F32Regs:$src2)),
          (VecShuffle_v4f32 V4F32Regs:$src1, V4F32Regs:$src2,
                            (ShuffleMask0 node:$op), (ShuffleMask1 node:$op),
                            (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>;

// v2f32
def : Pat<(v2f32 (vec_shuf:$op V2F32Regs:$src1, V2F32Regs:$src2)),
          (VecShuffle_v2f32 V2F32Regs:$src1, V2F32Regs:$src2,
                            (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;

// v2i64
def : Pat<(v2i64 (vec_shuf:$op V2I64Regs:$src1, V2I64Regs:$src2)),
          (VecShuffle_v2i64 V2I64Regs:$src1, V2I64Regs:$src2,
                            (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;

// v4i32
def : Pat<(v4i32 (vec_shuf:$op V4I32Regs:$src1, V4I32Regs:$src2)),
          (VecShuffle_v4i32 V4I32Regs:$src1, V4I32Regs:$src2,
                            (ShuffleMask0 node:$op), (ShuffleMask1 node:$op),
                            (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>;

// v2i32
def : Pat<(v2i32 (vec_shuf:$op V2I32Regs:$src1, V2I32Regs:$src2)),
          (VecShuffle_v2i32 V2I32Regs:$src1, V2I32Regs:$src2,
                            (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;

// v4i16
def : Pat<(v4i16 (vec_shuf:$op V4I16Regs:$src1, V4I16Regs:$src2)),
          (VecShuffle_v4i16 V4I16Regs:$src1, V4I16Regs:$src2,
                            (ShuffleMask0 node:$op), (ShuffleMask1 node:$op),
                            (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>;

// v2i16
def : Pat<(v2i16 (vec_shuf:$op V2I16Regs:$src1, V2I16Regs:$src2)),
          (VecShuffle_v2i16 V2I16Regs:$src1, V2I16Regs:$src2,
                            (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;

// v4i8
def : Pat<(v4i8 (vec_shuf:$op V4I8Regs:$src1, V4I8Regs:$src2)),
          (VecShuffle_v4i8 V4I8Regs:$src1, V4I8Regs:$src2,
                           (ShuffleMask0 node:$op), (ShuffleMask1 node:$op),
                           (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>;

// v2i8
def : Pat<(v2i8 (vec_shuf:$op V2I8Regs:$src1, V2I8Regs:$src2)),
          (VecShuffle_v2i8 V2I8Regs:$src1, V2I8Regs:$src2,
                           (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>;
| 802 | |
// Builds a two-element vector register from two scalar registers.
//
//   asmstr - opcode text placed before the operands
//   vclass - register class of the vector result ($dst)
//   sclass - register class of the scalar inputs ($a1, $a2)
//   si     - instruction NVPTXVecInst records as `scalarInst`
//
// Matches build_vector of the two scalar inputs.
class Build_Vector2<string asmstr, NVPTXRegClass vclass, NVPTXRegClass sclass,
                    NVPTXInst si>
  : NVPTXVecInst<(outs vclass:$dst),
                 (ins sclass:$a1, sclass:$a2),
                 !strconcat(asmstr, "\t${dst:vecfull}, {{$a1, $a2}};"),
                 [(set vclass:$dst, (build_vector sclass:$a1, sclass:$a2))],
                 si>;
// Builds a four-element vector register from four scalar registers.
//
//   asmstr - opcode text placed before the operands
//   vclass - register class of the vector result ($dst)
//   sclass - register class of the scalar inputs ($a1..$a4)
//   si     - instruction NVPTXVecInst records as `scalarInst`
//
// Matches build_vector of the four scalar inputs.
class Build_Vector4<string asmstr, NVPTXRegClass vclass, NVPTXRegClass sclass,
                    NVPTXInst si>
  : NVPTXVecInst<(outs vclass:$dst),
                 (ins sclass:$a1, sclass:$a2, sclass:$a3, sclass:$a4),
                 !strconcat(asmstr,
                            "\t${dst:vecfull}, {{$a1, $a2, $a3, $a4}};"),
                 [(set vclass:$dst,
                       (build_vector sclass:$a1, sclass:$a2,
                                     sclass:$a3, sclass:$a4))],
                 si>;
| 818 | |
// Concrete build_vector instructions, one per supported vector type.  All are
// flagged isAsCheapAsAMove and tagged VecInstType = isVecBuild.Value.  The i8
// variants use the same "u16" opcode text as the i16 variants.
let isAsCheapAsAMove = 1, VecInstType = isVecBuild.Value in {
  // Two-element floating point.
  def Build_Vector2_f32
    : Build_Vector2<"mov.v2.f32", V2F32Regs, Float32Regs, FMOV32rr>;
  def Build_Vector2_f64
    : Build_Vector2<"mov.v2.f64", V2F64Regs, Float64Regs, FMOV64rr>;

  // Two-element integer.
  def Build_Vector2_i32
    : Build_Vector2<"mov.v2.u32", V2I32Regs, Int32Regs, IMOV32rr>;
  def Build_Vector2_i64
    : Build_Vector2<"mov.v2.u64", V2I64Regs, Int64Regs, IMOV64rr>;
  def Build_Vector2_i16
    : Build_Vector2<"mov.v2.u16", V2I16Regs, Int16Regs, IMOV16rr>;
  def Build_Vector2_i8
    : Build_Vector2<"mov.v2.u16", V2I8Regs, Int8Regs, IMOV8rr>;

  // Four-element floating point.
  def Build_Vector4_f32
    : Build_Vector4<"mov.v4.f32", V4F32Regs, Float32Regs, FMOV32rr>;

  // Four-element integer.
  def Build_Vector4_i32
    : Build_Vector4<"mov.v4.u32", V4I32Regs, Int32Regs, IMOV32rr>;
  def Build_Vector4_i16
    : Build_Vector4<"mov.v4.u16", V4I16Regs, Int16Regs, IMOV16rr>;
  def Build_Vector4_i8
    : Build_Vector4<"mov.v4.u16", V4I8Regs, Int8Regs, IMOV8rr>;
}
| 844 | |
// Whole-vector register move: $dst and $src share register class `vclass`.
//
//   asmstr - opcode text placed before the operands
//   sop    - instruction NVPTXVecInst records as `scalarInst` (defaults to NOP)
//
// No selection pattern is attached.
class Vec_Move<string asmstr, NVPTXRegClass vclass, NVPTXInst sop = NOP>
  : NVPTXVecInst<(outs vclass:$dst),
                 (ins vclass:$src),
                 !strconcat(asmstr, "\t${dst:vecfull}, ${src:vecfull};"),
                 [],
                 sop>;
| 849 | |
// Vector move instructions, one per vector register class.  All are flagged
// isAsCheapAsAMove, neverHasSideEffects and IsSimpleMove, and are tagged
// VecInstType = isVecOther.Value.  The i8 variants use the same "u16" opcode
// text as the i16 variants.
let isAsCheapAsAMove = 1, neverHasSideEffects = 1, IsSimpleMove = 1,
    VecInstType = isVecOther.Value in {
  // 32-bit float elements.
  def V4f32Mov : Vec_Move<"mov.v4.f32", V4F32Regs, FMOV32rr>;
  def V2f32Mov : Vec_Move<"mov.v2.f32", V2F32Regs, FMOV32rr>;

  // 32-bit integer elements.
  def V4i32Mov : Vec_Move<"mov.v4.u32", V4I32Regs, IMOV32rr>;
  def V2i32Mov : Vec_Move<"mov.v2.u32", V2I32Regs, IMOV32rr>;

  // 16-bit integer elements.
  def V4i16Mov : Vec_Move<"mov.v4.u16", V4I16Regs, IMOV16rr>;
  def V2i16Mov : Vec_Move<"mov.v2.u16", V2I16Regs, IMOV16rr>;

  // 8-bit integer elements.
  def V4i8Mov  : Vec_Move<"mov.v4.u16", V4I8Regs, IMOV8rr>;
  def V2i8Mov  : Vec_Move<"mov.v2.u16", V2I8Regs, IMOV8rr>;

  // 64-bit elements.
  def V2f64Mov : Vec_Move<"mov.v2.f64", V2F64Regs, FMOV64rr>;
  def V2i64Mov : Vec_Move<"mov.v2.u64", V2I64Regs, IMOV64rr>;
}
| 867 | |
// SelectionDAG node for ISD::EXTRACT_SUBVECTOR: one result, two operands, with
// operand 2 of the type profile constrained to pointer type (SDTCisPtrTy<2>).
def extract_subvec
  : SDNode<"ISD::EXTRACT_SUBVECTOR",
           SDTypeProfile<1, 2, [SDTCisPtrTy<2>]>>;
| 871 | |
// extract_subvec patterns: take the low half (index 0) or high half (index 2)
// of a four-element vector.  Each is selected as a Build_Vector2_* of two
// per-element extracts: elements 0 and 1 for the low half, 2 and 3 for the
// high half.

// v4f32 -> v2f32
def : Pat<(v2f32 (extract_subvec V4F32Regs:$src, 0)),
          (Build_Vector2_f32 (V4f32Extract V4F32Regs:$src, 0),
                             (V4f32Extract V4F32Regs:$src, 1))>;
def : Pat<(v2f32 (extract_subvec V4F32Regs:$src, 2)),
          (Build_Vector2_f32 (V4f32Extract V4F32Regs:$src, 2),
                             (V4f32Extract V4F32Regs:$src, 3))>;

// v4i32 -> v2i32
def : Pat<(v2i32 (extract_subvec V4I32Regs:$src, 0)),
          (Build_Vector2_i32 (V4i32Extract V4I32Regs:$src, 0),
                             (V4i32Extract V4I32Regs:$src, 1))>;
def : Pat<(v2i32 (extract_subvec V4I32Regs:$src, 2)),
          (Build_Vector2_i32 (V4i32Extract V4I32Regs:$src, 2),
                             (V4i32Extract V4I32Regs:$src, 3))>;

// v4i16 -> v2i16
def : Pat<(v2i16 (extract_subvec V4I16Regs:$src, 0)),
          (Build_Vector2_i16 (V4i16Extract V4I16Regs:$src, 0),
                             (V4i16Extract V4I16Regs:$src, 1))>;
def : Pat<(v2i16 (extract_subvec V4I16Regs:$src, 2)),
          (Build_Vector2_i16 (V4i16Extract V4I16Regs:$src, 2),
                             (V4i16Extract V4I16Regs:$src, 3))>;

// v4i8 -> v2i8
def : Pat<(v2i8 (extract_subvec V4I8Regs:$src, 0)),
          (Build_Vector2_i8 (V4i8Extract V4I8Regs:$src, 0),
                            (V4i8Extract V4I8Regs:$src, 1))>;
def : Pat<(v2i8 (extract_subvec V4I8Regs:$src, 2)),
          (Build_Vector2_i8 (V4i8Extract V4I8Regs:$src, 2),
                            (V4i8Extract V4I8Regs:$src, 3))>;
| 896 | |
// Select instructions

// One line of select assembly for a single vector element.
//
//   type - suffix appended to "selp."
//   pos  - element suffix appended after "${dst}_", "${src1}_" and "${src2}_"
//
// The resulting `s` is:
//   selp.<type> \t${dst}_<pos>, ${src1}_<pos>, ${src2}_<pos>, $p;
class Select_OneLine<string type, string pos> {
  string t1 = !strconcat("selp.", type);
  string t2 = !strconcat(t1, " \t${dst}_");
  string t3 = !strconcat(t2, pos);
  string t4 = !strconcat(t3, ", ${src1}_");
  string t5 = !strconcat(t4, pos);
  string t6 = !strconcat(t5, ", ${src2}_");
  string t7 = !strconcat(t6, pos);
  string s  = !strconcat(t7, ", $p;");
}
| 908 | |
// Select assembly for a two-element vector: the Select_OneLine strings for
// positions "0" and "1", joined by "\n\t".
class Select_Str2<string type> {
  string t1 = Select_OneLine<type, "0">.s;
  string t2 = !strconcat(t1, "\n\t");
  string s  = !strconcat(t2, Select_OneLine<type, "1">.s);
}
| 914 | |
// Select assembly for a four-element vector: the Select_OneLine strings for
// positions "0" through "3", in order, joined by "\n\t".
class Select_Str4<string type> {
  string t1 = Select_OneLine<type, "0">.s;
  string t2 = !strconcat(t1, "\n\t");
  string t3 = !strconcat(t2, Select_OneLine<type, "1">.s);
  string t4 = !strconcat(t3, "\n\t");
  string t5 = !strconcat(t4, Select_OneLine<type, "2">.s);
  string t6 = !strconcat(t5, "\n\t");
  string s  = !strconcat(t6, Select_OneLine<type, "3">.s);
}
| 925 | |
// Vector select driven by a single Int1Regs predicate $p.
//
//   vclass - register class shared by $dst, $src1 and $src2
//   asmstr - complete assembly string for the instruction
//   sop    - instruction NVPTXVecInst records as `scalarInst`
//
// Matches (select $p, $src1, $src2).
class Vec_Select<NVPTXRegClass vclass, string asmstr, NVPTXInst sop>
  : NVPTXVecInst<(outs vclass:$dst),
                 (ins vclass:$src1, vclass:$src2, Int1Regs:$p),
                 asmstr,
                 [(set vclass:$dst,
                       (select Int1Regs:$p, vclass:$src1, vclass:$src2))],
                 sop>;
| 933 | |
// Vector select instructions, tagged VecInstType = isVecOther.Value.  Integer
// vectors use "b<width>" type strings and floating-point vectors use
// "f<width>"; the i8 variants use "b16", same as the i16 variants.
let VecInstType = isVecOther.Value in {
  // Integer vectors.
  def V2I64_Select : Vec_Select<V2I64Regs, Select_Str2<"b64">.s, SELECTi64rr>;
  def V4I32_Select : Vec_Select<V4I32Regs, Select_Str4<"b32">.s, SELECTi32rr>;
  def V2I32_Select : Vec_Select<V2I32Regs, Select_Str2<"b32">.s, SELECTi32rr>;
  def V4I16_Select : Vec_Select<V4I16Regs, Select_Str4<"b16">.s, SELECTi16rr>;
  def V2I16_Select : Vec_Select<V2I16Regs, Select_Str2<"b16">.s, SELECTi16rr>;
  def V4I8_Select  : Vec_Select<V4I8Regs,  Select_Str4<"b16">.s, SELECTi8rr>;
  def V2I8_Select  : Vec_Select<V2I8Regs,  Select_Str2<"b16">.s, SELECTi8rr>;

  // Floating-point vectors.
  def V2F64_Select : Vec_Select<V2F64Regs, Select_Str2<"f64">.s, SELECTf64rr>;
  def V4F32_Select : Vec_Select<V4F32Regs, Select_Str4<"f32">.s, SELECTf32rr>;
  def V2F32_Select : Vec_Select<V2F32Regs, Select_Str2<"f32">.s, SELECTf32rr>;
}
| 947 | |
| 948 | // Comparison instructions |
| 949 | |
// setcc convenience fragments.
//
// Each fragment is a two-operand wrapper around setcc with one fixed condition
// code, so that patterns can name the comparison directly.

// SETO* condition codes.
def vsetoeq : PatFrag<(ops node:$lhs, node:$rhs),
                      (setcc node:$lhs, node:$rhs, SETOEQ)>;
def vsetogt : PatFrag<(ops node:$lhs, node:$rhs),
                      (setcc node:$lhs, node:$rhs, SETOGT)>;
def vsetoge : PatFrag<(ops node:$lhs, node:$rhs),
                      (setcc node:$lhs, node:$rhs, SETOGE)>;
def vsetolt : PatFrag<(ops node:$lhs, node:$rhs),
                      (setcc node:$lhs, node:$rhs, SETOLT)>;
def vsetole : PatFrag<(ops node:$lhs, node:$rhs),
                      (setcc node:$lhs, node:$rhs, SETOLE)>;
def vsetone : PatFrag<(ops node:$lhs, node:$rhs),
                      (setcc node:$lhs, node:$rhs, SETONE)>;
def vseto   : PatFrag<(ops node:$lhs, node:$rhs),
                      (setcc node:$lhs, node:$rhs, SETO)>;

// SETU* condition codes.
def vsetuo  : PatFrag<(ops node:$lhs, node:$rhs),
                      (setcc node:$lhs, node:$rhs, SETUO)>;
def vsetueq : PatFrag<(ops node:$lhs, node:$rhs),
                      (setcc node:$lhs, node:$rhs, SETUEQ)>;
def vsetugt : PatFrag<(ops node:$lhs, node:$rhs),
                      (setcc node:$lhs, node:$rhs, SETUGT)>;
def vsetuge : PatFrag<(ops node:$lhs, node:$rhs),
                      (setcc node:$lhs, node:$rhs, SETUGE)>;
def vsetult : PatFrag<(ops node:$lhs, node:$rhs),
                      (setcc node:$lhs, node:$rhs, SETULT)>;
def vsetule : PatFrag<(ops node:$lhs, node:$rhs),
                      (setcc node:$lhs, node:$rhs, SETULE)>;
def vsetune : PatFrag<(ops node:$lhs, node:$rhs),
                      (setcc node:$lhs, node:$rhs, SETUNE)>;

// Unprefixed condition codes.
def vseteq  : PatFrag<(ops node:$lhs, node:$rhs),
                      (setcc node:$lhs, node:$rhs, SETEQ)>;
def vsetgt  : PatFrag<(ops node:$lhs, node:$rhs),
                      (setcc node:$lhs, node:$rhs, SETGT)>;
def vsetge  : PatFrag<(ops node:$lhs, node:$rhs),
                      (setcc node:$lhs, node:$rhs, SETGE)>;
def vsetlt  : PatFrag<(ops node:$lhs, node:$rhs),
                      (setcc node:$lhs, node:$rhs, SETLT)>;
def vsetle  : PatFrag<(ops node:$lhs, node:$rhs),
                      (setcc node:$lhs, node:$rhs, SETLE)>;
def vsetne  : PatFrag<(ops node:$lhs, node:$rhs),
                      (setcc node:$lhs, node:$rhs, SETNE)>;
| 991 | |
// Vector comparison instruction.
//
//   op        - comparison fragment applied to the two inputs
//   outrclass - register class of the result ($dst)
//   inrclass  - register class of both inputs ($a, $b)
//   sop       - instruction NVPTXVecInst records as `scalarInst`
//
// The assembly string is the literal text "Unsupported".
// NOTE(review): presumably these are expected to be replaced through
// `scalarInst` before anything is printed - confirm against the consumer of
// that field.
class Vec_Compare<PatFrag op, NVPTXRegClass outrclass, NVPTXRegClass inrclass,
                  NVPTXInst sop>
  : NVPTXVecInst<(outs outrclass:$dst),
                 (ins inrclass:$a, inrclass:$b),
                 "Unsupported",
                 [(set outrclass:$dst, (op inrclass:$a, inrclass:$b))],
                 sop>;
| 999 | |
// Instantiates Vec_Compare for every integer vector type.  The result register
// class equals the input register class, and inst8/inst16/inst32/inst64 are
// passed as the `sop` argument for the matching element width.
multiclass Vec_Compare_All<PatFrag op,
                           NVPTXInst inst8,
                           NVPTXInst inst16,
                           NVPTXInst inst32,
                           NVPTXInst inst64> {
  def V2I8  : Vec_Compare<op, V2I8Regs,  V2I8Regs,  inst8>;
  def V4I8  : Vec_Compare<op, V4I8Regs,  V4I8Regs,  inst8>;
  def V2I16 : Vec_Compare<op, V2I16Regs, V2I16Regs, inst16>;
  def V4I16 : Vec_Compare<op, V4I16Regs, V4I16Regs, inst16>;
  def V2I32 : Vec_Compare<op, V2I32Regs, V2I32Regs, inst32>;
  def V4I32 : Vec_Compare<op, V4I32Regs, V4I32Regs, inst32>;
  def V2I64 : Vec_Compare<op, V2I64Regs, V2I64Regs, inst64>;
}
| 1014 | |
// Integer vector comparisons, tagged VecInstType = isVecOther.Value.  Each
// defm pairs one setcc fragment with the four instructions (i8, i16, i32, i64)
// handed to Vec_Compare_All.
let VecInstType = isVecOther.Value in {
  // Greater-than.
  defm VecSGT : Vec_Compare_All<vsetgt,
                                ISetSGTi8rr_toi8,   ISetSGTi16rr_toi16,
                                ISetSGTi32rr_toi32, ISetSGTi64rr_toi64>;
  defm VecUGT : Vec_Compare_All<vsetugt,
                                ISetUGTi8rr_toi8,   ISetUGTi16rr_toi16,
                                ISetUGTi32rr_toi32, ISetUGTi64rr_toi64>;

  // Less-than.
  defm VecSLT : Vec_Compare_All<vsetlt,
                                ISetSLTi8rr_toi8,   ISetSLTi16rr_toi16,
                                ISetSLTi32rr_toi32, ISetSLTi64rr_toi64>;
  defm VecULT : Vec_Compare_All<vsetult,
                                ISetULTi8rr_toi8,   ISetULTi16rr_toi16,
                                ISetULTi32rr_toi32, ISetULTi64rr_toi64>;

  // Greater-or-equal.
  defm VecSGE : Vec_Compare_All<vsetge,
                                ISetSGEi8rr_toi8,   ISetSGEi16rr_toi16,
                                ISetSGEi32rr_toi32, ISetSGEi64rr_toi64>;
  defm VecUGE : Vec_Compare_All<vsetuge,
                                ISetUGEi8rr_toi8,   ISetUGEi16rr_toi16,
                                ISetUGEi32rr_toi32, ISetUGEi64rr_toi64>;

  // Less-or-equal.
  defm VecSLE : Vec_Compare_All<vsetle,
                                ISetSLEi8rr_toi8,   ISetSLEi16rr_toi16,
                                ISetSLEi32rr_toi32, ISetSLEi64rr_toi64>;
  defm VecULE : Vec_Compare_All<vsetule,
                                ISetULEi8rr_toi8,   ISetULEi16rr_toi16,
                                ISetULEi32rr_toi32, ISetULEi64rr_toi64>;

  // Equal.
  defm VecSEQ : Vec_Compare_All<vseteq,
                                ISetSEQi8rr_toi8,   ISetSEQi16rr_toi16,
                                ISetSEQi32rr_toi32, ISetSEQi64rr_toi64>;
  defm VecUEQ : Vec_Compare_All<vsetueq,
                                ISetUEQi8rr_toi8,   ISetUEQi16rr_toi16,
                                ISetUEQi32rr_toi32, ISetUEQi64rr_toi64>;

  // Not-equal.
  defm VecSNE : Vec_Compare_All<vsetne,
                                ISetSNEi8rr_toi8,   ISetSNEi16rr_toi16,
                                ISetSNEi32rr_toi32, ISetSNEi64rr_toi64>;
  defm VecUNE : Vec_Compare_All<vsetune,
                                ISetUNEi8rr_toi8,   ISetUNEi16rr_toi16,
                                ISetUNEi32rr_toi32, ISetUNEi64rr_toi64>;
}
| 1041 | |
// Instantiates Vec_Compare for every floating-point vector type.  The result
// lives in the integer vector register class with the same element count and
// width as the inputs (V2F32 -> V2I32, V4F32 -> V4I32, V2F64 -> V2I64).
multiclass FVec_Compare_All<PatFrag op,
                            NVPTXInst instf32,
                            NVPTXInst instf64> {
  def V2F32 : Vec_Compare<op, V2I32Regs, V2F32Regs, instf32>;
  def V4F32 : Vec_Compare<op, V4I32Regs, V4F32Regs, instf32>;
  def V2F64 : Vec_Compare<op, V2I64Regs, V2F64Regs, instf64>;
}
| 1050 | |
// Floating-point vector comparisons, tagged VecInstType = isVecOther.Value.
// Each defm pairs one setcc fragment with the f32 and f64 instructions handed
// to FVec_Compare_All.
let VecInstType = isVecOther.Value in {
  // Fragments built on the SETO* condition codes.
  defm FVecGT  : FVec_Compare_All<vsetogt,
                                  FSetGTf32rr_toi32, FSetGTf64rr_toi64>;
  defm FVecLT  : FVec_Compare_All<vsetolt,
                                  FSetLTf32rr_toi32, FSetLTf64rr_toi64>;
  defm FVecGE  : FVec_Compare_All<vsetoge,
                                  FSetGEf32rr_toi32, FSetGEf64rr_toi64>;
  defm FVecLE  : FVec_Compare_All<vsetole,
                                  FSetLEf32rr_toi32, FSetLEf64rr_toi64>;
  defm FVecEQ  : FVec_Compare_All<vsetoeq,
                                  FSetEQf32rr_toi32, FSetEQf64rr_toi64>;
  defm FVecNE  : FVec_Compare_All<vsetone,
                                  FSetNEf32rr_toi32, FSetNEf64rr_toi64>;

  // Fragments built on the SETU* condition codes.
  defm FVecUGT : FVec_Compare_All<vsetugt,
                                  FSetUGTf32rr_toi32, FSetUGTf64rr_toi64>;
  defm FVecULT : FVec_Compare_All<vsetult,
                                  FSetULTf32rr_toi32, FSetULTf64rr_toi64>;
  defm FVecUGE : FVec_Compare_All<vsetuge,
                                  FSetUGEf32rr_toi32, FSetUGEf64rr_toi64>;
  defm FVecULE : FVec_Compare_All<vsetule,
                                  FSetULEf32rr_toi32, FSetULEf64rr_toi64>;
  defm FVecUEQ : FVec_Compare_All<vsetueq,
                                  FSetUEQf32rr_toi32, FSetUEQf64rr_toi64>;
  defm FVecUNE : FVec_Compare_All<vsetune,
                                  FSetUNEf32rr_toi32, FSetUNEf64rr_toi64>;

  // SETO / SETUO.
  defm FVecNUM : FVec_Compare_All<vseto,
                                  FSetNUMf32rr_toi32, FSetNUMf64rr_toi64>;
  defm FVecNAN : FVec_Compare_All<vsetuo,
                                  FSetNANf32rr_toi32, FSetNANf64rr_toi64>;
}
| 1083 | |
// ld.param producing four scalar registers ($d1..$d4) of class `regclass`.
// `opstr` is appended directly to "ld.param".  Operand $a is declared as an
// input but does not appear in the assembly string; the address is always
// printed as [retval0+$b].  No selection pattern is attached.
class LoadParamScalar4Inst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs regclass:$d1, regclass:$d2, regclass:$d3, regclass:$d4),
              (ins i32imm:$a, i32imm:$b),
              !strconcat(!strconcat("ld.param", opstr),
                         "\t{{$d1, $d2, $d3, $d4}}, [retval0+$b];"),
              []>;
| 1089 | |
// ld.param producing two scalar registers ($d1, $d2) of class `regclass`.
// `opstr` is appended directly to "ld.param".  Operand $a is declared as an
// input but does not appear in the assembly string; the address is always
// printed as [retval0+$b].  No selection pattern is attached.
class LoadParamScalar2Inst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs regclass:$d1, regclass:$d2),
              (ins i32imm:$a, i32imm:$b),
              !strconcat(!strconcat("ld.param", opstr),
                         "\t{{$d1, $d2}}, [retval0+$b];"),
              []>;
| 1095 | |
| 1096 | |
// st.param storing four scalar registers ($s1..$s4) of class `regclass`.
// `opstr` is appended directly to "st.param"; the destination is printed as
// [param$a+$b].  No results and no selection pattern.
class StoreParamScalar4Inst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs),
              (ins regclass:$s1, regclass:$s2, regclass:$s3, regclass:$s4,
                   i32imm:$a, i32imm:$b),
              !strconcat(!strconcat("st.param", opstr),
                         "\t[param$a+$b], {{$s1, $s2, $s3, $s4}};"),
              []>;
| 1103 | |
// st.param storing two scalar registers ($s1, $s2) of class `regclass`.
// `opstr` is appended directly to "st.param"; the destination is printed as
// [param$a+$b].  No results and no selection pattern.
class StoreParamScalar2Inst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs),
              (ins regclass:$s1, regclass:$s2, i32imm:$a, i32imm:$b),
              !strconcat(!strconcat("st.param", opstr),
                         "\t[param$a+$b], {{$s1, $s2}};"),
              []>;
| 1109 | |
// st.param storing four scalar registers ($s1..$s4) of class `regclass` to the
// return-value slot.  `opstr` is appended directly to "st.param"; the
// destination is printed as [func_retval+$a].  No results and no selection
// pattern.
class StoreRetvalScalar4Inst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs),
              (ins regclass:$s1, regclass:$s2, regclass:$s3, regclass:$s4,
                   i32imm:$a),
              !strconcat(!strconcat("st.param", opstr),
                         "\t[func_retval+$a], {{$s1, $s2, $s3, $s4}};"),
              []>;
| 1116 | |
// st.param storing two scalar registers ($s1, $s2) of class `regclass` to the
// return-value slot.  `opstr` is appended directly to "st.param"; the
// destination is printed as [func_retval+$a].  No results and no selection
// pattern.
class StoreRetvalScalar2Inst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs),
              (ins regclass:$s1, regclass:$s2, i32imm:$a),
              !strconcat(!strconcat("st.param", opstr),
                         "\t[func_retval+$a], {{$s1, $s2}};"),
              []>;
| 1122 | |
// Scalarized ld.param instructions: one def per element type and vector width.
// The register class of the results matches the element width named in the
// opcode suffix.
//
// The two-element I64, I16 and I8 forms previously all used Int32Regs, which
// disagreed with their ".b64"/".b16"/".b8" suffixes, with the four-element
// I16/I8 forms below, and with the StoreParamScalar2*/StoreRetvalScalar2*
// defs.  They now use Int64Regs, Int16Regs and Int8Regs respectively.

// Four-element integer loads.
def LoadParamScalar4I32 : LoadParamScalar4Inst<Int32Regs, ".v4.b32">;
def LoadParamScalar4I16 : LoadParamScalar4Inst<Int16Regs, ".v4.b16">;
def LoadParamScalar4I8  : LoadParamScalar4Inst<Int8Regs,  ".v4.b8">;

// Two-element integer loads.
def LoadParamScalar2I64 : LoadParamScalar2Inst<Int64Regs, ".v2.b64">;
def LoadParamScalar2I32 : LoadParamScalar2Inst<Int32Regs, ".v2.b32">;
def LoadParamScalar2I16 : LoadParamScalar2Inst<Int16Regs, ".v2.b16">;
def LoadParamScalar2I8  : LoadParamScalar2Inst<Int8Regs,  ".v2.b8">;

// Floating-point loads.
def LoadParamScalar4F32 : LoadParamScalar4Inst<Float32Regs, ".v4.f32">;
def LoadParamScalar2F32 : LoadParamScalar2Inst<Float32Regs, ".v2.f32">;
def LoadParamScalar2F64 : LoadParamScalar2Inst<Float64Regs, ".v2.f64">;
| 1135 | |
// Scalarized st.param instructions for outgoing parameters: one def per
// element type and vector width.

// Four-element integer stores.
def StoreParamScalar4I32 : StoreParamScalar4Inst<Int32Regs, ".v4.b32">;
def StoreParamScalar4I16 : StoreParamScalar4Inst<Int16Regs, ".v4.b16">;
def StoreParamScalar4I8  : StoreParamScalar4Inst<Int8Regs,  ".v4.b8">;

// Two-element integer stores.
def StoreParamScalar2I64 : StoreParamScalar2Inst<Int64Regs, ".v2.b64">;
def StoreParamScalar2I32 : StoreParamScalar2Inst<Int32Regs, ".v2.b32">;
def StoreParamScalar2I16 : StoreParamScalar2Inst<Int16Regs, ".v2.b16">;
def StoreParamScalar2I8  : StoreParamScalar2Inst<Int8Regs,  ".v2.b8">;

// Floating-point stores.
def StoreParamScalar4F32 : StoreParamScalar4Inst<Float32Regs, ".v4.f32">;
def StoreParamScalar2F32 : StoreParamScalar2Inst<Float32Regs, ".v2.f32">;
def StoreParamScalar2F64 : StoreParamScalar2Inst<Float64Regs, ".v2.f64">;
| 1148 | |
// Scalarized st.param instructions for return values: one def per element type
// and vector width.

// Four-element integer stores.
def StoreRetvalScalar4I32 : StoreRetvalScalar4Inst<Int32Regs, ".v4.b32">;
def StoreRetvalScalar4I16 : StoreRetvalScalar4Inst<Int16Regs, ".v4.b16">;
def StoreRetvalScalar4I8  : StoreRetvalScalar4Inst<Int8Regs,  ".v4.b8">;

// Two-element integer stores.
def StoreRetvalScalar2I64 : StoreRetvalScalar2Inst<Int64Regs, ".v2.b64">;
def StoreRetvalScalar2I32 : StoreRetvalScalar2Inst<Int32Regs, ".v2.b32">;
def StoreRetvalScalar2I16 : StoreRetvalScalar2Inst<Int16Regs, ".v2.b16">;
def StoreRetvalScalar2I8  : StoreRetvalScalar2Inst<Int8Regs,  ".v2.b8">;

// Floating-point stores.
def StoreRetvalScalar4F32 : StoreRetvalScalar4Inst<Float32Regs, ".v4.f32">;
def StoreRetvalScalar2F32 : StoreRetvalScalar2Inst<Float32Regs, ".v2.f32">;
def StoreRetvalScalar2F64 : StoreRetvalScalar2Inst<Float64Regs, ".v2.f64">;
| 1161 | |
// Vector-typed LoadParam: matches (LoadParam imm:$a, imm:$b) and defines a
// vector register of class `regclass`.  The assembly string is a fixed
// descriptive text; `opstr` is accepted but not referenced in this class.
// `sop` (default NOP) is what NVPTXVecInst records as `scalarInst`.
class LoadParamVecInst<NVPTXRegClass regclass, string opstr,
                       NVPTXInst sop = NOP>
  : NVPTXVecInst<(outs regclass:$dst),
                 (ins i32imm:$a, i32imm:$b),
                 "loadparam : $dst <- [$a, $b]",
                 [(set regclass:$dst,
                       (LoadParam (i32 imm:$a), (i32 imm:$b)))],
                 sop>;
| 1167 | |
// Vector-typed StoreParam: matches (StoreParam imm:$a, imm:$b, $val) where
// $val is a vector register of class `regclass`.  The assembly string is a
// fixed descriptive text; `opstr` is accepted but not referenced in this
// class.  `sop` (default NOP) is what NVPTXVecInst records as `scalarInst`.
class StoreParamVecInst<NVPTXRegClass regclass, string opstr,
                        NVPTXInst sop = NOP>
  : NVPTXVecInst<(outs),
                 (ins regclass:$val, i32imm:$a, i32imm:$b),
                 "storeparam : [$a, $b] <- $val",
                 [(StoreParam (i32 imm:$a), (i32 imm:$b), regclass:$val)],
                 sop>;
| 1172 | |
// Vector-typed StoreRetval: matches (StoreRetval imm:$a, $val) where $val is a
// vector register of class `regclass`.  The assembly string is a fixed
// descriptive text; `opstr` is accepted but not referenced in this class.
// `sop` (default NOP) is what NVPTXVecInst records as `scalarInst`.
class StoreRetvalVecInst<NVPTXRegClass regclass, string opstr,
                         NVPTXInst sop = NOP>
  : NVPTXVecInst<(outs),
                 (ins regclass:$val, i32imm:$a),
                 "storeretval : retval[$a] <- $val",
                 [(StoreRetval (i32 imm:$a), regclass:$val)],
                 sop>;
| 1178 | |
// Vector LoadParam instructions, tagged VecInstType = isVecLD.Value.  Each def
// names the LoadParamScalar* instruction of the same element type and width as
// its scalar counterpart.
let VecInstType = isVecLD.Value in {
  // Four-element integer vectors.
  def LoadParamV4I32
    : LoadParamVecInst<V4I32Regs, ".v4.b32", LoadParamScalar4I32>;
  def LoadParamV4I16
    : LoadParamVecInst<V4I16Regs, ".v4.b16", LoadParamScalar4I16>;
  def LoadParamV4I8
    : LoadParamVecInst<V4I8Regs, ".v4.b8", LoadParamScalar4I8>;

  // Two-element integer vectors.
  def LoadParamV2I64
    : LoadParamVecInst<V2I64Regs, ".v2.b64", LoadParamScalar2I64>;
  def LoadParamV2I32
    : LoadParamVecInst<V2I32Regs, ".v2.b32", LoadParamScalar2I32>;
  def LoadParamV2I16
    : LoadParamVecInst<V2I16Regs, ".v2.b16", LoadParamScalar2I16>;
  def LoadParamV2I8
    : LoadParamVecInst<V2I8Regs, ".v2.b8", LoadParamScalar2I8>;

  // Floating-point vectors.
  def LoadParamV4F32
    : LoadParamVecInst<V4F32Regs, ".v4.f32", LoadParamScalar4F32>;
  def LoadParamV2F32
    : LoadParamVecInst<V2F32Regs, ".v2.f32", LoadParamScalar2F32>;
  def LoadParamV2F64
    : LoadParamVecInst<V2F64Regs, ".v2.f64", LoadParamScalar2F64>;
}
| 1203 | |
// Vector StoreParam and StoreRetval instructions, tagged
// VecInstType = isVecST.Value.  Each def names the StoreParamScalar* or
// StoreRetvalScalar* instruction of the same element type and width as its
// scalar counterpart.
let VecInstType = isVecST.Value in {
  // ---- StoreParam: four-element integer vectors ----
  def StoreParamV4I32
    : StoreParamVecInst<V4I32Regs, ".v4.b32", StoreParamScalar4I32>;
  def StoreParamV4I16
    : StoreParamVecInst<V4I16Regs, ".v4.b16", StoreParamScalar4I16>;
  def StoreParamV4I8
    : StoreParamVecInst<V4I8Regs, ".v4.b8", StoreParamScalar4I8>;

  // ---- StoreParam: two-element integer vectors ----
  def StoreParamV2I64
    : StoreParamVecInst<V2I64Regs, ".v2.b64", StoreParamScalar2I64>;
  def StoreParamV2I32
    : StoreParamVecInst<V2I32Regs, ".v2.b32", StoreParamScalar2I32>;
  def StoreParamV2I16
    : StoreParamVecInst<V2I16Regs, ".v2.b16", StoreParamScalar2I16>;
  def StoreParamV2I8
    : StoreParamVecInst<V2I8Regs, ".v2.b8", StoreParamScalar2I8>;

  // ---- StoreParam: floating-point vectors ----
  def StoreParamV4F32
    : StoreParamVecInst<V4F32Regs, ".v4.f32", StoreParamScalar4F32>;
  def StoreParamV2F32
    : StoreParamVecInst<V2F32Regs, ".v2.f32", StoreParamScalar2F32>;
  def StoreParamV2F64
    : StoreParamVecInst<V2F64Regs, ".v2.f64", StoreParamScalar2F64>;

  // ---- StoreRetval: four-element integer vectors ----
  def StoreRetvalV4I32
    : StoreRetvalVecInst<V4I32Regs, ".v4.b32", StoreRetvalScalar4I32>;
  def StoreRetvalV4I16
    : StoreRetvalVecInst<V4I16Regs, ".v4.b16", StoreRetvalScalar4I16>;
  def StoreRetvalV4I8
    : StoreRetvalVecInst<V4I8Regs, ".v4.b8", StoreRetvalScalar4I8>;

  // ---- StoreRetval: two-element integer vectors ----
  def StoreRetvalV2I64
    : StoreRetvalVecInst<V2I64Regs, ".v2.b64", StoreRetvalScalar2I64>;
  def StoreRetvalV2I32
    : StoreRetvalVecInst<V2I32Regs, ".v2.b32", StoreRetvalScalar2I32>;
  def StoreRetvalV2I16
    : StoreRetvalVecInst<V2I16Regs, ".v2.b16", StoreRetvalScalar2I16>;
  def StoreRetvalV2I8
    : StoreRetvalVecInst<V2I8Regs, ".v2.b8", StoreRetvalScalar2I8>;

  // ---- StoreRetval: floating-point vectors ----
  def StoreRetvalV4F32
    : StoreRetvalVecInst<V4F32Regs, ".v4.f32", StoreRetvalScalar4F32>;
  def StoreRetvalV2F32
    : StoreRetvalVecInst<V2F32Regs, ".v2.f32", StoreRetvalScalar2F32>;
  def StoreRetvalV2F64
    : StoreRetvalVecInst<V2F64Regs, ".v2.f64", StoreRetvalScalar2F64>;
}
| 1252 | |
| 1253 | |
// Int vector to int scalar bit convert.
//
// Every element of the source vector is extracted individually and the
// extracts are passed, in element order, to the matching V*to* instruction.

// v4i8 -> i32
def : Pat<(i32 (bitconvert V4I8Regs:$s)),
          (V4I8toI32 (V4i8Extract V4I8Regs:$s, 0),
                     (V4i8Extract V4I8Regs:$s, 1),
                     (V4i8Extract V4I8Regs:$s, 2),
                     (V4i8Extract V4I8Regs:$s, 3))>;

// v4i16 -> i64
def : Pat<(i64 (bitconvert V4I16Regs:$s)),
          (V4I16toI64 (V4i16Extract V4I16Regs:$s, 0),
                      (V4i16Extract V4I16Regs:$s, 1),
                      (V4i16Extract V4I16Regs:$s, 2),
                      (V4i16Extract V4I16Regs:$s, 3))>;

// v2i8 -> i16
def : Pat<(i16 (bitconvert V2I8Regs:$s)),
          (V2I8toI16 (V2i8Extract V2I8Regs:$s, 0),
                     (V2i8Extract V2I8Regs:$s, 1))>;

// v2i16 -> i32
def : Pat<(i32 (bitconvert V2I16Regs:$s)),
          (V2I16toI32 (V2i16Extract V2I16Regs:$s, 0),
                      (V2i16Extract V2I16Regs:$s, 1))>;

// v2i32 -> i64
def : Pat<(i64 (bitconvert V2I32Regs:$s)),
          (V2I32toI64 (V2i32Extract V2I32Regs:$s, 0),
                      (V2i32Extract V2I32Regs:$s, 1))>;
| 1276 | |
// Int scalar to int vector bit convert.
//
// Each def matches a bitconvert from a scalar integer register to a vector
// register, is tagged VecInstType = isVecDest.Value, and names the
// corresponding *toV* instruction as its `scalarInst`.  The assembly string is
// the literal text "Error!".
let VecInstType = isVecDest.Value in {
  // i32 -> v4i8
  def VecI32toV4I8
    : NVPTXVecInst<(outs V4I8Regs:$d), (ins Int32Regs:$s),
                   "Error!",
                   [(set V4I8Regs:$d, (bitconvert Int32Regs:$s))],
                   I32toV4I8>;

  // i64 -> v4i16
  def VecI64toV4I16
    : NVPTXVecInst<(outs V4I16Regs:$d), (ins Int64Regs:$s),
                   "Error!",
                   [(set V4I16Regs:$d, (bitconvert Int64Regs:$s))],
                   I64toV4I16>;

  // i16 -> v2i8
  def VecI16toV2I8
    : NVPTXVecInst<(outs V2I8Regs:$d), (ins Int16Regs:$s),
                   "Error!",
                   [(set V2I8Regs:$d, (bitconvert Int16Regs:$s))],
                   I16toV2I8>;

  // i32 -> v2i16
  def VecI32toV2I16
    : NVPTXVecInst<(outs V2I16Regs:$d), (ins Int32Regs:$s),
                   "Error!",
                   [(set V2I16Regs:$d, (bitconvert Int32Regs:$s))],
                   I32toV2I16>;

  // i64 -> v2i32
  def VecI64toV2I32
    : NVPTXVecInst<(outs V2I32Regs:$d), (ins Int64Regs:$s),
                   "Error!",
                   [(set V2I32Regs:$d, (bitconvert Int64Regs:$s))],
                   I64toV2I32>;
}
| 1305 | |
// Int vector to int vector bit convert.
//
// The first four conversions pack the source vector into a scalar integer and
// then expand that scalar with a VecI*toV* instruction.  The last two
// (v2i64 <-> v4i32) work per 64-bit element and rebuild the result with
// Build_Vector*.

// v4i8 -> v2i16
def : Pat<(v2i16 (bitconvert V4I8Regs:$s)),
          (VecI32toV2I16
            (V4I8toI32 (V4i8Extract V4I8Regs:$s, 0),
                       (V4i8Extract V4I8Regs:$s, 1),
                       (V4i8Extract V4I8Regs:$s, 2),
                       (V4i8Extract V4I8Regs:$s, 3)))>;

// v4i16 -> v2i32
def : Pat<(v2i32 (bitconvert V4I16Regs:$s)),
          (VecI64toV2I32
            (V4I16toI64 (V4i16Extract V4I16Regs:$s, 0),
                        (V4i16Extract V4I16Regs:$s, 1),
                        (V4i16Extract V4I16Regs:$s, 2),
                        (V4i16Extract V4I16Regs:$s, 3)))>;

// v2i16 -> v4i8
def : Pat<(v4i8 (bitconvert V2I16Regs:$s)),
          (VecI32toV4I8
            (V2I16toI32 (V2i16Extract V2I16Regs:$s, 0),
                        (V2i16Extract V2I16Regs:$s, 1)))>;

// v2i32 -> v4i16
def : Pat<(v4i16 (bitconvert V2I32Regs:$s)),
          (VecI64toV4I16
            (V2I32toI64 (V2i32Extract V2I32Regs:$s, 0),
                        (V2i32Extract V2I32Regs:$s, 1)))>;

// v2i64 -> v4i32
def : Pat<(v4i32 (bitconvert V2I64Regs:$s)),
          (Build_Vector4_i32
            (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 0)), 0),
            (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 0)), 1),
            (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 1)), 0),
            (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 1)), 1))>;

// v4i32 -> v2i64
def : Pat<(v2i64 (bitconvert V4I32Regs:$s)),
          (Build_Vector2_i64
            (V2I32toI64 (V4i32Extract V4I32Regs:$s, 0),
                        (V4i32Extract V4I32Regs:$s, 1)),
            (V2I32toI64 (V4i32Extract V4I32Regs:$s, 2),
                        (V4i32Extract V4I32Regs:$s, 3)))>;
| 1337 | |
//
// Fp scalar -> fp vector bit convert.
//

// f64 -> v2f32
// Pseudo instruction whose selection pattern is a bitconvert from a
// Float64Regs value to a V2F32Regs value.  VecInstType is set to
// isVecDest.Value for this record, the asm string is the literal "Error!",
// and F64toV2F32 is passed as the sInst argument (its scalarInst).
// NOTE(review): presumably this pseudo is always replaced by its scalarInst
// before printing, so the "Error!" text never reaches the output -- confirm.
let VecInstType = isVecDest.Value in
def VecF64toV2F32
  : NVPTXVecInst<(outs V2F32Regs:$d),
                 (ins Float64Regs:$s),
                 "Error!",
                 [(set V2F32Regs:$d, (bitconvert Float64Regs:$s))],
                 F64toV2F32>;
| 1346 | |
//
// Fp vector -> fp scalar bit convert.
//

// v2f32 -> f64
// Both extracted f32 elements are the operands of V2F32toF64.
def : Pat<(f64 (bitconvert V2F32Regs:$s)),
  (V2F32toF64 (V2f32Extract V2F32Regs:$s, 0),
              (V2f32Extract V2F32Regs:$s, 1))>;
| 1351 | |
//
// Fp scalar -> int vector bit converts.
//
// Each pattern applies BITCONVERT_32_F2I / BITCONVERT_64_F2I to the fp
// scalar and hands the result to the matching VecI32to* / VecI64to* pseudo.
//

// f32 -> v4i8
def : Pat<(v4i8 (bitconvert Float32Regs:$s)),
  (VecI32toV4I8
    (BITCONVERT_32_F2I Float32Regs:$s))>;

// f32 -> v2i16
def : Pat<(v2i16 (bitconvert Float32Regs:$s)),
  (VecI32toV2I16
    (BITCONVERT_32_F2I Float32Regs:$s))>;

// f64 -> v4i16
def : Pat<(v4i16 (bitconvert Float64Regs:$s)),
  (VecI64toV4I16
    (BITCONVERT_64_F2I Float64Regs:$s))>;

// f64 -> v2i32
def : Pat<(v2i32 (bitconvert Float64Regs:$s)),
  (VecI64toV2I32
    (BITCONVERT_64_F2I Float64Regs:$s))>;
| 1365 | |
//
// Int vector -> fp scalar bit converts.
//
// Each pattern feeds the extracted elements to a V*to{I32,I64} node and
// applies BITCONVERT_32_I2F / BITCONVERT_64_I2F to its result.
//

// v4i8 -> f32
def : Pat<(f32 (bitconvert V4I8Regs:$s)),
  (BITCONVERT_32_I2F
    (V4I8toI32 (V4i8Extract V4I8Regs:$s, 0),
               (V4i8Extract V4I8Regs:$s, 1),
               (V4i8Extract V4I8Regs:$s, 2),
               (V4i8Extract V4I8Regs:$s, 3)))>;

// v4i16 -> f64
def : Pat<(f64 (bitconvert V4I16Regs:$s)),
  (BITCONVERT_64_I2F
    (V4I16toI64 (V4i16Extract V4I16Regs:$s, 0),
                (V4i16Extract V4I16Regs:$s, 1),
                (V4i16Extract V4I16Regs:$s, 2),
                (V4i16Extract V4I16Regs:$s, 3)))>;

// v2i16 -> f32
def : Pat<(f32 (bitconvert V2I16Regs:$s)),
  (BITCONVERT_32_I2F
    (V2I16toI32 (V2i16Extract V2I16Regs:$s, 0),
                (V2i16Extract V2I16Regs:$s, 1)))>;

// v2i32 -> f64
def : Pat<(f64 (bitconvert V2I32Regs:$s)),
  (BITCONVERT_64_I2F
    (V2I32toI64 (V2i32Extract V2I32Regs:$s, 0),
                (V2i32Extract V2I32Regs:$s, 1)))>;
| 1385 | |
//
// Int scalar -> fp vector bit convert.
//

// i64 -> v2f32
// BITCONVERT_64_I2F is applied to the i64 operand and its result feeds
// VecF64toV2F32.
def : Pat<(v2f32 (bitconvert Int64Regs:$s)),
  (VecF64toV2F32
    (BITCONVERT_64_I2F Int64Regs:$s))>;
| 1390 | |
//
// Fp vector -> int scalar bit convert.
//

// v2f32 -> i64
// The two extracted f32 elements feed V2F32toF64; BITCONVERT_64_F2I is then
// applied to that result.
def : Pat<(i64 (bitconvert V2F32Regs:$s)),
  (BITCONVERT_64_F2I
    (V2F32toF64 (V2f32Extract V2F32Regs:$s, 0),
                (V2f32Extract V2F32Regs:$s, 1)))>;
| 1396 | |
//
// Int vector -> fp vector bit converts.
//
// Results are assembled with Build_Vector{2,4}_f{32,64} (or VecF64toV2F32
// for the last pattern); every scalar piece passes through
// BITCONVERT_32_I2F or BITCONVERT_64_I2F.
//

// v2i64 -> v4f32
// Every i64 element goes through VecI64toV2I32; elements 0 and 1 of each
// resulting v2i32 are converted individually.
def : Pat<(v4f32 (bitconvert V2I64Regs:$s)),
  (Build_Vector4_f32
    (BITCONVERT_32_I2F
      (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 0)), 0)),
    (BITCONVERT_32_I2F
      (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 0)), 1)),
    (BITCONVERT_32_I2F
      (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 1)), 0)),
    (BITCONVERT_32_I2F
      (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 1)), 1)))>;

// v2i64 -> v2f64
// Element-wise: each extracted i64 is converted on its own.
def : Pat<(v2f64 (bitconvert V2I64Regs:$s)),
  (Build_Vector2_f64
    (BITCONVERT_64_I2F (V2i64Extract V2I64Regs:$s, 0)),
    (BITCONVERT_64_I2F (V2i64Extract V2I64Regs:$s, 1)))>;

// v2i32 -> v2f32
// Element-wise: each extracted i32 is converted on its own.
def : Pat<(v2f32 (bitconvert V2I32Regs:$s)),
  (Build_Vector2_f32
    (BITCONVERT_32_I2F (V2i32Extract V2I32Regs:$s, 0)),
    (BITCONVERT_32_I2F (V2i32Extract V2I32Regs:$s, 1)))>;

// v4i32 -> v2f64
// Elements (0,1) and (2,3) are each combined by V2I32toI64 before the
// 64-bit conversion.
def : Pat<(v2f64 (bitconvert V4I32Regs:$s)),
  (Build_Vector2_f64
    (BITCONVERT_64_I2F
      (V2I32toI64 (V4i32Extract V4I32Regs:$s, 0),
                  (V4i32Extract V4I32Regs:$s, 1))),
    (BITCONVERT_64_I2F
      (V2I32toI64 (V4i32Extract V4I32Regs:$s, 2),
                  (V4i32Extract V4I32Regs:$s, 3))))>;

// v4i32 -> v4f32
// Element-wise: each extracted i32 is converted on its own.
def : Pat<(v4f32 (bitconvert V4I32Regs:$s)),
  (Build_Vector4_f32
    (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s, 0)),
    (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s, 1)),
    (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s, 2)),
    (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s, 3)))>;

// v4i16 -> v2f32
// All four elements feed V4I16toI64; that result goes through
// BITCONVERT_64_I2F and then VecF64toV2F32.
def : Pat<(v2f32 (bitconvert V4I16Regs:$s)),
  (VecF64toV2F32
    (BITCONVERT_64_I2F
      (V4I16toI64 (V4i16Extract V4I16Regs:$s, 0),
                  (V4i16Extract V4I16Regs:$s, 1),
                  (V4i16Extract V4I16Regs:$s, 2),
                  (V4i16Extract V4I16Regs:$s, 3))))>;
| 1440 | |
//
// Fp vector -> int vector bit converts.
//
// Results are assembled with Build_Vector{2,4}_i{32,64} (or VecI64toV4I16
// for the last pattern); every scalar piece passes through
// BITCONVERT_32_F2I or BITCONVERT_64_F2I.
//

// v4f32 -> v2i64
// Elements (0,1) and (2,3) are each combined by V2F32toF64 before the
// 64-bit conversion.
def : Pat<(v2i64 (bitconvert V4F32Regs:$s)),
  (Build_Vector2_i64
    (BITCONVERT_64_F2I
      (V2F32toF64 (V4f32Extract V4F32Regs:$s, 0),
                  (V4f32Extract V4F32Regs:$s, 1))),
    (BITCONVERT_64_F2I
      (V2F32toF64 (V4f32Extract V4F32Regs:$s, 2),
                  (V4f32Extract V4F32Regs:$s, 3))))>;

// v2f64 -> v2i64
// Element-wise: each extracted f64 is converted on its own.
def : Pat<(v2i64 (bitconvert V2F64Regs:$s)),
  (Build_Vector2_i64
    (BITCONVERT_64_F2I (V2f64Extract V2F64Regs:$s, 0)),
    (BITCONVERT_64_F2I (V2f64Extract V2F64Regs:$s, 1)))>;

// v2f32 -> v2i32
// Element-wise: each extracted f32 is converted on its own.
def : Pat<(v2i32 (bitconvert V2F32Regs:$s)),
  (Build_Vector2_i32
    (BITCONVERT_32_F2I (V2f32Extract V2F32Regs:$s, 0)),
    (BITCONVERT_32_F2I (V2f32Extract V2F32Regs:$s, 1)))>;

// v2f64 -> v4i32
// Every f64 element goes through VecF64toV2F32; elements 0 and 1 of each
// resulting v2f32 are converted individually.
def : Pat<(v4i32 (bitconvert V2F64Regs:$s)),
  (Build_Vector4_i32
    (BITCONVERT_32_F2I
      (V2f32Extract (VecF64toV2F32 (V2f64Extract V2F64Regs:$s, 0)), 0)),
    (BITCONVERT_32_F2I
      (V2f32Extract (VecF64toV2F32 (V2f64Extract V2F64Regs:$s, 0)), 1)),
    (BITCONVERT_32_F2I
      (V2f32Extract (VecF64toV2F32 (V2f64Extract V2F64Regs:$s, 1)), 0)),
    (BITCONVERT_32_F2I
      (V2f32Extract (VecF64toV2F32 (V2f64Extract V2F64Regs:$s, 1)), 1)))>;

// v4f32 -> v4i32
// Element-wise: each extracted f32 is converted on its own.
def : Pat<(v4i32 (bitconvert V4F32Regs:$s)),
  (Build_Vector4_i32
    (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s, 0)),
    (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s, 1)),
    (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s, 2)),
    (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s, 3)))>;

// v2f32 -> v4i16
// Both elements feed V2F32toF64; that result goes through
// BITCONVERT_64_F2I and then VecI64toV4I16.
def : Pat<(v4i16 (bitconvert V2F32Regs:$s)),
  (VecI64toV4I16
    (BITCONVERT_64_F2I
      (V2F32toF64 (V2f32Extract V2F32Regs:$s, 0),
                  (V2f32Extract V2F32Regs:$s, 1))))>;