| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 1 | //===-- R600Instructions.td - R600 Instruction defs  -------*- tablegen -*-===// | 
|  | 2 | // | 
|  | 3 | //                     The LLVM Compiler Infrastructure | 
|  | 4 | // | 
|  | 5 | // This file is distributed under the University of Illinois Open Source | 
|  | 6 | // License. See LICENSE.TXT for details. | 
|  | 7 | // | 
|  | 8 | //===----------------------------------------------------------------------===// | 
|  | 9 | // | 
|  | 10 | // R600 Tablegen instruction definitions | 
|  | 11 | // | 
|  | 12 | //===----------------------------------------------------------------------===// | 
|  | 13 |  | 
|  | 14 | include "R600Intrinsics.td" | 
|  | 15 |  | 
// Common base class for R600 instructions that carry an 11-bit opcode.
//   inst                 - opcode; kept in op_code, not written into Inst here
//   outs/ins/asm/pattern - forwarded to AMDGPUInst and re-applied below
//   itin                 - scheduling itinerary class
class InstR600 <bits<11> inst, dag outs, dag ins, string asm, list<dag> pattern,
                InstrItinClass itin>
    : AMDGPUInst <outs, ins, asm, pattern> {

  // Raw 64-bit encoding; this class leaves it unassigned.
  field bits<64> Inst;
  bits<11> op_code = inst;
  //let Inst = inst;

  // Per-instruction properties (all default to 0); packed into TSFlags below.
  bit Trig = 0;
  bit Op3 = 0;
  bit isVector = 0;
  bits<2> FlagOperandIdx = 0;
  bit HasNativeOperands = 0;
  bit Op1 = 0;
  bit Op2 = 0;

  let Namespace = "AMDGPU";
  let OutOperandList = outs;
  let InOperandList = ins;
  let AsmString = asm;
  let Pattern = pattern;
  let Itinerary = itin;

  // TSFlags bits 4..11, in ascending bit order.
  let TSFlags{4}   = Trig;
  let TSFlags{5}   = Op3;
  // Vector instructions are instructions that must fill all slots in an
  // instruction group
  let TSFlags{6}   = isVector;
  let TSFlags{8-7} = FlagOperandIdx;
  let TSFlags{9}   = HasNativeOperands;
  let TSFlags{10}  = Op1;
  let TSFlags{11}  = Op2;
}
|  | 49 |  | 
// Minimal base class for instructions that describe their own 64-bit encoding
// and do not take an opcode argument; it only adds the Inst field and the
// AMDGPU namespace on top of AMDGPUInst.
class InstR600ISA <dag outs, dag ins, string asm, list<dag> pattern>
    : AMDGPUInst <outs, ins, asm, pattern> {

  field bits<64> Inst;

  let Namespace = "AMDGPU";
}
|  | 56 |  | 
// Pointer operand made of an X-channel register ($ptr) plus an immediate
// index ($index); printed through printMemOperand.
def MEMxi : Operand<iPTR> {
  let PrintMethod   = "printMemOperand";
  let MIOperandInfo = (ops R600_TReg32_X:$ptr, i32imm:$index);
}
|  | 61 |  | 
// Pointer operand made of two 32-bit registers: base ($ptr) and index
// ($index).  No custom PrintMethod is set here.
def MEMrr : Operand<iPTR> {
  let MIOperandInfo = (ops R600_Reg32:$ptr, R600_Reg32:$index);
}
|  | 65 |  | 
|  | 66 | // Operands for non-registers | 
|  | 67 |  | 
// i32 operand that takes a default value when a pattern does not supply it.
//   PM      - name of the printer method (defaults to "printOperand")
//   Default - default operand value (defaults to 0)
class InstFlag<string PM = "printOperand", int Default = 0>
    : OperandWithDefaultOps <i32, (ops (i32 Default))> {
  let PrintMethod = PM;
}
|  | 72 |  | 
// src_sel for ALU src operands, see also ALU_CONST, ALU_PARAM registers.
// Defaults to -1 and is printed through printSel.
def SEL : OperandWithDefaultOps <i32, (ops (i32 -1))> {
  let PrintMethod = "printSel";
}
|  | 77 |  | 
// Flag operands used by the native ALU instruction classes.  Each one names
// its printer method; the default value is 0 unless given explicitly.
def LITERAL : InstFlag<"printLiteral">;

def WRITE   : InstFlag<"printWrite", 1>;   // defaults to 1
def OMOD    : InstFlag<"printOMOD">;
def REL     : InstFlag<"printRel">;
def CLAMP   : InstFlag<"printClamp">;
def NEG     : InstFlag<"printNeg">;
def ABS     : InstFlag<"printAbs">;
def UEM     : InstFlag<"printUpdateExecMask">;
def UP      : InstFlag<"printUpdatePred">;

// XXX: The r600g finalizer in Mesa expects last to be one in most cases.
// Once we start using the packetizer in this backend we should have this
// default to 0.
def LAST    : InstFlag<"printLast", 1>;
|  | 93 |  | 
// Addressing-mode complex patterns.  The second argument is the number of
// operands the named C++ selector function produces.
def ADDRParam           : ComplexPattern<i32, 2, "SelectADDRParam", [], []>;
def ADDRDWord           : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>;
def ADDRVTX_READ        : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>;
def ADDRGA_CONST_OFFSET : ComplexPattern<i32, 1,
                                         "SelectGlobalValueConstantOffset",
                                         [], []>;
def ADDRGA_VAR_OFFSET   : ComplexPattern<i32, 2,
                                         "SelectGlobalValueVariableOffset",
                                         [], []>;
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 99 |  | 
// First 32-bit word of an ALU instruction encoding.  Each 11-bit source
// operand value is split into a 9-bit select (bits 8-0) and a 2-bit channel
// (bits 10-9) before being placed into Word0.
class R600ALU_Word0 {
  field bits<32> Word0;

  // Source 0
  bits<11> src0;
  bits<1>  src0_neg;
  bits<1>  src0_rel;
  bits<9>  src0_sel  = src0{8-0};
  bits<2>  src0_chan = src0{10-9};

  // Source 1
  bits<11> src1;
  bits<1>  src1_rel;
  bits<1>  src1_neg;
  bits<9>  src1_sel  = src1{8-0};
  bits<2>  src1_chan = src1{10-9};

  // Remaining fields; index_mode defaults to 0.
  bits<3>  index_mode = 0;
  bits<2>  pred_sel;
  bits<1>  last;

  // Bit layout, least significant bit first.
  let Word0{8-0}   = src0_sel;
  let Word0{9}     = src0_rel;
  let Word0{11-10} = src0_chan;
  let Word0{12}    = src0_neg;

  let Word0{21-13} = src1_sel;
  let Word0{22}    = src1_rel;
  let Word0{24-23} = src1_chan;
  let Word0{25}    = src1_neg;

  let Word0{28-26} = index_mode;
  let Word0{30-29} = pred_sel;
  let Word0{31}    = last;
}
|  | 130 |  | 
// Upper part (bits 31-18) of the second ALU instruction word, shared by the
// OP2 and OP3 encodings.  Bits 17-0 are assigned by the derived classes.
class R600ALU_Word1 {
  field bits<32> Word1;

  bits<11> dst;
  bits<1>  dst_rel;
  bits<1>  clamp;
  bits<3>  bank_swizzle = 0;

  // Only bits 6-0 (select) and 10-9 (channel) of dst are encoded.
  bits<7>  dst_sel  = dst{6-0};
  bits<2>  dst_chan = dst{10-9};

  let Word1{20-18} = bank_swizzle;
  let Word1{27-21} = dst_sel;
  let Word1{28}    = dst_rel;
  let Word1{30-29} = dst_chan;
  let Word1{31}    = clamp;
}
|  | 148 |  | 
// Low 18 bits of Word1 for one- and two-source ALU instructions.
//   alu_inst - 11-bit opcode, placed in Word1{17-7}
class R600ALU_Word1_OP2 <bits<11> alu_inst> : R600ALU_Word1 {

  bits<1> src0_abs;
  bits<1> src1_abs;
  bits<1> update_exec_mask;
  bits<1> update_pred;
  bits<1> write;
  bits<2> omod;

  let Word1{0}    = src0_abs;
  let Word1{1}    = src1_abs;
  let Word1{2}    = update_exec_mask;
  let Word1{3}    = update_pred;
  let Word1{4}    = write;
  let Word1{6-5}  = omod;
  let Word1{17-7} = alu_inst;
}
|  | 166 |  | 
// Low 18 bits of Word1 for three-source ALU instructions.  The third source
// takes the place of the OP2 modifier bits, leaving a 5-bit opcode.
//   alu_inst - 5-bit opcode, placed in Word1{17-13}
class R600ALU_Word1_OP3 <bits<5> alu_inst> : R600ALU_Word1 {

  bits<11> src2;
  bits<1>  src2_rel;
  bits<1>  src2_neg;

  // Same select/channel split as src0 and src1 in R600ALU_Word0.
  bits<9>  src2_sel  = src2{8-0};
  bits<2>  src2_chan = src2{10-9};

  let Word1{8-0}   = src2_sel;
  let Word1{9}     = src2_rel;
  let Word1{11-10} = src2_chan;
  let Word1{12}    = src2_neg;
  let Word1{17-13} = alu_inst;
}
|  | 182 |  | 
// First 32-bit word of a vertex-fetch instruction encoding.  Fields are
// declared in the same order in which they are laid out in Word0.
class VTX_WORD0 {
  field bits<32> Word0;

  bits<5> VC_INST;
  bits<2> FETCH_TYPE;
  bits<1> FETCH_WHOLE_QUAD;
  bits<8> BUFFER_ID;
  bits<7> SRC_GPR;
  bits<1> SRC_REL;
  bits<2> SRC_SEL_X;
  bits<6> MEGA_FETCH_COUNT;

  let Word0{4-0}   = VC_INST;
  let Word0{6-5}   = FETCH_TYPE;
  let Word0{7}     = FETCH_WHOLE_QUAD;
  let Word0{15-8}  = BUFFER_ID;
  let Word0{22-16} = SRC_GPR;
  let Word0{23}    = SRC_REL;
  let Word0{25-24} = SRC_SEL_X;
  let Word0{31-26} = MEGA_FETCH_COUNT;
}
|  | 203 |  | 
// Second 32-bit word of a vertex-fetch instruction whose destination is a
// GPR: destination register, per-component destination selects and the data
// format controls.
class VTX_WORD1_GPR {
  field bits<32> Word1;

  // Destination
  bits<7> DST_GPR;
  bits<1> DST_REL;
  bits<3> DST_SEL_X;
  bits<3> DST_SEL_Y;
  bits<3> DST_SEL_Z;
  bits<3> DST_SEL_W;

  // Format controls
  bits<1> USE_CONST_FIELDS;
  bits<6> DATA_FORMAT;
  bits<2> NUM_FORMAT_ALL;
  bits<1> FORMAT_COMP_ALL;
  bits<1> SRF_MODE_ALL;

  let Word1{6-0}   = DST_GPR;
  let Word1{7}     = DST_REL;
  let Word1{8}     = 0; // Reserved
  let Word1{11-9}  = DST_SEL_X;
  let Word1{14-12} = DST_SEL_Y;
  let Word1{17-15} = DST_SEL_Z;
  let Word1{20-18} = DST_SEL_W;
  let Word1{21}    = USE_CONST_FIELDS;
  let Word1{27-22} = DATA_FORMAT;
  let Word1{29-28} = NUM_FORMAT_ALL;
  let Word1{30}    = FORMAT_COMP_ALL;
  let Word1{31}    = SRF_MODE_ALL;
}
|  | 231 |  | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 232 | /* | 
|  | 233 | XXX: R600 subtarget uses a slightly different encoding than the other | 
|  | 234 | subtargets.  We currently handle this in R600MCCodeEmitter, but we may | 
|  | 235 | want to use these instruction classes in the future. | 
|  | 236 |  | 
|  | 237 | class R600ALU_Word1_OP2_r600 : R600ALU_Word1_OP2 { | 
|  | 238 |  | 
|  | 239 | bits<1>  fog_merge; | 
|  | 240 | bits<10> alu_inst; | 
|  | 241 |  | 
|  | 242 | let Inst{37}    = fog_merge; | 
|  | 243 | let Inst{39-38} = omod; | 
|  | 244 | let Inst{49-40} = alu_inst; | 
|  | 245 | } | 
|  | 246 |  | 
|  | 247 | class R600ALU_Word1_OP2_r700 : R600ALU_Word1_OP2 { | 
|  | 248 |  | 
|  | 249 | bits<11> alu_inst; | 
|  | 250 |  | 
|  | 251 | let Inst{38-37} = omod; | 
|  | 252 | let Inst{49-39} = alu_inst; | 
|  | 253 | } | 
|  | 254 | */ | 
|  | 255 |  | 
// Predicate operand for ALU instructions: an R600_Predicate register that
// defaults to PRED_SEL_OFF.
def R600_Pred : PredicateOperand<i32,
                                 (ops R600_Predicate),
                                 (ops PRED_SEL_OFF)>;
|  | 258 |  | 
|  | 259 |  | 
|  | 260 | let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { | 
|  | 261 |  | 
// Class for instructions with only one source register.
// If you add new ins to this instruction, make sure they are listed before
// $literal, because the backend currently assumes that the last operand is
// a literal.  Also be sure to update the enum R600Op1OperandIndex::ROI in
// R600Defines.h, R600InstrInfo::buildDefaultInstruction(),
// and R600InstrInfo::getOperandIdx().
//
//   inst    - 11-bit ALU opcode, encoded through R600ALU_Word1_OP2
//   opName  - mnemonic; prefix of the asm string
//   pattern - selection pattern(s)
//   itin    - scheduling itinerary (defaults to AnyALU)
//
// NOTE(review): InstR600 is given 0 rather than inst here, so op_code is 0
// for every one-operand instruction, whereas R600_2OP passes inst - confirm
// that this is intended.
class R600_1OP <bits<11> inst, string opName, list<dag> pattern,
                InstrItinClass itin = AnyALU> :
    InstR600 <0,
              (outs R600_Reg32:$dst),
              (ins WRITE:$write, OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
                   R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel,
                   ABS:$src0_abs, SEL:$src0_sel,
                   LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
              !strconcat(opName,
                         "$clamp $dst$write$dst_rel$omod, "
                         "$src0_neg$src0_abs$src0$src0_sel$src0_abs$src0_rel, "
                         "$literal $pred_sel$last"),
              pattern,
              itin>,
    R600ALU_Word0,
    R600ALU_Word1_OP2 <inst> {

  let HasNativeOperands = 1;
  let Op1 = 1;
  // $literal is an operand but is not part of the 64-bit encoding.
  let DisableEncoding = "$literal";

  // There is no second source: zero every src1-related encoding field and
  // the exec-mask / predicate update bits.
  let src1 = 0;
  let src1_rel = 0;
  let src1_neg = 0;
  let src1_abs = 0;
  let update_exec_mask = 0;
  let update_pred = 0;

  let Inst{31-0}  = Word0;
  let Inst{63-32} = Word1;
}
|  | 297 |  | 
// Convenience wrapper around R600_1OP for instructions that map directly onto
// a single-operand pattern operator: $dst = node($src0).
//   inst   - 11-bit ALU opcode
//   opName - mnemonic; prefix of the asm string
//   node   - pattern operator matched by the generated pattern
//   itin   - scheduling itinerary (defaults to AnyALU).  It is forwarded to
//            R600_1OP; previously the argument was accepted but dropped, so
//            an explicitly requested itinerary had no effect.
class R600_1OP_Helper <bits<11> inst, string opName, SDPatternOperator node,
                       InstrItinClass itin = AnyALU> :
    R600_1OP <inst, opName,
              [(set R600_Reg32:$dst, (node R600_Reg32:$src0))],
              itin
>;
|  | 303 |  | 
// If you add or change the operands for R600_2OP instructions, you must
// also update the R600Op2OperandIndex::ROI enum in R600Defines.h,
// R600InstrInfo::buildDefaultInstruction(), and R600InstrInfo::getOperandIdx().
//
//   inst    - 11-bit ALU opcode, encoded through R600ALU_Word1_OP2
//   opName  - mnemonic; prefix of the asm string
//   pattern - selection pattern(s)
//   itin    - scheduling itinerary (defaults to AnyALU)
class R600_2OP <bits<11> inst, string opName, list<dag> pattern,
                InstrItinClass itin = AnyALU> :
    InstR600 <inst,
              (outs R600_Reg32:$dst),
              (ins UEM:$update_exec_mask, UP:$update_pred, WRITE:$write,
                   OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
                   R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel,
                   ABS:$src0_abs, SEL:$src0_sel,
                   R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel,
                   ABS:$src1_abs, SEL:$src1_sel,
                   LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
              !strconcat(opName,
                         "$clamp $update_exec_mask$update_pred$dst$write$dst_rel$omod, "
                         "$src0_neg$src0_abs$src0$src0_sel$src0_abs$src0_rel, "
                         "$src1_neg$src1_abs$src1$src1_sel$src1_abs$src1_rel, "
                         "$literal $pred_sel$last"),
              pattern,
              itin>,
    R600ALU_Word0,
    R600ALU_Word1_OP2 <inst> {

  let Op2 = 1;
  let HasNativeOperands = 1;
  // $literal is an operand but is not part of the 64-bit encoding.
  let DisableEncoding = "$literal";

  let Inst{31-0}  = Word0;
  let Inst{63-32} = Word1;
}
|  | 333 |  | 
// Convenience wrapper around R600_2OP for instructions that map directly onto
// a two-operand pattern operator: $dst = node($src0, $src1).
//   inst   - 11-bit ALU opcode
//   opName - mnemonic; prefix of the asm string
//   node   - pattern operator matched by the generated pattern
//   itim   - scheduling itinerary (defaults to AnyALU).  The historical
//            spelling of the argument name is kept so the class signature is
//            unchanged.  It is forwarded to R600_2OP; previously the argument
//            was accepted but dropped.
class R600_2OP_Helper <bits<11> inst, string opName, SDPatternOperator node,
                       InstrItinClass itim = AnyALU> :
    R600_2OP <inst, opName,
              [(set R600_Reg32:$dst, (node R600_Reg32:$src0,
                                           R600_Reg32:$src1))],
              itim
>;
|  | 340 |  | 
// If you add or change the operands for R600_3OP instructions, you must
// also update the R600Op3OperandIndex::ROI enum in R600Defines.h,
// R600InstrInfo::buildDefaultInstruction(), and
// R600InstrInfo::getOperandIdx().
//
//   inst    - 5-bit ALU opcode, encoded through R600ALU_Word1_OP3
//   opName  - mnemonic; prefix of the asm string
//   pattern - selection pattern(s)
//   itin    - scheduling itinerary (defaults to AnyALU)
class R600_3OP <bits<5> inst, string opName, list<dag> pattern,
                InstrItinClass itin = AnyALU> :
    InstR600 <0,
              (outs R600_Reg32:$dst),
              (ins REL:$dst_rel, CLAMP:$clamp,
                   R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, SEL:$src0_sel,
                   R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, SEL:$src1_sel,
                   R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel, SEL:$src2_sel,
                   LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
              !strconcat(opName, "$clamp $dst$dst_rel, "
                         "$src0_neg$src0$src0_sel$src0_rel, "
                         "$src1_neg$src1$src1_sel$src1_rel, "
                         "$src2_neg$src2$src2_sel$src2_rel, "
                         "$literal $pred_sel$last"),
              pattern,
              itin>,
    R600ALU_Word0,
    R600ALU_Word1_OP3 <inst> {

  let Op3 = 1;
  let HasNativeOperands = 1;
  // $literal is an operand but is not part of the 64-bit encoding.
  let DisableEncoding = "$literal";

  let Inst{31-0}  = Word0;
  let Inst{63-32} = Word1;
}
|  | 371 |  | 
// Instruction with a single R600_Reg32 result whose inputs and asm string are
// supplied by the user of the class.  The itinerary defaults to VecALU.
class R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern,
                      InstrItinClass itin = VecALU>
    : InstR600 <inst, (outs R600_Reg32:$dst), ins, asm, pattern, itin>;
|  | 380 |  | 
// Texture instruction: reads a 128-bit source register plus resource id,
// sampler id and texture target immediates, and writes a 128-bit result.
// Only the opcode is placed in the encoding here (Inst{10-0}).
class R600_TEX <bits<11> inst, string opName, list<dag> pattern,
                InstrItinClass itin = AnyALU> :
    InstR600 <inst,
              (outs R600_Reg128:$dst),
              (ins R600_Reg128:$src0, i32imm:$resourceId, i32imm:$samplerId,
                   i32imm:$textureTarget),
              !strconcat(opName,
                         "$dst, $src0, $resourceId, $samplerId, $textureTarget"),
              pattern,
              itin> {
  let Inst{10-0} = inst;
}
|  | 391 |  | 
} // End mayLoad = 0, mayStore = 0, hasSideEffects = 0
|  | 393 |  | 
// Matches an immediate texture-type value in the range 6..8 or 11..13.
def TEX_SHADOW : PatLeaf<
  (imm),
  [{
    uint32_t TType = (uint32_t)N->getZExtValue();
    return (TType >= 6 && TType <= 8) || (TType >= 11 && TType <= 13);
  }]
>;
|  | 400 |  | 
// Matches an immediate texture-type value equal to 5.
def TEX_RECT : PatLeaf<
  (imm),
  [{
    uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 5;
  }]
>;
|  | 407 |  | 
// Control-flow RAT instruction encoding.
//   cf_inst  - 8-bit CF opcode, placed in Inst{61-54}
//   rat_inst - 6-bit RAT operation, placed in Inst{9-4}
//   rat_id   - 4-bit RAT id, placed in Inst{3-0}
// All remaining fields are left for the instruction definition to set.
class EG_CF_RAT <bits<8> cf_inst, bits<6> rat_inst, bits<4> rat_id, dag outs,
                 dag ins, string asm, list<dag> pattern>
    : InstR600ISA <outs, ins, asm, pattern> {

  // Fields of the first word.
  bits<2>  RIM;
  bits<2>  TYPE;
  bits<7>  RW_GPR;
  bits<1>  RW_REL;
  bits<7>  INDEX_GPR;
  bits<2>  ELEM_SIZE;

  // Fields of the second word.
  bits<12> ARRAY_SIZE;
  bits<4>  COMP_MASK;
  bits<4>  BURST_COUNT;
  bits<1>  VPM;
  bits<1>  eop;
  bits<1>  MARK;
  bits<1>  BARRIER;

  // CF_ALLOC_EXPORT_WORD0_RAT
  let Inst{3-0}   = rat_id;
  let Inst{9-4}   = rat_inst;
  let Inst{10}    = 0; // Reserved
  let Inst{12-11} = RIM;
  let Inst{14-13} = TYPE;
  let Inst{21-15} = RW_GPR;
  let Inst{22}    = RW_REL;
  let Inst{29-23} = INDEX_GPR;
  let Inst{31-30} = ELEM_SIZE;

  // CF_ALLOC_EXPORT_WORD1_BUF
  let Inst{43-32} = ARRAY_SIZE;
  let Inst{47-44} = COMP_MASK;
  let Inst{51-48} = BURST_COUNT;
  let Inst{52}    = VPM;
  let Inst{53}    = eop;
  let Inst{61-54} = cf_inst;
  let Inst{62}    = MARK;
  let Inst{63}    = BARRIER;
}
|  | 448 |  | 
// Wraps a load fragment so that it only matches when isParamLoad() accepts
// the load node.
//   load_type - the underlying load PatFrag to restrict
class LoadParamFrag <PatFrag load_type> : PatFrag <
  (ops node:$ptr),
  (load_type node:$ptr),
  [{ return isParamLoad(dyn_cast<LoadSDNode>(N)); }]
>;
|  | 453 |  | 
// Parameter-load variants of the plain and zero-extending i8/i16 loads.
def load_param         : LoadParamFrag<load>;
def load_param_zexti8  : LoadParamFrag<zextloadi8>;
def load_param_zexti16 : LoadParamFrag<zextloadi16>;
|  | 457 |  | 
// Subtarget predicates.  Each string is a C++ expression over the subtarget's
// device generation and/or device flag.  Adjacent string literals are
// concatenated, so the resulting expressions are exactly those of the
// original pieces.

// NOTE(review): isR600 and isR700 both test for generation HD4XXX; isR700
// only adds the RV710 device-flag check - confirm this is intended.
def isR600 : Predicate<
  "Subtarget.device()->getGeneration() == AMDGPUDeviceInfo::HD4XXX">;

def isR700 : Predicate<
  "Subtarget.device()->getGeneration() == AMDGPUDeviceInfo::HD4XXX &&"
  "Subtarget.device()->getDeviceFlag()>= OCL_DEVICE_RV710">;

// HD5XXX up to (but excluding) HD7XXX, with Cayman excluded.
def isEG : Predicate<
  "Subtarget.device()->getGeneration() >= AMDGPUDeviceInfo::HD5XXX && "
  "Subtarget.device()->getGeneration() < AMDGPUDeviceInfo::HD7XXX && "
  "Subtarget.device()->getDeviceFlag() != OCL_DEVICE_CAYMAN">;

def isCayman : Predicate<
  "Subtarget.device()->getDeviceFlag() == OCL_DEVICE_CAYMAN">;

def isEGorCayman : Predicate<
  "Subtarget.device()->getGeneration() == AMDGPUDeviceInfo::HD5XXX"
  "|| Subtarget.device()->getGeneration() ==AMDGPUDeviceInfo::HD6XXX">;

def isR600toCayman : Predicate<
  "Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX">;
|  | 478 |  | 
|  | 479 | //===----------------------------------------------------------------------===// | 
| Tom Stellard | ff62c35 | 2013-01-23 02:09:03 +0000 | [diff] [blame] | 480 | // R600 SDNodes | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 481 | //===----------------------------------------------------------------------===// | 
|  | 482 |  | 
// Shader instruction producing a pair of results in an X-channel ($dst0) and
// a Y-channel ($dst1) register from an immediate ($src0) and two registers
// ($src1, $src2).  It has no selection pattern.
// The asm string now references $dst1; the previous text "dst1" lacked the
// '$' and was therefore printed literally instead of as the operand.
def INTERP_PAIR_XY : AMDGPUShaderInst <
  (outs R600_TReg32_X:$dst0, R600_TReg32_Y:$dst1),
  (ins i32imm:$src0, R600_Reg32:$src1, R600_Reg32:$src2),
  "INTERP_PAIR_XY $src0 $src1 $src2 : $dst0 $dst1",
  []>;
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 488 |  | 
// Shader instruction producing a pair of results in a Z-channel ($dst0) and
// a W-channel ($dst1) register from an immediate ($src0) and two registers
// ($src1, $src2).  It has no selection pattern.
// The asm string now references $dst1; the previous text "dst1" lacked the
// '$' and was therefore printed literally instead of as the operand.
def INTERP_PAIR_ZW : AMDGPUShaderInst <
  (outs R600_TReg32_Z:$dst0, R600_TReg32_W:$dst1),
  (ins i32imm:$src0, R600_Reg32:$src1, R600_Reg32:$src2),
  "INTERP_PAIR_ZW $src0 $src1 $src2 : $dst0 $dst1",
  []>;
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 494 |  | 
// SelectionDAG node for AMDGPUISD::CONST_ADDRESS: one integer result, one
// pointer-typed operand; marked as possibly loading.
def CONST_ADDRESS : SDNode<
  "AMDGPUISD::CONST_ADDRESS",
  SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisPtrTy<1>]>,
  [SDNPMayLoad]
>;
|  | 499 |  | 
|  | 500 | //===----------------------------------------------------------------------===// | 
|  | 501 | // Interpolation Instructions | 
|  | 502 | //===----------------------------------------------------------------------===// | 
|  | 503 |  | 
// Shader instruction that produces a 128-bit register from an immediate
// operand.  It has no selection pattern; note that the printed mnemonic is
// "INTERP_LOAD", not the record name.
def INTERP_VEC_LOAD : AMDGPUShaderInst <
  (outs R600_Reg128:$dst),
  (ins i32imm:$src0),
  "INTERP_LOAD $src0 : $dst",
  []>;
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 509 |  | 
// Native interpolation ALU instructions; none has a selection pattern.
// The two-operand forms override the default bank_swizzle (0) with 5.
def INTERP_XY : R600_2OP <0xD6, "INTERP_XY", []> {
  let bank_swizzle = 5;
}

def INTERP_ZW : R600_2OP <0xD7, "INTERP_ZW", []> {
  let bank_swizzle = 5;
}

def INTERP_LOAD_P0 : R600_1OP <0xE0, "INTERP_LOAD_P0", []>;
|  | 519 |  | 
|  | 520 | //===----------------------------------------------------------------------===// | 
|  | 521 | // Export Instructions | 
|  | 522 | //===----------------------------------------------------------------------===// | 
|  | 523 |  | 
// Type profile of the export node: no results and five operands, of which
// operand 0 is floating point and operand 1 is an integer.
def ExportType : SDTypeProfile<0, 5, [SDTCisFP<0>, SDTCisInt<1>]>;

// AMDGPUISD::EXPORT is chained and has side effects.
def EXPORT : SDNode<"AMDGPUISD::EXPORT", ExportType,
                    [SDNPHasChain, SDNPSideEffect]>;
|  | 528 |  | 
// First 32-bit word shared by the export instruction encodings.  RW_REL and
// INDEX_GPR are hard-wired to zero.
class ExportWord0 {
  field bits<32> Word0;

  bits<13> arraybase;
  bits<2>  type;
  bits<7>  gpr;
  bits<2>  elem_size;

  let Word0{12-0}  = arraybase;
  let Word0{14-13} = type;
  let Word0{21-15} = gpr;
  let Word0{22}    = 0; // RW_REL
  let Word0{29-23} = 0; // INDEX_GPR
  let Word0{31-30} = elem_size;
}
|  | 544 |  | 
// Second word of a swizzled export: the four component swizzles occupy
// Word1{11-0}.  eop and inst are declared here but are not placed into Word1
// by this class.
// NOTE(review): presumably the users of this class encode eop/inst in the
// remaining bits - confirm.
class ExportSwzWord1 {
  field bits<32> Word1;

  bits<3> sw_x;
  bits<3> sw_y;
  bits<3> sw_z;
  bits<3> sw_w;

  bits<1> eop;
  bits<8> inst;

  let Word1{2-0}  = sw_x;
  let Word1{5-3}  = sw_y;
  let Word1{8-6}  = sw_z;
  let Word1{11-9} = sw_w;
}
|  | 560 |  | 
// Second word of a buffer export: array size in Word1{11-0} and component
// mask in Word1{15-12}.  eop and inst are declared here but are not placed
// into Word1 by this class.
// NOTE(review): presumably the users of this class encode eop/inst in the
// remaining bits - confirm.
class ExportBufWord1 {
  field bits<32> Word1;

  bits<12> arraySize;
  bits<4>  compMask;

  bits<1>  eop;
  bits<8>  inst;

  let Word1{11-0}  = arraySize;
  let Word1{15-12} = compMask;
}
|  | 572 |  | 
// Selection patterns that lower the export-style intrinsics and the EXPORT
// node to ExportInst.
//   ExportInst - instruction to emit
//   cf_inst    - CF opcode handed to ExportInst
// NOTE(review): the operand order used below (source register, type, array
// base, four swizzles, opcode, eop) presumably matches ExportSwzInst -
// confirm against the instruction passed in.
multiclass ExportPattern<Instruction ExportInst, bits<8> cf_inst> {

  // Pixel depth: the scalar is inserted into sub-register sel_x of an
  // otherwise undefined v4f32.
  def : Pat<(int_R600_store_pixel_depth R600_Reg32:$reg),
    (ExportInst
        (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sel_x),
        0, 61, 0, 7, 7, 7, cf_inst, 0)
  >;

  // Pixel stencil: same construction, different swizzle immediates.
  def : Pat<(int_R600_store_pixel_stencil R600_Reg32:$reg),
    (ExportInst
        (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sel_x),
        0, 61, 7, 0, 7, 7, cf_inst, 0)
  >;

  // Dummy export of an undefined vector, for an arbitrary immediate type.
  def : Pat<(int_R600_store_dummy (i32 imm:$type)),
    (ExportInst
        (v4f32 (IMPLICIT_DEF)), imm:$type, 0, 7, 7, 7, 7, cf_inst, 0)
  >;

  // Dummy export for the literal type 1, which uses array base 60.
  def : Pat<(int_R600_store_dummy 1),
    (ExportInst
        (v4f32 (IMPLICIT_DEF)), 1, 60, 7, 7, 7, 7, cf_inst, 0)
  >;

  // EXPORT node with second operand 0.
  def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 0),
                    (i32 imm:$type), (i32 imm:$arraybase), (i32 imm)),
    (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase,
        0, 1, 2, 3, cf_inst, 0)
  >;

  // EXPORT node with second operand 1; produces the same instruction.
  def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 1),
                    (i32 imm:$type), (i32 imm:$arraybase), (i32 imm)),
    (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase,
        0, 1, 2, 3, cf_inst, 0)
  >;

  // Swizzled store intrinsic; the swizzle immediates are 0, 1, 2, 3.
  def : Pat<(int_R600_store_swizzle (v4f32 R600_Reg128:$src), imm:$arraybase,
                                    imm:$type),
    (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase,
        0, 1, 2, 3, cf_inst, 0)
  >;
}
|  | 613 |  | 
// Selection patterns for int_R600_store_stream_output.  The third intrinsic
// operand (0..3) selects which of the four opcodes is handed to ExportInst;
// everything else is identical across the four patterns.
//   ExportInst        - instruction to emit
//   buf0inst-buf3inst - opcode used for stream 0 to stream 3
// NOTE(review): the operand order used below (source register, type, array
// base, array size, component mask, opcode, eop) presumably matches
// ExportBufInst - confirm against the instruction passed in.
multiclass SteamOutputExportPattern<Instruction ExportInst,
                                    bits<8> buf0inst, bits<8> buf1inst,
                                    bits<8> buf2inst, bits<8> buf3inst> {
  // Stream0
  def : Pat<
    (int_R600_store_stream_output (v4f32 R600_Reg128:$src),
                                  (i32 imm:$arraybase), (i32 0),
                                  (i32 imm:$mask)),
    (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
                4095, imm:$mask, buf0inst, 0)>;

  // Stream1
  def : Pat<
    (int_R600_store_stream_output (v4f32 R600_Reg128:$src),
                                  (i32 imm:$arraybase), (i32 1),
                                  (i32 imm:$mask)),
    (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
                4095, imm:$mask, buf1inst, 0)>;

  // Stream2
  def : Pat<
    (int_R600_store_stream_output (v4f32 R600_Reg128:$src),
                                  (i32 imm:$arraybase), (i32 2),
                                  (i32 imm:$mask)),
    (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
                4095, imm:$mask, buf2inst, 0)>;

  // Stream3
  def : Pat<
    (int_R600_store_stream_output (v4f32 R600_Reg128:$src),
                                  (i32 imm:$arraybase), (i32 3),
                                  (i32 imm:$mask)),
    (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
                4095, imm:$mask, buf3inst, 0)>;
}
|  | 637 |  | 
let isTerminator = 1, usesCustomInserter = 1 in {

// Swizzled export instruction: exports a 128-bit register with per-component
// swizzles.  elem_size is fixed at 3; the two encoding words come from
// ExportWord0 and ExportSwzWord1.
class ExportSwzInst : InstR600ISA <
    (outs),
    (ins R600_Reg128:$gpr, i32imm:$type, i32imm:$arraybase,
         i32imm:$sw_x, i32imm:$sw_y, i32imm:$sw_z, i32imm:$sw_w,
         i32imm:$inst, i32imm:$eop),
    "EXPORT $gpr",
    []>,
    ExportWord0, ExportSwzWord1 {
  let elem_size   = 3;
  let Inst{31-0}  = Word0;
  let Inst{63-32} = Word1;
}

} // End isTerminator = 1, usesCustomInserter = 1
|  | 653 |  | 
// Buffer export instruction: exports a 128-bit register with an array size
// and a component mask.  elem_size is fixed at 0; the two encoding words come
// from ExportWord0 and ExportBufWord1.  Unlike ExportSwzInst, this class is
// defined outside the isTerminator / usesCustomInserter scope.
class ExportBufInst : InstR600ISA <
    (outs),
    (ins R600_Reg128:$gpr, i32imm:$type, i32imm:$arraybase,
         i32imm:$arraySize, i32imm:$compMask, i32imm:$inst, i32imm:$eop),
    "EXPORT $gpr",
    []>,
    ExportWord0, ExportBufWord1 {
  let elem_size   = 0;
  let Inst{31-0}  = Word0;
  let Inst{63-32} = Word1;
}
|  | 664 |  | 
|  | 665 | let Predicates = [isR600toCayman] in { | 
|  | 666 |  | 
|  | 667 | //===----------------------------------------------------------------------===// | 
|  | 668 | // Common Instructions R600, R700, Evergreen, Cayman | 
|  | 669 | //===----------------------------------------------------------------------===// | 
|  | 670 |  | 
// Basic two-operand floating-point arithmetic.
def ADD      : R600_2OP_Helper <0x0, "ADD", fadd>;
// Non-IEEE MUL: 0 * anything = 0
def MUL      : R600_2OP_Helper <0x1, "MUL NON-IEEE", int_AMDGPU_mul>;
def MUL_IEEE : R600_2OP_Helper <0x2, "MUL_IEEE", fmul>;
def MAX      : R600_2OP_Helper <0x3, "MAX", AMDGPUfmax>;
def MIN      : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin>;
|  | 677 |  | 
// For the SET* instructions there is a naming conflict in TargetSelectionDAG.td,
// so some of the instruction names don't match the asm string.
// XXX: Use the defs in TargetSelectionDAG.td instead of intrinsics.
//
// Each instruction matches a selectcc on two f32 operands that yields FP_ONE
// when the condition holds and FP_ZERO otherwise.
def SETE : R600_2OP <
  0x08, "SETE",
  [(set R600_Reg32:$dst,
        (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1,
                  FP_ONE, FP_ZERO, COND_EQ))]
>;

def SGT : R600_2OP <
  0x09, "SETGT",
  [(set R600_Reg32:$dst,
        (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1,
                  FP_ONE, FP_ZERO, COND_GT))]
>;

def SGE : R600_2OP <
  0xA, "SETGE",
  [(set R600_Reg32:$dst,
        (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1,
                  FP_ONE, FP_ZERO, COND_GE))]
>;

def SNE : R600_2OP <
  0xB, "SETNE",
  [(set R600_Reg32:$dst,
        (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1,
                  FP_ONE, FP_ZERO, COND_NE))]
>;
|  | 708 |  | 
|  | 709 | def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>; | 
|  | 710 | def TRUNC : R600_1OP_Helper <0x11, "TRUNC", int_AMDGPU_trunc>; | 
|  | 711 | def CEIL : R600_1OP_Helper <0x12, "CEIL", fceil>; | 
|  | 712 | def RNDNE : R600_1OP_Helper <0x13, "RNDNE", frint>; | 
|  | 713 | def FLOOR : R600_1OP_Helper <0x14, "FLOOR", ffloor>; | 
|  | 714 |  | 
|  | 715 | def MOV : R600_1OP <0x19, "MOV", []>; | 
|  | 716 |  | 
|  | 717 | let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in { | 
|  | 718 |  | 
|  | 719 | class MOV_IMM <ValueType vt, Operand immType> : AMDGPUInst < | 
|  | 720 | (outs R600_Reg32:$dst), | 
|  | 721 | (ins immType:$imm), | 
|  | 722 | "", | 
|  | 723 | [] | 
|  | 724 | >; | 
|  | 725 |  | 
|  | 726 | } // end let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 | 
|  | 727 |  | 
|  | 728 | def MOV_IMM_I32 : MOV_IMM<i32, i32imm>; | 
|  | 729 | def : Pat < | 
|  | 730 | (imm:$val), | 
|  | 731 | (MOV_IMM_I32 imm:$val) | 
|  | 732 | >; | 
|  | 733 |  | 
|  | 734 | def MOV_IMM_F32 : MOV_IMM<f32, f32imm>; | 
|  | 735 | def : Pat < | 
|  | 736 | (fpimm:$val), | 
|  | 737 | (MOV_IMM_F32  fpimm:$val) | 
|  | 738 | >; | 
|  | 739 |  | 
|  | 740 | def PRED_SETE : R600_2OP <0x20, "PRED_SETE", []>; | 
|  | 741 | def PRED_SETGT : R600_2OP <0x21, "PRED_SETGT", []>; | 
|  | 742 | def PRED_SETGE : R600_2OP <0x22, "PRED_SETGE", []>; | 
|  | 743 | def PRED_SETNE : R600_2OP <0x23, "PRED_SETNE", []>; | 
|  | 744 |  | 
|  | 745 | let hasSideEffects = 1 in { | 
|  | 746 |  | 
|  | 747 | def KILLGT : R600_2OP <0x2D, "KILLGT", []>; | 
|  | 748 |  | 
|  | 749 | } // end hasSideEffects | 
|  | 750 |  | 
|  | 751 | def AND_INT : R600_2OP_Helper <0x30, "AND_INT", and>; | 
|  | 752 | def OR_INT : R600_2OP_Helper <0x31, "OR_INT", or>; | 
|  | 753 | def XOR_INT : R600_2OP_Helper <0x32, "XOR_INT", xor>; | 
|  | 754 | def NOT_INT : R600_1OP_Helper <0x33, "NOT_INT", not>; | 
|  | 755 | def ADD_INT : R600_2OP_Helper <0x34, "ADD_INT", add>; | 
|  | 756 | def SUB_INT : R600_2OP_Helper <0x35, "SUB_INT", sub>; | 
|  | 757 | def MAX_INT : R600_2OP_Helper <0x36, "MAX_INT", AMDGPUsmax>; | 
|  | 758 | def MIN_INT : R600_2OP_Helper <0x37, "MIN_INT", AMDGPUsmin>; | 
| Tom Stellard | 4139802 | 2012-12-21 20:12:01 +0000 | [diff] [blame] | 759 | def MAX_UINT : R600_2OP_Helper <0x38, "MAX_UINT", AMDGPUumax>; | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 760 | def MIN_UINT : R600_2OP_Helper <0x39, "MIN_UINT", AMDGPUumin>; | 
|  | 761 |  | 
|  | 762 | def SETE_INT : R600_2OP < | 
|  | 763 | 0x3A, "SETE_INT", | 
|  | 764 | [(set (i32 R600_Reg32:$dst), | 
|  | 765 | (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETEQ))] | 
|  | 766 | >; | 
|  | 767 |  | 
|  |  | // Signed integer compare: $dst = ($src0 > $src1) ? -1 : 0. | 
|  |  | // The asm string uses the same SET*_INT spelling as the sibling compares. | 
|  | 768 | def SETGT_INT : R600_2OP < | 
|  | 769 | 0x3B, "SETGT_INT", | 
|  | 770 | [(set (i32 R600_Reg32:$dst), | 
|  | 771 | (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGT))] | 
|  | 772 | >; | 
|  | 773 |  | 
|  | 774 | def SETGE_INT : R600_2OP < | 
|  | 775 | 0x3C, "SETGE_INT", | 
|  | 776 | [(set (i32 R600_Reg32:$dst), | 
|  | 777 | (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGE))] | 
|  | 778 | >; | 
|  | 779 |  | 
|  | 780 | def SETNE_INT : R600_2OP < | 
|  | 781 | 0x3D, "SETNE_INT", | 
|  | 782 | [(set (i32 R600_Reg32:$dst), | 
|  | 783 | (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETNE))] | 
|  | 784 | >; | 
|  | 785 |  | 
|  | 786 | def SETGT_UINT : R600_2OP < | 
|  | 787 | 0x3E, "SETGT_UINT", | 
|  | 788 | [(set (i32 R600_Reg32:$dst), | 
|  | 789 | (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUGT))] | 
|  | 790 | >; | 
|  | 791 |  | 
|  | 792 | def SETGE_UINT : R600_2OP < | 
|  | 793 | 0x3F, "SETGE_UINT", | 
|  | 794 | [(set (i32 R600_Reg32:$dst), | 
|  | 795 | (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUGE))] | 
|  | 796 | >; | 
|  | 797 |  | 
|  | 798 | def PRED_SETE_INT : R600_2OP <0x42, "PRED_SETE_INT", []>; | 
|  |  | // Opcode 0x43; printed as PRED_SETGT_INT so it is distinguishable from | 
|  |  | // PRED_SETGE_INT (0x44) in assembly output. | 
|  | 799 | def PRED_SETGT_INT : R600_2OP <0x43, "PRED_SETGT_INT", []>; | 
|  | 800 | def PRED_SETGE_INT : R600_2OP <0x44, "PRED_SETGE_INT", []>; | 
|  | 801 | def PRED_SETNE_INT : R600_2OP <0x45, "PRED_SETNE_INT", []>; | 
|  | 802 |  | 
|  | 803 | def CNDE_INT : R600_3OP < | 
|  | 804 | 0x1C, "CNDE_INT", | 
|  | 805 | [(set (i32 R600_Reg32:$dst), | 
|  | 806 | (selectcc (i32 R600_Reg32:$src0), 0, | 
|  | 807 | (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2), | 
|  | 808 | COND_EQ))] | 
|  | 809 | >; | 
|  | 810 |  | 
|  | 811 | def CNDGE_INT : R600_3OP < | 
|  | 812 | 0x1E, "CNDGE_INT", | 
|  | 813 | [(set (i32 R600_Reg32:$dst), | 
|  | 814 | (selectcc (i32 R600_Reg32:$src0), 0, | 
|  | 815 | (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2), | 
|  | 816 | COND_GE))] | 
|  | 817 | >; | 
|  | 818 |  | 
|  | 819 | def CNDGT_INT : R600_3OP < | 
|  | 820 | 0x1D, "CNDGT_INT", | 
|  | 821 | [(set (i32 R600_Reg32:$dst), | 
|  | 822 | (selectcc (i32 R600_Reg32:$src0), 0, | 
|  | 823 | (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2), | 
|  | 824 | COND_GT))] | 
|  | 825 | >; | 
|  | 826 |  | 
|  | 827 | //===----------------------------------------------------------------------===// | 
|  | 828 | // Texture instructions | 
|  | 829 | //===----------------------------------------------------------------------===// | 
|  | 830 |  | 
|  | 831 | def TEX_LD : R600_TEX < | 
|  | 832 | 0x03, "TEX_LD", | 
|  | 833 | [(set R600_Reg128:$dst, (int_AMDGPU_txf R600_Reg128:$src0, imm:$src1, imm:$src2, imm:$src3, imm:$resourceId, imm:$samplerId, imm:$textureTarget))] | 
|  | 834 | > { | 
|  | 835 | let AsmString = "TEX_LD $dst, $src0, $src1, $src2, $src3, $resourceId, $samplerId, $textureTarget"; | 
|  | 836 | let InOperandList = (ins R600_Reg128:$src0, i32imm:$src1, i32imm:$src2, i32imm:$src3, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget); | 
|  | 837 | } | 
|  | 838 |  | 
|  | 839 | def TEX_GET_TEXTURE_RESINFO : R600_TEX < | 
|  | 840 | 0x04, "TEX_GET_TEXTURE_RESINFO", | 
|  | 841 | [(set R600_Reg128:$dst, (int_AMDGPU_txq R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))] | 
|  | 842 | >; | 
|  | 843 |  | 
|  | 844 | def TEX_GET_GRADIENTS_H : R600_TEX < | 
|  | 845 | 0x07, "TEX_GET_GRADIENTS_H", | 
|  | 846 | [(set R600_Reg128:$dst, (int_AMDGPU_ddx R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))] | 
|  | 847 | >; | 
|  | 848 |  | 
|  | 849 | def TEX_GET_GRADIENTS_V : R600_TEX < | 
|  | 850 | 0x08, "TEX_GET_GRADIENTS_V", | 
|  | 851 | [(set R600_Reg128:$dst, (int_AMDGPU_ddy R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))] | 
|  | 852 | >; | 
|  | 853 |  | 
|  | 854 | def TEX_SET_GRADIENTS_H : R600_TEX < | 
|  | 855 | 0x0B, "TEX_SET_GRADIENTS_H", | 
|  | 856 | [] | 
|  | 857 | >; | 
|  | 858 |  | 
|  | 859 | def TEX_SET_GRADIENTS_V : R600_TEX < | 
|  | 860 | 0x0C, "TEX_SET_GRADIENTS_V", | 
|  | 861 | [] | 
|  | 862 | >; | 
|  | 863 |  | 
|  | 864 | def TEX_SAMPLE : R600_TEX < | 
|  | 865 | 0x10, "TEX_SAMPLE", | 
|  | 866 | [(set R600_Reg128:$dst, (int_AMDGPU_tex R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))] | 
|  | 867 | >; | 
|  | 868 |  | 
|  | 869 | def TEX_SAMPLE_C : R600_TEX < | 
|  | 870 | 0x18, "TEX_SAMPLE_C", | 
|  | 871 | [(set R600_Reg128:$dst, (int_AMDGPU_tex R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))] | 
|  | 872 | >; | 
|  | 873 |  | 
|  | 874 | def TEX_SAMPLE_L : R600_TEX < | 
|  | 875 | 0x11, "TEX_SAMPLE_L", | 
|  | 876 | [(set R600_Reg128:$dst, (int_AMDGPU_txl R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))] | 
|  | 877 | >; | 
|  | 878 |  | 
|  | 879 | def TEX_SAMPLE_C_L : R600_TEX < | 
|  | 880 | 0x19, "TEX_SAMPLE_C_L", | 
|  | 881 | [(set R600_Reg128:$dst, (int_AMDGPU_txl R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))] | 
|  | 882 | >; | 
|  | 883 |  | 
|  | 884 | def TEX_SAMPLE_LB : R600_TEX < | 
|  | 885 | 0x12, "TEX_SAMPLE_LB", | 
|  | 886 | [(set R600_Reg128:$dst, (int_AMDGPU_txb R600_Reg128:$src0,imm:$resourceId, imm:$samplerId, imm:$textureTarget))] | 
|  | 887 | >; | 
|  | 888 |  | 
|  | 889 | def TEX_SAMPLE_C_LB : R600_TEX < | 
|  | 890 | 0x1A, "TEX_SAMPLE_C_LB", | 
|  | 891 | [(set R600_Reg128:$dst, (int_AMDGPU_txb R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))] | 
|  | 892 | >; | 
|  | 893 |  | 
|  | 894 | def TEX_SAMPLE_G : R600_TEX < | 
|  | 895 | 0x14, "TEX_SAMPLE_G", | 
|  | 896 | [] | 
|  | 897 | >; | 
|  | 898 |  | 
|  | 899 | def TEX_SAMPLE_C_G : R600_TEX < | 
|  | 900 | 0x1C, "TEX_SAMPLE_C_G", | 
|  | 901 | [] | 
|  | 902 | >; | 
|  | 903 |  | 
|  | 904 | //===----------------------------------------------------------------------===// | 
|  | 905 | // Helper classes for common instructions | 
|  | 906 | //===----------------------------------------------------------------------===// | 
|  | 907 |  | 
|  | 908 | class MUL_LIT_Common <bits<5> inst> : R600_3OP < | 
|  | 909 | inst, "MUL_LIT", | 
|  | 910 | [] | 
|  | 911 | >; | 
|  | 912 |  | 
|  | 913 | class MULADD_Common <bits<5> inst> : R600_3OP < | 
|  | 914 | inst, "MULADD", | 
|  | 915 | [(set (f32 R600_Reg32:$dst), | 
|  | 916 | (IL_mad R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2))] | 
|  | 917 | >; | 
|  | 918 |  | 
|  | 919 | class CNDE_Common <bits<5> inst> : R600_3OP < | 
|  | 920 | inst, "CNDE", | 
|  | 921 | [(set R600_Reg32:$dst, | 
|  | 922 | (selectcc (f32 R600_Reg32:$src0), FP_ZERO, | 
|  | 923 | (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2), | 
|  | 924 | COND_EQ))] | 
|  | 925 | >; | 
|  | 926 |  | 
|  | 927 | class CNDGT_Common <bits<5> inst> : R600_3OP < | 
|  | 928 | inst, "CNDGT", | 
|  | 929 | [(set R600_Reg32:$dst, | 
|  | 930 | (selectcc (f32 R600_Reg32:$src0), FP_ZERO, | 
|  | 931 | (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2), | 
|  | 932 | COND_GT))] | 
|  | 933 | >; | 
|  | 934 |  | 
|  | 935 | class CNDGE_Common <bits<5> inst> : R600_3OP < | 
|  | 936 | inst, "CNDGE", | 
|  | 937 | [(set R600_Reg32:$dst, | 
|  | 938 | (selectcc (f32 R600_Reg32:$src0), FP_ZERO, | 
|  | 939 | (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2), | 
|  | 940 | COND_GE))] | 
|  | 941 | >; | 
|  | 942 |  | 
|  |  | // Defines the DOT4 pair for one opcode: <name>_pseudo carries the | 
|  |  | // int_AMDGPU_dp4 selection pattern on two R600_Reg128 operands, and | 
|  |  | // <name>_real is the pattern-less two-operand instruction with the same | 
|  |  | // opcode. | 
|  | 943 | multiclass DOT4_Common <bits<11> inst> { | 
|  | 944 |  | 
|  | 945 | def _pseudo : R600_REDUCTION <inst, | 
|  | 946 | (ins R600_Reg128:$src0, R600_Reg128:$src1), | 
|  | 947 | "DOT4 $dst, $src0, $src1", | 
|  | 948 | [(set R600_Reg32:$dst, (int_AMDGPU_dp4 R600_Reg128:$src0, R600_Reg128:$src1))] | 
|  | 949 | >; | 
|  | 950 |  | 
|  | 951 | def _real : R600_2OP <inst, "DOT4", []>; | 
|  | 952 | } | 
|  | 953 |  | 
|  |  | // Defines the CUBE pair for one opcode: <name>_pseudo is a pseudo | 
|  |  | // instruction (isPseudo = 1) carrying the int_AMDGPU_cube selection pattern | 
|  |  | // on R600_Reg128 operands, and <name>_real is the pattern-less two-operand | 
|  |  | // instruction with the same opcode.  Both are marked as neither loading, | 
|  |  | // storing, nor having side effects. | 
|  | 954 | let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { | 
|  | 955 | multiclass CUBE_Common <bits<11> inst> { | 
|  | 956 |  | 
|  | 957 | def _pseudo : InstR600 < | 
|  | 958 | inst, | 
|  | 959 | (outs R600_Reg128:$dst), | 
|  | 960 | (ins R600_Reg128:$src), | 
|  | 961 | "CUBE $dst, $src", | 
|  | 962 | [(set R600_Reg128:$dst, (int_AMDGPU_cube R600_Reg128:$src))], | 
|  | 963 | VecALU | 
|  | 964 | > { | 
|  | 965 | let isPseudo = 1; | 
|  | 966 | } | 
|  | 967 |  | 
|  | 968 | def _real : R600_2OP <inst, "CUBE", []>; | 
|  | 969 | } | 
|  | 970 | } // End mayLoad = 0, mayStore = 0, hasSideEffects = 0 | 
|  | 971 |  | 
|  | 972 | class EXP_IEEE_Common <bits<11> inst> : R600_1OP_Helper < | 
|  | 973 | inst, "EXP_IEEE", fexp2 | 
|  | 974 | >; | 
|  | 975 |  | 
|  | 976 | class FLT_TO_INT_Common <bits<11> inst> : R600_1OP_Helper < | 
|  | 977 | inst, "FLT_TO_INT", fp_to_sint | 
|  | 978 | >; | 
|  | 979 |  | 
|  | 980 | class INT_TO_FLT_Common <bits<11> inst> : R600_1OP_Helper < | 
|  | 981 | inst, "INT_TO_FLT", sint_to_fp | 
|  | 982 | >; | 
|  | 983 |  | 
|  | 984 | class FLT_TO_UINT_Common <bits<11> inst> : R600_1OP_Helper < | 
|  | 985 | inst, "FLT_TO_UINT", fp_to_uint | 
|  | 986 | >; | 
|  | 987 |  | 
|  | 988 | class UINT_TO_FLT_Common <bits<11> inst> : R600_1OP_Helper < | 
|  | 989 | inst, "UINT_TO_FLT", uint_to_fp | 
|  | 990 | >; | 
|  | 991 |  | 
|  | 992 | class LOG_CLAMPED_Common <bits<11> inst> : R600_1OP < | 
|  | 993 | inst, "LOG_CLAMPED", [] | 
|  | 994 | >; | 
|  | 995 |  | 
|  | 996 | class LOG_IEEE_Common <bits<11> inst> : R600_1OP_Helper < | 
|  | 997 | inst, "LOG_IEEE", flog2 | 
|  | 998 | >; | 
|  | 999 |  | 
|  | 1000 | class LSHL_Common <bits<11> inst> : R600_2OP_Helper <inst, "LSHL", shl>; | 
|  | 1001 | class LSHR_Common <bits<11> inst> : R600_2OP_Helper <inst, "LSHR", srl>; | 
|  | 1002 | class ASHR_Common <bits<11> inst> : R600_2OP_Helper <inst, "ASHR", sra>; | 
|  | 1003 | class MULHI_INT_Common <bits<11> inst> : R600_2OP_Helper < | 
|  | 1004 | inst, "MULHI_INT", mulhs | 
|  | 1005 | >; | 
|  |  | // Unsigned multiply-high (selected for mulhu).  The mnemonic carries the | 
|  |  | // _UINT suffix so it cannot be confused with MULHI_INT in assembly output. | 
|  | 1006 | class MULHI_UINT_Common <bits<11> inst> : R600_2OP_Helper < | 
|  | 1007 | inst, "MULHI_UINT", mulhu | 
|  | 1008 | >; | 
|  | 1009 | class MULLO_INT_Common <bits<11> inst> : R600_2OP_Helper < | 
|  | 1010 | inst, "MULLO_INT", mul | 
|  | 1011 | >; | 
|  | 1012 | class MULLO_UINT_Common <bits<11> inst> : R600_2OP <inst, "MULLO_UINT", []>; | 
|  | 1013 |  | 
|  | 1014 | class RECIP_CLAMPED_Common <bits<11> inst> : R600_1OP < | 
|  | 1015 | inst, "RECIP_CLAMPED", [] | 
|  | 1016 | >; | 
|  | 1017 |  | 
|  | 1018 | class RECIP_IEEE_Common <bits<11> inst> : R600_1OP < | 
|  | 1019 | inst, "RECIP_IEEE", [(set R600_Reg32:$dst, (fdiv FP_ONE, R600_Reg32:$src0))] | 
|  | 1020 | >; | 
|  | 1021 |  | 
|  | 1022 | class RECIP_UINT_Common <bits<11> inst> : R600_1OP_Helper < | 
|  | 1023 | inst, "RECIP_UINT", AMDGPUurecip | 
|  | 1024 | >; | 
|  | 1025 |  | 
|  | 1026 | class RECIPSQRT_CLAMPED_Common <bits<11> inst> : R600_1OP_Helper < | 
|  | 1027 | inst, "RECIPSQRT_CLAMPED", int_AMDGPU_rsq | 
|  | 1028 | >; | 
|  | 1029 |  | 
|  | 1030 | class RECIPSQRT_IEEE_Common <bits<11> inst> : R600_1OP < | 
|  | 1031 | inst, "RECIPSQRT_IEEE", [] | 
|  | 1032 | >; | 
|  | 1033 |  | 
|  | 1034 | class SIN_Common <bits<11> inst> : R600_1OP < | 
|  | 1035 | inst, "SIN", []>{ | 
|  | 1036 | let Trig = 1; | 
|  | 1037 | } | 
|  | 1038 |  | 
|  | 1039 | class COS_Common <bits<11> inst> : R600_1OP < | 
|  | 1040 | inst, "COS", []> { | 
|  | 1041 | let Trig = 1; | 
|  | 1042 | } | 
|  | 1043 |  | 
|  | 1044 | //===----------------------------------------------------------------------===// | 
|  | 1045 | // Helper patterns for complex intrinsics | 
|  | 1046 | //===----------------------------------------------------------------------===// | 
|  | 1047 |  | 
|  |  | // Emits two anonymous patterns that lower both int_AMDGPU_div and fdiv to | 
|  |  | // MUL $src0, (recip_ieee $src1), i.e. a multiply by the reciprocal produced | 
|  |  | // by the instruction passed in as recip_ieee. | 
|  | 1048 | multiclass DIV_Common <InstR600 recip_ieee> { | 
|  | 1049 | def : Pat< | 
|  | 1050 | (int_AMDGPU_div R600_Reg32:$src0, R600_Reg32:$src1), | 
|  | 1051 | (MUL R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1)) | 
|  | 1052 | >; | 
|  | 1053 |  | 
|  | 1054 | def : Pat< | 
|  | 1055 | (fdiv R600_Reg32:$src0, R600_Reg32:$src1), | 
|  | 1056 | (MUL R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1)) | 
|  | 1057 | >; | 
|  | 1058 | } | 
|  | 1059 |  | 
|  | 1060 | class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ieee> : Pat < | 
|  | 1061 | (int_TGSI_lit_z R600_Reg32:$src_x, R600_Reg32:$src_y, R600_Reg32:$src_w), | 
|  | 1062 | (exp_ieee (mul_lit (log_clamped (MAX R600_Reg32:$src_y, (f32 ZERO))), R600_Reg32:$src_w, R600_Reg32:$src_x)) | 
|  | 1063 | >; | 
|  | 1064 |  | 
|  | 1065 | //===----------------------------------------------------------------------===// | 
|  | 1066 | // R600 / R700 Instructions | 
|  | 1067 | //===----------------------------------------------------------------------===// | 
|  | 1068 |  | 
|  | 1069 | let Predicates = [isR600] in { | 
|  | 1070 |  | 
|  | 1071 | def MUL_LIT_r600 : MUL_LIT_Common<0x0C>; | 
|  | 1072 | def MULADD_r600 : MULADD_Common<0x10>; | 
|  | 1073 | def CNDE_r600 : CNDE_Common<0x18>; | 
|  | 1074 | def CNDGT_r600 : CNDGT_Common<0x19>; | 
|  | 1075 | def CNDGE_r600 : CNDGE_Common<0x1A>; | 
|  | 1076 | defm DOT4_r600 : DOT4_Common<0x50>; | 
|  | 1077 | defm CUBE_r600 : CUBE_Common<0x52>; | 
|  | 1078 | def EXP_IEEE_r600 : EXP_IEEE_Common<0x61>; | 
|  | 1079 | def LOG_CLAMPED_r600 : LOG_CLAMPED_Common<0x62>; | 
|  | 1080 | def LOG_IEEE_r600 : LOG_IEEE_Common<0x63>; | 
|  | 1081 | def RECIP_CLAMPED_r600 : RECIP_CLAMPED_Common<0x64>; | 
|  | 1082 | def RECIP_IEEE_r600 : RECIP_IEEE_Common<0x66>; | 
|  | 1083 | def RECIPSQRT_CLAMPED_r600 : RECIPSQRT_CLAMPED_Common<0x67>; | 
|  | 1084 | def RECIPSQRT_IEEE_r600 : RECIPSQRT_IEEE_Common<0x69>; | 
|  | 1085 | def FLT_TO_INT_r600 : FLT_TO_INT_Common<0x6b>; | 
|  | 1086 | def INT_TO_FLT_r600 : INT_TO_FLT_Common<0x6c>; | 
|  | 1087 | def FLT_TO_UINT_r600 : FLT_TO_UINT_Common<0x79>; | 
|  | 1088 | def UINT_TO_FLT_r600 : UINT_TO_FLT_Common<0x6d>; | 
|  | 1089 | def SIN_r600 : SIN_Common<0x6E>; | 
|  | 1090 | def COS_r600 : COS_Common<0x6F>; | 
|  | 1091 | def ASHR_r600 : ASHR_Common<0x70>; | 
|  | 1092 | def LSHR_r600 : LSHR_Common<0x71>; | 
|  | 1093 | def LSHL_r600 : LSHL_Common<0x72>; | 
|  | 1094 | def MULLO_INT_r600 : MULLO_INT_Common<0x73>; | 
|  | 1095 | def MULHI_INT_r600 : MULHI_INT_Common<0x74>; | 
|  | 1096 | def MULLO_UINT_r600 : MULLO_UINT_Common<0x75>; | 
|  | 1097 | def MULHI_UINT_r600 : MULHI_UINT_Common<0x76>; | 
|  | 1098 | def RECIP_UINT_r600 : RECIP_UINT_Common <0x78>; | 
|  | 1099 |  | 
|  | 1100 | defm DIV_r600 : DIV_Common<RECIP_IEEE_r600>; | 
|  | 1101 | def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>; | 
|  | 1102 |  | 
|  | 1103 | def : Pat<(fsqrt R600_Reg32:$src), | 
|  | 1104 | (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_r600 R600_Reg32:$src))>; | 
|  | 1105 |  | 
|  | 1106 | def R600_ExportSwz : ExportSwzInst { | 
|  | 1107 | let Word1{20-17} = 1; // BURST_COUNT | 
|  | 1108 | let Word1{21} = eop; | 
|  | 1109 | let Word1{22} = 1; // VALID_PIXEL_MODE | 
|  | 1110 | let Word1{30-23} = inst; | 
|  | 1111 | let Word1{31} = 1; // BARRIER | 
|  | 1112 | } | 
|  | 1113 | defm : ExportPattern<R600_ExportSwz, 39>; | 
|  | 1114 |  | 
|  | 1115 | def R600_ExportBuf : ExportBufInst { | 
|  | 1116 | let Word1{20-17} = 1; // BURST_COUNT | 
|  | 1117 | let Word1{21} = eop; | 
|  | 1118 | let Word1{22} = 1; // VALID_PIXEL_MODE | 
|  | 1119 | let Word1{30-23} = inst; | 
|  | 1120 | let Word1{31} = 1; // BARRIER | 
|  | 1121 | } | 
|  | 1122 | defm : SteamOutputExportPattern<R600_ExportBuf, 0x20, 0x21, 0x22, 0x23>; | 
|  | 1123 | } | 
|  | 1124 |  | 
|  | 1125 | // Helper patterns for normalizing inputs to trigonometric instructions on | 
|  | 1126 | // R700+ cards. | 
|  | 1127 | class COS_PAT <InstR600 trig> : Pat< | 
|  | 1128 | (fcos R600_Reg32:$src), | 
|  | 1129 | (trig (MUL (MOV_IMM_I32 CONST.TWO_PI_INV), R600_Reg32:$src)) | 
|  | 1130 | >; | 
|  | 1131 |  | 
|  | 1132 | class SIN_PAT <InstR600 trig> : Pat< | 
|  | 1133 | (fsin R600_Reg32:$src), | 
|  | 1134 | (trig (MUL (MOV_IMM_I32 CONST.TWO_PI_INV), R600_Reg32:$src)) | 
|  | 1135 | >; | 
|  | 1136 |  | 
|  | 1137 | //===----------------------------------------------------------------------===// | 
|  | 1138 | // R700 Only instructions | 
|  | 1139 | //===----------------------------------------------------------------------===// | 
|  | 1140 |  | 
|  | 1141 | let Predicates = [isR700] in { | 
|  | 1142 | def SIN_r700 : SIN_Common<0x6E>; | 
|  | 1143 | def COS_r700 : COS_Common<0x6F>; | 
|  | 1144 |  | 
|  | 1145 | // R700 normalizes inputs to SIN/COS the same as EG | 
|  | 1146 | def : SIN_PAT <SIN_r700>; | 
|  | 1147 | def : COS_PAT <COS_r700>; | 
|  | 1148 | } | 
|  | 1149 |  | 
|  | 1150 | //===----------------------------------------------------------------------===// | 
|  | 1151 | // Evergreen Only instructions | 
|  | 1152 | //===----------------------------------------------------------------------===// | 
|  | 1153 |  | 
|  | 1154 | let Predicates = [isEG] in { | 
|  | 1155 |  | 
|  | 1156 | def RECIP_IEEE_eg : RECIP_IEEE_Common<0x86>; | 
|  | 1157 | defm DIV_eg : DIV_Common<RECIP_IEEE_eg>; | 
|  | 1158 |  | 
|  | 1159 | def MULLO_INT_eg : MULLO_INT_Common<0x8F>; | 
|  | 1160 | def MULHI_INT_eg : MULHI_INT_Common<0x90>; | 
|  | 1161 | def MULLO_UINT_eg : MULLO_UINT_Common<0x91>; | 
|  | 1162 | def MULHI_UINT_eg : MULHI_UINT_Common<0x92>; | 
|  | 1163 | def RECIP_UINT_eg : RECIP_UINT_Common<0x94>; | 
|  | 1164 | def RECIPSQRT_CLAMPED_eg : RECIPSQRT_CLAMPED_Common<0x87>; | 
|  | 1165 | def EXP_IEEE_eg : EXP_IEEE_Common<0x81>; | 
|  | 1166 | def LOG_IEEE_eg : LOG_IEEE_Common<0x83>; | 
|  | 1167 | def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>; | 
|  | 1168 | def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>; | 
|  | 1169 | def SIN_eg : SIN_Common<0x8D>; | 
|  | 1170 | def COS_eg : COS_Common<0x8E>; | 
|  | 1171 |  | 
|  | 1172 | def : SIN_PAT <SIN_eg>; | 
|  | 1173 | def : COS_PAT <COS_eg>; | 
|  | 1174 | def : Pat<(fsqrt R600_Reg32:$src), | 
|  | 1175 | (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_eg R600_Reg32:$src))>; | 
|  | 1176 | } // End Predicates = [isEG] | 
|  | 1177 |  | 
|  | 1178 | //===----------------------------------------------------------------------===// | 
|  | 1179 | // Evergreen / Cayman Instructions | 
|  | 1180 | //===----------------------------------------------------------------------===// | 
|  | 1181 |  | 
|  | 1182 | let Predicates = [isEGorCayman] in { | 
|  | 1183 |  | 
|  | 1184 | // BFE_UINT - bit_extract, an optimization for mask and shift | 
|  | 1185 | // Src0 = Input | 
|  | 1186 | // Src1 = Offset | 
|  | 1187 | // Src2 = Width | 
|  | 1188 | // | 
|  | 1189 | // bit_extract = (Input << (32 - Offset - Width)) >> (32 - Width) | 
|  | 1190 | // | 
|  | 1191 | // Example Usage: | 
|  | 1192 | // (Offset, Width) | 
|  | 1193 | // | 
|  | 1194 | // (0, 8)           = (Input << 24) >> 24  = (Input &  0xff)       >> 0 | 
|  | 1195 | // (8, 8)           = (Input << 16) >> 24  = (Input &  0xffff)     >> 8 | 
|  | 1196 | // (16,8)           = (Input <<  8) >> 24  = (Input &  0xffffff)   >> 16 | 
|  | 1197 | // (24,8)           = (Input <<  0) >> 24  = (Input &  0xffffffff) >> 24 | 
|  | 1198 | def BFE_UINT_eg : R600_3OP <0x4, "BFE_UINT", | 
|  | 1199 | [(set R600_Reg32:$dst, (int_AMDIL_bit_extract_u32 R600_Reg32:$src0, | 
|  | 1200 | R600_Reg32:$src1, | 
|  | 1201 | R600_Reg32:$src2))], | 
|  | 1202 | VecALU | 
|  | 1203 | >; | 
|  | 1204 |  | 
|  | 1205 | def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT", | 
|  | 1206 | [(set R600_Reg32:$dst, (AMDGPUbitalign R600_Reg32:$src0, R600_Reg32:$src1, | 
|  | 1207 | R600_Reg32:$src2))], | 
|  | 1208 | VecALU | 
|  | 1209 | >; | 
|  | 1210 |  | 
|  | 1211 | def MULADD_eg : MULADD_Common<0x14>; | 
|  | 1212 | def ASHR_eg : ASHR_Common<0x15>; | 
|  | 1213 | def LSHR_eg : LSHR_Common<0x16>; | 
|  | 1214 | def LSHL_eg : LSHL_Common<0x17>; | 
|  | 1215 | def CNDE_eg : CNDE_Common<0x19>; | 
|  | 1216 | def CNDGT_eg : CNDGT_Common<0x1A>; | 
|  | 1217 | def CNDGE_eg : CNDGE_Common<0x1B>; | 
|  | 1218 | def MUL_LIT_eg : MUL_LIT_Common<0x1F>; | 
|  | 1219 | def LOG_CLAMPED_eg : LOG_CLAMPED_Common<0x82>; | 
|  | 1220 | defm DOT4_eg : DOT4_Common<0xBE>; | 
|  | 1221 | defm CUBE_eg : CUBE_Common<0xC0>; | 
|  | 1222 |  | 
|  | 1223 | def TGSI_LIT_Z_eg : TGSI_LIT_Z_Common<MUL_LIT_eg, LOG_CLAMPED_eg, EXP_IEEE_eg>; | 
|  | 1224 |  | 
|  | 1225 | def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> { | 
|  | 1226 | let Pattern = []; | 
|  | 1227 | } | 
|  | 1228 |  | 
|  | 1229 | def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>; | 
|  | 1230 |  | 
|  | 1231 | def FLT_TO_UINT_eg : FLT_TO_UINT_Common<0x9A> { | 
|  | 1232 | let Pattern = []; | 
|  | 1233 | } | 
|  | 1234 |  | 
|  | 1235 | def UINT_TO_FLT_eg : UINT_TO_FLT_Common<0x9C>; | 
|  | 1236 |  | 
|  | 1237 | // TRUNC is used for the FLT_TO_INT instructions to work around a | 
|  | 1238 | // perceived problem where the rounding modes are applied differently | 
|  | 1239 | // depending on the instruction and the slot they are in. | 
|  | 1240 | // See: | 
|  | 1241 | // https://bugs.freedesktop.org/show_bug.cgi?id=50232 | 
|  | 1242 | // Mesa commit: a1a0974401c467cb86ef818f22df67c21774a38c | 
|  | 1243 | // | 
|  | 1244 | // XXX: Lowering SELECT_CC will sometimes generate fp_to_[su]int nodes, | 
|  | 1245 | // which do not need to be truncated since the fp values are 0.0f or 1.0f. | 
|  | 1246 | // We should look into handling these cases separately. | 
|  | 1247 | def : Pat<(fp_to_sint R600_Reg32:$src0), | 
|  | 1248 | (FLT_TO_INT_eg (TRUNC R600_Reg32:$src0))>; | 
|  | 1249 |  | 
|  | 1250 | def : Pat<(fp_to_uint R600_Reg32:$src0), | 
|  | 1251 | (FLT_TO_UINT_eg (TRUNC R600_Reg32:$src0))>; | 
|  | 1252 |  | 
|  | 1253 | def EG_ExportSwz : ExportSwzInst { | 
|  | 1254 | let Word1{19-16} = 1; // BURST_COUNT | 
|  | 1255 | let Word1{20} = 1; // VALID_PIXEL_MODE | 
|  | 1256 | let Word1{21} = eop; | 
|  | 1257 | let Word1{29-22} = inst; | 
|  | 1258 | let Word1{30} = 0; // MARK | 
|  | 1259 | let Word1{31} = 1; // BARRIER | 
|  | 1260 | } | 
|  | 1261 | defm : ExportPattern<EG_ExportSwz, 83>; | 
|  | 1262 |  | 
|  | 1263 | def EG_ExportBuf : ExportBufInst { | 
|  | 1264 | let Word1{19-16} = 1; // BURST_COUNT | 
|  | 1265 | let Word1{20} = 1; // VALID_PIXEL_MODE | 
|  | 1266 | let Word1{21} = eop; | 
|  | 1267 | let Word1{29-22} = inst; | 
|  | 1268 | let Word1{30} = 0; // MARK | 
|  | 1269 | let Word1{31} = 1; // BARRIER | 
|  | 1270 | } | 
|  | 1271 | defm : SteamOutputExportPattern<EG_ExportBuf, 0x40, 0x41, 0x42, 0x43>; | 
|  | 1272 |  | 
|  | 1273 | //===----------------------------------------------------------------------===// | 
|  | 1274 | // Memory read/write instructions | 
|  | 1275 | //===----------------------------------------------------------------------===// | 
|  | 1276 | let usesCustomInserter = 1 in { | 
|  | 1277 |  | 
|  | 1278 | class RAT_WRITE_CACHELESS_eg <dag ins, bits<4> comp_mask, string name, | 
|  | 1279 | list<dag> pattern> | 
|  | 1280 | : EG_CF_RAT <0x57, 0x2, 0, (outs), ins, | 
|  | 1281 | !strconcat(name, " $rw_gpr, $index_gpr, $eop"), pattern> { | 
|  | 1282 | let RIM         = 0; | 
|  | 1283 | // XXX: Have a separate instruction for non-indexed writes. | 
|  | 1284 | let TYPE        = 1; | 
|  | 1285 | let RW_REL      = 0; | 
|  | 1286 | let ELEM_SIZE   = 0; | 
|  | 1287 |  | 
|  | 1288 | let ARRAY_SIZE  = 0; | 
|  | 1289 | let COMP_MASK   = comp_mask; | 
|  | 1290 | let BURST_COUNT = 0; | 
|  | 1291 | let VPM         = 0; | 
|  | 1292 | let MARK        = 0; | 
|  | 1293 | let BARRIER     = 1; | 
|  | 1294 | } | 
|  | 1295 |  | 
|  | 1296 | } // End usesCustomInserter = 1 | 
|  | 1297 |  | 
|  | 1298 | // 32-bit store | 
|  |  | // NOTE(review): the asm name below carries an "_eg" suffix while the 128-bit | 
|  |  | // variant prints as "RAT_WRITE_CACHELESS_128" -- confirm which is intended. | 
|  | 1299 | def RAT_WRITE_CACHELESS_32_eg : RAT_WRITE_CACHELESS_eg < | 
|  | 1300 | (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop), | 
|  | 1301 | 0x1, "RAT_WRITE_CACHELESS_32_eg", | 
|  | 1302 | [(global_store (i32 R600_TReg32_X:$rw_gpr), R600_TReg32_X:$index_gpr)] | 
|  | 1303 | >; | 
|  | 1304 |  | 
|  | 1305 | // 128-bit store | 
|  | 1306 | def RAT_WRITE_CACHELESS_128_eg : RAT_WRITE_CACHELESS_eg < | 
|  | 1307 | (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop), | 
|  | 1308 | 0xf, "RAT_WRITE_CACHELESS_128", | 
|  | 1309 | [(global_store (v4i32 R600_Reg128:$rw_gpr), R600_TReg32_X:$index_gpr)] | 
|  | 1310 | >; | 
|  | 1311 |  | 
|  |  | // Base class for the *_eg VTX_READ instructions: a single MEMxi:$ptr input, | 
|  |  | // asm string "<name> $dst, $ptr", and a 64-bit Inst built from Word0/Word1. | 
|  |  | // Subclasses supply the outs dag and the remaining per-width fields. | 
|  | 1312 | class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern> | 
| Tom Stellard | ab28e9a | 2013-01-23 02:09:01 +0000 | [diff] [blame] | 1313 | : InstR600ISA <outs, (ins MEMxi:$ptr), name#" $dst, $ptr", pattern>, | 
|  | 1314 | VTX_WORD1_GPR, VTX_WORD0 { | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 1315 |  | 
|  | 1316 | // Static fields | 
| Tom Stellard | ab28e9a | 2013-01-23 02:09:01 +0000 | [diff] [blame] | 1317 | let VC_INST = 0; | 
|  | 1318 | let FETCH_TYPE = 2; | 
|  | 1319 | let FETCH_WHOLE_QUAD = 0; | 
|  | 1320 | let BUFFER_ID = buffer_id; | 
|  | 1321 | let SRC_REL = 0; | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 1322 | // XXX: We can infer this field based on the SRC_GPR.  This would allow us | 
|  | 1323 | // to store vertex addresses in any channel, not just X. | 
| Tom Stellard | ab28e9a | 2013-01-23 02:09:01 +0000 | [diff] [blame] | 1324 | let SRC_SEL_X = 0; | 
|  | 1325 | let DST_REL = 0; | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 1326 | // The docs say that if this bit is set, then DATA_FORMAT, NUM_FORMAT_ALL, | 
|  | 1327 | // FORMAT_COMP_ALL, SRF_MODE_ALL, and ENDIAN_SWAP fields will be ignored, | 
|  | 1328 | // however, based on my testing if USE_CONST_FIELDS is set, then all | 
|  | 1329 | // these fields need to be set to 0. | 
| Tom Stellard | ab28e9a | 2013-01-23 02:09:01 +0000 | [diff] [blame] | 1330 | let USE_CONST_FIELDS = 0; | 
|  | 1331 | let NUM_FORMAT_ALL = 1; | 
|  | 1332 | let FORMAT_COMP_ALL = 0; | 
|  | 1333 | let SRF_MODE_ALL = 0; | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 1334 |  | 
| Tom Stellard | ab28e9a | 2013-01-23 02:09:01 +0000 | [diff] [blame] | 1335 | let Inst{31-0} = Word0; | 
|  | 1336 | let Inst{63-32} = Word1; | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 1337 | // LLVM can only encode 64-bit instructions, so these fields are manually | 
|  | 1338 | // encoded in R600CodeEmitter | 
|  | 1339 | // | 
|  | 1340 | // bits<16> OFFSET; | 
|  | 1341 | // bits<2>  ENDIAN_SWAP = 0; | 
|  | 1342 | // bits<1>  CONST_BUF_NO_STRIDE = 0; | 
|  | 1343 | // bits<1>  MEGA_FETCH = 0; | 
|  | 1344 | // bits<1>  ALT_CONST = 0; | 
|  | 1345 | // bits<2>  BUFFER_INDEX_MODE = 0; | 
|  | 1346 |  | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 1347 |  | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 1348 |  | 
|  | 1349 | // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding | 
|  | 1350 | // is done in R600CodeEmitter) | 
|  | 1351 | // | 
|  | 1352 | // Inst{79-64} = OFFSET; | 
|  | 1353 | // Inst{81-80} = ENDIAN_SWAP; | 
|  | 1354 | // Inst{82}    = CONST_BUF_NO_STRIDE; | 
|  | 1355 | // Inst{83}    = MEGA_FETCH; | 
|  | 1356 | // Inst{84}    = ALT_CONST; | 
|  | 1357 | // Inst{86-85} = BUFFER_INDEX_MODE; | 
|  | 1358 | // Inst{95-87} = 0; Reserved | 
|  | 1359 |  | 
|  | 1360 | // VTX_WORD3 (Padding) | 
|  | 1361 | // | 
|  | 1362 | // Inst{127-96} = 0; | 
|  | 1363 | } | 
|  | 1364 |  | 
|  | 1365 | class VTX_READ_8_eg <bits<8> buffer_id, list<dag> pattern> | 
|  | 1366 | : VTX_READ_eg <"VTX_READ_8", buffer_id, (outs R600_TReg32_X:$dst), | 
|  | 1367 | pattern> { | 
|  | 1368 |  | 
|  | 1369 | let MEGA_FETCH_COUNT = 1; | 
|  | 1370 | let DST_SEL_X = 0; | 
|  | 1371 | let DST_SEL_Y = 7;   // Masked | 
|  | 1372 | let DST_SEL_Z = 7;   // Masked | 
|  | 1373 | let DST_SEL_W = 7;   // Masked | 
|  | 1374 | let DATA_FORMAT = 1; // FMT_8 | 
|  | 1375 | } | 
|  | 1376 |  | 
|  | 1377 | class VTX_READ_16_eg <bits<8> buffer_id, list<dag> pattern> | 
|  | 1378 | : VTX_READ_eg <"VTX_READ_16", buffer_id, (outs R600_TReg32_X:$dst), | 
|  | 1379 | pattern> { | 
|  | 1380 | let MEGA_FETCH_COUNT = 2; | 
|  | 1381 | let DST_SEL_X = 0; | 
|  | 1382 | let DST_SEL_Y = 7;   // Masked | 
|  | 1383 | let DST_SEL_Z = 7;   // Masked | 
|  | 1384 | let DST_SEL_W = 7;   // Masked | 
|  | 1385 | let DATA_FORMAT = 5; // FMT_16 | 
|  | 1386 |  | 
|  | 1387 | } | 
|  | 1388 |  | 
|  |  | // VTX_READ variant named "VTX_READ_32": writes R600_TReg32_X:$dst with only | 
|  |  | // the X destination channel selected (Y/Z/W masked), and ties $ptr.ptr to $dst. | 
|  | 1389 | class VTX_READ_32_eg <bits<8> buffer_id, list<dag> pattern> | 
|  | 1390 | : VTX_READ_eg <"VTX_READ_32", buffer_id, (outs R600_TReg32_X:$dst), | 
|  | 1391 | pattern> { | 
|  | 1392 |  | 
|  | 1393 | let MEGA_FETCH_COUNT = 4; | 
|  | 1394 | let DST_SEL_X        = 0; | 
|  | 1395 | let DST_SEL_Y        = 7;   // Masked | 
|  | 1396 | let DST_SEL_Z        = 7;   // Masked | 
|  | 1397 | let DST_SEL_W        = 7;   // Masked | 
|  | 1398 | let DATA_FORMAT      = 0xD; // COLOR_32 | 
|  | 1399 |  | 
|  | 1400 | // This is not really necessary, but there were some GPU hangs that appeared | 
|  | 1401 | // to be caused by ALU instructions in the next instruction group that wrote | 
|  | 1402 | // to the $ptr registers of the VTX_READ. | 
|  | 1403 | // e.g. | 
|  | 1404 | // %T3_X<def> = VTX_READ_PARAM_32_eg %T2_X<kill>, 24 | 
|  | 1405 | // %T2_X<def> = MOV %ZERO | 
|  | 1406 | // Adding this constraint prevents this from happening. | 
|  | 1407 | let Constraints = "$ptr.ptr = $dst"; | 
|  | 1408 | } | 
|  | 1409 |  | 
|  | 1410 | class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern> | 
|  | 1411 | : VTX_READ_eg <"VTX_READ_128", buffer_id, (outs R600_Reg128:$dst), | 
|  | 1412 | pattern> { | 
|  | 1413 | // 128-bit VTX_READ variant: result goes to an R600_Reg128 register, no channel is masked. | 
|  | 1414 | let MEGA_FETCH_COUNT = 16; // the 8/16/32-bit sibling classes use 1/2/4 here | 
|  | 1415 | let DST_SEL_X        =  0; | 
|  | 1416 | let DST_SEL_Y        =  1; | 
|  | 1417 | let DST_SEL_Z        =  2; | 
|  | 1418 | let DST_SEL_W        =  3; | 
|  | 1419 | let DATA_FORMAT      =  0x22; // COLOR_32_32_32_32 | 
|  | 1420 |  | 
|  | 1421 | // XXX: Need to force VTX_READ_128 instructions to write to the same register | 
|  | 1422 | // that holds its buffer address to avoid potential hangs.  We can't use | 
|  | 1423 | // the same constraint as VTX_READ_32_eg, because the $ptr.ptr and $dst | 
|  | 1424 | // registers are different sizes. | 
|  | 1425 | } | 
|  | 1426 |  | 
|  | 1427 | //===----------------------------------------------------------------------===// | 
|  | 1428 | // VTX Read from parameter memory space | 
|  | 1429 | //===----------------------------------------------------------------------===// | 
|  | 1430 |  | 
|  | 1431 | def VTX_READ_PARAM_8_eg : VTX_READ_8_eg <0, // buffer_id = 0 for every parameter-space read in this section | 
|  | 1432 | [(set (i32 R600_TReg32_X:$dst), (load_param_zexti8 ADDRVTX_READ:$ptr))] | 
|  | 1433 | >; | 
|  | 1434 |  | 
|  | 1435 | def VTX_READ_PARAM_16_eg : VTX_READ_16_eg <0, // buffer_id = 0 | 
|  | 1436 | [(set (i32 R600_TReg32_X:$dst), (load_param_zexti16 ADDRVTX_READ:$ptr))] | 
|  | 1437 | >; | 
|  | 1438 |  | 
|  | 1439 | def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <0, // buffer_id = 0 | 
|  | 1440 | [(set (i32 R600_TReg32_X:$dst), (load_param ADDRVTX_READ:$ptr))] | 
|  | 1441 | >; | 
|  | 1442 |  | 
|  | 1443 | //===----------------------------------------------------------------------===// | 
|  | 1444 | // VTX Read from global memory space | 
|  | 1445 | //===----------------------------------------------------------------------===// | 
|  | 1446 |  | 
|  | 1447 | // 8-bit reads (zero-extending load into an i32 result) | 
|  | 1448 | def VTX_READ_GLOBAL_8_eg : VTX_READ_8_eg <1, // buffer_id = 1 for every global-space read in this section | 
|  | 1449 | [(set (i32 R600_TReg32_X:$dst), (zextloadi8_global ADDRVTX_READ:$ptr))] | 
|  | 1450 | >; | 
|  | 1451 |  | 
|  | 1452 | // 32-bit reads | 
|  | 1453 | def VTX_READ_GLOBAL_32_eg : VTX_READ_32_eg <1, // buffer_id = 1 | 
|  | 1454 | [(set (i32 R600_TReg32_X:$dst), (global_load ADDRVTX_READ:$ptr))] | 
|  | 1455 | >; | 
|  | 1456 |  | 
|  | 1457 | // 128-bit reads (v4i32 result in an R600_Reg128 register) | 
|  | 1458 | def VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1, // buffer_id = 1 | 
|  | 1459 | [(set (v4i32 R600_Reg128:$dst), (global_load ADDRVTX_READ:$ptr))] | 
|  | 1460 | >; | 
|  | 1461 |  | 
|  | 1462 | //===----------------------------------------------------------------------===// | 
|  | 1463 | // Constant Loads | 
|  | 1464 | // XXX: We are currently storing all constants in the global address space. | 
|  | 1465 | //===----------------------------------------------------------------------===// | 
|  | 1466 |  | 
|  | 1467 | def CONSTANT_LOAD_eg : VTX_READ_32_eg <1, // buffer_id = 1, the same id the VTX_READ_GLOBAL_* defs use | 
|  | 1468 | [(set (i32 R600_TReg32_X:$dst), (constant_load ADDRVTX_READ:$ptr))] | 
|  | 1469 | >; | 
|  | 1470 |  | 
|  | 1471 | } | 
|  | 1472 |  | 
|  | 1473 | let Predicates = [isCayman] in { | 
|  | 1474 |  | 
|  | 1475 | let isVector = 1 in { | 
|  | 1476 |  | 
|  | 1477 | def RECIP_IEEE_cm : RECIP_IEEE_Common<0x86>; | 
|  | 1478 |  | 
|  | 1479 | def MULLO_INT_cm : MULLO_INT_Common<0x8F>; | 
|  | 1480 | def MULHI_INT_cm : MULHI_INT_Common<0x90>; | 
|  | 1481 | def MULLO_UINT_cm : MULLO_UINT_Common<0x91>; | 
|  | 1482 | def MULHI_UINT_cm : MULHI_UINT_Common<0x92>; | 
|  | 1483 | def RECIPSQRT_CLAMPED_cm : RECIPSQRT_CLAMPED_Common<0x87>; | 
|  | 1484 | def EXP_IEEE_cm : EXP_IEEE_Common<0x81>; | 
|  | 1485 | def LOG_IEEE_ : LOG_IEEE_Common<0x83>; // NOTE(review): every sibling def ends in "_cm"; "LOG_IEEE_" looks like a typo for "LOG_IEEE_cm" -- confirm there are no users before renaming | 
|  | 1486 | def RECIP_CLAMPED_cm : RECIP_CLAMPED_Common<0x84>; | 
|  | 1487 | def RECIPSQRT_IEEE_cm : RECIPSQRT_IEEE_Common<0x89>; | 
|  | 1488 | def SIN_cm : SIN_Common<0x8D>; | 
|  | 1489 | def COS_cm : COS_Common<0x8E>; | 
|  | 1490 | } // End isVector = 1 | 
|  | 1491 |  | 
|  | 1492 | def : SIN_PAT <SIN_cm>; | 
|  | 1493 | def : COS_PAT <COS_cm>; | 
|  | 1494 |  | 
|  | 1495 | defm DIV_cm : DIV_Common<RECIP_IEEE_cm>; | 
|  | 1496 |  | 
|  | 1497 | // RECIP_UINT emulation for Cayman: convert to float, take RECIP_IEEE, scale, convert back | 
|  | 1498 | def : Pat < | 
|  | 1499 | (AMDGPUurecip R600_Reg32:$src0), | 
|  | 1500 | (FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg R600_Reg32:$src0)), | 
|  | 1501 | (MOV_IMM_I32 0x4f800000))) // 0x4f800000 is the IEEE-754 single-precision bit pattern of 2^32 | 
|  | 1502 | >; | 
|  | 1503 |  | 
|  | 1504 | // fsqrt is selected as $src * RECIPSQRT_CLAMPED_cm($src) | 
|  | 1505 | def : Pat<(fsqrt R600_Reg32:$src), | 
|  | 1506 | (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm R600_Reg32:$src))>; | 
|  | 1507 |  | 
|  | 1508 | } // End isCayman | 
|  | 1509 |  | 
|  | 1510 | //===----------------------------------------------------------------------===// | 
|  | 1511 | // Branch Instructions | 
|  | 1512 | //===----------------------------------------------------------------------===// | 
|  | 1513 |  | 
|  | 1514 |  | 
|  | 1515 | def IF_PREDICATE_SET  : ILFormat<(outs), (ins GPRI32:$src), | 
|  | 1516 | "IF_PREDICATE_SET $src", []>; | 
|  | 1517 |  | 
|  | 1518 | def PREDICATED_BREAK : ILFormat<(outs), (ins GPRI32:$src), | 
|  | 1519 | "PREDICATED_BREAK $src", []>; | 
|  | 1520 |  | 
|  | 1521 | //===----------------------------------------------------------------------===// | 
|  | 1522 | // Pseudo instructions | 
|  | 1523 | //===----------------------------------------------------------------------===// | 
|  | 1524 |  | 
|  | 1525 | let isPseudo = 1 in { | 
|  | 1526 |  | 
|  | 1527 | def PRED_X : InstR600 < | 
|  | 1528 | 0, (outs R600_Predicate_Bit:$dst), | 
|  | 1529 | (ins R600_Reg32:$src0, i32imm:$src1, i32imm:$flags), | 
|  | 1530 | "", [], NullALU> { | 
|  | 1531 | let FlagOperandIdx = 3; // $flags is operand 3 counting $dst, $src0, $src1, $flags from 0 | 
|  | 1532 | } | 
|  | 1533 |  | 
|  | 1534 | let isTerminator = 1, isBranch = 1, isBarrier = 1 in { | 
|  | 1535 |  | 
|  | 1536 | def JUMP : InstR600 <0x10, | 
|  | 1537 | (outs), | 
|  | 1538 | (ins brtarget:$target, R600_Pred:$p), | 
|  | 1539 | "JUMP $target ($p)", | 
|  | 1540 | [], AnyALU | 
|  | 1541 | >; | 
|  | 1542 |  | 
|  | 1543 | }  // End isTerminator = 1, isBranch = 1, isBarrier = 1 | 
|  | 1544 |  | 
|  | 1545 | let usesCustomInserter = 1 in { | 
|  | 1546 |  | 
|  | 1547 | let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in { | 
|  | 1548 |  | 
|  | 1549 | def MASK_WRITE : AMDGPUShaderInst < | 
|  | 1550 | (outs), | 
|  | 1551 | (ins R600_Reg32:$src), | 
|  | 1552 | "MASK_WRITE $src", | 
|  | 1553 | [] | 
|  | 1554 | >; | 
|  | 1555 |  | 
|  | 1556 | } // End mayLoad = 0, mayStore = 0, hasSideEffects = 1 | 
|  | 1557 |  | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 1558 |  | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 1559 | def TXD: AMDGPUShaderInst < | 
|  | 1560 | (outs R600_Reg128:$dst), | 
|  | 1561 | (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget), | 
|  | 1562 | "TXD $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget", | 
|  | 1563 | [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$resourceId, imm:$samplerId, imm:$textureTarget))] | 
|  | 1564 | >; | 
|  | 1565 |  | 
|  | 1566 | def TXD_SHADOW: AMDGPUShaderInst < | 
|  | 1567 | (outs R600_Reg128:$dst), | 
|  | 1568 | (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget), | 
|  | 1569 | "TXD_SHADOW $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget", | 
|  | 1570 | [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))] | 
|  | 1571 | >; | 
|  | 1572 |  | 
|  | 1573 | } // End usesCustomInserter = 1 | 
|  | 1574 | } // End isPseudo = 1 | 
|  | 1575 |  | 
|  | 1576 | def CLAMP_R600 :  CLAMP <R600_Reg32>; | 
|  | 1577 | def FABS_R600 : FABS<R600_Reg32>; | 
|  | 1578 | def FNEG_R600 : FNEG<R600_Reg32>; | 
|  | 1579 |  | 
|  | 1580 | //===---------------------------------------------------------------------===// | 
|  | 1581 | // Return instruction | 
|  | 1582 | //===---------------------------------------------------------------------===// | 
| Jakob Stoklund Olesen | fdc3767 | 2013-02-05 17:53:52 +0000 | [diff] [blame] | 1583 | let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1, | 
|  | 1584 | usesCustomInserter = 1 in { | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 1585 | def RETURN          : ILFormat<(outs), (ins variable_ops), | 
|  | 1586 | "RETURN", [(IL_retflag)]>; | 
|  | 1587 | } | 
|  | 1588 |  | 
| Tom Stellard | 365366f | 2013-01-23 02:09:06 +0000 | [diff] [blame] | 1589 |  | 
|  | 1590 | //===----------------------------------------------------------------------===// | 
|  | 1591 | // Constant Buffer Addressing Support | 
|  | 1592 | //===----------------------------------------------------------------------===// | 
|  | 1593 |  | 
|  | 1594 | let isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU"  in { | 
|  | 1595 | def CONST_COPY : Instruction { | 
|  | 1596 | let OutOperandList = (outs R600_Reg32:$dst); | 
|  | 1597 | let InOperandList = (ins i32imm:$src); | 
|  | 1598 | let Pattern = [(set R600_Reg32:$dst, (CONST_ADDRESS ADDRGA_CONST_OFFSET:$src))]; | 
|  | 1599 | let AsmString = "CONST_COPY"; | 
|  | 1600 | let neverHasSideEffects = 1; | 
|  | 1601 | let isAsCheapAsAMove = 1; | 
|  | 1602 | let Itinerary = NullALU; | 
|  | 1603 | } | 
|  | 1604 | } // end isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" | 
|  | 1605 |  | 
|  | 1606 | def TEX_VTX_CONSTBUF : | 
|  | 1607 | InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr), "VTX_READ_eg $dst, $ptr", | 
|  | 1608 | [(set R600_Reg128:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr))]>, | 
|  | 1609 | VTX_WORD1_GPR, VTX_WORD0 { | 
|  | 1610 | // Selects CONST_ADDRESS with a variable offset as a 128-bit read into an R600_Reg128 register. | 
|  | 1611 | let VC_INST = 0; | 
|  | 1612 | let FETCH_TYPE = 2; | 
|  | 1613 | let FETCH_WHOLE_QUAD = 0; | 
|  | 1614 | let BUFFER_ID = 0; | 
|  | 1615 | let SRC_REL = 0; | 
|  | 1616 | let SRC_SEL_X = 0; | 
|  | 1617 | let DST_REL = 0; | 
|  | 1618 | let USE_CONST_FIELDS = 0; | 
|  | 1619 | let NUM_FORMAT_ALL = 2; | 
|  | 1620 | let FORMAT_COMP_ALL = 1; | 
|  | 1621 | let SRF_MODE_ALL = 1; | 
|  | 1622 | let MEGA_FETCH_COUNT = 16; | 
|  | 1623 | let DST_SEL_X        = 0; | 
|  | 1624 | let DST_SEL_Y        = 1; | 
|  | 1625 | let DST_SEL_Z        = 2; | 
|  | 1626 | let DST_SEL_W        = 3; | 
|  | 1627 | let DATA_FORMAT      = 35; // NOTE(review): 35 == 0x23, whereas VTX_READ_128_eg uses 0x22 (COLOR_32_32_32_32); presumably the float variant -- confirm | 
|  | 1628 |  | 
|  | 1629 | let Inst{31-0} = Word0; | 
|  | 1630 | let Inst{63-32} = Word1; | 
|  | 1631 |  | 
|  | 1632 | // LLVM can only encode 64-bit instructions, so these fields are manually | 
|  | 1633 | // encoded in R600CodeEmitter | 
|  | 1634 | // | 
|  | 1635 | // bits<16> OFFSET; | 
|  | 1636 | // bits<2>  ENDIAN_SWAP = 0; | 
|  | 1637 | // bits<1>  CONST_BUF_NO_STRIDE = 0; | 
|  | 1638 | // bits<1>  MEGA_FETCH = 0; | 
|  | 1639 | // bits<1>  ALT_CONST = 0; | 
|  | 1640 | // bits<2>  BUFFER_INDEX_MODE = 0; | 
|  | 1641 |  | 
|  | 1642 |  | 
|  | 1643 |  | 
|  | 1644 | // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding | 
|  | 1645 | // is done in R600CodeEmitter) | 
|  | 1646 | // | 
|  | 1647 | // Inst{79-64} = OFFSET; | 
|  | 1648 | // Inst{81-80} = ENDIAN_SWAP; | 
|  | 1649 | // Inst{82}    = CONST_BUF_NO_STRIDE; | 
|  | 1650 | // Inst{83}    = MEGA_FETCH; | 
|  | 1651 | // Inst{84}    = ALT_CONST; | 
|  | 1652 | // Inst{86-85} = BUFFER_INDEX_MODE; | 
|  | 1653 | // Inst{95-87} = 0; Reserved | 
|  | 1654 |  | 
|  | 1655 | // VTX_WORD3 (Padding) | 
|  | 1656 | // | 
|  | 1657 | // Inst{127-96} = 0; | 
|  | 1658 | } | 
|  | 1659 |  | 
|  | 1660 |  | 
| Tom Stellard | f879435 | 2012-12-19 22:10:31 +0000 | [diff] [blame] | 1661 | //===--------------------------------------------------------------------===// | 
|  | 1662 | // Instructions support | 
|  | 1663 | //===--------------------------------------------------------------------===// | 
|  | 1664 | //===---------------------------------------------------------------------===// | 
|  | 1665 | // Custom Inserter for Branches and returns, this eventually will be a | 
|  | 1666 | // separate pass | 
|  | 1667 | //===---------------------------------------------------------------------===// | 
|  | 1668 | let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in { | 
|  | 1669 | def BRANCH : ILFormat<(outs), (ins brtarget:$target), | 
|  | 1670 | "; Pseudo unconditional branch instruction", | 
|  | 1671 | [(br bb:$target)]>; | 
|  | 1672 | defm BRANCH_COND : BranchConditional<IL_brcond>; | 
|  | 1673 | } | 
|  | 1674 |  | 
|  | 1675 | //===---------------------------------------------------------------------===// | 
|  | 1676 | // Flow and Program control Instructions | 
|  | 1677 | //===---------------------------------------------------------------------===// | 
|  | 1678 | let isTerminator=1 in { | 
|  | 1679 | def SWITCH      : ILFormat< (outs), (ins GPRI32:$src), | 
|  | 1680 | !strconcat("SWITCH", " $src"), []>; | 
|  | 1681 | def CASE        : ILFormat< (outs), (ins GPRI32:$src), | 
|  | 1682 | !strconcat("CASE", " $src"), []>; | 
|  | 1683 | def BREAK       : ILFormat< (outs), (ins), | 
|  | 1684 | "BREAK", []>; | 
|  | 1685 | def CONTINUE    : ILFormat< (outs), (ins), | 
|  | 1686 | "CONTINUE", []>; | 
|  | 1687 | def DEFAULT     : ILFormat< (outs), (ins), | 
|  | 1688 | "DEFAULT", []>; | 
|  | 1689 | def ELSE        : ILFormat< (outs), (ins), | 
|  | 1690 | "ELSE", []>; | 
|  | 1691 | def ENDSWITCH   : ILFormat< (outs), (ins), | 
|  | 1692 | "ENDSWITCH", []>; | 
|  | 1693 | def ENDMAIN     : ILFormat< (outs), (ins), | 
|  | 1694 | "ENDMAIN", []>; | 
|  | 1695 | def END         : ILFormat< (outs), (ins), | 
|  | 1696 | "END", []>; | 
|  | 1697 | def ENDFUNC     : ILFormat< (outs), (ins), | 
|  | 1698 | "ENDFUNC", []>; | 
|  | 1699 | def ENDIF       : ILFormat< (outs), (ins), | 
|  | 1700 | "ENDIF", []>; | 
|  | 1701 | def WHILELOOP   : ILFormat< (outs), (ins), | 
|  | 1702 | "WHILE", []>; | 
|  | 1703 | def ENDLOOP     : ILFormat< (outs), (ins), | 
|  | 1704 | "ENDLOOP", []>; | 
|  | 1705 | def FUNC        : ILFormat< (outs), (ins), | 
|  | 1706 | "FUNC", []>; | 
|  | 1707 | def RETDYN      : ILFormat< (outs), (ins), | 
|  | 1708 | "RET_DYN", []>; | 
|  | 1709 | // This opcode has custom swizzle pattern encoded in Swizzle Encoder | 
|  | 1710 | defm IF_LOGICALNZ  : BranchInstr<"IF_LOGICALNZ">; | 
|  | 1711 | // This opcode has custom swizzle pattern encoded in Swizzle Encoder | 
|  | 1712 | defm IF_LOGICALZ   : BranchInstr<"IF_LOGICALZ">; | 
|  | 1713 | // This opcode has custom swizzle pattern encoded in Swizzle Encoder | 
|  | 1714 | defm BREAK_LOGICALNZ : BranchInstr<"BREAK_LOGICALNZ">; | 
|  | 1715 | // This opcode has custom swizzle pattern encoded in Swizzle Encoder | 
|  | 1716 | defm BREAK_LOGICALZ : BranchInstr<"BREAK_LOGICALZ">; | 
|  | 1717 | // This opcode has custom swizzle pattern encoded in Swizzle Encoder | 
|  | 1718 | defm CONTINUE_LOGICALNZ : BranchInstr<"CONTINUE_LOGICALNZ">; | 
|  | 1719 | // This opcode has custom swizzle pattern encoded in Swizzle Encoder | 
|  | 1720 | defm CONTINUE_LOGICALZ : BranchInstr<"CONTINUE_LOGICALZ">; | 
|  | 1721 | defm IFC         : BranchInstr2<"IFC">; | 
|  | 1722 | defm BREAKC      : BranchInstr2<"BREAKC">; | 
|  | 1723 | defm CONTINUEC   : BranchInstr2<"CONTINUEC">; | 
|  | 1724 | } | 
|  | 1725 |  | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 1726 | //===----------------------------------------------------------------------===// | 
|  | 1727 | // ISel Patterns | 
|  | 1728 | //===----------------------------------------------------------------------===// | 
|  | 1729 |  | 
|  | 1730 | //CNDGE_INT extra pattern | 
|  | 1731 | def : Pat < | 
|  | 1732 | (selectcc (i32 R600_Reg32:$src0), -1, (i32 R600_Reg32:$src1), | 
|  | 1733 | (i32 R600_Reg32:$src2), COND_GT), | 
|  | 1734 | (CNDGE_INT R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2) | 
|  | 1735 | >; | 
|  | 1736 |  | 
|  | 1737 | // KIL Patterns | 
|  | 1738 | def KILP : Pat < | 
|  | 1739 | (int_AMDGPU_kilp), | 
|  | 1740 | (MASK_WRITE (KILLGT (f32 ONE), (f32 ZERO))) | 
|  | 1741 | >; | 
|  | 1742 |  | 
|  | 1743 | def KIL : Pat < | 
|  | 1744 | (int_AMDGPU_kill R600_Reg32:$src0), | 
|  | 1745 | (MASK_WRITE (KILLGT (f32 ZERO), (f32 R600_Reg32:$src0))) | 
|  | 1746 | >; | 
|  | 1747 |  | 
|  | 1748 | // SGT Reverse args | 
|  | 1749 | def : Pat < | 
|  | 1750 | (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LT), | 
|  | 1751 | (SGT R600_Reg32:$src1, R600_Reg32:$src0) | 
|  | 1752 | >; | 
|  | 1753 |  | 
|  | 1754 | // SGE Reverse args | 
|  | 1755 | def : Pat < | 
|  | 1756 | (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LE), | 
|  | 1757 | (SGE R600_Reg32:$src1, R600_Reg32:$src0) | 
|  | 1758 | >; | 
|  | 1759 |  | 
|  | 1760 | // SETGT_INT reverse args | 
|  | 1761 | def : Pat < | 
|  | 1762 | (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLT), | 
|  | 1763 | (SETGT_INT R600_Reg32:$src1, R600_Reg32:$src0) | 
|  | 1764 | >; | 
|  | 1765 |  | 
|  | 1766 | // SETGE_INT reverse args | 
|  | 1767 | def : Pat < | 
|  | 1768 | (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLE), | 
|  | 1769 | (SETGE_INT R600_Reg32:$src1, R600_Reg32:$src0) | 
|  | 1770 | >; | 
|  | 1771 |  | 
|  | 1772 | // SETGT_UINT reverse args | 
|  | 1773 | def : Pat < | 
|  | 1774 | (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETULT), | 
|  | 1775 | (SETGT_UINT R600_Reg32:$src1, R600_Reg32:$src0) | 
|  | 1776 | >; | 
|  | 1777 |  | 
|  | 1778 | // SETGE_UINT reverse args | 
|  | 1779 | def : Pat < | 
|  | 1780 | (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETULE), | 
|  | 1781 | (SETGE_UINT R600_Reg32:$src1, R600_Reg32:$src0) | 
|  | 1782 | >; | 
|  | 1783 |  | 
|  | 1784 | // The next two patterns are special cases for handling 'true if ordered' and | 
|  | 1785 | // 'true if unordered' conditionals.  The assumption here is that the behavior of | 
|  | 1786 | // SETE and SNE conforms to the Direct3D 10 rules for floating point values | 
|  | 1787 | // described here: | 
|  | 1788 | // http://msdn.microsoft.com/en-us/library/windows/desktop/cc308050.aspx#alpha_32_bit | 
|  | 1789 | // We assume that SETE returns false when one of the operands is NAN and | 
|  | 1790 | // SNE returns true when one of the operands is NAN. | 
|  | 1791 | // NOTE(review): SETE/SNE of $src0 and $src1 only matches ordered/unordered if the two operands compare equal whenever neither is NAN (e.g. both are the same value) -- confirm callers guarantee this. | 
|  | 1792 | // SETE - 'true if ordered' | 
|  | 1793 | def : Pat < | 
|  | 1794 | (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETO), | 
|  | 1795 | (SETE R600_Reg32:$src0, R600_Reg32:$src1) | 
|  | 1796 | >; | 
|  | 1797 |  | 
|  | 1798 | // SNE - 'true if unordered' | 
|  | 1799 | def : Pat < | 
|  | 1800 | (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETUO), | 
|  | 1801 | (SNE R600_Reg32:$src0, R600_Reg32:$src1) | 
|  | 1802 | >; | 
|  | 1803 |  | 
|  | 1804 | def : Extract_Element <f32, v4f32, R600_Reg128, 0, sel_x>; | 
|  | 1805 | def : Extract_Element <f32, v4f32, R600_Reg128, 1, sel_y>; | 
|  | 1806 | def : Extract_Element <f32, v4f32, R600_Reg128, 2, sel_z>; | 
|  | 1807 | def : Extract_Element <f32, v4f32, R600_Reg128, 3, sel_w>; | 
|  | 1808 |  | 
|  | 1809 | def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 0, sel_x>; | 
|  | 1810 | def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 1, sel_y>; | 
|  | 1811 | def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 2, sel_z>; | 
|  | 1812 | def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 3, sel_w>; | 
|  | 1813 |  | 
|  | 1814 | def : Extract_Element <i32, v4i32, R600_Reg128, 0, sel_x>; | 
|  | 1815 | def : Extract_Element <i32, v4i32, R600_Reg128, 1, sel_y>; | 
|  | 1816 | def : Extract_Element <i32, v4i32, R600_Reg128, 2, sel_z>; | 
|  | 1817 | def : Extract_Element <i32, v4i32, R600_Reg128, 3, sel_w>; | 
|  | 1818 |  | 
|  | 1819 | def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 0, sel_x>; | 
|  | 1820 | def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 1, sel_y>; | 
|  | 1821 | def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 2, sel_z>; | 
|  | 1822 | def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 3, sel_w>; | 
|  | 1823 |  | 
|  | 1824 | def : Vector_Build <v4f32, R600_Reg128, f32, R600_Reg32>; | 
|  | 1825 | def : Vector_Build <v4i32, R600_Reg128, i32, R600_Reg32>; | 
|  | 1826 |  | 
|  | 1827 | // bitconvert patterns | 
|  | 1828 |  | 
|  | 1829 | def : BitConvert <i32, f32, R600_Reg32>; | 
|  | 1830 | def : BitConvert <f32, i32, R600_Reg32>; | 
|  | 1831 | def : BitConvert <v4f32, v4i32, R600_Reg128>; | 
|  | 1832 | def : BitConvert <v4i32, v4f32, R600_Reg128>; | 
|  | 1833 |  | 
|  | 1834 | // DWORDADDR pattern | 
|  | 1835 | def : DwordAddrPat  <i32, R600_Reg32>; | 
|  | 1836 |  | 
|  | 1837 | } // End isR600toCayman Predicate |