| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 1 | //===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===// | 
|  | 2 | // | 
|  | 3 | //                     The LLVM Compiler Infrastructure | 
|  | 4 | // | 
|  | 5 | // This file is distributed under the University of Illinois Open Source | 
|  | 6 | // License. See LICENSE.TXT for details. | 
|  | 7 | // | 
|  | 8 | //===----------------------------------------------------------------------===// | 
|  | 9 | // | 
|  | 10 | /// \file | 
|  | 11 | /// \brief Custom DAG lowering for R600 | 
|  | 12 | // | 
|  | 13 | //===----------------------------------------------------------------------===// | 
|  | 14 |  | 
|  | 15 | #include "R600ISelLowering.h" | 
| Tom Stellard | 2e59a45 | 2014-06-13 01:32:00 +0000 | [diff] [blame] | 16 | #include "AMDGPUFrameLowering.h" | 
| Matt Arsenault | c791f39 | 2014-06-23 18:00:31 +0000 | [diff] [blame] | 17 | #include "AMDGPUIntrinsicInfo.h" | 
| Tom Stellard | 2e59a45 | 2014-06-13 01:32:00 +0000 | [diff] [blame] | 18 | #include "AMDGPUSubtarget.h" | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 19 | #include "R600Defines.h" | 
|  | 20 | #include "R600InstrInfo.h" | 
|  | 21 | #include "R600MachineFunctionInfo.h" | 
| Tom Stellard | 067c815 | 2014-07-21 14:01:14 +0000 | [diff] [blame] | 22 | #include "llvm/Analysis/ValueTracking.h" | 
| Tom Stellard | acfeebf | 2013-07-23 01:48:05 +0000 | [diff] [blame] | 23 | #include "llvm/CodeGen/CallingConvLower.h" | 
| Tom Stellard | f3b2a1e | 2013-02-06 17:32:29 +0000 | [diff] [blame] | 24 | #include "llvm/CodeGen/MachineFrameInfo.h" | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 25 | #include "llvm/CodeGen/MachineInstrBuilder.h" | 
|  | 26 | #include "llvm/CodeGen/MachineRegisterInfo.h" | 
|  | 27 | #include "llvm/CodeGen/SelectionDAG.h" | 
| Chandler Carruth | 9fb823b | 2013-01-02 11:36:10 +0000 | [diff] [blame] | 28 | #include "llvm/IR/Argument.h" | 
|  | 29 | #include "llvm/IR/Function.h" | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 30 |  | 
|  | 31 | using namespace llvm; | 
|  | 32 |  | 
|  | 33 | R600TargetLowering::R600TargetLowering(TargetMachine &TM) : | 
| Vincent Lejeune | b55940c | 2013-07-09 15:03:11 +0000 | [diff] [blame] | 34 | AMDGPUTargetLowering(TM), | 
|  | 35 | Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) { | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 36 | addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass); | 
|  | 37 | addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass); | 
|  | 38 | addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass); | 
|  | 39 | addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass); | 
| Tom Stellard | 0344cdf | 2013-08-01 15:23:42 +0000 | [diff] [blame] | 40 | addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass); | 
|  | 41 | addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass); | 
|  | 42 |  | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 43 | computeRegisterProperties(); | 
|  | 44 |  | 
| Tom Stellard | 0351ea2 | 2013-09-28 02:50:50 +0000 | [diff] [blame] | 45 | // Set condition code actions | 
|  | 46 | setCondCodeAction(ISD::SETO,   MVT::f32, Expand); | 
|  | 47 | setCondCodeAction(ISD::SETUO,  MVT::f32, Expand); | 
| Tom Stellard | cd42818 | 2013-09-28 02:50:38 +0000 | [diff] [blame] | 48 | setCondCodeAction(ISD::SETLT,  MVT::f32, Expand); | 
| Tom Stellard | 0351ea2 | 2013-09-28 02:50:50 +0000 | [diff] [blame] | 49 | setCondCodeAction(ISD::SETLE,  MVT::f32, Expand); | 
| Tom Stellard | cd42818 | 2013-09-28 02:50:38 +0000 | [diff] [blame] | 50 | setCondCodeAction(ISD::SETOLT, MVT::f32, Expand); | 
|  | 51 | setCondCodeAction(ISD::SETOLE, MVT::f32, Expand); | 
| Tom Stellard | 0351ea2 | 2013-09-28 02:50:50 +0000 | [diff] [blame] | 52 | setCondCodeAction(ISD::SETONE, MVT::f32, Expand); | 
|  | 53 | setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand); | 
|  | 54 | setCondCodeAction(ISD::SETUGE, MVT::f32, Expand); | 
|  | 55 | setCondCodeAction(ISD::SETUGT, MVT::f32, Expand); | 
| Tom Stellard | cd42818 | 2013-09-28 02:50:38 +0000 | [diff] [blame] | 56 | setCondCodeAction(ISD::SETULT, MVT::f32, Expand); | 
|  | 57 | setCondCodeAction(ISD::SETULE, MVT::f32, Expand); | 
|  | 58 |  | 
|  | 59 | setCondCodeAction(ISD::SETLE, MVT::i32, Expand); | 
|  | 60 | setCondCodeAction(ISD::SETLT, MVT::i32, Expand); | 
|  | 61 | setCondCodeAction(ISD::SETULE, MVT::i32, Expand); | 
|  | 62 | setCondCodeAction(ISD::SETULT, MVT::i32, Expand); | 
|  | 63 |  | 
| Vincent Lejeune | b55940c | 2013-07-09 15:03:11 +0000 | [diff] [blame] | 64 | setOperationAction(ISD::FCOS, MVT::f32, Custom); | 
|  | 65 | setOperationAction(ISD::FSIN, MVT::f32, Custom); | 
|  | 66 |  | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 67 | setOperationAction(ISD::SETCC, MVT::v4i32, Expand); | 
| Tom Stellard | 0344cdf | 2013-08-01 15:23:42 +0000 | [diff] [blame] | 68 | setOperationAction(ISD::SETCC, MVT::v2i32, Expand); | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 69 |  | 
| Tom Stellard | 492ebea | 2013-03-08 15:37:07 +0000 | [diff] [blame] | 70 | setOperationAction(ISD::BR_CC, MVT::i32, Expand); | 
|  | 71 | setOperationAction(ISD::BR_CC, MVT::f32, Expand); | 
| Matt Arsenault | 1d555c4 | 2014-06-23 18:00:55 +0000 | [diff] [blame] | 72 | setOperationAction(ISD::BRCOND, MVT::Other, Custom); | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 73 |  | 
|  | 74 | setOperationAction(ISD::FSUB, MVT::f32, Expand); | 
|  | 75 |  | 
|  | 76 | setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); | 
|  | 77 | setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); | 
|  | 78 | setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom); | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 79 |  | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 80 | setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); | 
|  | 81 | setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); | 
|  | 82 |  | 
| Tom Stellard | e8f9f28 | 2013-03-08 15:37:05 +0000 | [diff] [blame] | 83 | setOperationAction(ISD::SETCC, MVT::i32, Expand); | 
|  | 84 | setOperationAction(ISD::SETCC, MVT::f32, Expand); | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 85 | setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom); | 
| Jan Vesely | 2cb62ce | 2014-07-10 22:40:21 +0000 | [diff] [blame] | 86 | setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); | 
|  | 87 | setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 88 |  | 
| Tom Stellard | 53f2f90 | 2013-09-05 18:38:03 +0000 | [diff] [blame] | 89 | setOperationAction(ISD::SELECT, MVT::i32, Expand); | 
|  | 90 | setOperationAction(ISD::SELECT, MVT::f32, Expand); | 
|  | 91 | setOperationAction(ISD::SELECT, MVT::v2i32, Expand); | 
| Tom Stellard | 53f2f90 | 2013-09-05 18:38:03 +0000 | [diff] [blame] | 92 | setOperationAction(ISD::SELECT, MVT::v4i32, Expand); | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 93 |  | 
| Matt Arsenault | 4e46665 | 2014-04-16 01:41:30 +0000 | [diff] [blame] | 94 | // Expand sign extension of vectors | 
|  | 95 | if (!Subtarget->hasBFE()) | 
|  | 96 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); | 
|  | 97 |  | 
|  | 98 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand); | 
|  | 99 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand); | 
|  | 100 |  | 
|  | 101 | if (!Subtarget->hasBFE()) | 
|  | 102 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); | 
|  | 103 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand); | 
|  | 104 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand); | 
|  | 105 |  | 
|  | 106 | if (!Subtarget->hasBFE()) | 
|  | 107 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); | 
|  | 108 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand); | 
|  | 109 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand); | 
|  | 110 |  | 
|  | 111 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal); | 
|  | 112 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand); | 
|  | 113 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand); | 
|  | 114 |  | 
|  | 115 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand); | 
|  | 116 |  | 
|  | 117 |  | 
| Tom Stellard | f3b2a1e | 2013-02-06 17:32:29 +0000 | [diff] [blame] | 118 | // Legalize loads and stores to the private address space. | 
|  | 119 | setOperationAction(ISD::LOAD, MVT::i32, Custom); | 
| Tom Stellard | 0344cdf | 2013-08-01 15:23:42 +0000 | [diff] [blame] | 120 | setOperationAction(ISD::LOAD, MVT::v2i32, Custom); | 
| Tom Stellard | f3b2a1e | 2013-02-06 17:32:29 +0000 | [diff] [blame] | 121 | setOperationAction(ISD::LOAD, MVT::v4i32, Custom); | 
| Matt Arsenault | 00a0d6f | 2013-11-13 02:39:07 +0000 | [diff] [blame] | 122 |  | 
|  | 123 | // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address | 
|  | 124 | // spaces, so it is custom lowered to handle those where it isn't. | 
| Tom Stellard | 1e80309 | 2013-07-23 01:48:18 +0000 | [diff] [blame] | 125 | setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom); | 
|  | 126 | setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom); | 
|  | 127 | setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom); | 
|  | 128 | setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom); | 
| Matt Arsenault | 00a0d6f | 2013-11-13 02:39:07 +0000 | [diff] [blame] | 129 | setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom); | 
|  | 130 | setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom); | 
|  | 131 |  | 
| Tom Stellard | f3b2a1e | 2013-02-06 17:32:29 +0000 | [diff] [blame] | 132 | setOperationAction(ISD::STORE, MVT::i8, Custom); | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 133 | setOperationAction(ISD::STORE, MVT::i32, Custom); | 
| Tom Stellard | 0344cdf | 2013-08-01 15:23:42 +0000 | [diff] [blame] | 134 | setOperationAction(ISD::STORE, MVT::v2i32, Custom); | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 135 | setOperationAction(ISD::STORE, MVT::v4i32, Custom); | 
| Tom Stellard | d3ee8c1 | 2013-08-16 01:12:06 +0000 | [diff] [blame] | 136 | setTruncStoreAction(MVT::i32, MVT::i8, Custom); | 
|  | 137 | setTruncStoreAction(MVT::i32, MVT::i16, Custom); | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 138 |  | 
| Tom Stellard | 365366f | 2013-01-23 02:09:06 +0000 | [diff] [blame] | 139 | setOperationAction(ISD::LOAD, MVT::i32, Custom); | 
|  | 140 | setOperationAction(ISD::LOAD, MVT::v4i32, Custom); | 
| Tom Stellard | f3b2a1e | 2013-02-06 17:32:29 +0000 | [diff] [blame] | 141 | setOperationAction(ISD::FrameIndex, MVT::i32, Custom); | 
|  | 142 |  | 
| Tom Stellard | 880a80a | 2014-06-17 16:53:14 +0000 | [diff] [blame] | 143 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom); | 
|  | 144 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom); | 
|  | 145 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom); | 
|  | 146 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); | 
|  | 147 |  | 
|  | 148 | setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom); | 
|  | 149 | setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom); | 
|  | 150 | setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); | 
|  | 151 | setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); | 
|  | 152 |  | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 153 | setTargetDAGCombine(ISD::FP_ROUND); | 
| Tom Stellard | e06163a | 2013-02-07 14:02:35 +0000 | [diff] [blame] | 154 | setTargetDAGCombine(ISD::FP_TO_SINT); | 
| Tom Stellard | 365366f | 2013-01-23 02:09:06 +0000 | [diff] [blame] | 155 | setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); | 
| Tom Stellard | e06163a | 2013-02-07 14:02:35 +0000 | [diff] [blame] | 156 | setTargetDAGCombine(ISD::SELECT_CC); | 
| Quentin Colombet | e2e0548 | 2013-07-30 00:27:16 +0000 | [diff] [blame] | 157 | setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 158 |  | 
| Matt Arsenault | b8b5153 | 2014-06-23 18:00:38 +0000 | [diff] [blame] | 159 | setOperationAction(ISD::SUB, MVT::i64, Expand); | 
|  | 160 |  | 
| Tom Stellard | 5f33788 | 2014-04-29 23:12:43 +0000 | [diff] [blame] | 161 | // These should be replaced by UDVIREM, but it does not happen automatically | 
|  | 162 | // during Type Legalization | 
|  | 163 | setOperationAction(ISD::UDIV, MVT::i64, Custom); | 
|  | 164 | setOperationAction(ISD::UREM, MVT::i64, Custom); | 
| Jan Vesely | 343cd6f0 | 2014-06-22 21:43:01 +0000 | [diff] [blame] | 165 | setOperationAction(ISD::SDIV, MVT::i64, Custom); | 
|  | 166 | setOperationAction(ISD::SREM, MVT::i64, Custom); | 
| Tom Stellard | 5f33788 | 2014-04-29 23:12:43 +0000 | [diff] [blame] | 167 |  | 
| Jan Vesely | 25f3627 | 2014-06-18 12:27:13 +0000 | [diff] [blame] | 168 | // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32 | 
|  | 169 | //  to be Legal/Custom in order to avoid library calls. | 
|  | 170 | setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); | 
| Jan Vesely | 900ff2e | 2014-06-18 12:27:15 +0000 | [diff] [blame] | 171 | setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); | 
| Jan Vesely | ecf5133 | 2014-06-18 12:27:17 +0000 | [diff] [blame] | 172 | setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); | 
| Jan Vesely | 25f3627 | 2014-06-18 12:27:13 +0000 | [diff] [blame] | 173 |  | 
| Michel Danzer | 49812b5 | 2013-07-10 16:37:07 +0000 | [diff] [blame] | 174 | setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); | 
|  | 175 |  | 
| Matt Arsenault | c4d3d3a | 2014-06-23 18:00:49 +0000 | [diff] [blame] | 176 | const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 }; | 
|  | 177 | for (MVT VT : ScalarIntVTs) { | 
|  | 178 | setOperationAction(ISD::ADDC, VT, Expand); | 
|  | 179 | setOperationAction(ISD::SUBC, VT, Expand); | 
|  | 180 | setOperationAction(ISD::ADDE, VT, Expand); | 
|  | 181 | setOperationAction(ISD::SUBE, VT, Expand); | 
|  | 182 | } | 
|  | 183 |  | 
| Tom Stellard | b852af5 | 2013-03-08 15:37:03 +0000 | [diff] [blame] | 184 | setBooleanContents(ZeroOrNegativeOneBooleanContent); | 
| Tom Stellard | 87047f6 | 2013-04-24 23:56:18 +0000 | [diff] [blame] | 185 | setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); | 
| Tom Stellard | fc45547 | 2013-08-12 22:33:21 +0000 | [diff] [blame] | 186 | setSchedulingPreference(Sched::Source); | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 187 | } | 
|  | 188 |  | 
|  | 189 | MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( | 
|  | 190 | MachineInstr * MI, MachineBasicBlock * BB) const { | 
|  | 191 | MachineFunction * MF = BB->getParent(); | 
|  | 192 | MachineRegisterInfo &MRI = MF->getRegInfo(); | 
|  | 193 | MachineBasicBlock::iterator I = *MI; | 
| Eric Christopher | fc6de42 | 2014-08-05 02:39:49 +0000 | [diff] [blame] | 194 | const R600InstrInfo *TII = | 
|  | 195 | static_cast<const R600InstrInfo *>(MF->getSubtarget().getInstrInfo()); | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 196 |  | 
|  | 197 | switch (MI->getOpcode()) { | 
| Tom Stellard | c6f4a29 | 2013-08-26 15:05:59 +0000 | [diff] [blame] | 198 | default: | 
| Tom Stellard | 8f9fc20 | 2013-11-15 00:12:45 +0000 | [diff] [blame] | 199 | // Replace LDS_*_RET instruction that don't have any uses with the | 
|  | 200 | // equivalent LDS_*_NORET instruction. | 
|  | 201 | if (TII->isLDSRetInstr(MI->getOpcode())) { | 
| Tom Stellard | 13c68ef | 2013-09-05 18:38:09 +0000 | [diff] [blame] | 202 | int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst); | 
|  | 203 | assert(DstIdx != -1); | 
|  | 204 | MachineInstrBuilder NewMI; | 
| Tom Stellard | 8f9fc20 | 2013-11-15 00:12:45 +0000 | [diff] [blame] | 205 | if (!MRI.use_empty(MI->getOperand(DstIdx).getReg())) | 
|  | 206 | return BB; | 
|  | 207 |  | 
|  | 208 | NewMI = BuildMI(*BB, I, BB->findDebugLoc(I), | 
|  | 209 | TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode()))); | 
| Tom Stellard | c6f4a29 | 2013-08-26 15:05:59 +0000 | [diff] [blame] | 210 | for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) { | 
|  | 211 | NewMI.addOperand(MI->getOperand(i)); | 
|  | 212 | } | 
| Tom Stellard | c6f4a29 | 2013-08-26 15:05:59 +0000 | [diff] [blame] | 213 | } else { | 
|  | 214 | return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); | 
|  | 215 | } | 
|  | 216 | break; | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 217 | case AMDGPU::CLAMP_R600: { | 
|  | 218 | MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I, | 
|  | 219 | AMDGPU::MOV, | 
|  | 220 | MI->getOperand(0).getReg(), | 
|  | 221 | MI->getOperand(1).getReg()); | 
|  | 222 | TII->addFlag(NewMI, 0, MO_FLAG_CLAMP); | 
|  | 223 | break; | 
|  | 224 | } | 
|  | 225 |  | 
|  | 226 | case AMDGPU::FABS_R600: { | 
|  | 227 | MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I, | 
|  | 228 | AMDGPU::MOV, | 
|  | 229 | MI->getOperand(0).getReg(), | 
|  | 230 | MI->getOperand(1).getReg()); | 
|  | 231 | TII->addFlag(NewMI, 0, MO_FLAG_ABS); | 
|  | 232 | break; | 
|  | 233 | } | 
|  | 234 |  | 
|  | 235 | case AMDGPU::FNEG_R600: { | 
|  | 236 | MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I, | 
|  | 237 | AMDGPU::MOV, | 
|  | 238 | MI->getOperand(0).getReg(), | 
|  | 239 | MI->getOperand(1).getReg()); | 
|  | 240 | TII->addFlag(NewMI, 0, MO_FLAG_NEG); | 
|  | 241 | break; | 
|  | 242 | } | 
|  | 243 |  | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 244 | case AMDGPU::MASK_WRITE: { | 
|  | 245 | unsigned maskedRegister = MI->getOperand(0).getReg(); | 
|  | 246 | assert(TargetRegisterInfo::isVirtualRegister(maskedRegister)); | 
|  | 247 | MachineInstr * defInstr = MRI.getVRegDef(maskedRegister); | 
|  | 248 | TII->addFlag(defInstr, 0, MO_FLAG_MASK); | 
|  | 249 | break; | 
|  | 250 | } | 
|  | 251 |  | 
|  | 252 | case AMDGPU::MOV_IMM_F32: | 
|  | 253 | TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(), | 
|  | 254 | MI->getOperand(1).getFPImm()->getValueAPF() | 
|  | 255 | .bitcastToAPInt().getZExtValue()); | 
|  | 256 | break; | 
|  | 257 | case AMDGPU::MOV_IMM_I32: | 
|  | 258 | TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(), | 
|  | 259 | MI->getOperand(1).getImm()); | 
|  | 260 | break; | 
| Vincent Lejeune | 0b72f10 | 2013-03-05 15:04:55 +0000 | [diff] [blame] | 261 | case AMDGPU::CONST_COPY: { | 
|  | 262 | MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV, | 
|  | 263 | MI->getOperand(0).getReg(), AMDGPU::ALU_CONST); | 
| Tom Stellard | 02661d9 | 2013-06-25 21:22:18 +0000 | [diff] [blame] | 264 | TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel, | 
| Vincent Lejeune | 0b72f10 | 2013-03-05 15:04:55 +0000 | [diff] [blame] | 265 | MI->getOperand(1).getImm()); | 
|  | 266 | break; | 
|  | 267 | } | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 268 |  | 
|  | 269 | case AMDGPU::RAT_WRITE_CACHELESS_32_eg: | 
| Tom Stellard | 0344cdf | 2013-08-01 15:23:42 +0000 | [diff] [blame] | 270 | case AMDGPU::RAT_WRITE_CACHELESS_64_eg: | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 271 | case AMDGPU::RAT_WRITE_CACHELESS_128_eg: { | 
| Benjamin Kramer | b6d0bd4 | 2014-03-02 12:27:27 +0000 | [diff] [blame] | 272 | unsigned EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0; | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 273 |  | 
|  | 274 | BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode())) | 
|  | 275 | .addOperand(MI->getOperand(0)) | 
|  | 276 | .addOperand(MI->getOperand(1)) | 
|  | 277 | .addImm(EOP); // Set End of program bit | 
|  | 278 | break; | 
|  | 279 | } | 
|  | 280 |  | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 281 | case AMDGPU::TXD: { | 
|  | 282 | unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass); | 
|  | 283 | unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass); | 
| Vincent Lejeune | d3eed66 | 2013-05-17 16:50:20 +0000 | [diff] [blame] | 284 | MachineOperand &RID = MI->getOperand(4); | 
|  | 285 | MachineOperand &SID = MI->getOperand(5); | 
|  | 286 | unsigned TextureId = MI->getOperand(6).getImm(); | 
|  | 287 | unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3; | 
|  | 288 | unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1; | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 289 |  | 
| Vincent Lejeune | d3eed66 | 2013-05-17 16:50:20 +0000 | [diff] [blame] | 290 | switch (TextureId) { | 
|  | 291 | case 5: // Rect | 
|  | 292 | CTX = CTY = 0; | 
|  | 293 | break; | 
|  | 294 | case 6: // Shadow1D | 
|  | 295 | SrcW = SrcZ; | 
|  | 296 | break; | 
|  | 297 | case 7: // Shadow2D | 
|  | 298 | SrcW = SrcZ; | 
|  | 299 | break; | 
|  | 300 | case 8: // ShadowRect | 
|  | 301 | CTX = CTY = 0; | 
|  | 302 | SrcW = SrcZ; | 
|  | 303 | break; | 
|  | 304 | case 9: // 1DArray | 
|  | 305 | SrcZ = SrcY; | 
|  | 306 | CTZ = 0; | 
|  | 307 | break; | 
|  | 308 | case 10: // 2DArray | 
|  | 309 | CTZ = 0; | 
|  | 310 | break; | 
|  | 311 | case 11: // Shadow1DArray | 
|  | 312 | SrcZ = SrcY; | 
|  | 313 | CTZ = 0; | 
|  | 314 | break; | 
|  | 315 | case 12: // Shadow2DArray | 
|  | 316 | CTZ = 0; | 
|  | 317 | break; | 
|  | 318 | } | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 319 | BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0) | 
|  | 320 | .addOperand(MI->getOperand(3)) | 
| Vincent Lejeune | d3eed66 | 2013-05-17 16:50:20 +0000 | [diff] [blame] | 321 | .addImm(SrcX) | 
|  | 322 | .addImm(SrcY) | 
|  | 323 | .addImm(SrcZ) | 
|  | 324 | .addImm(SrcW) | 
|  | 325 | .addImm(0) | 
|  | 326 | .addImm(0) | 
|  | 327 | .addImm(0) | 
|  | 328 | .addImm(0) | 
|  | 329 | .addImm(1) | 
|  | 330 | .addImm(2) | 
|  | 331 | .addImm(3) | 
|  | 332 | .addOperand(RID) | 
|  | 333 | .addOperand(SID) | 
|  | 334 | .addImm(CTX) | 
|  | 335 | .addImm(CTY) | 
|  | 336 | .addImm(CTZ) | 
|  | 337 | .addImm(CTW); | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 338 | BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1) | 
|  | 339 | .addOperand(MI->getOperand(2)) | 
| Vincent Lejeune | d3eed66 | 2013-05-17 16:50:20 +0000 | [diff] [blame] | 340 | .addImm(SrcX) | 
|  | 341 | .addImm(SrcY) | 
|  | 342 | .addImm(SrcZ) | 
|  | 343 | .addImm(SrcW) | 
|  | 344 | .addImm(0) | 
|  | 345 | .addImm(0) | 
|  | 346 | .addImm(0) | 
|  | 347 | .addImm(0) | 
|  | 348 | .addImm(1) | 
|  | 349 | .addImm(2) | 
|  | 350 | .addImm(3) | 
|  | 351 | .addOperand(RID) | 
|  | 352 | .addOperand(SID) | 
|  | 353 | .addImm(CTX) | 
|  | 354 | .addImm(CTY) | 
|  | 355 | .addImm(CTZ) | 
|  | 356 | .addImm(CTW); | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 357 | BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G)) | 
|  | 358 | .addOperand(MI->getOperand(0)) | 
|  | 359 | .addOperand(MI->getOperand(1)) | 
| Vincent Lejeune | d3eed66 | 2013-05-17 16:50:20 +0000 | [diff] [blame] | 360 | .addImm(SrcX) | 
|  | 361 | .addImm(SrcY) | 
|  | 362 | .addImm(SrcZ) | 
|  | 363 | .addImm(SrcW) | 
|  | 364 | .addImm(0) | 
|  | 365 | .addImm(0) | 
|  | 366 | .addImm(0) | 
|  | 367 | .addImm(0) | 
|  | 368 | .addImm(1) | 
|  | 369 | .addImm(2) | 
|  | 370 | .addImm(3) | 
|  | 371 | .addOperand(RID) | 
|  | 372 | .addOperand(SID) | 
|  | 373 | .addImm(CTX) | 
|  | 374 | .addImm(CTY) | 
|  | 375 | .addImm(CTZ) | 
|  | 376 | .addImm(CTW) | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 377 | .addReg(T0, RegState::Implicit) | 
|  | 378 | .addReg(T1, RegState::Implicit); | 
|  | 379 | break; | 
|  | 380 | } | 
|  | 381 |  | 
|  | 382 | case AMDGPU::TXD_SHADOW: { | 
|  | 383 | unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass); | 
|  | 384 | unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass); | 
| Vincent Lejeune | d3eed66 | 2013-05-17 16:50:20 +0000 | [diff] [blame] | 385 | MachineOperand &RID = MI->getOperand(4); | 
|  | 386 | MachineOperand &SID = MI->getOperand(5); | 
|  | 387 | unsigned TextureId = MI->getOperand(6).getImm(); | 
|  | 388 | unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3; | 
|  | 389 | unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1; | 
|  | 390 |  | 
|  | 391 | switch (TextureId) { | 
|  | 392 | case 5: // Rect | 
|  | 393 | CTX = CTY = 0; | 
|  | 394 | break; | 
|  | 395 | case 6: // Shadow1D | 
|  | 396 | SrcW = SrcZ; | 
|  | 397 | break; | 
|  | 398 | case 7: // Shadow2D | 
|  | 399 | SrcW = SrcZ; | 
|  | 400 | break; | 
|  | 401 | case 8: // ShadowRect | 
|  | 402 | CTX = CTY = 0; | 
|  | 403 | SrcW = SrcZ; | 
|  | 404 | break; | 
|  | 405 | case 9: // 1DArray | 
|  | 406 | SrcZ = SrcY; | 
|  | 407 | CTZ = 0; | 
|  | 408 | break; | 
|  | 409 | case 10: // 2DArray | 
|  | 410 | CTZ = 0; | 
|  | 411 | break; | 
|  | 412 | case 11: // Shadow1DArray | 
|  | 413 | SrcZ = SrcY; | 
|  | 414 | CTZ = 0; | 
|  | 415 | break; | 
|  | 416 | case 12: // Shadow2DArray | 
|  | 417 | CTZ = 0; | 
|  | 418 | break; | 
|  | 419 | } | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 420 |  | 
|  | 421 | BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0) | 
|  | 422 | .addOperand(MI->getOperand(3)) | 
| Vincent Lejeune | d3eed66 | 2013-05-17 16:50:20 +0000 | [diff] [blame] | 423 | .addImm(SrcX) | 
|  | 424 | .addImm(SrcY) | 
|  | 425 | .addImm(SrcZ) | 
|  | 426 | .addImm(SrcW) | 
|  | 427 | .addImm(0) | 
|  | 428 | .addImm(0) | 
|  | 429 | .addImm(0) | 
|  | 430 | .addImm(0) | 
|  | 431 | .addImm(1) | 
|  | 432 | .addImm(2) | 
|  | 433 | .addImm(3) | 
|  | 434 | .addOperand(RID) | 
|  | 435 | .addOperand(SID) | 
|  | 436 | .addImm(CTX) | 
|  | 437 | .addImm(CTY) | 
|  | 438 | .addImm(CTZ) | 
|  | 439 | .addImm(CTW); | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 440 | BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1) | 
|  | 441 | .addOperand(MI->getOperand(2)) | 
| Vincent Lejeune | d3eed66 | 2013-05-17 16:50:20 +0000 | [diff] [blame] | 442 | .addImm(SrcX) | 
|  | 443 | .addImm(SrcY) | 
|  | 444 | .addImm(SrcZ) | 
|  | 445 | .addImm(SrcW) | 
|  | 446 | .addImm(0) | 
|  | 447 | .addImm(0) | 
|  | 448 | .addImm(0) | 
|  | 449 | .addImm(0) | 
|  | 450 | .addImm(1) | 
|  | 451 | .addImm(2) | 
|  | 452 | .addImm(3) | 
|  | 453 | .addOperand(RID) | 
|  | 454 | .addOperand(SID) | 
|  | 455 | .addImm(CTX) | 
|  | 456 | .addImm(CTY) | 
|  | 457 | .addImm(CTZ) | 
|  | 458 | .addImm(CTW); | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 459 | BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G)) | 
|  | 460 | .addOperand(MI->getOperand(0)) | 
|  | 461 | .addOperand(MI->getOperand(1)) | 
| Vincent Lejeune | d3eed66 | 2013-05-17 16:50:20 +0000 | [diff] [blame] | 462 | .addImm(SrcX) | 
|  | 463 | .addImm(SrcY) | 
|  | 464 | .addImm(SrcZ) | 
|  | 465 | .addImm(SrcW) | 
|  | 466 | .addImm(0) | 
|  | 467 | .addImm(0) | 
|  | 468 | .addImm(0) | 
|  | 469 | .addImm(0) | 
|  | 470 | .addImm(1) | 
|  | 471 | .addImm(2) | 
|  | 472 | .addImm(3) | 
|  | 473 | .addOperand(RID) | 
|  | 474 | .addOperand(SID) | 
|  | 475 | .addImm(CTX) | 
|  | 476 | .addImm(CTY) | 
|  | 477 | .addImm(CTZ) | 
|  | 478 | .addImm(CTW) | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 479 | .addReg(T0, RegState::Implicit) | 
|  | 480 | .addReg(T1, RegState::Implicit); | 
|  | 481 | break; | 
|  | 482 | } | 
|  | 483 |  | 
|  | 484 | case AMDGPU::BRANCH: | 
|  | 485 | BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP)) | 
| Vincent Lejeune | e5ecf10 | 2013-03-11 18:15:06 +0000 | [diff] [blame] | 486 | .addOperand(MI->getOperand(0)); | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 487 | break; | 
|  | 488 |  | 
|  | 489 | case AMDGPU::BRANCH_COND_f32: { | 
|  | 490 | MachineInstr *NewMI = | 
|  | 491 | BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X), | 
|  | 492 | AMDGPU::PREDICATE_BIT) | 
|  | 493 | .addOperand(MI->getOperand(1)) | 
|  | 494 | .addImm(OPCODE_IS_NOT_ZERO) | 
|  | 495 | .addImm(0); // Flags | 
|  | 496 | TII->addFlag(NewMI, 0, MO_FLAG_PUSH); | 
| Vincent Lejeune | e5ecf10 | 2013-03-11 18:15:06 +0000 | [diff] [blame] | 497 | BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND)) | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 498 | .addOperand(MI->getOperand(0)) | 
|  | 499 | .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); | 
|  | 500 | break; | 
|  | 501 | } | 
|  | 502 |  | 
|  | 503 | case AMDGPU::BRANCH_COND_i32: { | 
|  | 504 | MachineInstr *NewMI = | 
|  | 505 | BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X), | 
|  | 506 | AMDGPU::PREDICATE_BIT) | 
|  | 507 | .addOperand(MI->getOperand(1)) | 
|  | 508 | .addImm(OPCODE_IS_NOT_ZERO_INT) | 
|  | 509 | .addImm(0); // Flags | 
|  | 510 | TII->addFlag(NewMI, 0, MO_FLAG_PUSH); | 
| Vincent Lejeune | e5ecf10 | 2013-03-11 18:15:06 +0000 | [diff] [blame] | 511 | BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND)) | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 512 | .addOperand(MI->getOperand(0)) | 
|  | 513 | .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); | 
|  | 514 | break; | 
|  | 515 | } | 
|  | 516 |  | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 517 | case AMDGPU::EG_ExportSwz: | 
|  | 518 | case AMDGPU::R600_ExportSwz: { | 
| Tom Stellard | 6f1b865 | 2013-01-23 21:39:49 +0000 | [diff] [blame] | 519 | // Instruction is left unmodified if its not the last one of its type | 
|  | 520 | bool isLastInstructionOfItsType = true; | 
|  | 521 | unsigned InstExportType = MI->getOperand(1).getImm(); | 
| Benjamin Kramer | b6d0bd4 | 2014-03-02 12:27:27 +0000 | [diff] [blame] | 522 | for (MachineBasicBlock::iterator NextExportInst = std::next(I), | 
| Tom Stellard | 6f1b865 | 2013-01-23 21:39:49 +0000 | [diff] [blame] | 523 | EndBlock = BB->end(); NextExportInst != EndBlock; | 
| Benjamin Kramer | b6d0bd4 | 2014-03-02 12:27:27 +0000 | [diff] [blame] | 524 | NextExportInst = std::next(NextExportInst)) { | 
| Tom Stellard | 6f1b865 | 2013-01-23 21:39:49 +0000 | [diff] [blame] | 525 | if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz || | 
|  | 526 | NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) { | 
|  | 527 | unsigned CurrentInstExportType = NextExportInst->getOperand(1) | 
|  | 528 | .getImm(); | 
|  | 529 | if (CurrentInstExportType == InstExportType) { | 
|  | 530 | isLastInstructionOfItsType = false; | 
|  | 531 | break; | 
|  | 532 | } | 
|  | 533 | } | 
|  | 534 | } | 
| Benjamin Kramer | b6d0bd4 | 2014-03-02 12:27:27 +0000 | [diff] [blame] | 535 | bool EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0; | 
| Tom Stellard | 6f1b865 | 2013-01-23 21:39:49 +0000 | [diff] [blame] | 536 | if (!EOP && !isLastInstructionOfItsType) | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 537 | return BB; | 
|  | 538 | unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40; | 
|  | 539 | BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode())) | 
|  | 540 | .addOperand(MI->getOperand(0)) | 
|  | 541 | .addOperand(MI->getOperand(1)) | 
|  | 542 | .addOperand(MI->getOperand(2)) | 
|  | 543 | .addOperand(MI->getOperand(3)) | 
|  | 544 | .addOperand(MI->getOperand(4)) | 
|  | 545 | .addOperand(MI->getOperand(5)) | 
|  | 546 | .addOperand(MI->getOperand(6)) | 
|  | 547 | .addImm(CfInst) | 
| Tom Stellard | 6f1b865 | 2013-01-23 21:39:49 +0000 | [diff] [blame] | 548 | .addImm(EOP); | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 549 | break; | 
|  | 550 | } | 
| Jakob Stoklund Olesen | fdc3767 | 2013-02-05 17:53:52 +0000 | [diff] [blame] | 551 | case AMDGPU::RETURN: { | 
|  | 552 | // RETURN instructions must have the live-out registers as implicit uses, | 
|  | 553 | // otherwise they appear dead. | 
|  | 554 | R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>(); | 
|  | 555 | MachineInstrBuilder MIB(*MF, MI); | 
|  | 556 | for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i) | 
|  | 557 | MIB.addReg(MFI->LiveOuts[i], RegState::Implicit); | 
|  | 558 | return BB; | 
|  | 559 | } | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 560 | } | 
|  | 561 |  | 
|  | 562 | MI->eraseFromParent(); | 
|  | 563 | return BB; | 
|  | 564 | } | 
|  | 565 |  | 
|  | 566 | //===----------------------------------------------------------------------===// | 
|  | 567 | // Custom DAG Lowering Operations | 
|  | 568 | //===----------------------------------------------------------------------===// | 
|  | 569 |  | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 570 | SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { | 
| Tom Stellard | c026e8b | 2013-06-28 15:47:08 +0000 | [diff] [blame] | 571 | MachineFunction &MF = DAG.getMachineFunction(); | 
|  | 572 | R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 573 | switch (Op.getOpcode()) { | 
|  | 574 | default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); | 
| Tom Stellard | 880a80a | 2014-06-17 16:53:14 +0000 | [diff] [blame] | 575 | case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); | 
|  | 576 | case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); | 
| Jan Vesely | 25f3627 | 2014-06-18 12:27:13 +0000 | [diff] [blame] | 577 | case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG); | 
| Jan Vesely | ecf5133 | 2014-06-18 12:27:17 +0000 | [diff] [blame] | 578 | case ISD::SRA_PARTS: | 
| Jan Vesely | 900ff2e | 2014-06-18 12:27:15 +0000 | [diff] [blame] | 579 | case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG); | 
| Vincent Lejeune | b55940c | 2013-07-09 15:03:11 +0000 | [diff] [blame] | 580 | case ISD::FCOS: | 
|  | 581 | case ISD::FSIN: return LowerTrig(Op, DAG); | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 582 | case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 583 | case ISD::STORE: return LowerSTORE(Op, DAG); | 
| Matt Arsenault | d2c9e08 | 2014-07-07 18:34:45 +0000 | [diff] [blame] | 584 | case ISD::LOAD: { | 
|  | 585 | SDValue Result = LowerLOAD(Op, DAG); | 
|  | 586 | assert((!Result.getNode() || | 
|  | 587 | Result.getNode()->getNumValues() == 2) && | 
|  | 588 | "Load should return a value and a chain"); | 
|  | 589 | return Result; | 
|  | 590 | } | 
|  | 591 |  | 
| Matt Arsenault | 1d555c4 | 2014-06-23 18:00:55 +0000 | [diff] [blame] | 592 | case ISD::BRCOND: return LowerBRCOND(Op, DAG); | 
| Tom Stellard | c026e8b | 2013-06-28 15:47:08 +0000 | [diff] [blame] | 593 | case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG); | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 594 | case ISD::INTRINSIC_VOID: { | 
|  | 595 | SDValue Chain = Op.getOperand(0); | 
|  | 596 | unsigned IntrinsicID = | 
|  | 597 | cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); | 
|  | 598 | switch (IntrinsicID) { | 
|  | 599 | case AMDGPUIntrinsic::AMDGPU_store_output: { | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 600 | int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); | 
|  | 601 | unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex); | 
| Jakob Stoklund Olesen | fdc3767 | 2013-02-05 17:53:52 +0000 | [diff] [blame] | 602 | MFI->LiveOuts.push_back(Reg); | 
| Andrew Trick | ef9de2a | 2013-05-25 02:42:55 +0000 | [diff] [blame] | 603 | return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2)); | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 604 | } | 
| Vincent Lejeune | d80bc15 | 2013-02-14 16:55:06 +0000 | [diff] [blame] | 605 | case AMDGPUIntrinsic::R600_store_swizzle: { | 
|  | 606 | const SDValue Args[8] = { | 
|  | 607 | Chain, | 
|  | 608 | Op.getOperand(2), // Export Value | 
|  | 609 | Op.getOperand(3), // ArrayBase | 
|  | 610 | Op.getOperand(4), // Type | 
|  | 611 | DAG.getConstant(0, MVT::i32), // SWZ_X | 
|  | 612 | DAG.getConstant(1, MVT::i32), // SWZ_Y | 
|  | 613 | DAG.getConstant(2, MVT::i32), // SWZ_Z | 
|  | 614 | DAG.getConstant(3, MVT::i32) // SWZ_W | 
|  | 615 | }; | 
| Craig Topper | 48d114b | 2014-04-26 18:35:24 +0000 | [diff] [blame] | 616 | return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(), Args); | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 617 | } | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 618 |  | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 619 | // default for switch(IntrinsicID) | 
|  | 620 | default: break; | 
|  | 621 | } | 
|  | 622 | // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode()) | 
|  | 623 | break; | 
|  | 624 | } | 
|  | 625 | case ISD::INTRINSIC_WO_CHAIN: { | 
|  | 626 | unsigned IntrinsicID = | 
|  | 627 | cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); | 
|  | 628 | EVT VT = Op.getValueType(); | 
| Andrew Trick | ef9de2a | 2013-05-25 02:42:55 +0000 | [diff] [blame] | 629 | SDLoc DL(Op); | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 630 | switch(IntrinsicID) { | 
|  | 631 | default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); | 
| Vincent Lejeune | aee3a10 | 2013-11-12 16:26:47 +0000 | [diff] [blame] | 632 | case AMDGPUIntrinsic::R600_load_input: { | 
|  | 633 | int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); | 
|  | 634 | unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex); | 
|  | 635 | MachineFunction &MF = DAG.getMachineFunction(); | 
|  | 636 | MachineRegisterInfo &MRI = MF.getRegInfo(); | 
|  | 637 | MRI.addLiveIn(Reg); | 
|  | 638 | return DAG.getCopyFromReg(DAG.getEntryNode(), | 
|  | 639 | SDLoc(DAG.getEntryNode()), Reg, VT); | 
|  | 640 | } | 
|  | 641 |  | 
|  | 642 | case AMDGPUIntrinsic::R600_interp_input: { | 
|  | 643 | int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); | 
|  | 644 | int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue(); | 
|  | 645 | MachineSDNode *interp; | 
|  | 646 | if (ijb < 0) { | 
|  | 647 | const MachineFunction &MF = DAG.getMachineFunction(); | 
| Eric Christopher | d913448 | 2014-08-04 21:25:23 +0000 | [diff] [blame] | 648 | const R600InstrInfo *TII = static_cast<const R600InstrInfo *>( | 
| Eric Christopher | fc6de42 | 2014-08-05 02:39:49 +0000 | [diff] [blame] | 649 | MF.getSubtarget().getInstrInfo()); | 
| Vincent Lejeune | aee3a10 | 2013-11-12 16:26:47 +0000 | [diff] [blame] | 650 | interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL, | 
|  | 651 | MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32)); | 
|  | 652 | return DAG.getTargetExtractSubreg( | 
|  | 653 | TII->getRegisterInfo().getSubRegFromChannel(slot % 4), | 
|  | 654 | DL, MVT::f32, SDValue(interp, 0)); | 
|  | 655 | } | 
|  | 656 | MachineFunction &MF = DAG.getMachineFunction(); | 
|  | 657 | MachineRegisterInfo &MRI = MF.getRegInfo(); | 
|  | 658 | unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb); | 
|  | 659 | unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1); | 
|  | 660 | MRI.addLiveIn(RegisterI); | 
|  | 661 | MRI.addLiveIn(RegisterJ); | 
|  | 662 | SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(), | 
|  | 663 | SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32); | 
|  | 664 | SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(), | 
|  | 665 | SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32); | 
|  | 666 |  | 
|  | 667 | if (slot % 4 < 2) | 
|  | 668 | interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL, | 
|  | 669 | MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32), | 
|  | 670 | RegisterJNode, RegisterINode); | 
|  | 671 | else | 
|  | 672 | interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL, | 
|  | 673 | MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32), | 
|  | 674 | RegisterJNode, RegisterINode); | 
|  | 675 | return SDValue(interp, slot % 2); | 
|  | 676 | } | 
| Vincent Lejeune | f143af3 | 2013-11-11 22:10:24 +0000 | [diff] [blame] | 677 | case AMDGPUIntrinsic::R600_interp_xy: | 
|  | 678 | case AMDGPUIntrinsic::R600_interp_zw: { | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 679 | int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); | 
| Tom Stellard | 41afe6a | 2013-02-05 17:09:14 +0000 | [diff] [blame] | 680 | MachineSDNode *interp; | 
| Vincent Lejeune | f143af3 | 2013-11-11 22:10:24 +0000 | [diff] [blame] | 681 | SDValue RegisterINode = Op.getOperand(2); | 
|  | 682 | SDValue RegisterJNode = Op.getOperand(3); | 
| Tom Stellard | 41afe6a | 2013-02-05 17:09:14 +0000 | [diff] [blame] | 683 |  | 
| Vincent Lejeune | f143af3 | 2013-11-11 22:10:24 +0000 | [diff] [blame] | 684 | if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy) | 
| Tom Stellard | 41afe6a | 2013-02-05 17:09:14 +0000 | [diff] [blame] | 685 | interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL, | 
| Vincent Lejeune | f143af3 | 2013-11-11 22:10:24 +0000 | [diff] [blame] | 686 | MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32), | 
| Vincent Lejeune | a09873d | 2013-06-03 15:44:16 +0000 | [diff] [blame] | 687 | RegisterJNode, RegisterINode); | 
| Tom Stellard | 41afe6a | 2013-02-05 17:09:14 +0000 | [diff] [blame] | 688 | else | 
|  | 689 | interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL, | 
| Vincent Lejeune | f143af3 | 2013-11-11 22:10:24 +0000 | [diff] [blame] | 690 | MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32), | 
| Vincent Lejeune | a09873d | 2013-06-03 15:44:16 +0000 | [diff] [blame] | 691 | RegisterJNode, RegisterINode); | 
| Vincent Lejeune | f143af3 | 2013-11-11 22:10:24 +0000 | [diff] [blame] | 692 | return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32, | 
|  | 693 | SDValue(interp, 0), SDValue(interp, 1)); | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 694 | } | 
| Vincent Lejeune | d3eed66 | 2013-05-17 16:50:20 +0000 | [diff] [blame] | 695 | case AMDGPUIntrinsic::R600_tex: | 
|  | 696 | case AMDGPUIntrinsic::R600_texc: | 
|  | 697 | case AMDGPUIntrinsic::R600_txl: | 
|  | 698 | case AMDGPUIntrinsic::R600_txlc: | 
|  | 699 | case AMDGPUIntrinsic::R600_txb: | 
|  | 700 | case AMDGPUIntrinsic::R600_txbc: | 
|  | 701 | case AMDGPUIntrinsic::R600_txf: | 
|  | 702 | case AMDGPUIntrinsic::R600_txq: | 
|  | 703 | case AMDGPUIntrinsic::R600_ddx: | 
| Vincent Lejeune | 6df3943 | 2013-10-02 16:00:33 +0000 | [diff] [blame] | 704 | case AMDGPUIntrinsic::R600_ddy: | 
|  | 705 | case AMDGPUIntrinsic::R600_ldptr: { | 
| Vincent Lejeune | d3eed66 | 2013-05-17 16:50:20 +0000 | [diff] [blame] | 706 | unsigned TextureOp; | 
|  | 707 | switch (IntrinsicID) { | 
|  | 708 | case AMDGPUIntrinsic::R600_tex: | 
|  | 709 | TextureOp = 0; | 
|  | 710 | break; | 
|  | 711 | case AMDGPUIntrinsic::R600_texc: | 
|  | 712 | TextureOp = 1; | 
|  | 713 | break; | 
|  | 714 | case AMDGPUIntrinsic::R600_txl: | 
|  | 715 | TextureOp = 2; | 
|  | 716 | break; | 
|  | 717 | case AMDGPUIntrinsic::R600_txlc: | 
|  | 718 | TextureOp = 3; | 
|  | 719 | break; | 
|  | 720 | case AMDGPUIntrinsic::R600_txb: | 
|  | 721 | TextureOp = 4; | 
|  | 722 | break; | 
|  | 723 | case AMDGPUIntrinsic::R600_txbc: | 
|  | 724 | TextureOp = 5; | 
|  | 725 | break; | 
|  | 726 | case AMDGPUIntrinsic::R600_txf: | 
|  | 727 | TextureOp = 6; | 
|  | 728 | break; | 
|  | 729 | case AMDGPUIntrinsic::R600_txq: | 
|  | 730 | TextureOp = 7; | 
|  | 731 | break; | 
|  | 732 | case AMDGPUIntrinsic::R600_ddx: | 
|  | 733 | TextureOp = 8; | 
|  | 734 | break; | 
|  | 735 | case AMDGPUIntrinsic::R600_ddy: | 
|  | 736 | TextureOp = 9; | 
|  | 737 | break; | 
| Vincent Lejeune | 6df3943 | 2013-10-02 16:00:33 +0000 | [diff] [blame] | 738 | case AMDGPUIntrinsic::R600_ldptr: | 
|  | 739 | TextureOp = 10; | 
|  | 740 | break; | 
| Vincent Lejeune | d3eed66 | 2013-05-17 16:50:20 +0000 | [diff] [blame] | 741 | default: | 
|  | 742 | llvm_unreachable("Unknow Texture Operation"); | 
|  | 743 | } | 
|  | 744 |  | 
|  | 745 | SDValue TexArgs[19] = { | 
|  | 746 | DAG.getConstant(TextureOp, MVT::i32), | 
|  | 747 | Op.getOperand(1), | 
|  | 748 | DAG.getConstant(0, MVT::i32), | 
|  | 749 | DAG.getConstant(1, MVT::i32), | 
|  | 750 | DAG.getConstant(2, MVT::i32), | 
|  | 751 | DAG.getConstant(3, MVT::i32), | 
|  | 752 | Op.getOperand(2), | 
|  | 753 | Op.getOperand(3), | 
|  | 754 | Op.getOperand(4), | 
|  | 755 | DAG.getConstant(0, MVT::i32), | 
|  | 756 | DAG.getConstant(1, MVT::i32), | 
|  | 757 | DAG.getConstant(2, MVT::i32), | 
|  | 758 | DAG.getConstant(3, MVT::i32), | 
|  | 759 | Op.getOperand(5), | 
|  | 760 | Op.getOperand(6), | 
|  | 761 | Op.getOperand(7), | 
|  | 762 | Op.getOperand(8), | 
|  | 763 | Op.getOperand(9), | 
|  | 764 | Op.getOperand(10) | 
|  | 765 | }; | 
| Craig Topper | 48d114b | 2014-04-26 18:35:24 +0000 | [diff] [blame] | 766 | return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs); | 
| Vincent Lejeune | d3eed66 | 2013-05-17 16:50:20 +0000 | [diff] [blame] | 767 | } | 
| Vincent Lejeune | 519f21e | 2013-05-17 16:50:32 +0000 | [diff] [blame] | 768 | case AMDGPUIntrinsic::AMDGPU_dp4: { | 
|  | 769 | SDValue Args[8] = { | 
|  | 770 | DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1), | 
|  | 771 | DAG.getConstant(0, MVT::i32)), | 
|  | 772 | DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2), | 
|  | 773 | DAG.getConstant(0, MVT::i32)), | 
|  | 774 | DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1), | 
|  | 775 | DAG.getConstant(1, MVT::i32)), | 
|  | 776 | DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2), | 
|  | 777 | DAG.getConstant(1, MVT::i32)), | 
|  | 778 | DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1), | 
|  | 779 | DAG.getConstant(2, MVT::i32)), | 
|  | 780 | DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2), | 
|  | 781 | DAG.getConstant(2, MVT::i32)), | 
|  | 782 | DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1), | 
|  | 783 | DAG.getConstant(3, MVT::i32)), | 
|  | 784 | DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2), | 
|  | 785 | DAG.getConstant(3, MVT::i32)) | 
|  | 786 | }; | 
| Craig Topper | 48d114b | 2014-04-26 18:35:24 +0000 | [diff] [blame] | 787 | return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args); | 
| Vincent Lejeune | 519f21e | 2013-05-17 16:50:32 +0000 | [diff] [blame] | 788 | } | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 789 |  | 
| NAKAMURA Takumi | 4f328e1 | 2013-05-22 06:37:31 +0000 | [diff] [blame] | 790 | case Intrinsic::r600_read_ngroups_x: | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 791 | return LowerImplicitParameter(DAG, VT, DL, 0); | 
| NAKAMURA Takumi | 4f328e1 | 2013-05-22 06:37:31 +0000 | [diff] [blame] | 792 | case Intrinsic::r600_read_ngroups_y: | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 793 | return LowerImplicitParameter(DAG, VT, DL, 1); | 
| NAKAMURA Takumi | 4f328e1 | 2013-05-22 06:37:31 +0000 | [diff] [blame] | 794 | case Intrinsic::r600_read_ngroups_z: | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 795 | return LowerImplicitParameter(DAG, VT, DL, 2); | 
| NAKAMURA Takumi | 4f328e1 | 2013-05-22 06:37:31 +0000 | [diff] [blame] | 796 | case Intrinsic::r600_read_global_size_x: | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 797 | return LowerImplicitParameter(DAG, VT, DL, 3); | 
| NAKAMURA Takumi | 4f328e1 | 2013-05-22 06:37:31 +0000 | [diff] [blame] | 798 | case Intrinsic::r600_read_global_size_y: | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 799 | return LowerImplicitParameter(DAG, VT, DL, 4); | 
| NAKAMURA Takumi | 4f328e1 | 2013-05-22 06:37:31 +0000 | [diff] [blame] | 800 | case Intrinsic::r600_read_global_size_z: | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 801 | return LowerImplicitParameter(DAG, VT, DL, 5); | 
| NAKAMURA Takumi | 4f328e1 | 2013-05-22 06:37:31 +0000 | [diff] [blame] | 802 | case Intrinsic::r600_read_local_size_x: | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 803 | return LowerImplicitParameter(DAG, VT, DL, 6); | 
| NAKAMURA Takumi | 4f328e1 | 2013-05-22 06:37:31 +0000 | [diff] [blame] | 804 | case Intrinsic::r600_read_local_size_y: | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 805 | return LowerImplicitParameter(DAG, VT, DL, 7); | 
| NAKAMURA Takumi | 4f328e1 | 2013-05-22 06:37:31 +0000 | [diff] [blame] | 806 | case Intrinsic::r600_read_local_size_z: | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 807 | return LowerImplicitParameter(DAG, VT, DL, 8); | 
|  | 808 |  | 
| NAKAMURA Takumi | 4f328e1 | 2013-05-22 06:37:31 +0000 | [diff] [blame] | 809 | case Intrinsic::r600_read_tgid_x: | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 810 | return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, | 
|  | 811 | AMDGPU::T1_X, VT); | 
| NAKAMURA Takumi | 4f328e1 | 2013-05-22 06:37:31 +0000 | [diff] [blame] | 812 | case Intrinsic::r600_read_tgid_y: | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 813 | return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, | 
|  | 814 | AMDGPU::T1_Y, VT); | 
| NAKAMURA Takumi | 4f328e1 | 2013-05-22 06:37:31 +0000 | [diff] [blame] | 815 | case Intrinsic::r600_read_tgid_z: | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 816 | return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, | 
|  | 817 | AMDGPU::T1_Z, VT); | 
| NAKAMURA Takumi | 4f328e1 | 2013-05-22 06:37:31 +0000 | [diff] [blame] | 818 | case Intrinsic::r600_read_tidig_x: | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 819 | return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, | 
|  | 820 | AMDGPU::T0_X, VT); | 
| NAKAMURA Takumi | 4f328e1 | 2013-05-22 06:37:31 +0000 | [diff] [blame] | 821 | case Intrinsic::r600_read_tidig_y: | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 822 | return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, | 
|  | 823 | AMDGPU::T0_Y, VT); | 
| NAKAMURA Takumi | 4f328e1 | 2013-05-22 06:37:31 +0000 | [diff] [blame] | 824 | case Intrinsic::r600_read_tidig_z: | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 825 | return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, | 
|  | 826 | AMDGPU::T0_Z, VT); | 
| Matt Arsenault | 257d48d | 2014-06-24 22:13:39 +0000 | [diff] [blame] | 827 | case Intrinsic::AMDGPU_rsq: | 
|  | 828 | // XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior. | 
|  | 829 | return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1)); | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 830 | } | 
|  | 831 | // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode()) | 
|  | 832 | break; | 
|  | 833 | } | 
|  | 834 | } // end switch(Op.getOpcode()) | 
|  | 835 | return SDValue(); | 
|  | 836 | } | 
|  | 837 |  | 
|  | 838 | void R600TargetLowering::ReplaceNodeResults(SDNode *N, | 
|  | 839 | SmallVectorImpl<SDValue> &Results, | 
|  | 840 | SelectionDAG &DAG) const { | 
|  | 841 | switch (N->getOpcode()) { | 
| Matt Arsenault | d125d74 | 2014-03-27 17:23:24 +0000 | [diff] [blame] | 842 | default: | 
|  | 843 | AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG); | 
|  | 844 | return; | 
| Jan Vesely | 2cb62ce | 2014-07-10 22:40:21 +0000 | [diff] [blame] | 845 | case ISD::FP_TO_UINT: | 
|  | 846 | if (N->getValueType(0) == MVT::i1) { | 
|  | 847 | Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG)); | 
|  | 848 | return; | 
|  | 849 | } | 
|  | 850 | // Fall-through. Since we don't care about out of bounds values | 
|  | 851 | // we can use FP_TO_SINT for uints too. The DAGLegalizer code for uint | 
|  | 852 | // considers some extra cases which are not necessary here. | 
|  | 853 | case ISD::FP_TO_SINT: { | 
|  | 854 | SDValue Result; | 
|  | 855 | if (expandFP_TO_SINT(N, Result, DAG)) | 
|  | 856 | Results.push_back(Result); | 
| Tom Stellard | 365366f | 2013-01-23 02:09:06 +0000 | [diff] [blame] | 857 | return; | 
| Jan Vesely | 2cb62ce | 2014-07-10 22:40:21 +0000 | [diff] [blame] | 858 | } | 
| Jan Vesely | 343cd6f0 | 2014-06-22 21:43:01 +0000 | [diff] [blame] | 859 | case ISD::UDIV: { | 
|  | 860 | SDValue Op = SDValue(N, 0); | 
|  | 861 | SDLoc DL(Op); | 
|  | 862 | EVT VT = Op.getValueType(); | 
|  | 863 | SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT), | 
|  | 864 | N->getOperand(0), N->getOperand(1)); | 
|  | 865 | Results.push_back(UDIVREM); | 
|  | 866 | break; | 
|  | 867 | } | 
|  | 868 | case ISD::UREM: { | 
|  | 869 | SDValue Op = SDValue(N, 0); | 
|  | 870 | SDLoc DL(Op); | 
|  | 871 | EVT VT = Op.getValueType(); | 
|  | 872 | SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT), | 
|  | 873 | N->getOperand(0), N->getOperand(1)); | 
|  | 874 | Results.push_back(UDIVREM.getValue(1)); | 
|  | 875 | break; | 
|  | 876 | } | 
|  | 877 | case ISD::SDIV: { | 
|  | 878 | SDValue Op = SDValue(N, 0); | 
|  | 879 | SDLoc DL(Op); | 
|  | 880 | EVT VT = Op.getValueType(); | 
|  | 881 | SDValue SDIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(VT, VT), | 
|  | 882 | N->getOperand(0), N->getOperand(1)); | 
|  | 883 | Results.push_back(SDIVREM); | 
|  | 884 | break; | 
|  | 885 | } | 
|  | 886 | case ISD::SREM: { | 
|  | 887 | SDValue Op = SDValue(N, 0); | 
|  | 888 | SDLoc DL(Op); | 
|  | 889 | EVT VT = Op.getValueType(); | 
|  | 890 | SDValue SDIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(VT, VT), | 
|  | 891 | N->getOperand(0), N->getOperand(1)); | 
|  | 892 | Results.push_back(SDIVREM.getValue(1)); | 
|  | 893 | break; | 
|  | 894 | } | 
|  | 895 | case ISD::SDIVREM: { | 
|  | 896 | SDValue Op = SDValue(N, 1); | 
|  | 897 | SDValue RES = LowerSDIVREM(Op, DAG); | 
|  | 898 | Results.push_back(RES); | 
|  | 899 | Results.push_back(RES.getValue(1)); | 
|  | 900 | break; | 
|  | 901 | } | 
|  | 902 | case ISD::UDIVREM: { | 
|  | 903 | SDValue Op = SDValue(N, 0); | 
|  | 904 | SDLoc DL(Op); | 
|  | 905 | EVT VT = Op.getValueType(); | 
|  | 906 | EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext()); | 
|  | 907 |  | 
|  | 908 | SDValue one = DAG.getConstant(1, HalfVT); | 
|  | 909 | SDValue zero = DAG.getConstant(0, HalfVT); | 
|  | 910 |  | 
|  | 911 | //HiLo split | 
|  | 912 | SDValue LHS = N->getOperand(0); | 
|  | 913 | SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, zero); | 
|  | 914 | SDValue LHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, one); | 
|  | 915 |  | 
|  | 916 | SDValue RHS = N->getOperand(1); | 
|  | 917 | SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, zero); | 
|  | 918 | SDValue RHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, one); | 
|  | 919 |  | 
|  | 920 | // Get Speculative values | 
|  | 921 | SDValue DIV_Part = DAG.getNode(ISD::UDIV, DL, HalfVT, LHS_Hi, RHS_Lo); | 
|  | 922 | SDValue REM_Part = DAG.getNode(ISD::UREM, DL, HalfVT, LHS_Hi, RHS_Lo); | 
|  | 923 |  | 
|  | 924 | SDValue REM_Hi = zero; | 
|  | 925 | SDValue REM_Lo = DAG.getSelectCC(DL, RHS_Hi, zero, REM_Part, LHS_Hi, ISD::SETEQ); | 
|  | 926 |  | 
|  | 927 | SDValue DIV_Hi = DAG.getSelectCC(DL, RHS_Hi, zero, DIV_Part, zero, ISD::SETEQ); | 
|  | 928 | SDValue DIV_Lo = zero; | 
|  | 929 |  | 
|  | 930 | const unsigned halfBitWidth = HalfVT.getSizeInBits(); | 
|  | 931 |  | 
|  | 932 | for (unsigned i = 0; i < halfBitWidth; ++i) { | 
|  | 933 | SDValue POS = DAG.getConstant(halfBitWidth - i - 1, HalfVT); | 
|  | 934 | // Get Value of high bit | 
|  | 935 | SDValue HBit; | 
|  | 936 | if (halfBitWidth == 32 && Subtarget->hasBFE()) { | 
|  | 937 | HBit = DAG.getNode(AMDGPUISD::BFE_U32, DL, HalfVT, LHS_Lo, POS, one); | 
|  | 938 | } else { | 
|  | 939 | HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS); | 
|  | 940 | HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, one); | 
|  | 941 | } | 
|  | 942 |  | 
|  | 943 | SDValue Carry = DAG.getNode(ISD::SRL, DL, HalfVT, REM_Lo, | 
|  | 944 | DAG.getConstant(halfBitWidth - 1, HalfVT)); | 
|  | 945 | REM_Hi = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Hi, one); | 
|  | 946 | REM_Hi = DAG.getNode(ISD::OR, DL, HalfVT, REM_Hi, Carry); | 
|  | 947 |  | 
|  | 948 | REM_Lo = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Lo, one); | 
|  | 949 | REM_Lo = DAG.getNode(ISD::OR, DL, HalfVT, REM_Lo, HBit); | 
|  | 950 |  | 
|  | 951 |  | 
|  | 952 | SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi); | 
|  | 953 |  | 
|  | 954 | SDValue BIT = DAG.getConstant(1 << (halfBitWidth - i - 1), HalfVT); | 
|  | 955 | SDValue realBIT = DAG.getSelectCC(DL, REM, RHS, BIT, zero, ISD::SETGE); | 
|  | 956 |  | 
|  | 957 | DIV_Lo = DAG.getNode(ISD::OR, DL, HalfVT, DIV_Lo, realBIT); | 
|  | 958 |  | 
|  | 959 | // Update REM | 
|  | 960 |  | 
|  | 961 | SDValue REM_sub = DAG.getNode(ISD::SUB, DL, VT, REM, RHS); | 
|  | 962 |  | 
|  | 963 | REM = DAG.getSelectCC(DL, REM, RHS, REM_sub, REM, ISD::SETGE); | 
|  | 964 | REM_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, zero); | 
|  | 965 | REM_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, one); | 
|  | 966 | } | 
|  | 967 |  | 
|  | 968 | SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi); | 
|  | 969 | SDValue DIV = DAG.getNode(ISD::BUILD_PAIR, DL, VT, DIV_Lo, DIV_Hi); | 
|  | 970 | Results.push_back(DIV); | 
|  | 971 | Results.push_back(REM); | 
|  | 972 | break; | 
|  | 973 | } | 
|  | 974 | } | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 975 | } | 
|  | 976 |  | 
| Tom Stellard | 880a80a | 2014-06-17 16:53:14 +0000 | [diff] [blame] | 977 | SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG, | 
|  | 978 | SDValue Vector) const { | 
|  | 979 |  | 
|  | 980 | SDLoc DL(Vector); | 
|  | 981 | EVT VecVT = Vector.getValueType(); | 
|  | 982 | EVT EltVT = VecVT.getVectorElementType(); | 
|  | 983 | SmallVector<SDValue, 8> Args; | 
|  | 984 |  | 
|  | 985 | for (unsigned i = 0, e = VecVT.getVectorNumElements(); | 
|  | 986 | i != e; ++i) { | 
|  | 987 | Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, | 
|  | 988 | Vector, DAG.getConstant(i, getVectorIdxTy()))); | 
|  | 989 | } | 
|  | 990 |  | 
|  | 991 | return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args); | 
|  | 992 | } | 
|  | 993 |  | 
|  | 994 | SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, | 
|  | 995 | SelectionDAG &DAG) const { | 
|  | 996 |  | 
|  | 997 | SDLoc DL(Op); | 
|  | 998 | SDValue Vector = Op.getOperand(0); | 
|  | 999 | SDValue Index = Op.getOperand(1); | 
|  | 1000 |  | 
|  | 1001 | if (isa<ConstantSDNode>(Index) || | 
|  | 1002 | Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR) | 
|  | 1003 | return Op; | 
|  | 1004 |  | 
|  | 1005 | Vector = vectorToVerticalVector(DAG, Vector); | 
|  | 1006 | return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(), | 
|  | 1007 | Vector, Index); | 
|  | 1008 | } | 
|  | 1009 |  | 
|  | 1010 | SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, | 
|  | 1011 | SelectionDAG &DAG) const { | 
|  | 1012 | SDLoc DL(Op); | 
|  | 1013 | SDValue Vector = Op.getOperand(0); | 
|  | 1014 | SDValue Value = Op.getOperand(1); | 
|  | 1015 | SDValue Index = Op.getOperand(2); | 
|  | 1016 |  | 
|  | 1017 | if (isa<ConstantSDNode>(Index) || | 
|  | 1018 | Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR) | 
|  | 1019 | return Op; | 
|  | 1020 |  | 
|  | 1021 | Vector = vectorToVerticalVector(DAG, Vector); | 
|  | 1022 | SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(), | 
|  | 1023 | Vector, Value, Index); | 
|  | 1024 | return vectorToVerticalVector(DAG, Insert); | 
|  | 1025 | } | 
|  | 1026 |  | 
| Vincent Lejeune | b55940c | 2013-07-09 15:03:11 +0000 | [diff] [blame] | 1027 | SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const { | 
|  | 1028 | // On hw >= R700, COS/SIN input must be between -1. and 1. | 
|  | 1029 | // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5) | 
|  | 1030 | EVT VT = Op.getValueType(); | 
|  | 1031 | SDValue Arg = Op.getOperand(0); | 
|  | 1032 | SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT, | 
|  | 1033 | DAG.getNode(ISD::FADD, SDLoc(Op), VT, | 
|  | 1034 | DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg, | 
|  | 1035 | DAG.getConstantFP(0.15915494309, MVT::f32)), | 
|  | 1036 | DAG.getConstantFP(0.5, MVT::f32))); | 
|  | 1037 | unsigned TrigNode; | 
|  | 1038 | switch (Op.getOpcode()) { | 
|  | 1039 | case ISD::FCOS: | 
|  | 1040 | TrigNode = AMDGPUISD::COS_HW; | 
|  | 1041 | break; | 
|  | 1042 | case ISD::FSIN: | 
|  | 1043 | TrigNode = AMDGPUISD::SIN_HW; | 
|  | 1044 | break; | 
|  | 1045 | default: | 
|  | 1046 | llvm_unreachable("Wrong trig opcode"); | 
|  | 1047 | } | 
|  | 1048 | SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT, | 
|  | 1049 | DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart, | 
|  | 1050 | DAG.getConstantFP(-0.5, MVT::f32))); | 
|  | 1051 | if (Gen >= AMDGPUSubtarget::R700) | 
|  | 1052 | return TrigVal; | 
|  | 1053 | // On R600 hw, COS/SIN input must be between -Pi and Pi. | 
|  | 1054 | return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal, | 
|  | 1055 | DAG.getConstantFP(3.14159265359, MVT::f32)); | 
|  | 1056 | } | 
|  | 1057 |  | 
| Jan Vesely | 25f3627 | 2014-06-18 12:27:13 +0000 | [diff] [blame] | 1058 | SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const { | 
|  | 1059 | SDLoc DL(Op); | 
|  | 1060 | EVT VT = Op.getValueType(); | 
|  | 1061 |  | 
|  | 1062 | SDValue Lo = Op.getOperand(0); | 
|  | 1063 | SDValue Hi = Op.getOperand(1); | 
|  | 1064 | SDValue Shift = Op.getOperand(2); | 
|  | 1065 | SDValue Zero = DAG.getConstant(0, VT); | 
|  | 1066 | SDValue One  = DAG.getConstant(1, VT); | 
|  | 1067 |  | 
|  | 1068 | SDValue Width  = DAG.getConstant(VT.getSizeInBits(), VT); | 
|  | 1069 | SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, VT); | 
|  | 1070 | SDValue BigShift  = DAG.getNode(ISD::SUB, DL, VT, Shift, Width); | 
|  | 1071 | SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift); | 
|  | 1072 |  | 
|  | 1073 | // The dance around Width1 is necessary for 0 special case. | 
|  | 1074 | // Without it the CompShift might be 32, producing incorrect results in | 
|  | 1075 | // Overflow. So we do the shift in two steps, the alternative is to | 
|  | 1076 | // add a conditional to filter the special case. | 
|  | 1077 |  | 
|  | 1078 | SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift); | 
|  | 1079 | Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One); | 
|  | 1080 |  | 
|  | 1081 | SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift); | 
|  | 1082 | HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow); | 
|  | 1083 | SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift); | 
|  | 1084 |  | 
|  | 1085 | SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift); | 
|  | 1086 | SDValue LoBig = Zero; | 
|  | 1087 |  | 
|  | 1088 | Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT); | 
|  | 1089 | Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT); | 
|  | 1090 |  | 
|  | 1091 | return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi); | 
|  | 1092 | } | 
|  | 1093 |  | 
| Jan Vesely | 900ff2e | 2014-06-18 12:27:15 +0000 | [diff] [blame] | 1094 | SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const { | 
|  | 1095 | SDLoc DL(Op); | 
|  | 1096 | EVT VT = Op.getValueType(); | 
|  | 1097 |  | 
|  | 1098 | SDValue Lo = Op.getOperand(0); | 
|  | 1099 | SDValue Hi = Op.getOperand(1); | 
|  | 1100 | SDValue Shift = Op.getOperand(2); | 
|  | 1101 | SDValue Zero = DAG.getConstant(0, VT); | 
|  | 1102 | SDValue One  = DAG.getConstant(1, VT); | 
|  | 1103 |  | 
| Jan Vesely | ecf5133 | 2014-06-18 12:27:17 +0000 | [diff] [blame] | 1104 | const bool SRA = Op.getOpcode() == ISD::SRA_PARTS; | 
|  | 1105 |  | 
| Jan Vesely | 900ff2e | 2014-06-18 12:27:15 +0000 | [diff] [blame] | 1106 | SDValue Width  = DAG.getConstant(VT.getSizeInBits(), VT); | 
|  | 1107 | SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, VT); | 
|  | 1108 | SDValue BigShift  = DAG.getNode(ISD::SUB, DL, VT, Shift, Width); | 
|  | 1109 | SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift); | 
|  | 1110 |  | 
|  | 1111 | // The dance around Width1 is necessary for 0 special case. | 
|  | 1112 | // Without it the CompShift might be 32, producing incorrect results in | 
|  | 1113 | // Overflow. So we do the shift in two steps, the alternative is to | 
|  | 1114 | // add a conditional to filter the special case. | 
|  | 1115 |  | 
|  | 1116 | SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift); | 
|  | 1117 | Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One); | 
|  | 1118 |  | 
| Jan Vesely | ecf5133 | 2014-06-18 12:27:17 +0000 | [diff] [blame] | 1119 | SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift); | 
| Jan Vesely | 900ff2e | 2014-06-18 12:27:15 +0000 | [diff] [blame] | 1120 | SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift); | 
|  | 1121 | LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow); | 
|  | 1122 |  | 
| Jan Vesely | ecf5133 | 2014-06-18 12:27:17 +0000 | [diff] [blame] | 1123 | SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift); | 
|  | 1124 | SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero; | 
| Jan Vesely | 900ff2e | 2014-06-18 12:27:15 +0000 | [diff] [blame] | 1125 |  | 
|  | 1126 | Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT); | 
|  | 1127 | Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT); | 
|  | 1128 |  | 
|  | 1129 | return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi); | 
|  | 1130 | } | 
|  | 1131 |  | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 1132 | SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const { | 
|  | 1133 | return DAG.getNode( | 
|  | 1134 | ISD::SETCC, | 
| Andrew Trick | ef9de2a | 2013-05-25 02:42:55 +0000 | [diff] [blame] | 1135 | SDLoc(Op), | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 1136 | MVT::i1, | 
|  | 1137 | Op, DAG.getConstantFP(0.0f, MVT::f32), | 
|  | 1138 | DAG.getCondCode(ISD::SETNE) | 
|  | 1139 | ); | 
|  | 1140 | } | 
|  | 1141 |  | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 1142 | SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT, | 
| Andrew Trick | ef9de2a | 2013-05-25 02:42:55 +0000 | [diff] [blame] | 1143 | SDLoc DL, | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 1144 | unsigned DwordOffset) const { | 
|  | 1145 | unsigned ByteOffset = DwordOffset * 4; | 
|  | 1146 | PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()), | 
| Tom Stellard | 1e80309 | 2013-07-23 01:48:18 +0000 | [diff] [blame] | 1147 | AMDGPUAS::CONSTANT_BUFFER_0); | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 1148 |  | 
|  | 1149 | // We shouldn't be using an offset wider than 16-bits for implicit parameters. | 
|  | 1150 | assert(isInt<16>(ByteOffset)); | 
|  | 1151 |  | 
|  | 1152 | return DAG.getLoad(VT, DL, DAG.getEntryNode(), | 
|  | 1153 | DAG.getConstant(ByteOffset, MVT::i32), // PTR | 
|  | 1154 | MachinePointerInfo(ConstantPointerNull::get(PtrType)), | 
|  | 1155 | false, false, false, 0); | 
|  | 1156 | } | 
|  | 1157 |  | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 1158 | bool R600TargetLowering::isZero(SDValue Op) const { | 
|  | 1159 | if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) { | 
|  | 1160 | return Cst->isNullValue(); | 
|  | 1161 | } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){ | 
|  | 1162 | return CstFP->isZero(); | 
|  | 1163 | } else { | 
|  | 1164 | return false; | 
|  | 1165 | } | 
|  | 1166 | } | 
|  | 1167 |  | 
|  | 1168 | SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { | 
| Andrew Trick | ef9de2a | 2013-05-25 02:42:55 +0000 | [diff] [blame] | 1169 | SDLoc DL(Op); | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 1170 | EVT VT = Op.getValueType(); | 
|  | 1171 |  | 
|  | 1172 | SDValue LHS = Op.getOperand(0); | 
|  | 1173 | SDValue RHS = Op.getOperand(1); | 
|  | 1174 | SDValue True = Op.getOperand(2); | 
|  | 1175 | SDValue False = Op.getOperand(3); | 
|  | 1176 | SDValue CC = Op.getOperand(4); | 
|  | 1177 | SDValue Temp; | 
|  | 1178 |  | 
|  | 1179 | // LHS and RHS are guaranteed to be the same value type | 
|  | 1180 | EVT CompareVT = LHS.getValueType(); | 
|  | 1181 |  | 
|  | 1182 | // Check if we can lower this to a native operation. | 
|  | 1183 |  | 
| Tom Stellard | 2add82d | 2013-03-08 15:37:09 +0000 | [diff] [blame] | 1184 | // Try to lower to a SET* instruction: | 
|  | 1185 | // | 
|  | 1186 | // SET* can match the following patterns: | 
|  | 1187 | // | 
| Tom Stellard | cd42818 | 2013-09-28 02:50:38 +0000 | [diff] [blame] | 1188 | // select_cc f32, f32, -1,  0, cc_supported | 
|  | 1189 | // select_cc f32, f32, 1.0f, 0.0f, cc_supported | 
|  | 1190 | // select_cc i32, i32, -1,  0, cc_supported | 
| Tom Stellard | 2add82d | 2013-03-08 15:37:09 +0000 | [diff] [blame] | 1191 | // | 
|  | 1192 |  | 
|  | 1193 | // Move hardware True/False values to the correct operand. | 
| Tom Stellard | cd42818 | 2013-09-28 02:50:38 +0000 | [diff] [blame] | 1194 | ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get(); | 
|  | 1195 | ISD::CondCode InverseCC = | 
|  | 1196 | ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32); | 
| Tom Stellard | 5694d30 | 2013-09-28 02:50:43 +0000 | [diff] [blame] | 1197 | if (isHWTrueValue(False) && isHWFalseValue(True)) { | 
|  | 1198 | if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) { | 
|  | 1199 | std::swap(False, True); | 
|  | 1200 | CC = DAG.getCondCode(InverseCC); | 
|  | 1201 | } else { | 
|  | 1202 | ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC); | 
|  | 1203 | if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) { | 
|  | 1204 | std::swap(False, True); | 
|  | 1205 | std::swap(LHS, RHS); | 
|  | 1206 | CC = DAG.getCondCode(SwapInvCC); | 
|  | 1207 | } | 
|  | 1208 | } | 
| Tom Stellard | 2add82d | 2013-03-08 15:37:09 +0000 | [diff] [blame] | 1209 | } | 
|  | 1210 |  | 
|  | 1211 | if (isHWTrueValue(True) && isHWFalseValue(False) && | 
|  | 1212 | (CompareVT == VT || VT == MVT::i32)) { | 
|  | 1213 | // This can be matched by a SET* instruction. | 
|  | 1214 | return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC); | 
|  | 1215 | } | 
|  | 1216 |  | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 1217 | // Try to lower to a CND* instruction: | 
| Tom Stellard | 2add82d | 2013-03-08 15:37:09 +0000 | [diff] [blame] | 1218 | // | 
|  | 1219 | // CND* can match the following patterns: | 
|  | 1220 | // | 
| Tom Stellard | cd42818 | 2013-09-28 02:50:38 +0000 | [diff] [blame] | 1221 | // select_cc f32, 0.0, f32, f32, cc_supported | 
|  | 1222 | // select_cc f32, 0.0, i32, i32, cc_supported | 
|  | 1223 | // select_cc i32, 0,   f32, f32, cc_supported | 
|  | 1224 | // select_cc i32, 0,   i32, i32, cc_supported | 
| Tom Stellard | 2add82d | 2013-03-08 15:37:09 +0000 | [diff] [blame] | 1225 | // | 
| Tom Stellard | cd42818 | 2013-09-28 02:50:38 +0000 | [diff] [blame] | 1226 |  | 
|  | 1227 | // Try to move the zero value to the RHS | 
|  | 1228 | if (isZero(LHS)) { | 
|  | 1229 | ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get(); | 
|  | 1230 | // Try swapping the operands | 
|  | 1231 | ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode); | 
|  | 1232 | if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) { | 
|  | 1233 | std::swap(LHS, RHS); | 
|  | 1234 | CC = DAG.getCondCode(CCSwapped); | 
|  | 1235 | } else { | 
|  | 1236 | // Try inverting the conditon and then swapping the operands | 
|  | 1237 | ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger()); | 
|  | 1238 | CCSwapped = ISD::getSetCCSwappedOperands(CCInv); | 
|  | 1239 | if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) { | 
|  | 1240 | std::swap(True, False); | 
|  | 1241 | std::swap(LHS, RHS); | 
|  | 1242 | CC = DAG.getCondCode(CCSwapped); | 
|  | 1243 | } | 
|  | 1244 | } | 
|  | 1245 | } | 
|  | 1246 | if (isZero(RHS)) { | 
|  | 1247 | SDValue Cond = LHS; | 
|  | 1248 | SDValue Zero = RHS; | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 1249 | ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get(); | 
|  | 1250 | if (CompareVT != VT) { | 
|  | 1251 | // Bitcast True / False to the correct types.  This will end up being | 
|  | 1252 | // a nop, but it allows us to define only a single pattern in the | 
|  | 1253 | // .TD files for each CND* instruction rather than having to have | 
|  | 1254 | // one pattern for integer True/False and one for fp True/False | 
|  | 1255 | True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True); | 
|  | 1256 | False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False); | 
|  | 1257 | } | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 1258 |  | 
|  | 1259 | switch (CCOpcode) { | 
|  | 1260 | case ISD::SETONE: | 
|  | 1261 | case ISD::SETUNE: | 
|  | 1262 | case ISD::SETNE: | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 1263 | CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32); | 
|  | 1264 | Temp = True; | 
|  | 1265 | True = False; | 
|  | 1266 | False = Temp; | 
|  | 1267 | break; | 
|  | 1268 | default: | 
|  | 1269 | break; | 
|  | 1270 | } | 
|  | 1271 | SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, | 
|  | 1272 | Cond, Zero, | 
|  | 1273 | True, False, | 
|  | 1274 | DAG.getCondCode(CCOpcode)); | 
|  | 1275 | return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode); | 
|  | 1276 | } | 
|  | 1277 |  | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 1278 | // If we make it this for it means we have no native instructions to handle | 
|  | 1279 | // this SELECT_CC, so we must lower it. | 
|  | 1280 | SDValue HWTrue, HWFalse; | 
|  | 1281 |  | 
|  | 1282 | if (CompareVT == MVT::f32) { | 
|  | 1283 | HWTrue = DAG.getConstantFP(1.0f, CompareVT); | 
|  | 1284 | HWFalse = DAG.getConstantFP(0.0f, CompareVT); | 
|  | 1285 | } else if (CompareVT == MVT::i32) { | 
|  | 1286 | HWTrue = DAG.getConstant(-1, CompareVT); | 
|  | 1287 | HWFalse = DAG.getConstant(0, CompareVT); | 
|  | 1288 | } | 
|  | 1289 | else { | 
| Matt Arsenault | eaa3a7e | 2013-12-10 21:37:42 +0000 | [diff] [blame] | 1290 | llvm_unreachable("Unhandled value type in LowerSELECT_CC"); | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 1291 | } | 
|  | 1292 |  | 
|  | 1293 | // Lower this unsupported SELECT_CC into a combination of two supported | 
|  | 1294 | // SELECT_CC operations. | 
|  | 1295 | SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC); | 
|  | 1296 |  | 
|  | 1297 | return DAG.getNode(ISD::SELECT_CC, DL, VT, | 
|  | 1298 | Cond, HWFalse, | 
|  | 1299 | True, False, | 
|  | 1300 | DAG.getCondCode(ISD::SETNE)); | 
|  | 1301 | } | 
|  | 1302 |  | 
| Alp Toker | cb40291 | 2014-01-24 17:20:08 +0000 | [diff] [blame] | 1303 | /// LLVM generates byte-addressed pointers.  For indirect addressing, we need to | 
| Tom Stellard | f3b2a1e | 2013-02-06 17:32:29 +0000 | [diff] [blame] | 1304 | /// convert these pointers to a register index.  Each register holds | 
|  | 1305 | /// 16 bytes, (4 x 32bit sub-register), but we need to take into account the | 
|  | 1306 | /// \p StackWidth, which tells us how many of the 4 sub-registrers will be used | 
|  | 1307 | /// for indirect addressing. | 
|  | 1308 | SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr, | 
|  | 1309 | unsigned StackWidth, | 
|  | 1310 | SelectionDAG &DAG) const { | 
|  | 1311 | unsigned SRLPad; | 
|  | 1312 | switch(StackWidth) { | 
|  | 1313 | case 1: | 
|  | 1314 | SRLPad = 2; | 
|  | 1315 | break; | 
|  | 1316 | case 2: | 
|  | 1317 | SRLPad = 3; | 
|  | 1318 | break; | 
|  | 1319 | case 4: | 
|  | 1320 | SRLPad = 4; | 
|  | 1321 | break; | 
|  | 1322 | default: llvm_unreachable("Invalid stack width"); | 
|  | 1323 | } | 
|  | 1324 |  | 
| Andrew Trick | ef9de2a | 2013-05-25 02:42:55 +0000 | [diff] [blame] | 1325 | return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr, | 
| Tom Stellard | f3b2a1e | 2013-02-06 17:32:29 +0000 | [diff] [blame] | 1326 | DAG.getConstant(SRLPad, MVT::i32)); | 
|  | 1327 | } | 
|  | 1328 |  | 
|  | 1329 | void R600TargetLowering::getStackAddress(unsigned StackWidth, | 
|  | 1330 | unsigned ElemIdx, | 
|  | 1331 | unsigned &Channel, | 
|  | 1332 | unsigned &PtrIncr) const { | 
|  | 1333 | switch (StackWidth) { | 
|  | 1334 | default: | 
|  | 1335 | case 1: | 
|  | 1336 | Channel = 0; | 
|  | 1337 | if (ElemIdx > 0) { | 
|  | 1338 | PtrIncr = 1; | 
|  | 1339 | } else { | 
|  | 1340 | PtrIncr = 0; | 
|  | 1341 | } | 
|  | 1342 | break; | 
|  | 1343 | case 2: | 
|  | 1344 | Channel = ElemIdx % 2; | 
|  | 1345 | if (ElemIdx == 2) { | 
|  | 1346 | PtrIncr = 1; | 
|  | 1347 | } else { | 
|  | 1348 | PtrIncr = 0; | 
|  | 1349 | } | 
|  | 1350 | break; | 
|  | 1351 | case 4: | 
|  | 1352 | Channel = ElemIdx; | 
|  | 1353 | PtrIncr = 0; | 
|  | 1354 | break; | 
|  | 1355 | } | 
|  | 1356 | } | 
|  | 1357 |  | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 1358 | SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { | 
| Andrew Trick | ef9de2a | 2013-05-25 02:42:55 +0000 | [diff] [blame] | 1359 | SDLoc DL(Op); | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 1360 | StoreSDNode *StoreNode = cast<StoreSDNode>(Op); | 
|  | 1361 | SDValue Chain = Op.getOperand(0); | 
|  | 1362 | SDValue Value = Op.getOperand(1); | 
|  | 1363 | SDValue Ptr = Op.getOperand(2); | 
|  | 1364 |  | 
| Tom Stellard | 2ffc330 | 2013-08-26 15:05:44 +0000 | [diff] [blame] | 1365 | SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG); | 
| Tom Stellard | fbab827 | 2013-08-16 01:12:11 +0000 | [diff] [blame] | 1366 | if (Result.getNode()) { | 
|  | 1367 | return Result; | 
|  | 1368 | } | 
|  | 1369 |  | 
| Tom Stellard | d3ee8c1 | 2013-08-16 01:12:06 +0000 | [diff] [blame] | 1370 | if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) { | 
|  | 1371 | if (StoreNode->isTruncatingStore()) { | 
|  | 1372 | EVT VT = Value.getValueType(); | 
| Tom Stellard | fbab827 | 2013-08-16 01:12:11 +0000 | [diff] [blame] | 1373 | assert(VT.bitsLE(MVT::i32)); | 
| Tom Stellard | d3ee8c1 | 2013-08-16 01:12:06 +0000 | [diff] [blame] | 1374 | EVT MemVT = StoreNode->getMemoryVT(); | 
|  | 1375 | SDValue MaskConstant; | 
|  | 1376 | if (MemVT == MVT::i8) { | 
|  | 1377 | MaskConstant = DAG.getConstant(0xFF, MVT::i32); | 
|  | 1378 | } else { | 
|  | 1379 | assert(MemVT == MVT::i16); | 
|  | 1380 | MaskConstant = DAG.getConstant(0xFFFF, MVT::i32); | 
|  | 1381 | } | 
|  | 1382 | SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr, | 
|  | 1383 | DAG.getConstant(2, MVT::i32)); | 
|  | 1384 | SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr, | 
|  | 1385 | DAG.getConstant(0x00000003, VT)); | 
|  | 1386 | SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant); | 
|  | 1387 | SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex, | 
|  | 1388 | DAG.getConstant(3, VT)); | 
|  | 1389 | SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift); | 
|  | 1390 | SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift); | 
|  | 1391 | // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32 | 
|  | 1392 | // vector instead. | 
|  | 1393 | SDValue Src[4] = { | 
|  | 1394 | ShiftedValue, | 
|  | 1395 | DAG.getConstant(0, MVT::i32), | 
|  | 1396 | DAG.getConstant(0, MVT::i32), | 
|  | 1397 | Mask | 
|  | 1398 | }; | 
| Craig Topper | 48d114b | 2014-04-26 18:35:24 +0000 | [diff] [blame] | 1399 | SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src); | 
| Tom Stellard | d3ee8c1 | 2013-08-16 01:12:06 +0000 | [diff] [blame] | 1400 | SDValue Args[3] = { Chain, Input, DWordAddr }; | 
|  | 1401 | return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL, | 
| Craig Topper | 206fcd4 | 2014-04-26 19:29:41 +0000 | [diff] [blame] | 1402 | Op->getVTList(), Args, MemVT, | 
| Tom Stellard | d3ee8c1 | 2013-08-16 01:12:06 +0000 | [diff] [blame] | 1403 | StoreNode->getMemOperand()); | 
|  | 1404 | } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR && | 
|  | 1405 | Value.getValueType().bitsGE(MVT::i32)) { | 
|  | 1406 | // Convert pointer from byte address to dword address. | 
|  | 1407 | Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(), | 
|  | 1408 | DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), | 
|  | 1409 | Ptr, DAG.getConstant(2, MVT::i32))); | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 1410 |  | 
| Tom Stellard | d3ee8c1 | 2013-08-16 01:12:06 +0000 | [diff] [blame] | 1411 | if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) { | 
| Matt Arsenault | eaa3a7e | 2013-12-10 21:37:42 +0000 | [diff] [blame] | 1412 | llvm_unreachable("Truncated and indexed stores not supported yet"); | 
| Tom Stellard | d3ee8c1 | 2013-08-16 01:12:06 +0000 | [diff] [blame] | 1413 | } else { | 
|  | 1414 | Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand()); | 
|  | 1415 | } | 
|  | 1416 | return Chain; | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 1417 | } | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 1418 | } | 
| Tom Stellard | f3b2a1e | 2013-02-06 17:32:29 +0000 | [diff] [blame] | 1419 |  | 
|  | 1420 | EVT ValueVT = Value.getValueType(); | 
|  | 1421 |  | 
|  | 1422 | if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) { | 
|  | 1423 | return SDValue(); | 
|  | 1424 | } | 
|  | 1425 |  | 
| Tom Stellard | e937360 | 2014-01-22 19:24:14 +0000 | [diff] [blame] | 1426 | SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG); | 
|  | 1427 | if (Ret.getNode()) { | 
|  | 1428 | return Ret; | 
|  | 1429 | } | 
| Tom Stellard | f3b2a1e | 2013-02-06 17:32:29 +0000 | [diff] [blame] | 1430 | // Lowering for indirect addressing | 
|  | 1431 |  | 
|  | 1432 | const MachineFunction &MF = DAG.getMachineFunction(); | 
| Eric Christopher | d913448 | 2014-08-04 21:25:23 +0000 | [diff] [blame] | 1433 | const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering *>( | 
|  | 1434 | getTargetMachine().getSubtargetImpl()->getFrameLowering()); | 
| Tom Stellard | f3b2a1e | 2013-02-06 17:32:29 +0000 | [diff] [blame] | 1435 | unsigned StackWidth = TFL->getStackWidth(MF); | 
|  | 1436 |  | 
|  | 1437 | Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG); | 
|  | 1438 |  | 
|  | 1439 | if (ValueVT.isVector()) { | 
|  | 1440 | unsigned NumElemVT = ValueVT.getVectorNumElements(); | 
|  | 1441 | EVT ElemVT = ValueVT.getVectorElementType(); | 
| Craig Topper | 48d114b | 2014-04-26 18:35:24 +0000 | [diff] [blame] | 1442 | SmallVector<SDValue, 4> Stores(NumElemVT); | 
| Tom Stellard | f3b2a1e | 2013-02-06 17:32:29 +0000 | [diff] [blame] | 1443 |  | 
|  | 1444 | assert(NumElemVT >= StackWidth && "Stack width cannot be greater than " | 
|  | 1445 | "vector width in load"); | 
|  | 1446 |  | 
|  | 1447 | for (unsigned i = 0; i < NumElemVT; ++i) { | 
|  | 1448 | unsigned Channel, PtrIncr; | 
|  | 1449 | getStackAddress(StackWidth, i, Channel, PtrIncr); | 
|  | 1450 | Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr, | 
|  | 1451 | DAG.getConstant(PtrIncr, MVT::i32)); | 
|  | 1452 | SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, | 
|  | 1453 | Value, DAG.getConstant(i, MVT::i32)); | 
|  | 1454 |  | 
|  | 1455 | Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, | 
|  | 1456 | Chain, Elem, Ptr, | 
|  | 1457 | DAG.getTargetConstant(Channel, MVT::i32)); | 
|  | 1458 | } | 
| Craig Topper | 48d114b | 2014-04-26 18:35:24 +0000 | [diff] [blame] | 1459 | Chain =  DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores); | 
| Tom Stellard | f3b2a1e | 2013-02-06 17:32:29 +0000 | [diff] [blame] | 1460 | } else { | 
|  | 1461 | if (ValueVT == MVT::i8) { | 
|  | 1462 | Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value); | 
|  | 1463 | } | 
|  | 1464 | Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr, | 
| NAKAMURA Takumi | 18ca09c | 2013-05-22 06:37:25 +0000 | [diff] [blame] | 1465 | DAG.getTargetConstant(0, MVT::i32)); // Channel | 
| Tom Stellard | f3b2a1e | 2013-02-06 17:32:29 +0000 | [diff] [blame] | 1466 | } | 
|  | 1467 |  | 
|  | 1468 | return Chain; | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 1469 | } | 
|  | 1470 |  | 
| Tom Stellard | 365366f | 2013-01-23 02:09:06 +0000 | [diff] [blame] | 1471 | // return (512 + (kc_bank << 12) | 
|  | 1472 | static int | 
|  | 1473 | ConstantAddressBlock(unsigned AddressSpace) { | 
|  | 1474 | switch (AddressSpace) { | 
|  | 1475 | case AMDGPUAS::CONSTANT_BUFFER_0: | 
|  | 1476 | return 512; | 
|  | 1477 | case AMDGPUAS::CONSTANT_BUFFER_1: | 
|  | 1478 | return 512 + 4096; | 
|  | 1479 | case AMDGPUAS::CONSTANT_BUFFER_2: | 
|  | 1480 | return 512 + 4096 * 2; | 
|  | 1481 | case AMDGPUAS::CONSTANT_BUFFER_3: | 
|  | 1482 | return 512 + 4096 * 3; | 
|  | 1483 | case AMDGPUAS::CONSTANT_BUFFER_4: | 
|  | 1484 | return 512 + 4096 * 4; | 
|  | 1485 | case AMDGPUAS::CONSTANT_BUFFER_5: | 
|  | 1486 | return 512 + 4096 * 5; | 
|  | 1487 | case AMDGPUAS::CONSTANT_BUFFER_6: | 
|  | 1488 | return 512 + 4096 * 6; | 
|  | 1489 | case AMDGPUAS::CONSTANT_BUFFER_7: | 
|  | 1490 | return 512 + 4096 * 7; | 
|  | 1491 | case AMDGPUAS::CONSTANT_BUFFER_8: | 
|  | 1492 | return 512 + 4096 * 8; | 
|  | 1493 | case AMDGPUAS::CONSTANT_BUFFER_9: | 
|  | 1494 | return 512 + 4096 * 9; | 
|  | 1495 | case AMDGPUAS::CONSTANT_BUFFER_10: | 
|  | 1496 | return 512 + 4096 * 10; | 
|  | 1497 | case AMDGPUAS::CONSTANT_BUFFER_11: | 
|  | 1498 | return 512 + 4096 * 11; | 
|  | 1499 | case AMDGPUAS::CONSTANT_BUFFER_12: | 
|  | 1500 | return 512 + 4096 * 12; | 
|  | 1501 | case AMDGPUAS::CONSTANT_BUFFER_13: | 
|  | 1502 | return 512 + 4096 * 13; | 
|  | 1503 | case AMDGPUAS::CONSTANT_BUFFER_14: | 
|  | 1504 | return 512 + 4096 * 14; | 
|  | 1505 | case AMDGPUAS::CONSTANT_BUFFER_15: | 
|  | 1506 | return 512 + 4096 * 15; | 
|  | 1507 | default: | 
|  | 1508 | return -1; | 
|  | 1509 | } | 
|  | 1510 | } | 
|  | 1511 |  | 
|  | 1512 | SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const | 
|  | 1513 | { | 
|  | 1514 | EVT VT = Op.getValueType(); | 
| Andrew Trick | ef9de2a | 2013-05-25 02:42:55 +0000 | [diff] [blame] | 1515 | SDLoc DL(Op); | 
| Tom Stellard | 365366f | 2013-01-23 02:09:06 +0000 | [diff] [blame] | 1516 | LoadSDNode *LoadNode = cast<LoadSDNode>(Op); | 
|  | 1517 | SDValue Chain = Op.getOperand(0); | 
|  | 1518 | SDValue Ptr = Op.getOperand(1); | 
|  | 1519 | SDValue LoweredLoad; | 
|  | 1520 |  | 
| Tom Stellard | e937360 | 2014-01-22 19:24:14 +0000 | [diff] [blame] | 1521 | SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG); | 
|  | 1522 | if (Ret.getNode()) { | 
| Matt Arsenault | 7939acd | 2014-04-07 16:44:24 +0000 | [diff] [blame] | 1523 | SDValue Ops[2] = { | 
|  | 1524 | Ret, | 
|  | 1525 | Chain | 
|  | 1526 | }; | 
| Craig Topper | 64941d9 | 2014-04-27 19:20:57 +0000 | [diff] [blame] | 1527 | return DAG.getMergeValues(Ops, DL); | 
| Tom Stellard | e937360 | 2014-01-22 19:24:14 +0000 | [diff] [blame] | 1528 | } | 
|  | 1529 |  | 
| Tom Stellard | 067c815 | 2014-07-21 14:01:14 +0000 | [diff] [blame] | 1530 | // Lower loads of global variables in the constant address space | 
|  | 1531 | if (LoadNode->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS && | 
|  | 1532 | isa<GlobalVariable>( | 
|  | 1533 | GetUnderlyingObject(LoadNode->getMemOperand()->getValue()))) { | 
|  | 1534 |  | 
|  | 1535 | SDValue Ptr = DAG.getZExtOrTrunc(LoadNode->getBasePtr(), DL, | 
|  | 1536 | getPointerTy(AMDGPUAS::PRIVATE_ADDRESS)); | 
|  | 1537 | Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, | 
|  | 1538 | DAG.getConstant(2, MVT::i32)); | 
|  | 1539 | return DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op->getVTList(), | 
|  | 1540 | LoadNode->getChain(), Ptr, | 
|  | 1541 | DAG.getTargetConstant(0, MVT::i32), Op.getOperand(2)); | 
|  | 1542 | } | 
| Tom Stellard | e937360 | 2014-01-22 19:24:14 +0000 | [diff] [blame] | 1543 |  | 
| Tom Stellard | 35bb18c | 2013-08-26 15:06:04 +0000 | [diff] [blame] | 1544 | if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) { | 
|  | 1545 | SDValue MergedValues[2] = { | 
| Matt Arsenault | 83e6058 | 2014-07-24 17:10:35 +0000 | [diff] [blame] | 1546 | ScalarizeVectorLoad(Op, DAG), | 
| Tom Stellard | 35bb18c | 2013-08-26 15:06:04 +0000 | [diff] [blame] | 1547 | Chain | 
|  | 1548 | }; | 
| Craig Topper | 64941d9 | 2014-04-27 19:20:57 +0000 | [diff] [blame] | 1549 | return DAG.getMergeValues(MergedValues, DL); | 
| Tom Stellard | 35bb18c | 2013-08-26 15:06:04 +0000 | [diff] [blame] | 1550 | } | 
|  | 1551 |  | 
| Tom Stellard | 365366f | 2013-01-23 02:09:06 +0000 | [diff] [blame] | 1552 | int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace()); | 
| Matt Arsenault | 00a0d6f | 2013-11-13 02:39:07 +0000 | [diff] [blame] | 1553 | if (ConstantBlock > -1 && | 
|  | 1554 | ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) || | 
|  | 1555 | (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) { | 
| Tom Stellard | 365366f | 2013-01-23 02:09:06 +0000 | [diff] [blame] | 1556 | SDValue Result; | 
| Nick Lewycky | aad475b | 2014-04-15 07:22:52 +0000 | [diff] [blame] | 1557 | if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) || | 
|  | 1558 | isa<Constant>(LoadNode->getMemOperand()->getValue()) || | 
| Matt Arsenault | ef1a950 | 2013-11-01 17:39:26 +0000 | [diff] [blame] | 1559 | isa<ConstantSDNode>(Ptr)) { | 
| Tom Stellard | 365366f | 2013-01-23 02:09:06 +0000 | [diff] [blame] | 1560 | SDValue Slots[4]; | 
|  | 1561 | for (unsigned i = 0; i < 4; i++) { | 
|  | 1562 | // We want Const position encoded with the following formula : | 
|  | 1563 | // (((512 + (kc_bank << 12) + const_index) << 2) + chan) | 
|  | 1564 | // const_index is Ptr computed by llvm using an alignment of 16. | 
|  | 1565 | // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and | 
|  | 1566 | // then div by 4 at the ISel step | 
|  | 1567 | SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, | 
|  | 1568 | DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32)); | 
|  | 1569 | Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr); | 
|  | 1570 | } | 
| Tom Stellard | 0344cdf | 2013-08-01 15:23:42 +0000 | [diff] [blame] | 1571 | EVT NewVT = MVT::v4i32; | 
|  | 1572 | unsigned NumElements = 4; | 
|  | 1573 | if (VT.isVector()) { | 
|  | 1574 | NewVT = VT; | 
|  | 1575 | NumElements = VT.getVectorNumElements(); | 
|  | 1576 | } | 
| Craig Topper | 48d114b | 2014-04-26 18:35:24 +0000 | [diff] [blame] | 1577 | Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT, | 
| Craig Topper | 2d2aa0c | 2014-04-30 07:17:30 +0000 | [diff] [blame] | 1578 | makeArrayRef(Slots, NumElements)); | 
| Tom Stellard | 365366f | 2013-01-23 02:09:06 +0000 | [diff] [blame] | 1579 | } else { | 
| Alp Toker | f907b89 | 2013-12-05 05:44:44 +0000 | [diff] [blame] | 1580 | // A non-constant ptr can't be folded, so keep it as a v4i32 load | 
| Tom Stellard | 365366f | 2013-01-23 02:09:06 +0000 | [diff] [blame] | 1581 | Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32, | 
| Vincent Lejeune | 743dca0 | 2013-03-05 15:04:29 +0000 | [diff] [blame] | 1582 | DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)), | 
| Christian Konig | 189357c | 2013-03-07 09:03:59 +0000 | [diff] [blame] | 1583 | DAG.getConstant(LoadNode->getAddressSpace() - | 
| NAKAMURA Takumi | 18ca09c | 2013-05-22 06:37:25 +0000 | [diff] [blame] | 1584 | AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32) | 
| Tom Stellard | 365366f | 2013-01-23 02:09:06 +0000 | [diff] [blame] | 1585 | ); | 
|  | 1586 | } | 
|  | 1587 |  | 
|  | 1588 | if (!VT.isVector()) { | 
|  | 1589 | Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result, | 
|  | 1590 | DAG.getConstant(0, MVT::i32)); | 
|  | 1591 | } | 
|  | 1592 |  | 
|  | 1593 | SDValue MergedValues[2] = { | 
| Matt Arsenault | 7939acd | 2014-04-07 16:44:24 +0000 | [diff] [blame] | 1594 | Result, | 
|  | 1595 | Chain | 
| Tom Stellard | 365366f | 2013-01-23 02:09:06 +0000 | [diff] [blame] | 1596 | }; | 
| Craig Topper | 64941d9 | 2014-04-27 19:20:57 +0000 | [diff] [blame] | 1597 | return DAG.getMergeValues(MergedValues, DL); | 
| Tom Stellard | 365366f | 2013-01-23 02:09:06 +0000 | [diff] [blame] | 1598 | } | 
|  | 1599 |  | 
| Matt Arsenault | 909d0c0 | 2013-10-30 23:43:29 +0000 | [diff] [blame] | 1600 | // For most operations returning SDValue() will result in the node being | 
|  | 1601 | // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we | 
|  | 1602 | // need to manually expand loads that may be legal in some address spaces and | 
|  | 1603 | // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for | 
|  | 1604 | // compute shaders, since the data is sign extended when it is uploaded to the | 
|  | 1605 | // buffer. However SEXT loads from other address spaces are not supported, so | 
|  | 1606 | // we need to expand them here. | 
| Tom Stellard | 8402144 | 2013-07-23 01:48:24 +0000 | [diff] [blame] | 1607 | if (LoadNode->getExtensionType() == ISD::SEXTLOAD) { | 
|  | 1608 | EVT MemVT = LoadNode->getMemoryVT(); | 
|  | 1609 | assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8)); | 
|  | 1610 | SDValue ShiftAmount = | 
|  | 1611 | DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32); | 
|  | 1612 | SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr, | 
|  | 1613 | LoadNode->getPointerInfo(), MemVT, | 
|  | 1614 | LoadNode->isVolatile(), | 
|  | 1615 | LoadNode->isNonTemporal(), | 
| Louis Gerbarg | 67474e3 | 2014-07-31 21:45:05 +0000 | [diff] [blame] | 1616 | LoadNode->isInvariant(), | 
| Tom Stellard | 8402144 | 2013-07-23 01:48:24 +0000 | [diff] [blame] | 1617 | LoadNode->getAlignment()); | 
|  | 1618 | SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount); | 
|  | 1619 | SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount); | 
|  | 1620 |  | 
|  | 1621 | SDValue MergedValues[2] = { Sra, Chain }; | 
| Craig Topper | 64941d9 | 2014-04-27 19:20:57 +0000 | [diff] [blame] | 1622 | return DAG.getMergeValues(MergedValues, DL); | 
| Tom Stellard | 8402144 | 2013-07-23 01:48:24 +0000 | [diff] [blame] | 1623 | } | 
|  | 1624 |  | 
| Tom Stellard | f3b2a1e | 2013-02-06 17:32:29 +0000 | [diff] [blame] | 1625 | if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) { | 
|  | 1626 | return SDValue(); | 
|  | 1627 | } | 
|  | 1628 |  | 
|  | 1629 | // Lowering for indirect addressing | 
|  | 1630 | const MachineFunction &MF = DAG.getMachineFunction(); | 
| Eric Christopher | d913448 | 2014-08-04 21:25:23 +0000 | [diff] [blame] | 1631 | const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering *>( | 
|  | 1632 | getTargetMachine().getSubtargetImpl()->getFrameLowering()); | 
| Tom Stellard | f3b2a1e | 2013-02-06 17:32:29 +0000 | [diff] [blame] | 1633 | unsigned StackWidth = TFL->getStackWidth(MF); | 
|  | 1634 |  | 
|  | 1635 | Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG); | 
|  | 1636 |  | 
|  | 1637 | if (VT.isVector()) { | 
|  | 1638 | unsigned NumElemVT = VT.getVectorNumElements(); | 
|  | 1639 | EVT ElemVT = VT.getVectorElementType(); | 
|  | 1640 | SDValue Loads[4]; | 
|  | 1641 |  | 
|  | 1642 | assert(NumElemVT >= StackWidth && "Stack width cannot be greater than " | 
|  | 1643 | "vector width in load"); | 
|  | 1644 |  | 
|  | 1645 | for (unsigned i = 0; i < NumElemVT; ++i) { | 
|  | 1646 | unsigned Channel, PtrIncr; | 
|  | 1647 | getStackAddress(StackWidth, i, Channel, PtrIncr); | 
|  | 1648 | Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr, | 
|  | 1649 | DAG.getConstant(PtrIncr, MVT::i32)); | 
|  | 1650 | Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT, | 
|  | 1651 | Chain, Ptr, | 
|  | 1652 | DAG.getTargetConstant(Channel, MVT::i32), | 
|  | 1653 | Op.getOperand(2)); | 
|  | 1654 | } | 
|  | 1655 | for (unsigned i = NumElemVT; i < 4; ++i) { | 
|  | 1656 | Loads[i] = DAG.getUNDEF(ElemVT); | 
|  | 1657 | } | 
|  | 1658 | EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4); | 
| Craig Topper | 48d114b | 2014-04-26 18:35:24 +0000 | [diff] [blame] | 1659 | LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads); | 
| Tom Stellard | f3b2a1e | 2013-02-06 17:32:29 +0000 | [diff] [blame] | 1660 | } else { | 
|  | 1661 | LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT, | 
|  | 1662 | Chain, Ptr, | 
|  | 1663 | DAG.getTargetConstant(0, MVT::i32), // Channel | 
|  | 1664 | Op.getOperand(2)); | 
|  | 1665 | } | 
|  | 1666 |  | 
| Matt Arsenault | 7939acd | 2014-04-07 16:44:24 +0000 | [diff] [blame] | 1667 | SDValue Ops[2] = { | 
|  | 1668 | LoweredLoad, | 
|  | 1669 | Chain | 
|  | 1670 | }; | 
| Tom Stellard | f3b2a1e | 2013-02-06 17:32:29 +0000 | [diff] [blame] | 1671 |  | 
| Craig Topper | 64941d9 | 2014-04-27 19:20:57 +0000 | [diff] [blame] | 1672 | return DAG.getMergeValues(Ops, DL); | 
| Tom Stellard | 365366f | 2013-01-23 02:09:06 +0000 | [diff] [blame] | 1673 | } | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 1674 |  | 
| Matt Arsenault | 1d555c4 | 2014-06-23 18:00:55 +0000 | [diff] [blame] | 1675 | SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { | 
|  | 1676 | SDValue Chain = Op.getOperand(0); | 
|  | 1677 | SDValue Cond  = Op.getOperand(1); | 
|  | 1678 | SDValue Jump  = Op.getOperand(2); | 
|  | 1679 |  | 
|  | 1680 | return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(), | 
|  | 1681 | Chain, Jump, Cond); | 
|  | 1682 | } | 
|  | 1683 |  | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 1684 | /// XXX Only kernel functions are supported, so we can assume for now that | 
|  | 1685 | /// every function is a kernel function, but in the future we should use | 
|  | 1686 | /// separate calling conventions for kernel and non-kernel functions. | 
|  | 1687 | SDValue R600TargetLowering::LowerFormalArguments( | 
|  | 1688 | SDValue Chain, | 
|  | 1689 | CallingConv::ID CallConv, | 
|  | 1690 | bool isVarArg, | 
|  | 1691 | const SmallVectorImpl<ISD::InputArg> &Ins, | 
| Andrew Trick | ef9de2a | 2013-05-25 02:42:55 +0000 | [diff] [blame] | 1692 | SDLoc DL, SelectionDAG &DAG, | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 1693 | SmallVectorImpl<SDValue> &InVals) const { | 
| Tom Stellard | acfeebf | 2013-07-23 01:48:05 +0000 | [diff] [blame] | 1694 | SmallVector<CCValAssign, 16> ArgLocs; | 
| Eric Christopher | b521750 | 2014-08-06 18:45:26 +0000 | [diff] [blame] | 1695 | CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, | 
|  | 1696 | *DAG.getContext()); | 
| Vincent Lejeune | f143af3 | 2013-11-11 22:10:24 +0000 | [diff] [blame] | 1697 | MachineFunction &MF = DAG.getMachineFunction(); | 
| Matt Arsenault | 762af96 | 2014-07-13 03:06:39 +0000 | [diff] [blame] | 1698 | unsigned ShaderType = MF.getInfo<R600MachineFunctionInfo>()->getShaderType(); | 
| Tom Stellard | acfeebf | 2013-07-23 01:48:05 +0000 | [diff] [blame] | 1699 |  | 
| Tom Stellard | af77543 | 2013-10-23 00:44:32 +0000 | [diff] [blame] | 1700 | SmallVector<ISD::InputArg, 8> LocalIns; | 
|  | 1701 |  | 
| Matt Arsenault | 209a7b9 | 2014-04-18 07:40:20 +0000 | [diff] [blame] | 1702 | getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns); | 
| Tom Stellard | af77543 | 2013-10-23 00:44:32 +0000 | [diff] [blame] | 1703 |  | 
|  | 1704 | AnalyzeFormalArguments(CCInfo, LocalIns); | 
| Tom Stellard | acfeebf | 2013-07-23 01:48:05 +0000 | [diff] [blame] | 1705 |  | 
| Tom Stellard | 1e80309 | 2013-07-23 01:48:18 +0000 | [diff] [blame] | 1706 | for (unsigned i = 0, e = Ins.size(); i < e; ++i) { | 
| Tom Stellard | acfeebf | 2013-07-23 01:48:05 +0000 | [diff] [blame] | 1707 | CCValAssign &VA = ArgLocs[i]; | 
| Tom Stellard | af77543 | 2013-10-23 00:44:32 +0000 | [diff] [blame] | 1708 | EVT VT = Ins[i].VT; | 
|  | 1709 | EVT MemVT = LocalIns[i].VT; | 
| Tom Stellard | 78e0129 | 2013-07-23 01:47:58 +0000 | [diff] [blame] | 1710 |  | 
| Vincent Lejeune | f143af3 | 2013-11-11 22:10:24 +0000 | [diff] [blame] | 1711 | if (ShaderType != ShaderType::COMPUTE) { | 
|  | 1712 | unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass); | 
|  | 1713 | SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT); | 
|  | 1714 | InVals.push_back(Register); | 
|  | 1715 | continue; | 
|  | 1716 | } | 
|  | 1717 |  | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 1718 | PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()), | 
| Tom Stellard | 1e80309 | 2013-07-23 01:48:18 +0000 | [diff] [blame] | 1719 | AMDGPUAS::CONSTANT_BUFFER_0); | 
| Tom Stellard | acfeebf | 2013-07-23 01:48:05 +0000 | [diff] [blame] | 1720 |  | 
| Matt Arsenault | fae0298 | 2014-03-17 18:58:11 +0000 | [diff] [blame] | 1721 | // i64 isn't a legal type, so the register type used ends up as i32, which | 
|  | 1722 | // isn't expected here. It attempts to create this sextload, but it ends up | 
|  | 1723 | // being invalid. Somehow this seems to work with i64 arguments, but breaks | 
|  | 1724 | // for <1 x i64>. | 
|  | 1725 |  | 
| Tom Stellard | acfeebf | 2013-07-23 01:48:05 +0000 | [diff] [blame] | 1726 | // The first 36 bytes of the input buffer contains information about | 
|  | 1727 | // thread group and global sizes. | 
| Matt Arsenault | e1f030c | 2014-04-11 20:59:54 +0000 | [diff] [blame] | 1728 |  | 
|  | 1729 | // FIXME: This should really check the extload type, but the handling of | 
|  | 1730 | // extload vecto parameters seems to be broken. | 
|  | 1731 | //ISD::LoadExtType Ext = Ins[i].Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD; | 
|  | 1732 | ISD::LoadExtType Ext = ISD::SEXTLOAD; | 
|  | 1733 | SDValue Arg = DAG.getExtLoad(Ext, DL, VT, Chain, | 
| Tom Stellard | af77543 | 2013-10-23 00:44:32 +0000 | [diff] [blame] | 1734 | DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32), | 
|  | 1735 | MachinePointerInfo(UndefValue::get(PtrTy)), | 
| Louis Gerbarg | 67474e3 | 2014-07-31 21:45:05 +0000 | [diff] [blame] | 1736 | MemVT, false, false, false, 4); | 
| Matt Arsenault | 209a7b9 | 2014-04-18 07:40:20 +0000 | [diff] [blame] | 1737 |  | 
|  | 1738 | // 4 is the preferred alignment for the CONSTANT memory space. | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 1739 | InVals.push_back(Arg); | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 1740 | } | 
|  | 1741 | return Chain; | 
|  | 1742 | } | 
|  | 1743 |  | 
| Matt Arsenault | 75865923 | 2013-05-18 00:21:46 +0000 | [diff] [blame] | 1744 | EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { | 
| Matt Arsenault | 209a7b9 | 2014-04-18 07:40:20 +0000 | [diff] [blame] | 1745 | if (!VT.isVector()) | 
|  | 1746 | return MVT::i32; | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 1747 | return VT.changeVectorElementTypeToInteger(); | 
|  | 1748 | } | 
|  | 1749 |  | 
| Matt Arsenault | 209a7b9 | 2014-04-18 07:40:20 +0000 | [diff] [blame] | 1750 | static SDValue CompactSwizzlableVector( | 
|  | 1751 | SelectionDAG &DAG, SDValue VectorEntry, | 
|  | 1752 | DenseMap<unsigned, unsigned> &RemapSwizzle) { | 
| Vincent Lejeune | 276ceb8 | 2013-06-04 15:04:53 +0000 | [diff] [blame] | 1753 | assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR); | 
|  | 1754 | assert(RemapSwizzle.empty()); | 
|  | 1755 | SDValue NewBldVec[4] = { | 
| Matt Arsenault | 209a7b9 | 2014-04-18 07:40:20 +0000 | [diff] [blame] | 1756 | VectorEntry.getOperand(0), | 
|  | 1757 | VectorEntry.getOperand(1), | 
|  | 1758 | VectorEntry.getOperand(2), | 
|  | 1759 | VectorEntry.getOperand(3) | 
| Vincent Lejeune | 276ceb8 | 2013-06-04 15:04:53 +0000 | [diff] [blame] | 1760 | }; | 
|  | 1761 |  | 
|  | 1762 | for (unsigned i = 0; i < 4; i++) { | 
| Vincent Lejeune | fa58a5f | 2013-10-13 17:56:10 +0000 | [diff] [blame] | 1763 | if (NewBldVec[i].getOpcode() == ISD::UNDEF) | 
|  | 1764 | // We mask write here to teach later passes that the ith element of this | 
|  | 1765 | // vector is undef. Thus we can use it to reduce 128 bits reg usage, | 
|  | 1766 | // break false dependencies and additionnaly make assembly easier to read. | 
|  | 1767 | RemapSwizzle[i] = 7; // SEL_MASK_WRITE | 
| Vincent Lejeune | 276ceb8 | 2013-06-04 15:04:53 +0000 | [diff] [blame] | 1768 | if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) { | 
|  | 1769 | if (C->isZero()) { | 
|  | 1770 | RemapSwizzle[i] = 4; // SEL_0 | 
|  | 1771 | NewBldVec[i] = DAG.getUNDEF(MVT::f32); | 
|  | 1772 | } else if (C->isExactlyValue(1.0)) { | 
|  | 1773 | RemapSwizzle[i] = 5; // SEL_1 | 
|  | 1774 | NewBldVec[i] = DAG.getUNDEF(MVT::f32); | 
|  | 1775 | } | 
|  | 1776 | } | 
|  | 1777 |  | 
|  | 1778 | if (NewBldVec[i].getOpcode() == ISD::UNDEF) | 
|  | 1779 | continue; | 
|  | 1780 | for (unsigned j = 0; j < i; j++) { | 
|  | 1781 | if (NewBldVec[i] == NewBldVec[j]) { | 
|  | 1782 | NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType()); | 
|  | 1783 | RemapSwizzle[i] = j; | 
|  | 1784 | break; | 
|  | 1785 | } | 
|  | 1786 | } | 
|  | 1787 | } | 
|  | 1788 |  | 
|  | 1789 | return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry), | 
| Craig Topper | 48d114b | 2014-04-26 18:35:24 +0000 | [diff] [blame] | 1790 | VectorEntry.getValueType(), NewBldVec); | 
| Vincent Lejeune | 276ceb8 | 2013-06-04 15:04:53 +0000 | [diff] [blame] | 1791 | } | 
|  | 1792 |  | 
| Benjamin Kramer | 193960c | 2013-06-11 13:32:25 +0000 | [diff] [blame] | 1793 | static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry, | 
|  | 1794 | DenseMap<unsigned, unsigned> &RemapSwizzle) { | 
| Vincent Lejeune | 276ceb8 | 2013-06-04 15:04:53 +0000 | [diff] [blame] | 1795 | assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR); | 
|  | 1796 | assert(RemapSwizzle.empty()); | 
|  | 1797 | SDValue NewBldVec[4] = { | 
|  | 1798 | VectorEntry.getOperand(0), | 
|  | 1799 | VectorEntry.getOperand(1), | 
|  | 1800 | VectorEntry.getOperand(2), | 
|  | 1801 | VectorEntry.getOperand(3) | 
|  | 1802 | }; | 
|  | 1803 | bool isUnmovable[4] = { false, false, false, false }; | 
| Vincent Lejeune | cc0ea74 | 2013-12-10 14:43:31 +0000 | [diff] [blame] | 1804 | for (unsigned i = 0; i < 4; i++) { | 
| Vincent Lejeune | b8aac8d | 2013-07-09 15:03:25 +0000 | [diff] [blame] | 1805 | RemapSwizzle[i] = i; | 
| Vincent Lejeune | cc0ea74 | 2013-12-10 14:43:31 +0000 | [diff] [blame] | 1806 | if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) { | 
|  | 1807 | unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1)) | 
|  | 1808 | ->getZExtValue(); | 
|  | 1809 | if (i == Idx) | 
|  | 1810 | isUnmovable[Idx] = true; | 
|  | 1811 | } | 
|  | 1812 | } | 
| Vincent Lejeune | 276ceb8 | 2013-06-04 15:04:53 +0000 | [diff] [blame] | 1813 |  | 
|  | 1814 | for (unsigned i = 0; i < 4; i++) { | 
|  | 1815 | if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) { | 
|  | 1816 | unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1)) | 
|  | 1817 | ->getZExtValue(); | 
| Vincent Lejeune | 301beb8 | 2013-10-13 17:56:04 +0000 | [diff] [blame] | 1818 | if (isUnmovable[Idx]) | 
|  | 1819 | continue; | 
|  | 1820 | // Swap i and Idx | 
|  | 1821 | std::swap(NewBldVec[Idx], NewBldVec[i]); | 
|  | 1822 | std::swap(RemapSwizzle[i], RemapSwizzle[Idx]); | 
|  | 1823 | break; | 
| Vincent Lejeune | 276ceb8 | 2013-06-04 15:04:53 +0000 | [diff] [blame] | 1824 | } | 
|  | 1825 | } | 
|  | 1826 |  | 
|  | 1827 | return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry), | 
| Craig Topper | 48d114b | 2014-04-26 18:35:24 +0000 | [diff] [blame] | 1828 | VectorEntry.getValueType(), NewBldVec); | 
| Vincent Lejeune | 276ceb8 | 2013-06-04 15:04:53 +0000 | [diff] [blame] | 1829 | } | 
|  | 1830 |  | 
|  | 1831 |  | 
|  | 1832 | SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, | 
|  | 1833 | SDValue Swz[4], SelectionDAG &DAG) const { | 
|  | 1834 | assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR); | 
|  | 1835 | // Old -> New swizzle values | 
|  | 1836 | DenseMap<unsigned, unsigned> SwizzleRemap; | 
|  | 1837 |  | 
|  | 1838 | BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap); | 
|  | 1839 | for (unsigned i = 0; i < 4; i++) { | 
|  | 1840 | unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue(); | 
|  | 1841 | if (SwizzleRemap.find(Idx) != SwizzleRemap.end()) | 
|  | 1842 | Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32); | 
|  | 1843 | } | 
|  | 1844 |  | 
|  | 1845 | SwizzleRemap.clear(); | 
|  | 1846 | BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap); | 
|  | 1847 | for (unsigned i = 0; i < 4; i++) { | 
|  | 1848 | unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue(); | 
|  | 1849 | if (SwizzleRemap.find(Idx) != SwizzleRemap.end()) | 
|  | 1850 | Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32); | 
|  | 1851 | } | 
|  | 1852 |  | 
|  | 1853 | return BuildVector; | 
|  | 1854 | } | 
|  | 1855 |  | 
|  | 1856 |  | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 1857 | //===----------------------------------------------------------------------===// | 
|  | 1858 | // Custom DAG Optimizations | 
|  | 1859 | //===----------------------------------------------------------------------===// | 
|  | 1860 |  | 
|  | 1861 | SDValue R600TargetLowering::PerformDAGCombine(SDNode *N, | 
|  | 1862 | DAGCombinerInfo &DCI) const { | 
|  | 1863 | SelectionDAG &DAG = DCI.DAG; | 
|  | 1864 |  | 
|  | 1865 | switch (N->getOpcode()) { | 
| Tom Stellard | 50122a5 | 2014-04-07 19:45:41 +0000 | [diff] [blame] | 1866 | default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI); | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 1867 | // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a) | 
|  | 1868 | case ISD::FP_ROUND: { | 
|  | 1869 | SDValue Arg = N->getOperand(0); | 
|  | 1870 | if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) { | 
| Andrew Trick | ef9de2a | 2013-05-25 02:42:55 +0000 | [diff] [blame] | 1871 | return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0), | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 1872 | Arg.getOperand(0)); | 
|  | 1873 | } | 
|  | 1874 | break; | 
|  | 1875 | } | 
| Tom Stellard | e06163a | 2013-02-07 14:02:35 +0000 | [diff] [blame] | 1876 |  | 
|  | 1877 | // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) -> | 
|  | 1878 | // (i32 select_cc f32, f32, -1, 0 cc) | 
|  | 1879 | // | 
|  | 1880 | // Mesa's GLSL frontend generates the above pattern a lot and we can lower | 
|  | 1881 | // this to one of the SET*_DX10 instructions. | 
|  | 1882 | case ISD::FP_TO_SINT: { | 
|  | 1883 | SDValue FNeg = N->getOperand(0); | 
|  | 1884 | if (FNeg.getOpcode() != ISD::FNEG) { | 
|  | 1885 | return SDValue(); | 
|  | 1886 | } | 
|  | 1887 | SDValue SelectCC = FNeg.getOperand(0); | 
|  | 1888 | if (SelectCC.getOpcode() != ISD::SELECT_CC || | 
|  | 1889 | SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS | 
|  | 1890 | SelectCC.getOperand(2).getValueType() != MVT::f32 || // True | 
|  | 1891 | !isHWTrueValue(SelectCC.getOperand(2)) || | 
|  | 1892 | !isHWFalseValue(SelectCC.getOperand(3))) { | 
|  | 1893 | return SDValue(); | 
|  | 1894 | } | 
|  | 1895 |  | 
| Andrew Trick | ef9de2a | 2013-05-25 02:42:55 +0000 | [diff] [blame] | 1896 | return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0), | 
| Tom Stellard | e06163a | 2013-02-07 14:02:35 +0000 | [diff] [blame] | 1897 | SelectCC.getOperand(0), // LHS | 
|  | 1898 | SelectCC.getOperand(1), // RHS | 
|  | 1899 | DAG.getConstant(-1, MVT::i32), // True | 
|  | 1900 | DAG.getConstant(0, MVT::i32),  // Flase | 
|  | 1901 | SelectCC.getOperand(4)); // CC | 
|  | 1902 |  | 
|  | 1903 | break; | 
|  | 1904 | } | 
| Quentin Colombet | e2e0548 | 2013-07-30 00:27:16 +0000 | [diff] [blame] | 1905 |  | 
| NAKAMURA Takumi | 8a04643 | 2013-10-28 04:07:38 +0000 | [diff] [blame] | 1906 | // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx | 
|  | 1907 | // => build_vector elt0, ... , NewEltIdx, ... , eltN | 
| Quentin Colombet | e2e0548 | 2013-07-30 00:27:16 +0000 | [diff] [blame] | 1908 | case ISD::INSERT_VECTOR_ELT: { | 
|  | 1909 | SDValue InVec = N->getOperand(0); | 
|  | 1910 | SDValue InVal = N->getOperand(1); | 
|  | 1911 | SDValue EltNo = N->getOperand(2); | 
|  | 1912 | SDLoc dl(N); | 
|  | 1913 |  | 
|  | 1914 | // If the inserted element is an UNDEF, just use the input vector. | 
|  | 1915 | if (InVal.getOpcode() == ISD::UNDEF) | 
|  | 1916 | return InVec; | 
|  | 1917 |  | 
|  | 1918 | EVT VT = InVec.getValueType(); | 
|  | 1919 |  | 
|  | 1920 | // If we can't generate a legal BUILD_VECTOR, exit | 
|  | 1921 | if (!isOperationLegal(ISD::BUILD_VECTOR, VT)) | 
|  | 1922 | return SDValue(); | 
|  | 1923 |  | 
|  | 1924 | // Check that we know which element is being inserted | 
|  | 1925 | if (!isa<ConstantSDNode>(EltNo)) | 
|  | 1926 | return SDValue(); | 
|  | 1927 | unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); | 
|  | 1928 |  | 
|  | 1929 | // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially | 
|  | 1930 | // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the | 
|  | 1931 | // vector elements. | 
|  | 1932 | SmallVector<SDValue, 8> Ops; | 
|  | 1933 | if (InVec.getOpcode() == ISD::BUILD_VECTOR) { | 
|  | 1934 | Ops.append(InVec.getNode()->op_begin(), | 
|  | 1935 | InVec.getNode()->op_end()); | 
|  | 1936 | } else if (InVec.getOpcode() == ISD::UNDEF) { | 
|  | 1937 | unsigned NElts = VT.getVectorNumElements(); | 
|  | 1938 | Ops.append(NElts, DAG.getUNDEF(InVal.getValueType())); | 
|  | 1939 | } else { | 
|  | 1940 | return SDValue(); | 
|  | 1941 | } | 
|  | 1942 |  | 
|  | 1943 | // Insert the element | 
|  | 1944 | if (Elt < Ops.size()) { | 
|  | 1945 | // All the operands of BUILD_VECTOR must have the same type; | 
|  | 1946 | // we enforce that here. | 
|  | 1947 | EVT OpVT = Ops[0].getValueType(); | 
|  | 1948 | if (InVal.getValueType() != OpVT) | 
|  | 1949 | InVal = OpVT.bitsGT(InVal.getValueType()) ? | 
|  | 1950 | DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) : | 
|  | 1951 | DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal); | 
|  | 1952 | Ops[Elt] = InVal; | 
|  | 1953 | } | 
|  | 1954 |  | 
|  | 1955 | // Return the new vector | 
| Craig Topper | 48d114b | 2014-04-26 18:35:24 +0000 | [diff] [blame] | 1956 | return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); | 
| Quentin Colombet | e2e0548 | 2013-07-30 00:27:16 +0000 | [diff] [blame] | 1957 | } | 
|  | 1958 |  | 
| Tom Stellard | 365366f | 2013-01-23 02:09:06 +0000 | [diff] [blame] | 1959 | // Extract_vec (Build_vector) generated by custom lowering | 
|  | 1960 | // also needs to be customly combined | 
|  | 1961 | case ISD::EXTRACT_VECTOR_ELT: { | 
|  | 1962 | SDValue Arg = N->getOperand(0); | 
|  | 1963 | if (Arg.getOpcode() == ISD::BUILD_VECTOR) { | 
|  | 1964 | if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) { | 
|  | 1965 | unsigned Element = Const->getZExtValue(); | 
|  | 1966 | return Arg->getOperand(Element); | 
|  | 1967 | } | 
|  | 1968 | } | 
| Tom Stellard | dd04c83 | 2013-01-31 22:11:53 +0000 | [diff] [blame] | 1969 | if (Arg.getOpcode() == ISD::BITCAST && | 
|  | 1970 | Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) { | 
|  | 1971 | if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) { | 
|  | 1972 | unsigned Element = Const->getZExtValue(); | 
| Andrew Trick | ef9de2a | 2013-05-25 02:42:55 +0000 | [diff] [blame] | 1973 | return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(), | 
| Tom Stellard | dd04c83 | 2013-01-31 22:11:53 +0000 | [diff] [blame] | 1974 | Arg->getOperand(0).getOperand(Element)); | 
|  | 1975 | } | 
|  | 1976 | } | 
| Tom Stellard | 365366f | 2013-01-23 02:09:06 +0000 | [diff] [blame] | 1977 | } | 
| Tom Stellard | e06163a | 2013-02-07 14:02:35 +0000 | [diff] [blame] | 1978 |  | 
|  | 1979 | case ISD::SELECT_CC: { | 
| Tom Stellard | afa8b53 | 2014-05-09 16:42:16 +0000 | [diff] [blame] | 1980 | // Try common optimizations | 
|  | 1981 | SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI); | 
|  | 1982 | if (Ret.getNode()) | 
|  | 1983 | return Ret; | 
|  | 1984 |  | 
| Tom Stellard | e06163a | 2013-02-07 14:02:35 +0000 | [diff] [blame] | 1985 | // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq -> | 
|  | 1986 | //      selectcc x, y, a, b, inv(cc) | 
| Tom Stellard | 5e52489 | 2013-03-08 15:37:11 +0000 | [diff] [blame] | 1987 | // | 
|  | 1988 | // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne -> | 
|  | 1989 | //      selectcc x, y, a, b, cc | 
| Tom Stellard | e06163a | 2013-02-07 14:02:35 +0000 | [diff] [blame] | 1990 | SDValue LHS = N->getOperand(0); | 
|  | 1991 | if (LHS.getOpcode() != ISD::SELECT_CC) { | 
|  | 1992 | return SDValue(); | 
|  | 1993 | } | 
|  | 1994 |  | 
|  | 1995 | SDValue RHS = N->getOperand(1); | 
|  | 1996 | SDValue True = N->getOperand(2); | 
|  | 1997 | SDValue False = N->getOperand(3); | 
| Tom Stellard | 5e52489 | 2013-03-08 15:37:11 +0000 | [diff] [blame] | 1998 | ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get(); | 
| Tom Stellard | e06163a | 2013-02-07 14:02:35 +0000 | [diff] [blame] | 1999 |  | 
|  | 2000 | if (LHS.getOperand(2).getNode() != True.getNode() || | 
|  | 2001 | LHS.getOperand(3).getNode() != False.getNode() || | 
| Tom Stellard | 5e52489 | 2013-03-08 15:37:11 +0000 | [diff] [blame] | 2002 | RHS.getNode() != False.getNode()) { | 
| Tom Stellard | e06163a | 2013-02-07 14:02:35 +0000 | [diff] [blame] | 2003 | return SDValue(); | 
|  | 2004 | } | 
|  | 2005 |  | 
| Tom Stellard | 5e52489 | 2013-03-08 15:37:11 +0000 | [diff] [blame] | 2006 | switch (NCC) { | 
|  | 2007 | default: return SDValue(); | 
|  | 2008 | case ISD::SETNE: return LHS; | 
|  | 2009 | case ISD::SETEQ: { | 
|  | 2010 | ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get(); | 
|  | 2011 | LHSCC = ISD::getSetCCInverse(LHSCC, | 
|  | 2012 | LHS.getOperand(0).getValueType().isInteger()); | 
| Tom Stellard | cd42818 | 2013-09-28 02:50:38 +0000 | [diff] [blame] | 2013 | if (DCI.isBeforeLegalizeOps() || | 
|  | 2014 | isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType())) | 
|  | 2015 | return DAG.getSelectCC(SDLoc(N), | 
|  | 2016 | LHS.getOperand(0), | 
|  | 2017 | LHS.getOperand(1), | 
|  | 2018 | LHS.getOperand(2), | 
|  | 2019 | LHS.getOperand(3), | 
|  | 2020 | LHSCC); | 
|  | 2021 | break; | 
| Vincent Lejeune | d80bc15 | 2013-02-14 16:55:06 +0000 | [diff] [blame] | 2022 | } | 
| Tom Stellard | 5e52489 | 2013-03-08 15:37:11 +0000 | [diff] [blame] | 2023 | } | 
| Tom Stellard | cd42818 | 2013-09-28 02:50:38 +0000 | [diff] [blame] | 2024 | return SDValue(); | 
| Tom Stellard | 5e52489 | 2013-03-08 15:37:11 +0000 | [diff] [blame] | 2025 | } | 
| Tom Stellard | fbab827 | 2013-08-16 01:12:11 +0000 | [diff] [blame] | 2026 |  | 
| Vincent Lejeune | d80bc15 | 2013-02-14 16:55:06 +0000 | [diff] [blame] | 2027 | case AMDGPUISD::EXPORT: { | 
|  | 2028 | SDValue Arg = N->getOperand(1); | 
|  | 2029 | if (Arg.getOpcode() != ISD::BUILD_VECTOR) | 
|  | 2030 | break; | 
| Vincent Lejeune | 276ceb8 | 2013-06-04 15:04:53 +0000 | [diff] [blame] | 2031 |  | 
| Vincent Lejeune | d80bc15 | 2013-02-14 16:55:06 +0000 | [diff] [blame] | 2032 | SDValue NewArgs[8] = { | 
|  | 2033 | N->getOperand(0), // Chain | 
|  | 2034 | SDValue(), | 
|  | 2035 | N->getOperand(2), // ArrayBase | 
|  | 2036 | N->getOperand(3), // Type | 
|  | 2037 | N->getOperand(4), // SWZ_X | 
|  | 2038 | N->getOperand(5), // SWZ_Y | 
|  | 2039 | N->getOperand(6), // SWZ_Z | 
|  | 2040 | N->getOperand(7) // SWZ_W | 
|  | 2041 | }; | 
| Andrew Trick | ef9de2a | 2013-05-25 02:42:55 +0000 | [diff] [blame] | 2042 | SDLoc DL(N); | 
| Vincent Lejeune | 276ceb8 | 2013-06-04 15:04:53 +0000 | [diff] [blame] | 2043 | NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG); | 
| Craig Topper | 48d114b | 2014-04-26 18:35:24 +0000 | [diff] [blame] | 2044 | return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs); | 
| Tom Stellard | e06163a | 2013-02-07 14:02:35 +0000 | [diff] [blame] | 2045 | } | 
| Vincent Lejeune | 276ceb8 | 2013-06-04 15:04:53 +0000 | [diff] [blame] | 2046 | case AMDGPUISD::TEXTURE_FETCH: { | 
|  | 2047 | SDValue Arg = N->getOperand(1); | 
|  | 2048 | if (Arg.getOpcode() != ISD::BUILD_VECTOR) | 
|  | 2049 | break; | 
|  | 2050 |  | 
|  | 2051 | SDValue NewArgs[19] = { | 
|  | 2052 | N->getOperand(0), | 
|  | 2053 | N->getOperand(1), | 
|  | 2054 | N->getOperand(2), | 
|  | 2055 | N->getOperand(3), | 
|  | 2056 | N->getOperand(4), | 
|  | 2057 | N->getOperand(5), | 
|  | 2058 | N->getOperand(6), | 
|  | 2059 | N->getOperand(7), | 
|  | 2060 | N->getOperand(8), | 
|  | 2061 | N->getOperand(9), | 
|  | 2062 | N->getOperand(10), | 
|  | 2063 | N->getOperand(11), | 
|  | 2064 | N->getOperand(12), | 
|  | 2065 | N->getOperand(13), | 
|  | 2066 | N->getOperand(14), | 
|  | 2067 | N->getOperand(15), | 
|  | 2068 | N->getOperand(16), | 
|  | 2069 | N->getOperand(17), | 
|  | 2070 | N->getOperand(18), | 
|  | 2071 | }; | 
|  | 2072 | NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG); | 
|  | 2073 | return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(), | 
| Craig Topper | 48d114b | 2014-04-26 18:35:24 +0000 | [diff] [blame] | 2074 | NewArgs); | 
| Vincent Lejeune | 276ceb8 | 2013-06-04 15:04:53 +0000 | [diff] [blame] | 2075 | } | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 2076 | } | 
| Matt Arsenault | 5565f65e | 2014-05-22 18:09:07 +0000 | [diff] [blame] | 2077 |  | 
|  | 2078 | return AMDGPUTargetLowering::PerformDAGCombine(N, DCI); | 
| Tom Stellard | 75aadc2 | 2012-12-11 21:25:42 +0000 | [diff] [blame] | 2079 | } | 
| Vincent Lejeune | ab3baf8 | 2013-09-12 23:44:44 +0000 | [diff] [blame] | 2080 |  | 
|  | 2081 | static bool | 
|  | 2082 | FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg, | 
| Vincent Lejeune | 9a248e5 | 2013-09-12 23:44:53 +0000 | [diff] [blame] | 2083 | SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) { | 
| Eric Christopher | fc6de42 | 2014-08-05 02:39:49 +0000 | [diff] [blame] | 2084 | const R600InstrInfo *TII = | 
|  | 2085 | static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo()); | 
| Vincent Lejeune | ab3baf8 | 2013-09-12 23:44:44 +0000 | [diff] [blame] | 2086 | if (!Src.isMachineOpcode()) | 
|  | 2087 | return false; | 
|  | 2088 | switch (Src.getMachineOpcode()) { | 
|  | 2089 | case AMDGPU::FNEG_R600: | 
|  | 2090 | if (!Neg.getNode()) | 
|  | 2091 | return false; | 
|  | 2092 | Src = Src.getOperand(0); | 
|  | 2093 | Neg = DAG.getTargetConstant(1, MVT::i32); | 
|  | 2094 | return true; | 
|  | 2095 | case AMDGPU::FABS_R600: | 
|  | 2096 | if (!Abs.getNode()) | 
|  | 2097 | return false; | 
|  | 2098 | Src = Src.getOperand(0); | 
|  | 2099 | Abs = DAG.getTargetConstant(1, MVT::i32); | 
|  | 2100 | return true; | 
|  | 2101 | case AMDGPU::CONST_COPY: { | 
|  | 2102 | unsigned Opcode = ParentNode->getMachineOpcode(); | 
|  | 2103 | bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1; | 
|  | 2104 |  | 
|  | 2105 | if (!Sel.getNode()) | 
|  | 2106 | return false; | 
|  | 2107 |  | 
|  | 2108 | SDValue CstOffset = Src.getOperand(0); | 
|  | 2109 | if (ParentNode->getValueType(0).isVector()) | 
|  | 2110 | return false; | 
|  | 2111 |  | 
|  | 2112 | // Gather constants values | 
|  | 2113 | int SrcIndices[] = { | 
|  | 2114 | TII->getOperandIdx(Opcode, AMDGPU::OpName::src0), | 
|  | 2115 | TII->getOperandIdx(Opcode, AMDGPU::OpName::src1), | 
|  | 2116 | TII->getOperandIdx(Opcode, AMDGPU::OpName::src2), | 
|  | 2117 | TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X), | 
|  | 2118 | TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y), | 
|  | 2119 | TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z), | 
|  | 2120 | TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W), | 
|  | 2121 | TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X), | 
|  | 2122 | TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y), | 
|  | 2123 | TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z), | 
|  | 2124 | TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W) | 
|  | 2125 | }; | 
|  | 2126 | std::vector<unsigned> Consts; | 
| Matt Arsenault | 4d64f96 | 2014-05-12 19:23:21 +0000 | [diff] [blame] | 2127 | for (int OtherSrcIdx : SrcIndices) { | 
| Vincent Lejeune | ab3baf8 | 2013-09-12 23:44:44 +0000 | [diff] [blame] | 2128 | int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx); | 
|  | 2129 | if (OtherSrcIdx < 0 || OtherSelIdx < 0) | 
|  | 2130 | continue; | 
|  | 2131 | if (HasDst) { | 
|  | 2132 | OtherSrcIdx--; | 
|  | 2133 | OtherSelIdx--; | 
|  | 2134 | } | 
|  | 2135 | if (RegisterSDNode *Reg = | 
|  | 2136 | dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) { | 
|  | 2137 | if (Reg->getReg() == AMDGPU::ALU_CONST) { | 
| Matt Arsenault | b3ee388 | 2014-05-12 19:26:38 +0000 | [diff] [blame] | 2138 | ConstantSDNode *Cst | 
|  | 2139 | = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx)); | 
| Vincent Lejeune | ab3baf8 | 2013-09-12 23:44:44 +0000 | [diff] [blame] | 2140 | Consts.push_back(Cst->getZExtValue()); | 
|  | 2141 | } | 
|  | 2142 | } | 
|  | 2143 | } | 
|  | 2144 |  | 
| Matt Arsenault | 37c12d7 | 2014-05-12 20:42:57 +0000 | [diff] [blame] | 2145 | ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset); | 
| Vincent Lejeune | ab3baf8 | 2013-09-12 23:44:44 +0000 | [diff] [blame] | 2146 | Consts.push_back(Cst->getZExtValue()); | 
|  | 2147 | if (!TII->fitsConstReadLimitations(Consts)) { | 
|  | 2148 | return false; | 
|  | 2149 | } | 
|  | 2150 |  | 
|  | 2151 | Sel = CstOffset; | 
|  | 2152 | Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32); | 
|  | 2153 | return true; | 
|  | 2154 | } | 
| Vincent Lejeune | 9a248e5 | 2013-09-12 23:44:53 +0000 | [diff] [blame] | 2155 | case AMDGPU::MOV_IMM_I32: | 
|  | 2156 | case AMDGPU::MOV_IMM_F32: { | 
|  | 2157 | unsigned ImmReg = AMDGPU::ALU_LITERAL_X; | 
|  | 2158 | uint64_t ImmValue = 0; | 
|  | 2159 |  | 
|  | 2160 |  | 
|  | 2161 | if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) { | 
|  | 2162 | ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0)); | 
|  | 2163 | float FloatValue = FPC->getValueAPF().convertToFloat(); | 
|  | 2164 | if (FloatValue == 0.0) { | 
|  | 2165 | ImmReg = AMDGPU::ZERO; | 
|  | 2166 | } else if (FloatValue == 0.5) { | 
|  | 2167 | ImmReg = AMDGPU::HALF; | 
|  | 2168 | } else if (FloatValue == 1.0) { | 
|  | 2169 | ImmReg = AMDGPU::ONE; | 
|  | 2170 | } else { | 
|  | 2171 | ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue(); | 
|  | 2172 | } | 
|  | 2173 | } else { | 
|  | 2174 | ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0)); | 
|  | 2175 | uint64_t Value = C->getZExtValue(); | 
|  | 2176 | if (Value == 0) { | 
|  | 2177 | ImmReg = AMDGPU::ZERO; | 
|  | 2178 | } else if (Value == 1) { | 
|  | 2179 | ImmReg = AMDGPU::ONE_INT; | 
|  | 2180 | } else { | 
|  | 2181 | ImmValue = Value; | 
|  | 2182 | } | 
|  | 2183 | } | 
|  | 2184 |  | 
|  | 2185 | // Check that we aren't already using an immediate. | 
|  | 2186 | // XXX: It's possible for an instruction to have more than one | 
|  | 2187 | // immediate operand, but this is not supported yet. | 
|  | 2188 | if (ImmReg == AMDGPU::ALU_LITERAL_X) { | 
|  | 2189 | if (!Imm.getNode()) | 
|  | 2190 | return false; | 
|  | 2191 | ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm); | 
|  | 2192 | assert(C); | 
|  | 2193 | if (C->getZExtValue()) | 
|  | 2194 | return false; | 
|  | 2195 | Imm = DAG.getTargetConstant(ImmValue, MVT::i32); | 
|  | 2196 | } | 
|  | 2197 | Src = DAG.getRegister(ImmReg, MVT::i32); | 
|  | 2198 | return true; | 
|  | 2199 | } | 
| Vincent Lejeune | ab3baf8 | 2013-09-12 23:44:44 +0000 | [diff] [blame] | 2200 | default: | 
|  | 2201 | return false; | 
|  | 2202 | } | 
|  | 2203 | } | 
|  | 2204 |  | 
|  | 2205 |  | 
|  | 2206 | /// \brief Fold the instructions after selecting them | 
|  | 2207 | SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node, | 
|  | 2208 | SelectionDAG &DAG) const { | 
| Eric Christopher | fc6de42 | 2014-08-05 02:39:49 +0000 | [diff] [blame] | 2209 | const R600InstrInfo *TII = | 
|  | 2210 | static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo()); | 
| Vincent Lejeune | ab3baf8 | 2013-09-12 23:44:44 +0000 | [diff] [blame] | 2211 | if (!Node->isMachineOpcode()) | 
|  | 2212 | return Node; | 
|  | 2213 | unsigned Opcode = Node->getMachineOpcode(); | 
|  | 2214 | SDValue FakeOp; | 
|  | 2215 |  | 
|  | 2216 | std::vector<SDValue> Ops; | 
| Craig Topper | 66e588b | 2014-06-29 00:40:57 +0000 | [diff] [blame] | 2217 | for (const SDUse &I : Node->ops()) | 
|  | 2218 | Ops.push_back(I); | 
| Vincent Lejeune | ab3baf8 | 2013-09-12 23:44:44 +0000 | [diff] [blame] | 2219 |  | 
|  | 2220 | if (Opcode == AMDGPU::DOT_4) { | 
|  | 2221 | int OperandIdx[] = { | 
|  | 2222 | TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X), | 
|  | 2223 | TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y), | 
|  | 2224 | TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z), | 
|  | 2225 | TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W), | 
|  | 2226 | TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X), | 
|  | 2227 | TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y), | 
|  | 2228 | TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z), | 
|  | 2229 | TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W) | 
| NAKAMURA Takumi | 4bb85f9 | 2013-10-28 04:07:23 +0000 | [diff] [blame] | 2230 | }; | 
| Vincent Lejeune | ab3baf8 | 2013-09-12 23:44:44 +0000 | [diff] [blame] | 2231 | int NegIdx[] = { | 
|  | 2232 | TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X), | 
|  | 2233 | TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y), | 
|  | 2234 | TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z), | 
|  | 2235 | TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W), | 
|  | 2236 | TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X), | 
|  | 2237 | TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y), | 
|  | 2238 | TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z), | 
|  | 2239 | TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W) | 
|  | 2240 | }; | 
|  | 2241 | int AbsIdx[] = { | 
|  | 2242 | TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X), | 
|  | 2243 | TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y), | 
|  | 2244 | TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z), | 
|  | 2245 | TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W), | 
|  | 2246 | TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X), | 
|  | 2247 | TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y), | 
|  | 2248 | TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z), | 
|  | 2249 | TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W) | 
|  | 2250 | }; | 
|  | 2251 | for (unsigned i = 0; i < 8; i++) { | 
|  | 2252 | if (OperandIdx[i] < 0) | 
|  | 2253 | return Node; | 
|  | 2254 | SDValue &Src = Ops[OperandIdx[i] - 1]; | 
|  | 2255 | SDValue &Neg = Ops[NegIdx[i] - 1]; | 
|  | 2256 | SDValue &Abs = Ops[AbsIdx[i] - 1]; | 
|  | 2257 | bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1; | 
|  | 2258 | int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]); | 
|  | 2259 | if (HasDst) | 
|  | 2260 | SelIdx--; | 
|  | 2261 | SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp; | 
| Vincent Lejeune | 9a248e5 | 2013-09-12 23:44:53 +0000 | [diff] [blame] | 2262 | if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG)) | 
|  | 2263 | return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops); | 
|  | 2264 | } | 
|  | 2265 | } else if (Opcode == AMDGPU::REG_SEQUENCE) { | 
|  | 2266 | for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) { | 
|  | 2267 | SDValue &Src = Ops[i]; | 
|  | 2268 | if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG)) | 
| Vincent Lejeune | ab3baf8 | 2013-09-12 23:44:44 +0000 | [diff] [blame] | 2269 | return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops); | 
|  | 2270 | } | 
| Vincent Lejeune | 0167a31 | 2013-09-12 23:45:00 +0000 | [diff] [blame] | 2271 | } else if (Opcode == AMDGPU::CLAMP_R600) { | 
|  | 2272 | SDValue Src = Node->getOperand(0); | 
|  | 2273 | if (!Src.isMachineOpcode() || | 
|  | 2274 | !TII->hasInstrModifiers(Src.getMachineOpcode())) | 
|  | 2275 | return Node; | 
|  | 2276 | int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(), | 
|  | 2277 | AMDGPU::OpName::clamp); | 
|  | 2278 | if (ClampIdx < 0) | 
|  | 2279 | return Node; | 
|  | 2280 | std::vector<SDValue> Ops; | 
|  | 2281 | unsigned NumOp = Src.getNumOperands(); | 
|  | 2282 | for(unsigned i = 0; i < NumOp; ++i) | 
| NAKAMURA Takumi | 4bb85f9 | 2013-10-28 04:07:23 +0000 | [diff] [blame] | 2283 | Ops.push_back(Src.getOperand(i)); | 
| Vincent Lejeune | 0167a31 | 2013-09-12 23:45:00 +0000 | [diff] [blame] | 2284 | Ops[ClampIdx - 1] = DAG.getTargetConstant(1, MVT::i32); | 
|  | 2285 | return DAG.getMachineNode(Src.getMachineOpcode(), SDLoc(Node), | 
|  | 2286 | Node->getVTList(), Ops); | 
| Vincent Lejeune | ab3baf8 | 2013-09-12 23:44:44 +0000 | [diff] [blame] | 2287 | } else { | 
|  | 2288 | if (!TII->hasInstrModifiers(Opcode)) | 
|  | 2289 | return Node; | 
|  | 2290 | int OperandIdx[] = { | 
|  | 2291 | TII->getOperandIdx(Opcode, AMDGPU::OpName::src0), | 
|  | 2292 | TII->getOperandIdx(Opcode, AMDGPU::OpName::src1), | 
|  | 2293 | TII->getOperandIdx(Opcode, AMDGPU::OpName::src2) | 
|  | 2294 | }; | 
|  | 2295 | int NegIdx[] = { | 
|  | 2296 | TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg), | 
|  | 2297 | TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg), | 
|  | 2298 | TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg) | 
|  | 2299 | }; | 
|  | 2300 | int AbsIdx[] = { | 
|  | 2301 | TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs), | 
|  | 2302 | TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs), | 
|  | 2303 | -1 | 
|  | 2304 | }; | 
|  | 2305 | for (unsigned i = 0; i < 3; i++) { | 
|  | 2306 | if (OperandIdx[i] < 0) | 
|  | 2307 | return Node; | 
|  | 2308 | SDValue &Src = Ops[OperandIdx[i] - 1]; | 
|  | 2309 | SDValue &Neg = Ops[NegIdx[i] - 1]; | 
|  | 2310 | SDValue FakeAbs; | 
|  | 2311 | SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs; | 
|  | 2312 | bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1; | 
|  | 2313 | int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]); | 
| Vincent Lejeune | 9a248e5 | 2013-09-12 23:44:53 +0000 | [diff] [blame] | 2314 | int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal); | 
|  | 2315 | if (HasDst) { | 
| Vincent Lejeune | ab3baf8 | 2013-09-12 23:44:44 +0000 | [diff] [blame] | 2316 | SelIdx--; | 
| Vincent Lejeune | 9a248e5 | 2013-09-12 23:44:53 +0000 | [diff] [blame] | 2317 | ImmIdx--; | 
|  | 2318 | } | 
| Vincent Lejeune | ab3baf8 | 2013-09-12 23:44:44 +0000 | [diff] [blame] | 2319 | SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp; | 
| Vincent Lejeune | 9a248e5 | 2013-09-12 23:44:53 +0000 | [diff] [blame] | 2320 | SDValue &Imm = Ops[ImmIdx]; | 
|  | 2321 | if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG)) | 
| Vincent Lejeune | ab3baf8 | 2013-09-12 23:44:44 +0000 | [diff] [blame] | 2322 | return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops); | 
|  | 2323 | } | 
|  | 2324 | } | 
|  | 2325 |  | 
|  | 2326 | return Node; | 
|  | 2327 | } |