David Stuttard | f77079f | 2019-01-14 11:55:24 +0000 | [diff] [blame] | 1 | //===-- SIAddIMGInit.cpp - Add any required IMG inits ---------------------===// |
| 2 | // |
Chandler Carruth | 2946cd7 | 2019-01-19 08:50:56 +0000 | [diff] [blame] | 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
David Stuttard | f77079f | 2019-01-14 11:55:24 +0000 | [diff] [blame] | 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | /// \file |
/// Any MIMG instructions that use tfe or lwe require an initialization of the
/// result register that will be written in the case of a memory access
/// failure. The required code is also added to tie this init code to the
/// result of the img instruction.
| 14 | /// |
| 15 | //===----------------------------------------------------------------------===// |
| 16 | // |
| 17 | |
| 18 | #include "AMDGPU.h" |
| 19 | #include "AMDGPUSubtarget.h" |
| 20 | #include "MCTargetDesc/AMDGPUMCTargetDesc.h" |
| 21 | #include "SIInstrInfo.h" |
| 22 | #include "llvm/CodeGen/MachineFunctionPass.h" |
| 23 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
| 24 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
| 25 | #include "llvm/IR/Function.h" |
| 26 | #include "llvm/Support/Debug.h" |
| 27 | #include "llvm/Target/TargetMachine.h" |
| 28 | |
| 29 | #define DEBUG_TYPE "si-img-init" |
| 30 | |
| 31 | using namespace llvm; |
| 32 | |
| 33 | namespace { |
| 34 | |
| 35 | class SIAddIMGInit : public MachineFunctionPass { |
| 36 | public: |
| 37 | static char ID; |
| 38 | |
| 39 | public: |
| 40 | SIAddIMGInit() : MachineFunctionPass(ID) { |
| 41 | initializeSIAddIMGInitPass(*PassRegistry::getPassRegistry()); |
| 42 | } |
| 43 | |
| 44 | bool runOnMachineFunction(MachineFunction &MF) override; |
| 45 | |
| 46 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
| 47 | AU.setPreservesCFG(); |
| 48 | MachineFunctionPass::getAnalysisUsage(AU); |
| 49 | } |
| 50 | }; |
| 51 | |
| 52 | } // End anonymous namespace. |
| 53 | |
// Register the pass with the LLVM pass registry under DEBUG_TYPE
// ("si-img-init"); not a CFG-only pass, not an analysis.
INITIALIZE_PASS(SIAddIMGInit, DEBUG_TYPE, "SI Add IMG Init", false, false)

char SIAddIMGInit::ID = 0;

// Opaque handle the AMDGPU target uses to identify/schedule this pass.
char &llvm::SIAddIMGInitID = SIAddIMGInit::ID;

// Factory called by the target pass pipeline to create an instance.
FunctionPass *llvm::createSIAddIMGInitPass() { return new SIAddIMGInit(); }
| 61 | |
| 62 | bool SIAddIMGInit::runOnMachineFunction(MachineFunction &MF) { |
| 63 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
| 64 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
| 65 | const SIInstrInfo *TII = ST.getInstrInfo(); |
| 66 | const SIRegisterInfo *RI = ST.getRegisterInfo(); |
| 67 | bool Changed = false; |
| 68 | |
| 69 | for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; |
| 70 | ++BI) { |
| 71 | MachineBasicBlock &MBB = *BI; |
| 72 | MachineBasicBlock::iterator I, Next; |
| 73 | for (I = MBB.begin(); I != MBB.end(); I = Next) { |
| 74 | Next = std::next(I); |
| 75 | MachineInstr &MI = *I; |
| 76 | |
| 77 | auto Opcode = MI.getOpcode(); |
| 78 | if (TII->isMIMG(Opcode) && !MI.mayStore()) { |
| 79 | MachineOperand *TFE = TII->getNamedOperand(MI, AMDGPU::OpName::tfe); |
| 80 | MachineOperand *LWE = TII->getNamedOperand(MI, AMDGPU::OpName::lwe); |
| 81 | MachineOperand *D16 = TII->getNamedOperand(MI, AMDGPU::OpName::d16); |
| 82 | |
| 83 | // Check for instructions that don't have tfe or lwe fields |
| 84 | // There shouldn't be any at this point. |
| 85 | assert( (TFE && LWE) && "Expected tfe and lwe operands in instruction"); |
| 86 | |
| 87 | unsigned TFEVal = TFE->getImm(); |
| 88 | unsigned LWEVal = LWE->getImm(); |
| 89 | unsigned D16Val = D16 ? D16->getImm() : 0; |
| 90 | |
| 91 | if (TFEVal || LWEVal) { |
| 92 | // At least one of TFE or LWE are non-zero |
| 93 | // We have to insert a suitable initialization of the result value and |
| 94 | // tie this to the dest of the image instruction. |
| 95 | |
| 96 | const DebugLoc &DL = MI.getDebugLoc(); |
| 97 | |
| 98 | int DstIdx = |
| 99 | AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata); |
| 100 | |
| 101 | // Calculate which dword we have to initialize to 0. |
| 102 | MachineOperand *MO_Dmask = |
| 103 | TII->getNamedOperand(MI, AMDGPU::OpName::dmask); |
| 104 | |
| 105 | // check that dmask operand is found. |
| 106 | assert(MO_Dmask && "Expected dmask operand in instruction"); |
| 107 | |
| 108 | unsigned dmask = MO_Dmask->getImm(); |
| 109 | // Determine the number of active lanes taking into account the |
| 110 | // Gather4 special case |
| 111 | unsigned ActiveLanes = |
| 112 | TII->isGather4(Opcode) ? 4 : countPopulation(dmask); |
| 113 | |
| 114 | // Subreg indices are counted from 1 |
| 115 | // When D16 then we want next whole VGPR after write data. |
| 116 | static_assert(AMDGPU::sub0 == 1 && AMDGPU::sub4 == 5, "Subreg indices different from expected"); |
| 117 | |
| 118 | bool Packed = !ST.hasUnpackedD16VMem(); |
| 119 | |
| 120 | unsigned InitIdx = |
| 121 | D16Val && Packed ? ((ActiveLanes + 1) >> 1) + 1 : ActiveLanes + 1; |
| 122 | |
| 123 | // Abandon attempt if the dst size isn't large enough |
| 124 | // - this is in fact an error but this is picked up elsewhere and |
| 125 | // reported correctly. |
| 126 | uint32_t DstSize = |
| 127 | RI->getRegSizeInBits(*TII->getOpRegClass(MI, DstIdx)) / 32; |
| 128 | if (DstSize < InitIdx) |
| 129 | continue; |
| 130 | |
| 131 | // Create a register for the intialization value. |
| 132 | unsigned PrevDst = |
| 133 | MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx)); |
| 134 | unsigned NewDst = 0; // Final initialized value will be in here |
| 135 | |
| 136 | // If PRTStrictNull feature is enabled (the default) then initialize |
| 137 | // all the result registers to 0, otherwise just the error indication |
| 138 | // register (VGPRn+1) |
| 139 | unsigned SizeLeft = ST.usePRTStrictNull() ? InitIdx : 1; |
| 140 | unsigned CurrIdx = ST.usePRTStrictNull() ? 1 : InitIdx; |
| 141 | |
| 142 | if (DstSize == 1) { |
| 143 | // In this case we can just initialize the result directly |
| 144 | BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), PrevDst) |
| 145 | .addImm(0); |
| 146 | NewDst = PrevDst; |
| 147 | } else { |
| 148 | BuildMI(MBB, MI, DL, TII->get(AMDGPU::IMPLICIT_DEF), PrevDst); |
| 149 | for (; SizeLeft; SizeLeft--, CurrIdx++) { |
| 150 | NewDst = |
| 151 | MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx)); |
| 152 | // Initialize dword |
| 153 | unsigned SubReg = |
| 154 | MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); |
| 155 | BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), SubReg) |
| 156 | .addImm(0); |
| 157 | // Insert into the super-reg |
| 158 | BuildMI(MBB, I, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewDst) |
| 159 | .addReg(PrevDst) |
| 160 | .addReg(SubReg) |
| 161 | .addImm(CurrIdx); |
| 162 | |
| 163 | PrevDst = NewDst; |
| 164 | } |
| 165 | } |
| 166 | |
| 167 | // Add as an implicit operand |
| 168 | MachineInstrBuilder(MF, MI).addReg(NewDst, RegState::Implicit); |
| 169 | |
| 170 | // Tie the just added implicit operand to the dst |
| 171 | MI.tieOperands(DstIdx, MI.getNumOperands() - 1); |
| 172 | |
| 173 | Changed = true; |
| 174 | } |
| 175 | } |
| 176 | } |
| 177 | } |
| 178 | |
| 179 | return Changed; |
| 180 | } |