| //===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISCV ------===// |
| // |
| // The LLVM Compiler Infrastructure |
| // |
| // This file is distributed under the University of Illinois Open Source |
| // License. See LICENSE.TXT for details. |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file defines an instruction selector for the RISCV target. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "RISCV.h" |
| #include "MCTargetDesc/RISCVMCTargetDesc.h" |
| #include "RISCVTargetMachine.h" |
| #include "llvm/CodeGen/MachineFrameInfo.h" |
| #include "llvm/CodeGen/SelectionDAGISel.h" |
| #include "llvm/Support/Debug.h" |
| #include "llvm/Support/MathExtras.h" |
| #include "llvm/Support/raw_ostream.h" |
| using namespace llvm; |
| |
| #define DEBUG_TYPE "riscv-isel" |
| |
| // RISCV-specific code to select RISCV machine instructions for |
| // SelectionDAG operations. |
| namespace { |
| class RISCVDAGToDAGISel final : public SelectionDAGISel { |
| const RISCVSubtarget *Subtarget; |
| |
| public: |
| explicit RISCVDAGToDAGISel(RISCVTargetMachine &TargetMachine) |
| : SelectionDAGISel(TargetMachine) {} |
| |
| StringRef getPassName() const override { |
| return "RISCV DAG->DAG Pattern Instruction Selection"; |
| } |
| |
| bool runOnMachineFunction(MachineFunction &MF) override { |
| Subtarget = &MF.getSubtarget<RISCVSubtarget>(); |
| return SelectionDAGISel::runOnMachineFunction(MF); |
| } |
| |
| void PostprocessISelDAG() override; |
| |
| void Select(SDNode *Node) override; |
| |
| bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, |
| std::vector<SDValue> &OutOps) override; |
| |
| bool SelectAddrFI(SDValue Addr, SDValue &Base); |
| |
| // Include the pieces autogenerated from the target description. |
| #include "RISCVGenDAGISel.inc" |
| |
| private: |
| void doPeepholeLoadStoreADDI(); |
| void doPeepholeGlobalAddiLuiOffset(); |
| void doPeepholeBuildPairF64SplitF64(); |
| }; |
| } |
| |
| void RISCVDAGToDAGISel::PostprocessISelDAG() { |
| doPeepholeLoadStoreADDI(); |
| doPeepholeGlobalAddiLuiOffset(); |
| doPeepholeBuildPairF64SplitF64(); |
| } |
| |
| void RISCVDAGToDAGISel::Select(SDNode *Node) { |
| unsigned Opcode = Node->getOpcode(); |
| MVT XLenVT = Subtarget->getXLenVT(); |
| |
| // If we have a custom node, we have already selected |
| if (Node->isMachineOpcode()) { |
| LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n"); |
| Node->setNodeId(-1); |
| return; |
| } |
| |
| // Instruction Selection not handled by the auto-generated tablegen selection |
| // should be handled here. |
| EVT VT = Node->getValueType(0); |
| if (Opcode == ISD::Constant && VT == XLenVT) { |
| auto *ConstNode = cast<ConstantSDNode>(Node); |
| // Materialize zero constants as copies from X0. This allows the coalescer |
| // to propagate these into other instructions. |
| if (ConstNode->isNullValue()) { |
| SDValue New = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node), |
| RISCV::X0, XLenVT); |
| ReplaceNode(Node, New.getNode()); |
| return; |
| } |
| } |
| if (Opcode == ISD::FrameIndex) { |
| SDLoc DL(Node); |
| SDValue Imm = CurDAG->getTargetConstant(0, DL, XLenVT); |
| int FI = cast<FrameIndexSDNode>(Node)->getIndex(); |
| EVT VT = Node->getValueType(0); |
| SDValue TFI = CurDAG->getTargetFrameIndex(FI, VT); |
| ReplaceNode(Node, CurDAG->getMachineNode(RISCV::ADDI, DL, VT, TFI, Imm)); |
| return; |
| } |
| |
| // Select the default instruction. |
| SelectCode(Node); |
| } |
| |
| bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand( |
| const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) { |
| switch (ConstraintID) { |
| case InlineAsm::Constraint_i: |
| case InlineAsm::Constraint_m: |
| // We just support simple memory operands that have a single address |
| // operand and need no special handling. |
| OutOps.push_back(Op); |
| return false; |
| default: |
| break; |
| } |
| |
| return true; |
| } |
| |
| bool RISCVDAGToDAGISel::SelectAddrFI(SDValue Addr, SDValue &Base) { |
| if (auto FIN = dyn_cast<FrameIndexSDNode>(Addr)) { |
| Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT()); |
| return true; |
| } |
| return false; |
| } |
| |
| // Detect the pattern lui %hi(global) --> ADDI %lo(global) |
| // HiLUI LoADDI |
| static bool detectLuiAddiGlobal(SDNode *Tail, unsigned &Idx, SDValue &LoADDI, |
| SDValue &HiLUI, GlobalAddressSDNode *&GAlo, |
| GlobalAddressSDNode *&GAhi) { |
| // Try to detect the pattern on every operand of the tail instruction. |
| for (Idx = 0; Idx < Tail->getNumOperands(); Idx++) { |
| LoADDI = Tail->getOperand(Idx); |
| // LoADDI should only be used by one instruction (Tail). |
| if (!LoADDI->isMachineOpcode() || |
| !(LoADDI->getMachineOpcode() == RISCV::ADDI) || |
| !isa<GlobalAddressSDNode>(LoADDI->getOperand(1)) || |
| !LoADDI->hasOneUse()) |
| continue; |
| // Check for existence of %lo target flag. |
| GAlo = cast<GlobalAddressSDNode>(LoADDI->getOperand(1)); |
| if (!(GAlo->getTargetFlags() == RISCVII::MO_LO) || |
| !(GAlo->getOffset() == 0)) |
| return false; |
| // Check for existence of %hi target flag. |
| HiLUI = LoADDI->getOperand(0); |
| if (!HiLUI->isMachineOpcode() || |
| !(HiLUI->getMachineOpcode() == RISCV::LUI) || |
| !isa<GlobalAddressSDNode>(HiLUI->getOperand(0)) || !HiLUI->hasOneUse()) |
| return false; |
| GAhi = cast<GlobalAddressSDNode>(HiLUI->getOperand(0)); |
| if (!(GAhi->getTargetFlags() == RISCVII::MO_HI) || |
| !(GAhi->getOffset() == 0)) |
| return false; |
| return true; |
| } |
| return false; |
| } |
| |
| static bool matchLuiOffset(SDValue &OffsetLUI, int64_t &Offset) { |
| if (!OffsetLUI->isMachineOpcode() || |
| !(OffsetLUI->getMachineOpcode() == RISCV::LUI) || |
| !isa<ConstantSDNode>(OffsetLUI->getOperand(0))) |
| return false; |
| Offset = cast<ConstantSDNode>(OffsetLUI->getOperand(0))->getSExtValue(); |
| Offset = Offset << 12; |
| LLVM_DEBUG(dbgs() << " Detected \" LUI Offset_hi\"\n"); |
| return true; |
| } |
| |
| static bool matchAddiLuiOffset(SDValue &OffsetLoADDI, int64_t &Offset) { |
| // LoADDI should only be used by the tail instruction only. |
| if (!OffsetLoADDI->isMachineOpcode() || |
| !(OffsetLoADDI->getMachineOpcode() == RISCV::ADDI) || |
| !isa<ConstantSDNode>(OffsetLoADDI->getOperand(1)) || |
| !OffsetLoADDI->hasOneUse()) |
| return false; |
| int64_t OffLo = |
| cast<ConstantSDNode>(OffsetLoADDI->getOperand(1))->getZExtValue(); |
| // HiLUI should only be used by the loADDI. |
| SDValue OffsetHiLUI = (OffsetLoADDI->getOperand(0)); |
| if (!OffsetHiLUI->isMachineOpcode() || |
| !(OffsetHiLUI->getMachineOpcode() == RISCV::LUI) || |
| !isa<ConstantSDNode>(OffsetHiLUI->getOperand(0)) || |
| !OffsetHiLUI->hasOneUse()) |
| return false; |
| int64_t OffHi = |
| cast<ConstantSDNode>(OffsetHiLUI->getOperand(0))->getSExtValue(); |
| Offset = (OffHi << 12) + OffLo; |
| LLVM_DEBUG(dbgs() << " Detected \" ADDI (LUI Offset_hi), Offset_lo\"\n"); |
| return true; |
| } |
| |
| static void updateTailInstrUsers(SDNode *Tail, SelectionDAG *CurDAG, |
| GlobalAddressSDNode *GAhi, |
| GlobalAddressSDNode *GAlo, |
| SDValue &GlobalHiLUI, SDValue &GlobalLoADDI, |
| int64_t Offset) { |
| // Update the offset in GAhi and GAlo. |
| SDLoc DL(Tail->getOperand(1)); |
| SDValue GAHiNew = CurDAG->getTargetGlobalAddress(GAhi->getGlobal(), DL, |
| GlobalHiLUI.getValueType(), |
| Offset, RISCVII::MO_HI); |
| SDValue GALoNew = CurDAG->getTargetGlobalAddress(GAlo->getGlobal(), DL, |
| GlobalLoADDI.getValueType(), |
| Offset, RISCVII::MO_LO); |
| CurDAG->UpdateNodeOperands(GlobalHiLUI.getNode(), GAHiNew); |
| CurDAG->UpdateNodeOperands(GlobalLoADDI.getNode(), GlobalHiLUI, GALoNew); |
| // Update all uses of the Tail with the GlobalLoADDI. After |
| // this Tail will be a dead node. |
| SDValue From = SDValue(Tail, 0); |
| CurDAG->ReplaceAllUsesOfValuesWith(&From, &GlobalLoADDI, 1); |
| } |
| |
| // TODO: This transformation might be better implemeted in a Machine Funtion |
| // Pass as discussed here: https://reviews.llvm.org/D45748. |
| // |
| // Merge the offset of address calculation into the offset field |
| // of a global address node in a global address lowering sequence ("LUI |
| // %hi(global) --> add %lo(global)") under the following conditions: 1) The |
| // offset field in the global address lowering sequence is zero. 2) The lowered |
| // global address is only used in one node, referred to as "Tail". |
| |
| // This peephole does the following transformations to merge the offset: |
| |
| // 1) ADDI (ADDI (LUI %hi(global)) %lo(global)), offset |
| // ---> |
| // ADDI (LUI %hi(global + offset)) %lo(global + offset). |
| // |
| // This generates: |
| // lui a0, hi (global + offset) |
| // add a0, a0, lo (global + offset) |
| // Instead of |
| // lui a0, hi (global) |
| // addi a0, hi (global) |
| // addi a0, offset |
| // This pattern is for cases when the offset is small enough to fit in the |
| // immediate filed of ADDI (less than 12 bits). |
| |
| // 2) ADD ((ADDI (LUI %hi(global)) %lo(global)), (LUI hi_offset)) |
| // ---> |
| // offset = hi_offset << 12 |
| // ADDI (LUI %hi(global + offset)) %lo(global + offset) |
| |
| // Which generates the ASM: |
| // lui a0, hi(global + offset) |
| // addi a0, lo(global + offset) |
| // Instead of: |
| // lui a0, hi(global) |
| // addi a0, lo(global) |
| // lui a1, (offset) |
| // add a0, a0, a1 |
| |
| // This pattern is for cases when the offset doesn't fit in an immediate field |
| // of ADDI but the lower 12 bits are all zeros. |
| |
| // 3) ADD ((ADDI (LUI %hi(global)) %lo(global)), (ADDI lo_offset, (LUI |
| // hi_offset))) |
| // ---> |
| // ADDI (LUI %hi(global + offset)) %lo(global + offset) |
| // Which generates the ASM: |
| // lui a1, %hi(global + offhi20<<12 + offlo12) |
| // addi a1, %lo(global + offhi20<<12 + offlo12) |
| // Instead of: |
| // lui a0, hi(global) |
| // addi a0, lo(global) |
| // lui a1, (offhi20) |
| // addi a1, (offlo12) |
| // add a0, a0, a1 |
| // This pattern is for cases when the offset doesn't fit in an immediate field |
| // of ADDI and both the lower 1 bits and high 20 bits are non zero. |
| void RISCVDAGToDAGISel::doPeepholeGlobalAddiLuiOffset() { |
| SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode()); |
| ++Position; |
| SelectionDAG::allnodes_iterator Begin(CurDAG->allnodes_begin()); |
| while (Position != Begin) { |
| SDNode *Tail = &*--Position; |
| // Skip dead nodes and any non-machine opcodes. |
| if (Tail->use_empty() || !Tail->isMachineOpcode()) |
| continue; |
| // The tail instruction can be an ADD or an ADDI. |
| if (!Tail->isMachineOpcode() || !(Tail->getMachineOpcode() == RISCV::ADD || |
| Tail->getMachineOpcode() == RISCV::ADDI)) |
| continue; |
| // First detect the global address part of pattern: |
| // (lui %hi(global) --> Addi %lo(global)) |
| unsigned GlobalLoADDiIdx; |
| SDValue GlobalLoADDI; |
| SDValue GlobalHiLUI; |
| GlobalAddressSDNode *GAhi; |
| GlobalAddressSDNode *GAlo; |
| if (!detectLuiAddiGlobal(Tail, GlobalLoADDiIdx, GlobalLoADDI, GlobalHiLUI, |
| GAlo, GAhi)) |
| continue; |
| LLVM_DEBUG(dbgs() << " Detected \"ADDI LUI %hi(global), %lo(global)\n"); |
| // Detect the offset part for the address calculation by looking at the |
| // other operand of the tail instruction: |
| int64_t Offset; |
| if (Tail->getMachineOpcode() == RISCV::ADD) { |
| // If the Tail is an ADD instruction, the offset can be in two forms: |
| // 1) LUI hi_Offset followed by: |
| // ADDI lo_offset |
| // This happens in case the offset has non zero bits in |
| // both hi 20 and lo 12 bits. |
| // 2) LUI (offset20) |
| // This happens in case the lower 12 bits of the offset are zeros. |
| SDValue OffsetVal = Tail->getOperand(1 - GlobalLoADDiIdx); |
| if (!matchAddiLuiOffset(OffsetVal, Offset) && |
| !matchLuiOffset(OffsetVal, Offset)) |
| continue; |
| } else |
| // The Tail is an ADDI instruction: |
| Offset = cast<ConstantSDNode>(Tail->getOperand(1 - GlobalLoADDiIdx)) |
| ->getSExtValue(); |
| |
| LLVM_DEBUG( |
| dbgs() |
| << " Fold offset value into global offset of LUI %hi and ADDI %lo\n"); |
| LLVM_DEBUG(dbgs() << "\tTail:"); |
| LLVM_DEBUG(Tail->dump(CurDAG)); |
| LLVM_DEBUG(dbgs() << "\tGlobalHiLUI:"); |
| LLVM_DEBUG(GlobalHiLUI->dump(CurDAG)); |
| LLVM_DEBUG(dbgs() << "\tGlobalLoADDI:"); |
| LLVM_DEBUG(GlobalLoADDI->dump(CurDAG)); |
| LLVM_DEBUG(dbgs() << "\n"); |
| updateTailInstrUsers(Tail, CurDAG, GAhi, GAlo, GlobalHiLUI, GlobalLoADDI, |
| Offset); |
| } |
| CurDAG->RemoveDeadNodes(); |
| } |
| |
| // Merge an ADDI into the offset of a load/store instruction where possible. |
| // (load (add base, off), 0) -> (load base, off) |
| // (store val, (add base, off)) -> (store val, base, off) |
| void RISCVDAGToDAGISel::doPeepholeLoadStoreADDI() { |
| SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode()); |
| ++Position; |
| |
| while (Position != CurDAG->allnodes_begin()) { |
| SDNode *N = &*--Position; |
| // Skip dead nodes and any non-machine opcodes. |
| if (N->use_empty() || !N->isMachineOpcode()) |
| continue; |
| |
| int OffsetOpIdx; |
| int BaseOpIdx; |
| |
| // Only attempt this optimisation for I-type loads and S-type stores. |
| switch (N->getMachineOpcode()) { |
| default: |
| continue; |
| case RISCV::LB: |
| case RISCV::LH: |
| case RISCV::LW: |
| case RISCV::LBU: |
| case RISCV::LHU: |
| case RISCV::LWU: |
| case RISCV::LD: |
| case RISCV::FLW: |
| case RISCV::FLD: |
| BaseOpIdx = 0; |
| OffsetOpIdx = 1; |
| break; |
| case RISCV::SB: |
| case RISCV::SH: |
| case RISCV::SW: |
| case RISCV::SD: |
| case RISCV::FSW: |
| case RISCV::FSD: |
| BaseOpIdx = 1; |
| OffsetOpIdx = 2; |
| break; |
| } |
| |
| // Currently, the load/store offset must be 0 to be considered for this |
| // peephole optimisation. |
| if (!isa<ConstantSDNode>(N->getOperand(OffsetOpIdx)) || |
| N->getConstantOperandVal(OffsetOpIdx) != 0) |
| continue; |
| |
| SDValue Base = N->getOperand(BaseOpIdx); |
| |
| // If the base is an ADDI, we can merge it in to the load/store. |
| if (!Base.isMachineOpcode() || Base.getMachineOpcode() != RISCV::ADDI) |
| continue; |
| |
| SDValue ImmOperand = Base.getOperand(1); |
| |
| if (auto Const = dyn_cast<ConstantSDNode>(ImmOperand)) { |
| ImmOperand = CurDAG->getTargetConstant( |
| Const->getSExtValue(), SDLoc(ImmOperand), ImmOperand.getValueType()); |
| } else if (auto GA = dyn_cast<GlobalAddressSDNode>(ImmOperand)) { |
| ImmOperand = CurDAG->getTargetGlobalAddress( |
| GA->getGlobal(), SDLoc(ImmOperand), ImmOperand.getValueType(), |
| GA->getOffset(), GA->getTargetFlags()); |
| } else { |
| continue; |
| } |
| |
| LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: "); |
| LLVM_DEBUG(Base->dump(CurDAG)); |
| LLVM_DEBUG(dbgs() << "\nN: "); |
| LLVM_DEBUG(N->dump(CurDAG)); |
| LLVM_DEBUG(dbgs() << "\n"); |
| |
| // Modify the offset operand of the load/store. |
| if (BaseOpIdx == 0) // Load |
| CurDAG->UpdateNodeOperands(N, Base.getOperand(0), ImmOperand, |
| N->getOperand(2)); |
| else // Store |
| CurDAG->UpdateNodeOperands(N, N->getOperand(0), Base.getOperand(0), |
| ImmOperand, N->getOperand(3)); |
| |
| // The add-immediate may now be dead, in which case remove it. |
| if (Base.getNode()->use_empty()) |
| CurDAG->RemoveDeadNode(Base.getNode()); |
| } |
| } |
| |
| // Remove redundant BuildPairF64+SplitF64 pairs. i.e. cases where an f64 is |
| // built of two i32 values, only to be split apart again. This must be done |
| // here as a peephole optimisation as the DAG has not been fully legalized at |
| // the point BuildPairF64/SplitF64 nodes are created in RISCVISelLowering, so |
| // some nodes would not yet have been replaced with libcalls. |
| void RISCVDAGToDAGISel::doPeepholeBuildPairF64SplitF64() { |
| SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode()); |
| ++Position; |
| |
| while (Position != CurDAG->allnodes_begin()) { |
| SDNode *N = &*--Position; |
| // Skip dead nodes and any nodes other than SplitF64Pseudo. |
| if (N->use_empty() || !N->isMachineOpcode() || |
| !(N->getMachineOpcode() == RISCV::SplitF64Pseudo)) |
| continue; |
| |
| // If the operand to SplitF64 is a BuildPairF64, the split operation is |
| // redundant. Just use the operands to BuildPairF64 as the result. |
| SDValue F64Val = N->getOperand(0); |
| if (F64Val.isMachineOpcode() && |
| F64Val.getMachineOpcode() == RISCV::BuildPairF64Pseudo) { |
| LLVM_DEBUG( |
| dbgs() << "Removing redundant SplitF64Pseudo and replacing uses " |
| "with BuildPairF64Pseudo operands:\n"); |
| LLVM_DEBUG(dbgs() << "N: "); |
| LLVM_DEBUG(N->dump(CurDAG)); |
| LLVM_DEBUG(dbgs() << "F64Val: "); |
| LLVM_DEBUG(F64Val->dump(CurDAG)); |
| LLVM_DEBUG(dbgs() << "\n"); |
| SDValue From[] = {SDValue(N, 0), SDValue(N, 1)}; |
| SDValue To[] = {F64Val.getOperand(0), F64Val.getOperand(1)}; |
| CurDAG->ReplaceAllUsesOfValuesWith(From, To, 2); |
| } |
| } |
| CurDAG->RemoveDeadNodes(); |
| } |
| |
| // This pass converts a legalized DAG into a RISCV-specific DAG, ready |
| // for instruction scheduling. |
| FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM) { |
| return new RISCVDAGToDAGISel(TM); |
| } |