|  | //===- AArch64SLSHardening.cpp - Harden Straight Line Missspeculation -----===// | 
|  | // | 
|  | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|  | // See https://llvm.org/LICENSE.txt for license information. | 
|  | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  | // | 
|  | // This file contains a pass to insert code to mitigate against side channel | 
|  | // vulnerabilities that may happen under straight line miss-speculation. | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  |  | 
|  | #include "AArch64InstrInfo.h" | 
|  | #include "AArch64Subtarget.h" | 
|  | #include "Utils/AArch64BaseInfo.h" | 
|  | #include "llvm/ADT/BitVector.h" | 
|  | #include "llvm/ADT/SmallVector.h" | 
|  | #include "llvm/CodeGen/IndirectThunks.h" | 
|  | #include "llvm/CodeGen/MachineBasicBlock.h" | 
|  | #include "llvm/CodeGen/MachineFunction.h" | 
|  | #include "llvm/CodeGen/MachineFunctionPass.h" | 
|  | #include "llvm/CodeGen/MachineInstr.h" | 
|  | #include "llvm/CodeGen/MachineInstrBuilder.h" | 
|  | #include "llvm/CodeGen/MachineOperand.h" | 
|  | #include "llvm/CodeGen/MachineRegisterInfo.h" | 
|  | #include "llvm/CodeGen/RegisterScavenging.h" | 
|  | #include "llvm/IR/DebugLoc.h" | 
|  | #include "llvm/Pass.h" | 
|  | #include "llvm/Support/CodeGen.h" | 
|  | #include "llvm/Support/Debug.h" | 
|  | #include "llvm/Target/TargetMachine.h" | 
|  | #include <cassert> | 
|  |  | 
|  | using namespace llvm; | 
|  |  | 
|  | #define DEBUG_TYPE "aarch64-sls-hardening" | 
|  |  | 
|  | #define AARCH64_SLS_HARDENING_NAME "AArch64 sls hardening pass" | 
|  |  | 
|  | namespace { | 
|  |  | 
|  | class AArch64SLSHardening : public MachineFunctionPass { | 
|  | public: | 
|  | const TargetInstrInfo *TII; | 
|  | const TargetRegisterInfo *TRI; | 
|  | const AArch64Subtarget *ST; | 
|  |  | 
|  | static char ID; | 
|  |  | 
|  | AArch64SLSHardening() : MachineFunctionPass(ID) { | 
|  | initializeAArch64SLSHardeningPass(*PassRegistry::getPassRegistry()); | 
|  | } | 
|  |  | 
|  | bool runOnMachineFunction(MachineFunction &Fn) override; | 
|  |  | 
|  | StringRef getPassName() const override { return AARCH64_SLS_HARDENING_NAME; } | 
|  |  | 
|  | private: | 
|  | bool hardenReturnsAndBRs(MachineBasicBlock &MBB) const; | 
|  | bool hardenBLRs(MachineBasicBlock &MBB) const; | 
|  | MachineBasicBlock &ConvertBLRToBL(MachineBasicBlock &MBB, | 
|  | MachineBasicBlock::iterator) const; | 
|  | }; | 
|  |  | 
|  | } // end anonymous namespace | 
|  |  | 
|  | char AArch64SLSHardening::ID = 0; | 
|  |  | 
|  | INITIALIZE_PASS(AArch64SLSHardening, "aarch64-sls-hardening", | 
|  | AARCH64_SLS_HARDENING_NAME, false, false) | 
|  |  | 
|  | static void insertSpeculationBarrier(const AArch64Subtarget *ST, | 
|  | MachineBasicBlock &MBB, | 
|  | MachineBasicBlock::iterator MBBI, | 
|  | DebugLoc DL, | 
|  | bool AlwaysUseISBDSB = false) { | 
|  | assert(MBBI != MBB.begin() && | 
|  | "Must not insert SpeculationBarrierEndBB as only instruction in MBB."); | 
|  | assert(std::prev(MBBI)->isBarrier() && | 
|  | "SpeculationBarrierEndBB must only follow unconditional control flow " | 
|  | "instructions."); | 
|  | assert(std::prev(MBBI)->isTerminator() && | 
|  | "SpeculationBarrierEndBB must only follow terminators."); | 
|  | const TargetInstrInfo *TII = ST->getInstrInfo(); | 
|  | unsigned BarrierOpc = ST->hasSB() && !AlwaysUseISBDSB | 
|  | ? AArch64::SpeculationBarrierSBEndBB | 
|  | : AArch64::SpeculationBarrierISBDSBEndBB; | 
|  | if (MBBI == MBB.end() || | 
|  | (MBBI->getOpcode() != AArch64::SpeculationBarrierSBEndBB && | 
|  | MBBI->getOpcode() != AArch64::SpeculationBarrierISBDSBEndBB)) | 
|  | BuildMI(MBB, MBBI, DL, TII->get(BarrierOpc)); | 
|  | } | 
|  |  | 
|  | bool AArch64SLSHardening::runOnMachineFunction(MachineFunction &MF) { | 
|  | ST = &MF.getSubtarget<AArch64Subtarget>(); | 
|  | TII = MF.getSubtarget().getInstrInfo(); | 
|  | TRI = MF.getSubtarget().getRegisterInfo(); | 
|  |  | 
|  | bool Modified = false; | 
|  | for (auto &MBB : MF) { | 
|  | Modified |= hardenReturnsAndBRs(MBB); | 
|  | Modified |= hardenBLRs(MBB); | 
|  | } | 
|  |  | 
|  | return Modified; | 
|  | } | 
|  |  | 
|  | static bool isBLR(const MachineInstr &MI) { | 
|  | switch (MI.getOpcode()) { | 
|  | case AArch64::BLR: | 
|  | case AArch64::BLRNoIP: | 
|  | return true; | 
|  | case AArch64::BLRAA: | 
|  | case AArch64::BLRAB: | 
|  | case AArch64::BLRAAZ: | 
|  | case AArch64::BLRABZ: | 
|  | llvm_unreachable("Currently, LLVM's code generator does not support " | 
|  | "producing BLRA* instructions. Therefore, there's no " | 
|  | "support in this pass for those instructions."); | 
|  | } | 
|  | return false; | 
|  | } | 
|  |  | 
|  | bool AArch64SLSHardening::hardenReturnsAndBRs(MachineBasicBlock &MBB) const { | 
|  | if (!ST->hardenSlsRetBr()) | 
|  | return false; | 
|  | bool Modified = false; | 
|  | MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(), E = MBB.end(); | 
|  | MachineBasicBlock::iterator NextMBBI; | 
|  | for (; MBBI != E; MBBI = NextMBBI) { | 
|  | MachineInstr &MI = *MBBI; | 
|  | NextMBBI = std::next(MBBI); | 
|  | if (MI.isReturn() || isIndirectBranchOpcode(MI.getOpcode())) { | 
|  | assert(MI.isTerminator()); | 
|  | insertSpeculationBarrier(ST, MBB, std::next(MBBI), MI.getDebugLoc()); | 
|  | Modified = true; | 
|  | } | 
|  | } | 
|  | return Modified; | 
|  | } | 
|  |  | 
|  | static const char SLSBLRNamePrefix[] = "__llvm_slsblr_thunk_"; | 
|  |  | 
|  | static const struct ThunkNameAndReg { | 
|  | const char* Name; | 
|  | Register Reg; | 
|  | } SLSBLRThunks[] = { | 
|  | { "__llvm_slsblr_thunk_x0",  AArch64::X0}, | 
|  | { "__llvm_slsblr_thunk_x1",  AArch64::X1}, | 
|  | { "__llvm_slsblr_thunk_x2",  AArch64::X2}, | 
|  | { "__llvm_slsblr_thunk_x3",  AArch64::X3}, | 
|  | { "__llvm_slsblr_thunk_x4",  AArch64::X4}, | 
|  | { "__llvm_slsblr_thunk_x5",  AArch64::X5}, | 
|  | { "__llvm_slsblr_thunk_x6",  AArch64::X6}, | 
|  | { "__llvm_slsblr_thunk_x7",  AArch64::X7}, | 
|  | { "__llvm_slsblr_thunk_x8",  AArch64::X8}, | 
|  | { "__llvm_slsblr_thunk_x9",  AArch64::X9}, | 
|  | { "__llvm_slsblr_thunk_x10",  AArch64::X10}, | 
|  | { "__llvm_slsblr_thunk_x11",  AArch64::X11}, | 
|  | { "__llvm_slsblr_thunk_x12",  AArch64::X12}, | 
|  | { "__llvm_slsblr_thunk_x13",  AArch64::X13}, | 
|  | { "__llvm_slsblr_thunk_x14",  AArch64::X14}, | 
|  | { "__llvm_slsblr_thunk_x15",  AArch64::X15}, | 
|  | // X16 and X17 are deliberately missing, as the mitigation requires those | 
|  | // register to not be used in BLR. See comment in ConvertBLRToBL for more | 
|  | // details. | 
|  | { "__llvm_slsblr_thunk_x18",  AArch64::X18}, | 
|  | { "__llvm_slsblr_thunk_x19",  AArch64::X19}, | 
|  | { "__llvm_slsblr_thunk_x20",  AArch64::X20}, | 
|  | { "__llvm_slsblr_thunk_x21",  AArch64::X21}, | 
|  | { "__llvm_slsblr_thunk_x22",  AArch64::X22}, | 
|  | { "__llvm_slsblr_thunk_x23",  AArch64::X23}, | 
|  | { "__llvm_slsblr_thunk_x24",  AArch64::X24}, | 
|  | { "__llvm_slsblr_thunk_x25",  AArch64::X25}, | 
|  | { "__llvm_slsblr_thunk_x26",  AArch64::X26}, | 
|  | { "__llvm_slsblr_thunk_x27",  AArch64::X27}, | 
|  | { "__llvm_slsblr_thunk_x28",  AArch64::X28}, | 
|  | { "__llvm_slsblr_thunk_x29",  AArch64::FP}, | 
|  | // X30 is deliberately missing, for similar reasons as X16 and X17 are | 
|  | // missing. | 
|  | { "__llvm_slsblr_thunk_x31",  AArch64::XZR}, | 
|  | }; | 
|  |  | 
|  | namespace { | 
|  | struct SLSBLRThunkInserter : ThunkInserter<SLSBLRThunkInserter> { | 
|  | const char *getThunkPrefix() { return SLSBLRNamePrefix; } | 
|  | bool mayUseThunk(const MachineFunction &MF) { | 
|  | // FIXME: This could also check if there are any BLRs in the function | 
|  | // to more accurately reflect if a thunk will be needed. | 
|  | return MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr(); | 
|  | } | 
|  | void insertThunks(MachineModuleInfo &MMI); | 
|  | void populateThunk(MachineFunction &MF); | 
|  | }; | 
|  | } // namespace | 
|  |  | 
|  | void SLSBLRThunkInserter::insertThunks(MachineModuleInfo &MMI) { | 
|  | // FIXME: It probably would be possible to filter which thunks to produce | 
|  | // based on which registers are actually used in BLR instructions in this | 
|  | // function. But would that be a worthwhile optimization? | 
|  | for (auto T : SLSBLRThunks) | 
|  | createThunkFunction(MMI, T.Name); | 
|  | } | 
|  |  | 
|  | void SLSBLRThunkInserter::populateThunk(MachineFunction &MF) { | 
|  | // FIXME: How to better communicate Register number, rather than through | 
|  | // name and lookup table? | 
|  | assert(MF.getName().startswith(getThunkPrefix())); | 
|  | auto ThunkIt = llvm::find_if( | 
|  | SLSBLRThunks, [&MF](auto T) { return T.Name == MF.getName(); }); | 
|  | assert(ThunkIt != std::end(SLSBLRThunks)); | 
|  | Register ThunkReg = ThunkIt->Reg; | 
|  |  | 
|  | const TargetInstrInfo *TII = | 
|  | MF.getSubtarget<AArch64Subtarget>().getInstrInfo(); | 
|  | assert (MF.size() == 1); | 
|  | MachineBasicBlock *Entry = &MF.front(); | 
|  | Entry->clear(); | 
|  |  | 
|  | //  These thunks need to consist of the following instructions: | 
|  | //  __llvm_slsblr_thunk_xN: | 
|  | //      BR xN | 
|  | //      barrierInsts | 
|  | Entry->addLiveIn(ThunkReg); | 
|  | // MOV X16, ThunkReg == ORR X16, XZR, ThunkReg, LSL #0 | 
|  | BuildMI(Entry, DebugLoc(), TII->get(AArch64::ORRXrs), AArch64::X16) | 
|  | .addReg(AArch64::XZR) | 
|  | .addReg(ThunkReg) | 
|  | .addImm(0); | 
|  | BuildMI(Entry, DebugLoc(), TII->get(AArch64::BR)).addReg(AArch64::X16); | 
|  | // Make sure the thunks do not make use of the SB extension in case there is | 
|  | // a function somewhere that will call to it that for some reason disabled | 
|  | // the SB extension locally on that function, even though it's enabled for | 
|  | // the module otherwise. Therefore set AlwaysUseISBSDB to true. | 
|  | insertSpeculationBarrier(&MF.getSubtarget<AArch64Subtarget>(), *Entry, | 
|  | Entry->end(), DebugLoc(), true /*AlwaysUseISBDSB*/); | 
|  | } | 
|  |  | 
|  | MachineBasicBlock & | 
|  | AArch64SLSHardening::ConvertBLRToBL(MachineBasicBlock &MBB, | 
|  | MachineBasicBlock::iterator MBBI) const { | 
|  | // Transform a BLR to a BL as follows: | 
|  | // Before: | 
|  | //   |-----------------------------| | 
|  | //   |      ...                    | | 
|  | //   |  instI                      | | 
|  | //   |  BLR xN                     | | 
|  | //   |  instJ                      | | 
|  | //   |      ...                    | | 
|  | //   |-----------------------------| | 
|  | // | 
|  | // After: | 
|  | //   |-----------------------------| | 
|  | //   |      ...                    | | 
|  | //   |  instI                      | | 
|  | //   |  BL __llvm_slsblr_thunk_xN  | | 
|  | //   |  instJ                      | | 
|  | //   |      ...                    | | 
|  | //   |-----------------------------| | 
|  | // | 
|  | //   __llvm_slsblr_thunk_xN: | 
|  | //   |-----------------------------| | 
|  | //   |  BR xN                      | | 
|  | //   |  barrierInsts               | | 
|  | //   |-----------------------------| | 
|  | // | 
|  | // The __llvm_slsblr_thunk_xN thunks are created by the SLSBLRThunkInserter. | 
|  | // This function merely needs to transform BLR xN into BL | 
|  | // __llvm_slsblr_thunk_xN. | 
|  | // | 
|  | // Since linkers are allowed to clobber X16 and X17 on function calls, the | 
|  | // above mitigation only works if the original BLR instruction was not | 
|  | // BLR X16 nor BLR X17. Code generation before must make sure that no BLR | 
|  | // X16|X17 was produced if the mitigation is enabled. | 
|  |  | 
|  | MachineInstr &BLR = *MBBI; | 
|  | assert(isBLR(BLR)); | 
|  | unsigned BLOpcode; | 
|  | Register Reg; | 
|  | bool RegIsKilled; | 
|  | switch (BLR.getOpcode()) { | 
|  | case AArch64::BLR: | 
|  | case AArch64::BLRNoIP: | 
|  | BLOpcode = AArch64::BL; | 
|  | Reg = BLR.getOperand(0).getReg(); | 
|  | assert(Reg != AArch64::X16 && Reg != AArch64::X17 && Reg != AArch64::LR); | 
|  | RegIsKilled = BLR.getOperand(0).isKill(); | 
|  | break; | 
|  | case AArch64::BLRAA: | 
|  | case AArch64::BLRAB: | 
|  | case AArch64::BLRAAZ: | 
|  | case AArch64::BLRABZ: | 
|  | llvm_unreachable("BLRA instructions cannot yet be produced by LLVM, " | 
|  | "therefore there is no need to support them for now."); | 
|  | default: | 
|  | llvm_unreachable("unhandled BLR"); | 
|  | } | 
|  | DebugLoc DL = BLR.getDebugLoc(); | 
|  |  | 
|  | // If we'd like to support also BLRAA and BLRAB instructions, we'd need | 
|  | // a lot more different kind of thunks. | 
|  | // For example, a | 
|  | // | 
|  | // BLRAA xN, xM | 
|  | // | 
|  | // instruction probably would need to be transformed to something like: | 
|  | // | 
|  | // BL __llvm_slsblraa_thunk_x<N>_x<M> | 
|  | // | 
|  | // __llvm_slsblraa_thunk_x<N>_x<M>: | 
|  | //   BRAA x<N>, x<M> | 
|  | //   barrierInsts | 
|  | // | 
|  | // Given that about 30 different values of N are possible and about 30 | 
|  | // different values of M are possible in the above, with the current way | 
|  | // of producing indirect thunks, we'd be producing about 30 times 30, i.e. | 
|  | // about 900 thunks (where most might not be actually called). This would | 
|  | // multiply further by two to support both BLRAA and BLRAB variants of those | 
|  | // instructions. | 
|  | // If we'd want to support this, we'd probably need to look into a different | 
|  | // way to produce thunk functions, based on which variants are actually | 
|  | // needed, rather than producing all possible variants. | 
|  | // So far, LLVM does never produce BLRA* instructions, so let's leave this | 
|  | // for the future when LLVM can start producing BLRA* instructions. | 
|  | MachineFunction &MF = *MBBI->getMF(); | 
|  | MCContext &Context = MBB.getParent()->getContext(); | 
|  | auto ThunkIt = | 
|  | llvm::find_if(SLSBLRThunks, [Reg](auto T) { return T.Reg == Reg; }); | 
|  | assert (ThunkIt != std::end(SLSBLRThunks)); | 
|  | MCSymbol *Sym = Context.getOrCreateSymbol(ThunkIt->Name); | 
|  |  | 
|  | MachineInstr *BL = BuildMI(MBB, MBBI, DL, TII->get(BLOpcode)).addSym(Sym); | 
|  |  | 
|  | // Now copy the implicit operands from BLR to BL and copy other necessary | 
|  | // info. | 
|  | // However, both BLR and BL instructions implictly use SP and implicitly | 
|  | // define LR. Blindly copying implicit operands would result in SP and LR | 
|  | // operands to be present multiple times. While this may not be too much of | 
|  | // an issue, let's avoid that for cleanliness, by removing those implicit | 
|  | // operands from the BL created above before we copy over all implicit | 
|  | // operands from the BLR. | 
|  | int ImpLROpIdx = -1; | 
|  | int ImpSPOpIdx = -1; | 
|  | for (unsigned OpIdx = BL->getNumExplicitOperands(); | 
|  | OpIdx < BL->getNumOperands(); OpIdx++) { | 
|  | MachineOperand Op = BL->getOperand(OpIdx); | 
|  | if (!Op.isReg()) | 
|  | continue; | 
|  | if (Op.getReg() == AArch64::LR && Op.isDef()) | 
|  | ImpLROpIdx = OpIdx; | 
|  | if (Op.getReg() == AArch64::SP && !Op.isDef()) | 
|  | ImpSPOpIdx = OpIdx; | 
|  | } | 
|  | assert(ImpLROpIdx != -1); | 
|  | assert(ImpSPOpIdx != -1); | 
|  | int FirstOpIdxToRemove = std::max(ImpLROpIdx, ImpSPOpIdx); | 
|  | int SecondOpIdxToRemove = std::min(ImpLROpIdx, ImpSPOpIdx); | 
|  | BL->RemoveOperand(FirstOpIdxToRemove); | 
|  | BL->RemoveOperand(SecondOpIdxToRemove); | 
|  | // Now copy over the implicit operands from the original BLR | 
|  | BL->copyImplicitOps(MF, BLR); | 
|  | MF.moveCallSiteInfo(&BLR, BL); | 
|  | // Also add the register called in the BLR as being used in the called thunk. | 
|  | BL->addOperand(MachineOperand::CreateReg(Reg, false /*isDef*/, true /*isImp*/, | 
|  | RegIsKilled /*isKill*/)); | 
|  | // Remove BLR instruction | 
|  | MBB.erase(MBBI); | 
|  |  | 
|  | return MBB; | 
|  | } | 
|  |  | 
|  | bool AArch64SLSHardening::hardenBLRs(MachineBasicBlock &MBB) const { | 
|  | if (!ST->hardenSlsBlr()) | 
|  | return false; | 
|  | bool Modified = false; | 
|  | MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); | 
|  | MachineBasicBlock::iterator NextMBBI; | 
|  | for (; MBBI != E; MBBI = NextMBBI) { | 
|  | MachineInstr &MI = *MBBI; | 
|  | NextMBBI = std::next(MBBI); | 
|  | if (isBLR(MI)) { | 
|  | ConvertBLRToBL(MBB, MBBI); | 
|  | Modified = true; | 
|  | } | 
|  | } | 
|  | return Modified; | 
|  | } | 
|  |  | 
|  | FunctionPass *llvm::createAArch64SLSHardeningPass() { | 
|  | return new AArch64SLSHardening(); | 
|  | } | 
|  |  | 
|  | namespace { | 
|  | class AArch64IndirectThunks : public MachineFunctionPass { | 
|  | public: | 
|  | static char ID; | 
|  |  | 
|  | AArch64IndirectThunks() : MachineFunctionPass(ID) {} | 
|  |  | 
|  | StringRef getPassName() const override { return "AArch64 Indirect Thunks"; } | 
|  |  | 
|  | bool doInitialization(Module &M) override; | 
|  | bool runOnMachineFunction(MachineFunction &MF) override; | 
|  |  | 
|  | void getAnalysisUsage(AnalysisUsage &AU) const override { | 
|  | MachineFunctionPass::getAnalysisUsage(AU); | 
|  | AU.addRequired<MachineModuleInfoWrapperPass>(); | 
|  | AU.addPreserved<MachineModuleInfoWrapperPass>(); | 
|  | } | 
|  |  | 
|  | private: | 
|  | std::tuple<SLSBLRThunkInserter> TIs; | 
|  |  | 
|  | // FIXME: When LLVM moves to C++17, these can become folds | 
|  | template <typename... ThunkInserterT> | 
|  | static void initTIs(Module &M, | 
|  | std::tuple<ThunkInserterT...> &ThunkInserters) { | 
|  | (void)std::initializer_list<int>{ | 
|  | (std::get<ThunkInserterT>(ThunkInserters).init(M), 0)...}; | 
|  | } | 
|  | template <typename... ThunkInserterT> | 
|  | static bool runTIs(MachineModuleInfo &MMI, MachineFunction &MF, | 
|  | std::tuple<ThunkInserterT...> &ThunkInserters) { | 
|  | bool Modified = false; | 
|  | (void)std::initializer_list<int>{ | 
|  | Modified |= std::get<ThunkInserterT>(ThunkInserters).run(MMI, MF)...}; | 
|  | return Modified; | 
|  | } | 
|  | }; | 
|  |  | 
|  | } // end anonymous namespace | 
|  |  | 
|  | char AArch64IndirectThunks::ID = 0; | 
|  |  | 
|  | FunctionPass *llvm::createAArch64IndirectThunks() { | 
|  | return new AArch64IndirectThunks(); | 
|  | } | 
|  |  | 
|  | bool AArch64IndirectThunks::doInitialization(Module &M) { | 
|  | initTIs(M, TIs); | 
|  | return false; | 
|  | } | 
|  |  | 
|  | bool AArch64IndirectThunks::runOnMachineFunction(MachineFunction &MF) { | 
|  | LLVM_DEBUG(dbgs() << getPassName() << '\n'); | 
|  | auto &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI(); | 
|  | return runTIs(MMI, MF, TIs); | 
|  | } |