//===-- X86AtomicExpandPass.cpp - Expand illegal atomic instructions -----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass (at IR level) to replace atomic instructions which
// cannot be implemented as a single instruction with cmpxchg-based loops.
//
//===----------------------------------------------------------------------===//
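
// For illustration: an atomicrmw with no direct x86 equivalent, such as
//
//   %old = atomicrmw min i32* %p, i32 %v seq_cst
//
// is rewritten into a load / compute / cmpxchg retry loop; the comment in
// expandAtomicRMW below shows the exact shape of the emitted IR.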

#include "X86.h"
#include "X86TargetMachine.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;

#define DEBUG_TYPE "x86-atomic-expand"

namespace {
class X86AtomicExpandPass : public FunctionPass {
  const X86TargetMachine *TM;
public:
  static char ID; // Pass identification, replacement for typeid
  explicit X86AtomicExpandPass(const X86TargetMachine *TM)
      : FunctionPass(ID), TM(TM) {}

  bool runOnFunction(Function &F) override;
  bool expandAtomicInsts(Function &F);

  bool needsCmpXchgNb(Type *MemType);

  /// There are four kinds of atomic operations. Two never need expanding:
  /// cmpxchg is what we expand the others *to*, and loads are easily handled
  /// by ISelLowering. Atomicrmw and store can need expanding in some
  /// circumstances.
  bool shouldExpand(Instruction *Inst);

  /// 128-bit atomic stores (64-bit on i686) need to be implemented in terms
  /// of trivial cmpxchg16b loops. A simple store isn't necessarily atomic.
  bool shouldExpandStore(StoreInst *SI);

  /// Only some atomicrmw instructions need expanding -- some operations
  /// (e.g. max) have absolutely no architectural support; some (e.g. or) have
  /// limited support but can't return the previous value; some (e.g. add)
  /// have complete support in the instruction set.
  ///
  /// Also, naturally, 128-bit operations always need to be expanded.
  bool shouldExpandAtomicRMW(AtomicRMWInst *AI);

  bool expandAtomicRMW(AtomicRMWInst *AI);
  bool expandAtomicStore(StoreInst *SI);
};
}

char X86AtomicExpandPass::ID = 0;

FunctionPass *llvm::createX86AtomicExpandPass(const X86TargetMachine *TM) {
  return new X86AtomicExpandPass(TM);
}
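
// For context, the target schedules this pass to run before instruction
// selection. A minimal sketch of the hookup (hypothetical; the real hook
// lives in X86TargetMachine.cpp, not this file):
//
//   void X86PassConfig::addIRPasses() {
//     addPass(createX86AtomicExpandPass(&getX86TargetMachine()));
//     TargetPassConfig::addIRPasses();
//   }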

bool X86AtomicExpandPass::runOnFunction(Function &F) {
  SmallVector<Instruction *, 1> AtomicInsts;

  // Changing control-flow while iterating through it is a bad idea, so gather a
  // list of all atomic instructions before we start.
  for (BasicBlock &BB : F)
    for (Instruction &Inst : BB) {
      if (isa<AtomicRMWInst>(&Inst) ||
          (isa<StoreInst>(&Inst) && cast<StoreInst>(&Inst)->isAtomic()))
        AtomicInsts.push_back(&Inst);
    }

  bool MadeChange = false;
  for (Instruction *Inst : AtomicInsts) {
    if (!shouldExpand(Inst))
      continue;

    if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst))
      MadeChange |= expandAtomicRMW(AI);
    if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
      MadeChange |= expandAtomicStore(SI);

    assert(MadeChange && "Atomic inst not expanded when it should be?");
    Inst->eraseFromParent();
  }

  return MadeChange;
}

/// Returns true if the operand type is 1 step up from the native width, and
/// the corresponding cmpxchg8b or cmpxchg16b instruction is available
/// (otherwise we leave them alone to become __sync_fetch_and_... calls).
bool X86AtomicExpandPass::needsCmpXchgNb(llvm::Type *MemType) {
  const X86Subtarget &Subtarget = TM->getSubtarget<X86Subtarget>();
  unsigned OpWidth = MemType->getPrimitiveSizeInBits();

  if (OpWidth == 64)
    return !Subtarget.is64Bit(); // FIXME this should be Subtarget.hasCmpxchg8b
  if (OpWidth == 128)
    return Subtarget.hasCmpxchg16b();

  return false;
}
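
// For illustration (not exhaustive):
//   needsCmpXchgNb(i64)  on i686   -> true  (one step above the 32-bit native
//                                            width; use a cmpxchg8b loop)
//   needsCmpXchgNb(i128) on x86-64 -> Subtarget.hasCmpxchg16b()
//   needsCmpXchgNb(i32)  anywhere  -> false (natively supported)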

bool X86AtomicExpandPass::shouldExpandAtomicRMW(AtomicRMWInst *AI) {
  const X86Subtarget &Subtarget = TM->getSubtarget<X86Subtarget>();
  unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;

  if (needsCmpXchgNb(AI->getType()))
    return true;

  if (AI->getType()->getPrimitiveSizeInBits() > NativeWidth)
    return false;

  AtomicRMWInst::BinOp Op = AI->getOperation();
  switch (Op) {
  default:
    llvm_unreachable("Unknown atomic operation");
  case AtomicRMWInst::Xchg:
  case AtomicRMWInst::Add:
  case AtomicRMWInst::Sub:
    // It's better to use xadd, xsub or xchg for these in all cases.
    return false;
  case AtomicRMWInst::Or:
  case AtomicRMWInst::And:
  case AtomicRMWInst::Xor:
    // If the atomicrmw's result isn't actually used, we can just add a "lock"
    // prefix to a normal instruction for these operations.
    return !AI->use_empty();
  case AtomicRMWInst::Nand:
  case AtomicRMWInst::Max:
  case AtomicRMWInst::Min:
  case AtomicRMWInst::UMax:
  case AtomicRMWInst::UMin:
    // These always require a non-trivial set of data operations on x86. We must
    // use a cmpxchg loop.
    return true;
  }
}
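
// For illustration: with an unused result, "atomicrmw or i32* %p, i32 1" can
// be selected as a single "lock orl $1, (%mem)", so it is left alone. If the
// old value *is* used, or the operation is nand/max/min/umax/umin, no single
// x86 instruction produces the required result, so a cmpxchg loop is the only
// option.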

bool X86AtomicExpandPass::shouldExpandStore(StoreInst *SI) {
  if (needsCmpXchgNb(SI->getValueOperand()->getType()))
    return true;

  return false;
}
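
// For illustration: on x86-64 with cmpxchg16b available,
//   store atomic i128 %v, i128* %p seq_cst, align 16
// has no single-instruction lowering and is expanded here, whereas a
// natively-sized "store atomic i64 %v, i64* %p" is left to ISelLowering.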

bool X86AtomicExpandPass::shouldExpand(Instruction *Inst) {
  if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst))
    return shouldExpandAtomicRMW(AI);
  if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
    return shouldExpandStore(SI);
  return false;
}

/// Emit IR to implement the given atomicrmw operation on values in registers,
/// returning the new value.
static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
                              Value *Loaded, Value *Inc) {
  Value *NewVal;
  switch (Op) {
  case AtomicRMWInst::Xchg:
    return Inc;
  case AtomicRMWInst::Add:
    return Builder.CreateAdd(Loaded, Inc, "new");
  case AtomicRMWInst::Sub:
    return Builder.CreateSub(Loaded, Inc, "new");
  case AtomicRMWInst::And:
    return Builder.CreateAnd(Loaded, Inc, "new");
  case AtomicRMWInst::Nand:
    return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new");
  case AtomicRMWInst::Or:
    return Builder.CreateOr(Loaded, Inc, "new");
  case AtomicRMWInst::Xor:
    return Builder.CreateXor(Loaded, Inc, "new");
  case AtomicRMWInst::Max:
    NewVal = Builder.CreateICmpSGT(Loaded, Inc);
    return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
  case AtomicRMWInst::Min:
    NewVal = Builder.CreateICmpSLE(Loaded, Inc);
    return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
  case AtomicRMWInst::UMax:
    NewVal = Builder.CreateICmpUGT(Loaded, Inc);
    return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
  case AtomicRMWInst::UMin:
    NewVal = Builder.CreateICmpULE(Loaded, Inc);
    return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
  default:
    break;
  }
  llvm_unreachable("Unknown atomic op");
}
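
// For illustration, performAtomicOp(AtomicRMWInst::Nand, ...) emits:
//   %tmp = and iN %loaded, %inc
//   %new = xor iN %tmp, -1   ; CreateNot is an xor against all-ones
// and the min/max variants each emit an icmp followed by a select.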

bool X86AtomicExpandPass::expandAtomicRMW(AtomicRMWInst *AI) {
  AtomicOrdering Order =
      AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering();
  Value *Addr = AI->getPointerOperand();
  BasicBlock *BB = AI->getParent();
  Function *F = BB->getParent();
  LLVMContext &Ctx = F->getContext();

  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
  //
  // The standard expansion we produce is:
  //     [...]
  //     %init_loaded = load atomic iN* %addr
  //     br label %loop
  // loop:
  //     %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
  //     %new = some_op iN %loaded, %incr
  //     %pair = cmpxchg iN* %addr, iN %loaded, iN %new
  //     %new_loaded = extractvalue { iN, i1 } %pair, 0
  //     %success = extractvalue { iN, i1 } %pair, 1
  //     br i1 %success, label %atomicrmw.end, label %loop
  // atomicrmw.end:
  //     [...]
  BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

  // This grabs the DebugLoc from AI.
  IRBuilder<> Builder(AI);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place), but we want a load. It's easiest to just remove
  // the branch entirely.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  LoadInst *InitLoaded = Builder.CreateLoad(Addr);
  InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits());
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded");
  Loaded->addIncoming(InitLoaded, BB);

  Value *NewVal =
      performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand());

  Value *Pair = Builder.CreateAtomicCmpXchg(
      Addr, Loaded, NewVal, Order,
      AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
  Value *NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
  Loaded->addIncoming(NewLoaded, LoopBB);

  Value *Success = Builder.CreateExtractValue(Pair, 1, "success");
  Builder.CreateCondBr(Success, ExitBB, LoopBB);

  AI->replaceAllUsesWith(NewLoaded);

  return true;
}

bool X86AtomicExpandPass::expandAtomicStore(StoreInst *SI) {
  // An atomic store might need cmpxchg16b (or 8b on x86) to execute. Express
  // this in terms of the usual expansion to "atomicrmw xchg".
  IRBuilder<> Builder(SI);
  AtomicOrdering Order =
      SI->getOrdering() == Unordered ? Monotonic : SI->getOrdering();
  AtomicRMWInst *AI =
      Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(),
                              SI->getValueOperand(), Order);

  // Now we have an appropriate swap instruction, lower it as usual.
  if (shouldExpandAtomicRMW(AI)) {
    expandAtomicRMW(AI);
    AI->eraseFromParent();
    return true;
  }
  // The store was replaced by the xchg above, so the IR always changed; return
  // true rather than relying on the implicit pointer-to-bool conversion of AI.
  return true;
}
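
// For illustration: on i686, which has no single-instruction atomic 64-bit
// store,
//   store atomic i64 %v, i64* %p seq_cst, align 8
// first becomes "atomicrmw xchg i64* %p, i64 %v seq_cst", which
// expandAtomicRMW then lowers to a cmpxchg8b-based loop as shown above.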