//===-- AtomicExpandPass.cpp - Expand atomic instructions -------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass (at IR level) to replace atomic instructions with
// __atomic_* library calls, or target specific instructions which implement
// the same semantics in a way which better fits the target backend.  This can
// include the use of (intrinsic-based) load-linked/store-conditional loops,
// AtomicCmpXchg, or type coercions.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/AtomicExpandUtils.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetSubtargetInfo.h"

using namespace llvm;

#define DEBUG_TYPE "atomic-expand"

namespace {
  class AtomicExpand: public FunctionPass {
    const TargetMachine *TM;
    const TargetLowering *TLI;
  public:
    static char ID; // Pass identification, replacement for typeid
    explicit AtomicExpand(const TargetMachine *TM = nullptr)
        : FunctionPass(ID), TM(TM), TLI(nullptr) {
      initializeAtomicExpandPass(*PassRegistry::getPassRegistry());
    }

    bool runOnFunction(Function &F) override;

  private:
    bool bracketInstWithFences(Instruction *I, AtomicOrdering Order,
                               bool IsStore, bool IsLoad);
    IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
    LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
    bool tryExpandAtomicLoad(LoadInst *LI);
    bool expandAtomicLoadToLL(LoadInst *LI);
    bool expandAtomicLoadToCmpXchg(LoadInst *LI);
    StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
    bool expandAtomicStore(StoreInst *SI);
    bool tryExpandAtomicRMW(AtomicRMWInst *AI);
    bool expandAtomicOpToLLSC(
        Instruction *I, Value *Addr, AtomicOrdering MemOpOrder,
        std::function<Value *(IRBuilder<> &, Value *)> PerformOp);
    AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
    bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
    bool isIdempotentRMW(AtomicRMWInst *AI);
    bool simplifyIdempotentRMW(AtomicRMWInst *AI);

    bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, unsigned Align,
                                 Value *PointerOperand, Value *ValueOperand,
                                 Value *CASExpected, AtomicOrdering Ordering,
                                 AtomicOrdering Ordering2,
                                 ArrayRef<RTLIB::Libcall> Libcalls);
    void expandAtomicLoadToLibcall(LoadInst *LI);
    void expandAtomicStoreToLibcall(StoreInst *LI);
    void expandAtomicRMWToLibcall(AtomicRMWInst *I);
    void expandAtomicCASToLibcall(AtomicCmpXchgInst *I);
  };
}

char AtomicExpand::ID = 0;
char &llvm::AtomicExpandID = AtomicExpand::ID;
INITIALIZE_TM_PASS(AtomicExpand, "atomic-expand", "Expand Atomic instructions",
                   false, false)

FunctionPass *llvm::createAtomicExpandPass(const TargetMachine *TM) {
  return new AtomicExpand(TM);
}

namespace {
// Helper functions to retrieve the size of atomic instructions.
unsigned getAtomicOpSize(LoadInst *LI) {
  const DataLayout &DL = LI->getModule()->getDataLayout();
  return DL.getTypeStoreSize(LI->getType());
}

unsigned getAtomicOpSize(StoreInst *SI) {
  const DataLayout &DL = SI->getModule()->getDataLayout();
  return DL.getTypeStoreSize(SI->getValueOperand()->getType());
}

unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
  const DataLayout &DL = RMWI->getModule()->getDataLayout();
  return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
}

unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
  const DataLayout &DL = CASI->getModule()->getDataLayout();
  return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
}

// Helper functions to retrieve the alignment of atomic instructions.
unsigned getAtomicOpAlign(LoadInst *LI) {
  unsigned Align = LI->getAlignment();
  // In the future, if this IR restriction is relaxed, we should
  // return DataLayout::getABITypeAlignment when there's no align
  // value.
  assert(Align != 0 && "An atomic LoadInst always has an explicit alignment");
  return Align;
}

unsigned getAtomicOpAlign(StoreInst *SI) {
  unsigned Align = SI->getAlignment();
  // In the future, if this IR restriction is relaxed, we should
  // return DataLayout::getABITypeAlignment when there's no align
  // value.
  assert(Align != 0 && "An atomic StoreInst always has an explicit alignment");
  return Align;
}

unsigned getAtomicOpAlign(AtomicRMWInst *RMWI) {
  // TODO(PR27168): This instruction has no alignment attribute, but unlike the
  // default alignment for load/store, the default here is to assume
  // it has NATURAL alignment, not DataLayout-specified alignment.
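  // For example, an atomicrmw on i64 is treated as 8-byte aligned here even
  // if the DataLayout's ABI alignment for i64 is only 4, as on some 32-bit
  // targets. (Illustrative note, not part of PR27168.)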
  const DataLayout &DL = RMWI->getModule()->getDataLayout();
  return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
}

unsigned getAtomicOpAlign(AtomicCmpXchgInst *CASI) {
  // TODO(PR27168): same comment as above.
  const DataLayout &DL = CASI->getModule()->getDataLayout();
  return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
}

// Determine if a particular atomic operation has a supported size,
// and is of appropriate alignment, to be passed through for target
// lowering. (Versus turning into a __atomic libcall)
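//
// For example, on a target reporting getMaxAtomicSizeInBitsSupported() == 64,
// a naturally aligned atomic i32 load passes through to the backend, while an
// atomic i128 load (or a badly misaligned one) is routed to the __atomic_*
// libcalls below. (Illustrative example; the exact cut-off is
// target-dependent.)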
template <typename Inst>
bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
  unsigned Size = getAtomicOpSize(I);
  unsigned Align = getAtomicOpAlign(I);
  return Align >= Size && Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
}

} // end anonymous namespace

bool AtomicExpand::runOnFunction(Function &F) {
  if (!TM || !TM->getSubtargetImpl(F)->enableAtomicExpand())
    return false;
  TLI = TM->getSubtargetImpl(F)->getTargetLowering();

  SmallVector<Instruction *, 1> AtomicInsts;

  // Changing control-flow while iterating through it is a bad idea, so gather a
  // list of all atomic instructions before we start.
  for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) {
    Instruction *I = &*II;
    if (I->isAtomic() && !isa<FenceInst>(I))
      AtomicInsts.push_back(I);
  }

  bool MadeChange = false;
  for (auto I : AtomicInsts) {
    auto LI = dyn_cast<LoadInst>(I);
    auto SI = dyn_cast<StoreInst>(I);
    auto RMWI = dyn_cast<AtomicRMWInst>(I);
    auto CASI = dyn_cast<AtomicCmpXchgInst>(I);
    assert((LI || SI || RMWI || CASI) && "Unknown atomic instruction");

    // If the Size/Alignment is not supported, replace with a libcall.
    if (LI) {
      if (!atomicSizeSupported(TLI, LI)) {
        expandAtomicLoadToLibcall(LI);
        MadeChange = true;
        continue;
      }
    } else if (SI) {
      if (!atomicSizeSupported(TLI, SI)) {
        expandAtomicStoreToLibcall(SI);
        MadeChange = true;
        continue;
      }
    } else if (RMWI) {
      if (!atomicSizeSupported(TLI, RMWI)) {
        expandAtomicRMWToLibcall(RMWI);
        MadeChange = true;
        continue;
      }
    } else if (CASI) {
      if (!atomicSizeSupported(TLI, CASI)) {
        expandAtomicCASToLibcall(CASI);
        MadeChange = true;
        continue;
      }
    }

    if (TLI->shouldInsertFencesForAtomic(I)) {
      auto FenceOrdering = AtomicOrdering::Monotonic;
      bool IsStore, IsLoad;
      if (LI && isAcquireOrStronger(LI->getOrdering())) {
        FenceOrdering = LI->getOrdering();
        LI->setOrdering(AtomicOrdering::Monotonic);
        IsStore = false;
        IsLoad = true;
      } else if (SI && isReleaseOrStronger(SI->getOrdering())) {
        FenceOrdering = SI->getOrdering();
        SI->setOrdering(AtomicOrdering::Monotonic);
        IsStore = true;
        IsLoad = false;
      } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) ||
                          isAcquireOrStronger(RMWI->getOrdering()))) {
        FenceOrdering = RMWI->getOrdering();
        RMWI->setOrdering(AtomicOrdering::Monotonic);
        IsStore = IsLoad = true;
      } else if (CASI && !TLI->shouldExpandAtomicCmpXchgInIR(CASI) &&
                 (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
                  isAcquireOrStronger(CASI->getSuccessOrdering()))) {
        // If a compare and swap is lowered to LL/SC, we can do smarter fence
        // insertion, with a stronger one on the success path than on the
        // failure path. As a result, fence insertion is directly done by
        // expandAtomicCmpXchg in that case.
        FenceOrdering = CASI->getSuccessOrdering();
        CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
        CASI->setFailureOrdering(AtomicOrdering::Monotonic);
        IsStore = IsLoad = true;
      }

      if (FenceOrdering != AtomicOrdering::Monotonic) {
        MadeChange |= bracketInstWithFences(I, FenceOrdering, IsStore, IsLoad);
      }
    }

    if (LI) {
      if (LI->getType()->isFloatingPointTy()) {
        // TODO: add a TLI hook to control this so that each target can
        // convert to lowering the original type one at a time.
        LI = convertAtomicLoadToIntegerType(LI);
        assert(LI->getType()->isIntegerTy() && "invariant broken");
        MadeChange = true;
      }

      MadeChange |= tryExpandAtomicLoad(LI);
    } else if (SI) {
      if (SI->getValueOperand()->getType()->isFloatingPointTy()) {
        // TODO: add a TLI hook to control this so that each target can
        // convert to lowering the original type one at a time.
        SI = convertAtomicStoreToIntegerType(SI);
        assert(SI->getValueOperand()->getType()->isIntegerTy() &&
               "invariant broken");
        MadeChange = true;
      }

      if (TLI->shouldExpandAtomicStoreInIR(SI))
        MadeChange |= expandAtomicStore(SI);
    } else if (RMWI) {
      // There are two different ways of expanding RMW instructions:
      // - into a load if it is idempotent
      // - into a Cmpxchg/LL-SC loop otherwise
      // we try them in that order.

      if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
        MadeChange = true;
      } else {
        MadeChange |= tryExpandAtomicRMW(RMWI);
      }
    } else if (CASI) {
      // TODO: when we're ready to make the change at the IR level, we can
      // extend convertCmpXchgToInteger for floating point too.
      assert(!CASI->getCompareOperand()->getType()->isFloatingPointTy() &&
             "unimplemented - floating point not legal at IR level");
      if (CASI->getCompareOperand()->getType()->isPointerTy()) {
        // TODO: add a TLI hook to control this so that each target can
        // convert to lowering the original type one at a time.
        CASI = convertCmpXchgToIntegerType(CASI);
        assert(CASI->getCompareOperand()->getType()->isIntegerTy() &&
               "invariant broken");
        MadeChange = true;
      }

      if (TLI->shouldExpandAtomicCmpXchgInIR(CASI))
        MadeChange |= expandAtomicCmpXchg(CASI);
    }
  }
  return MadeChange;
}

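// Roughly, on a target that opts into fence insertion, the caller above has
// already stripped the instruction's ordering down to monotonic, and this
// helper brackets it with the target's fences, e.g. a seq_cst store becomes
// approximately:
//   fence seq_cst
//   store atomic i32 %v, i32* %p monotonic
//   fence seq_cst
// (Sketch only; the actual fences come from TLI->emitLeadingFence and
// TLI->emitTrailingFence and may be target-specific or absent.)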
bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order,
                                         bool IsStore, bool IsLoad) {
  IRBuilder<> Builder(I);

  auto LeadingFence = TLI->emitLeadingFence(Builder, Order, IsStore, IsLoad);

  auto TrailingFence = TLI->emitTrailingFence(Builder, Order, IsStore, IsLoad);
  // The trailing fence is emitted before the instruction instead of after
  // because there is no easy way of setting Builder insertion point after
  // an instruction. So we must erase it from the BB, and insert it back
  // in the right place.
  // We have a guard here because not every atomic operation generates a
  // trailing fence.
  if (TrailingFence) {
    TrailingFence->removeFromParent();
    TrailingFence->insertAfter(I);
  }

  return (LeadingFence || TrailingFence);
}

/// Get the iX type with the same bitwidth as T.
IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T,
                                                       const DataLayout &DL) {
  EVT VT = TLI->getValueType(DL, T);
  unsigned BitWidth = VT.getStoreSizeInBits();
  assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
  return IntegerType::get(T->getContext(), BitWidth);
}

/// Convert an atomic load of a non-integral type to an integer load of the
/// equivalent bitwidth. See the function comment on
/// convertAtomicStoreToIntegerType for background.
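///
/// For example (sketch), an atomic float load
///   %v = load atomic float, float* %p seq_cst, align 4
/// becomes
///   %p.i = bitcast float* %p to i32*
///   %v.i = load atomic i32, i32* %p.i seq_cst, align 4
///   %v = bitcast i32 %v.i to float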
LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
  auto *M = LI->getModule();
  Type *NewTy = getCorrespondingIntegerType(LI->getType(),
                                            M->getDataLayout());

  IRBuilder<> Builder(LI);

  Value *Addr = LI->getPointerOperand();
  Type *PT = PointerType::get(NewTy,
                              Addr->getType()->getPointerAddressSpace());
  Value *NewAddr = Builder.CreateBitCast(Addr, PT);

  auto *NewLI = Builder.CreateLoad(NewAddr);
  NewLI->setAlignment(LI->getAlignment());
  NewLI->setVolatile(LI->isVolatile());
  NewLI->setAtomic(LI->getOrdering(), LI->getSynchScope());
  DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");

  Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
  LI->replaceAllUsesWith(NewVal);
  LI->eraseFromParent();
  return NewLI;
}

bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) {
  switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
  case TargetLoweringBase::AtomicExpansionKind::None:
    return false;
  case TargetLoweringBase::AtomicExpansionKind::LLSC:
    return expandAtomicOpToLLSC(
        LI, LI->getPointerOperand(), LI->getOrdering(),
        [](IRBuilder<> &Builder, Value *Loaded) { return Loaded; });
  case TargetLoweringBase::AtomicExpansionKind::LLOnly:
    return expandAtomicLoadToLL(LI);
  case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
    return expandAtomicLoadToCmpXchg(LI);
  }
  llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
}

bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
  IRBuilder<> Builder(LI);

  // On some architectures, load-linked instructions are atomic for larger
  // sizes than normal loads. For example, the only 64-bit load guaranteed
  // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
  Value *Val =
      TLI->emitLoadLinked(Builder, LI->getPointerOperand(), LI->getOrdering());
  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);

  LI->replaceAllUsesWith(Val);
  LI->eraseFromParent();

  return true;
}

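// Expand an atomic load into a "no-op" cmpxchg against a dummy (zero) value;
// the loaded result is the old value returned by the cmpxchg. A sketch of the
// resulting IR for "%v = load atomic i64, i64* %p acquire, align 8" would be:
//   %pair = cmpxchg i64* %p, i64 0, i64 0 acquire acquire
//   %v = extractvalue { i64, i1 } %pair, 0
// (Illustrative only; the failure ordering actually comes from
// AtomicCmpXchgInst::getStrongestFailureOrdering(Order).)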
bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) {
  IRBuilder<> Builder(LI);
  AtomicOrdering Order = LI->getOrdering();
  Value *Addr = LI->getPointerOperand();
  Type *Ty = cast<PointerType>(Addr->getType())->getElementType();
  Constant *DummyVal = Constant::getNullValue(Ty);

  Value *Pair = Builder.CreateAtomicCmpXchg(
      Addr, DummyVal, DummyVal, Order,
      AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
  Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");

  LI->replaceAllUsesWith(Loaded);
  LI->eraseFromParent();

  return true;
}

/// Convert an atomic store of a non-integral type to an integer store of the
/// equivalent bitwidth. We used to not support floating point or vector
/// atomics in the IR at all. The backends learned to deal with the bitcast
/// idiom because that was the only way of expressing the notion of an atomic
/// float or vector store. The long term plan is to teach each backend to
/// instruction select from the original atomic store, but as a migration
/// mechanism, we convert back to the old format which the backends understand.
/// Each backend will need individual work to recognize the new format.
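///
/// For example (sketch), "store atomic float %v, float* %p release, align 4"
/// becomes
///   %v.i = bitcast float %v to i32
///   %p.i = bitcast float* %p to i32*
///   store atomic i32 %v.i, i32* %p.i release, align 4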
StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) {
  IRBuilder<> Builder(SI);
  auto *M = SI->getModule();
  Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
                                            M->getDataLayout());
  Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);

  Value *Addr = SI->getPointerOperand();
  Type *PT = PointerType::get(NewTy,
                              Addr->getType()->getPointerAddressSpace());
  Value *NewAddr = Builder.CreateBitCast(Addr, PT);

  StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr);
  NewSI->setAlignment(SI->getAlignment());
  NewSI->setVolatile(SI->isVolatile());
  NewSI->setAtomic(SI->getOrdering(), SI->getSynchScope());
  DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
  SI->eraseFromParent();
  return NewSI;
}

bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
  // This function is only called on atomic stores that are too large to be
  // atomic if implemented as a native store. So we replace them by an
  // atomic swap, that can be implemented for example as a ldrex/strex on ARM
  // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
  // It is the responsibility of the target to only signal expansion via
  // shouldExpandAtomicRMW in cases where this is required and possible.
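  //
  // Sketch: "store atomic i64 %v, i64* %p seq_cst" becomes
  //   atomicrmw xchg i64* %p, i64 %v seq_cst
  // whose result is unused, and which is then expanded like any other RMW.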
  IRBuilder<> Builder(SI);
  AtomicRMWInst *AI =
      Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(),
                              SI->getValueOperand(), SI->getOrdering());
  SI->eraseFromParent();

  // Now we have an appropriate swap instruction, lower it as usual.
  return tryExpandAtomicRMW(AI);
}

static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr,
                                 Value *Loaded, Value *NewVal,
                                 AtomicOrdering MemOpOrder,
                                 Value *&Success, Value *&NewLoaded) {
  Value *Pair = Builder.CreateAtomicCmpXchg(
      Addr, Loaded, NewVal, MemOpOrder,
      AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
  Success = Builder.CreateExtractValue(Pair, 1, "success");
  NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
}

/// Emit IR to implement the given atomicrmw operation on values in registers,
/// returning the new value.
static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
                              Value *Loaded, Value *Inc) {
  Value *NewVal;
  switch (Op) {
  case AtomicRMWInst::Xchg:
    return Inc;
  case AtomicRMWInst::Add:
    return Builder.CreateAdd(Loaded, Inc, "new");
  case AtomicRMWInst::Sub:
    return Builder.CreateSub(Loaded, Inc, "new");
  case AtomicRMWInst::And:
    return Builder.CreateAnd(Loaded, Inc, "new");
  case AtomicRMWInst::Nand:
    return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new");
  case AtomicRMWInst::Or:
    return Builder.CreateOr(Loaded, Inc, "new");
  case AtomicRMWInst::Xor:
    return Builder.CreateXor(Loaded, Inc, "new");
  case AtomicRMWInst::Max:
    NewVal = Builder.CreateICmpSGT(Loaded, Inc);
    return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
  case AtomicRMWInst::Min:
    NewVal = Builder.CreateICmpSLE(Loaded, Inc);
    return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
  case AtomicRMWInst::UMax:
    NewVal = Builder.CreateICmpUGT(Loaded, Inc);
    return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
  case AtomicRMWInst::UMin:
    NewVal = Builder.CreateICmpULE(Loaded, Inc);
    return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
  default:
    llvm_unreachable("Unknown atomic op");
  }
}

bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
  switch (TLI->shouldExpandAtomicRMWInIR(AI)) {
  case TargetLoweringBase::AtomicExpansionKind::None:
    return false;
  case TargetLoweringBase::AtomicExpansionKind::LLSC:
    return expandAtomicOpToLLSC(AI, AI->getPointerOperand(), AI->getOrdering(),
                                [&](IRBuilder<> &Builder, Value *Loaded) {
                                  return performAtomicOp(AI->getOperation(),
                                                         Builder, Loaded,
                                                         AI->getValOperand());
                                });
  case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
    return expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
  default:
    llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
  }
}

bool AtomicExpand::expandAtomicOpToLLSC(
    Instruction *I, Value *Addr, AtomicOrdering MemOpOrder,
    std::function<Value *(IRBuilder<> &, Value *)> PerformOp) {
  BasicBlock *BB = I->getParent();
  Function *F = BB->getParent();
  LLVMContext &Ctx = F->getContext();

  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
  //
  // The standard expansion we produce is:
  //     [...]
  //     fence?
  // atomicrmw.start:
  //     %loaded = @load.linked(%addr)
  //     %new = some_op iN %loaded, %incr
  //     %stored = @store_conditional(%new, %addr)
  //     %try_again = icmp i32 ne %stored, 0
  //     br i1 %try_again, label %loop, label %atomicrmw.end
  // atomicrmw.end:
  //     fence?
  //     [...]
  BasicBlock *ExitBB = BB->splitBasicBlock(I->getIterator(), "atomicrmw.end");
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

  // This grabs the DebugLoc from I.
  IRBuilder<> Builder(I);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place), but we might want a fence too. It's easiest to just remove
  // the branch entirely.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);

  Value *NewVal = PerformOp(Builder, Loaded);

  Value *StoreSuccess =
      TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
  Value *TryAgain = Builder.CreateICmpNE(
      StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
  Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);

  Builder.SetInsertPoint(ExitBB, ExitBB->begin());

  I->replaceAllUsesWith(Loaded);
  I->eraseFromParent();

  return true;
}

/// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
/// the equivalent bitwidth. We used to not support pointer cmpxchg in the
/// IR. As a migration step, we convert back to what used to be the standard
/// way to represent a pointer cmpxchg so that we can update backends one by
/// one.
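///
/// For example (sketch), "cmpxchg i8** %p, i8* %old, i8* %new seq_cst seq_cst"
/// becomes, on a target with 64-bit pointers:
///   %p.i    = bitcast i8** %p to i64*
///   %old.i  = ptrtoint i8* %old to i64
///   %new.i  = ptrtoint i8* %new to i64
///   %pair   = cmpxchg i64* %p.i, i64 %old.i, i64 %new.i seq_cst seq_cst
///   %ret.i  = extractvalue { i64, i1 } %pair, 0
///   %oldval = inttoptr i64 %ret.i to i8*
/// with the { i8*, i1 } result rebuilt from %oldval and the success bit.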
AtomicCmpXchgInst *AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
  auto *M = CI->getModule();
  Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
                                            M->getDataLayout());

  IRBuilder<> Builder(CI);

  Value *Addr = CI->getPointerOperand();
  Type *PT = PointerType::get(NewTy,
                              Addr->getType()->getPointerAddressSpace());
  Value *NewAddr = Builder.CreateBitCast(Addr, PT);

  Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
  Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);


  auto *NewCI = Builder.CreateAtomicCmpXchg(NewAddr, NewCmp, NewNewVal,
                                            CI->getSuccessOrdering(),
                                            CI->getFailureOrdering(),
                                            CI->getSynchScope());
  NewCI->setVolatile(CI->isVolatile());
  NewCI->setWeak(CI->isWeak());
  DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");

  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
  Value *Succ = Builder.CreateExtractValue(NewCI, 1);

  OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());

  Value *Res = UndefValue::get(CI->getType());
  Res = Builder.CreateInsertValue(Res, OldVal, 0);
  Res = Builder.CreateInsertValue(Res, Succ, 1);

  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
  return NewCI;
}


bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
  AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
  AtomicOrdering FailureOrder = CI->getFailureOrdering();
  Value *Addr = CI->getPointerOperand();
  BasicBlock *BB = CI->getParent();
  Function *F = BB->getParent();
  LLVMContext &Ctx = F->getContext();
  // If shouldInsertFencesForAtomic() returns true, then the target does not
  // want to deal with memory orders, and emitLeading/TrailingFence should take
  // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we
  // should preserve the ordering.
  bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
  AtomicOrdering MemOpOrder =
      ShouldInsertFencesForAtomic ? AtomicOrdering::Monotonic : SuccessOrder;

  // In implementations which use a barrier to achieve release semantics, we can
  // delay emitting this barrier until we know a store is actually going to be
  // attempted. The cost of this delay is that we need 2 copies of the block
  // emitting the load-linked, affecting code size.
  //
  // Ideally, this logic would be unconditional except for the minsize check
  // since in other cases the extra blocks naturally collapse down to the
  // minimal loop. Unfortunately, this puts too much stress on later
  // optimisations so we avoid emitting the extra logic in those cases too.
  bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
                           SuccessOrder != AtomicOrdering::Monotonic &&
                           SuccessOrder != AtomicOrdering::Acquire &&
                           !F->optForMinSize();

  // There's no overhead for sinking the release barrier in a weak cmpxchg, so
  // do it even on minsize.
  bool UseUnconditionalReleaseBarrier = F->optForMinSize() && !CI->isWeak();

  // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
  //
  // The full expansion we produce is:
  //     [...]
  // cmpxchg.start:
  //     %unreleasedload = @load.linked(%addr)
  //     %should_store = icmp eq %unreleasedload, %desired
  //     br i1 %should_store, label %cmpxchg.fencedstore,
  //                          label %cmpxchg.nostore
  // cmpxchg.releasingstore:
  //     fence?
  //     br label cmpxchg.trystore
  // cmpxchg.trystore:
  //     %loaded.trystore = phi [%unreleasedload, %releasingstore],
  //                            [%releasedload, %cmpxchg.releasedload]
  //     %stored = @store_conditional(%new, %addr)
  //     %success = icmp eq i32 %stored, 0
  //     br i1 %success, label %cmpxchg.success,
  //                     label %cmpxchg.releasedload/%cmpxchg.failure
  // cmpxchg.releasedload:
  //     %releasedload = @load.linked(%addr)
  //     %should_store = icmp eq %releasedload, %desired
  //     br i1 %should_store, label %cmpxchg.trystore,
  //                          label %cmpxchg.failure
  // cmpxchg.success:
  //     fence?
  //     br label %cmpxchg.end
  // cmpxchg.nostore:
  //     %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
  //                           [%releasedload,
  //                               %cmpxchg.releasedload/%cmpxchg.trystore]
  //     @load_linked_fail_balance()?
  //     br label %cmpxchg.failure
  // cmpxchg.failure:
  //     fence?
  //     br label %cmpxchg.end
  // cmpxchg.end:
  //     %loaded = phi [%loaded.nostore, %cmpxchg.failure],
  //                   [%loaded.trystore, %cmpxchg.trystore]
  //     %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
  //     %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
  //     %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
  //     [...]
  BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
  auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
  auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
  auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
  auto ReleasedLoadBB =
      BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
  auto TryStoreBB =
      BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
  auto ReleasingStoreBB =
      BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
  auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);

  // This grabs the DebugLoc from CI
  IRBuilder<> Builder(CI);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place), but we might want a fence too. It's easiest to just remove
  // the branch entirely.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
    TLI->emitLeadingFence(Builder, SuccessOrder, /*IsStore=*/true,
                          /*IsLoad=*/true);
  Builder.CreateBr(StartBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(StartBB);
  Value *UnreleasedLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
  Value *ShouldStore = Builder.CreateICmpEQ(
      UnreleasedLoad, CI->getCompareOperand(), "should_store");

  // If the cmpxchg doesn't actually need any ordering when it fails, we can
  // jump straight past that fence instruction (if it exists).
  Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB);

  Builder.SetInsertPoint(ReleasingStoreBB);
  if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
    TLI->emitLeadingFence(Builder, SuccessOrder, /*IsStore=*/true,
                          /*IsLoad=*/true);
  Builder.CreateBr(TryStoreBB);

  Builder.SetInsertPoint(TryStoreBB);
  Value *StoreSuccess = TLI->emitStoreConditional(
      Builder, CI->getNewValOperand(), Addr, MemOpOrder);
  StoreSuccess = Builder.CreateICmpEQ(
      StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
  BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
  Builder.CreateCondBr(StoreSuccess, SuccessBB,
                       CI->isWeak() ? FailureBB : RetryBB);

  Builder.SetInsertPoint(ReleasedLoadBB);
  Value *SecondLoad;
  if (HasReleasedLoadBB) {
    SecondLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
    ShouldStore = Builder.CreateICmpEQ(SecondLoad, CI->getCompareOperand(),
                                       "should_store");

    // If the cmpxchg doesn't actually need any ordering when it fails, we can
    // jump straight past that fence instruction (if it exists).
    Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
  } else
    Builder.CreateUnreachable();

  // Make sure later instructions don't get reordered with a fence if
  // necessary.
  Builder.SetInsertPoint(SuccessBB);
  if (ShouldInsertFencesForAtomic)
    TLI->emitTrailingFence(Builder, SuccessOrder, /*IsStore=*/true,
                           /*IsLoad=*/true);
  Builder.CreateBr(ExitBB);

  Builder.SetInsertPoint(NoStoreBB);
  // In the failing case, where we don't execute the store-conditional, the
  // target might want to balance out the load-linked with a dedicated
  // instruction (e.g., on ARM, clearing the exclusive monitor).
  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
  Builder.CreateBr(FailureBB);

  Builder.SetInsertPoint(FailureBB);
  if (ShouldInsertFencesForAtomic)
    TLI->emitTrailingFence(Builder, FailureOrder, /*IsStore=*/true,
                           /*IsLoad=*/true);
  Builder.CreateBr(ExitBB);

  // Finally, we have control-flow based knowledge of whether the cmpxchg
  // succeeded or not. We expose this to later passes by converting any
  // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
  // PHI.
  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2);
  Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
  Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);

  // Setup the builder so we can create any PHIs we need.
  Value *Loaded;
  if (!HasReleasedLoadBB)
    Loaded = UnreleasedLoad;
  else {
    Builder.SetInsertPoint(TryStoreBB, TryStoreBB->begin());
    PHINode *TryStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
    TryStoreLoaded->addIncoming(UnreleasedLoad, ReleasingStoreBB);
    TryStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB);

    Builder.SetInsertPoint(NoStoreBB, NoStoreBB->begin());
    PHINode *NoStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
    NoStoreLoaded->addIncoming(UnreleasedLoad, StartBB);
    NoStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB);

    Builder.SetInsertPoint(ExitBB, ++ExitBB->begin());
    PHINode *ExitLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
    ExitLoaded->addIncoming(TryStoreLoaded, SuccessBB);
    ExitLoaded->addIncoming(NoStoreLoaded, FailureBB);

    Loaded = ExitLoaded;
  }

  // Look for any users of the cmpxchg that are just comparing the loaded value
  // against the desired one, and replace them with the CFG-derived version.
  SmallVector<ExtractValueInst *, 2> PrunedInsts;
  for (auto User : CI->users()) {
    ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
    if (!EV)
      continue;

    assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
           "weird extraction from { iN, i1 }");

    if (EV->getIndices()[0] == 0)
      EV->replaceAllUsesWith(Loaded);
    else
      EV->replaceAllUsesWith(Success);

    PrunedInsts.push_back(EV);
  }

  // We can remove the instructions now we're no longer iterating through them.
  for (auto EV : PrunedInsts)
    EV->eraseFromParent();

  if (!CI->use_empty()) {
    // Some use of the full struct return that we don't understand has happened,
    // so we've got to reconstruct it properly.
    Value *Res;
    Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0);
    Res = Builder.CreateInsertValue(Res, Success, 1);

    CI->replaceAllUsesWith(Res);
  }

  CI->eraseFromParent();
  return true;
}

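// An atomicrmw is idempotent when its operation cannot change the stored
// value, e.g. "atomicrmw or i32* %p, i32 0" or "atomicrmw and i32* %p, i32 -1";
// such instructions only need the memory-ordering effect of a fenced load.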
bool AtomicExpand::isIdempotentRMW(AtomicRMWInst* RMWI) {
  auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
  if(!C)
    return false;

  AtomicRMWInst::BinOp Op = RMWI->getOperation();
  switch(Op) {
    case AtomicRMWInst::Add:
    case AtomicRMWInst::Sub:
    case AtomicRMWInst::Or:
    case AtomicRMWInst::Xor:
      return C->isZero();
    case AtomicRMWInst::And:
      return C->isMinusOne();
    // FIXME: we could also treat Min/Max/UMin/UMax as idempotent when the
    // operand is INT_MIN/INT_MAX/...
    default:
      return false;
  }
}

bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) {
  if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
    tryExpandAtomicLoad(ResultingLoad);
    return true;
  }
  return false;
}

bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
                                    CreateCmpXchgInstFun CreateCmpXchg) {
  assert(AI);

  AtomicOrdering MemOpOrder = AI->getOrdering() == AtomicOrdering::Unordered
                                  ? AtomicOrdering::Monotonic
                                  : AI->getOrdering();
  Value *Addr = AI->getPointerOperand();
  BasicBlock *BB = AI->getParent();
  Function *F = BB->getParent();
  LLVMContext &Ctx = F->getContext();

  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
  //
  // The standard expansion we produce is:
  //     [...]
  //     %init_loaded = load atomic iN* %addr
  //     br label %loop
  // loop:
  //     %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
  //     %new = some_op iN %loaded, %incr
  //     %pair = cmpxchg iN* %addr, iN %loaded, iN %new
  //     %new_loaded = extractvalue { iN, i1 } %pair, 0
  //     %success = extractvalue { iN, i1 } %pair, 1
  //     br i1 %success, label %atomicrmw.end, label %loop
  // atomicrmw.end:
  //     [...]
  BasicBlock *ExitBB = BB->splitBasicBlock(AI->getIterator(), "atomicrmw.end");
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

  // This grabs the DebugLoc from AI.
  IRBuilder<> Builder(AI);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place), but we want a load. It's easiest to just remove
  // the branch entirely.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  LoadInst *InitLoaded = Builder.CreateLoad(Addr);
  // Atomics require at least natural alignment.
  InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits() / 8);
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded");
  Loaded->addIncoming(InitLoaded, BB);

  Value *NewVal =
      performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand());

  Value *NewLoaded = nullptr;
  Value *Success = nullptr;

  CreateCmpXchg(Builder, Addr, Loaded, NewVal, MemOpOrder,
                Success, NewLoaded);
  assert(Success && NewLoaded);

  Loaded->addIncoming(NewLoaded, LoopBB);

  Builder.CreateCondBr(Success, ExitBB, LoopBB);

  Builder.SetInsertPoint(ExitBB, ExitBB->begin());

  AI->replaceAllUsesWith(NewLoaded);
  AI->eraseFromParent();

  return true;
}

// In order to use one of the sized library calls such as
// __atomic_fetch_add_4, the alignment must be sufficient, the size
// must be one of the potentially-specialized sizes, and the value
// type must actually exist in C on the target (otherwise, the
// function wouldn't actually be defined.)
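//
// For example, on a typical 64-bit target an aligned atomic operation on i32
// can use __atomic_fetch_add_4, while an operation on a 32-byte struct (or a
// badly misaligned i32) has to fall back to the generic, size_t-taking
// __atomic_* functions.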
static bool canUseSizedAtomicCall(unsigned Size, unsigned Align,
                                  const DataLayout &DL) {
  // TODO: "LargestSize" is an approximation for "largest type that
  // you can express in C". It seems to be the case that int128 is
  // supported on all 64-bit platforms, otherwise only up to 64-bit
  // integers are supported. If we get this wrong, then we'll try to
  // call a sized libcall that doesn't actually exist. There should
  // really be some more reliable way in LLVM of determining integer
  // sizes which are valid in the target's C ABI...
  unsigned LargestSize = DL.getLargestLegalIntTypeSize() >= 64 ? 16 : 8;
  return Align >= Size &&
         (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
         Size <= LargestSize;
}

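// For instance (sketch), an atomic load of an i128 on a target that only
// supports 64-bit atomics natively is rewritten into a call to
// __atomic_load_16 (or to the generic __atomic_load when the sized call is
// unavailable), as selected by expandAtomicOpToLibcall below.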
void AtomicExpand::expandAtomicLoadToLibcall(LoadInst *I) {
  static const RTLIB::Libcall Libcalls[6] = {
      RTLIB::ATOMIC_LOAD,   RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
      RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
  unsigned Size = getAtomicOpSize(I);
  unsigned Align = getAtomicOpAlign(I);

  bool expanded = expandAtomicOpToLibcall(
      I, Size, Align, I->getPointerOperand(), nullptr, nullptr,
      I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
  (void)expanded;
  assert(expanded && "expandAtomicOpToLibcall shouldn't fail for Load");
}

void AtomicExpand::expandAtomicStoreToLibcall(StoreInst *I) {
  static const RTLIB::Libcall Libcalls[6] = {
      RTLIB::ATOMIC_STORE,   RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
      RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
  unsigned Size = getAtomicOpSize(I);
  unsigned Align = getAtomicOpAlign(I);

  bool expanded = expandAtomicOpToLibcall(
      I, Size, Align, I->getPointerOperand(), I->getValueOperand(), nullptr,
      I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
  (void)expanded;
  assert(expanded && "expandAtomicOpToLibcall shouldn't fail for Store");
}

void AtomicExpand::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
  static const RTLIB::Libcall Libcalls[6] = {
      RTLIB::ATOMIC_COMPARE_EXCHANGE,   RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
      RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
      RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
  unsigned Size = getAtomicOpSize(I);
  unsigned Align = getAtomicOpAlign(I);

  bool expanded = expandAtomicOpToLibcall(
      I, Size, Align, I->getPointerOperand(), I->getNewValOperand(),
      I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
      Libcalls);
  (void)expanded;
  assert(expanded && "expandAtomicOpToLibcall shouldn't fail for CAS");
}

static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
  static const RTLIB::Libcall LibcallsXchg[6] = {
      RTLIB::ATOMIC_EXCHANGE,   RTLIB::ATOMIC_EXCHANGE_1,
      RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
      RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
  static const RTLIB::Libcall LibcallsAdd[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_ADD_1,
      RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
      RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
  static const RTLIB::Libcall LibcallsSub[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_SUB_1,
      RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
      RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
  static const RTLIB::Libcall LibcallsAnd[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_AND_1,
      RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
      RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
  static const RTLIB::Libcall LibcallsOr[6] = {
      RTLIB::UNKNOWN_LIBCALL,   RTLIB::ATOMIC_FETCH_OR_1,
      RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
      RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
  static const RTLIB::Libcall LibcallsXor[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_XOR_1,
      RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
      RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
  static const RTLIB::Libcall LibcallsNand[6] = {
      RTLIB::UNKNOWN_LIBCALL,     RTLIB::ATOMIC_FETCH_NAND_1,
      RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
      RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};

  switch (Op) {
  case AtomicRMWInst::BAD_BINOP:
    llvm_unreachable("Should not have BAD_BINOP.");
  case AtomicRMWInst::Xchg:
    return makeArrayRef(LibcallsXchg);
  case AtomicRMWInst::Add:
    return makeArrayRef(LibcallsAdd);
  case AtomicRMWInst::Sub:
    return makeArrayRef(LibcallsSub);
  case AtomicRMWInst::And:
    return makeArrayRef(LibcallsAnd);
  case AtomicRMWInst::Or:
    return makeArrayRef(LibcallsOr);
  case AtomicRMWInst::Xor:
    return makeArrayRef(LibcallsXor);
  case AtomicRMWInst::Nand:
    return makeArrayRef(LibcallsNand);
  case AtomicRMWInst::Max:
  case AtomicRMWInst::Min:
  case AtomicRMWInst::UMax:
  case AtomicRMWInst::UMin:
    // No atomic libcalls are available for max/min/umax/umin.
    return {};
  }
  llvm_unreachable("Unexpected AtomicRMW operation.");
}

void AtomicExpand::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
  ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());

  unsigned Size = getAtomicOpSize(I);
  unsigned Align = getAtomicOpAlign(I);

  bool Success = false;
  if (!Libcalls.empty())
    Success = expandAtomicOpToLibcall(
        I, Size, Align, I->getPointerOperand(), I->getValOperand(), nullptr,
        I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);

  // The expansion failed: either there were no libcalls at all for
  // the operation (min/max), or there were only size-specialized
  // libcalls (add/sub/etc) and we needed a generic. So, expand to a
  // CAS libcall, via a CAS loop, instead.
  if (!Success) {
    expandAtomicRMWToCmpXchg(I, [this](IRBuilder<> &Builder, Value *Addr,
                                       Value *Loaded, Value *NewVal,
                                       AtomicOrdering MemOpOrder,
                                       Value *&Success, Value *&NewLoaded) {
      // Create the CAS instruction normally...
      AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
          Addr, Loaded, NewVal, MemOpOrder,
          AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
      Success = Builder.CreateExtractValue(Pair, 1, "success");
      NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");

      // ...and then expand the CAS into a libcall.
      expandAtomicCASToLibcall(Pair);
    });
  }
}

// A helper routine for the above expandAtomic*ToLibcall functions.
//
// 'Libcalls' contains an array of enum values for the particular
// ATOMIC libcalls to be emitted. All of the other arguments besides
// 'I' are extracted from the Instruction subclass by the
// caller. Depending on the particular call, some will be null.
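//
// For example (sketch), a seq_cst "cmpxchg i128* %p, i128 %old, i128 %new"
// whose size is not natively supported ends up as roughly:
//   %expected = alloca i128                      ; in the entry block
//   store i128 %old, i128* %expected
//   %ok = call zeroext i1 @__atomic_compare_exchange_16(i8* %p.i8,
//             i8* %expected.i8, i128 %new, i32 5, i32 5)
//   %oldval = load i128, i128* %expected
// (assuming the sized variant is usable; otherwise the generic
// __atomic_compare_exchange with an explicit size argument is called).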
bool AtomicExpand::expandAtomicOpToLibcall(
    Instruction *I, unsigned Size, unsigned Align, Value *PointerOperand,
    Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
    AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
  assert(Libcalls.size() == 6);

  LLVMContext &Ctx = I->getContext();
  Module *M = I->getModule();
  const DataLayout &DL = M->getDataLayout();
  IRBuilder<> Builder(I);
  IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());

  bool UseSizedLibcall = canUseSizedAtomicCall(Size, Align, DL);
  Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);

  unsigned AllocaAlignment = DL.getPrefTypeAlignment(SizedIntTy);

  // TODO: the "order" argument type is "int", not int32. So
  // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
  ConstantInt *SizeVal64 = ConstantInt::get(Type::getInt64Ty(Ctx), Size);
  assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
  Constant *OrderingVal =
      ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
  Constant *Ordering2Val = nullptr;
  if (CASExpected) {
    assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
    Ordering2Val =
        ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
  }
  bool HasResult = I->getType() != Type::getVoidTy(Ctx);

  RTLIB::Libcall RTLibType;
  if (UseSizedLibcall) {
    switch (Size) {
    case 1: RTLibType = Libcalls[1]; break;
    case 2: RTLibType = Libcalls[2]; break;
    case 4: RTLibType = Libcalls[3]; break;
    case 8: RTLibType = Libcalls[4]; break;
    case 16: RTLibType = Libcalls[5]; break;
    }
  } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
    RTLibType = Libcalls[0];
  } else {
    // Can't use sized function, and there's no generic for this
    // operation, so give up.
    return false;
  }

  // Build up the function call. There are two kinds. First, the sized
  // variants. These calls are going to be one of the following (with
  // N=1,2,4,8,16):
  //  iN    __atomic_load_N(iN *ptr, int ordering)
  //  void  __atomic_store_N(iN *ptr, iN val, int ordering)
  //  iN    __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
  //  bool  __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
  //                                    int success_order, int failure_order)
  //
  // Note that these functions can be used for non-integer atomic
  // operations, the values just need to be bitcast to integers on the
  // way in and out.
  //
  // And, then, the generic variants. They look like the following:
  //  void  __atomic_load(size_t size, void *ptr, void *ret, int ordering)
  //  void  __atomic_store(size_t size, void *ptr, void *val, int ordering)
  //  void  __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
  //                          int ordering)
  //  bool  __atomic_compare_exchange(size_t size, void *ptr, void *expected,
  //                                  void *desired, int success_order,
  //                                  int failure_order)
  //
  // The different signatures are built up depending on the
  // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
  // variables.

  AllocaInst *AllocaCASExpected = nullptr;
  Value *AllocaCASExpected_i8 = nullptr;
  AllocaInst *AllocaValue = nullptr;
  Value *AllocaValue_i8 = nullptr;
  AllocaInst *AllocaResult = nullptr;
  Value *AllocaResult_i8 = nullptr;

  Type *ResultTy;
  SmallVector<Value *, 6> Args;
  AttributeSet Attr;

  // 'size' argument.
  if (!UseSizedLibcall) {
    // Note, getIntPtrType is assumed equivalent to size_t.
    Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
  }

  // 'ptr' argument.
  Value *PtrVal =
      Builder.CreateBitCast(PointerOperand, Type::getInt8PtrTy(Ctx));
  Args.push_back(PtrVal);

  // 'expected' argument, if present.
  if (CASExpected) {
    AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
    AllocaCASExpected->setAlignment(AllocaAlignment);
    AllocaCASExpected_i8 =
        Builder.CreateBitCast(AllocaCASExpected, Type::getInt8PtrTy(Ctx));
    Builder.CreateLifetimeStart(AllocaCASExpected_i8, SizeVal64);
    Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
    Args.push_back(AllocaCASExpected_i8);
  }

  // 'val' argument ('desired' for cas), if present.
  if (ValueOperand) {
    if (UseSizedLibcall) {
      Value *IntValue =
          Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
      Args.push_back(IntValue);
    } else {
      AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
      AllocaValue->setAlignment(AllocaAlignment);
      AllocaValue_i8 =
          Builder.CreateBitCast(AllocaValue, Type::getInt8PtrTy(Ctx));
      Builder.CreateLifetimeStart(AllocaValue_i8, SizeVal64);
      Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
      Args.push_back(AllocaValue_i8);
    }
  }

  // 'ret' argument.
  if (!CASExpected && HasResult && !UseSizedLibcall) {
    AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
    AllocaResult->setAlignment(AllocaAlignment);
    AllocaResult_i8 =
        Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx));
    Builder.CreateLifetimeStart(AllocaResult_i8, SizeVal64);
    Args.push_back(AllocaResult_i8);
  }

  // 'ordering' ('success_order' for cas) argument.
  Args.push_back(OrderingVal);

  // 'failure_order' argument, if present.
  if (Ordering2Val)
    Args.push_back(Ordering2Val);

  // Now, the return type.
  if (CASExpected) {
    ResultTy = Type::getInt1Ty(Ctx);
    Attr = Attr.addAttribute(Ctx, AttributeSet::ReturnIndex, Attribute::ZExt);
  } else if (HasResult && UseSizedLibcall)
    ResultTy = SizedIntTy;
  else
    ResultTy = Type::getVoidTy(Ctx);

  // Done with setting up arguments and return types, create the call:
  SmallVector<Type *, 6> ArgTys;
  for (Value *Arg : Args)
    ArgTys.push_back(Arg->getType());
  FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
  Constant *LibcallFn =
      M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr);
  CallInst *Call = Builder.CreateCall(LibcallFn, Args);
  Call->setAttributes(Attr);
  Value *Result = Call;

  // And then, extract the results...
  if (ValueOperand && !UseSizedLibcall)
    Builder.CreateLifetimeEnd(AllocaValue_i8, SizeVal64);

  if (CASExpected) {
    // The final result from the CAS is {load of 'expected' alloca, bool result
    // from call}
    Type *FinalResultTy = I->getType();
    Value *V = UndefValue::get(FinalResultTy);
    Value *ExpectedOut =
        Builder.CreateAlignedLoad(AllocaCASExpected, AllocaAlignment);
    Builder.CreateLifetimeEnd(AllocaCASExpected_i8, SizeVal64);
    V = Builder.CreateInsertValue(V, ExpectedOut, 0);
    V = Builder.CreateInsertValue(V, Result, 1);
    I->replaceAllUsesWith(V);
  } else if (HasResult) {
    Value *V;
    if (UseSizedLibcall)
      V = Builder.CreateBitOrPointerCast(Result, I->getType());
    else {
      V = Builder.CreateAlignedLoad(AllocaResult, AllocaAlignment);
      Builder.CreateLifetimeEnd(AllocaResult_i8, SizeVal64);
    }
    I->replaceAllUsesWith(V);
  }
  I->eraseFromParent();
  return true;
}