Blame - lib/CodeGen/AtomicExpandPass.cpp - fp2-dev/platform/external/llvm

blob: 12f6bd77d334d457bb158a3098525386c4600041 [file] [log] [blame]

Stephen Hines	37ed9c1	2014-12-01 14:51:49 -0800	[diff] [blame^]	1	//===-- AtomicExpandPass.cpp - Expand atomic instructions -------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	// This file contains a pass (at IR level) to replace atomic instructions with
				11	// either (intrinsic-based) load-linked/store-conditional loops or AtomicCmpXchg.
				12	//
				13	//===----------------------------------------------------------------------===//
				14
				15	#include "llvm/CodeGen/Passes.h"
				16	#include "llvm/IR/Function.h"
				17	#include "llvm/IR/IRBuilder.h"
				18	#include "llvm/IR/InstIterator.h"
				19	#include "llvm/IR/Instructions.h"
				20	#include "llvm/IR/Intrinsics.h"
				21	#include "llvm/IR/Module.h"
				22	#include "llvm/Support/Debug.h"
				23	#include "llvm/Target/TargetLowering.h"
				24	#include "llvm/Target/TargetMachine.h"
				25	#include "llvm/Target/TargetSubtargetInfo.h"
				26
				27	using namespace llvm;
				28
				29	#define DEBUG_TYPE "atomic-expand"
				30
				31	namespace {
				32	class AtomicExpand: public FunctionPass {
				33	const TargetMachine *TM;
				34	public:
				35	static char ID; // Pass identification, replacement for typeid
				36	explicit AtomicExpand(const TargetMachine *TM = nullptr)
				37	: FunctionPass(ID), TM(TM) {
				38	initializeAtomicExpandPass(*PassRegistry::getPassRegistry());
				39	}
				40
				41	bool runOnFunction(Function &F) override;
				42
				43	private:
				44	bool bracketInstWithFences(Instruction *I, AtomicOrdering Order,
				45	bool IsStore, bool IsLoad);
				46	bool expandAtomicLoad(LoadInst *LI);
				47	bool expandAtomicLoadToLL(LoadInst *LI);
				48	bool expandAtomicLoadToCmpXchg(LoadInst *LI);
				49	bool expandAtomicStore(StoreInst *SI);
				50	bool expandAtomicRMW(AtomicRMWInst *AI);
				51	bool expandAtomicRMWToLLSC(AtomicRMWInst *AI);
				52	bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI);
				53	bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
				54	bool isIdempotentRMW(AtomicRMWInst *AI);
				55	bool simplifyIdempotentRMW(AtomicRMWInst *AI);
				56	};
				57	}
				58
				59	char AtomicExpand::ID = 0;
				60	char &llvm::AtomicExpandID = AtomicExpand::ID;
				61	INITIALIZE_TM_PASS(AtomicExpand, "atomic-expand",
				62	"Expand Atomic calls in terms of either load-linked & store-conditional or cmpxchg",
				63	false, false)
				64
				65	FunctionPass llvm::createAtomicExpandPass(const TargetMachine TM) {
				66	return new AtomicExpand(TM);
				67	}
				68
				69	bool AtomicExpand::runOnFunction(Function &F) {
				70	if (!TM \|\| !TM->getSubtargetImpl()->enableAtomicExpand())
				71	return false;
				72	auto TargetLowering = TM->getSubtargetImpl()->getTargetLowering();
				73
				74	SmallVector<Instruction *, 1> AtomicInsts;
				75
				76	// Changing control-flow while iterating through it is a bad idea, so gather a
				77	// list of all atomic instructions before we start.
				78	for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
				79	if (I->isAtomic())
				80	AtomicInsts.push_back(&*I);
				81	}
				82
				83	bool MadeChange = false;
				84	for (auto I : AtomicInsts) {
				85	auto LI = dyn_cast<LoadInst>(I);
				86	auto SI = dyn_cast<StoreInst>(I);
				87	auto RMWI = dyn_cast<AtomicRMWInst>(I);
				88	auto CASI = dyn_cast<AtomicCmpXchgInst>(I);
				89	assert((LI \|\| SI \|\| RMWI \|\| CASI \|\| isa<FenceInst>(I)) &&
				90	"Unknown atomic instruction");
				91
				92	auto FenceOrdering = Monotonic;
				93	bool IsStore, IsLoad;
				94	if (TargetLowering->getInsertFencesForAtomic()) {
				95	if (LI && isAtLeastAcquire(LI->getOrdering())) {
				96	FenceOrdering = LI->getOrdering();
				97	LI->setOrdering(Monotonic);
				98	IsStore = false;
				99	IsLoad = true;
				100	} else if (SI && isAtLeastRelease(SI->getOrdering())) {
				101	FenceOrdering = SI->getOrdering();
				102	SI->setOrdering(Monotonic);
				103	IsStore = true;
				104	IsLoad = false;
				105	} else if (RMWI && (isAtLeastRelease(RMWI->getOrdering()) \|\|
				106	isAtLeastAcquire(RMWI->getOrdering()))) {
				107	FenceOrdering = RMWI->getOrdering();
				108	RMWI->setOrdering(Monotonic);
				109	IsStore = IsLoad = true;
				110	} else if (CASI && !TargetLowering->hasLoadLinkedStoreConditional() &&
				111	(isAtLeastRelease(CASI->getSuccessOrdering()) \|\|
				112	isAtLeastAcquire(CASI->getSuccessOrdering()))) {
				113	// If a compare and swap is lowered to LL/SC, we can do smarter fence
				114	// insertion, with a stronger one on the success path than on the
				115	// failure path. As a result, fence insertion is directly done by
				116	// expandAtomicCmpXchg in that case.
				117	FenceOrdering = CASI->getSuccessOrdering();
				118	CASI->setSuccessOrdering(Monotonic);
				119	CASI->setFailureOrdering(Monotonic);
				120	IsStore = IsLoad = true;
				121	}
				122
				123	if (FenceOrdering != Monotonic) {
				124	MadeChange \|= bracketInstWithFences(I, FenceOrdering, IsStore, IsLoad);
				125	}
				126	}
				127
				128	if (LI && TargetLowering->shouldExpandAtomicLoadInIR(LI)) {
				129	MadeChange \|= expandAtomicLoad(LI);
				130	} else if (SI && TargetLowering->shouldExpandAtomicStoreInIR(SI)) {
				131	MadeChange \|= expandAtomicStore(SI);
				132	} else if (RMWI) {
				133	// There are two different ways of expanding RMW instructions:
				134	// - into a load if it is idempotent
				135	// - into a Cmpxchg/LL-SC loop otherwise
				136	// we try them in that order.
				137	MadeChange \|= (isIdempotentRMW(RMWI) &&
				138	simplifyIdempotentRMW(RMWI)) \|\|
				139	(TargetLowering->shouldExpandAtomicRMWInIR(RMWI) &&
				140	expandAtomicRMW(RMWI));
				141	} else if (CASI && TargetLowering->hasLoadLinkedStoreConditional()) {
				142	MadeChange \|= expandAtomicCmpXchg(CASI);
				143	}
				144	}
				145	return MadeChange;
				146	}
				147
				148	bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order,
				149	bool IsStore, bool IsLoad) {
				150	IRBuilder<> Builder(I);
				151
				152	auto LeadingFence =
				153	TM->getSubtargetImpl()->getTargetLowering()->emitLeadingFence(
				154	Builder, Order, IsStore, IsLoad);
				155
				156	auto TrailingFence =
				157	TM->getSubtargetImpl()->getTargetLowering()->emitTrailingFence(
				158	Builder, Order, IsStore, IsLoad);
				159	// The trailing fence is emitted before the instruction instead of after
				160	// because there is no easy way of setting Builder insertion point after
				161	// an instruction. So we must erase it from the BB, and insert it back
				162	// in the right place.
				163	// We have a guard here because not every atomic operation generates a
				164	// trailing fence.
				165	if (TrailingFence) {
				166	TrailingFence->removeFromParent();
				167	TrailingFence->insertAfter(I);
				168	}
				169
				170	return (LeadingFence \|\| TrailingFence);
				171	}
				172
				173	bool AtomicExpand::expandAtomicLoad(LoadInst *LI) {
				174	if (TM->getSubtargetImpl()
				175	->getTargetLowering()
				176	->hasLoadLinkedStoreConditional())
				177	return expandAtomicLoadToLL(LI);
				178	else
				179	return expandAtomicLoadToCmpXchg(LI);
				180	}
				181
				182	bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
				183	auto TLI = TM->getSubtargetImpl()->getTargetLowering();
				184	IRBuilder<> Builder(LI);
				185
				186	// On some architectures, load-linked instructions are atomic for larger
				187	// sizes than normal loads. For example, the only 64-bit load guaranteed
				188	// to be single-copy atomic by ARM is an ldrexd (A3.5.3).
				189	Value *Val =
				190	TLI->emitLoadLinked(Builder, LI->getPointerOperand(), LI->getOrdering());
				191
				192	LI->replaceAllUsesWith(Val);
				193	LI->eraseFromParent();
				194
				195	return true;
				196	}
				197
				198	bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) {
				199	IRBuilder<> Builder(LI);
				200	AtomicOrdering Order = LI->getOrdering();
				201	Value *Addr = LI->getPointerOperand();
				202	Type *Ty = cast<PointerType>(Addr->getType())->getElementType();
				203	Constant *DummyVal = Constant::getNullValue(Ty);
				204
				205	Value *Pair = Builder.CreateAtomicCmpXchg(
				206	Addr, DummyVal, DummyVal, Order,
				207	AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
				208	Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");
				209
				210	LI->replaceAllUsesWith(Loaded);
				211	LI->eraseFromParent();
				212
				213	return true;
				214	}
				215
				216	bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
				217	// This function is only called on atomic stores that are too large to be
				218	// atomic if implemented as a native store. So we replace them by an
				219	// atomic swap, that can be implemented for example as a ldrex/strex on ARM
				220	// or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
				221	// It is the responsibility of the target to only return true in
				222	// shouldExpandAtomicRMW in cases where this is required and possible.
				223	IRBuilder<> Builder(SI);
				224	AtomicRMWInst *AI =
				225	Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(),
				226	SI->getValueOperand(), SI->getOrdering());
				227	SI->eraseFromParent();
				228
				229	// Now we have an appropriate swap instruction, lower it as usual.
				230	return expandAtomicRMW(AI);
				231	}
				232
				233	bool AtomicExpand::expandAtomicRMW(AtomicRMWInst *AI) {
				234	if (TM->getSubtargetImpl()
				235	->getTargetLowering()
				236	->hasLoadLinkedStoreConditional())
				237	return expandAtomicRMWToLLSC(AI);
				238	else
				239	return expandAtomicRMWToCmpXchg(AI);
				240	}
				241
				242	/// Emit IR to implement the given atomicrmw operation on values in registers,
				243	/// returning the new value.
				244	static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
				245	Value Loaded, Value Inc) {
				246	Value *NewVal;
				247	switch (Op) {
				248	case AtomicRMWInst::Xchg:
				249	return Inc;
				250	case AtomicRMWInst::Add:
				251	return Builder.CreateAdd(Loaded, Inc, "new");
				252	case AtomicRMWInst::Sub:
				253	return Builder.CreateSub(Loaded, Inc, "new");
				254	case AtomicRMWInst::And:
				255	return Builder.CreateAnd(Loaded, Inc, "new");
				256	case AtomicRMWInst::Nand:
				257	return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new");
				258	case AtomicRMWInst::Or:
				259	return Builder.CreateOr(Loaded, Inc, "new");
				260	case AtomicRMWInst::Xor:
				261	return Builder.CreateXor(Loaded, Inc, "new");
				262	case AtomicRMWInst::Max:
				263	NewVal = Builder.CreateICmpSGT(Loaded, Inc);
				264	return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
				265	case AtomicRMWInst::Min:
				266	NewVal = Builder.CreateICmpSLE(Loaded, Inc);
				267	return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
				268	case AtomicRMWInst::UMax:
				269	NewVal = Builder.CreateICmpUGT(Loaded, Inc);
				270	return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
				271	case AtomicRMWInst::UMin:
				272	NewVal = Builder.CreateICmpULE(Loaded, Inc);
				273	return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
				274	default:
				275	llvm_unreachable("Unknown atomic op");
				276	}
				277	}
				278
				279	bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) {
				280	auto TLI = TM->getSubtargetImpl()->getTargetLowering();
				281	AtomicOrdering MemOpOrder = AI->getOrdering();
				282	Value *Addr = AI->getPointerOperand();
				283	BasicBlock *BB = AI->getParent();
				284	Function *F = BB->getParent();
				285	LLVMContext &Ctx = F->getContext();
				286
				287	// Given: atomicrmw some_op iN* %addr, iN %incr ordering
				288	//
				289	// The standard expansion we produce is:
				290	// [...]
				291	// fence?
				292	// atomicrmw.start:
				293	// %loaded = @load.linked(%addr)
				294	// %new = some_op iN %loaded, %incr
				295	// %stored = @store_conditional(%new, %addr)
				296	// %try_again = icmp i32 ne %stored, 0
				297	// br i1 %try_again, label %loop, label %atomicrmw.end
				298	// atomicrmw.end:
				299	// fence?
				300	// [...]
				301	BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
				302	BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
				303
				304	// This grabs the DebugLoc from AI.
				305	IRBuilder<> Builder(AI);
				306
				307	// The split call above "helpfully" added a branch at the end of BB (to the
				308	// wrong place), but we might want a fence too. It's easiest to just remove
				309	// the branch entirely.
				310	std::prev(BB->end())->eraseFromParent();
				311	Builder.SetInsertPoint(BB);
				312	Builder.CreateBr(LoopBB);
				313
				314	// Start the main loop block now that we've taken care of the preliminaries.
				315	Builder.SetInsertPoint(LoopBB);
				316	Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
				317
				318	Value *NewVal =
				319	performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand());
				320
				321	Value *StoreSuccess =
				322	TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
				323	Value *TryAgain = Builder.CreateICmpNE(
				324	StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
				325	Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
				326
				327	Builder.SetInsertPoint(ExitBB, ExitBB->begin());
				328
				329	AI->replaceAllUsesWith(Loaded);
				330	AI->eraseFromParent();
				331
				332	return true;
				333	}
				334
				335	bool AtomicExpand::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI) {
				336	AtomicOrdering MemOpOrder =
				337	AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering();
				338	Value *Addr = AI->getPointerOperand();
				339	BasicBlock *BB = AI->getParent();
				340	Function *F = BB->getParent();
				341	LLVMContext &Ctx = F->getContext();
				342
				343	// Given: atomicrmw some_op iN* %addr, iN %incr ordering
				344	//
				345	// The standard expansion we produce is:
				346	// [...]
				347	// %init_loaded = load atomic iN* %addr
				348	// br label %loop
				349	// loop:
				350	// %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
				351	// %new = some_op iN %loaded, %incr
				352	// %pair = cmpxchg iN* %addr, iN %loaded, iN %new
				353	// %new_loaded = extractvalue { iN, i1 } %pair, 0
				354	// %success = extractvalue { iN, i1 } %pair, 1
				355	// br i1 %success, label %atomicrmw.end, label %loop
				356	// atomicrmw.end:
				357	// [...]
				358	BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
				359	BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
				360
				361	// This grabs the DebugLoc from AI.
				362	IRBuilder<> Builder(AI);
				363
				364	// The split call above "helpfully" added a branch at the end of BB (to the
				365	// wrong place), but we want a load. It's easiest to just remove
				366	// the branch entirely.
				367	std::prev(BB->end())->eraseFromParent();
				368	Builder.SetInsertPoint(BB);
				369	LoadInst *InitLoaded = Builder.CreateLoad(Addr);
				370	// Atomics require at least natural alignment.
				371	InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits());
				372	Builder.CreateBr(LoopBB);
				373
				374	// Start the main loop block now that we've taken care of the preliminaries.
				375	Builder.SetInsertPoint(LoopBB);
				376	PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded");
				377	Loaded->addIncoming(InitLoaded, BB);
				378
				379	Value *NewVal =
				380	performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand());
				381
				382	Value *Pair = Builder.CreateAtomicCmpXchg(
				383	Addr, Loaded, NewVal, MemOpOrder,
				384	AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
				385	Value *NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
				386	Loaded->addIncoming(NewLoaded, LoopBB);
				387
				388	Value *Success = Builder.CreateExtractValue(Pair, 1, "success");
				389	Builder.CreateCondBr(Success, ExitBB, LoopBB);
				390
				391	Builder.SetInsertPoint(ExitBB, ExitBB->begin());
				392
				393	AI->replaceAllUsesWith(NewLoaded);
				394	AI->eraseFromParent();
				395
				396	return true;
				397	}
				398
				399	bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
				400	auto TLI = TM->getSubtargetImpl()->getTargetLowering();
				401	AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
				402	AtomicOrdering FailureOrder = CI->getFailureOrdering();
				403	Value *Addr = CI->getPointerOperand();
				404	BasicBlock *BB = CI->getParent();
				405	Function *F = BB->getParent();
				406	LLVMContext &Ctx = F->getContext();
				407	// If getInsertFencesForAtomic() returns true, then the target does not want
				408	// to deal with memory orders, and emitLeading/TrailingFence should take care
				409	// of everything. Otherwise, emitLeading/TrailingFence are no-op and we
				410	// should preserve the ordering.
				411	AtomicOrdering MemOpOrder =
				412	TLI->getInsertFencesForAtomic() ? Monotonic : SuccessOrder;
				413
				414	// Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
				415	//
				416	// The full expansion we produce is:
				417	// [...]
				418	// fence?
				419	// cmpxchg.start:
				420	// %loaded = @load.linked(%addr)
				421	// %should_store = icmp eq %loaded, %desired
				422	// br i1 %should_store, label %cmpxchg.trystore,
				423	// label %cmpxchg.failure
				424	// cmpxchg.trystore:
				425	// %stored = @store_conditional(%new, %addr)
				426	// %success = icmp eq i32 %stored, 0
				427	// br i1 %success, label %cmpxchg.success, label %loop/%cmpxchg.failure
				428	// cmpxchg.success:
				429	// fence?
				430	// br label %cmpxchg.end
				431	// cmpxchg.failure:
				432	// fence?
				433	// br label %cmpxchg.end
				434	// cmpxchg.end:
				435	// %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
				436	// %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
				437	// %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
				438	// [...]
				439	BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end");
				440	auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
				441	auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, FailureBB);
				442	auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, SuccessBB);
				443	auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB);
				444
				445	// This grabs the DebugLoc from CI
				446	IRBuilder<> Builder(CI);
				447
				448	// The split call above "helpfully" added a branch at the end of BB (to the
				449	// wrong place), but we might want a fence too. It's easiest to just remove
				450	// the branch entirely.
				451	std::prev(BB->end())->eraseFromParent();
				452	Builder.SetInsertPoint(BB);
				453	TLI->emitLeadingFence(Builder, SuccessOrder, /IsStore=/true,
				454	/IsLoad=/true);
				455	Builder.CreateBr(LoopBB);
				456
				457	// Start the main loop block now that we've taken care of the preliminaries.
				458	Builder.SetInsertPoint(LoopBB);
				459	Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
				460	Value *ShouldStore =
				461	Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store");
				462
				463	// If the the cmpxchg doesn't actually need any ordering when it fails, we can
				464	// jump straight past that fence instruction (if it exists).
				465	Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB);
				466
				467	Builder.SetInsertPoint(TryStoreBB);
				468	Value *StoreSuccess = TLI->emitStoreConditional(
				469	Builder, CI->getNewValOperand(), Addr, MemOpOrder);
				470	StoreSuccess = Builder.CreateICmpEQ(
				471	StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
				472	Builder.CreateCondBr(StoreSuccess, SuccessBB,
				473	CI->isWeak() ? FailureBB : LoopBB);
				474
				475	// Make sure later instructions don't get reordered with a fence if necessary.
				476	Builder.SetInsertPoint(SuccessBB);
				477	TLI->emitTrailingFence(Builder, SuccessOrder, /IsStore=/true,
				478	/IsLoad=/true);
				479	Builder.CreateBr(ExitBB);
				480
				481	Builder.SetInsertPoint(FailureBB);
				482	TLI->emitTrailingFence(Builder, FailureOrder, /IsStore=/true,
				483	/IsLoad=/true);
				484	Builder.CreateBr(ExitBB);
				485
				486	// Finally, we have control-flow based knowledge of whether the cmpxchg
				487	// succeeded or not. We expose this to later passes by converting any
				488	// subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate PHI.
				489
				490	// Setup the builder so we can create any PHIs we need.
				491	Builder.SetInsertPoint(ExitBB, ExitBB->begin());
				492	PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2);
				493	Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
				494	Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
				495
				496	// Look for any users of the cmpxchg that are just comparing the loaded value
				497	// against the desired one, and replace them with the CFG-derived version.
				498	SmallVector<ExtractValueInst *, 2> PrunedInsts;
				499	for (auto User : CI->users()) {
				500	ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
				501	if (!EV)
				502	continue;
				503
				504	assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
				505	"weird extraction from { iN, i1 }");
				506
				507	if (EV->getIndices()[0] == 0)
				508	EV->replaceAllUsesWith(Loaded);
				509	else
				510	EV->replaceAllUsesWith(Success);
				511
				512	PrunedInsts.push_back(EV);
				513	}
				514
				515	// We can remove the instructions now we're no longer iterating through them.
				516	for (auto EV : PrunedInsts)
				517	EV->eraseFromParent();
				518
				519	if (!CI->use_empty()) {
				520	// Some use of the full struct return that we don't understand has happened,
				521	// so we've got to reconstruct it properly.
				522	Value *Res;
				523	Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0);
				524	Res = Builder.CreateInsertValue(Res, Success, 1);
				525
				526	CI->replaceAllUsesWith(Res);
				527	}
				528
				529	CI->eraseFromParent();
				530	return true;
				531	}
				532
				533	bool AtomicExpand::isIdempotentRMW(AtomicRMWInst* RMWI) {
				534	auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
				535	if(!C)
				536	return false;
				537
				538	AtomicRMWInst::BinOp Op = RMWI->getOperation();
				539	switch(Op) {
				540	case AtomicRMWInst::Add:
				541	case AtomicRMWInst::Sub:
				542	case AtomicRMWInst::Or:
				543	case AtomicRMWInst::Xor:
				544	return C->isZero();
				545	case AtomicRMWInst::And:
				546	return C->isMinusOne();
				547	// FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/...
				548	default:
				549	return false;
				550	}
				551	}
				552
				553	bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) {
				554	auto TLI = TM->getSubtargetImpl()->getTargetLowering();
				555
				556	if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
				557	if (TLI->shouldExpandAtomicLoadInIR(ResultingLoad))
				558	expandAtomicLoad(ResultingLoad);
				559	return true;
				560	}
				561
				562	return false;
				563	}