//===-- X86CmovConversion.cpp - Convert Cmov to Branch -------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements a pass that converts X86 cmov instructions into
/// branches when profitable. This pass is conservative, i.e., it applies the
/// transformation only if it can guarantee a gain with high confidence.
///
/// Thus, the optimization applies under the following conditions:
///   1. Consider as candidates only CMOVs in innermost loops, assuming that
///      most hotspots are represented by these loops.
///   2. Given a group of CMOV instructions that use the same EFLAGS def
///      instruction:
///      a. Consider them as candidates only if all have the same condition
///         code or the opposite one, to prevent generating more than one
///         conditional jump per EFLAGS def instruction.
///      b. Consider them as candidates only if all are profitable to be
///         converted, assuming that one bad conversion may cause a
///         degradation.
///   3. Apply the conversion only for loops that were found profitable and
///      only for CMOV candidates that were found profitable.
///      a. A loop is considered profitable only if the conversion will reduce
///         its depth cost by some threshold.
///      b. A CMOV is considered profitable if the cost of its condition is
///         higher than the average cost of its true-value and false-value by
///         25% of the branch-misprediction-penalty; this assures no
///         degradation even with a 25% branch misprediction rate.
///
/// Note: This pass is assumed to run on SSA machine code.
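///
/// For a flavor of the transformation at the assembly level, an illustrative
/// (hand-written, not compiler output) sketch of a single converted CMOV:
///
///   cmovge %rbx, %rax            jl   .Lsink     # skip the move if not GE
///                         -->    movq %rbx, %rax
///                               .Lsink: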
//===----------------------------------------------------------------------===//
//
// External interfaces:
//   FunctionPass *llvm::createX86CmovConverterPass();
//   bool X86CmovConverterPass::runOnMachineFunction(MachineFunction &MF);
//
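// A target would typically schedule this pass from its TargetPassConfig. A
// minimal sketch of the wiring (the exact hook used here is an assumption;
// see X86TargetMachine.cpp for the authoritative version):
//
//   void X86PassConfig::addPreRegAlloc() {
//     addPass(createX86CmovConverterPass());
//   }
//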

#include "X86.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

#define DEBUG_TYPE "x86-cmov-converter"

STATISTIC(NumOfSkippedCmovGroups, "Number of unsupported CMOV-groups");
STATISTIC(NumOfCmovGroupCandidate, "Number of CMOV-group candidates");
STATISTIC(NumOfLoopCandidate, "Number of CMOV-conversion profitable loops");
STATISTIC(NumOfOptimizedCmovGroups, "Number of optimized CMOV-groups");

namespace {
// This internal switch can be used to turn off the cmov/branch optimization.
static cl::opt<bool>
    EnableCmovConverter("x86-cmov-converter",
                        cl::desc("Enable the X86 cmov-to-branch optimization."),
                        cl::init(true), cl::Hidden);

static cl::opt<unsigned>
    GainCycleThreshold("x86-cmov-converter-threshold",
                       cl::desc("Minimum gain per loop (in cycles) threshold."),
                       cl::init(4), cl::Hidden);
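
// For example, the gain threshold above can be tuned from the command line
// when running llc directly (illustrative invocation; the flag is hidden but
// still accepted):
//   llc -x86-cmov-converter-threshold=8 foo.ll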

/// Converts X86 cmov instructions into branches when profitable.
class X86CmovConverterPass : public MachineFunctionPass {
public:
  X86CmovConverterPass() : MachineFunctionPass(ID) {}
  ~X86CmovConverterPass() {}

  StringRef getPassName() const override { return "X86 cmov Conversion"; }
  bool runOnMachineFunction(MachineFunction &MF) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;

private:
  /// Pass identification, replacement for typeid.
  static char ID;

  const MachineRegisterInfo *MRI;
  const TargetInstrInfo *TII;
  TargetSchedModel TSchedModel;

  /// List of consecutive CMOV instructions.
  typedef SmallVector<MachineInstr *, 2> CmovGroup;
  typedef SmallVector<CmovGroup, 2> CmovGroups;

  /// Collect all CMOV-group-candidates in \p Blocks and update \p
  /// CmovInstGroups accordingly.
  ///
  /// \param Blocks List of blocks to process.
  /// \param CmovInstGroups List of consecutive CMOV instructions in \p Blocks.
  /// \returns true iff it found any CMOV-group-candidate.
  bool collectCmovCandidates(ArrayRef<MachineBasicBlock *> Blocks,
                             CmovGroups &CmovInstGroups);

  /// Check if it is profitable to transform each CMOV-group-candidate into a
  /// branch. Remove all groups that are not profitable from \p CmovInstGroups.
  ///
  /// \param Blocks List of blocks to process.
  /// \param CmovInstGroups List of consecutive CMOV instructions in \p Blocks.
  /// \returns true iff any CMOV-group-candidate remains.
  bool checkForProfitableCmovCandidates(ArrayRef<MachineBasicBlock *> Blocks,
                                        CmovGroups &CmovInstGroups);

  /// Convert the given list of consecutive CMOV instructions into a branch.
  ///
  /// \param Group Consecutive CMOV instructions to be converted into branch.
  void convertCmovInstsToBranches(SmallVectorImpl<MachineInstr *> &Group) const;
};

char X86CmovConverterPass::ID = 0;

void X86CmovConverterPass::getAnalysisUsage(AnalysisUsage &AU) const {
  MachineFunctionPass::getAnalysisUsage(AU);
  AU.addRequired<MachineLoopInfo>();
}

bool X86CmovConverterPass::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(*MF.getFunction()))
    return false;
  if (!EnableCmovConverter)
    return false;

  DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
               << " **********\n");

  bool Changed = false;
  MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
  const TargetSubtargetInfo &STI = MF.getSubtarget();
  MRI = &MF.getRegInfo();
  TII = STI.getInstrInfo();
  TSchedModel.init(STI.getSchedModel(), &STI, TII);

  //===--------------------------------------------------------------------===//
  // Algorithm
  // ---------
  //   For each innermost loop
  //     collectCmovCandidates() {
  //       Find all CMOV-group-candidates.
  //     }
  //
  //     checkForProfitableCmovCandidates() {
  //       * Calculate both loop-depth and optimized-loop-depth.
  //       * Use these depths to check for loop transformation profitability.
  //       * Check for CMOV-group-candidate transformation profitability.
  //     }
  //
  //     For each profitable CMOV-group-candidate
  //       convertCmovInstsToBranches() {
  //         * Create FalseBB, SinkBB, Conditional branch to SinkBB.
  //         * Replace each CMOV instruction with a PHI instruction in SinkBB.
  //       }
  //
  // Note: For more details, see each function description.
  //===--------------------------------------------------------------------===//

  // Build up the loops in pre-order.
  SmallVector<MachineLoop *, 4> Loops(MLI.begin(), MLI.end());
  // Note that we need to check size on each iteration as we accumulate child
  // loops.
  for (int i = 0; i < (int)Loops.size(); ++i)
    for (MachineLoop *Child : Loops[i]->getSubLoops())
      Loops.push_back(Child);

  for (MachineLoop *CurrLoop : Loops) {
    // Optimize only innermost loops.
    if (!CurrLoop->getSubLoops().empty())
      continue;

    // List of consecutive CMOV instructions to be processed.
    CmovGroups CmovInstGroups;

    if (!collectCmovCandidates(CurrLoop->getBlocks(), CmovInstGroups))
      continue;

    if (!checkForProfitableCmovCandidates(CurrLoop->getBlocks(),
                                          CmovInstGroups))
      continue;

    Changed = true;
    for (auto &Group : CmovInstGroups)
      convertCmovInstsToBranches(Group);
  }
  return Changed;
}

bool X86CmovConverterPass::collectCmovCandidates(
    ArrayRef<MachineBasicBlock *> Blocks, CmovGroups &CmovInstGroups) {
  //===--------------------------------------------------------------------===//
  // Collect all CMOV-group-candidates and add them into CmovInstGroups.
  //
  // CMOV-group:
  //   CMOV instructions, in the same MBB, that use the same EFLAGS def
  //   instruction.
  //
  // CMOV-group-candidate:
  //   CMOV-group where all the CMOV instructions are
  //     1. consecutive.
  //     2. have the same condition code or the opposite one.
  //     3. use only register operands (X86::CMOVrr).
  //===--------------------------------------------------------------------===//
  // List of possible improvements (TODOs):
  // --------------------------------------
  // TODO: Add support for X86::CMOVrm instructions.
  // TODO: Add support for X86::SETcc instructions.
  // TODO: Add support for CMOV-groups with non-consecutive CMOV instructions.
  //===--------------------------------------------------------------------===//
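
  // For illustration, a CMOV-group-candidate in schematic machine IR (the
  // register names are made up and the operand layout is simplified):
  //   CMP32rr %a, %b, implicit-def %eflags
  //   %v1 = CMOVGE32rr %f1, %t1, implicit %eflags
  //   %v2 = CMOVL32rr %f2, %t2, implicit %eflags
  // Both CMOVs use the same EFLAGS def, are consecutive, and have opposite
  // condition codes (GE/L), so together they form one candidate group.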

  // Currently processed CMOV-group.
  CmovGroup Group;
  for (auto *MBB : Blocks) {
    Group.clear();
    // Condition code of the first CMOV instruction in the currently processed
    // range and its opposite condition code.
    X86::CondCode FirstCC = X86::COND_INVALID, FirstOppCC = X86::COND_INVALID;
    // Indicator of a non-CMOVrr instruction in the currently processed range.
    bool FoundNonCMOVInst = false;
    // Indicator that the currently processed CMOV-group should be skipped.
    bool SkipGroup = false;

    for (auto &I : *MBB) {
      X86::CondCode CC = X86::getCondFromCMovOpc(I.getOpcode());
      // Check if we found an X86::CMOVrr instruction.
      if (CC != X86::COND_INVALID && !I.mayLoad()) {
        if (Group.empty()) {
          // We found the first CMOV in the range; reset the flags.
          FirstCC = CC;
          FirstOppCC = X86::GetOppositeBranchCondition(CC);
          FoundNonCMOVInst = false;
          SkipGroup = false;
        }
        Group.push_back(&I);
        // Check if it is a non-consecutive CMOV instruction or has a
        // condition code different from FirstCC and FirstOppCC.
        if (FoundNonCMOVInst || (CC != FirstCC && CC != FirstOppCC))
          // Set the SkipGroup indicator to skip the currently processed
          // CMOV-group.
          SkipGroup = true;
        continue;
      }
      // If Group is empty, keep looking for the first CMOV in the range.
      if (Group.empty())
        continue;

      // We found a non-X86::CMOVrr instruction.
      FoundNonCMOVInst = true;
      // Check if this instruction defines EFLAGS, to determine the end of the
      // processed range, as there will be no more instructions using the
      // current EFLAGS def.
      if (I.definesRegister(X86::EFLAGS)) {
        // Check if the currently processed CMOV-group should not be skipped,
        // and add it as a CMOV-group-candidate.
        if (!SkipGroup)
          CmovInstGroups.push_back(Group);
        else
          ++NumOfSkippedCmovGroups;
        Group.clear();
      }
    }
    // The end of the basic block is considered the end of the range; check if
    // the currently processed CMOV-group should not be skipped, and add it as
    // a CMOV-group-candidate.
    if (Group.empty())
      continue;
    if (!SkipGroup)
      CmovInstGroups.push_back(Group);
    else
      ++NumOfSkippedCmovGroups;
  }

  NumOfCmovGroupCandidate += CmovInstGroups.size();
  return !CmovInstGroups.empty();
}

/// \returns Depth of CMOV instruction as if it was converted into branch.
/// \param TrueOpDepth depth cost of CMOV true-value operand.
/// \param FalseOpDepth depth cost of CMOV false-value operand.
static unsigned getDepthOfOptCmov(unsigned TrueOpDepth, unsigned FalseOpDepth) {
  //===--------------------------------------------------------------------===//
  // With no info about branch weight, we assume 50% for each value operand.
  // Thus, the depth of the optimized CMOV instruction is the rounded-up
  // average of its True-Operand-Value-Depth and False-Operand-Value-Depth.
  //===--------------------------------------------------------------------===//
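  // For example (illustrative numbers): TrueOpDepth = 2 and FalseOpDepth = 5
  // yield (2 + 5 + 1) / 2 = 4 cycles for the converted CMOV.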
  return (TrueOpDepth + FalseOpDepth + 1) / 2;
}

bool X86CmovConverterPass::checkForProfitableCmovCandidates(
    ArrayRef<MachineBasicBlock *> Blocks, CmovGroups &CmovInstGroups) {
  struct DepthInfo {
    /// Depth of original loop.
    unsigned Depth;
    /// Depth of optimized loop.
    unsigned OptDepth;
  };
  /// Number of loop iterations for which to calculate instruction depth.
  static const unsigned LoopIterations = 2;
  DenseMap<MachineInstr *, DepthInfo> DepthMap;
  DepthInfo LoopDepth[LoopIterations] = {{0, 0}, {0, 0}};
  enum { PhyRegType = 0, VirRegType = 1, RegTypeNum = 2 };
  /// For each register type, maps the register to its last def instruction.
  DenseMap<unsigned, MachineInstr *> RegDefMaps[RegTypeNum];
  /// Maps register operand to its def instruction, which can be nullptr if it
  /// is unknown (e.g., operand is defined outside the loop).
  DenseMap<MachineOperand *, MachineInstr *> OperandToDefMap;

  // Set the depth of the unknown instruction (i.e., nullptr) to zero.
  DepthMap[nullptr] = {0, 0};

  SmallPtrSet<MachineInstr *, 4> CmovInstructions;
  for (auto &Group : CmovInstGroups)
    CmovInstructions.insert(Group.begin(), Group.end());

  //===--------------------------------------------------------------------===//
  // Step 1: Calculate instruction depth and loop depth.
  // Optimized-Loop:
  //   loop with CMOV-group-candidates converted into branches.
  //
  // Instruction-Depth:
  //   instruction latency + max operand depth.
  //   * For a CMOV instruction in the optimized loop the depth is calculated
  //     as:
  //       CMOV latency + getDepthOfOptCmov(True-Op-Depth, False-Op-Depth)
  // TODO: Find a better way to estimate the latency of the branch instruction
  //       rather than using the CMOV latency.
  //
  // Loop-Depth:
  //   max instruction depth of all instructions in the loop.
  // Note: the instruction with max depth represents the critical-path in the
  //       loop.
  //
  // Loop-Depth[i]:
  //   Loop-Depth calculated for the first `i` iterations.
  // Note: it is enough to calculate depth for up to two iterations.
  //
  // Depth-Diff[i]:
  //   Number of cycles saved in the first `i` iterations by optimizing the
  //   loop.
  //===--------------------------------------------------------------------===//
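  // For example (illustrative numbers): if the original loop's critical path
  // is 10 cycles after one iteration and 20 after two, while the optimized
  // loop's is 8 and 16, then Depth-Diff[0] = 2 and Depth-Diff[1] = 4.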
  for (unsigned I = 0; I < LoopIterations; ++I) {
    DepthInfo &MaxDepth = LoopDepth[I];
    for (auto *MBB : Blocks) {
      // Clear physical registers Def map.
      RegDefMaps[PhyRegType].clear();
      for (MachineInstr &MI : *MBB) {
        unsigned MIDepth = 0;
        unsigned MIDepthOpt = 0;
        bool IsCMOV = CmovInstructions.count(&MI);
        for (auto &MO : MI.uses()) {
          // Check for "isUse()" because "uses()" also returns implicit
          // definitions.
          if (!MO.isReg() || !MO.isUse())
            continue;
          unsigned Reg = MO.getReg();
          auto &RDM = RegDefMaps[TargetRegisterInfo::isVirtualRegister(Reg)];
          if (MachineInstr *DefMI = RDM.lookup(Reg)) {
            OperandToDefMap[&MO] = DefMI;
            DepthInfo Info = DepthMap.lookup(DefMI);
            MIDepth = std::max(MIDepth, Info.Depth);
            if (!IsCMOV)
              MIDepthOpt = std::max(MIDepthOpt, Info.OptDepth);
          }
        }

        if (IsCMOV)
          MIDepthOpt = getDepthOfOptCmov(
              DepthMap[OperandToDefMap.lookup(&MI.getOperand(1))].OptDepth,
              DepthMap[OperandToDefMap.lookup(&MI.getOperand(2))].OptDepth);

        // Iterate over all operands to handle implicit definitions as well.
        for (auto &MO : MI.operands()) {
          if (!MO.isReg() || !MO.isDef())
            continue;
          unsigned Reg = MO.getReg();
          RegDefMaps[TargetRegisterInfo::isVirtualRegister(Reg)][Reg] = &MI;
        }

        unsigned Latency = TSchedModel.computeInstrLatency(&MI);
        MIDepth += Latency;
        MIDepthOpt += Latency;
        DepthMap[&MI] = {MIDepth, MIDepthOpt};
        MaxDepth.Depth = std::max(MaxDepth.Depth, MIDepth);
        MaxDepth.OptDepth = std::max(MaxDepth.OptDepth, MIDepthOpt);
      }
    }
  }

  unsigned Diff[LoopIterations] = {LoopDepth[0].Depth - LoopDepth[0].OptDepth,
                                   LoopDepth[1].Depth - LoopDepth[1].OptDepth};

  //===--------------------------------------------------------------------===//
  // Step 2: Check if the loop is worth optimizing.
  // Worth-Optimize-Loop:
  //   case 1: Diff[1] == Diff[0]
  //           The critical-path is iteration independent - there is no
  //           dependency of critical-path instructions on critical-path
  //           instructions of the previous iteration.
  //           Thus, it is enough to check the gain percent of the 1st
  //           iteration - to be conservative, the optimized loop needs to have
  //           a depth at least 12.5% smaller than the original loop, per
  //           iteration.
  //
  //   case 2: Diff[1] > Diff[0]
  //           The critical-path is iteration dependent - there is a dependency
  //           of critical-path instructions on critical-path instructions of
  //           the previous iteration.
  //           Thus, check the gain percent of the 2nd iteration (similar to
  //           the previous case), but it is also required to check the
  //           gradient of the gain - the change in Depth-Diff compared to the
  //           change in Loop-Depth between the 1st and 2nd iterations.
  //           To be conservative, the gradient needs to be at least 50%.
  //
  //   In addition, in order not to optimize loops with a very small gain, the
  //   gain (in cycles) after the 2nd iteration must not be less than a given
  //   threshold. Thus, the check (Diff[1] >= GainCycleThreshold) must hold.
  //
  //   If the loop is not worth optimizing, remove all CMOV-group-candidates.
  //===--------------------------------------------------------------------===//
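  // For example (illustrative numbers): with Diff[0] == Diff[1] == 4 and
  // LoopDepth[0].Depth == 32, the threshold check passes (4 >= the default
  // GainCycleThreshold of 4) and case 1 applies: 4 * 8 == 32 >= 32, i.e. the
  // per-iteration gain is exactly 12.5%, so the loop is (barely) considered
  // worth optimizing.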
  if (Diff[1] < GainCycleThreshold)
    return false;

  bool WorthOptLoop = false;
  if (Diff[1] == Diff[0])
    WorthOptLoop = Diff[0] * 8 >= LoopDepth[0].Depth;
  else if (Diff[1] > Diff[0])
    WorthOptLoop =
        (Diff[1] - Diff[0]) * 2 >= (LoopDepth[1].Depth - LoopDepth[0].Depth) &&
        (Diff[1] * 8 >= LoopDepth[1].Depth);

  if (!WorthOptLoop)
    return false;

  ++NumOfLoopCandidate;

  //===--------------------------------------------------------------------===//
  // Step 3: Check for each CMOV-group-candidate whether it is worth
  //         optimizing.
  // Worth-Optimize-Group:
  //   Iff it is worth optimizing all CMOV instructions in the group.
  //
  // Worth-Optimize-CMOV:
  //   A predicted branch is faster than CMOV by the difference between the
  //   depth of the condition operand and the depth of the taken (predicted)
  //   value operand.
  //   To be conservative, the gain of such a CMOV transformation should cover
  //   at least 25% of the branch-misprediction-penalty.
  //===--------------------------------------------------------------------===//
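  // For example (illustrative numbers): with MispredictPenalty == 20, a CMOV
  // whose condition depth (CondCost) is 11 and whose predicted-value depth
  // (ValCost) is 5 gains 6 cycles, and 6 * 4 == 24 >= 20, so it is considered
  // worth converting.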
  unsigned MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty;
  CmovGroups TempGroups;
  std::swap(TempGroups, CmovInstGroups);
  for (auto &Group : TempGroups) {
    bool WorthOpGroup = true;
    for (auto *MI : Group) {
      // Avoid converting a CMOV instruction whose result is used as a pointer
      // to load from. This is another conservative check, intended to avoid
      // converting CMOV instructions used in tree-search-like algorithms,
      // where the branch is unpredictable.
      // Check whether the CMOV result has exactly one use and whether that
      // use is a load.
      auto UIs = MRI->use_instructions(MI->defs().begin()->getReg());
      if (UIs.begin() != UIs.end() && ++UIs.begin() == UIs.end()) {
        unsigned Op = UIs.begin()->getOpcode();
        if (Op == X86::MOV64rm || Op == X86::MOV32rm) {
          WorthOpGroup = false;
          break;
        }
      }

      unsigned CondCost =
          DepthMap[OperandToDefMap.lookup(&MI->getOperand(3))].Depth;
      unsigned ValCost = getDepthOfOptCmov(
          DepthMap[OperandToDefMap.lookup(&MI->getOperand(1))].Depth,
          DepthMap[OperandToDefMap.lookup(&MI->getOperand(2))].Depth);
      if (ValCost > CondCost || (CondCost - ValCost) * 4 < MispredictPenalty) {
        WorthOpGroup = false;
        break;
      }
    }

    if (WorthOpGroup)
      CmovInstGroups.push_back(Group);
  }

  return !CmovInstGroups.empty();
}

static bool checkEFLAGSLive(MachineInstr *MI) {
  if (MI->killsRegister(X86::EFLAGS))
    return false;

  // The EFLAGS operand of MI might be missing a kill marker.
  // Figure out whether EFLAGS is live after the MI instruction.
  MachineBasicBlock *BB = MI->getParent();
  MachineBasicBlock::iterator ItrMI = MI;

  // Scan forward through BB for a use/def of EFLAGS.
  for (auto I = std::next(ItrMI), E = BB->end(); I != E; ++I) {
    if (I->readsRegister(X86::EFLAGS))
      return true;
    if (I->definesRegister(X86::EFLAGS))
      return false;
  }

  // We hit the end of the block; check whether EFLAGS is live into a
  // successor.
  for (MachineBasicBlock *Succ : BB->successors())
    if (Succ->isLiveIn(X86::EFLAGS))
      return true;

  return false;
}

void X86CmovConverterPass::convertCmovInstsToBranches(
    SmallVectorImpl<MachineInstr *> &Group) const {
  assert(!Group.empty() && "No CMOV instructions to convert");
  ++NumOfOptimizedCmovGroups;

  // To convert a CMOVcc instruction, we actually have to insert the diamond
  // control-flow pattern. The incoming instruction knows the destination vreg
  // to set, the condition code register to branch on, the true/false values to
  // select between, and a branch opcode to use.

  // Before
  // -----
  // MBB:
  //   cond = cmp ...
  //   v1 = CMOVge t1, f1, cond
  //   v2 = CMOVlt t2, f2, cond
  //   v3 = CMOVge v1, f3, cond
  //
  // After
  // -----
  // MBB:
  //   cond = cmp ...
  //   jge %SinkMBB
  //
  // FalseMBB:
  //   jmp %SinkMBB
  //
  // SinkMBB:
  //   %v1 = phi[%f1, %FalseMBB], [%t1, %MBB]
  //   %v2 = phi[%t2, %FalseMBB], [%f2, %MBB] ; For CMOV with OppCC switch
  //                                          ; true-value with false-value
  //   %v3 = phi[%f3, %FalseMBB], [%t1, %MBB] ; Phi instruction cannot use
  //                                          ; previous Phi instruction result

  MachineInstr &MI = *Group.front();
  MachineInstr *LastCMOV = Group.back();
  DebugLoc DL = MI.getDebugLoc();
  X86::CondCode CC = X86::CondCode(X86::getCondFromCMovOpc(MI.getOpcode()));
  X86::CondCode OppCC = X86::GetOppositeBranchCondition(CC);
  MachineBasicBlock *MBB = MI.getParent();
  MachineFunction::iterator It = ++MBB->getIterator();
  MachineFunction *F = MBB->getParent();
  const BasicBlock *BB = MBB->getBasicBlock();

  MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(BB);
  MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(BB);
  F->insert(It, FalseMBB);
  F->insert(It, SinkMBB);

  // If the EFLAGS register isn't dead in the terminator, then claim that it's
  // live into the false and sink blocks.
  if (checkEFLAGSLive(LastCMOV)) {
    FalseMBB->addLiveIn(X86::EFLAGS);
    SinkMBB->addLiveIn(X86::EFLAGS);
  }

  // Transfer the remainder of MBB and its successor edges to SinkMBB.
  SinkMBB->splice(SinkMBB->begin(), MBB,
                  std::next(MachineBasicBlock::iterator(LastCMOV)), MBB->end());
  SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);

  // Add the false and sink blocks as its successors.
  MBB->addSuccessor(FalseMBB);
  MBB->addSuccessor(SinkMBB);

  // Create the conditional branch instruction.
  BuildMI(MBB, DL, TII->get(X86::GetCondBranchFromCond(CC))).addMBB(SinkMBB);

  // Add the sink block as the false block's successor.
  FalseMBB->addSuccessor(SinkMBB);

  MachineInstrBuilder MIB;
  MachineBasicBlock::iterator MIItBegin = MachineBasicBlock::iterator(MI);
  MachineBasicBlock::iterator MIItEnd =
      std::next(MachineBasicBlock::iterator(LastCMOV));
  MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin();
  // As we are creating the PHIs, we have to be careful if there is more than
  // one. Later CMOVs may reference the results of earlier CMOVs, but later
  // PHIs have to reference the individual true/false inputs from earlier PHIs.
  // That also means that PHI construction must work forward from earlier to
  // later, and that the code must maintain a mapping from each earlier PHI's
  // destination register to the registers that went into that PHI.
  DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable;
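  // Continuing the "After" example above: once the %v1 PHI has been emitted,
  // the table maps %v1 to its {FalseMBB, MBB} inputs; when the %v3 PHI later
  // names %v1 as an input, the lookup below substitutes the value %v1 takes
  // on the corresponding edge (%t1 on the edge from MBB, per the comment
  // above).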

  for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; ++MIIt) {
    unsigned DestReg = MIIt->getOperand(0).getReg();
    unsigned Op1Reg = MIIt->getOperand(1).getReg();
    unsigned Op2Reg = MIIt->getOperand(2).getReg();

    // If the CMOV we are processing has the opposite condition of the jump we
    // generated, then we have to swap the operands for the PHI that is going
    // to be generated.
    if (X86::getCondFromCMovOpc(MIIt->getOpcode()) == OppCC)
      std::swap(Op1Reg, Op2Reg);

    auto Op1Itr = RegRewriteTable.find(Op1Reg);
    if (Op1Itr != RegRewriteTable.end())
      Op1Reg = Op1Itr->second.first;

    auto Op2Itr = RegRewriteTable.find(Op2Reg);
    if (Op2Itr != RegRewriteTable.end())
      Op2Reg = Op2Itr->second.second;

    // SinkMBB:
    //   %Result = phi [ %FalseValue, FalseMBB ], [ %TrueValue, MBB ]
    //   ...
    MIB = BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(X86::PHI), DestReg)
              .addReg(Op1Reg)
              .addMBB(FalseMBB)
              .addReg(Op2Reg)
              .addMBB(MBB);
    (void)MIB;
    DEBUG(dbgs() << "\tFrom: "; MIIt->dump());
    DEBUG(dbgs() << "\tTo: "; MIB->dump());

    // Add this PHI to the rewrite table.
    RegRewriteTable[DestReg] = std::make_pair(Op1Reg, Op2Reg);
  }

  // Now remove the CMOV(s).
  MBB->erase(MIItBegin, MIItEnd);
}

} // End anonymous namespace.

FunctionPass *llvm::createX86CmovConverterPass() {
  return new X86CmovConverterPass();
}