Blame - lib/Analysis/DataStructure/Parallelize.cpp - fp2-dev/platform/external/llvm

blob: 562a263e29aee3ad254df9987c978aaf5594a102 [file] [log] [blame]

Chris Lattner	aa92145	2003-09-01 16:42:16 +0000	[diff] [blame]	1	//===- Parallelize.cpp - Auto parallelization using DS Graphs -------------===//
Vikram S. Adve	e12c74c	2002-12-10 00:43:34 +0000	[diff] [blame]	2	//
				3	// This file implements a pass that automatically parallelizes a program,
				4	// using the Cilk multi-threaded runtime system to execute parallel code.
				5	//
				6	// The pass uses the Program Dependence Graph (class PDGIterator) to
				7	// identify parallelizable function calls, i.e., calls whose instances
				8	// can be executed in parallel with instances of other function calls.
				9	// (In the future, this should also execute different instances of the same
				10	// function call in parallel, but that requires parallelizing across
				11	// loop iterations.)
				12	//
				13	// The output of the pass is LLVM code with:
				14	// (1) all parallelizable functions renamed to flag them as parallelizable;
				15	// (2) calls to a sync() function introduced at synchronization points.
				16	// The CWriter recognizes these functions and inserts the appropriate Cilk
				17	// keywords when writing out C code. This C code must be compiled with cilk2c.
				18	//
				19	// Current algorithmic limitations:
				20	// -- no array dependence analysis
				21	// -- no parallelization for function calls in different loop iterations
				22	// (except in unlikely trivial cases)
				23	//
				24	// Limitations of using Cilk:
				25	// -- No parallelism within a function body, e.g., in a loop;
				26	// -- Simplistic synchronization model requiring all parallel threads
				27	// created within a function to block at a sync().
				28	// -- Excessive overhead at "spawned" function calls, which has no benefit
				29	// once all threads are busy (especially common when the degree of
				30	// parallelism is low).
Chris Lattner	aa92145	2003-09-01 16:42:16 +0000	[diff] [blame]	31	//
Vikram S. Adve	e12c74c	2002-12-10 00:43:34 +0000	[diff] [blame]	32	//===----------------------------------------------------------------------===//
				33
Vikram S. Adve	e12c74c	2002-12-10 00:43:34 +0000	[diff] [blame]	34	#include "llvm/Transforms/Utils/DemoteRegToStack.h"
				35	#include "llvm/Analysis/PgmDependenceGraph.h"
				36	#include "llvm/Analysis/Dominators.h"
				37	#include "llvm/Analysis/DataStructure.h"
				38	#include "llvm/Analysis/DSGraph.h"
				39	#include "llvm/Module.h"
Chris Lattner	aa92145	2003-09-01 16:42:16 +0000	[diff] [blame]	40	#include "llvm/Instructions.h"
Vikram S. Adve	e12c74c	2002-12-10 00:43:34 +0000	[diff] [blame]	41	#include "llvm/iTerminators.h"
				42	#include "llvm/DerivedTypes.h"
				43	#include "llvm/Support/InstVisitor.h"
Vikram S. Adve	e12c74c	2002-12-10 00:43:34 +0000	[diff] [blame]	44	#include "Support/Statistic.h"
				45	#include "Support/STLExtras.h"
				46	#include "Support/hash_set"
				47	#include "Support/hash_map"
Vikram S. Adve	e12c74c	2002-12-10 00:43:34 +0000	[diff] [blame]	48	#include <functional>
				49	#include <algorithm>
				50
				51
				52
				53	#if 0
				54	void AddToDomSet(vector<BasicBlock>& domSet, BasicBlock bb,
				55	const DominatorTree& domTree)
				56	{
				57	DominatorTreeBase::Node* bbNode = domTree.getNode(bb);
				58	const std::vector<Node*>& domKids = bbNode.getChildren();
				59	domSet.insert(domSet.end(), domKids.begin(), domKids.end());
				60	for (unsigned i = 0; i < domKids.size(); ++i)
				61	AddToDomSet(domSet, domKids[i]->getNode(), domTree);
				62	}
				63
				64	bool CheckDominance(Function& func,
				65	const CallInst& callInst1,
				66	const CallInst& callInst2)
				67	{
				68	if (callInst1 == callInst2) // makes sense if this is in a loop but
				69	return false; // we're not handling loops yet
				70
				71	// Check first if one call dominates the other
				72	DominatorSet& domSet = getAnalysis<DominatorSet>(func);
				73	if (domSet.dominates(callInst2, callInst1))
				74	{ // swap callInst1 and callInst2
				75	const CallInst& tmp = callInst2; callInst2 = callInst1; callInst1 = tmp;
				76	}
				77	else if (! domSet.dominates(callInst1, callInst2))
				78	return false; // neither dominates the other:
				79
				80	//
				81	if (! AreIndependent(func, callInst1, callInst2))
				82	return false;
				83	}
				84
				85	#endif
				86
				87
				88	//----------------------------------------------------------------------------
Chris Lattner	09a6705	2003-09-01 16:49:38 +0000	[diff] [blame^]	89	// Global constants used in marking Cilk functions and function calls.
				90	//----------------------------------------------------------------------------
				91
				92	static const char * const CilkSuffix = ".llvm2cilk";
				93	static const char * const DummySyncFuncName = "__sync.llvm2cilk";
				94
				95	//----------------------------------------------------------------------------
				96	// Routines to identify Cilk functions, calls to Cilk functions, and syncs.
				97	//----------------------------------------------------------------------------
				98
				99	static bool isCilk(const Function& F) {
				100	return (F.getName().rfind(CilkSuffix) ==
				101	F.getName().size() - std::strlen(CilkSuffix));
				102	}
				103
				104	static bool isCilkMain(const Function& F) {
				105	return F.getName() == "main" + std::string(CilkSuffix);
				106	}
				107
				108
				109	static bool isCilk(const CallInst& CI) {
				110	return CI.getCalledFunction() && isCilk(*CI.getCalledFunction());
				111	}
				112
				113	static bool isSync(const CallInst& CI) {
				114	return CI.getCalledFunction() &&
				115	CI.getCalledFunction()->getName() == DummySyncFuncName;
				116	}
				117
				118
				119	//----------------------------------------------------------------------------
Vikram S. Adve	e12c74c	2002-12-10 00:43:34 +0000	[diff] [blame]	120	// class Cilkifier
				121	//
				122	// Code generation pass that transforms code to identify where Cilk keywords
				123	// should be inserted. This relies on dis -c to print out the keywords.
				124	//----------------------------------------------------------------------------
				125
				126
				127	class Cilkifier: public InstVisitor<Cilkifier>
				128	{
				129	Function* DummySyncFunc;
				130
				131	// Data used when transforming each function.
				132	hash_set<const Instruction*> stmtsVisited; // Flags for recursive DFS
				133	hash_map<const CallInst, hash_set<CallInst> > spawnToSyncsMap;
				134
				135	// Input data for the transformation.
				136	const hash_set<Function> cilkFunctions; // Set of parallel functions
				137	PgmDependenceGraph* depGraph;
				138
				139	void DFSVisitInstr (Instruction* I,
				140	Instruction* root,
				141	hash_set<const Instruction*>& depsOfRoot);
				142
				143	public:
				144	/ctor/ Cilkifier (Module& M);
				145
				146	// Transform a single function including its name, its call sites, and syncs
				147	//
				148	void TransformFunc (Function* F,
				149	const hash_set<Function*>& cilkFunctions,
				150	PgmDependenceGraph& _depGraph);
				151
				152	// The visitor function that does most of the hard work, via DFSVisitInstr
				153	//
				154	void visitCallInst(CallInst& CI);
				155	};
				156
				157
				158	Cilkifier::Cilkifier(Module& M)
				159	{
				160	// create the dummy Sync function and add it to the Module
				161	DummySyncFunc = new Function(FunctionType::get( Type::VoidTy,
				162	std::vector<const Type*>(),
				163	/isVararg/ false),
Chris Lattner	4ad02e7	2003-04-16 20:28:45 +0000	[diff] [blame]	164	GlobalValue::ExternalLinkage, DummySyncFuncName,
				165	&M);
Vikram S. Adve	e12c74c	2002-12-10 00:43:34 +0000	[diff] [blame]	166	}
				167
				168	void Cilkifier::TransformFunc(Function* F,
				169	const hash_set<Function*>& _cilkFunctions,
				170	PgmDependenceGraph& _depGraph)
				171	{
				172	// Memoize the information for this function
				173	cilkFunctions = &_cilkFunctions;
				174	depGraph = &_depGraph;
				175
				176	// Add the marker suffix to the Function name
				177	// This should automatically mark all calls to the function also!
				178	F->setName(F->getName() + CilkSuffix);
				179
				180	// Insert sync operations for each separate spawn
				181	visit(*F);
				182
				183	// Now traverse the CFG in rPostorder and eliminate redundant syncs, i.e.,
				184	// two consecutive sync's on a straight-line path with no intervening spawn.
				185
				186	}
				187
				188
				189	void Cilkifier::DFSVisitInstr(Instruction* I,
				190	Instruction* root,
				191	hash_set<const Instruction*>& depsOfRoot)
				192	{
				193	assert(stmtsVisited.find(I) == stmtsVisited.end());
				194	stmtsVisited.insert(I);
				195
				196	// If there is a dependence from root to I, insert Sync and return
				197	if (depsOfRoot.find(I) != depsOfRoot.end())
				198	{ // Insert a sync before I and stop searching along this path.
				199	// If I is a Phi instruction, the dependence can only be an SSA dep.
				200	// and we need to insert the sync in the predecessor on the appropriate
				201	// incoming edge!
				202	CallInst* syncI = 0;
				203	if (PHINode* phiI = dyn_cast<PHINode>(I))
				204	{ // check all operands of the Phi and insert before each one
				205	for (unsigned i = 0, N = phiI->getNumIncomingValues(); i < N; ++i)
				206	if (phiI->getIncomingValue(i) == root)
				207	syncI = new CallInst(DummySyncFunc, std::vector<Value*>(), "",
				208	phiI->getIncomingBlock(i)->getTerminator());
				209	}
				210	else
				211	syncI = new CallInst(DummySyncFunc, std::vector<Value*>(), "", I);
				212
				213	// Remember the sync for each spawn to eliminate rendundant ones later
				214	spawnToSyncsMap[cast<CallInst>(root)].insert(syncI);
				215
				216	return;
				217	}
				218
				219	// else visit unvisited successors
				220	if (BranchInst* brI = dyn_cast<BranchInst>(I))
				221	{ // visit first instruction in each successor BB
				222	for (unsigned i = 0, N = brI->getNumSuccessors(); i < N; ++i)
				223	if (stmtsVisited.find(&brI->getSuccessor(i)->front())
				224	== stmtsVisited.end())
				225	DFSVisitInstr(&brI->getSuccessor(i)->front(), root, depsOfRoot);
				226	}
				227	else
				228	if (Instruction* nextI = I->getNext())
				229	if (stmtsVisited.find(nextI) == stmtsVisited.end())
				230	DFSVisitInstr(nextI, root, depsOfRoot);
				231	}
				232
				233
				234	void Cilkifier::visitCallInst(CallInst& CI)
				235	{
				236	assert(CI.getCalledFunction() != 0 && "Only direct calls can be spawned.");
				237	if (cilkFunctions->find(CI.getCalledFunction()) == cilkFunctions->end())
				238	return; // not a spawn
				239
				240	// Find all the outgoing memory dependences.
				241	hash_set<const Instruction*> depsOfRoot;
				242	for (PgmDependenceGraph::iterator DI =
				243	depGraph->outDepBegin(CI, MemoryDeps); ! DI.fini(); ++DI)
				244	depsOfRoot.insert(&DI->getSink()->getInstr());
				245
				246	// Now find all outgoing SSA dependences to the eventual non-Phi users of
				247	// the call value (i.e., direct users that are not phis, and for any
				248	// user that is a Phi, direct non-Phi users of that Phi, and recursively).
Chris Lattner	aa92145	2003-09-01 16:42:16 +0000	[diff] [blame]	249	std::vector<const PHINode*> phiUsers;
Vikram S. Adve	e12c74c	2002-12-10 00:43:34 +0000	[diff] [blame]	250	hash_set<const PHINode*> phisSeen; // ensures we don't visit a phi twice
				251	for (Value::use_iterator UI=CI.use_begin(), UE=CI.use_end(); UI != UE; ++UI)
				252	if (const PHINode* phiUser = dyn_cast<PHINode>(*UI))
				253	{
				254	if (phisSeen.find(phiUser) == phisSeen.end())
				255	{
Chris Lattner	aa92145	2003-09-01 16:42:16 +0000	[diff] [blame]	256	phiUsers.push_back(phiUser);
Vikram S. Adve	e12c74c	2002-12-10 00:43:34 +0000	[diff] [blame]	257	phisSeen.insert(phiUser);
				258	}
				259	}
				260	else
				261	depsOfRoot.insert(cast<Instruction>(*UI));
				262
				263	// Now we've found the non-Phi users and immediate phi users.
				264	// Recursively walk the phi users and add their non-phi users.
Chris Lattner	aa92145	2003-09-01 16:42:16 +0000	[diff] [blame]	265	for (const PHINode* phiUser; !phiUsers.empty(); phiUsers.pop_back())
Vikram S. Adve	e12c74c	2002-12-10 00:43:34 +0000	[diff] [blame]	266	{
Chris Lattner	aa92145	2003-09-01 16:42:16 +0000	[diff] [blame]	267	phiUser = phiUsers.back();
Vikram S. Adve	e12c74c	2002-12-10 00:43:34 +0000	[diff] [blame]	268	for (Value::use_const_iterator UI=phiUser->use_begin(),
				269	UE=phiUser->use_end(); UI != UE; ++UI)
				270	if (const PHINode* pn = dyn_cast<PHINode>(*UI))
				271	{
				272	if (phisSeen.find(pn) == phisSeen.end())
				273	{
Chris Lattner	aa92145	2003-09-01 16:42:16 +0000	[diff] [blame]	274	phiUsers.push_back(pn);
Vikram S. Adve	e12c74c	2002-12-10 00:43:34 +0000	[diff] [blame]	275	phisSeen.insert(pn);
				276	}
				277	}
				278	else
				279	depsOfRoot.insert(cast<Instruction>(*UI));
				280	}
				281
				282	// Walk paths of the CFG starting at the call instruction and insert
				283	// one sync before the first dependence on each path, if any.
				284	if (! depsOfRoot.empty())
				285	{
				286	stmtsVisited.clear(); // start a new DFS for this CallInst
				287	assert(CI.getNext() && "Call instruction cannot be a terminator!");
				288	DFSVisitInstr(CI.getNext(), &CI, depsOfRoot);
				289	}
				290
				291	// Now, eliminate all users of the SSA value of the CallInst, i.e.,
				292	// if the call instruction returns a value, delete the return value
				293	// register and replace it by a stack slot.
				294	if (CI.getType() != Type::VoidTy)
				295	DemoteRegToStack(CI);
				296	}
				297
				298
				299	//----------------------------------------------------------------------------
				300	// class FindParallelCalls
				301	//
				302	// Find all CallInst instructions that have at least one other CallInst
				303	// that is independent. These are the instructions that can produce
				304	// useful parallelism.
				305	//----------------------------------------------------------------------------
				306
Chris Lattner	8043127	2003-08-06 17:16:24 +0000	[diff] [blame]	307	class FindParallelCalls : public InstVisitor<FindParallelCalls> {
Vikram S. Adve	e12c74c	2002-12-10 00:43:34 +0000	[diff] [blame]	308	typedef hash_set<CallInst*> DependentsSet;
				309	typedef DependentsSet::iterator Dependents_iterator;
				310	typedef DependentsSet::const_iterator Dependents_const_iterator;
				311
				312	PgmDependenceGraph& depGraph; // dependence graph for the function
				313	hash_set<Instruction*> stmtsVisited; // flags for DFS walk of depGraph
				314	hash_map<CallInst*, bool > completed; // flags marking if a CI is done
				315	hash_map<CallInst*, DependentsSet> dependents; // dependent CIs for each CI
				316
				317	void VisitOutEdges(Instruction* I,
				318	CallInst* root,
				319	DependentsSet& depsOfRoot);
				320
Chris Lattner	8043127	2003-08-06 17:16:24 +0000	[diff] [blame]	321	FindParallelCalls(const FindParallelCalls &); // DO NOT IMPLEMENT
				322	void operator=(const FindParallelCalls&); // DO NOT IMPLEMENT
Vikram S. Adve	e12c74c	2002-12-10 00:43:34 +0000	[diff] [blame]	323	public:
				324	std::vector<CallInst*> parallelCalls;
				325
				326	public:
				327	/ctor/ FindParallelCalls (Function& F, PgmDependenceGraph& DG);
				328	void visitCallInst (CallInst& CI);
				329	};
				330
				331
				332	FindParallelCalls::FindParallelCalls(Function& F,
				333	PgmDependenceGraph& DG)
				334	: depGraph(DG)
				335	{
				336	// Find all CallInsts reachable from each CallInst using a recursive DFS
				337	visit(F);
				338
				339	// Now we've found all CallInsts reachable from each CallInst.
				340	// Find those CallInsts that are parallel with at least one other CallInst
				341	// by counting total inEdges and outEdges.
				342	//
				343	unsigned long totalNumCalls = completed.size();
				344
				345	if (totalNumCalls == 1)
				346	{ // Check first for the special case of a single call instruction not
				347	// in any loop. It is not parallel, even if it has no dependences
				348	// (this is why it is a special case).
				349	//
				350	// FIXME:
				351	// THIS CASE IS NOT HANDLED RIGHT NOW, I.E., THERE IS NO
				352	// PARALLELISM FOR CALLS IN DIFFERENT ITERATIONS OF A LOOP.
				353	//
				354	return;
				355	}
				356
				357	hash_map<CallInst*, unsigned long> numDeps;
				358	for (hash_map<CallInst*, DependentsSet>::iterator II = dependents.begin(),
				359	IE = dependents.end(); II != IE; ++II)
				360	{
				361	CallInst* fromCI = II->first;
				362	numDeps[fromCI] += II->second.size();
				363	for (Dependents_iterator DI = II->second.begin(), DE = II->second.end();
				364	DI != DE; ++DI)
				365	numDeps[DI]++; // DI can be reached from II->first
				366	}
				367
				368	for (hash_map<CallInst*, DependentsSet>::iterator
				369	II = dependents.begin(), IE = dependents.end(); II != IE; ++II)
				370
				371	// FIXME: Remove "- 1" when considering parallelism in loops
				372	if (numDeps[II->first] < totalNumCalls - 1)
				373	parallelCalls.push_back(II->first);
				374	}
				375
				376
				377	void FindParallelCalls::VisitOutEdges(Instruction* I,
				378	CallInst* root,
				379	DependentsSet& depsOfRoot)
				380	{
				381	assert(stmtsVisited.find(I) == stmtsVisited.end() && "Stmt visited twice?");
				382	stmtsVisited.insert(I);
				383
				384	if (CallInst* CI = dyn_cast<CallInst>(I))
				385
				386	// FIXME: Ignoring parallelism in a loop. Here we're actually ignoring
				387	// a self-dependence in order to get the count comparison right above.
				388	// When we include loop parallelism, self-dependences should be included.
				389	//
				390	if (CI != root)
				391
				392	{ // CallInst root has a path to CallInst I and any calls reachable from I
				393	depsOfRoot.insert(CI);
				394	if (completed[CI])
				395	{ // We have already visited I so we know all nodes it can reach!
				396	DependentsSet& depsOfI = dependents[CI];
				397	depsOfRoot.insert(depsOfI.begin(), depsOfI.end());
				398	return;
				399	}
				400	}
				401
				402	// If we reach here, we need to visit all children of I
				403	for (PgmDependenceGraph::iterator DI = depGraph.outDepBegin(*I);
				404	! DI.fini(); ++DI)
				405	{
				406	Instruction* sink = &DI->getSink()->getInstr();
				407	if (stmtsVisited.find(sink) == stmtsVisited.end())
				408	VisitOutEdges(sink, root, depsOfRoot);
				409	}
				410	}
				411
				412
				413	void FindParallelCalls::visitCallInst(CallInst& CI)
				414	{
				415	if (completed[&CI])
				416	return;
				417	stmtsVisited.clear(); // clear flags to do a fresh DFS
				418
				419	// Visit all children of CI using a recursive walk through dep graph
				420	DependentsSet& depsOfRoot = dependents[&CI];
				421	for (PgmDependenceGraph::iterator DI = depGraph.outDepBegin(CI);
				422	! DI.fini(); ++DI)
				423	{
				424	Instruction* sink = &DI->getSink()->getInstr();
				425	if (stmtsVisited.find(sink) == stmtsVisited.end())
				426	VisitOutEdges(sink, &CI, depsOfRoot);
				427	}
				428
				429	completed[&CI] = true;
				430	}
				431
				432
				433	//----------------------------------------------------------------------------
				434	// class Parallelize
				435	//
				436	// (1) Find candidate parallel functions: any function F s.t.
				437	// there is a call C1 to the function F that is followed or preceded
				438	// by at least one other call C2 that is independent of this one
				439	// (i.e., there is no dependence path from C1 to C2 or C2 to C1)
				440	// (2) Label such a function F as a cilk function.
				441	// (3) Convert every call to F to a spawn
				442	// (4) For every function X, insert sync statements so that
				443	// every spawn is postdominated by a sync before any statements
				444	// with a data dependence to/from the call site for the spawn
				445	//
				446	//----------------------------------------------------------------------------
				447
				448	namespace {
				449	class Parallelize: public Pass
				450	{
				451	public:
				452	/// Driver functions to transform a program
				453	///
				454	bool run(Module& M);
				455
				456	/// getAnalysisUsage - Modifies extensively so preserve nothing.
				457	/// Uses the DependenceGraph and the Top-down DS Graph (only to find
				458	/// all functions called via an indirect call).
				459	///
				460	void getAnalysisUsage(AnalysisUsage &AU) const {
				461	AU.addRequired<TDDataStructures>();
				462	AU.addRequired<MemoryDepAnalysis>(); // force this not to be released
				463	AU.addRequired<PgmDependenceGraph>(); // because it is needed by this
				464	}
				465	};
				466
				467	RegisterOpt<Parallelize> X("parallel", "Parallelize program using Cilk");
				468	}
				469
				470
				471	static Function* FindMain(Module& M)
				472	{
				473	for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI)
				474	if (FI->getName() == std::string("main"))
				475	return FI;
				476	return NULL;
				477	}
				478
				479
				480	bool Parallelize::run(Module& M)
				481	{
				482	hash_set<Function*> parallelFunctions;
				483	hash_set<Function*> safeParallelFunctions;
				484	hash_set<const GlobalValue*> indirectlyCalled;
				485
				486	// If there is no main (i.e., for an incomplete program), we can do nothing.
				487	// If there is a main, mark main as a parallel function.
				488	//
				489	Function* mainFunc = FindMain(M);
				490	if (!mainFunc)
				491	return false;
				492
				493	// (1) Find candidate parallel functions and mark them as Cilk functions
				494	//
				495	for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI)
				496	if (! FI->isExternal())
				497	{
				498	Function* F = FI;
				499	DSGraph& tdg = getAnalysis<TDDataStructures>().getDSGraph(*F);
				500
				501	// All the hard analysis work gets done here!
				502	//
				503	FindParallelCalls finder(*F,
				504	getAnalysis<PgmDependenceGraph>().getGraph(*F));
				505	/* getAnalysis<MemoryDepAnalysis>().getGraph(F)); /
				506
				507	// Now we know which call instructions are useful to parallelize.
				508	// Remember those callee functions.
				509	//
				510	for (std::vector<CallInst*>::iterator
				511	CII = finder.parallelCalls.begin(),
				512	CIE = finder.parallelCalls.end(); CII != CIE; ++CII)
				513	{
				514	// Check if this is a direct call...
				515	if ((*CII)->getCalledFunction() != NULL)
				516	{ // direct call: if this is to a non-external function,
				517	// mark it as a parallelizable function
				518	if (! (*CII)->getCalledFunction()->isExternal())
				519	parallelFunctions.insert((*CII)->getCalledFunction());
				520	}
				521	else
				522	{ // Indirect call: mark all potential callees as bad
				523	std::vector<GlobalValue*> callees =
				524	tdg.getNodeForValue((*CII)->getCalledValue())
				525	.getNode()->getGlobals();
				526	indirectlyCalled.insert(callees.begin(), callees.end());
				527	}
				528	}
				529	}
				530
				531	// Remove all indirectly called functions from the list of Cilk functions.
				532	//
				533	for (hash_set<Function*>::iterator PFI = parallelFunctions.begin(),
				534	PFE = parallelFunctions.end(); PFI != PFE; ++PFI)
				535	if (indirectlyCalled.count(*PFI) == 0)
				536	safeParallelFunctions.insert(*PFI);
				537
				538	#undef CAN_USE_BIND1ST_ON_REFERENCE_TYPE_ARGS
				539	#ifdef CAN_USE_BIND1ST_ON_REFERENCE_TYPE_ARGS
				540	// Use this undecipherable STLese because erase invalidates iterators.
				541	// Otherwise we have to copy sets as above.
				542	hash_set<Function*>::iterator extrasBegin =
				543	std::remove_if(parallelFunctions.begin(), parallelFunctions.end(),
				544	compose1(std::bind2nd(std::greater<int>(), 0),
				545	bind_obj(&indirectlyCalled,
				546	&hash_set<const GlobalValue*>::count)));
				547	parallelFunctions.erase(extrasBegin, parallelFunctions.end());
				548	#endif
				549
				550	// If there are no parallel functions, we can just give up.
				551	if (safeParallelFunctions.empty())
				552	return false;
				553
				554	// Add main as a parallel function since Cilk requires this.
				555	safeParallelFunctions.insert(mainFunc);
				556
				557	// (2,3) Transform each Cilk function and all its calls simply by
				558	// adding a unique suffix to the function name.
				559	// This should identify both functions and calls to such functions
				560	// to the code generator.
				561	// (4) Also, insert calls to sync at appropriate points.
				562	//
				563	Cilkifier cilkifier(M);
				564	for (hash_set<Function*>::iterator CFI = safeParallelFunctions.begin(),
				565	CFE = safeParallelFunctions.end(); CFI != CFE; ++CFI)
				566	{
				567	cilkifier.TransformFunc(*CFI, safeParallelFunctions,
				568	getAnalysis<PgmDependenceGraph>().getGraph(**CFI));
				569	/* getAnalysis<MemoryDepAnalysis>().getGraph(*CFI)); /
				570	}
				571
				572	return true;
				573	}