Blame - lib/CodeGen/IntrinsicLowering.cpp - fp2-dev/platform/external/llvm

blob: da6a876e83dd9dd00dd516e3997d606d3366e609 [file] [log] [blame]

Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	1	//===-- IntrinsicLowering.cpp - Intrinsic Lowering default implementation -===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file was developed by the LLVM research group and is distributed under
				6	// the University of Illinois Open Source License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	// This file implements the IntrinsicLowering class.
				11	//
				12	//===----------------------------------------------------------------------===//
				13
				14	#include "llvm/Constants.h"
				15	#include "llvm/DerivedTypes.h"
				16	#include "llvm/Module.h"
				17	#include "llvm/Instructions.h"
				18	#include "llvm/Type.h"
				19	#include "llvm/CodeGen/IntrinsicLowering.h"
				20	#include "llvm/Support/Streams.h"
				21	#include "llvm/Target/TargetData.h"
				22	#include "llvm/ADT/SmallVector.h"
				23	using namespace llvm;
				24
				25	template <class ArgIt>
				26	static void EnsureFunctionExists(Module &M, const char *Name,
				27	ArgIt ArgBegin, ArgIt ArgEnd,
				28	const Type *RetTy) {
				29	// Insert a correctly-typed definition now.
				30	std::vector<const Type *> ParamTys;
				31	for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
				32	ParamTys.push_back(I->getType());
				33	M.getOrInsertFunction(Name, FunctionType::get(RetTy, ParamTys, false));
				34	}
				35
				36	/// ReplaceCallWith - This function is used when we want to lower an intrinsic
				37	/// call to a call of an external function. This handles hard cases such as
				38	/// when there was already a prototype for the external function, and if that
				39	/// prototype doesn't match the arguments we expect to pass in.
				40	template <class ArgIt>
				41	static CallInst ReplaceCallWith(const char NewFn, CallInst *CI,
				42	ArgIt ArgBegin, ArgIt ArgEnd,
				43	const Type RetTy, Constant &FCache) {
				44	if (!FCache) {
				45	// If we haven't already looked up this function, check to see if the
				46	// program already contains a function with this name.
				47	Module *M = CI->getParent()->getParent()->getParent();
				48	// Get or insert the definition now.
				49	std::vector<const Type *> ParamTys;
				50	for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
				51	ParamTys.push_back((*I)->getType());
				52	FCache = M->getOrInsertFunction(NewFn,
				53	FunctionType::get(RetTy, ParamTys, false));
				54	}
				55
David Greene	b1c4a7b	2007-08-01 03:43:44 +0000	[diff] [blame]	56	SmallVector<Value *, 8> Args(ArgBegin, ArgEnd);
				57	CallInst *NewCI = new CallInst(FCache, Args.begin(), Args.end(),
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	58	CI->getName(), CI);
				59	if (!CI->use_empty())
				60	CI->replaceAllUsesWith(NewCI);
				61	return NewCI;
				62	}
				63
				64	void IntrinsicLowering::AddPrototypes(Module &M) {
				65	for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
				66	if (I->isDeclaration() && !I->use_empty())
				67	switch (I->getIntrinsicID()) {
				68	default: break;
				69	case Intrinsic::setjmp:
				70	EnsureFunctionExists(M, "setjmp", I->arg_begin(), I->arg_end(),
				71	Type::Int32Ty);
				72	break;
				73	case Intrinsic::longjmp:
				74	EnsureFunctionExists(M, "longjmp", I->arg_begin(), I->arg_end(),
				75	Type::VoidTy);
				76	break;
				77	case Intrinsic::siglongjmp:
				78	EnsureFunctionExists(M, "abort", I->arg_end(), I->arg_end(),
				79	Type::VoidTy);
				80	break;
				81	case Intrinsic::memcpy_i32:
				82	case Intrinsic::memcpy_i64:
				83	M.getOrInsertFunction("memcpy", PointerType::get(Type::Int8Ty),
				84	PointerType::get(Type::Int8Ty),
				85	PointerType::get(Type::Int8Ty),
				86	TD.getIntPtrType(), (Type *)0);
				87	break;
				88	case Intrinsic::memmove_i32:
				89	case Intrinsic::memmove_i64:
				90	M.getOrInsertFunction("memmove", PointerType::get(Type::Int8Ty),
				91	PointerType::get(Type::Int8Ty),
				92	PointerType::get(Type::Int8Ty),
				93	TD.getIntPtrType(), (Type *)0);
				94	break;
				95	case Intrinsic::memset_i32:
				96	case Intrinsic::memset_i64:
				97	M.getOrInsertFunction("memset", PointerType::get(Type::Int8Ty),
				98	PointerType::get(Type::Int8Ty), Type::Int32Ty,
				99	TD.getIntPtrType(), (Type *)0);
				100	break;
				101	case Intrinsic::sqrt_f32:
				102	case Intrinsic::sqrt_f64:
				103	if(I->arg_begin()->getType() == Type::FloatTy)
				104	EnsureFunctionExists(M, "sqrtf", I->arg_begin(), I->arg_end(),
				105	Type::FloatTy);
				106	else
				107	EnsureFunctionExists(M, "sqrt", I->arg_begin(), I->arg_end(),
				108	Type::DoubleTy);
				109	break;
				110	}
				111	}
				112
				113	/// LowerBSWAP - Emit the code to lower bswap of V before the specified
				114	/// instruction IP.
				115	static Value LowerBSWAP(Value V, Instruction *IP) {
				116	assert(V->getType()->isInteger() && "Can't bswap a non-integer type!");
				117
				118	unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
				119
				120	switch(BitSize) {
				121	default: assert(0 && "Unhandled type size of value to byteswap!");
				122	case 16: {
				123	Value *Tmp1 = BinaryOperator::createShl(V,
				124	ConstantInt::get(V->getType(),8),"bswap.2",IP);
				125	Value *Tmp2 = BinaryOperator::createLShr(V,
				126	ConstantInt::get(V->getType(),8),"bswap.1",IP);
				127	V = BinaryOperator::createOr(Tmp1, Tmp2, "bswap.i16", IP);
				128	break;
				129	}
				130	case 32: {
				131	Value *Tmp4 = BinaryOperator::createShl(V,
				132	ConstantInt::get(V->getType(),24),"bswap.4", IP);
				133	Value *Tmp3 = BinaryOperator::createShl(V,
				134	ConstantInt::get(V->getType(),8),"bswap.3",IP);
				135	Value *Tmp2 = BinaryOperator::createLShr(V,
				136	ConstantInt::get(V->getType(),8),"bswap.2",IP);
				137	Value *Tmp1 = BinaryOperator::createLShr(V,
				138	ConstantInt::get(V->getType(),24),"bswap.1", IP);
				139	Tmp3 = BinaryOperator::createAnd(Tmp3,
				140	ConstantInt::get(Type::Int32Ty, 0xFF0000),
				141	"bswap.and3", IP);
				142	Tmp2 = BinaryOperator::createAnd(Tmp2,
				143	ConstantInt::get(Type::Int32Ty, 0xFF00),
				144	"bswap.and2", IP);
				145	Tmp4 = BinaryOperator::createOr(Tmp4, Tmp3, "bswap.or1", IP);
				146	Tmp2 = BinaryOperator::createOr(Tmp2, Tmp1, "bswap.or2", IP);
				147	V = BinaryOperator::createOr(Tmp4, Tmp2, "bswap.i32", IP);
				148	break;
				149	}
				150	case 64: {
				151	Value *Tmp8 = BinaryOperator::createShl(V,
				152	ConstantInt::get(V->getType(),56),"bswap.8", IP);
				153	Value *Tmp7 = BinaryOperator::createShl(V,
				154	ConstantInt::get(V->getType(),40),"bswap.7", IP);
				155	Value *Tmp6 = BinaryOperator::createShl(V,
				156	ConstantInt::get(V->getType(),24),"bswap.6", IP);
				157	Value *Tmp5 = BinaryOperator::createShl(V,
				158	ConstantInt::get(V->getType(),8),"bswap.5", IP);
				159	Value* Tmp4 = BinaryOperator::createLShr(V,
				160	ConstantInt::get(V->getType(),8),"bswap.4", IP);
				161	Value* Tmp3 = BinaryOperator::createLShr(V,
				162	ConstantInt::get(V->getType(),24),"bswap.3", IP);
				163	Value* Tmp2 = BinaryOperator::createLShr(V,
				164	ConstantInt::get(V->getType(),40),"bswap.2", IP);
				165	Value* Tmp1 = BinaryOperator::createLShr(V,
				166	ConstantInt::get(V->getType(),56),"bswap.1", IP);
				167	Tmp7 = BinaryOperator::createAnd(Tmp7,
				168	ConstantInt::get(Type::Int64Ty,
				169	0xFF000000000000ULL),
				170	"bswap.and7", IP);
				171	Tmp6 = BinaryOperator::createAnd(Tmp6,
				172	ConstantInt::get(Type::Int64Ty, 0xFF0000000000ULL),
				173	"bswap.and6", IP);
				174	Tmp5 = BinaryOperator::createAnd(Tmp5,
				175	ConstantInt::get(Type::Int64Ty, 0xFF00000000ULL),
				176	"bswap.and5", IP);
				177	Tmp4 = BinaryOperator::createAnd(Tmp4,
				178	ConstantInt::get(Type::Int64Ty, 0xFF000000ULL),
				179	"bswap.and4", IP);
				180	Tmp3 = BinaryOperator::createAnd(Tmp3,
				181	ConstantInt::get(Type::Int64Ty, 0xFF0000ULL),
				182	"bswap.and3", IP);
				183	Tmp2 = BinaryOperator::createAnd(Tmp2,
				184	ConstantInt::get(Type::Int64Ty, 0xFF00ULL),
				185	"bswap.and2", IP);
				186	Tmp8 = BinaryOperator::createOr(Tmp8, Tmp7, "bswap.or1", IP);
				187	Tmp6 = BinaryOperator::createOr(Tmp6, Tmp5, "bswap.or2", IP);
				188	Tmp4 = BinaryOperator::createOr(Tmp4, Tmp3, "bswap.or3", IP);
				189	Tmp2 = BinaryOperator::createOr(Tmp2, Tmp1, "bswap.or4", IP);
				190	Tmp8 = BinaryOperator::createOr(Tmp8, Tmp6, "bswap.or5", IP);
				191	Tmp4 = BinaryOperator::createOr(Tmp4, Tmp2, "bswap.or6", IP);
				192	V = BinaryOperator::createOr(Tmp8, Tmp4, "bswap.i64", IP);
				193	break;
				194	}
				195	}
				196	return V;
				197	}
				198
				199	/// LowerCTPOP - Emit the code to lower ctpop of V before the specified
				200	/// instruction IP.
				201	static Value LowerCTPOP(Value V, Instruction *IP) {
				202	assert(V->getType()->isInteger() && "Can't ctpop a non-integer type!");
				203
				204	static const uint64_t MaskValues[6] = {
				205	0x5555555555555555ULL, 0x3333333333333333ULL,
				206	0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
				207	0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL
				208	};
				209
				210	unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
				211	unsigned WordSize = (BitSize + 63) / 64;
				212	Value *Count = ConstantInt::get(V->getType(), 0);
				213
				214	for (unsigned n = 0; n < WordSize; ++n) {
				215	Value *PartValue = V;
				216	for (unsigned i = 1, ct = 0; i < (BitSize>64 ? 64 : BitSize);
				217	i <<= 1, ++ct) {
				218	Value *MaskCst = ConstantInt::get(V->getType(), MaskValues[ct]);
				219	Value *LHS = BinaryOperator::createAnd(
				220	PartValue, MaskCst, "cppop.and1", IP);
				221	Value *VShift = BinaryOperator::createLShr(PartValue,
				222	ConstantInt::get(V->getType(), i), "ctpop.sh", IP);
				223	Value *RHS = BinaryOperator::createAnd(VShift, MaskCst, "cppop.and2", IP);
				224	PartValue = BinaryOperator::createAdd(LHS, RHS, "ctpop.step", IP);
				225	}
				226	Count = BinaryOperator::createAdd(PartValue, Count, "ctpop.part", IP);
				227	if (BitSize > 64) {
				228	V = BinaryOperator::createLShr(V, ConstantInt::get(V->getType(), 64),
				229	"ctpop.part.sh", IP);
				230	BitSize -= 64;
				231	}
				232	}
				233
Chris Lattner	f2cf7f0	2007-08-06 16:36:18 +0000	[diff] [blame]	234	return Count;
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	235	}
				236
				237	/// LowerCTLZ - Emit the code to lower ctlz of V before the specified
				238	/// instruction IP.
				239	static Value LowerCTLZ(Value V, Instruction *IP) {
				240
				241	unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
				242	for (unsigned i = 1; i < BitSize; i <<= 1) {
				243	Value *ShVal = ConstantInt::get(V->getType(), i);
				244	ShVal = BinaryOperator::createLShr(V, ShVal, "ctlz.sh", IP);
				245	V = BinaryOperator::createOr(V, ShVal, "ctlz.step", IP);
				246	}
				247
				248	V = BinaryOperator::createNot(V, "", IP);
				249	return LowerCTPOP(V, IP);
				250	}
				251
				252	/// Convert the llvm.part.select.iX.iY intrinsic. This intrinsic takes
				253	/// three integer arguments. The first argument is the Value from which the
				254	/// bits will be selected. It may be of any bit width. The second and third
				255	/// arguments specify a range of bits to select with the second argument
				256	/// specifying the low bit and the third argument specifying the high bit. Both
				257	/// must be type i32. The result is the corresponding selected bits from the
				258	/// Value in the same width as the Value (first argument). If the low bit index
				259	/// is higher than the high bit index then the inverse selection is done and
				260	/// the bits are returned in inverse order.
				261	/// @brief Lowering of llvm.part.select intrinsic.
				262	static Instruction LowerPartSelect(CallInst CI) {
				263	// Make sure we're dealing with a part select intrinsic here
				264	Function *F = CI->getCalledFunction();
				265	const FunctionType *FT = F->getFunctionType();
				266	if (!F->isDeclaration() \|\| !FT->getReturnType()->isInteger() \|\|
				267	FT->getNumParams() != 3 \|\| !FT->getParamType(0)->isInteger() \|\|
				268	!FT->getParamType(1)->isInteger() \|\| !FT->getParamType(2)->isInteger())
				269	return CI;
				270
				271	// Get the intrinsic implementation function by converting all the . to _
				272	// in the intrinsic's function name and then reconstructing the function
				273	// declaration.
				274	std::string Name(F->getName());
				275	for (unsigned i = 4; i < Name.length(); ++i)
				276	if (Name[i] == '.')
				277	Name[i] = '_';
				278	Module* M = F->getParent();
				279	F = cast<Function>(M->getOrInsertFunction(Name, FT));
				280	F->setLinkage(GlobalValue::WeakLinkage);
				281
				282	// If we haven't defined the impl function yet, do so now
				283	if (F->isDeclaration()) {
				284
				285	// Get the arguments to the function
				286	Function::arg_iterator args = F->arg_begin();
				287	Value* Val = args++; Val->setName("Val");
				288	Value* Lo = args++; Lo->setName("Lo");
				289	Value* Hi = args++; Hi->setName("High");
				290
				291	// We want to select a range of bits here such that [Hi, Lo] is shifted
				292	// down to the low bits. However, it is quite possible that Hi is smaller
				293	// than Lo in which case the bits have to be reversed.
				294
				295	// Create the blocks we will need for the two cases (forward, reverse)
				296	BasicBlock* CurBB = new BasicBlock("entry", F);
				297	BasicBlock *RevSize = new BasicBlock("revsize", CurBB->getParent());
				298	BasicBlock *FwdSize = new BasicBlock("fwdsize", CurBB->getParent());
				299	BasicBlock *Compute = new BasicBlock("compute", CurBB->getParent());
				300	BasicBlock *Reverse = new BasicBlock("reverse", CurBB->getParent());
				301	BasicBlock *RsltBlk = new BasicBlock("result", CurBB->getParent());
				302
				303	// Cast Hi and Lo to the size of Val so the widths are all the same
				304	if (Hi->getType() != Val->getType())
				305	Hi = CastInst::createIntegerCast(Hi, Val->getType(), false,
				306	"tmp", CurBB);
				307	if (Lo->getType() != Val->getType())
				308	Lo = CastInst::createIntegerCast(Lo, Val->getType(), false,
				309	"tmp", CurBB);
				310
				311	// Compute a few things that both cases will need, up front.
				312	Constant* Zero = ConstantInt::get(Val->getType(), 0);
				313	Constant* One = ConstantInt::get(Val->getType(), 1);
				314	Constant* AllOnes = ConstantInt::getAllOnesValue(Val->getType());
				315
				316	// Compare the Hi and Lo bit positions. This is used to determine
				317	// which case we have (forward or reverse)
				318	ICmpInst *Cmp = new ICmpInst(ICmpInst::ICMP_ULT, Hi, Lo, "less",CurBB);
				319	new BranchInst(RevSize, FwdSize, Cmp, CurBB);
				320
				321	// First, copmute the number of bits in the forward case.
				322	Instruction* FBitSize =
				323	BinaryOperator::createSub(Hi, Lo,"fbits", FwdSize);
				324	new BranchInst(Compute, FwdSize);
				325
				326	// Second, compute the number of bits in the reverse case.
				327	Instruction* RBitSize =
				328	BinaryOperator::createSub(Lo, Hi, "rbits", RevSize);
				329	new BranchInst(Compute, RevSize);
				330
				331	// Now, compute the bit range. Start by getting the bitsize and the shift
				332	// amount (either Hi or Lo) from PHI nodes. Then we compute a mask for
				333	// the number of bits we want in the range. We shift the bits down to the
				334	// least significant bits, apply the mask to zero out unwanted high bits,
				335	// and we have computed the "forward" result. It may still need to be
				336	// reversed.
				337
				338	// Get the BitSize from one of the two subtractions
				339	PHINode *BitSize = new PHINode(Val->getType(), "bits", Compute);
				340	BitSize->reserveOperandSpace(2);
				341	BitSize->addIncoming(FBitSize, FwdSize);
				342	BitSize->addIncoming(RBitSize, RevSize);
				343
				344	// Get the ShiftAmount as the smaller of Hi/Lo
				345	PHINode *ShiftAmt = new PHINode(Val->getType(), "shiftamt", Compute);
				346	ShiftAmt->reserveOperandSpace(2);
				347	ShiftAmt->addIncoming(Lo, FwdSize);
				348	ShiftAmt->addIncoming(Hi, RevSize);
				349
				350	// Increment the bit size
				351	Instruction *BitSizePlusOne =
				352	BinaryOperator::createAdd(BitSize, One, "bits", Compute);
				353
				354	// Create a Mask to zero out the high order bits.
				355	Instruction* Mask =
				356	BinaryOperator::createShl(AllOnes, BitSizePlusOne, "mask", Compute);
				357	Mask = BinaryOperator::createNot(Mask, "mask", Compute);
				358
				359	// Shift the bits down and apply the mask
				360	Instruction* FRes =
				361	BinaryOperator::createLShr(Val, ShiftAmt, "fres", Compute);
				362	FRes = BinaryOperator::createAnd(FRes, Mask, "fres", Compute);
				363	new BranchInst(Reverse, RsltBlk, Cmp, Compute);
				364
				365	// In the Reverse block we have the mask already in FRes but we must reverse
				366	// it by shifting FRes bits right and putting them in RRes by shifting them
				367	// in from left.
				368
				369	// First set up our loop counters
				370	PHINode *Count = new PHINode(Val->getType(), "count", Reverse);
				371	Count->reserveOperandSpace(2);
				372	Count->addIncoming(BitSizePlusOne, Compute);
				373
				374	// Next, get the value that we are shifting.
				375	PHINode *BitsToShift = new PHINode(Val->getType(), "val", Reverse);
				376	BitsToShift->reserveOperandSpace(2);
				377	BitsToShift->addIncoming(FRes, Compute);
				378
				379	// Finally, get the result of the last computation
				380	PHINode *RRes = new PHINode(Val->getType(), "rres", Reverse);
				381	RRes->reserveOperandSpace(2);
				382	RRes->addIncoming(Zero, Compute);
				383
				384	// Decrement the counter
				385	Instruction *Decr = BinaryOperator::createSub(Count, One, "decr", Reverse);
				386	Count->addIncoming(Decr, Reverse);
				387
				388	// Compute the Bit that we want to move
				389	Instruction *Bit =
				390	BinaryOperator::createAnd(BitsToShift, One, "bit", Reverse);
				391
				392	// Compute the new value for next iteration.
				393	Instruction *NewVal =
				394	BinaryOperator::createLShr(BitsToShift, One, "rshift", Reverse);
				395	BitsToShift->addIncoming(NewVal, Reverse);
				396
				397	// Shift the bit into the low bits of the result.
				398	Instruction *NewRes =
				399	BinaryOperator::createShl(RRes, One, "lshift", Reverse);
				400	NewRes = BinaryOperator::createOr(NewRes, Bit, "addbit", Reverse);
				401	RRes->addIncoming(NewRes, Reverse);
				402
				403	// Terminate loop if we've moved all the bits.
				404	ICmpInst *Cond =
				405	new ICmpInst(ICmpInst::ICMP_EQ, Decr, Zero, "cond", Reverse);
				406	new BranchInst(RsltBlk, Reverse, Cond, Reverse);
				407
				408	// Finally, in the result block, select one of the two results with a PHI
				409	// node and return the result;
				410	CurBB = RsltBlk;
				411	PHINode *BitSelect = new PHINode(Val->getType(), "part_select", CurBB);
				412	BitSelect->reserveOperandSpace(2);
				413	BitSelect->addIncoming(FRes, Compute);
				414	BitSelect->addIncoming(NewRes, Reverse);
				415	new ReturnInst(BitSelect, CurBB);
				416	}
				417
				418	// Return a call to the implementation function
				419	Value *Args[] = {
				420	CI->getOperand(1),
				421	CI->getOperand(2),
				422	CI->getOperand(3)
				423	};
David Greene	b1c4a7b	2007-08-01 03:43:44 +0000	[diff] [blame]	424	return new CallInst(F, Args, Args+sizeof(Args)/sizeof(Args[0]), CI->getName(), CI);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	425	}
				426
				427	/// Convert the llvm.part.set.iX.iY.iZ intrinsic. This intrinsic takes
				428	/// four integer arguments (iAny %Value, iAny %Replacement, i32 %Low, i32 %High)
				429	/// The first two arguments can be any bit width. The result is the same width
				430	/// as %Value. The operation replaces bits between %Low and %High with the value
				431	/// in %Replacement. If %Replacement is not the same width, it is truncated or
				432	/// zero extended as appropriate to fit the bits being replaced. If %Low is
				433	/// greater than %High then the inverse set of bits are replaced.
				434	/// @brief Lowering of llvm.bit.part.set intrinsic.
				435	static Instruction LowerPartSet(CallInst CI) {
				436	// Make sure we're dealing with a part select intrinsic here
				437	Function *F = CI->getCalledFunction();
				438	const FunctionType *FT = F->getFunctionType();
				439	if (!F->isDeclaration() \|\| !FT->getReturnType()->isInteger() \|\|
				440	FT->getNumParams() != 4 \|\| !FT->getParamType(0)->isInteger() \|\|
				441	!FT->getParamType(1)->isInteger() \|\| !FT->getParamType(2)->isInteger() \|\|
				442	!FT->getParamType(3)->isInteger())
				443	return CI;
				444
				445	// Get the intrinsic implementation function by converting all the . to _
				446	// in the intrinsic's function name and then reconstructing the function
				447	// declaration.
				448	std::string Name(F->getName());
				449	for (unsigned i = 4; i < Name.length(); ++i)
				450	if (Name[i] == '.')
				451	Name[i] = '_';
				452	Module* M = F->getParent();
				453	F = cast<Function>(M->getOrInsertFunction(Name, FT));
				454	F->setLinkage(GlobalValue::WeakLinkage);
				455
				456	// If we haven't defined the impl function yet, do so now
				457	if (F->isDeclaration()) {
				458	// Get the arguments for the function.
				459	Function::arg_iterator args = F->arg_begin();
				460	Value* Val = args++; Val->setName("Val");
				461	Value* Rep = args++; Rep->setName("Rep");
				462	Value* Lo = args++; Lo->setName("Lo");
				463	Value* Hi = args++; Hi->setName("Hi");
				464
				465	// Get some types we need
				466	const IntegerType* ValTy = cast<IntegerType>(Val->getType());
				467	const IntegerType* RepTy = cast<IntegerType>(Rep->getType());
				468	uint32_t ValBits = ValTy->getBitWidth();
				469	uint32_t RepBits = RepTy->getBitWidth();
				470
				471	// Constant Definitions
				472	ConstantInt* RepBitWidth = ConstantInt::get(Type::Int32Ty, RepBits);
				473	ConstantInt* RepMask = ConstantInt::getAllOnesValue(RepTy);
				474	ConstantInt* ValMask = ConstantInt::getAllOnesValue(ValTy);
				475	ConstantInt* One = ConstantInt::get(Type::Int32Ty, 1);
				476	ConstantInt* ValOne = ConstantInt::get(ValTy, 1);
				477	ConstantInt* Zero = ConstantInt::get(Type::Int32Ty, 0);
				478	ConstantInt* ValZero = ConstantInt::get(ValTy, 0);
				479
				480	// Basic blocks we fill in below.
				481	BasicBlock* entry = new BasicBlock("entry", F, 0);
				482	BasicBlock* large = new BasicBlock("large", F, 0);
				483	BasicBlock* small = new BasicBlock("small", F, 0);
				484	BasicBlock* reverse = new BasicBlock("reverse", F, 0);
				485	BasicBlock* result = new BasicBlock("result", F, 0);
				486
				487	// BASIC BLOCK: entry
				488	// First, get the number of bits that we're placing as an i32
				489	ICmpInst* is_forward =
				490	new ICmpInst(ICmpInst::ICMP_ULT, Lo, Hi, "", entry);
				491	SelectInst* Hi_pn = new SelectInst(is_forward, Hi, Lo, "", entry);
				492	SelectInst* Lo_pn = new SelectInst(is_forward, Lo, Hi, "", entry);
				493	BinaryOperator* NumBits = BinaryOperator::createSub(Hi_pn, Lo_pn, "",entry);
				494	NumBits = BinaryOperator::createAdd(NumBits, One, "", entry);
				495	// Now, convert Lo and Hi to ValTy bit width
				496	if (ValBits > 32) {
				497	Lo = new ZExtInst(Lo_pn, ValTy, "", entry);
				498	} else if (ValBits < 32) {
				499	Lo = new TruncInst(Lo_pn, ValTy, "", entry);
				500	}
				501	// Determine if the replacement bits are larger than the number of bits we
				502	// are replacing and deal with it.
				503	ICmpInst* is_large =
				504	new ICmpInst(ICmpInst::ICMP_ULT, NumBits, RepBitWidth, "", entry);
				505	new BranchInst(large, small, is_large, entry);
				506
				507	// BASIC BLOCK: large
				508	Instruction* MaskBits =
				509	BinaryOperator::createSub(RepBitWidth, NumBits, "", large);
				510	MaskBits = CastInst::createIntegerCast(MaskBits, RepMask->getType(),
				511	false, "", large);
				512	BinaryOperator* Mask1 =
				513	BinaryOperator::createLShr(RepMask, MaskBits, "", large);
				514	BinaryOperator* Rep2 = BinaryOperator::createAnd(Mask1, Rep, "", large);
				515	new BranchInst(small, large);
				516
				517	// BASIC BLOCK: small
				518	PHINode* Rep3 = new PHINode(RepTy, "", small);
				519	Rep3->reserveOperandSpace(2);
				520	Rep3->addIncoming(Rep2, large);
				521	Rep3->addIncoming(Rep, entry);
				522	Value* Rep4 = Rep3;
				523	if (ValBits > RepBits)
				524	Rep4 = new ZExtInst(Rep3, ValTy, "", small);
				525	else if (ValBits < RepBits)
				526	Rep4 = new TruncInst(Rep3, ValTy, "", small);
				527	new BranchInst(result, reverse, is_forward, small);
				528
				529	// BASIC BLOCK: reverse (reverses the bits of the replacement)
				530	// Set up our loop counter as a PHI so we can decrement on each iteration.
				531	// We will loop for the number of bits in the replacement value.
				532	PHINode *Count = new PHINode(Type::Int32Ty, "count", reverse);
				533	Count->reserveOperandSpace(2);
				534	Count->addIncoming(NumBits, small);
				535
				536	// Get the value that we are shifting bits out of as a PHI because
				537	// we'll change this with each iteration.
				538	PHINode *BitsToShift = new PHINode(Val->getType(), "val", reverse);
				539	BitsToShift->reserveOperandSpace(2);
				540	BitsToShift->addIncoming(Rep4, small);
				541
				542	// Get the result of the last computation or zero on first iteration
				543	PHINode *RRes = new PHINode(Val->getType(), "rres", reverse);
				544	RRes->reserveOperandSpace(2);
				545	RRes->addIncoming(ValZero, small);
				546
				547	// Decrement the loop counter by one
				548	Instruction *Decr = BinaryOperator::createSub(Count, One, "", reverse);
				549	Count->addIncoming(Decr, reverse);
				550
				551	// Get the bit that we want to move into the result
				552	Value *Bit = BinaryOperator::createAnd(BitsToShift, ValOne, "", reverse);
				553
				554	// Compute the new value of the bits to shift for the next iteration.
				555	Value *NewVal = BinaryOperator::createLShr(BitsToShift, ValOne,"", reverse);
				556	BitsToShift->addIncoming(NewVal, reverse);
				557
				558	// Shift the bit we extracted into the low bit of the result.
				559	Instruction *NewRes = BinaryOperator::createShl(RRes, ValOne, "", reverse);
				560	NewRes = BinaryOperator::createOr(NewRes, Bit, "", reverse);
				561	RRes->addIncoming(NewRes, reverse);
				562
				563	// Terminate loop if we've moved all the bits.
				564	ICmpInst *Cond = new ICmpInst(ICmpInst::ICMP_EQ, Decr, Zero, "", reverse);
				565	new BranchInst(result, reverse, Cond, reverse);
				566
				567	// BASIC BLOCK: result
				568	PHINode *Rplcmnt = new PHINode(Val->getType(), "", result);
				569	Rplcmnt->reserveOperandSpace(2);
				570	Rplcmnt->addIncoming(NewRes, reverse);
				571	Rplcmnt->addIncoming(Rep4, small);
				572	Value* t0 = CastInst::createIntegerCast(NumBits,ValTy,false,"",result);
				573	Value* t1 = BinaryOperator::createShl(ValMask, Lo, "", result);
				574	Value* t2 = BinaryOperator::createNot(t1, "", result);
				575	Value* t3 = BinaryOperator::createShl(t1, t0, "", result);
				576	Value* t4 = BinaryOperator::createOr(t2, t3, "", result);
				577	Value* t5 = BinaryOperator::createAnd(t4, Val, "", result);
				578	Value* t6 = BinaryOperator::createShl(Rplcmnt, Lo, "", result);
				579	Value* Rslt = BinaryOperator::createOr(t5, t6, "part_set", result);
				580	new ReturnInst(Rslt, result);
				581	}
				582
				583	// Return a call to the implementation function
				584	Value *Args[] = {
				585	CI->getOperand(1),
				586	CI->getOperand(2),
				587	CI->getOperand(3),
				588	CI->getOperand(4)
				589	};
David Greene	b1c4a7b	2007-08-01 03:43:44 +0000	[diff] [blame]	590	return new CallInst(F, Args, Args+sizeof(Args)/sizeof(Args[0]), CI->getName(), CI);
Dan Gohman	f17a25c	2007-07-18 16:29:46 +0000	[diff] [blame]	591	}
				592
				593
				594	void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
				595	Function *Callee = CI->getCalledFunction();
				596	assert(Callee && "Cannot lower an indirect call!");
				597
				598	switch (Callee->getIntrinsicID()) {
				599	case Intrinsic::not_intrinsic:
				600	cerr << "Cannot lower a call to a non-intrinsic function '"
				601	<< Callee->getName() << "'!\n";
				602	abort();
				603	default:
				604	cerr << "Error: Code generator does not support intrinsic function '"
				605	<< Callee->getName() << "'!\n";
				606	abort();
				607
				608	// The setjmp/longjmp intrinsics should only exist in the code if it was
				609	// never optimized (ie, right out of the CFE), or if it has been hacked on
				610	// by the lowerinvoke pass. In both cases, the right thing to do is to
				611	// convert the call to an explicit setjmp or longjmp call.
				612	case Intrinsic::setjmp: {
				613	static Constant *SetjmpFCache = 0;
				614	Value *V = ReplaceCallWith("setjmp", CI, CI->op_begin()+1, CI->op_end(),
				615	Type::Int32Ty, SetjmpFCache);
				616	if (CI->getType() != Type::VoidTy)
				617	CI->replaceAllUsesWith(V);
				618	break;
				619	}
				620	case Intrinsic::sigsetjmp:
				621	if (CI->getType() != Type::VoidTy)
				622	CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
				623	break;
				624
				625	case Intrinsic::longjmp: {
				626	static Constant *LongjmpFCache = 0;
				627	ReplaceCallWith("longjmp", CI, CI->op_begin()+1, CI->op_end(),
				628	Type::VoidTy, LongjmpFCache);
				629	break;
				630	}
				631
				632	case Intrinsic::siglongjmp: {
				633	// Insert the call to abort
				634	static Constant *AbortFCache = 0;
				635	ReplaceCallWith("abort", CI, CI->op_end(), CI->op_end(),
				636	Type::VoidTy, AbortFCache);
				637	break;
				638	}
				639	case Intrinsic::ctpop:
				640	CI->replaceAllUsesWith(LowerCTPOP(CI->getOperand(1), CI));
				641	break;
				642
				643	case Intrinsic::bswap:
				644	CI->replaceAllUsesWith(LowerBSWAP(CI->getOperand(1), CI));
				645	break;
				646
				647	case Intrinsic::ctlz:
				648	CI->replaceAllUsesWith(LowerCTLZ(CI->getOperand(1), CI));
				649	break;
				650
				651	case Intrinsic::cttz: {
				652	// cttz(x) -> ctpop(~X & (X-1))
				653	Value *Src = CI->getOperand(1);
				654	Value *NotSrc = BinaryOperator::createNot(Src, Src->getName()+".not", CI);
				655	Value *SrcM1 = ConstantInt::get(Src->getType(), 1);
				656	SrcM1 = BinaryOperator::createSub(Src, SrcM1, "", CI);
				657	Src = LowerCTPOP(BinaryOperator::createAnd(NotSrc, SrcM1, "", CI), CI);
				658	CI->replaceAllUsesWith(Src);
				659	break;
				660	}
				661
				662	case Intrinsic::part_select:
				663	CI->replaceAllUsesWith(LowerPartSelect(CI));
				664	break;
				665
				666	case Intrinsic::part_set:
				667	CI->replaceAllUsesWith(LowerPartSet(CI));
				668	break;
				669
				670	case Intrinsic::stacksave:
				671	case Intrinsic::stackrestore: {
				672	static bool Warned = false;
				673	if (!Warned)
				674	cerr << "WARNING: this target does not support the llvm.stack"
				675	<< (Callee->getIntrinsicID() == Intrinsic::stacksave ?
				676	"save" : "restore") << " intrinsic.\n";
				677	Warned = true;
				678	if (Callee->getIntrinsicID() == Intrinsic::stacksave)
				679	CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
				680	break;
				681	}
				682
				683	case Intrinsic::returnaddress:
				684	case Intrinsic::frameaddress:
				685	cerr << "WARNING: this target does not support the llvm."
				686	<< (Callee->getIntrinsicID() == Intrinsic::returnaddress ?
				687	"return" : "frame") << "address intrinsic.\n";
				688	CI->replaceAllUsesWith(ConstantPointerNull::get(
				689	cast<PointerType>(CI->getType())));
				690	break;
				691
				692	case Intrinsic::prefetch:
				693	break; // Simply strip out prefetches on unsupported architectures
				694
				695	case Intrinsic::pcmarker:
				696	break; // Simply strip out pcmarker on unsupported architectures
				697	case Intrinsic::readcyclecounter: {
				698	cerr << "WARNING: this target does not support the llvm.readcyclecoun"
				699	<< "ter intrinsic. It is being lowered to a constant 0\n";
				700	CI->replaceAllUsesWith(ConstantInt::get(Type::Int64Ty, 0));
				701	break;
				702	}
				703
				704	case Intrinsic::dbg_stoppoint:
				705	case Intrinsic::dbg_region_start:
				706	case Intrinsic::dbg_region_end:
				707	case Intrinsic::dbg_func_start:
				708	case Intrinsic::dbg_declare:
				709	break; // Simply strip out debugging intrinsics
				710
				711	case Intrinsic::eh_exception:
				712	case Intrinsic::eh_selector:
				713	CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
				714	break;
				715
				716	case Intrinsic::eh_typeid_for:
				717	// Return something different to eh_selector.
				718	CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
				719	break;
				720
				721	case Intrinsic::var_annotation:
				722	break; // Strip out annotate intrinsic
				723
				724	case Intrinsic::memcpy_i32:
				725	case Intrinsic::memcpy_i64: {
				726	static Constant *MemcpyFCache = 0;
				727	Value *Size = CI->getOperand(3);
				728	const Type *IntPtr = TD.getIntPtrType();
				729	if (Size->getType()->getPrimitiveSizeInBits() <
				730	IntPtr->getPrimitiveSizeInBits())
				731	Size = new ZExtInst(Size, IntPtr, "", CI);
				732	else if (Size->getType()->getPrimitiveSizeInBits() >
				733	IntPtr->getPrimitiveSizeInBits())
				734	Size = new TruncInst(Size, IntPtr, "", CI);
				735	Value *Ops[3];
				736	Ops[0] = CI->getOperand(1);
				737	Ops[1] = CI->getOperand(2);
				738	Ops[2] = Size;
				739	ReplaceCallWith("memcpy", CI, Ops, Ops+3, CI->getOperand(1)->getType(),
				740	MemcpyFCache);
				741	break;
				742	}
				743	case Intrinsic::memmove_i32:
				744	case Intrinsic::memmove_i64: {
				745	static Constant *MemmoveFCache = 0;
				746	Value *Size = CI->getOperand(3);
				747	const Type *IntPtr = TD.getIntPtrType();
				748	if (Size->getType()->getPrimitiveSizeInBits() <
				749	IntPtr->getPrimitiveSizeInBits())
				750	Size = new ZExtInst(Size, IntPtr, "", CI);
				751	else if (Size->getType()->getPrimitiveSizeInBits() >
				752	IntPtr->getPrimitiveSizeInBits())
				753	Size = new TruncInst(Size, IntPtr, "", CI);
				754	Value *Ops[3];
				755	Ops[0] = CI->getOperand(1);
				756	Ops[1] = CI->getOperand(2);
				757	Ops[2] = Size;
				758	ReplaceCallWith("memmove", CI, Ops, Ops+3, CI->getOperand(1)->getType(),
				759	MemmoveFCache);
				760	break;
				761	}
				762	case Intrinsic::memset_i32:
				763	case Intrinsic::memset_i64: {
				764	static Constant *MemsetFCache = 0;
				765	Value *Size = CI->getOperand(3);
				766	const Type *IntPtr = TD.getIntPtrType();
				767	if (Size->getType()->getPrimitiveSizeInBits() <
				768	IntPtr->getPrimitiveSizeInBits())
				769	Size = new ZExtInst(Size, IntPtr, "", CI);
				770	else if (Size->getType()->getPrimitiveSizeInBits() >
				771	IntPtr->getPrimitiveSizeInBits())
				772	Size = new TruncInst(Size, IntPtr, "", CI);
				773	Value *Ops[3];
				774	Ops[0] = CI->getOperand(1);
				775	// Extend the amount to i32.
				776	Ops[1] = new ZExtInst(CI->getOperand(2), Type::Int32Ty, "", CI);
				777	Ops[2] = Size;
				778	ReplaceCallWith("memset", CI, Ops, Ops+3, CI->getOperand(1)->getType(),
				779	MemsetFCache);
				780	break;
				781	}
				782	case Intrinsic::sqrt_f32: {
				783	static Constant *sqrtfFCache = 0;
				784	ReplaceCallWith("sqrtf", CI, CI->op_begin()+1, CI->op_end(),
				785	Type::FloatTy, sqrtfFCache);
				786	break;
				787	}
				788	case Intrinsic::sqrt_f64: {
				789	static Constant *sqrtFCache = 0;
				790	ReplaceCallWith("sqrt", CI, CI->op_begin()+1, CI->op_end(),
				791	Type::DoubleTy, sqrtFCache);
				792	break;
				793	}
				794	}
				795
				796	assert(CI->use_empty() &&
				797	"Lowering should have eliminated any uses of the intrinsic call!");
				798	CI->eraseFromParent();
				799	}