//===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the TargetLoweringARM32 class, which consists almost
// entirely of the lowering sequence for each high-level instruction.
//
//===----------------------------------------------------------------------===//

#include "llvm/Support/MathExtras.h"

#include "IceCfg.h"
#include "IceCfgNode.h"
#include "IceClFlags.h"
#include "IceDefs.h"
#include "IceELFObjectWriter.h"
#include "IceGlobalInits.h"
#include "IceInstARM32.h"
#include "IceLiveness.h"
#include "IceOperand.h"
#include "IceRegistersARM32.h"
#include "IceTargetLoweringARM32.def"
#include "IceTargetLoweringARM32.h"
#include "IceUtils.h"

namespace Ice {

namespace {

void UnimplementedError(const ClFlags &Flags) {
  if (!Flags.getSkipUnimplemented()) {
    // Use llvm_unreachable instead of report_fatal_error, since
    // llvm_unreachable gives better stack traces.
    llvm_unreachable("Not yet implemented");
    abort();
  }
}

// The following table summarizes the logic for lowering the icmp instruction
// for i32 and narrower types. Each icmp condition has a clear mapping to an
// ARM32 conditional move instruction.
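// For instance, an equality compare (InstIcmp::Eq) maps to the ARM EQ
// condition, so getIcmp32Mapping(InstIcmp::Eq) below yields CondARM32::EQ.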

const struct TableIcmp32_ {
  CondARM32::Cond Mapping;
} TableIcmp32[] = {
#define X(val, is_signed, swapped64, C_32, C1_64, C2_64) \
  { CondARM32::C_32 } \
  ,
    ICMPARM32_TABLE
#undef X
};
const size_t TableIcmp32Size = llvm::array_lengthof(TableIcmp32);

// The following table summarizes the logic for lowering the icmp instruction
// for the i64 type. Two conditional moves are needed for setting to 1 or 0.
// The operands may need to be swapped, and there is a slight difference
// for signed vs unsigned (comparing hi vs lo first, and using cmp vs sbc).
const struct TableIcmp64_ {
  bool IsSigned;
  bool Swapped;
  CondARM32::Cond C1, C2;
} TableIcmp64[] = {
#define X(val, is_signed, swapped64, C_32, C1_64, C2_64) \
  { is_signed, swapped64, CondARM32::C1_64, CondARM32::C2_64 } \
  ,
    ICMPARM32_TABLE
#undef X
};
const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64);

CondARM32::Cond getIcmp32Mapping(InstIcmp::ICond Cond) {
  size_t Index = static_cast<size_t>(Cond);
  assert(Index < TableIcmp32Size);
  return TableIcmp32[Index].Mapping;
}

// In some cases, there are x-macros tables for both high-level and
// low-level instructions/operands that use the same enum key value.
// The tables are kept separate to maintain a proper separation
// between abstraction layers. There is a risk that the tables could
// get out of sync if enum values are reordered or if entries are
// added or deleted. The following dummy namespaces use
// static_asserts to ensure everything is kept in sync.

// Validate the enum values in ICMPARM32_TABLE.
namespace dummy1 {
// Define a temporary set of enum values based on low-level table
// entries.
enum _tmp_enum {
#define X(val, signed, swapped64, C_32, C1_64, C2_64) _tmp_##val,
  ICMPARM32_TABLE
#undef X
  _num
};
// Define a set of constants based on high-level table entries.
#define X(tag, str) static const int _table1_##tag = InstIcmp::tag;
ICEINSTICMP_TABLE
#undef X
// Define a set of constants based on low-level table entries, and
// ensure the table entry keys are consistent.
#define X(val, signed, swapped64, C_32, C1_64, C2_64) \
  static const int _table2_##val = _tmp_##val; \
  static_assert( \
      _table1_##val == _table2_##val, \
      "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE");
ICMPARM32_TABLE
#undef X
// Repeat the static asserts with respect to the high-level table
// entries in case the high-level table has extra entries.
#define X(tag, str) \
  static_assert( \
      _table1_##tag == _table2_##tag, \
      "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE");
ICEINSTICMP_TABLE
#undef X
} // end of namespace dummy1

// Stack alignment
const uint32_t ARM32_STACK_ALIGNMENT_BYTES = 16;

// Value is in bytes. Return Value adjusted to the next highest multiple
// of the stack alignment.
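// For example, with the 16-byte ARM32_STACK_ALIGNMENT_BYTES above,
// applyStackAlignment(20) returns 32, and applyStackAlignment(32) returns 32.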
uint32_t applyStackAlignment(uint32_t Value) {
  return Utils::applyAlignment(Value, ARM32_STACK_ALIGNMENT_BYTES);
}

// Value is in bytes. Return Value adjusted to the next highest multiple
// of the stack alignment required for the given type.
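// For example, an i64 (8-byte) slot starting at offset 4 is bumped to offset
// 8, while an i32 slot would stay at offset 4.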
uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) {
  // Use natural alignment, except that normally (non-NaCl) ARM only
  // aligns vectors to 8 bytes.
  // TODO(jvoung): Check this ...
  size_t typeAlignInBytes = typeWidthInBytes(Ty);
  if (isVectorType(Ty))
    typeAlignInBytes = 8;
  return Utils::applyAlignment(Value, typeAlignInBytes);
}

} // end of anonymous namespace

TargetARM32::TargetARM32(Cfg *Func) : TargetLowering(Func) {
  static_assert(
      (ARM32InstructionSet::End - ARM32InstructionSet::Begin) ==
          (TargetInstructionSet::ARM32InstructionSet_End -
           TargetInstructionSet::ARM32InstructionSet_Begin),
      "ARM32InstructionSet range different from TargetInstructionSet");
  if (Func->getContext()->getFlags().getTargetInstructionSet() !=
      TargetInstructionSet::BaseInstructionSet) {
    InstructionSet = static_cast<ARM32InstructionSet>(
        (Func->getContext()->getFlags().getTargetInstructionSet() -
         TargetInstructionSet::ARM32InstructionSet_Begin) +
        ARM32InstructionSet::Begin);
  }
  // TODO: Don't initialize IntegerRegisters and friends every time.
  // Instead, initialize in some sort of static initializer for the
  // class.
  llvm::SmallBitVector IntegerRegisters(RegARM32::Reg_NUM);
  llvm::SmallBitVector FloatRegisters(RegARM32::Reg_NUM);
  llvm::SmallBitVector VectorRegisters(RegARM32::Reg_NUM);
  llvm::SmallBitVector InvalidRegisters(RegARM32::Reg_NUM);
  ScratchRegs.resize(RegARM32::Reg_NUM);
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \
          isFP) \
  IntegerRegisters[RegARM32::val] = isInt; \
  FloatRegisters[RegARM32::val] = isFP; \
  VectorRegisters[RegARM32::val] = isFP; \
  ScratchRegs[RegARM32::val] = scratch;
  REGARM32_TABLE;
#undef X
  TypeToRegisterSet[IceType_void] = InvalidRegisters;
  TypeToRegisterSet[IceType_i1] = IntegerRegisters;
  TypeToRegisterSet[IceType_i8] = IntegerRegisters;
  TypeToRegisterSet[IceType_i16] = IntegerRegisters;
  TypeToRegisterSet[IceType_i32] = IntegerRegisters;
  TypeToRegisterSet[IceType_i64] = IntegerRegisters;
  TypeToRegisterSet[IceType_f32] = FloatRegisters;
  TypeToRegisterSet[IceType_f64] = FloatRegisters;
  TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i8] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i16] = VectorRegisters;
  TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
  TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
}

void TargetARM32::translateO2() {
  TimerMarker T(TimerStack::TT_O2, Func);

  // TODO(stichnot): share passes with X86?
  // https://code.google.com/p/nativeclient/issues/detail?id=4094

  if (!Ctx->getFlags().getPhiEdgeSplit()) {
    // Lower Phi instructions.
    Func->placePhiLoads();
    if (Func->hasError())
      return;
    Func->placePhiStores();
    if (Func->hasError())
      return;
    Func->deletePhis();
    if (Func->hasError())
      return;
    Func->dump("After Phi lowering");
  }

  // Address mode optimization.
  Func->getVMetadata()->init(VMK_SingleDefs);
  Func->doAddressOpt();

  // Argument lowering
  Func->doArgLowering();

  // Target lowering. This requires liveness analysis for some parts
  // of the lowering decisions, such as compare/branch fusing. If
  // non-lightweight liveness analysis is used, the instructions need
  // to be renumbered first. TODO: This renumbering should only be
  // necessary if we're actually calculating live intervals, which we
  // only do for register allocation.
  Func->renumberInstructions();
  if (Func->hasError())
    return;

  // TODO: It should be sufficient to use the fastest liveness
  // calculation, i.e. livenessLightweight(). However, for some
  // reason that slows down the rest of the translation. Investigate.
  Func->liveness(Liveness_Basic);
  if (Func->hasError())
    return;
  Func->dump("After ARM32 address mode opt");

  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After ARM32 codegen");

  // Register allocation. This requires instruction renumbering and
  // full liveness analysis.
  Func->renumberInstructions();
  if (Func->hasError())
    return;
  Func->liveness(Liveness_Intervals);
  if (Func->hasError())
    return;
  // Validate the live range computations. The expensive validation
  // call is deliberately only made when assertions are enabled.
  assert(Func->validateLiveness());
  // The post-codegen dump is done here, after liveness analysis and
  // associated cleanup, to make the dump cleaner and more useful.
  Func->dump("After initial ARM32 codegen");
  Func->getVMetadata()->init(VMK_All);
  regAlloc(RAK_Global);
  if (Func->hasError())
    return;
  Func->dump("After linear scan regalloc");

  if (Ctx->getFlags().getPhiEdgeSplit()) {
    Func->advancedPhiLowering();
    Func->dump("After advanced Phi lowering");
  }

  // Stack frame mapping.
  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  Func->contractEmptyNodes();
  Func->reorderNodes();

  // Branch optimization. This needs to be done just before code
  // emission. In particular, no transformations that insert or
  // reorder CfgNodes should be done after branch optimization. We go
  // ahead and do it before nop insertion to reduce the amount of work
  // needed for searching for opportunities.
  Func->doBranchOpt();
  Func->dump("After branch optimization");

  // Nop insertion
  if (Ctx->getFlags().shouldDoNopInsertion()) {
    Func->doNopInsertion();
  }
}

void TargetARM32::translateOm1() {
  TimerMarker T(TimerStack::TT_Om1, Func);

  // TODO: share passes with X86?

  Func->placePhiLoads();
  if (Func->hasError())
    return;
  Func->placePhiStores();
  if (Func->hasError())
    return;
  Func->deletePhis();
  if (Func->hasError())
    return;
  Func->dump("After Phi lowering");

  Func->doArgLowering();

  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After initial ARM32 codegen");

  regAlloc(RAK_InfOnly);
  if (Func->hasError())
    return;
  Func->dump("After regalloc of infinite-weight variables");

  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  // Nop insertion
  if (Ctx->getFlags().shouldDoNopInsertion()) {
    Func->doNopInsertion();
  }
}

bool TargetARM32::doBranchOpt(Inst *I, const CfgNode *NextNode) {
  if (InstARM32Br *Br = llvm::dyn_cast<InstARM32Br>(I)) {
    return Br->optimizeBranch(NextNode);
  }
  return false;
}

IceString TargetARM32::RegNames[] = {
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \
          isFP) \
  name,
    REGARM32_TABLE
#undef X
};

IceString TargetARM32::getRegName(SizeT RegNum, Type Ty) const {
  assert(RegNum < RegARM32::Reg_NUM);
  (void)Ty;
  return RegNames[RegNum];
}

Variable *TargetARM32::getPhysicalRegister(SizeT RegNum, Type Ty) {
  if (Ty == IceType_void)
    Ty = IceType_i32;
  if (PhysicalRegisters[Ty].empty())
    PhysicalRegisters[Ty].resize(RegARM32::Reg_NUM);
  assert(RegNum < PhysicalRegisters[Ty].size());
  Variable *Reg = PhysicalRegisters[Ty][RegNum];
  if (Reg == nullptr) {
    Reg = Func->makeVariable(Ty);
    Reg->setRegNum(RegNum);
    PhysicalRegisters[Ty][RegNum] = Reg;
    // Specially mark SP and LR as "arguments" so that they are considered
    // live upon function entry.
    if (RegNum == RegARM32::Reg_sp || RegNum == RegARM32::Reg_lr) {
      Func->addImplicitArg(Reg);
      Reg->setIgnoreLiveness();
    }
  }
  return Reg;
}

void TargetARM32::emitVariable(const Variable *Var) const {
  Ostream &Str = Ctx->getStrEmit();
  if (Var->hasReg()) {
    Str << getRegName(Var->getRegNum(), Var->getType());
    return;
  }
  if (Var->getWeight().isInf()) {
    llvm::report_fatal_error(
        "Infinite-weight Variable has no register assigned");
  }
  int32_t Offset = Var->getStackOffset();
  if (!hasFramePointer())
    Offset += getStackAdjustment();
  // TODO(jvoung): Handle out of range. Perhaps we need a scratch register
  // to materialize a larger offset.
  constexpr bool SignExt = false;
  if (!OperandARM32Mem::canHoldOffset(Var->getType(), SignExt, Offset)) {
    llvm::report_fatal_error("Illegal stack offset");
  }
  const Type FrameSPTy = IceType_i32;
  Str << "[" << getRegName(getFrameOrStackReg(), FrameSPTy);
  if (Offset != 0) {
    Str << ", " << getConstantPrefix() << Offset;
  }
  Str << "]";
}

bool TargetARM32::CallingConv::I64InRegs(std::pair<int32_t, int32_t> *Regs) {
  if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG)
    return false;
  int32_t RegLo, RegHi;
  // Always start i64 registers at an even register, so this may end
  // up padding away a register.
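  // E.g., for arguments (i32, i64), the i32 is assigned r0, r1 is skipped as
  // padding, and the i64 is passed in the r2/r3 pair.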
  if (NumGPRRegsUsed % 2 != 0) {
    ++NumGPRRegsUsed;
  }
  RegLo = RegARM32::Reg_r0 + NumGPRRegsUsed;
  ++NumGPRRegsUsed;
  RegHi = RegARM32::Reg_r0 + NumGPRRegsUsed;
  ++NumGPRRegsUsed;
  // If this bumps us past the boundary, don't allocate to a register
  // and leave any previously speculatively consumed registers as consumed.
  if (NumGPRRegsUsed > ARM32_MAX_GPR_ARG)
    return false;
  Regs->first = RegLo;
  Regs->second = RegHi;
  return true;
}

bool TargetARM32::CallingConv::I32InReg(int32_t *Reg) {
  if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG)
    return false;
  *Reg = RegARM32::Reg_r0 + NumGPRRegsUsed;
  ++NumGPRRegsUsed;
  return true;
}

void TargetARM32::lowerArguments() {
  VarList &Args = Func->getArgs();
  TargetARM32::CallingConv CC;

  // For each register argument, replace Arg in the argument list with the
  // home register. Then generate an instruction in the prolog to copy the
  // home register to the assigned location of Arg.
  Context.init(Func->getEntryNode());
  Context.setInsertPoint(Context.getCur());

  for (SizeT I = 0, E = Args.size(); I < E; ++I) {
    Variable *Arg = Args[I];
    Type Ty = Arg->getType();
    // TODO(jvoung): handle float/vector types.
    if (isVectorType(Ty)) {
      UnimplementedError(Func->getContext()->getFlags());
      continue;
    } else if (isFloatingType(Ty)) {
      UnimplementedError(Func->getContext()->getFlags());
      continue;
    } else if (Ty == IceType_i64) {
      std::pair<int32_t, int32_t> RegPair;
      if (!CC.I64InRegs(&RegPair))
        continue;
      Variable *RegisterArg = Func->makeVariable(Ty);
      Variable *RegisterLo = Func->makeVariable(IceType_i32);
      Variable *RegisterHi = Func->makeVariable(IceType_i32);
      if (ALLOW_DUMP) {
        RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
        RegisterLo->setName(Func, "home_reg_lo:" + Arg->getName(Func));
        RegisterHi->setName(Func, "home_reg_hi:" + Arg->getName(Func));
      }
      RegisterLo->setRegNum(RegPair.first);
      RegisterLo->setIsArg();
      RegisterHi->setRegNum(RegPair.second);
      RegisterHi->setIsArg();
      RegisterArg->setLoHi(RegisterLo, RegisterHi);
      RegisterArg->setIsArg();
      Arg->setIsArg(false);

      Args[I] = RegisterArg;
      Context.insert(InstAssign::create(Func, Arg, RegisterArg));
      continue;
    } else {
      assert(Ty == IceType_i32);
      int32_t RegNum;
      if (!CC.I32InReg(&RegNum))
        continue;
      Variable *RegisterArg = Func->makeVariable(Ty);
      if (ALLOW_DUMP) {
        RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
      }
      RegisterArg->setRegNum(RegNum);
      RegisterArg->setIsArg();
      Arg->setIsArg(false);

      Args[I] = RegisterArg;
      Context.insert(InstAssign::create(Func, Arg, RegisterArg));
    }
  }
}

// Helper function for addProlog().
//
// This assumes Arg is an argument passed on the stack. This sets the
// frame offset for Arg and updates InArgsSizeBytes according to Arg's
// width. For an I64 arg that has been split into Lo and Hi components,
// it calls itself recursively on the components, taking care to handle
// Lo first because of the little-endian architecture. Lastly, this
// function generates an instruction to copy Arg into its assigned
// register if applicable.
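// For example, an i64 stack argument placed at the very start of the in-args
// area is assigned offset BasicFrameOffset + 0 for its Lo half and
// BasicFrameOffset + 4 for its Hi half.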
499void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
500 size_t BasicFrameOffset,
501 size_t &InArgsSizeBytes) {
502 Variable *Lo = Arg->getLo();
503 Variable *Hi = Arg->getHi();
504 Type Ty = Arg->getType();
505 if (Lo && Hi && Ty == IceType_i64) {
506 assert(Lo->getType() != IceType_i64); // don't want infinite recursion
507 assert(Hi->getType() != IceType_i64); // don't want infinite recursion
508 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
509 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
510 return;
511 }
Jan Voungb0a8c242015-06-18 15:00:14 -0700512 InArgsSizeBytes = applyStackAlignmentTy(InArgsSizeBytes, Ty);
Jan Voung0fa6c5a2015-06-01 11:04:04 -0700513 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
514 InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
515 // If the argument variable has been assigned a register, we need to load
516 // the value from the stack slot.
517 if (Arg->hasReg()) {
518 assert(Ty != IceType_i64);
519 OperandARM32Mem *Mem = OperandARM32Mem::create(
520 Func, Ty, FramePtr, llvm::cast<ConstantInteger32>(
521 Ctx->getConstantInt32(Arg->getStackOffset())));
522 if (isVectorType(Arg->getType())) {
523 UnimplementedError(Func->getContext()->getFlags());
524 } else {
525 _ldr(Arg, Mem);
526 }
527 // This argument-copying instruction uses an explicit
528 // OperandARM32Mem operand instead of a Variable, so its
529 // fill-from-stack operation has to be tracked separately for
530 // statistics.
531 Ctx->statsUpdateFills();
532 }
533}
534
Jan Voungb36ad9b2015-04-21 17:01:49 -0700535Type TargetARM32::stackSlotType() { return IceType_i32; }
536
537void TargetARM32::addProlog(CfgNode *Node) {
Jan Voung0fa6c5a2015-06-01 11:04:04 -0700538 // Stack frame layout:
539 //
540 // +------------------------+
541 // | 1. preserved registers |
542 // +------------------------+
543 // | 2. padding |
544 // +------------------------+
545 // | 3. global spill area |
546 // +------------------------+
547 // | 4. padding |
548 // +------------------------+
549 // | 5. local spill area |
550 // +------------------------+
551 // | 6. padding |
552 // +------------------------+
553 // | 7. allocas |
554 // +------------------------+
555 //
556 // The following variables record the size in bytes of the given areas:
557 // * PreservedRegsSizeBytes: area 1
558 // * SpillAreaPaddingBytes: area 2
559 // * GlobalsSize: area 3
560 // * GlobalsAndSubsequentPaddingSize: areas 3 - 4
561 // * LocalsSpillAreaSize: area 5
562 // * SpillAreaSizeBytes: areas 2 - 6
563 // Determine stack frame offsets for each Variable without a
564 // register assignment. This can be done as one variable per stack
565 // slot. Or, do coalescing by running the register allocator again
566 // with an infinite set of registers (as a side effect, this gives
567 // variables a second chance at physical register assignment).
568 //
569 // A middle ground approach is to leverage sparsity and allocate one
570 // block of space on the frame for globals (variables with
571 // multi-block lifetime), and one block to share for locals
572 // (single-block lifetime).
573
574 Context.init(Node);
575 Context.setInsertPoint(Context.getCur());
576
577 llvm::SmallBitVector CalleeSaves =
578 getRegisterSet(RegSet_CalleeSave, RegSet_None);
579 RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
580 VarList SortedSpilledVariables;
581 size_t GlobalsSize = 0;
582 // If there is a separate locals area, this represents that area.
583 // Otherwise it counts any variable not counted by GlobalsSize.
584 SpillAreaSizeBytes = 0;
585 // If there is a separate locals area, this specifies the alignment
586 // for it.
587 uint32_t LocalsSlotsAlignmentBytes = 0;
588 // The entire spill locations area gets aligned to largest natural
589 // alignment of the variables that have a spill slot.
590 uint32_t SpillAreaAlignmentBytes = 0;
591 // For now, we don't have target-specific variables that need special
592 // treatment (no stack-slot-linked SpillVariable type).
593 std::function<bool(Variable *)> TargetVarHook =
594 [](Variable *) { return false; };
595
596 // Compute the list of spilled variables and bounds for GlobalsSize, etc.
597 getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
598 &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
599 &LocalsSlotsAlignmentBytes, TargetVarHook);
600 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
601 SpillAreaSizeBytes += GlobalsSize;
602
603 // Add push instructions for preserved registers.
604 // On ARM, "push" can push a whole list of GPRs via a bitmask (0-15).
605 // Unlike x86, ARM also has callee-saved float/vector registers.
606 // The "vpush" instruction can handle a whole list of float/vector
607 // registers, but it only handles contiguous sequences of registers
608 // by specifying the start and the length.
609 VarList GPRsToPreserve;
610 GPRsToPreserve.reserve(CalleeSaves.size());
611 uint32_t NumCallee = 0;
612 size_t PreservedRegsSizeBytes = 0;
613 // Consider FP and LR as callee-save / used as needed.
614 if (UsesFramePointer) {
615 CalleeSaves[RegARM32::Reg_fp] = true;
616 assert(RegsUsed[RegARM32::Reg_fp] == false);
617 RegsUsed[RegARM32::Reg_fp] = true;
618 }
619 if (!MaybeLeafFunc) {
620 CalleeSaves[RegARM32::Reg_lr] = true;
621 RegsUsed[RegARM32::Reg_lr] = true;
622 }
623 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
624 if (CalleeSaves[i] && RegsUsed[i]) {
625 // TODO(jvoung): do separate vpush for each floating point
626 // register segment and += 4, or 8 depending on type.
627 ++NumCallee;
628 PreservedRegsSizeBytes += 4;
629 GPRsToPreserve.push_back(getPhysicalRegister(i));
630 }
631 }
632 Ctx->statsUpdateRegistersSaved(NumCallee);
633 if (!GPRsToPreserve.empty())
634 _push(GPRsToPreserve);
635
636 // Generate "mov FP, SP" if needed.
637 if (UsesFramePointer) {
638 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
639 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
640 _mov(FP, SP);
641 // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode).
642 Context.insert(InstFakeUse::create(Func, FP));
643 }
644
645 // Align the variables area. SpillAreaPaddingBytes is the size of
646 // the region after the preserved registers and before the spill areas.
647 // LocalsSlotsPaddingBytes is the amount of padding between the globals
648 // and locals area if they are separate.
649 assert(SpillAreaAlignmentBytes <= ARM32_STACK_ALIGNMENT_BYTES);
650 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
651 uint32_t SpillAreaPaddingBytes = 0;
652 uint32_t LocalsSlotsPaddingBytes = 0;
653 alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes,
654 GlobalsSize, LocalsSlotsAlignmentBytes,
655 &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes);
656 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
657 uint32_t GlobalsAndSubsequentPaddingSize =
658 GlobalsSize + LocalsSlotsPaddingBytes;
659
660 // Align SP if necessary.
661 if (NeedsStackAlignment) {
662 uint32_t StackOffset = PreservedRegsSizeBytes;
663 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);
664 SpillAreaSizeBytes = StackSize - StackOffset;
665 }
666
667 // Generate "sub sp, SpillAreaSizeBytes"
668 if (SpillAreaSizeBytes) {
669 // Use the IP inter-procedural scratch register if needed to legalize
670 // the immediate.
671 Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
672 Legal_Reg | Legal_Flex, RegARM32::Reg_ip);
673 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
674 _sub(SP, SP, SubAmount);
675 }
676 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
677
678 resetStackAdjustment();
679
680 // Fill in stack offsets for stack args, and copy args into registers
681 // for those that were register-allocated. Args are pushed right to
682 // left, so Arg[0] is closest to the stack/frame pointer.
683 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
684 size_t BasicFrameOffset = PreservedRegsSizeBytes;
685 if (!UsesFramePointer)
686 BasicFrameOffset += SpillAreaSizeBytes;
687
688 const VarList &Args = Func->getArgs();
689 size_t InArgsSizeBytes = 0;
Jan Voungb0a8c242015-06-18 15:00:14 -0700690 TargetARM32::CallingConv CC;
Jan Voung0fa6c5a2015-06-01 11:04:04 -0700691 for (Variable *Arg : Args) {
692 Type Ty = Arg->getType();
Jan Voungb0a8c242015-06-18 15:00:14 -0700693 bool InRegs = false;
Jan Voung0fa6c5a2015-06-01 11:04:04 -0700694 // Skip arguments passed in registers.
695 if (isVectorType(Ty)) {
696 UnimplementedError(Func->getContext()->getFlags());
697 continue;
698 } else if (isFloatingType(Ty)) {
699 UnimplementedError(Func->getContext()->getFlags());
700 continue;
Jan Voungb0a8c242015-06-18 15:00:14 -0700701 } else if (Ty == IceType_i64) {
702 std::pair<int32_t, int32_t> DummyRegs;
703 InRegs = CC.I64InRegs(&DummyRegs);
704 } else {
705 assert(Ty == IceType_i32);
706 int32_t DummyReg;
707 InRegs = CC.I32InReg(&DummyReg);
Jan Voung0fa6c5a2015-06-01 11:04:04 -0700708 }
Jan Voungb0a8c242015-06-18 15:00:14 -0700709 if (!InRegs)
710 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
Jan Voung0fa6c5a2015-06-01 11:04:04 -0700711 }
712
713 // Fill in stack offsets for locals.
714 assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
715 SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
716 UsesFramePointer);
717 this->HasComputedFrame = true;
718
719 if (ALLOW_DUMP && Func->isVerbose(IceV_Frame)) {
720 OstreamLocker L(Func->getContext());
721 Ostream &Str = Func->getContext()->getStrDump();
722
723 Str << "Stack layout:\n";
724 uint32_t SPAdjustmentPaddingSize =
725 SpillAreaSizeBytes - LocalsSpillAreaSize -
726 GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes;
727 Str << " in-args = " << InArgsSizeBytes << " bytes\n"
728 << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
729 << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
730 << " globals spill area = " << GlobalsSize << " bytes\n"
731 << " globals-locals spill areas intermediate padding = "
732 << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
733 << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
734 << " SP alignment padding = " << SPAdjustmentPaddingSize << " bytes\n";
735
736 Str << "Stack details:\n"
737 << " SP adjustment = " << SpillAreaSizeBytes << " bytes\n"
738 << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
739 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
740 << " bytes\n"
741 << " is FP based = " << UsesFramePointer << "\n";
742 }
Jan Voungb36ad9b2015-04-21 17:01:49 -0700743}
744
745void TargetARM32::addEpilog(CfgNode *Node) {
Jan Voung0fa6c5a2015-06-01 11:04:04 -0700746 InstList &Insts = Node->getInsts();
747 InstList::reverse_iterator RI, E;
748 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
749 if (llvm::isa<InstARM32Ret>(*RI))
750 break;
751 }
752 if (RI == E)
753 return;
754
755 // Convert the reverse_iterator position into its corresponding
756 // (forward) iterator position.
757 InstList::iterator InsertPoint = RI.base();
758 --InsertPoint;
759 Context.init(Node);
760 Context.setInsertPoint(InsertPoint);
761
762 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
763 if (UsesFramePointer) {
764 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
765 // For late-stage liveness analysis (e.g. asm-verbose mode),
766 // adding a fake use of SP before the assignment of SP=FP keeps
767 // previous SP adjustments from being dead-code eliminated.
768 Context.insert(InstFakeUse::create(Func, SP));
769 _mov(SP, FP);
770 } else {
771 // add SP, SpillAreaSizeBytes
772 if (SpillAreaSizeBytes) {
773 // Use the IP inter-procedural scratch register if needed to legalize
774 // the immediate. It shouldn't be live at this point.
775 Operand *AddAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
776 Legal_Reg | Legal_Flex, RegARM32::Reg_ip);
777 _add(SP, SP, AddAmount);
778 }
779 }
780
781 // Add pop instructions for preserved registers.
782 llvm::SmallBitVector CalleeSaves =
783 getRegisterSet(RegSet_CalleeSave, RegSet_None);
784 VarList GPRsToRestore;
785 GPRsToRestore.reserve(CalleeSaves.size());
786 // Consider FP and LR as callee-save / used as needed.
787 if (UsesFramePointer) {
788 CalleeSaves[RegARM32::Reg_fp] = true;
789 }
790 if (!MaybeLeafFunc) {
791 CalleeSaves[RegARM32::Reg_lr] = true;
792 }
793 // Pop registers in ascending order just like push
794 // (instead of in reverse order).
795 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
796 if (CalleeSaves[i] && RegsUsed[i]) {
797 GPRsToRestore.push_back(getPhysicalRegister(i));
798 }
799 }
800 if (!GPRsToRestore.empty())
801 _pop(GPRsToRestore);
802
803 if (!Ctx->getFlags().getUseSandboxing())
804 return;
805
806 // Change the original ret instruction into a sandboxed return sequence.
807 // bundle_lock
808 // bic lr, #0xc000000f
809 // bx lr
810 // bundle_unlock
811 // This isn't just aligning to the getBundleAlignLog2Bytes(). It needs to
812 // restrict to the lower 1GB as well.
813 Operand *RetMask =
814 legalize(Ctx->getConstantInt32(0xc000000f), Legal_Reg | Legal_Flex);
815 Variable *LR = makeReg(IceType_i32, RegARM32::Reg_lr);
816 Variable *RetValue = nullptr;
817 if (RI->getSrcSize())
818 RetValue = llvm::cast<Variable>(RI->getSrc(0));
819 _bundle_lock();
820 _bic(LR, LR, RetMask);
821 _ret(LR, RetValue);
822 _bundle_unlock();
823 RI->setDeleted();
Jan Voungb36ad9b2015-04-21 17:01:49 -0700824}
825
Jan Voungb3401d22015-05-18 09:38:21 -0700826void TargetARM32::split64(Variable *Var) {
827 assert(Var->getType() == IceType_i64);
828 Variable *Lo = Var->getLo();
829 Variable *Hi = Var->getHi();
830 if (Lo) {
831 assert(Hi);
832 return;
833 }
834 assert(Hi == nullptr);
835 Lo = Func->makeVariable(IceType_i32);
836 Hi = Func->makeVariable(IceType_i32);
837 if (ALLOW_DUMP) {
838 Lo->setName(Func, Var->getName(Func) + "__lo");
839 Hi->setName(Func, Var->getName(Func) + "__hi");
840 }
841 Var->setLoHi(Lo, Hi);
842 if (Var->getIsArg()) {
843 Lo->setIsArg();
844 Hi->setIsArg();
845 }
846}
847
848Operand *TargetARM32::loOperand(Operand *Operand) {
849 assert(Operand->getType() == IceType_i64);
850 if (Operand->getType() != IceType_i64)
851 return Operand;
852 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
853 split64(Var);
854 return Var->getLo();
855 }
856 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
857 return Ctx->getConstantInt32(static_cast<uint32_t>(Const->getValue()));
858 }
859 if (OperandARM32Mem *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) {
860 // Conservatively disallow memory operands with side-effects (pre/post
861 // increment) in case of duplication.
862 assert(Mem->getAddrMode() == OperandARM32Mem::Offset ||
863 Mem->getAddrMode() == OperandARM32Mem::NegOffset);
864 if (Mem->isRegReg()) {
865 return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(),
866 Mem->getIndex(), Mem->getShiftOp(),
867 Mem->getShiftAmt(), Mem->getAddrMode());
868 } else {
869 return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(),
870 Mem->getOffset(), Mem->getAddrMode());
871 }
872 }
873 llvm_unreachable("Unsupported operand type");
874 return nullptr;
875}
876
877Operand *TargetARM32::hiOperand(Operand *Operand) {
878 assert(Operand->getType() == IceType_i64);
879 if (Operand->getType() != IceType_i64)
880 return Operand;
881 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
882 split64(Var);
883 return Var->getHi();
884 }
885 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
886 return Ctx->getConstantInt32(
887 static_cast<uint32_t>(Const->getValue() >> 32));
888 }
889 if (OperandARM32Mem *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) {
890 // Conservatively disallow memory operands with side-effects
891 // in case of duplication.
892 assert(Mem->getAddrMode() == OperandARM32Mem::Offset ||
893 Mem->getAddrMode() == OperandARM32Mem::NegOffset);
894 const Type SplitType = IceType_i32;
895 if (Mem->isRegReg()) {
896 // We have to make a temp variable T, and add 4 to either Base or Index.
897 // The Index may be shifted, so adding 4 can mean something else.
898 // Thus, prefer T := Base + 4, and use T as the new Base.
899 Variable *Base = Mem->getBase();
900 Constant *Four = Ctx->getConstantInt32(4);
901 Variable *NewBase = Func->makeVariable(Base->getType());
902 lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, NewBase,
903 Base, Four));
904 return OperandARM32Mem::create(Func, SplitType, NewBase, Mem->getIndex(),
905 Mem->getShiftOp(), Mem->getShiftAmt(),
906 Mem->getAddrMode());
907 } else {
908 Variable *Base = Mem->getBase();
909 ConstantInteger32 *Offset = Mem->getOffset();
910 assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4));
911 int32_t NextOffsetVal = Offset->getValue() + 4;
912 const bool SignExt = false;
913 if (!OperandARM32Mem::canHoldOffset(SplitType, SignExt, NextOffsetVal)) {
914 // We have to make a temp variable and add 4 to either Base or Offset.
915 // If we add 4 to Offset, this will convert a non-RegReg addressing
916 // mode into a RegReg addressing mode. Since NaCl sandboxing disallows
917 // RegReg addressing modes, prefer adding to base and replacing instead.
918 // Thus we leave the old offset alone.
919 Constant *Four = Ctx->getConstantInt32(4);
920 Variable *NewBase = Func->makeVariable(Base->getType());
921 lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add,
922 NewBase, Base, Four));
923 Base = NewBase;
924 } else {
925 Offset =
926 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal));
927 }
928 return OperandARM32Mem::create(Func, SplitType, Base, Offset,
929 Mem->getAddrMode());
930 }
931 }
932 llvm_unreachable("Unsupported operand type");
933 return nullptr;
934}
935
Jan Voungb36ad9b2015-04-21 17:01:49 -0700936llvm::SmallBitVector TargetARM32::getRegisterSet(RegSetMask Include,
937 RegSetMask Exclude) const {
938 llvm::SmallBitVector Registers(RegARM32::Reg_NUM);
939
940#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \
941 isFP) \
942 if (scratch && (Include & RegSet_CallerSave)) \
943 Registers[RegARM32::val] = true; \
944 if (preserved && (Include & RegSet_CalleeSave)) \
945 Registers[RegARM32::val] = true; \
946 if (stackptr && (Include & RegSet_StackPointer)) \
947 Registers[RegARM32::val] = true; \
948 if (frameptr && (Include & RegSet_FramePointer)) \
949 Registers[RegARM32::val] = true; \
950 if (scratch && (Exclude & RegSet_CallerSave)) \
951 Registers[RegARM32::val] = false; \
952 if (preserved && (Exclude & RegSet_CalleeSave)) \
953 Registers[RegARM32::val] = false; \
954 if (stackptr && (Exclude & RegSet_StackPointer)) \
955 Registers[RegARM32::val] = false; \
956 if (frameptr && (Exclude & RegSet_FramePointer)) \
957 Registers[RegARM32::val] = false;
958
959 REGARM32_TABLE
960
961#undef X
962
963 return Registers;
964}
965
966void TargetARM32::lowerAlloca(const InstAlloca *Inst) {
967 UsesFramePointer = true;
968 // Conservatively require the stack to be aligned. Some stack
969 // adjustment operations implemented below assume that the stack is
970 // aligned before the alloca. All the alloca code ensures that the
971 // stack alignment is preserved after the alloca. The stack alignment
972 // restriction can be relaxed in some cases.
973 NeedsStackAlignment = true;
Jan Voung55500db2015-05-26 14:25:40 -0700974
975 // TODO(stichnot): minimize the number of adjustments of SP, etc.
976 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
977 Variable *Dest = Inst->getDest();
978 uint32_t AlignmentParam = Inst->getAlignInBytes();
979 // For default align=0, set it to the real value 1, to avoid any
980 // bit-manipulation problems below.
981 AlignmentParam = std::max(AlignmentParam, 1u);
982
983 // LLVM enforces power of 2 alignment.
984 assert(llvm::isPowerOf2_32(AlignmentParam));
985 assert(llvm::isPowerOf2_32(ARM32_STACK_ALIGNMENT_BYTES));
986
987 uint32_t Alignment = std::max(AlignmentParam, ARM32_STACK_ALIGNMENT_BYTES);
988 if (Alignment > ARM32_STACK_ALIGNMENT_BYTES) {
989 alignRegisterPow2(SP, Alignment);
990 }
991 Operand *TotalSize = Inst->getSizeInBytes();
992 if (const auto *ConstantTotalSize =
993 llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
994 uint32_t Value = ConstantTotalSize->getValue();
995 Value = Utils::applyAlignment(Value, Alignment);
996 Operand *SubAmount = legalize(Ctx->getConstantInt32(Value));
997 _sub(SP, SP, SubAmount);
998 } else {
999 // Non-constant sizes need to be adjusted to the next highest
1000 // multiple of the required alignment at runtime.
1001 TotalSize = legalize(TotalSize);
1002 Variable *T = makeReg(IceType_i32);
1003 _mov(T, TotalSize);
1004 Operand *AddAmount = legalize(Ctx->getConstantInt32(Alignment - 1));
1005 _add(T, T, AddAmount);
1006 alignRegisterPow2(T, Alignment);
1007 _sub(SP, SP, T);
1008 }
1009 _mov(Dest, SP);
Jan Voungb36ad9b2015-04-21 17:01:49 -07001010}
1011
1012void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
Jan Voungb3401d22015-05-18 09:38:21 -07001013 Variable *Dest = Inst->getDest();
1014 // TODO(jvoung): Should be able to flip Src0 and Src1 if it is easier
1015 // to legalize Src0 to flex or Src1 to flex and there is a reversible
1016 // instruction. E.g., reverse subtract with immediate, register vs
1017 // register, immediate.
1018 // Or it may be the case that the operands aren't swapped, but the
1019 // bits can be flipped and a different operation applied.
1020 // E.g., use BIC (bit clear) instead of AND for some masks.
Jan Voung29719972015-05-19 11:24:51 -07001021 Operand *Src0 = Inst->getSrc(0);
1022 Operand *Src1 = Inst->getSrc(1);
Jan Voungb3401d22015-05-18 09:38:21 -07001023 if (Dest->getType() == IceType_i64) {
Jan Voung29719972015-05-19 11:24:51 -07001024 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1025 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1026 Variable *Src0RLo = legalizeToVar(loOperand(Src0));
1027 Variable *Src0RHi = legalizeToVar(hiOperand(Src0));
1028 Operand *Src1Lo = legalize(loOperand(Src1), Legal_Reg | Legal_Flex);
1029 Operand *Src1Hi = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex);
1030 Variable *T_Lo = makeReg(DestLo->getType());
1031 Variable *T_Hi = makeReg(DestHi->getType());
1032 switch (Inst->getOp()) {
1033 case InstArithmetic::_num:
1034 llvm_unreachable("Unknown arithmetic operator");
1035 break;
1036 case InstArithmetic::Add:
1037 _adds(T_Lo, Src0RLo, Src1Lo);
1038 _mov(DestLo, T_Lo);
1039 _adc(T_Hi, Src0RHi, Src1Hi);
1040 _mov(DestHi, T_Hi);
1041 break;
1042 case InstArithmetic::And:
1043 _and(T_Lo, Src0RLo, Src1Lo);
1044 _mov(DestLo, T_Lo);
1045 _and(T_Hi, Src0RHi, Src1Hi);
1046 _mov(DestHi, T_Hi);
1047 break;
1048 case InstArithmetic::Or:
1049 _orr(T_Lo, Src0RLo, Src1Lo);
1050 _mov(DestLo, T_Lo);
1051 _orr(T_Hi, Src0RHi, Src1Hi);
1052 _mov(DestHi, T_Hi);
1053 break;
1054 case InstArithmetic::Xor:
1055 _eor(T_Lo, Src0RLo, Src1Lo);
1056 _mov(DestLo, T_Lo);
1057 _eor(T_Hi, Src0RHi, Src1Hi);
1058 _mov(DestHi, T_Hi);
1059 break;
1060 case InstArithmetic::Sub:
1061 _subs(T_Lo, Src0RLo, Src1Lo);
1062 _mov(DestLo, T_Lo);
1063 _sbc(T_Hi, Src0RHi, Src1Hi);
1064 _mov(DestHi, T_Hi);
1065 break;
1066 case InstArithmetic::Mul: {
1067 // GCC 4.8 does:
1068 // a=b*c ==>
1069 // t_acc =(mul) (b.lo * c.hi)
1070 // t_acc =(mla) (c.lo * b.hi) + t_acc
1071 // t.hi,t.lo =(umull) b.lo * c.lo
1072 // t.hi += t_acc
1073 // a.lo = t.lo
1074 // a.hi = t.hi
1075 //
1076 // LLVM does:
1077 // t.hi,t.lo =(umull) b.lo * c.lo
1078 // t.hi =(mla) (b.lo * c.hi) + t.hi
1079 // t.hi =(mla) (b.hi * c.lo) + t.hi
1080 // a.lo = t.lo
1081 // a.hi = t.hi
1082 //
1083 // LLVM's lowering has fewer instructions, but more register pressure:
1084 // t.lo is live from beginning to end, while GCC delays the two-dest
1085 // instruction till the end, and kills c.hi immediately.
1086 Variable *T_Acc = makeReg(IceType_i32);
1087 Variable *T_Acc1 = makeReg(IceType_i32);
1088 Variable *T_Hi1 = makeReg(IceType_i32);
1089 Variable *Src1RLo = legalizeToVar(Src1Lo);
1090 Variable *Src1RHi = legalizeToVar(Src1Hi);
1091 _mul(T_Acc, Src0RLo, Src1RHi);
1092 _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc);
1093 _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo);
1094 _add(T_Hi, T_Hi1, T_Acc1);
1095 _mov(DestLo, T_Lo);
1096 _mov(DestHi, T_Hi);
1097 } break;
Jan Voung66c3d5e2015-06-04 17:02:31 -07001098 case InstArithmetic::Shl: {
1099 // a=b<<c ==>
1100 // GCC 4.8 does:
1101 // sub t_c1, c.lo, #32
1102 // lsl t_hi, b.hi, c.lo
1103 // orr t_hi, t_hi, b.lo, lsl t_c1
1104 // rsb t_c2, c.lo, #32
1105 // orr t_hi, t_hi, b.lo, lsr t_c2
1106 // lsl t_lo, b.lo, c.lo
1107 // a.lo = t_lo
1108 // a.hi = t_hi
1109 // Can be strength-reduced for constant-shifts, but we don't do
1110 // that for now.
1111 // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative.
1112 // On ARM, shifts only take the lower 8 bits of the shift register,
1113 // and saturate to the range 0-32, so the negative value will
1114 // saturate to 32.
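      // Worked example for c.lo = 40: t_c1 = 8 and t_c2 = -8 (a saturating,
      // full-width shift), so t_hi = (b.hi << 40 [= 0]) | (b.lo << 8) |
      // (b.lo >> -8 [= 0]) and t_lo = b.lo << 40 = 0, which matches the
      // expected result of a 64-bit shift by 40.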
      Variable *T_Hi = makeReg(IceType_i32);
      Variable *Src1RLo = legalizeToVar(Src1Lo);
      Constant *ThirtyTwo = Ctx->getConstantInt32(32);
      Variable *T_C1 = makeReg(IceType_i32);
      Variable *T_C2 = makeReg(IceType_i32);
      _sub(T_C1, Src1RLo, ThirtyTwo);
      _lsl(T_Hi, Src0RHi, Src1RLo);
      _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
                                                   OperandARM32::LSL, T_C1));
      _rsb(T_C2, Src1RLo, ThirtyTwo);
      _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
                                                   OperandARM32::LSR, T_C2));
      _mov(DestHi, T_Hi);
      Variable *T_Lo = makeReg(IceType_i32);
      // _mov seems to sometimes have better register preferencing than lsl.
      // Otherwise mov w/ lsl shifted register is a pseudo-instruction
      // that maps to lsl.
      _mov(T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
                                             OperandARM32::LSL, Src1RLo));
      _mov(DestLo, T_Lo);
    } break;
    case InstArithmetic::Lshr:
      // a=b>>c (unsigned) ==>
      // GCC 4.8 does:
      // rsb t_c1, c.lo, #32
      // lsr t_lo, b.lo, c.lo
      // orr t_lo, t_lo, b.hi, lsl t_c1
      // sub t_c2, c.lo, #32
      // orr t_lo, t_lo, b.hi, lsr t_c2
      // lsr t_hi, b.hi, c.lo
      // a.lo = t_lo
      // a.hi = t_hi
    case InstArithmetic::Ashr: {
      // a=b>>c (signed) ==> ...
      // Ashr is similar, but the sub t_c2, c.lo, #32 should set flags,
      // and the next orr should be conditioned on PLUS. The last two
      // right shifts should also be arithmetic.
      bool IsAshr = Inst->getOp() == InstArithmetic::Ashr;
      Variable *T_Lo = makeReg(IceType_i32);
      Variable *Src1RLo = legalizeToVar(Src1Lo);
      Constant *ThirtyTwo = Ctx->getConstantInt32(32);
      Variable *T_C1 = makeReg(IceType_i32);
      Variable *T_C2 = makeReg(IceType_i32);
      _rsb(T_C1, Src1RLo, ThirtyTwo);
      _lsr(T_Lo, Src0RLo, Src1RLo);
      _orr(T_Lo, T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
                                                   OperandARM32::LSL, T_C1));
      OperandARM32::ShiftKind RShiftKind;
      CondARM32::Cond Pred;
      if (IsAshr) {
        _subs(T_C2, Src1RLo, ThirtyTwo);
        RShiftKind = OperandARM32::ASR;
        Pred = CondARM32::PL;
      } else {
        _sub(T_C2, Src1RLo, ThirtyTwo);
        RShiftKind = OperandARM32::LSR;
        Pred = CondARM32::AL;
      }
      _orr(T_Lo, T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
                                                   RShiftKind, T_C2),
           Pred);
      _mov(DestLo, T_Lo);
      Variable *T_Hi = makeReg(IceType_i32);
      _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
                                             RShiftKind, Src1RLo));
      _mov(DestHi, T_Hi);
    } break;
    case InstArithmetic::Udiv:
    case InstArithmetic::Sdiv:
    case InstArithmetic::Urem:
    case InstArithmetic::Srem:
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case InstArithmetic::Fadd:
    case InstArithmetic::Fsub:
    case InstArithmetic::Fmul:
    case InstArithmetic::Fdiv:
    case InstArithmetic::Frem:
      llvm_unreachable("FP instruction with i64 type");
      break;
    }
  } else if (isVectorType(Dest->getType())) {
    UnimplementedError(Func->getContext()->getFlags());
  } else { // Dest->getType() is non-i64 scalar
    Variable *Src0R = legalizeToVar(Inst->getSrc(0));
    Src1 = legalize(Inst->getSrc(1), Legal_Reg | Legal_Flex);
    Variable *T = makeReg(Dest->getType());
    switch (Inst->getOp()) {
    case InstArithmetic::_num:
      llvm_unreachable("Unknown arithmetic operator");
      break;
    case InstArithmetic::Add: {
      _add(T, Src0R, Src1);
      _mov(Dest, T);
    } break;
    case InstArithmetic::And: {
      _and(T, Src0R, Src1);
      _mov(Dest, T);
    } break;
    case InstArithmetic::Or: {
      _orr(T, Src0R, Src1);
      _mov(Dest, T);
    } break;
    case InstArithmetic::Xor: {
      _eor(T, Src0R, Src1);
      _mov(Dest, T);
    } break;
    case InstArithmetic::Sub: {
      _sub(T, Src0R, Src1);
      _mov(Dest, T);
    } break;
    case InstArithmetic::Mul: {
      Variable *Src1R = legalizeToVar(Src1);
      _mul(T, Src0R, Src1R);
      _mov(Dest, T);
    } break;
    case InstArithmetic::Shl:
      _lsl(T, Src0R, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Lshr:
      _lsr(T, Src0R, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Ashr:
      _asr(T, Src0R, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Udiv:
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case InstArithmetic::Sdiv:
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case InstArithmetic::Urem:
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case InstArithmetic::Srem:
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case InstArithmetic::Fadd:
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case InstArithmetic::Fsub:
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case InstArithmetic::Fmul:
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case InstArithmetic::Fdiv:
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case InstArithmetic::Frem:
      UnimplementedError(Func->getContext()->getFlags());
      break;
    }
  }
}

void TargetARM32::lowerAssign(const InstAssign *Inst) {
  Variable *Dest = Inst->getDest();
  Operand *Src0 = Inst->getSrc(0);
  assert(Dest->getType() == Src0->getType());
  if (Dest->getType() == IceType_i64) {
    Src0 = legalize(Src0);
    Operand *Src0Lo = loOperand(Src0);
    Operand *Src0Hi = hiOperand(Src0);
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    Variable *T_Lo = nullptr, *T_Hi = nullptr;
    _mov(T_Lo, Src0Lo);
    _mov(DestLo, T_Lo);
    _mov(T_Hi, Src0Hi);
    _mov(DestHi, T_Hi);
  } else {
    Operand *SrcR;
    if (Dest->hasReg()) {
      // If Dest already has a physical register, then legalize the
      // Src operand into a Variable with the same register
      // assignment. This is mostly a workaround for advanced phi
      // lowering's ad-hoc register allocation which assumes no
      // register allocation is needed when at least one of the
      // operands is non-memory.
      // TODO(jvoung): check this for ARM.
      SrcR = legalize(Src0, Legal_Reg, Dest->getRegNum());
    } else {
      // Dest could be a stack operand. Since we could potentially need
      // to do a Store (and store can only have Register operands),
      // legalize this to a register.
      SrcR = legalize(Src0, Legal_Reg);
    }
    if (isVectorType(Dest->getType())) {
      UnimplementedError(Func->getContext()->getFlags());
    } else {
      _mov(Dest, SrcR);
    }
  }
}

void TargetARM32::lowerBr(const InstBr *Inst) {
  if (Inst->isUnconditional()) {
    _br(Inst->getTargetUnconditional());
    return;
  }
  Operand *Cond = Inst->getCondition();
  // TODO(jvoung): Handle folding opportunities.

  Variable *Src0R = legalizeToVar(Cond);
  Constant *Zero = Ctx->getConstantZero(IceType_i32);
  _cmp(Src0R, Zero);
  _br(CondARM32::NE, Inst->getTargetTrue(), Inst->getTargetFalse());
}

void TargetARM32::lowerCall(const InstCall *Instr) {
  MaybeLeafFunc = false;
  NeedsStackAlignment = true;

  // Assign arguments to registers and stack. Also reserve stack.
  TargetARM32::CallingConv CC;
  // Pair of Arg Operand -> GPR number assignments.
  llvm::SmallVector<std::pair<Operand *, int32_t>,
                    TargetARM32::CallingConv::ARM32_MAX_GPR_ARG> GPRArgs;
  // Pair of Arg Operand -> stack offset.
  llvm::SmallVector<std::pair<Operand *, int32_t>, 8> StackArgs;
  int32_t ParameterAreaSizeBytes = 0;

  // Classify each argument operand according to the location where the
  // argument is passed.
  for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
    Operand *Arg = Instr->getArg(i);
    Type Ty = Arg->getType();
    bool InRegs = false;
    if (isVectorType(Ty)) {
      UnimplementedError(Func->getContext()->getFlags());
    } else if (isFloatingType(Ty)) {
      UnimplementedError(Func->getContext()->getFlags());
    } else if (Ty == IceType_i64) {
      std::pair<int32_t, int32_t> Regs;
      if (CC.I64InRegs(&Regs)) {
        InRegs = true;
        Operand *Lo = loOperand(Arg);
        Operand *Hi = hiOperand(Arg);
        GPRArgs.push_back(std::make_pair(Lo, Regs.first));
        GPRArgs.push_back(std::make_pair(Hi, Regs.second));
      }
    } else {
      assert(Ty == IceType_i32);
      int32_t Reg;
      if (CC.I32InReg(&Reg)) {
        InRegs = true;
        GPRArgs.push_back(std::make_pair(Arg, Reg));
      }
    }

    if (!InRegs) {
      ParameterAreaSizeBytes =
          applyStackAlignmentTy(ParameterAreaSizeBytes, Ty);
      StackArgs.push_back(std::make_pair(Arg, ParameterAreaSizeBytes));
      ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
    }
  }

  // Adjust the parameter area so that the stack is aligned. It is
  // assumed that the stack is already aligned at the start of the
  // calling sequence.
  ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);

  // Subtract the appropriate amount for the argument area. This also
  // takes care of setting the stack adjustment during emission.
  //
  // TODO: If for some reason the call instruction gets dead-code
  // eliminated after lowering, we would need to ensure that the
  // pre-call and the post-call esp adjustment get eliminated as well.
  if (ParameterAreaSizeBytes) {
    Operand *SubAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes),
                                  Legal_Reg | Legal_Flex);
    _adjust_stack(ParameterAreaSizeBytes, SubAmount);
  }

  // Copy arguments that are passed on the stack to the appropriate
  // stack locations.
  Variable *SP = Func->getTarget()->getPhysicalRegister(RegARM32::Reg_sp);
  for (auto &StackArg : StackArgs) {
    ConstantInteger32 *Loc =
        llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackArg.second));
    Type Ty = StackArg.first->getType();
    OperandARM32Mem *Addr;
    constexpr bool SignExt = false;
    if (OperandARM32Mem::canHoldOffset(Ty, SignExt, StackArg.second)) {
      Addr = OperandARM32Mem::create(Func, Ty, SP, Loc);
    } else {
      Variable *NewBase = Func->makeVariable(SP->getType());
      lowerArithmetic(
          InstArithmetic::create(Func, InstArithmetic::Add, NewBase, SP, Loc));
      Addr = formMemoryOperand(NewBase, Ty);
    }
    lowerStore(InstStore::create(Func, StackArg.first, Addr));
  }

  // Copy arguments to be passed in registers to the appropriate registers.
  for (auto &GPRArg : GPRArgs) {
    Variable *Reg = legalizeToVar(GPRArg.first, GPRArg.second);
    // Generate a FakeUse of register arguments so that they do not get
    // dead code eliminated as a result of the FakeKill of scratch
    // registers after the call.
    Context.insert(InstFakeUse::create(Func, Reg));
  }

  // Generate the call instruction. Assign its result to a temporary
  // with high register allocation weight.
  Variable *Dest = Instr->getDest();
  // ReturnReg doubles as ReturnRegLo as necessary.
  Variable *ReturnReg = nullptr;
  Variable *ReturnRegHi = nullptr;
  if (Dest) {
    switch (Dest->getType()) {
    case IceType_NUM:
      llvm_unreachable("Invalid Call dest type");
      break;
    case IceType_void:
      break;
    case IceType_i1:
    case IceType_i8:
    case IceType_i16:
    case IceType_i32:
      ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_r0);
      break;
    case IceType_i64:
      ReturnReg = makeReg(IceType_i32, RegARM32::Reg_r0);
      ReturnRegHi = makeReg(IceType_i32, RegARM32::Reg_r1);
      break;
    case IceType_f32:
    case IceType_f64:
      // Use S and D regs.
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case IceType_v4i1:
    case IceType_v8i1:
    case IceType_v16i1:
    case IceType_v16i8:
    case IceType_v8i16:
    case IceType_v4i32:
    case IceType_v4f32:
      // Use Q regs.
      UnimplementedError(Func->getContext()->getFlags());
      break;
    }
  }
  Operand *CallTarget = Instr->getCallTarget();
  // TODO(jvoung): Handle sandboxing.
  // const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();

  // Allow ConstantRelocatable to be left alone as a direct call,
  // but force other constants like ConstantInteger32 to be in
  // a register and make it an indirect call.
  if (!llvm::isa<ConstantRelocatable>(CallTarget)) {
    CallTarget = legalize(CallTarget, Legal_Reg);
  }
  Inst *NewCall = InstARM32Call::create(Func, ReturnReg, CallTarget);
  Context.insert(NewCall);
  if (ReturnRegHi)
    Context.insert(InstFakeDef::create(Func, ReturnRegHi));

  // Add the appropriate offset to SP. The call instruction takes care
  // of resetting the stack offset during emission.
  if (ParameterAreaSizeBytes) {
    Operand *AddAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes),
                                  Legal_Reg | Legal_Flex);
    Variable *SP = Func->getTarget()->getPhysicalRegister(RegARM32::Reg_sp);
    _add(SP, SP, AddAmount);
  }

  // Insert a register-kill pseudo instruction.
  Context.insert(InstFakeKill::create(Func, NewCall));

  // Generate a FakeUse to keep the call live if necessary.
  if (Instr->hasSideEffects() && ReturnReg) {
    Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
    Context.insert(FakeUse);
  }

  if (!Dest)
    return;

  // Assign the result of the call to Dest.
  if (ReturnReg) {
    if (ReturnRegHi) {
      assert(Dest->getType() == IceType_i64);
      split64(Dest);
      Variable *DestLo = Dest->getLo();
      Variable *DestHi = Dest->getHi();
      _mov(DestLo, ReturnReg);
      _mov(DestHi, ReturnRegHi);
    } else {
      assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
             Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
             isVectorType(Dest->getType()));
      if (isFloatingType(Dest->getType()) || isVectorType(Dest->getType())) {
        UnimplementedError(Func->getContext()->getFlags());
      } else {
        _mov(Dest, ReturnReg);
      }
    }
  }
}

void TargetARM32::lowerCast(const InstCast *Inst) {
  InstCast::OpKind CastKind = Inst->getCastKind();
  Variable *Dest = Inst->getDest();
  Operand *Src0 = Inst->getSrc(0);
  switch (CastKind) {
  default:
    Func->setError("Cast type not supported");
    return;
  case InstCast::Sext: {
    if (isVectorType(Dest->getType())) {
      UnimplementedError(Func->getContext()->getFlags());
1532 } else if (Dest->getType() == IceType_i64) {
1533 // t1 = sxtb src; t2 = t1 asr #31; dst.lo = t1; dst.hi = t2
1534 Constant *ShiftAmt = Ctx->getConstantInt32(31);
1535 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1536 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1537 Variable *T_Lo = makeReg(DestLo->getType());
1538 if (Src0->getType() == IceType_i32) {
1539 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
1540 _mov(T_Lo, Src0RF);
1541 } else if (Src0->getType() == IceType_i1) {
1542 Variable *Src0R = legalizeToVar(Src0);
1543 _lsl(T_Lo, Src0R, ShiftAmt);
1544 _asr(T_Lo, T_Lo, ShiftAmt);
1545 } else {
1546 Variable *Src0R = legalizeToVar(Src0);
1547 _sxt(T_Lo, Src0R);
1548 }
1549 _mov(DestLo, T_Lo);
1550 Variable *T_Hi = makeReg(DestHi->getType());
1551 if (Src0->getType() != IceType_i1) {
1552 _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, T_Lo,
1553 OperandARM32::ASR, ShiftAmt));
1554 } else {
1555 // For i1, the asr instruction is already done above.
1556 _mov(T_Hi, T_Lo);
1557 }
1558 _mov(DestHi, T_Hi);
1559 } else if (Src0->getType() == IceType_i1) {
1560 // GPR registers are 32-bit, so just use 31 as dst_bitwidth - 1.
1561 // lsl t1, src_reg, 31
1562 // asr t1, t1, 31
1563 // dst = t1
1564 Variable *Src0R = legalizeToVar(Src0);
1565 Constant *ShiftAmt = Ctx->getConstantInt32(31);
1566 Variable *T = makeReg(Dest->getType());
1567 _lsl(T, Src0R, ShiftAmt);
1568 _asr(T, T, ShiftAmt);
1569 _mov(Dest, T);
1570 } else {
1571 // t1 = sxt src; dst = t1
1572 Variable *Src0R = legalizeToVar(Src0);
1573 Variable *T = makeReg(Dest->getType());
1574 _sxt(T, Src0R);
1575 _mov(Dest, T);
1576 }
Jan Voungb36ad9b2015-04-21 17:01:49 -07001577 break;
1578 }
1579 case InstCast::Zext: {
Jan Voung66c3d5e2015-06-04 17:02:31 -07001580 if (isVectorType(Dest->getType())) {
1581 UnimplementedError(Func->getContext()->getFlags());
1582 } else if (Dest->getType() == IceType_i64) {
1583 // t1=uxtb src; dst.lo=t1; dst.hi=0
1584 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1585 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1586 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1587 Variable *T_Lo = makeReg(DestLo->getType());
1588 // i32 and i1 can just take up the whole register.
1589 // i32 doesn't need uxt, while i1 will have an and mask later anyway.
1590 if (Src0->getType() == IceType_i32 || Src0->getType() == IceType_i1) {
1591 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
1592 _mov(T_Lo, Src0RF);
1593 } else {
1594 Variable *Src0R = legalizeToVar(Src0);
1595 _uxt(T_Lo, Src0R);
1596 }
1597 if (Src0->getType() == IceType_i1) {
1598 Constant *One = Ctx->getConstantInt32(1);
1599 _and(T_Lo, T_Lo, One);
1600 }
1601 _mov(DestLo, T_Lo);
1602 Variable *T_Hi = makeReg(DestHi->getType());
1603 _mov(T_Hi, Zero);
1604 _mov(DestHi, T_Hi);
1605 } else if (Src0->getType() == IceType_i1) {
1606 // t = Src0; t &= 1; Dest = t
1607 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
1608 Constant *One = Ctx->getConstantInt32(1);
1609 Variable *T = makeReg(Dest->getType());
1610 // Just use _mov instead of _uxt since all registers are 32-bit.
1611 // _uxt requires the source to be a register, so it could have required
1612 // a _mov from legalize anyway.
1613 _mov(T, Src0RF);
1614 _and(T, T, One);
1615 _mov(Dest, T);
1616 } else {
1617 // t1 = uxt src; dst = t1
1618 Variable *Src0R = legalizeToVar(Src0);
1619 Variable *T = makeReg(Dest->getType());
1620 _uxt(T, Src0R);
1621 _mov(Dest, T);
1622 }
Jan Voungb36ad9b2015-04-21 17:01:49 -07001623 break;
1624 }
1625 case InstCast::Trunc: {
Jan Voung66c3d5e2015-06-04 17:02:31 -07001626 if (isVectorType(Dest->getType())) {
1627 UnimplementedError(Func->getContext()->getFlags());
1628 } else {
1629 Operand *Src0 = Inst->getSrc(0);
1630 if (Src0->getType() == IceType_i64)
1631 Src0 = loOperand(Src0);
1632 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
1633 // t1 = trunc Src0RF; Dest = t1
1634 Variable *T = makeReg(Dest->getType());
1635 _mov(T, Src0RF);
1636 if (Dest->getType() == IceType_i1)
1637 _and(T, T, Ctx->getConstantInt1(1));
1638 _mov(Dest, T);
1639 }
Jan Voungb36ad9b2015-04-21 17:01:49 -07001640 break;
1641 }
1642 case InstCast::Fptrunc:
Jan Voungb2d50842015-05-12 09:53:50 -07001643 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001644 break;
1645 case InstCast::Fpext: {
Jan Voungb2d50842015-05-12 09:53:50 -07001646 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001647 break;
1648 }
1649 case InstCast::Fptosi:
Jan Voungb2d50842015-05-12 09:53:50 -07001650 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001651 break;
1652 case InstCast::Fptoui:
Jan Voungb2d50842015-05-12 09:53:50 -07001653 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001654 break;
1655 case InstCast::Sitofp:
Jan Voungb2d50842015-05-12 09:53:50 -07001656 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001657 break;
1658 case InstCast::Uitofp: {
Jan Voungb2d50842015-05-12 09:53:50 -07001659 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001660 break;
1661 }
1662 case InstCast::Bitcast: {
Jan Voung66c3d5e2015-06-04 17:02:31 -07001663 Operand *Src0 = Inst->getSrc(0);
1664 if (Dest->getType() == Src0->getType()) {
1665 InstAssign *Assign = InstAssign::create(Func, Dest, Src0);
1666 lowerAssign(Assign);
1667 return;
1668 }
Jan Voungb2d50842015-05-12 09:53:50 -07001669 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001670 break;
1671 }
1672 }
1673}
1674
1675void TargetARM32::lowerExtractElement(const InstExtractElement *Inst) {
1676 (void)Inst;
Jan Voungb2d50842015-05-12 09:53:50 -07001677 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001678}
1679
1680void TargetARM32::lowerFcmp(const InstFcmp *Inst) {
1681 (void)Inst;
Jan Voungb2d50842015-05-12 09:53:50 -07001682 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001683}
1684
1685void TargetARM32::lowerIcmp(const InstIcmp *Inst) {
Jan Voung3bfd99a2015-05-22 16:35:25 -07001686 Variable *Dest = Inst->getDest();
1687 Operand *Src0 = Inst->getSrc(0);
1688 Operand *Src1 = Inst->getSrc(1);
1689
1690 if (isVectorType(Dest->getType())) {
1691 UnimplementedError(Func->getContext()->getFlags());
1692 return;
1693 }
1694
1695 // a=icmp cond, b, c ==>
1696 // GCC does:
1697 // cmp b.hi, c.hi or cmp b.lo, c.lo
1698 // cmp.eq b.lo, c.lo sbcs t1, b.hi, c.hi
1699 // mov.<C1> t, #1 mov.<C1> t, #1
1700 // mov.<C2> t, #0 mov.<C2> t, #0
1701 // mov a, t mov a, t
1702 // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi"
1703 // is used for signed compares. In some cases, b and c need to be swapped
1704 // as well.
1705 //
1706 // LLVM does:
1707 // for EQ and NE:
1708 // eor t1, b.hi, c.hi
1709 // eor t2, b.lo, c.lo
1710 // orrs t, t1, t2
1711 // mov.<C> t, #1
1712 // mov a, t
1713 //
1714 // that's nice in that it's just as short but has fewer dependencies
1715 // for better ILP at the cost of more registers.
1716 //
1717 // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with
1718 // two unconditional mov #0, two cmps, two conditional mov #1,
1719 // and one conditonal reg mov. That has few dependencies for good ILP,
1720 // but is a longer sequence.
1721 //
1722 // So, we are going with the GCC version since it's usually better (except
1723 // perhaps for eq/ne). We could revisit special-casing eq/ne later.
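// As a concrete sketch (the actual condition codes come from
// ICMPARM32_TABLE), "a = icmp slt i64 b, c" would lower roughly to:
// cmp b.lo, c.lo
// sbcs t1, b.hi, c.hi
// mov.lt t, #1
// mov.ge t, #0
// mov a, t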
1724 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1725 Constant *One = Ctx->getConstantInt32(1);
1726 if (Src0->getType() == IceType_i64) {
1727 InstIcmp::ICond Condition = Inst->getCondition();
1728 size_t Index = static_cast<size_t>(Condition);
1729 assert(Index < TableIcmp64Size);
1730 Variable *Src0Lo, *Src0Hi;
1731 Operand *Src1LoRF, *Src1HiRF;
1732 if (TableIcmp64[Index].Swapped) {
1733 Src0Lo = legalizeToVar(loOperand(Src1));
1734 Src0Hi = legalizeToVar(hiOperand(Src1));
1735 Src1LoRF = legalize(loOperand(Src0), Legal_Reg | Legal_Flex);
1736 Src1HiRF = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
1737 } else {
1738 Src0Lo = legalizeToVar(loOperand(Src0));
1739 Src0Hi = legalizeToVar(hiOperand(Src0));
1740 Src1LoRF = legalize(loOperand(Src1), Legal_Reg | Legal_Flex);
1741 Src1HiRF = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex);
1742 }
1743 Variable *T = makeReg(IceType_i32);
1744 if (TableIcmp64[Index].IsSigned) {
1745 Variable *ScratchReg = makeReg(IceType_i32);
1746 _cmp(Src0Lo, Src1LoRF);
1747 _sbcs(ScratchReg, Src0Hi, Src1HiRF);
1748 // ScratchReg isn't going to be used, but we need the
1749 // side-effect of setting flags from this operation.
1750 Context.insert(InstFakeUse::create(Func, ScratchReg));
1751 } else {
1752 _cmp(Src0Hi, Src1HiRF);
1753 _cmp(Src0Lo, Src1LoRF, CondARM32::EQ);
1754 }
1755 _mov(T, One, TableIcmp64[Index].C1);
1756 _mov_nonkillable(T, Zero, TableIcmp64[Index].C2);
1757 _mov(Dest, T);
1758 return;
1759 }
1760
1761 // a=icmp cond b, c ==>
1762 // GCC does:
1763 // <u/s>xtb tb, b
1764 // <u/s>xtb tc, c
1765 // cmp tb, tc
1766 // mov.C1 t, #0
1767 // mov.C2 t, #1
1768 // mov a, t
1769 // where the unsigned/sign extension is not needed for 32-bit.
1770 // They also have special cases for EQ and NE. E.g., for NE:
1771 // <extend to tb, tc>
1772 // subs t, tb, tc
1773 // movne t, #1
1774 // mov a, t
1775 //
1776 // LLVM does:
1777 // lsl tb, b, #<N>
1778 // mov t, #0
1779 // cmp tb, c, lsl #<N>
1780 // mov.<C> t, #1
1781 // mov a, t
1782 //
1783 // The left shift is by 0, 16, or 24, which allows the comparison to focus
1784 // on the bits that actually matter (for 16-bit or 8-bit signed/unsigned).
1785 // For the unsigned case, for some reason it does something similar to GCC
1786 // and does a uxtb first. It's not clear to me why that special-casing is needed.
1787 //
1788 // We'll go with the LLVM way for now, since it's shorter and has just as
1789 // few dependencies.
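// Sketch for a narrow compare (conditions again come from the icmp table):
// for "a = icmp ult i16 b, c", ShiftAmt below is 16, so the emitted code is
// roughly:
// lsl tb, b, #16
// mov t, #0
// cmp tb, c, lsl #16
// mov.lo t, #1
// mov a, t
// For i32 operands ShiftAmt is 0 and the shifts are skipped entirely.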
Jan Voung66c3d5e2015-06-04 17:02:31 -07001790 int32_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType());
1791 assert(ShiftAmt >= 0);
Jan Voung3bfd99a2015-05-22 16:35:25 -07001792 Constant *ShiftConst = nullptr;
1793 Variable *Src0R = nullptr;
1794 Variable *T = makeReg(IceType_i32);
Jan Voung66c3d5e2015-06-04 17:02:31 -07001795 if (ShiftAmt) {
1796 ShiftConst = Ctx->getConstantInt32(ShiftAmt);
Jan Voung3bfd99a2015-05-22 16:35:25 -07001797 Src0R = makeReg(IceType_i32);
1798 _lsl(Src0R, legalizeToVar(Src0), ShiftConst);
1799 } else {
1800 Src0R = legalizeToVar(Src0);
1801 }
1802 _mov(T, Zero);
Jan Voung66c3d5e2015-06-04 17:02:31 -07001803 if (ShiftAmt) {
Jan Voung3bfd99a2015-05-22 16:35:25 -07001804 Variable *Src1R = legalizeToVar(Src1);
1805 OperandARM32FlexReg *Src1RShifted = OperandARM32FlexReg::create(
1806 Func, IceType_i32, Src1R, OperandARM32::LSL, ShiftConst);
1807 _cmp(Src0R, Src1RShifted);
1808 } else {
1809 Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);
1810 _cmp(Src0R, Src1RF);
1811 }
1812 _mov_nonkillable(T, One, getIcmp32Mapping(Inst->getCondition()));
1813 _mov(Dest, T);
1814 return;
Jan Voungb36ad9b2015-04-21 17:01:49 -07001815}
1816
1817void TargetARM32::lowerInsertElement(const InstInsertElement *Inst) {
1818 (void)Inst;
Jan Voungb2d50842015-05-12 09:53:50 -07001819 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001820}
1821
1822void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
1823 switch (Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID) {
1824 case Intrinsics::AtomicCmpxchg: {
Jan Voungb2d50842015-05-12 09:53:50 -07001825 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001826 return;
1827 }
1828 case Intrinsics::AtomicFence:
Jan Voungb2d50842015-05-12 09:53:50 -07001829 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001830 return;
1831 case Intrinsics::AtomicFenceAll:
1832 // NOTE: FenceAll should prevent any load/store from being moved
1833 // across the fence (both atomic and non-atomic). The InstARM32Mfence
1834 // instruction is currently marked coarsely as "HasSideEffects".
Jan Voungb2d50842015-05-12 09:53:50 -07001835 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001836 return;
1837 case Intrinsics::AtomicIsLockFree: {
Jan Voungb2d50842015-05-12 09:53:50 -07001838 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001839 return;
1840 }
1841 case Intrinsics::AtomicLoad: {
Jan Voungb2d50842015-05-12 09:53:50 -07001842 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001843 return;
1844 }
1845 case Intrinsics::AtomicRMW:
Jan Voungb2d50842015-05-12 09:53:50 -07001846 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001847 return;
1848 case Intrinsics::AtomicStore: {
Jan Voungb2d50842015-05-12 09:53:50 -07001849 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001850 return;
1851 }
1852 case Intrinsics::Bswap: {
Jan Voungb2d50842015-05-12 09:53:50 -07001853 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001854 return;
1855 }
1856 case Intrinsics::Ctpop: {
Jan Voungb2d50842015-05-12 09:53:50 -07001857 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001858 return;
1859 }
1860 case Intrinsics::Ctlz: {
Jan Voungb2d50842015-05-12 09:53:50 -07001861 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001862 return;
1863 }
1864 case Intrinsics::Cttz: {
Jan Voungb2d50842015-05-12 09:53:50 -07001865 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001866 return;
1867 }
1868 case Intrinsics::Fabs: {
Jan Voungb2d50842015-05-12 09:53:50 -07001869 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001870 return;
1871 }
1872 case Intrinsics::Longjmp: {
1873 InstCall *Call = makeHelperCall(H_call_longjmp, nullptr, 2);
1874 Call->addArg(Instr->getArg(0));
1875 Call->addArg(Instr->getArg(1));
1876 lowerCall(Call);
1877 return;
1878 }
1879 case Intrinsics::Memcpy: {
1880 // In the future, we could potentially emit an inline memcpy/memset, etc.
1881 // for intrinsic calls w/ a known length.
1882 InstCall *Call = makeHelperCall(H_call_memcpy, nullptr, 3);
1883 Call->addArg(Instr->getArg(0));
1884 Call->addArg(Instr->getArg(1));
1885 Call->addArg(Instr->getArg(2));
1886 lowerCall(Call);
1887 return;
1888 }
1889 case Intrinsics::Memmove: {
1890 InstCall *Call = makeHelperCall(H_call_memmove, nullptr, 3);
1891 Call->addArg(Instr->getArg(0));
1892 Call->addArg(Instr->getArg(1));
1893 Call->addArg(Instr->getArg(2));
1894 lowerCall(Call);
1895 return;
1896 }
1897 case Intrinsics::Memset: {
1898 // The value operand needs to be extended to a stack slot size
1899 // because the PNaCl ABI requires arguments to be at least 32 bits
1900 // wide.
1901 Operand *ValOp = Instr->getArg(1);
1902 assert(ValOp->getType() == IceType_i8);
1903 Variable *ValExt = Func->makeVariable(stackSlotType());
1904 lowerCast(InstCast::create(Func, InstCast::Zext, ValExt, ValOp));
1905 InstCall *Call = makeHelperCall(H_call_memset, nullptr, 3);
1906 Call->addArg(Instr->getArg(0));
1907 Call->addArg(ValExt);
1908 Call->addArg(Instr->getArg(2));
1909 lowerCall(Call);
1910 return;
1911 }
1912 case Intrinsics::NaClReadTP: {
1913 if (Ctx->getFlags().getUseSandboxing()) {
Jan Voungb2d50842015-05-12 09:53:50 -07001914 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001915 } else {
1916 InstCall *Call = makeHelperCall(H_call_read_tp, Instr->getDest(), 0);
1917 lowerCall(Call);
1918 }
1919 return;
1920 }
1921 case Intrinsics::Setjmp: {
1922 InstCall *Call = makeHelperCall(H_call_setjmp, Instr->getDest(), 1);
1923 Call->addArg(Instr->getArg(0));
1924 lowerCall(Call);
1925 return;
1926 }
1927 case Intrinsics::Sqrt: {
Jan Voungb2d50842015-05-12 09:53:50 -07001928 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001929 return;
1930 }
1931 case Intrinsics::Stacksave: {
Jan Voungb2d50842015-05-12 09:53:50 -07001932 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001933 return;
1934 }
1935 case Intrinsics::Stackrestore: {
Jan Voungb2d50842015-05-12 09:53:50 -07001936 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001937 return;
1938 }
1939 case Intrinsics::Trap:
Jan Voungb2d50842015-05-12 09:53:50 -07001940 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001941 return;
1942 case Intrinsics::UnknownIntrinsic:
1943 Func->setError("Should not be lowering UnknownIntrinsic");
1944 return;
1945 }
1946 return;
1947}
1948
Jan Voungbefd03a2015-06-02 11:03:03 -07001949void TargetARM32::lowerLoad(const InstLoad *Load) {
1950 // A Load instruction can be treated the same as an Assign
1951 // instruction, after the source operand is transformed into an
1952 // OperandARM32Mem operand.
1953 Type Ty = Load->getDest()->getType();
1954 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty);
1955 Variable *DestLoad = Load->getDest();
1956
1957 // TODO(jvoung): handle folding opportunities. Sign and zero extension
1958 // can be folded into a load.
1959 InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0);
1960 lowerAssign(Assign);
Jan Voungb36ad9b2015-04-21 17:01:49 -07001961}
1962
1963void TargetARM32::doAddressOptLoad() {
Jan Voungb2d50842015-05-12 09:53:50 -07001964 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001965}
1966
1967void TargetARM32::randomlyInsertNop(float Probability) {
1968 RandomNumberGeneratorWrapper RNG(Ctx->getRNG());
1969 if (RNG.getTrueWithProbability(Probability)) {
Jan Voungb2d50842015-05-12 09:53:50 -07001970 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001971 }
1972}
1973
1974void TargetARM32::lowerPhi(const InstPhi * /*Inst*/) {
1975 Func->setError("Phi found in regular instruction list");
1976}
1977
1978void TargetARM32::lowerRet(const InstRet *Inst) {
Jan Voungb2d50842015-05-12 09:53:50 -07001979 Variable *Reg = nullptr;
1980 if (Inst->hasRetValue()) {
Jan Voungb3401d22015-05-18 09:38:21 -07001981 Operand *Src0 = Inst->getRetValue();
1982 if (Src0->getType() == IceType_i64) {
1983 Variable *R0 = legalizeToVar(loOperand(Src0), RegARM32::Reg_r0);
1984 Variable *R1 = legalizeToVar(hiOperand(Src0), RegARM32::Reg_r1);
1985 Reg = R0;
1986 Context.insert(InstFakeUse::create(Func, R1));
1987 } else if (isScalarFloatingType(Src0->getType())) {
1988 UnimplementedError(Func->getContext()->getFlags());
1989 } else if (isVectorType(Src0->getType())) {
1990 UnimplementedError(Func->getContext()->getFlags());
1991 } else {
1992 Operand *Src0F = legalize(Src0, Legal_Reg | Legal_Flex);
Jan Voung3bfd99a2015-05-22 16:35:25 -07001993 _mov(Reg, Src0F, CondARM32::AL, RegARM32::Reg_r0);
Jan Voungb3401d22015-05-18 09:38:21 -07001994 }
Jan Voungb2d50842015-05-12 09:53:50 -07001995 }
1996 // Add a ret instruction even if sandboxing is enabled, because
1997 // addEpilog explicitly looks for a ret instruction as a marker for
1998 // where to insert the frame removal instructions.
1999 // addEpilog is responsible for restoring the "lr" register as needed
2000 // prior to this ret instruction.
2001 _ret(getPhysicalRegister(RegARM32::Reg_lr), Reg);
2002 // Add a fake use of sp to make sure sp stays alive for the entire
2003 // function. Otherwise post-call sp adjustments get dead-code
2004 // eliminated. TODO: Are there more places where the fake use
2005 // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not
2006 // have a ret instruction.
2007 Variable *SP = Func->getTarget()->getPhysicalRegister(RegARM32::Reg_sp);
2008 Context.insert(InstFakeUse::create(Func, SP));
Jan Voungb36ad9b2015-04-21 17:01:49 -07002009}
2010
2011void TargetARM32::lowerSelect(const InstSelect *Inst) {
2012 (void)Inst;
Jan Voungb2d50842015-05-12 09:53:50 -07002013 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07002014}
2015
2016void TargetARM32::lowerStore(const InstStore *Inst) {
Jan Voungbefd03a2015-06-02 11:03:03 -07002017 Operand *Value = Inst->getData();
2018 Operand *Addr = Inst->getAddr();
2019 OperandARM32Mem *NewAddr = formMemoryOperand(Addr, Value->getType());
2020 Type Ty = NewAddr->getType();
2021
2022 if (Ty == IceType_i64) {
2023 Variable *ValueHi = legalizeToVar(hiOperand(Value));
2024 Variable *ValueLo = legalizeToVar(loOperand(Value));
2025 _str(ValueHi, llvm::cast<OperandARM32Mem>(hiOperand(NewAddr)));
2026 _str(ValueLo, llvm::cast<OperandARM32Mem>(loOperand(NewAddr)));
2027 } else if (isVectorType(Ty)) {
2028 UnimplementedError(Func->getContext()->getFlags());
2029 } else {
2030 Variable *ValueR = legalizeToVar(Value);
2031 _str(ValueR, NewAddr);
2032 }
Jan Voungb36ad9b2015-04-21 17:01:49 -07002033}
2034
2035void TargetARM32::doAddressOptStore() {
Jan Voungb2d50842015-05-12 09:53:50 -07002036 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07002037}
2038
2039void TargetARM32::lowerSwitch(const InstSwitch *Inst) {
2040 (void)Inst;
Jan Voungb2d50842015-05-12 09:53:50 -07002041 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07002042}
2043
2044void TargetARM32::lowerUnreachable(const InstUnreachable * /*Inst*/) {
Jan Voungb3401d22015-05-18 09:38:21 -07002045 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07002046}
2047
2048// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to
2049// preserve integrity of liveness analysis. Undef values are also
2050// turned into zeroes, since loOperand() and hiOperand() don't expect
2051// Undef input.
2052void TargetARM32::prelowerPhis() {
Jan Voungb2d50842015-05-12 09:53:50 -07002053 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07002054}
2055
2056// Lower the pre-ordered list of assignments into mov instructions.
2057// Also has to do some ad-hoc register allocation as necessary.
2058void TargetARM32::lowerPhiAssignments(CfgNode *Node,
2059 const AssignList &Assignments) {
2060 (void)Node;
2061 (void)Assignments;
Jan Voungb2d50842015-05-12 09:53:50 -07002062 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07002063}
2064
Jan Voungb3401d22015-05-18 09:38:21 -07002065Variable *TargetARM32::makeVectorOfZeros(Type Ty, int32_t RegNum) {
2066 Variable *Reg = makeReg(Ty, RegNum);
2067 UnimplementedError(Func->getContext()->getFlags());
2068 return Reg;
2069}
2070
2071// Helper for legalize() to emit the right code to lower an operand to a
2072// register of the appropriate type.
2073Variable *TargetARM32::copyToReg(Operand *Src, int32_t RegNum) {
2074 Type Ty = Src->getType();
2075 Variable *Reg = makeReg(Ty, RegNum);
2076 if (isVectorType(Ty)) {
2077 UnimplementedError(Func->getContext()->getFlags());
2078 } else {
2079 // Mov's Src operand can really only be the flexible second operand type
2080 // or a register. Users should guarantee that.
2081 _mov(Reg, Src);
2082 }
2083 return Reg;
2084}
2085
2086Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
2087 int32_t RegNum) {
2088 // Assert that a physical register is allowed. To date, all calls
2089 // to legalize() allow a physical register. Legal_Flex converts
2090 // registers to the right type OperandARM32FlexReg as needed.
2091 assert(Allowed & Legal_Reg);
2092 // Go through the various types of operands:
2093 // OperandARM32Mem, OperandARM32Flex, Constant, and Variable.
2094 // Given the above assertion, if the type of the operand is not legal
2095 // (e.g., OperandARM32Mem and !Legal_Mem), we can always copy
2096 // to a register.
2097 if (auto Mem = llvm::dyn_cast<OperandARM32Mem>(From)) {
2098 // Before doing anything with a Mem operand, we need to ensure
2099 // that the Base and Index components are in physical registers.
2100 Variable *Base = Mem->getBase();
2101 Variable *Index = Mem->getIndex();
2102 Variable *RegBase = nullptr;
2103 Variable *RegIndex = nullptr;
2104 if (Base) {
2105 RegBase = legalizeToVar(Base);
2106 }
2107 if (Index) {
2108 RegIndex = legalizeToVar(Index);
2109 }
2110 // Create a new operand if there was a change.
2111 if (Base != RegBase || Index != RegIndex) {
2112 // There is only a reg +/- reg or reg + imm form.
2113 // Figure out which to re-create.
2114 if (Mem->isRegReg()) {
2115 Mem = OperandARM32Mem::create(Func, Mem->getType(), RegBase, RegIndex,
2116 Mem->getShiftOp(), Mem->getShiftAmt(),
2117 Mem->getAddrMode());
2118 } else {
2119 Mem = OperandARM32Mem::create(Func, Mem->getType(), RegBase,
2120 Mem->getOffset(), Mem->getAddrMode());
2121 }
2122 }
2123 if (!(Allowed & Legal_Mem)) {
2124 Type Ty = Mem->getType();
2125 Variable *Reg = makeReg(Ty, RegNum);
2126 _ldr(Reg, Mem);
2127 From = Reg;
2128 } else {
2129 From = Mem;
2130 }
2131 return From;
2132 }
2133
2134 if (auto Flex = llvm::dyn_cast<OperandARM32Flex>(From)) {
2135 if (!(Allowed & Legal_Flex)) {
2136 if (auto FlexReg = llvm::dyn_cast<OperandARM32FlexReg>(Flex)) {
2137 if (FlexReg->getShiftOp() == OperandARM32::kNoShift) {
2138 From = FlexReg->getReg();
2139 // Fall through and let From be checked as a Variable below,
2140 // where it may or may not need a register.
2141 } else {
2142 return copyToReg(Flex, RegNum);
2143 }
2144 } else {
2145 return copyToReg(Flex, RegNum);
2146 }
2147 } else {
2148 return From;
2149 }
2150 }
2151
2152 if (llvm::isa<Constant>(From)) {
2153 if (llvm::isa<ConstantUndef>(From)) {
2154 // Lower undefs to zero. Another option is to lower undefs to an
2155 // uninitialized register; however, using an uninitialized register
2156 // results in less predictable code.
2157 if (isVectorType(From->getType()))
2158 return makeVectorOfZeros(From->getType(), RegNum);
2159 From = Ctx->getConstantZero(From->getType());
2160 }
2161 // There should be no constants of vector type (other than undef).
2162 assert(!isVectorType(From->getType()));
2163 bool CanBeFlex = Allowed & Legal_Flex;
2164 if (auto C32 = llvm::dyn_cast<ConstantInteger32>(From)) {
2165 uint32_t RotateAmt;
2166 uint32_t Immed_8;
2167 uint32_t Value = static_cast<uint32_t>(C32->getValue());
2168 // Check if the immediate will fit in a Flexible second operand,
2169 // if a Flexible second operand is allowed. We need to know the exact
2170 // value, so that rules out relocatable constants.
2171 // Also try the inverse and use MVN if possible.
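// Sketch of the three outcomes (values chosen only for illustration):
// 0xFF000000 is 0xFF rotated right by 8, so it can be used directly as a
// flexible immediate; 0xFFFF00FF cannot, but its inverse 0x0000FF00 can, so
// it becomes "mvn reg, #0x0000FF00"; a value like 0x12345678 fits neither
// form and falls through to the movw/movt pair below.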
2172 if (CanBeFlex &&
2173 OperandARM32FlexImm::canHoldImm(Value, &RotateAmt, &Immed_8)) {
2174 return OperandARM32FlexImm::create(Func, From->getType(), Immed_8,
2175 RotateAmt);
2176 } else if (CanBeFlex && OperandARM32FlexImm::canHoldImm(
2177 ~Value, &RotateAmt, &Immed_8)) {
2178 auto InvertedFlex = OperandARM32FlexImm::create(Func, From->getType(),
2179 Immed_8, RotateAmt);
2180 Type Ty = From->getType();
2181 Variable *Reg = makeReg(Ty, RegNum);
2182 _mvn(Reg, InvertedFlex);
2183 return Reg;
2184 } else {
2185 // Do a movw/movt to a register.
2186 Type Ty = From->getType();
2187 Variable *Reg = makeReg(Ty, RegNum);
2188 uint32_t UpperBits = (Value >> 16) & 0xFFFF;
2189 _movw(Reg,
2190 UpperBits != 0 ? Ctx->getConstantInt32(Value & 0xFFFF) : C32);
2191 if (UpperBits != 0) {
2192 _movt(Reg, Ctx->getConstantInt32(UpperBits));
2193 }
2194 return Reg;
2195 }
2196 } else if (auto C = llvm::dyn_cast<ConstantRelocatable>(From)) {
2197 Type Ty = From->getType();
2198 Variable *Reg = makeReg(Ty, RegNum);
2199 _movw(Reg, C);
2200 _movt(Reg, C);
2201 return Reg;
2202 } else {
2203 // Load floats/doubles from literal pool.
2204 UnimplementedError(Func->getContext()->getFlags());
2205 From = copyToReg(From, RegNum);
2206 }
2207 return From;
2208 }
2209
2210 if (auto Var = llvm::dyn_cast<Variable>(From)) {
2211 // Check if the variable is guaranteed a physical register. This
2212 // can happen either when the variable is pre-colored or when it is
2213 // assigned infinite weight.
2214 bool MustHaveRegister = (Var->hasReg() || Var->getWeight().isInf());
2215 // We need a new physical register for the operand if:
2216 // Mem is not allowed and Var isn't guaranteed a physical
2217 // register, or
2218 // RegNum is required and Var->getRegNum() doesn't match.
2219 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
2220 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {
2221 From = copyToReg(From, RegNum);
2222 }
2223 return From;
2224 }
2225 llvm_unreachable("Unhandled operand kind in legalize()");
2226
2227 return From;
2228}
2229
2230// Provide a trivial wrapper to legalize() for this common usage.
2231Variable *TargetARM32::legalizeToVar(Operand *From, int32_t RegNum) {
2232 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
2233}
2234
Jan Voungbefd03a2015-06-02 11:03:03 -07002235OperandARM32Mem *TargetARM32::formMemoryOperand(Operand *Operand, Type Ty) {
2236 OperandARM32Mem *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand);
2237 // It may be the case that address mode optimization already creates
2238 // an OperandARM32Mem, so in that case it wouldn't need another level
2239 // of transformation.
2240 if (Mem) {
2241 return llvm::cast<OperandARM32Mem>(legalize(Mem));
2242 }
2243 // If we didn't do address mode optimization, then we only
2244 // have a base/offset to work with. ARM always requires a base
2245 // register, so just use that to hold the operand.
2246 Variable *Base = legalizeToVar(Operand);
2247 return OperandARM32Mem::create(
2248 Func, Ty, Base,
2249 llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32)));
2250}
2251
Jan Voungb3401d22015-05-18 09:38:21 -07002252Variable *TargetARM32::makeReg(Type Type, int32_t RegNum) {
2253 // There aren't any 64-bit integer registers for ARM32.
2254 assert(Type != IceType_i64);
2255 Variable *Reg = Func->makeVariable(Type);
2256 if (RegNum == Variable::NoRegister)
2257 Reg->setWeightInfinite();
2258 else
2259 Reg->setRegNum(RegNum);
2260 return Reg;
2261}
2262
Jan Voung55500db2015-05-26 14:25:40 -07002263void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align) {
2264 assert(llvm::isPowerOf2_32(Align));
Jan Voung0fa6c5a2015-06-01 11:04:04 -07002265 uint32_t RotateAmt;
Jan Voung55500db2015-05-26 14:25:40 -07002266 uint32_t Immed_8;
2267 Operand *Mask;
2268 // Use AND or BIC to mask off the bits, depending on which immediate fits
2269 // (if it fits at all). Assume Align is usually small, in which case BIC
Jan Voung0fa6c5a2015-06-01 11:04:04 -07002270 // works better. Thus, this rounds down to the alignment.
Jan Voung55500db2015-05-26 14:25:40 -07002271 if (OperandARM32FlexImm::canHoldImm(Align - 1, &RotateAmt, &Immed_8)) {
2272 Mask = legalize(Ctx->getConstantInt32(Align - 1), Legal_Reg | Legal_Flex);
2273 _bic(Reg, Reg, Mask);
2274 } else {
2275 Mask = legalize(Ctx->getConstantInt32(-Align), Legal_Reg | Legal_Flex);
2276 _and(Reg, Reg, Mask);
2277 }
2278}
2279
Jan Voungb36ad9b2015-04-21 17:01:49 -07002280void TargetARM32::postLower() {
2281 if (Ctx->getFlags().getOptLevel() == Opt_m1)
2282 return;
Jan Voungb3401d22015-05-18 09:38:21 -07002283 inferTwoAddress();
Jan Voungb36ad9b2015-04-21 17:01:49 -07002284}
2285
2286void TargetARM32::makeRandomRegisterPermutation(
2287 llvm::SmallVectorImpl<int32_t> &Permutation,
2288 const llvm::SmallBitVector &ExcludeRegisters) const {
2289 (void)Permutation;
2290 (void)ExcludeRegisters;
Jan Voungb2d50842015-05-12 09:53:50 -07002291 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07002292}
2293
Jan Voung76bb0be2015-05-14 09:26:19 -07002294void TargetARM32::emit(const ConstantInteger32 *C) const {
2295 if (!ALLOW_DUMP)
2296 return;
2297 Ostream &Str = Ctx->getStrEmit();
2298 Str << getConstantPrefix() << C->getValue();
Jan Voungb36ad9b2015-04-21 17:01:49 -07002299}
2300
Jan Voung76bb0be2015-05-14 09:26:19 -07002301void TargetARM32::emit(const ConstantInteger64 *) const {
2302 llvm::report_fatal_error("Not expecting to emit 64-bit integers");
Jan Voungb36ad9b2015-04-21 17:01:49 -07002303}
Jan Voung76bb0be2015-05-14 09:26:19 -07002304
2305void TargetARM32::emit(const ConstantFloat *C) const {
Jan Voungb3401d22015-05-18 09:38:21 -07002306 (void)C;
Jan Voung76bb0be2015-05-14 09:26:19 -07002307 UnimplementedError(Ctx->getFlags());
2308}
2309
2310void TargetARM32::emit(const ConstantDouble *C) const {
Jan Voungb3401d22015-05-18 09:38:21 -07002311 (void)C;
Jan Voung76bb0be2015-05-14 09:26:19 -07002312 UnimplementedError(Ctx->getFlags());
2313}
2314
2315void TargetARM32::emit(const ConstantUndef *) const {
2316 llvm::report_fatal_error("undef value encountered by emitter.");
2317}
Jan Voungb36ad9b2015-04-21 17:01:49 -07002318
2319TargetDataARM32::TargetDataARM32(GlobalContext *Ctx)
2320 : TargetDataLowering(Ctx) {}
2321
John Porto8b1a7052015-06-17 13:20:08 -07002322void TargetDataARM32::lowerGlobals(const VariableDeclarationList &Vars,
2323 const IceString &SectionSuffix) {
Jan Voungb36ad9b2015-04-21 17:01:49 -07002324 switch (Ctx->getFlags().getOutFileType()) {
2325 case FT_Elf: {
2326 ELFObjectWriter *Writer = Ctx->getObjectWriter();
John Porto8b1a7052015-06-17 13:20:08 -07002327 Writer->writeDataSection(Vars, llvm::ELF::R_ARM_ABS32, SectionSuffix);
Jan Voungb36ad9b2015-04-21 17:01:49 -07002328 } break;
2329 case FT_Asm:
2330 case FT_Iasm: {
2331 const IceString &TranslateOnly = Ctx->getFlags().getTranslateOnly();
2332 OstreamLocker L(Ctx);
John Porto8b1a7052015-06-17 13:20:08 -07002333 for (const VariableDeclaration *Var : Vars) {
Jan Voungb36ad9b2015-04-21 17:01:49 -07002334 if (GlobalContext::matchSymbolName(Var->getName(), TranslateOnly)) {
John Porto8b1a7052015-06-17 13:20:08 -07002335 emitGlobal(*Var, SectionSuffix);
Jan Voungb36ad9b2015-04-21 17:01:49 -07002336 }
2337 }
2338 } break;
2339 }
2340}
2341
John Porto0f86d032015-06-15 07:44:27 -07002342void TargetDataARM32::lowerConstants() {
Jan Voungb36ad9b2015-04-21 17:01:49 -07002343 if (Ctx->getFlags().getDisableTranslation())
2344 return;
Jan Voungb2d50842015-05-12 09:53:50 -07002345 UnimplementedError(Ctx->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07002346}
2347
Jan Voungfb792842015-06-11 15:27:50 -07002348TargetHeaderARM32::TargetHeaderARM32(GlobalContext *Ctx)
2349 : TargetHeaderLowering(Ctx) {}
2350
2351void TargetHeaderARM32::lower() {
2352 OstreamLocker L(Ctx);
2353 Ostream &Str = Ctx->getStrEmit();
2354 Str << ".syntax unified\n";
2355 // Emit build attributes in format: .eabi_attribute TAG, VALUE.
2356 // See Sec. 2 of "Addenda to, and Errata in the ABI for the ARM architecture"
2357 // http://infocenter.arm.com/help/topic/com.arm.doc.ihi0045d/IHI0045D_ABI_addenda.pdf
2358 //
2359 // Tag_conformance should be emitted first in a file-scope
2360 // sub-subsection of the first public subsection of the attributes.
2361 Str << ".eabi_attribute 67, \"2.09\" @ Tag_conformance\n";
2362 // Chromebooks are at least Cortex-A15, but target Cortex-A9 for broader compatibility.
2363 Str << ".cpu cortex-a9\n"
2364 << ".eabi_attribute 6, 10 @ Tag_CPU_arch: ARMv7\n"
2365 << ".eabi_attribute 7, 65 @ Tag_CPU_arch_profile: App profile\n";
2366 Str << ".eabi_attribute 8, 1 @ Tag_ARM_ISA_use: Yes\n"
2367 << ".eabi_attribute 9, 2 @ Tag_THUMB_ISA_use: Thumb-2\n";
2368 // TODO(jvoung): check other CPU features like HW div.
2369 Str << ".fpu neon\n"
2370 << ".eabi_attribute 17, 1 @ Tag_ABI_PCS_GOT_use: permit directly\n"
2371 << ".eabi_attribute 20, 1 @ Tag_ABI_FP_denormal\n"
2372 << ".eabi_attribute 21, 1 @ Tag_ABI_FP_exceptions\n"
2373 << ".eabi_attribute 23, 3 @ Tag_ABI_FP_number_model: IEEE 754\n"
2374 << ".eabi_attribute 34, 1 @ Tag_CPU_unaligned_access\n"
2375 << ".eabi_attribute 24, 1 @ Tag_ABI_align_needed: 8-byte\n"
2376 << ".eabi_attribute 25, 1 @ Tag_ABI_align_preserved: 8-byte\n"
2377 << ".eabi_attribute 28, 1 @ Tag_ABI_VFP_args\n"
2378 << ".eabi_attribute 36, 1 @ Tag_FP_HP_extension\n"
2379 << ".eabi_attribute 38, 1 @ Tag_ABI_FP_16bit_format\n"
2380 << ".eabi_attribute 42, 1 @ Tag_MPextension_use\n"
2381 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n";
2382 // Technically R9 is used for TLS with Sandboxing, and we reserve it.
2383 // However, for compatibility with current NaCl LLVM, don't claim that.
2384 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n";
2385}
2386
Jan Voungb36ad9b2015-04-21 17:01:49 -07002387} // end of namespace Ice