blob: 423572c51319aae678cd43d0797318e78b67f904 [file] [log] [blame]
Jan Voungb36ad9b2015-04-21 17:01:49 -07001//===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//
2//
3// The Subzero Code Generator
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
Andrew Scull9612d322015-07-06 14:53:25 -07009///
10/// \file
11/// This file implements the TargetLoweringARM32 class, which consists almost
12/// entirely of the lowering sequence for each high-level instruction.
13///
Jan Voungb36ad9b2015-04-21 17:01:49 -070014//===----------------------------------------------------------------------===//
15
John Porto67f8de92015-06-25 10:14:17 -070016#include "IceTargetLoweringARM32.h"
Jan Voungb36ad9b2015-04-21 17:01:49 -070017
18#include "IceCfg.h"
19#include "IceCfgNode.h"
20#include "IceClFlags.h"
21#include "IceDefs.h"
22#include "IceELFObjectWriter.h"
23#include "IceGlobalInits.h"
24#include "IceInstARM32.h"
25#include "IceLiveness.h"
26#include "IceOperand.h"
Jan Voung53483692015-07-16 10:47:46 -070027#include "IcePhiLoweringImpl.h"
Jan Voungb36ad9b2015-04-21 17:01:49 -070028#include "IceRegistersARM32.h"
29#include "IceTargetLoweringARM32.def"
Jan Voungb36ad9b2015-04-21 17:01:49 -070030#include "IceUtils.h"
John Porto67f8de92015-06-25 10:14:17 -070031#include "llvm/Support/MathExtras.h"
Jan Voungb36ad9b2015-04-21 17:01:49 -070032
33namespace Ice {
34
Jan Voungb2d50842015-05-12 09:53:50 -070035namespace {
Jan Voung3bfd99a2015-05-22 16:35:25 -070036
// Reports that a lowering case has not been implemented yet, unless the
// --skip-unimplemented flag was given (used to keep translation going while
// the backend is still under construction).
void UnimplementedError(const ClFlags &Flags) {
  if (!Flags.getSkipUnimplemented()) {
    // Use llvm_unreachable instead of report_fatal_error, which gives better
    // stack traces.
    llvm_unreachable("Not yet implemented");
    // llvm_unreachable may compile to nothing in release builds, so abort()
    // is kept as a hard stop in case control actually reaches this point.
    abort();
  }
}
Jan Voungb3401d22015-05-18 09:38:21 -070045
// The following table summarizes the logic for lowering the icmp instruction
// for i32 and narrower types. Each icmp condition has a clear mapping to an
// ARM32 conditional move instruction.
// The table is generated via x-macro expansion of ICMPARM32_TABLE: one entry
// per row, indexed by the high-level InstIcmp condition value (the key
// consistency is validated by the static_asserts in namespace dummy1 below).

const struct TableIcmp32_ {
  CondARM32::Cond Mapping;
} TableIcmp32[] = {
#define X(val, is_signed, swapped64, C_32, C1_64, C2_64)                       \
  { CondARM32::C_32 }                                                          \
  ,
    ICMPARM32_TABLE
#undef X
};
const size_t TableIcmp32Size = llvm::array_lengthof(TableIcmp32);
60
// The following table summarizes the logic for lowering the icmp instruction
// for the i64 type. Two conditional moves are needed for setting to 1 or 0.
// The operands may need to be swapped, and there is a slight difference
// for signed vs unsigned (comparing hi vs lo first, and using cmp vs sbc).
// As with TableIcmp32, the entries are generated from ICMPARM32_TABLE and
// indexed by the high-level InstIcmp condition value.
const struct TableIcmp64_ {
  bool IsSigned;
  bool Swapped;
  CondARM32::Cond C1, C2;
} TableIcmp64[] = {
#define X(val, is_signed, swapped64, C_32, C1_64, C2_64)                       \
  { is_signed, swapped64, CondARM32::C1_64, CondARM32::C2_64 }                 \
  ,
    ICMPARM32_TABLE
#undef X
};
const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64);
77
78CondARM32::Cond getIcmp32Mapping(InstIcmp::ICond Cond) {
79 size_t Index = static_cast<size_t>(Cond);
80 assert(Index < TableIcmp32Size);
81 return TableIcmp32[Index].Mapping;
82}
83
// In some cases, there are x-macros tables for both high-level and
// low-level instructions/operands that use the same enum key value.
// The tables are kept separate to maintain a proper separation
// between abstraction layers. There is a risk that the tables could
// get out of sync if enum values are reordered or if entries are
// added or deleted. The following dummy namespaces use
// static_asserts to ensure everything is kept in sync.

// Validate the enum values in ICMPARM32_TABLE.
namespace dummy1 {
// Define a temporary set of enum values based on low-level table
// entries. Each enumerator _tmp_<val> gets the positional index of <val>
// in ICMPARM32_TABLE.
enum _tmp_enum {
#define X(val, signed, swapped64, C_32, C1_64, C2_64) _tmp_##val,
  ICMPARM32_TABLE
#undef X
  _num
};
// Define a set of constants based on high-level table entries.
#define X(tag, str) static const int _table1_##tag = InstIcmp::tag;
ICEINSTICMP_TABLE
#undef X
// Define a set of constants based on low-level table entries, and
// ensure the table entry keys are consistent. This direction catches
// low-level entries that are missing, reordered, or renamed relative to
// the high-level enum.
#define X(val, signed, swapped64, C_32, C1_64, C2_64)                          \
  static const int _table2_##val = _tmp_##val;                                 \
  static_assert(                                                               \
      _table1_##val == _table2_##val,                                          \
      "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE");
ICMPARM32_TABLE
#undef X
// Repeat the static asserts with respect to the high-level table
// entries in case the high-level table has extra entries (a _table2_<tag>
// that was never defined produces a compile error here).
#define X(tag, str)                                                            \
  static_assert(                                                               \
      _table1_##tag == _table2_##tag,                                          \
      "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE");
ICEINSTICMP_TABLE
#undef X
} // end of namespace dummy1
124
// Stack alignment: the ARM32 stack is kept 16-byte aligned here.
const uint32_t ARM32_STACK_ALIGNMENT_BYTES = 16;

// Value is in bytes. Return Value adjusted to the next highest multiple
// of the stack alignment (i.e. rounded up to ARM32_STACK_ALIGNMENT_BYTES).
uint32_t applyStackAlignment(uint32_t Value) {
  return Utils::applyAlignment(Value, ARM32_STACK_ALIGNMENT_BYTES);
}
133
// Value is in bytes. Return Value adjusted to the next highest multiple
// of the stack alignment required for the given type.
uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) {
  // Use natural alignment (the type's width in bytes), except that normally
  // (non-NaCl) ARM only aligns vectors to 8 bytes.
  // TODO(jvoung): Check this ...
  size_t typeAlignInBytes = typeWidthInBytes(Ty);
  if (isVectorType(Ty))
    typeAlignInBytes = 8; // vectors capped at 8-byte alignment, per above
  return Utils::applyAlignment(Value, typeAlignInBytes);
}
145
Jan Voung6ec369e2015-06-30 11:03:15 -0700146// Conservatively check if at compile time we know that the operand is
147// definitely a non-zero integer.
148bool isGuaranteedNonzeroInt(const Operand *Op) {
149 if (auto *Const = llvm::dyn_cast_or_null<ConstantInteger32>(Op)) {
150 return Const->getValue() != 0;
151 }
152 return false;
153}
154
Jan Voungb2d50842015-05-12 09:53:50 -0700155} // end of anonymous namespace
156
// Derives the ARM32 instruction-set level from the command-line flags by
// translating the target-independent TargetInstructionSet value into the
// ARM32-local ARM32InstructionSet enum.
TargetARM32Features::TargetARM32Features(const ClFlags &Flags) {
  // The two enum ranges must be the same size for the offset arithmetic
  // below to be valid.
  static_assert(
      (ARM32InstructionSet::End - ARM32InstructionSet::Begin) ==
          (TargetInstructionSet::ARM32InstructionSet_End -
           TargetInstructionSet::ARM32InstructionSet_Begin),
      "ARM32InstructionSet range different from TargetInstructionSet");
  if (Flags.getTargetInstructionSet() !=
      TargetInstructionSet::BaseInstructionSet) {
    // Rebase the flag value from the target-independent range onto the
    // ARM32-local range. BaseInstructionSet keeps the default.
    InstructionSet = static_cast<ARM32InstructionSet>(
        (Flags.getTargetInstructionSet() -
         TargetInstructionSet::ARM32InstructionSet_Begin) +
        ARM32InstructionSet::Begin);
  }
}
171
// Constructs the ARM32 target lowering object: builds the per-type register
// availability sets, the register alias bitmaps, and the scratch-register
// set, all driven by a single x-macro expansion of REGARM32_TABLE.
TargetARM32::TargetARM32(Cfg *Func)
    : TargetLowering(Func), CPUFeatures(Func->getContext()->getFlags()) {
  // TODO: Don't initialize IntegerRegisters and friends every time.
  // Instead, initialize in some sort of static initializer for the
  // class.
  // Limit this size (or do all bitsets need to be the same width)???
  llvm::SmallBitVector IntegerRegisters(RegARM32::Reg_NUM);
  llvm::SmallBitVector Float32Registers(RegARM32::Reg_NUM);
  llvm::SmallBitVector Float64Registers(RegARM32::Reg_NUM);
  llvm::SmallBitVector VectorRegisters(RegARM32::Reg_NUM);
  llvm::SmallBitVector InvalidRegisters(RegARM32::Reg_NUM);
  ScratchRegs.resize(RegARM32::Reg_NUM);
// For each register: record which register classes it belongs to, populate
// its alias bitmap from the table's alias_init list (asserting no duplicate
// aliases, and that every register aliases itself), and mark scratch regs.
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,    \
          isFP32, isFP64, isVec128, alias_init)                                \
  IntegerRegisters[RegARM32::val] = isInt;                                     \
  Float32Registers[RegARM32::val] = isFP32;                                    \
  Float64Registers[RegARM32::val] = isFP64;                                    \
  VectorRegisters[RegARM32::val] = isVec128;                                   \
  RegisterAliases[RegARM32::val].resize(RegARM32::Reg_NUM);                    \
  for (SizeT RegAlias : alias_init) {                                          \
    assert(!RegisterAliases[RegARM32::val][RegAlias] &&                        \
           "Duplicate alias for " #val);                                       \
    RegisterAliases[RegARM32::val].set(RegAlias);                              \
  }                                                                            \
  RegisterAliases[RegARM32::val].resize(RegARM32::Reg_NUM);                    \
  assert(RegisterAliases[RegARM32::val][RegARM32::val]);                       \
  ScratchRegs[RegARM32::val] = scratch;
  REGARM32_TABLE;
#undef X
  // Map each IR type to the register class that can hold it. i64 shares the
  // integer set (it is split into register pairs elsewhere).
  TypeToRegisterSet[IceType_void] = InvalidRegisters;
  TypeToRegisterSet[IceType_i1] = IntegerRegisters;
  TypeToRegisterSet[IceType_i8] = IntegerRegisters;
  TypeToRegisterSet[IceType_i16] = IntegerRegisters;
  TypeToRegisterSet[IceType_i32] = IntegerRegisters;
  TypeToRegisterSet[IceType_i64] = IntegerRegisters;
  TypeToRegisterSet[IceType_f32] = Float32Registers;
  TypeToRegisterSet[IceType_f64] = Float64Registers;
  TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i8] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i16] = VectorRegisters;
  TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
  TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
}
217
// Runs the full -O2 translation pipeline for one function: Phi lowering,
// address-mode optimization, argument lowering, target lowering, register
// allocation, frame generation, and branch optimization. Pass ordering here
// is load-bearing; most passes bail out early on error via Func->hasError().
void TargetARM32::translateO2() {
  TimerMarker T(TimerStack::TT_O2, Func);

  // TODO(stichnot): share passes with X86?
  // https://code.google.com/p/nativeclient/issues/detail?id=4094

  if (!Ctx->getFlags().getPhiEdgeSplit()) {
    // Lower Phi instructions.
    Func->placePhiLoads();
    if (Func->hasError())
      return;
    Func->placePhiStores();
    if (Func->hasError())
      return;
    Func->deletePhis();
    if (Func->hasError())
      return;
    Func->dump("After Phi lowering");
  }

  // Address mode optimization.
  Func->getVMetadata()->init(VMK_SingleDefs);
  Func->doAddressOpt();

  // Argument lowering
  Func->doArgLowering();

  // Target lowering. This requires liveness analysis for some parts
  // of the lowering decisions, such as compare/branch fusing. If
  // non-lightweight liveness analysis is used, the instructions need
  // to be renumbered first. TODO: This renumbering should only be
  // necessary if we're actually calculating live intervals, which we
  // only do for register allocation.
  Func->renumberInstructions();
  if (Func->hasError())
    return;

  // TODO: It should be sufficient to use the fastest liveness
  // calculation, i.e. livenessLightweight(). However, for some
  // reason that slows down the rest of the translation. Investigate.
  Func->liveness(Liveness_Basic);
  if (Func->hasError())
    return;
  Func->dump("After ARM32 address mode opt");

  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After ARM32 codegen");

  // Register allocation. This requires instruction renumbering and
  // full liveness analysis.
  Func->renumberInstructions();
  if (Func->hasError())
    return;
  Func->liveness(Liveness_Intervals);
  if (Func->hasError())
    return;
  // Validate the live range computations. The expensive validation
  // call is deliberately only made when assertions are enabled.
  assert(Func->validateLiveness());
  // The post-codegen dump is done here, after liveness analysis and
  // associated cleanup, to make the dump cleaner and more useful.
  Func->dump("After initial ARM32 codegen");
  Func->getVMetadata()->init(VMK_All);
  regAlloc(RAK_Global);
  if (Func->hasError())
    return;
  Func->dump("After linear scan regalloc");

  if (Ctx->getFlags().getPhiEdgeSplit()) {
    // With edge splitting enabled, Phi lowering happens here instead of at
    // the start of the pipeline, after registers have been allocated.
    Func->advancedPhiLowering();
    Func->dump("After advanced Phi lowering");
  }

  // Stack frame mapping.
  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  // Rewrite stack-slot references whose offsets exceed the ARM32 immediate
  // range, now that final frame offsets are known.
  legalizeStackSlots();
  if (Func->hasError())
    return;
  Func->dump("After legalizeStackSlots");

  Func->contractEmptyNodes();
  Func->reorderNodes();

  // Branch optimization. This needs to be done just before code
  // emission. In particular, no transformations that insert or
  // reorder CfgNodes should be done after branch optimization. We go
  // ahead and do it before nop insertion to reduce the amount of work
  // needed for searching for opportunities.
  Func->doBranchOpt();
  Func->dump("After branch optimization");

  // Nop insertion
  if (Ctx->getFlags().shouldDoNopInsertion()) {
    Func->doNopInsertion();
  }
}
320
// Runs the minimal -Om1 translation pipeline: Phi lowering, argument
// lowering, codegen, then register allocation restricted to infinite-weight
// variables, frame generation, and stack-slot legalization. No liveness
// analysis or branch optimization is performed at this level.
void TargetARM32::translateOm1() {
  TimerMarker T(TimerStack::TT_Om1, Func);

  // TODO: share passes with X86?

  Func->placePhiLoads();
  if (Func->hasError())
    return;
  Func->placePhiStores();
  if (Func->hasError())
    return;
  Func->deletePhis();
  if (Func->hasError())
    return;
  Func->dump("After Phi lowering");

  Func->doArgLowering();

  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After initial ARM32 codegen");

  // Only variables that must live in registers get one; everything else
  // stays on the stack.
  regAlloc(RAK_InfOnly);
  if (Func->hasError())
    return;
  Func->dump("After regalloc of infinite-weight variables");

  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  // Rewrite stack-slot references whose offsets exceed the ARM32 immediate
  // range, now that final frame offsets are known.
  legalizeStackSlots();
  if (Func->hasError())
    return;
  Func->dump("After legalizeStackSlots");

  // Nop insertion
  if (Ctx->getFlags().shouldDoNopInsertion()) {
    Func->doNopInsertion();
  }
}
364
365bool TargetARM32::doBranchOpt(Inst *I, const CfgNode *NextNode) {
Jan Voung3bfd99a2015-05-22 16:35:25 -0700366 if (InstARM32Br *Br = llvm::dyn_cast<InstARM32Br>(I)) {
367 return Br->optimizeBranch(NextNode);
368 }
Jan Voungb2d50842015-05-12 09:53:50 -0700369 return false;
Jan Voungb36ad9b2015-04-21 17:01:49 -0700370}
371
// Returns the assembly name for the given physical register. The type is
// currently unused on ARM32 (each register has a single name, unlike x86's
// width-dependent names).
IceString TargetARM32::getRegName(SizeT RegNum, Type Ty) const {
  assert(RegNum < RegARM32::Reg_NUM);
  (void)Ty;
  // Name table generated from REGARM32_TABLE, indexed by register number.
  static const char *RegNames[] = {
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,    \
          isFP32, isFP64, isVec128, alias_init)                                \
  name,
      REGARM32_TABLE
#undef X
  };

  return RegNames[RegNum];
}
385
386Variable *TargetARM32::getPhysicalRegister(SizeT RegNum, Type Ty) {
387 if (Ty == IceType_void)
388 Ty = IceType_i32;
389 if (PhysicalRegisters[Ty].empty())
390 PhysicalRegisters[Ty].resize(RegARM32::Reg_NUM);
391 assert(RegNum < PhysicalRegisters[Ty].size());
392 Variable *Reg = PhysicalRegisters[Ty][RegNum];
393 if (Reg == nullptr) {
394 Reg = Func->makeVariable(Ty);
395 Reg->setRegNum(RegNum);
396 PhysicalRegisters[Ty][RegNum] = Reg;
Jan Voungb2d50842015-05-12 09:53:50 -0700397 // Specially mark SP and LR as an "argument" so that it is considered
Jan Voungb36ad9b2015-04-21 17:01:49 -0700398 // live upon function entry.
Jan Voungb2d50842015-05-12 09:53:50 -0700399 if (RegNum == RegARM32::Reg_sp || RegNum == RegARM32::Reg_lr) {
Jan Voungb36ad9b2015-04-21 17:01:49 -0700400 Func->addImplicitArg(Reg);
401 Reg->setIgnoreLiveness();
402 }
403 }
404 return Reg;
405}
406
// Jump-table emission is not yet implemented for ARM32; this stub reports
// the unimplemented lowering (or is skipped under --skip-unimplemented).
void TargetARM32::emitJumpTable(const Cfg *Func,
                                const InstJumpTable *JumpTable) const {
  (void)JumpTable;
  UnimplementedError(Func->getContext()->getFlags());
}
412
// Emits the assembly operand text for Var: either a register name, or a
// "[base, #offset]" stack reference. Fatal errors on an infinite-weight
// variable with no register, or on an offset outside the legal range.
void TargetARM32::emitVariable(const Variable *Var) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Ctx->getStrEmit();
  if (Var->hasReg()) {
    Str << getRegName(Var->getRegNum(), Var->getType());
    return;
  }
  if (Var->mustHaveReg()) {
    llvm::report_fatal_error(
        "Infinite-weight Variable has no register assigned");
  }
  int32_t Offset = Var->getStackOffset();
  int32_t BaseRegNum = Var->getBaseRegNum();
  if (BaseRegNum == Variable::NoRegister) {
    // No explicit base register: address relative to the frame or stack
    // pointer. SP-relative offsets must account for the current stack
    // adjustment (e.g. pushed call arguments).
    BaseRegNum = getFrameOrStackReg();
    if (!hasFramePointer())
      Offset += getStackAdjustment();
  }
  if (!isLegalVariableStackOffset(Offset)) {
    llvm::report_fatal_error("Illegal stack offset");
  }
  const Type FrameSPTy = stackSlotType();
  Str << "[" << getRegName(BaseRegNum, FrameSPTy);
  if (Offset != 0) {
    Str << ", " << getConstantPrefix() << Offset;
  }
  Str << "]";
}
442
// Tries to assign a GPR pair (lo, hi) for an i64 argument, per the ARM
// calling convention. Returns false when the argument must go on the stack.
// Note: even on failure, registers consumed by the even-alignment padding
// stay consumed, which matches the AAPCS "once a slot is skipped it is not
// reused" behavior.
bool TargetARM32::CallingConv::I64InRegs(std::pair<int32_t, int32_t> *Regs) {
  if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG)
    return false;
  int32_t RegLo, RegHi;
  // Always start i64 registers at an even register, so this may end
  // up padding away a register.
  NumGPRRegsUsed = Utils::applyAlignment(NumGPRRegsUsed, 2);
  RegLo = RegARM32::Reg_r0 + NumGPRRegsUsed;
  ++NumGPRRegsUsed;
  RegHi = RegARM32::Reg_r0 + NumGPRRegsUsed;
  ++NumGPRRegsUsed;
  // If this bumps us past the boundary, don't allocate to a register
  // and leave any previously speculatively consumed registers as consumed.
  if (NumGPRRegsUsed > ARM32_MAX_GPR_ARG)
    return false;
  Regs->first = RegLo;
  Regs->second = RegHi;
  return true;
}
462
463bool TargetARM32::CallingConv::I32InReg(int32_t *Reg) {
464 if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG)
465 return false;
466 *Reg = RegARM32::Reg_r0 + NumGPRRegsUsed;
467 ++NumGPRRegsUsed;
468 return true;
469}
470
// Tries to assign a floating-point/vector argument register. FP registers
// are tracked in units of S registers (one unit each); D registers consume
// 2 aligned units and Q registers 4 aligned units. Returns false when the
// argument must be passed on the stack. As with I64InRegs, units consumed
// by alignment padding stay consumed even on failure.
bool TargetARM32::CallingConv::FPInReg(Type Ty, int32_t *Reg) {
  if (NumFPRegUnits >= ARM32_MAX_FP_REG_UNITS)
    return false;
  if (isVectorType(Ty)) {
    NumFPRegUnits = Utils::applyAlignment(NumFPRegUnits, 4);
    // Q registers are declared in reverse order, so
    // RegARM32::Reg_q0 > RegARM32::Reg_q1. Therefore, we need to subtract
    // NumFPRegUnits from Reg_q0. Same thing goes for D registers.
    static_assert(RegARM32::Reg_q0 > RegARM32::Reg_q1,
                  "ARM32 Q registers are possibly declared incorrectly.");
    *Reg = RegARM32::Reg_q0 - (NumFPRegUnits / 4);
    NumFPRegUnits += 4;
    // If this bumps us past the boundary, don't allocate to a register
    // and leave any previously speculatively consumed registers as consumed.
    if (NumFPRegUnits > ARM32_MAX_FP_REG_UNITS)
      return false;
  } else if (Ty == IceType_f64) {
    static_assert(RegARM32::Reg_d0 > RegARM32::Reg_d1,
                  "ARM32 D registers are possibly declared incorrectly.");
    NumFPRegUnits = Utils::applyAlignment(NumFPRegUnits, 2);
    *Reg = RegARM32::Reg_d0 - (NumFPRegUnits / 2);
    NumFPRegUnits += 2;
    // If this bumps us past the boundary, don't allocate to a register
    // and leave any previously speculatively consumed registers as consumed.
    if (NumFPRegUnits > ARM32_MAX_FP_REG_UNITS)
      return false;
  } else {
    // f32 consumes a single S register; S registers are declared in
    // ascending order, unlike D and Q.
    static_assert(RegARM32::Reg_s0 < RegARM32::Reg_s1,
                  "ARM32 S registers are possibly declared incorrectly.");
    assert(Ty == IceType_f32);
    *Reg = RegARM32::Reg_s0 + NumFPRegUnits;
    ++NumFPRegUnits;
  }
  return true;
}
506
// Rewrites register-passed arguments: each such argument is replaced in the
// argument list by a "home register" variable pinned to its ABI register,
// and an assignment copies the home register into the original variable at
// function entry. Stack-passed arguments are left alone (handled later by
// finishArgumentLowering).
void TargetARM32::lowerArguments() {
  VarList &Args = Func->getArgs();
  TargetARM32::CallingConv CC;

  // For each register argument, replace Arg in the argument list with the
  // home register. Then generate an instruction in the prolog to copy the
  // home register to the assigned location of Arg.
  Context.init(Func->getEntryNode());
  Context.setInsertPoint(Context.getCur());

  for (SizeT I = 0, E = Args.size(); I < E; ++I) {
    Variable *Arg = Args[I];
    Type Ty = Arg->getType();
    if (Ty == IceType_i64) {
      // i64 arguments occupy an even/odd GPR pair; model that with lo/hi
      // i32 halves linked to a single i64 home-register variable.
      std::pair<int32_t, int32_t> RegPair;
      if (!CC.I64InRegs(&RegPair))
        continue; // passed on the stack
      Variable *RegisterArg = Func->makeVariable(Ty);
      Variable *RegisterLo = Func->makeVariable(IceType_i32);
      Variable *RegisterHi = Func->makeVariable(IceType_i32);
      if (BuildDefs::dump()) {
        RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
        RegisterLo->setName(Func, "home_reg_lo:" + Arg->getName(Func));
        RegisterHi->setName(Func, "home_reg_hi:" + Arg->getName(Func));
      }
      RegisterLo->setRegNum(RegPair.first);
      RegisterLo->setIsArg();
      RegisterHi->setRegNum(RegPair.second);
      RegisterHi->setIsArg();
      RegisterArg->setLoHi(RegisterLo, RegisterHi);
      RegisterArg->setIsArg();
      // The home register takes over the "argument" role from Arg.
      Arg->setIsArg(false);

      Args[I] = RegisterArg;
      Context.insert(InstAssign::create(Func, Arg, RegisterArg));
      continue;
    } else {
      int32_t RegNum;
      if (isVectorType(Ty) || isFloatingType(Ty)) {
        if (!CC.FPInReg(Ty, &RegNum))
          continue; // passed on the stack
      } else {
        assert(Ty == IceType_i32);
        if (!CC.I32InReg(&RegNum))
          continue; // passed on the stack
      }
      Variable *RegisterArg = Func->makeVariable(Ty);
      if (BuildDefs::dump()) {
        RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
      }
      RegisterArg->setRegNum(RegNum);
      RegisterArg->setIsArg();
      Arg->setIsArg(false);

      Args[I] = RegisterArg;
      Context.insert(InstAssign::create(Func, Arg, RegisterArg));
      continue;
    }
  }
}
567
// Helper function for addProlog().
//
// This assumes Arg is an argument passed on the stack. This sets the
// frame offset for Arg and updates InArgsSizeBytes according to Arg's
// width. For an I64 arg that has been split into Lo and Hi components,
// it calls itself recursively on the components, taking care to handle
// Lo first because of the little-endian architecture. Lastly, this
// function generates an instruction to copy Arg into its assigned
// register if applicable.
void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
                                         size_t BasicFrameOffset,
                                         size_t &InArgsSizeBytes) {
  Variable *Lo = Arg->getLo();
  Variable *Hi = Arg->getHi();
  Type Ty = Arg->getType();
  if (Lo && Hi && Ty == IceType_i64) {
    assert(Lo->getType() != IceType_i64); // don't want infinite recursion
    assert(Hi->getType() != IceType_i64); // don't want infinite recursion
    // Lo is laid out first (lower address) on this little-endian target.
    finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    return;
  }
  // Round the running in-args size up to this type's stack alignment before
  // assigning the offset.
  InArgsSizeBytes = applyStackAlignmentTy(InArgsSizeBytes, Ty);
  Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
  InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
  // If the argument variable has been assigned a register, we need to load
  // the value from the stack slot.
  if (Arg->hasReg()) {
    assert(Ty != IceType_i64);
    OperandARM32Mem *Mem = OperandARM32Mem::create(
        Func, Ty, FramePtr, llvm::cast<ConstantInteger32>(
                                Ctx->getConstantInt32(Arg->getStackOffset())));
    if (isVectorType(Arg->getType())) {
      // Use vld1.$elem or something?
      UnimplementedError(Func->getContext()->getFlags());
    } else if (isFloatingType(Arg->getType())) {
      _vldr(Arg, Mem);
    } else {
      _ldr(Arg, Mem);
    }
    // This argument-copying instruction uses an explicit
    // OperandARM32Mem operand instead of a Variable, so its
    // fill-from-stack operation has to be tracked separately for
    // statistics.
    Ctx->statsUpdateFills();
  }
}
615
// Stack slots on ARM32 are addressed as i32 words.
Type TargetARM32::stackSlotType() { return IceType_i32; }
617
618void TargetARM32::addProlog(CfgNode *Node) {
Jan Voung0fa6c5a2015-06-01 11:04:04 -0700619 // Stack frame layout:
620 //
621 // +------------------------+
622 // | 1. preserved registers |
623 // +------------------------+
624 // | 2. padding |
Jan Voung28068ad2015-07-31 12:58:46 -0700625 // +------------------------+ <--- FramePointer (if used)
Jan Voung0fa6c5a2015-06-01 11:04:04 -0700626 // | 3. global spill area |
627 // +------------------------+
628 // | 4. padding |
629 // +------------------------+
630 // | 5. local spill area |
631 // +------------------------+
632 // | 6. padding |
633 // +------------------------+
634 // | 7. allocas |
Jan Voung28068ad2015-07-31 12:58:46 -0700635 // +------------------------+ <--- StackPointer
Jan Voung0fa6c5a2015-06-01 11:04:04 -0700636 //
637 // The following variables record the size in bytes of the given areas:
638 // * PreservedRegsSizeBytes: area 1
639 // * SpillAreaPaddingBytes: area 2
640 // * GlobalsSize: area 3
641 // * GlobalsAndSubsequentPaddingSize: areas 3 - 4
642 // * LocalsSpillAreaSize: area 5
643 // * SpillAreaSizeBytes: areas 2 - 6
644 // Determine stack frame offsets for each Variable without a
645 // register assignment. This can be done as one variable per stack
646 // slot. Or, do coalescing by running the register allocator again
647 // with an infinite set of registers (as a side effect, this gives
648 // variables a second chance at physical register assignment).
649 //
650 // A middle ground approach is to leverage sparsity and allocate one
651 // block of space on the frame for globals (variables with
652 // multi-block lifetime), and one block to share for locals
653 // (single-block lifetime).
654
655 Context.init(Node);
656 Context.setInsertPoint(Context.getCur());
657
658 llvm::SmallBitVector CalleeSaves =
659 getRegisterSet(RegSet_CalleeSave, RegSet_None);
660 RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
661 VarList SortedSpilledVariables;
662 size_t GlobalsSize = 0;
663 // If there is a separate locals area, this represents that area.
664 // Otherwise it counts any variable not counted by GlobalsSize.
665 SpillAreaSizeBytes = 0;
666 // If there is a separate locals area, this specifies the alignment
667 // for it.
668 uint32_t LocalsSlotsAlignmentBytes = 0;
669 // The entire spill locations area gets aligned to largest natural
670 // alignment of the variables that have a spill slot.
671 uint32_t SpillAreaAlignmentBytes = 0;
672 // For now, we don't have target-specific variables that need special
673 // treatment (no stack-slot-linked SpillVariable type).
674 std::function<bool(Variable *)> TargetVarHook =
675 [](Variable *) { return false; };
676
677 // Compute the list of spilled variables and bounds for GlobalsSize, etc.
678 getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
679 &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
680 &LocalsSlotsAlignmentBytes, TargetVarHook);
681 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
682 SpillAreaSizeBytes += GlobalsSize;
683
684 // Add push instructions for preserved registers.
685 // On ARM, "push" can push a whole list of GPRs via a bitmask (0-15).
686 // Unlike x86, ARM also has callee-saved float/vector registers.
687 // The "vpush" instruction can handle a whole list of float/vector
688 // registers, but it only handles contiguous sequences of registers
689 // by specifying the start and the length.
690 VarList GPRsToPreserve;
691 GPRsToPreserve.reserve(CalleeSaves.size());
692 uint32_t NumCallee = 0;
693 size_t PreservedRegsSizeBytes = 0;
694 // Consider FP and LR as callee-save / used as needed.
695 if (UsesFramePointer) {
696 CalleeSaves[RegARM32::Reg_fp] = true;
697 assert(RegsUsed[RegARM32::Reg_fp] == false);
698 RegsUsed[RegARM32::Reg_fp] = true;
699 }
700 if (!MaybeLeafFunc) {
701 CalleeSaves[RegARM32::Reg_lr] = true;
702 RegsUsed[RegARM32::Reg_lr] = true;
703 }
704 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
705 if (CalleeSaves[i] && RegsUsed[i]) {
706 // TODO(jvoung): do separate vpush for each floating point
707 // register segment and += 4, or 8 depending on type.
708 ++NumCallee;
709 PreservedRegsSizeBytes += 4;
710 GPRsToPreserve.push_back(getPhysicalRegister(i));
711 }
712 }
713 Ctx->statsUpdateRegistersSaved(NumCallee);
714 if (!GPRsToPreserve.empty())
715 _push(GPRsToPreserve);
716
717 // Generate "mov FP, SP" if needed.
718 if (UsesFramePointer) {
719 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
720 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
721 _mov(FP, SP);
722 // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode).
723 Context.insert(InstFakeUse::create(Func, FP));
724 }
725
726 // Align the variables area. SpillAreaPaddingBytes is the size of
727 // the region after the preserved registers and before the spill areas.
728 // LocalsSlotsPaddingBytes is the amount of padding between the globals
729 // and locals area if they are separate.
730 assert(SpillAreaAlignmentBytes <= ARM32_STACK_ALIGNMENT_BYTES);
731 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
732 uint32_t SpillAreaPaddingBytes = 0;
733 uint32_t LocalsSlotsPaddingBytes = 0;
734 alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes,
735 GlobalsSize, LocalsSlotsAlignmentBytes,
736 &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes);
737 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
738 uint32_t GlobalsAndSubsequentPaddingSize =
739 GlobalsSize + LocalsSlotsPaddingBytes;
740
741 // Align SP if necessary.
742 if (NeedsStackAlignment) {
743 uint32_t StackOffset = PreservedRegsSizeBytes;
744 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);
745 SpillAreaSizeBytes = StackSize - StackOffset;
746 }
747
748 // Generate "sub sp, SpillAreaSizeBytes"
749 if (SpillAreaSizeBytes) {
Jan Voung28068ad2015-07-31 12:58:46 -0700750 // Use the scratch register if needed to legalize the immediate.
Jan Voung0fa6c5a2015-06-01 11:04:04 -0700751 Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
Jan Voung28068ad2015-07-31 12:58:46 -0700752 Legal_Reg | Legal_Flex, getReservedTmpReg());
Jan Voung0fa6c5a2015-06-01 11:04:04 -0700753 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
754 _sub(SP, SP, SubAmount);
755 }
756 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
757
758 resetStackAdjustment();
759
760 // Fill in stack offsets for stack args, and copy args into registers
761 // for those that were register-allocated. Args are pushed right to
762 // left, so Arg[0] is closest to the stack/frame pointer.
763 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
764 size_t BasicFrameOffset = PreservedRegsSizeBytes;
765 if (!UsesFramePointer)
766 BasicFrameOffset += SpillAreaSizeBytes;
767
768 const VarList &Args = Func->getArgs();
769 size_t InArgsSizeBytes = 0;
Jan Voungb0a8c242015-06-18 15:00:14 -0700770 TargetARM32::CallingConv CC;
Jan Voung0fa6c5a2015-06-01 11:04:04 -0700771 for (Variable *Arg : Args) {
772 Type Ty = Arg->getType();
Jan Voungb0a8c242015-06-18 15:00:14 -0700773 bool InRegs = false;
Jan Voung0fa6c5a2015-06-01 11:04:04 -0700774 // Skip arguments passed in registers.
Jan Voung86ebec12015-08-09 07:58:35 -0700775 if (isVectorType(Ty) || isFloatingType(Ty)) {
776 int32_t DummyReg;
777 InRegs = CC.FPInReg(Ty, &DummyReg);
Jan Voungb0a8c242015-06-18 15:00:14 -0700778 } else if (Ty == IceType_i64) {
779 std::pair<int32_t, int32_t> DummyRegs;
780 InRegs = CC.I64InRegs(&DummyRegs);
781 } else {
782 assert(Ty == IceType_i32);
783 int32_t DummyReg;
784 InRegs = CC.I32InReg(&DummyReg);
Jan Voung0fa6c5a2015-06-01 11:04:04 -0700785 }
Jan Voungb0a8c242015-06-18 15:00:14 -0700786 if (!InRegs)
787 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
Jan Voung0fa6c5a2015-06-01 11:04:04 -0700788 }
789
790 // Fill in stack offsets for locals.
791 assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
792 SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
793 UsesFramePointer);
794 this->HasComputedFrame = true;
795
Jim Stichnoth20b71f52015-06-24 15:52:24 -0700796 if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
Jan Voung0fa6c5a2015-06-01 11:04:04 -0700797 OstreamLocker L(Func->getContext());
798 Ostream &Str = Func->getContext()->getStrDump();
799
800 Str << "Stack layout:\n";
801 uint32_t SPAdjustmentPaddingSize =
802 SpillAreaSizeBytes - LocalsSpillAreaSize -
803 GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes;
804 Str << " in-args = " << InArgsSizeBytes << " bytes\n"
805 << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
806 << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
807 << " globals spill area = " << GlobalsSize << " bytes\n"
808 << " globals-locals spill areas intermediate padding = "
809 << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
810 << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
811 << " SP alignment padding = " << SPAdjustmentPaddingSize << " bytes\n";
812
813 Str << "Stack details:\n"
814 << " SP adjustment = " << SpillAreaSizeBytes << " bytes\n"
815 << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
816 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
817 << " bytes\n"
818 << " is FP based = " << UsesFramePointer << "\n";
819 }
Jan Voungb36ad9b2015-04-21 17:01:49 -0700820}
821
// Emits the function epilog into Node, immediately before its ret
// instruction: restores SP (from FP, or by popping the spill area), pops
// the callee-saved registers, and — when sandboxing is enabled — rewrites
// the ret into a bundle-locked masked return sequence. Does nothing if the
// node contains no ret instruction.
void TargetARM32::addEpilog(CfgNode *Node) {
  InstList &Insts = Node->getInsts();
  // Scan backwards for the ret; the epilog is inserted right before it.
  InstList::reverse_iterator RI, E;
  for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
    if (llvm::isa<InstARM32Ret>(*RI))
      break;
  }
  if (RI == E)
    return;

  // Convert the reverse_iterator position into its corresponding
  // (forward) iterator position.
  InstList::iterator InsertPoint = RI.base();
  --InsertPoint;
  Context.init(Node);
  Context.setInsertPoint(InsertPoint);

  Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
  if (UsesFramePointer) {
    Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
    // For late-stage liveness analysis (e.g. asm-verbose mode),
    // adding a fake use of SP before the assignment of SP=FP keeps
    // previous SP adjustments from being dead-code eliminated.
    Context.insert(InstFakeUse::create(Func, SP));
    _mov(SP, FP);
  } else {
    // No frame pointer: undo the prolog's "sub sp, SpillAreaSizeBytes".
    // add SP, SpillAreaSizeBytes
    if (SpillAreaSizeBytes) {
      // Use the scratch register if needed to legalize the immediate.
      Operand *AddAmount =
          legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
                   Legal_Reg | Legal_Flex, getReservedTmpReg());
      _add(SP, SP, AddAmount);
    }
  }

  // Add pop instructions for preserved registers.
  llvm::SmallBitVector CalleeSaves =
      getRegisterSet(RegSet_CalleeSave, RegSet_None);
  VarList GPRsToRestore;
  GPRsToRestore.reserve(CalleeSaves.size());
  // Consider FP and LR as callee-save / used as needed.
  if (UsesFramePointer) {
    CalleeSaves[RegARM32::Reg_fp] = true;
  }
  if (!MaybeLeafFunc) {
    CalleeSaves[RegARM32::Reg_lr] = true;
  }
  // Pop registers in ascending order just like push
  // (instead of in reverse order).
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    if (CalleeSaves[i] && RegsUsed[i]) {
      GPRsToRestore.push_back(getPhysicalRegister(i));
    }
  }
  if (!GPRsToRestore.empty())
    _pop(GPRsToRestore);

  if (!Ctx->getFlags().getUseSandboxing())
    return;

  // Change the original ret instruction into a sandboxed return sequence.
  // bundle_lock
  // bic lr, #0xc000000f
  // bx lr
  // bundle_unlock
  // This isn't just aligning to the getBundleAlignLog2Bytes(). It needs to
  // restrict to the lower 1GB as well.
  Operand *RetMask =
      legalize(Ctx->getConstantInt32(0xc000000f), Legal_Reg | Legal_Flex);
  Variable *LR = makeReg(IceType_i32, RegARM32::Reg_lr);
  Variable *RetValue = nullptr;
  if (RI->getSrcSize())
    RetValue = llvm::cast<Variable>(RI->getSrc(0));
  _bundle_lock();
  _bic(LR, LR, RetMask);
  _ret(LR, RetValue);
  _bundle_unlock();
  // The original ret is superseded by the masked sequence above.
  RI->setDeleted();
}
902
Jan Voung28068ad2015-07-31 12:58:46 -0700903bool TargetARM32::isLegalVariableStackOffset(int32_t Offset) const {
904 constexpr bool SignExt = false;
Jan Voung86ebec12015-08-09 07:58:35 -0700905 // TODO(jvoung): vldr of FP stack slots has a different limit from the
906 // plain stackSlotType().
Jan Voung28068ad2015-07-31 12:58:46 -0700907 return OperandARM32Mem::canHoldOffset(stackSlotType(), SignExt, Offset);
908}
909
910StackVariable *TargetARM32::legalizeVariableSlot(Variable *Var,
911 Variable *OrigBaseReg) {
912 int32_t Offset = Var->getStackOffset();
913 // Legalize will likely need a movw/movt combination, but if the top
914 // bits are all 0 from negating the offset and subtracting, we could
915 // use that instead.
916 bool ShouldSub = (-Offset & 0xFFFF0000) == 0;
917 if (ShouldSub)
918 Offset = -Offset;
919 Operand *OffsetVal = legalize(Ctx->getConstantInt32(Offset),
920 Legal_Reg | Legal_Flex, getReservedTmpReg());
921 Variable *ScratchReg = makeReg(IceType_i32, getReservedTmpReg());
922 if (ShouldSub)
923 _sub(ScratchReg, OrigBaseReg, OffsetVal);
924 else
925 _add(ScratchReg, OrigBaseReg, OffsetVal);
926 StackVariable *NewVar = Func->makeVariable<StackVariable>(stackSlotType());
Andrew Scull11c9a322015-08-28 14:24:14 -0700927 NewVar->setMustNotHaveReg();
Jan Voung28068ad2015-07-31 12:58:46 -0700928 NewVar->setBaseRegNum(ScratchReg->getRegNum());
929 constexpr int32_t NewOffset = 0;
930 NewVar->setStackOffset(NewOffset);
931 return NewVar;
932}
933
// Rewrites stack-slot accesses whose frame offsets do not fit the
// load/store immediate encoding, by introducing temporary base registers.
void TargetARM32::legalizeStackSlots() {
  // If a stack variable's frame offset doesn't fit, convert from:
  //   ldr X, OFF[SP]
  // to:
  //   movw/movt TMP, OFF_PART
  //   add TMP, TMP, SP
  //   ldr X, OFF_MORE[TMP]
  //
  // This is safe because we have reserved TMP, and add for ARM does not
  // clobber the flags register.
  Func->dump("Before legalizeStackSlots");
  assert(hasComputedFrame());
  // Early exit, if SpillAreaSizeBytes is really small.
  // (Every slot offset is bounded by the spill area size, so if the whole
  // area is encodable, every individual offset is too.)
  if (isLegalVariableStackOffset(SpillAreaSizeBytes))
    return;
  Variable *OrigBaseReg = getPhysicalRegister(getFrameOrStackReg());
  int32_t StackAdjust = 0;
  // Do a fairly naive greedy clustering for now. Pick the first stack slot
  // that's out of bounds and make a new base reg using the architecture's temp
  // register. If that works for the next slot, then great. Otherwise, create
  // a new base register, clobbering the previous base register. Never share a
  // base reg across different basic blocks. This isn't ideal if local and
  // multi-block variables are far apart and their references are interspersed.
  // It may help to be more coordinated about assign stack slot numbers
  // and may help to assign smaller offsets to higher-weight variables
  // so that they don't depend on this legalization.
  for (CfgNode *Node : Func->getNodes()) {
    Context.init(Node);
    // NewBaseReg/NewBaseOffset track the most recently materialized base
    // register and the frame offset it corresponds to; reset per block.
    StackVariable *NewBaseReg = nullptr;
    int32_t NewBaseOffset = 0;
    while (!Context.atEnd()) {
      PostIncrLoweringContext PostIncrement(Context);
      Inst *CurInstr = Context.getCur();
      Variable *Dest = CurInstr->getDest();
      // Check if the previous NewBaseReg is clobbered, and reset if needed.
      if ((Dest && NewBaseReg && Dest->hasReg() &&
           Dest->getRegNum() == NewBaseReg->getBaseRegNum()) ||
          llvm::isa<InstFakeKill>(CurInstr)) {
        NewBaseReg = nullptr;
        NewBaseOffset = 0;
      }
      // The stack adjustment only matters if we are using SP instead of FP.
      // Track SP movement from argument pushes (adjust-stack) and its reset
      // after calls, so slot offsets stay relative to the current SP.
      if (!hasFramePointer()) {
        if (auto *AdjInst = llvm::dyn_cast<InstARM32AdjustStack>(CurInstr)) {
          StackAdjust += AdjInst->getAmount();
          NewBaseOffset += AdjInst->getAmount();
          continue;
        }
        if (llvm::isa<InstARM32Call>(CurInstr)) {
          NewBaseOffset -= StackAdjust;
          StackAdjust = 0;
          continue;
        }
      }
      // For now, only Mov instructions can have stack variables. We need to
      // know the type of instruction because we currently create a fresh one
      // to replace Dest/Source, rather than mutate in place.
      auto *MovInst = llvm::dyn_cast<InstARM32Mov>(CurInstr);
      if (!MovInst) {
        continue;
      }
      // Case 1: the mov's destination is a stack slot.
      if (!Dest->hasReg()) {
        int32_t Offset = Dest->getStackOffset();
        Offset += StackAdjust;
        if (!isLegalVariableStackOffset(Offset)) {
          // First try reusing the existing base register, if the delta from
          // its offset is encodable.
          if (NewBaseReg) {
            int32_t OffsetDiff = Offset - NewBaseOffset;
            if (isLegalVariableStackOffset(OffsetDiff)) {
              StackVariable *NewDest =
                  Func->makeVariable<StackVariable>(stackSlotType());
              NewDest->setMustNotHaveReg();
              NewDest->setBaseRegNum(NewBaseReg->getBaseRegNum());
              NewDest->setStackOffset(OffsetDiff);
              Variable *NewDestVar = NewDest;
              _mov(NewDestVar, MovInst->getSrc(0));
              MovInst->setDeleted();
              continue;
            }
          }
          // Otherwise materialize a fresh base register for this offset and
          // remember it for subsequent slots.
          StackVariable *LegalDest = legalizeVariableSlot(Dest, OrigBaseReg);
          assert(LegalDest != Dest);
          Variable *LegalDestVar = LegalDest;
          _mov(LegalDestVar, MovInst->getSrc(0));
          MovInst->setDeleted();
          NewBaseReg = LegalDest;
          NewBaseOffset = Offset;
          continue;
        }
      }
      // Case 2: the mov's source is a stack slot. Same strategy as above.
      assert(MovInst->getSrcSize() == 1);
      Variable *Var = llvm::dyn_cast<Variable>(MovInst->getSrc(0));
      if (Var && !Var->hasReg()) {
        int32_t Offset = Var->getStackOffset();
        Offset += StackAdjust;
        if (!isLegalVariableStackOffset(Offset)) {
          if (NewBaseReg) {
            int32_t OffsetDiff = Offset - NewBaseOffset;
            if (isLegalVariableStackOffset(OffsetDiff)) {
              StackVariable *NewVar =
                  Func->makeVariable<StackVariable>(stackSlotType());
              NewVar->setMustNotHaveReg();
              NewVar->setBaseRegNum(NewBaseReg->getBaseRegNum());
              NewVar->setStackOffset(OffsetDiff);
              _mov(Dest, NewVar);
              MovInst->setDeleted();
              continue;
            }
          }
          StackVariable *LegalVar = legalizeVariableSlot(Var, OrigBaseReg);
          assert(LegalVar != Var);
          _mov(Dest, LegalVar);
          MovInst->setDeleted();
          NewBaseReg = LegalVar;
          NewBaseOffset = Offset;
          continue;
        }
      }
    }
  }
}
1054
Jan Voungb3401d22015-05-18 09:38:21 -07001055void TargetARM32::split64(Variable *Var) {
1056 assert(Var->getType() == IceType_i64);
1057 Variable *Lo = Var->getLo();
1058 Variable *Hi = Var->getHi();
1059 if (Lo) {
1060 assert(Hi);
1061 return;
1062 }
1063 assert(Hi == nullptr);
1064 Lo = Func->makeVariable(IceType_i32);
1065 Hi = Func->makeVariable(IceType_i32);
Jim Stichnoth20b71f52015-06-24 15:52:24 -07001066 if (BuildDefs::dump()) {
Jan Voungb3401d22015-05-18 09:38:21 -07001067 Lo->setName(Func, Var->getName(Func) + "__lo");
1068 Hi->setName(Func, Var->getName(Func) + "__hi");
1069 }
1070 Var->setLoHi(Lo, Hi);
1071 if (Var->getIsArg()) {
1072 Lo->setIsArg();
1073 Hi->setIsArg();
1074 }
1075}
1076
1077Operand *TargetARM32::loOperand(Operand *Operand) {
1078 assert(Operand->getType() == IceType_i64);
1079 if (Operand->getType() != IceType_i64)
1080 return Operand;
Jan Voungfbdd2442015-07-15 12:36:20 -07001081 if (auto *Var = llvm::dyn_cast<Variable>(Operand)) {
Jan Voungb3401d22015-05-18 09:38:21 -07001082 split64(Var);
1083 return Var->getLo();
1084 }
Jan Voungfbdd2442015-07-15 12:36:20 -07001085 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
Jan Voungb3401d22015-05-18 09:38:21 -07001086 return Ctx->getConstantInt32(static_cast<uint32_t>(Const->getValue()));
1087 }
Jan Voungfbdd2442015-07-15 12:36:20 -07001088 if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) {
Jan Voungb3401d22015-05-18 09:38:21 -07001089 // Conservatively disallow memory operands with side-effects (pre/post
1090 // increment) in case of duplication.
1091 assert(Mem->getAddrMode() == OperandARM32Mem::Offset ||
1092 Mem->getAddrMode() == OperandARM32Mem::NegOffset);
1093 if (Mem->isRegReg()) {
1094 return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(),
1095 Mem->getIndex(), Mem->getShiftOp(),
1096 Mem->getShiftAmt(), Mem->getAddrMode());
1097 } else {
1098 return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(),
1099 Mem->getOffset(), Mem->getAddrMode());
1100 }
1101 }
1102 llvm_unreachable("Unsupported operand type");
1103 return nullptr;
1104}
1105
// Returns an i32 operand designating the high 32 bits of a 64-bit operand.
// For memory operands this may emit an add to compute base+4 when the
// +4 offset cannot be encoded (or when the operand is reg+reg).
Operand *TargetARM32::hiOperand(Operand *Operand) {
  assert(Operand->getType() == IceType_i64);
  if (Operand->getType() != IceType_i64)
    return Operand;
  if (auto *Var = llvm::dyn_cast<Variable>(Operand)) {
    // High half of a variable: ensure the halves exist, then use the hi one.
    split64(Var);
    return Var->getHi();
  }
  if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
    // High half of a constant: bits 63..32.
    return Ctx->getConstantInt32(
        static_cast<uint32_t>(Const->getValue() >> 32));
  }
  if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) {
    // Conservatively disallow memory operands with side-effects
    // in case of duplication.
    assert(Mem->getAddrMode() == OperandARM32Mem::Offset ||
           Mem->getAddrMode() == OperandARM32Mem::NegOffset);
    const Type SplitType = IceType_i32;
    if (Mem->isRegReg()) {
      // We have to make a temp variable T, and add 4 to either Base or Index.
      // The Index may be shifted, so adding 4 can mean something else.
      // Thus, prefer T := Base + 4, and use T as the new Base.
      Variable *Base = Mem->getBase();
      Constant *Four = Ctx->getConstantInt32(4);
      Variable *NewBase = Func->makeVariable(Base->getType());
      lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, NewBase,
                                             Base, Four));
      return OperandARM32Mem::create(Func, SplitType, NewBase, Mem->getIndex(),
                                     Mem->getShiftOp(), Mem->getShiftAmt(),
                                     Mem->getAddrMode());
    } else {
      Variable *Base = Mem->getBase();
      ConstantInteger32 *Offset = Mem->getOffset();
      assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4));
      int32_t NextOffsetVal = Offset->getValue() + 4;
      const bool SignExt = false;
      if (!OperandARM32Mem::canHoldOffset(SplitType, SignExt, NextOffsetVal)) {
        // We have to make a temp variable and add 4 to either Base or Offset.
        // If we add 4 to Offset, this will convert a non-RegReg addressing
        // mode into a RegReg addressing mode. Since NaCl sandboxing disallows
        // RegReg addressing modes, prefer adding to base and replacing instead.
        // Thus we leave the old offset alone.
        Constant *Four = Ctx->getConstantInt32(4);
        Variable *NewBase = Func->makeVariable(Base->getType());
        lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add,
                                               NewBase, Base, Four));
        Base = NewBase;
      } else {
        // The +4 offset is encodable; just bump the immediate offset.
        Offset =
            llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal));
      }
      return OperandARM32Mem::create(Func, SplitType, Base, Offset,
                                     Mem->getAddrMode());
    }
  }
  llvm_unreachable("Unsupported operand type");
  return nullptr;
}
1164
Jan Voungb36ad9b2015-04-21 17:01:49 -07001165llvm::SmallBitVector TargetARM32::getRegisterSet(RegSetMask Include,
1166 RegSetMask Exclude) const {
1167 llvm::SmallBitVector Registers(RegARM32::Reg_NUM);
1168
1169#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \
John Porto5300bfe2015-09-08 09:03:22 -07001170 isFP32, isFP64, isVec128, alias_init) \
Jan Voungb36ad9b2015-04-21 17:01:49 -07001171 if (scratch && (Include & RegSet_CallerSave)) \
1172 Registers[RegARM32::val] = true; \
1173 if (preserved && (Include & RegSet_CalleeSave)) \
1174 Registers[RegARM32::val] = true; \
1175 if (stackptr && (Include & RegSet_StackPointer)) \
1176 Registers[RegARM32::val] = true; \
1177 if (frameptr && (Include & RegSet_FramePointer)) \
1178 Registers[RegARM32::val] = true; \
1179 if (scratch && (Exclude & RegSet_CallerSave)) \
1180 Registers[RegARM32::val] = false; \
1181 if (preserved && (Exclude & RegSet_CalleeSave)) \
1182 Registers[RegARM32::val] = false; \
1183 if (stackptr && (Exclude & RegSet_StackPointer)) \
1184 Registers[RegARM32::val] = false; \
1185 if (frameptr && (Exclude & RegSet_FramePointer)) \
1186 Registers[RegARM32::val] = false;
1187
1188 REGARM32_TABLE
1189
1190#undef X
1191
1192 return Registers;
1193}
1194
// Lowers an alloca instruction: aligns SP if the requested alignment
// exceeds the default stack alignment, subtracts the (aligned) size from
// SP, and assigns the resulting SP value to the dest. Forces the function
// to use a frame pointer and stack alignment.
void TargetARM32::lowerAlloca(const InstAlloca *Inst) {
  UsesFramePointer = true;
  // Conservatively require the stack to be aligned. Some stack
  // adjustment operations implemented below assume that the stack is
  // aligned before the alloca. All the alloca code ensures that the
  // stack alignment is preserved after the alloca. The stack alignment
  // restriction can be relaxed in some cases.
  NeedsStackAlignment = true;

  // TODO(stichnot): minimize the number of adjustments of SP, etc.
  Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
  Variable *Dest = Inst->getDest();
  uint32_t AlignmentParam = Inst->getAlignInBytes();
  // For default align=0, set it to the real value 1, to avoid any
  // bit-manipulation problems below.
  AlignmentParam = std::max(AlignmentParam, 1u);

  // LLVM enforces power of 2 alignment.
  assert(llvm::isPowerOf2_32(AlignmentParam));
  assert(llvm::isPowerOf2_32(ARM32_STACK_ALIGNMENT_BYTES));

  // Use whichever alignment is stricter; only realign SP when stricter
  // than the ABI stack alignment.
  uint32_t Alignment = std::max(AlignmentParam, ARM32_STACK_ALIGNMENT_BYTES);
  if (Alignment > ARM32_STACK_ALIGNMENT_BYTES) {
    alignRegisterPow2(SP, Alignment);
  }
  Operand *TotalSize = Inst->getSizeInBytes();
  if (const auto *ConstantTotalSize =
          llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
    // Constant size: round up at compile time and subtract once.
    uint32_t Value = ConstantTotalSize->getValue();
    Value = Utils::applyAlignment(Value, Alignment);
    Operand *SubAmount = legalize(Ctx->getConstantInt32(Value));
    _sub(SP, SP, SubAmount);
  } else {
    // Non-constant sizes need to be adjusted to the next highest
    // multiple of the required alignment at runtime:
    // T = (TotalSize + Alignment - 1) & ~(Alignment - 1).
    TotalSize = legalize(TotalSize, Legal_Reg | Legal_Flex);
    Variable *T = makeReg(IceType_i32);
    _mov(T, TotalSize);
    Operand *AddAmount = legalize(Ctx->getConstantInt32(Alignment - 1));
    _add(T, T, AddAmount);
    alignRegisterPow2(T, Alignment);
    _sub(SP, SP, T);
  }
  // The allocated block starts at the new SP.
  _mov(Dest, SP);
}
1240
Jan Voung6ec369e2015-06-30 11:03:15 -07001241void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) {
1242 if (isGuaranteedNonzeroInt(SrcLo) || isGuaranteedNonzeroInt(SrcHi))
1243 return;
Andrew Scull97f460d2015-07-21 10:07:42 -07001244 Variable *SrcLoReg = legalizeToReg(SrcLo);
Jan Voung6ec369e2015-06-30 11:03:15 -07001245 switch (Ty) {
1246 default:
1247 llvm_unreachable("Unexpected type");
1248 case IceType_i8: {
1249 Operand *Mask =
1250 legalize(Ctx->getConstantInt32(0xFF), Legal_Reg | Legal_Flex);
1251 _tst(SrcLoReg, Mask);
1252 break;
1253 }
1254 case IceType_i16: {
1255 Operand *Mask =
1256 legalize(Ctx->getConstantInt32(0xFFFF), Legal_Reg | Legal_Flex);
1257 _tst(SrcLoReg, Mask);
1258 break;
1259 }
1260 case IceType_i32: {
1261 _tst(SrcLoReg, SrcLoReg);
1262 break;
1263 }
1264 case IceType_i64: {
1265 Variable *ScratchReg = makeReg(IceType_i32);
1266 _orrs(ScratchReg, SrcLoReg, SrcHi);
1267 // ScratchReg isn't going to be used, but we need the
1268 // side-effect of setting flags from this operation.
1269 Context.insert(InstFakeUse::create(Func, ScratchReg));
1270 }
1271 }
1272 InstARM32Label *Label = InstARM32Label::create(Func, this);
1273 _br(Label, CondARM32::NE);
1274 _trap();
1275 Context.insert(Label);
1276}
1277
1278void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R,
1279 Operand *Src1, ExtInstr ExtFunc,
1280 DivInstr DivFunc, const char *DivHelperName,
1281 bool IsRemainder) {
1282 div0Check(Dest->getType(), Src1, nullptr);
Andrew Scull97f460d2015-07-21 10:07:42 -07001283 Variable *Src1R = legalizeToReg(Src1);
Jan Voung6ec369e2015-06-30 11:03:15 -07001284 Variable *T0R = Src0R;
1285 Variable *T1R = Src1R;
1286 if (Dest->getType() != IceType_i32) {
1287 T0R = makeReg(IceType_i32);
1288 (this->*ExtFunc)(T0R, Src0R, CondARM32::AL);
1289 T1R = makeReg(IceType_i32);
1290 (this->*ExtFunc)(T1R, Src1R, CondARM32::AL);
1291 }
1292 if (hasCPUFeature(TargetARM32Features::HWDivArm)) {
1293 (this->*DivFunc)(T, T0R, T1R, CondARM32::AL);
1294 if (IsRemainder) {
1295 Variable *T2 = makeReg(IceType_i32);
1296 _mls(T2, T, T1R, T0R);
1297 T = T2;
1298 }
1299 _mov(Dest, T);
1300 } else {
1301 constexpr SizeT MaxSrcs = 2;
1302 InstCall *Call = makeHelperCall(DivHelperName, Dest, MaxSrcs);
1303 Call->addArg(T0R);
1304 Call->addArg(T1R);
1305 lowerCall(Call);
1306 }
1307 return;
1308}
1309
Jan Voungb36ad9b2015-04-21 17:01:49 -07001310void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
Jan Voungb3401d22015-05-18 09:38:21 -07001311 Variable *Dest = Inst->getDest();
1312 // TODO(jvoung): Should be able to flip Src0 and Src1 if it is easier
1313 // to legalize Src0 to flex or Src1 to flex and there is a reversible
1314 // instruction. E.g., reverse subtract with immediate, register vs
1315 // register, immediate.
1316 // Or it may be the case that the operands aren't swapped, but the
1317 // bits can be flipped and a different operation applied.
1318 // E.g., use BIC (bit clear) instead of AND for some masks.
Jan Voungfbdd2442015-07-15 12:36:20 -07001319 Operand *Src0 = legalizeUndef(Inst->getSrc(0));
1320 Operand *Src1 = legalizeUndef(Inst->getSrc(1));
Jan Voungb3401d22015-05-18 09:38:21 -07001321 if (Dest->getType() == IceType_i64) {
Jan Voung70fa5252015-07-06 14:01:25 -07001322 // These helper-call-involved instructions are lowered in this
1323 // separate switch. This is because we would otherwise assume that
1324 // we need to legalize Src0 to Src0RLo and Src0Hi. However, those go unused
1325 // with helper calls, and such unused/redundant instructions will fail
1326 // liveness analysis under -Om1 setting.
1327 switch (Inst->getOp()) {
1328 default:
1329 break;
1330 case InstArithmetic::Udiv:
1331 case InstArithmetic::Sdiv:
1332 case InstArithmetic::Urem:
1333 case InstArithmetic::Srem: {
1334 // Check for divide by 0 (ARM normally doesn't trap, but we want it
1335 // to trap for NaCl). Src1Lo and Src1Hi may have already been legalized
1336 // to a register, which will hide a constant source operand.
1337 // Instead, check the not-yet-legalized Src1 to optimize-out a divide
1338 // by 0 check.
1339 if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src1)) {
1340 if (C64->getValue() == 0) {
1341 _trap();
1342 return;
1343 }
1344 } else {
1345 Operand *Src1Lo = legalize(loOperand(Src1), Legal_Reg | Legal_Flex);
1346 Operand *Src1Hi = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex);
1347 div0Check(IceType_i64, Src1Lo, Src1Hi);
1348 }
1349 // Technically, ARM has their own aeabi routines, but we can use the
1350 // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div,
1351 // but uses the more standard __moddi3 for rem.
1352 const char *HelperName = "";
1353 switch (Inst->getOp()) {
1354 default:
1355 llvm_unreachable("Should have only matched div ops.");
1356 break;
1357 case InstArithmetic::Udiv:
1358 HelperName = H_udiv_i64;
1359 break;
1360 case InstArithmetic::Sdiv:
1361 HelperName = H_sdiv_i64;
1362 break;
1363 case InstArithmetic::Urem:
1364 HelperName = H_urem_i64;
1365 break;
1366 case InstArithmetic::Srem:
1367 HelperName = H_srem_i64;
1368 break;
1369 }
1370 constexpr SizeT MaxSrcs = 2;
1371 InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs);
1372 Call->addArg(Src0);
1373 Call->addArg(Src1);
1374 lowerCall(Call);
1375 return;
1376 }
1377 }
Jan Voung29719972015-05-19 11:24:51 -07001378 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1379 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
Andrew Scull97f460d2015-07-21 10:07:42 -07001380 Variable *Src0RLo = legalizeToReg(loOperand(Src0));
1381 Variable *Src0RHi = legalizeToReg(hiOperand(Src0));
Jan Voung70fa5252015-07-06 14:01:25 -07001382 Operand *Src1Lo = loOperand(Src1);
1383 Operand *Src1Hi = hiOperand(Src1);
Jan Voung29719972015-05-19 11:24:51 -07001384 Variable *T_Lo = makeReg(DestLo->getType());
1385 Variable *T_Hi = makeReg(DestHi->getType());
1386 switch (Inst->getOp()) {
1387 case InstArithmetic::_num:
1388 llvm_unreachable("Unknown arithmetic operator");
Jan Voung70fa5252015-07-06 14:01:25 -07001389 return;
Jan Voung29719972015-05-19 11:24:51 -07001390 case InstArithmetic::Add:
Jan Voung70fa5252015-07-06 14:01:25 -07001391 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1392 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
Jan Voung29719972015-05-19 11:24:51 -07001393 _adds(T_Lo, Src0RLo, Src1Lo);
1394 _mov(DestLo, T_Lo);
1395 _adc(T_Hi, Src0RHi, Src1Hi);
1396 _mov(DestHi, T_Hi);
Jan Voung70fa5252015-07-06 14:01:25 -07001397 return;
Jan Voung29719972015-05-19 11:24:51 -07001398 case InstArithmetic::And:
Jan Voung70fa5252015-07-06 14:01:25 -07001399 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1400 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
Jan Voung29719972015-05-19 11:24:51 -07001401 _and(T_Lo, Src0RLo, Src1Lo);
1402 _mov(DestLo, T_Lo);
1403 _and(T_Hi, Src0RHi, Src1Hi);
1404 _mov(DestHi, T_Hi);
Jan Voung70fa5252015-07-06 14:01:25 -07001405 return;
Jan Voung29719972015-05-19 11:24:51 -07001406 case InstArithmetic::Or:
Jan Voung70fa5252015-07-06 14:01:25 -07001407 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1408 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
Jan Voung29719972015-05-19 11:24:51 -07001409 _orr(T_Lo, Src0RLo, Src1Lo);
1410 _mov(DestLo, T_Lo);
1411 _orr(T_Hi, Src0RHi, Src1Hi);
1412 _mov(DestHi, T_Hi);
Jan Voung70fa5252015-07-06 14:01:25 -07001413 return;
Jan Voung29719972015-05-19 11:24:51 -07001414 case InstArithmetic::Xor:
Jan Voung70fa5252015-07-06 14:01:25 -07001415 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1416 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
Jan Voung29719972015-05-19 11:24:51 -07001417 _eor(T_Lo, Src0RLo, Src1Lo);
1418 _mov(DestLo, T_Lo);
1419 _eor(T_Hi, Src0RHi, Src1Hi);
1420 _mov(DestHi, T_Hi);
Jan Voung70fa5252015-07-06 14:01:25 -07001421 return;
Jan Voung29719972015-05-19 11:24:51 -07001422 case InstArithmetic::Sub:
Jan Voung70fa5252015-07-06 14:01:25 -07001423 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1424 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
Jan Voung29719972015-05-19 11:24:51 -07001425 _subs(T_Lo, Src0RLo, Src1Lo);
1426 _mov(DestLo, T_Lo);
1427 _sbc(T_Hi, Src0RHi, Src1Hi);
1428 _mov(DestHi, T_Hi);
Jan Voung70fa5252015-07-06 14:01:25 -07001429 return;
Jan Voung29719972015-05-19 11:24:51 -07001430 case InstArithmetic::Mul: {
1431 // GCC 4.8 does:
1432 // a=b*c ==>
1433 // t_acc =(mul) (b.lo * c.hi)
1434 // t_acc =(mla) (c.lo * b.hi) + t_acc
1435 // t.hi,t.lo =(umull) b.lo * c.lo
1436 // t.hi += t_acc
1437 // a.lo = t.lo
1438 // a.hi = t.hi
1439 //
1440 // LLVM does:
1441 // t.hi,t.lo =(umull) b.lo * c.lo
1442 // t.hi =(mla) (b.lo * c.hi) + t.hi
1443 // t.hi =(mla) (b.hi * c.lo) + t.hi
1444 // a.lo = t.lo
1445 // a.hi = t.hi
1446 //
1447 // LLVM's lowering has fewer instructions, but more register pressure:
1448 // t.lo is live from beginning to end, while GCC delays the two-dest
1449 // instruction till the end, and kills c.hi immediately.
1450 Variable *T_Acc = makeReg(IceType_i32);
1451 Variable *T_Acc1 = makeReg(IceType_i32);
1452 Variable *T_Hi1 = makeReg(IceType_i32);
Andrew Scull97f460d2015-07-21 10:07:42 -07001453 Variable *Src1RLo = legalizeToReg(Src1Lo);
1454 Variable *Src1RHi = legalizeToReg(Src1Hi);
Jan Voung29719972015-05-19 11:24:51 -07001455 _mul(T_Acc, Src0RLo, Src1RHi);
1456 _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc);
1457 _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo);
1458 _add(T_Hi, T_Hi1, T_Acc1);
1459 _mov(DestLo, T_Lo);
1460 _mov(DestHi, T_Hi);
Jan Voung70fa5252015-07-06 14:01:25 -07001461 return;
1462 }
Jan Voung66c3d5e2015-06-04 17:02:31 -07001463 case InstArithmetic::Shl: {
1464 // a=b<<c ==>
1465 // GCC 4.8 does:
1466 // sub t_c1, c.lo, #32
1467 // lsl t_hi, b.hi, c.lo
1468 // orr t_hi, t_hi, b.lo, lsl t_c1
1469 // rsb t_c2, c.lo, #32
1470 // orr t_hi, t_hi, b.lo, lsr t_c2
1471 // lsl t_lo, b.lo, c.lo
1472 // a.lo = t_lo
1473 // a.hi = t_hi
1474 // Can be strength-reduced for constant-shifts, but we don't do
1475 // that for now.
1476 // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative.
1477 // On ARM, shifts only take the lower 8 bits of the shift register,
1478 // and saturate to the range 0-32, so the negative value will
1479 // saturate to 32.
1480 Variable *T_Hi = makeReg(IceType_i32);
Andrew Scull97f460d2015-07-21 10:07:42 -07001481 Variable *Src1RLo = legalizeToReg(Src1Lo);
Jan Voung66c3d5e2015-06-04 17:02:31 -07001482 Constant *ThirtyTwo = Ctx->getConstantInt32(32);
1483 Variable *T_C1 = makeReg(IceType_i32);
1484 Variable *T_C2 = makeReg(IceType_i32);
1485 _sub(T_C1, Src1RLo, ThirtyTwo);
1486 _lsl(T_Hi, Src0RHi, Src1RLo);
1487 _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
1488 OperandARM32::LSL, T_C1));
1489 _rsb(T_C2, Src1RLo, ThirtyTwo);
1490 _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
1491 OperandARM32::LSR, T_C2));
1492 _mov(DestHi, T_Hi);
1493 Variable *T_Lo = makeReg(IceType_i32);
1494 // _mov seems to sometimes have better register preferencing than lsl.
1495 // Otherwise mov w/ lsl shifted register is a pseudo-instruction
1496 // that maps to lsl.
1497 _mov(T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
1498 OperandARM32::LSL, Src1RLo));
1499 _mov(DestLo, T_Lo);
Jan Voung70fa5252015-07-06 14:01:25 -07001500 return;
1501 }
Jan Voung29719972015-05-19 11:24:51 -07001502 case InstArithmetic::Lshr:
Jan Voung66c3d5e2015-06-04 17:02:31 -07001503 // a=b>>c (unsigned) ==>
1504 // GCC 4.8 does:
1505 // rsb t_c1, c.lo, #32
1506 // lsr t_lo, b.lo, c.lo
1507 // orr t_lo, t_lo, b.hi, lsl t_c1
1508 // sub t_c2, c.lo, #32
1509 // orr t_lo, t_lo, b.hi, lsr t_c2
1510 // lsr t_hi, b.hi, c.lo
1511 // a.lo = t_lo
1512 // a.hi = t_hi
1513 case InstArithmetic::Ashr: {
1514 // a=b>>c (signed) ==> ...
1515 // Ashr is similar, but the sub t_c2, c.lo, #32 should set flags,
1516 // and the next orr should be conditioned on PLUS. The last two
1517 // right shifts should also be arithmetic.
1518 bool IsAshr = Inst->getOp() == InstArithmetic::Ashr;
1519 Variable *T_Lo = makeReg(IceType_i32);
Andrew Scull97f460d2015-07-21 10:07:42 -07001520 Variable *Src1RLo = legalizeToReg(Src1Lo);
Jan Voung66c3d5e2015-06-04 17:02:31 -07001521 Constant *ThirtyTwo = Ctx->getConstantInt32(32);
1522 Variable *T_C1 = makeReg(IceType_i32);
1523 Variable *T_C2 = makeReg(IceType_i32);
1524 _rsb(T_C1, Src1RLo, ThirtyTwo);
1525 _lsr(T_Lo, Src0RLo, Src1RLo);
1526 _orr(T_Lo, T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
1527 OperandARM32::LSL, T_C1));
1528 OperandARM32::ShiftKind RShiftKind;
1529 CondARM32::Cond Pred;
1530 if (IsAshr) {
1531 _subs(T_C2, Src1RLo, ThirtyTwo);
1532 RShiftKind = OperandARM32::ASR;
1533 Pred = CondARM32::PL;
1534 } else {
1535 _sub(T_C2, Src1RLo, ThirtyTwo);
1536 RShiftKind = OperandARM32::LSR;
1537 Pred = CondARM32::AL;
1538 }
1539 _orr(T_Lo, T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
1540 RShiftKind, T_C2),
1541 Pred);
1542 _mov(DestLo, T_Lo);
1543 Variable *T_Hi = makeReg(IceType_i32);
1544 _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
1545 RShiftKind, Src1RLo));
1546 _mov(DestHi, T_Hi);
Jan Voung6ec369e2015-06-30 11:03:15 -07001547 return;
1548 }
Jan Voung29719972015-05-19 11:24:51 -07001549 case InstArithmetic::Fadd:
1550 case InstArithmetic::Fsub:
1551 case InstArithmetic::Fmul:
1552 case InstArithmetic::Fdiv:
1553 case InstArithmetic::Frem:
1554 llvm_unreachable("FP instruction with i64 type");
Jan Voung70fa5252015-07-06 14:01:25 -07001555 return;
1556 case InstArithmetic::Udiv:
1557 case InstArithmetic::Sdiv:
1558 case InstArithmetic::Urem:
1559 case InstArithmetic::Srem:
1560 llvm_unreachable("Call-helper-involved instruction for i64 type "
1561 "should have already been handled before");
1562 return;
Jan Voung29719972015-05-19 11:24:51 -07001563 }
Jan Voung70fa5252015-07-06 14:01:25 -07001564 return;
Jan Voungb3401d22015-05-18 09:38:21 -07001565 } else if (isVectorType(Dest->getType())) {
Jan Voungb2d50842015-05-12 09:53:50 -07001566 UnimplementedError(Func->getContext()->getFlags());
Jan Voung86ebec12015-08-09 07:58:35 -07001567 // Add a fake def to keep liveness consistent in the meantime.
1568 Context.insert(InstFakeDef::create(Func, Dest));
Jan Voung70fa5252015-07-06 14:01:25 -07001569 return;
1570 }
1571 // Dest->getType() is a non-i64 scalar.
Andrew Scull97f460d2015-07-21 10:07:42 -07001572 Variable *Src0R = legalizeToReg(Src0);
Jan Voung70fa5252015-07-06 14:01:25 -07001573 Variable *T = makeReg(Dest->getType());
1574 // Handle div/rem separately. They require a non-legalized Src1 to inspect
1575 // whether or not Src1 is a non-zero constant. Once legalized it is more
1576 // difficult to determine (constant may be moved to a register).
1577 switch (Inst->getOp()) {
1578 default:
1579 break;
1580 case InstArithmetic::Udiv: {
1581 constexpr bool IsRemainder = false;
1582 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv,
1583 H_udiv_i32, IsRemainder);
1584 return;
1585 }
1586 case InstArithmetic::Sdiv: {
1587 constexpr bool IsRemainder = false;
1588 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv,
1589 H_sdiv_i32, IsRemainder);
1590 return;
1591 }
1592 case InstArithmetic::Urem: {
1593 constexpr bool IsRemainder = true;
1594 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv,
1595 H_urem_i32, IsRemainder);
1596 return;
1597 }
1598 case InstArithmetic::Srem: {
1599 constexpr bool IsRemainder = true;
1600 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv,
1601 H_srem_i32, IsRemainder);
1602 return;
1603 }
Jan Voung86ebec12015-08-09 07:58:35 -07001604 case InstArithmetic::Frem: {
1605 const SizeT MaxSrcs = 2;
1606 Type Ty = Dest->getType();
1607 InstCall *Call = makeHelperCall(
1608 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs);
1609 Call->addArg(Src0R);
1610 Call->addArg(Src1);
1611 lowerCall(Call);
1612 return;
1613 }
1614 }
1615
1616 // Handle floating point arithmetic separately: they require Src1 to be
1617 // legalized to a register.
1618 switch (Inst->getOp()) {
1619 default:
1620 break;
1621 case InstArithmetic::Fadd: {
1622 Variable *Src1R = legalizeToReg(Src1);
1623 _vadd(T, Src0R, Src1R);
1624 _vmov(Dest, T);
1625 return;
1626 }
1627 case InstArithmetic::Fsub: {
1628 Variable *Src1R = legalizeToReg(Src1);
1629 _vsub(T, Src0R, Src1R);
1630 _vmov(Dest, T);
1631 return;
1632 }
1633 case InstArithmetic::Fmul: {
1634 Variable *Src1R = legalizeToReg(Src1);
1635 _vmul(T, Src0R, Src1R);
1636 _vmov(Dest, T);
1637 return;
1638 }
1639 case InstArithmetic::Fdiv: {
1640 Variable *Src1R = legalizeToReg(Src1);
1641 _vdiv(T, Src0R, Src1R);
1642 _vmov(Dest, T);
1643 return;
1644 }
Jan Voung70fa5252015-07-06 14:01:25 -07001645 }
1646
1647 Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);
1648 switch (Inst->getOp()) {
1649 case InstArithmetic::_num:
1650 llvm_unreachable("Unknown arithmetic operator");
1651 return;
1652 case InstArithmetic::Add:
1653 _add(T, Src0R, Src1RF);
1654 _mov(Dest, T);
1655 return;
1656 case InstArithmetic::And:
1657 _and(T, Src0R, Src1RF);
1658 _mov(Dest, T);
1659 return;
1660 case InstArithmetic::Or:
1661 _orr(T, Src0R, Src1RF);
1662 _mov(Dest, T);
1663 return;
1664 case InstArithmetic::Xor:
1665 _eor(T, Src0R, Src1RF);
1666 _mov(Dest, T);
1667 return;
1668 case InstArithmetic::Sub:
1669 _sub(T, Src0R, Src1RF);
1670 _mov(Dest, T);
1671 return;
1672 case InstArithmetic::Mul: {
Andrew Scull97f460d2015-07-21 10:07:42 -07001673 Variable *Src1R = legalizeToReg(Src1RF);
Jan Voung70fa5252015-07-06 14:01:25 -07001674 _mul(T, Src0R, Src1R);
1675 _mov(Dest, T);
1676 return;
1677 }
1678 case InstArithmetic::Shl:
1679 _lsl(T, Src0R, Src1RF);
1680 _mov(Dest, T);
1681 return;
1682 case InstArithmetic::Lshr:
1683 _lsr(T, Src0R, Src1RF);
1684 _mov(Dest, T);
1685 return;
1686 case InstArithmetic::Ashr:
1687 _asr(T, Src0R, Src1RF);
1688 _mov(Dest, T);
1689 return;
1690 case InstArithmetic::Udiv:
1691 case InstArithmetic::Sdiv:
1692 case InstArithmetic::Urem:
1693 case InstArithmetic::Srem:
1694 llvm_unreachable("Integer div/rem should have been handled earlier.");
1695 return;
1696 case InstArithmetic::Fadd:
Jan Voung70fa5252015-07-06 14:01:25 -07001697 case InstArithmetic::Fsub:
Jan Voung70fa5252015-07-06 14:01:25 -07001698 case InstArithmetic::Fmul:
Jan Voung70fa5252015-07-06 14:01:25 -07001699 case InstArithmetic::Fdiv:
Jan Voung70fa5252015-07-06 14:01:25 -07001700 case InstArithmetic::Frem:
Jan Voung86ebec12015-08-09 07:58:35 -07001701 llvm_unreachable("Floating point arith should have been handled earlier.");
Jan Voung70fa5252015-07-06 14:01:25 -07001702 return;
Jan Voungb36ad9b2015-04-21 17:01:49 -07001703 }
1704}
1705
1706void TargetARM32::lowerAssign(const InstAssign *Inst) {
Jan Voungb3401d22015-05-18 09:38:21 -07001707 Variable *Dest = Inst->getDest();
1708 Operand *Src0 = Inst->getSrc(0);
1709 assert(Dest->getType() == Src0->getType());
1710 if (Dest->getType() == IceType_i64) {
Jan Voungfbdd2442015-07-15 12:36:20 -07001711 Src0 = legalizeUndef(Src0);
1712 Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex);
1713 Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
Jan Voungb3401d22015-05-18 09:38:21 -07001714 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1715 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1716 Variable *T_Lo = nullptr, *T_Hi = nullptr;
1717 _mov(T_Lo, Src0Lo);
1718 _mov(DestLo, T_Lo);
1719 _mov(T_Hi, Src0Hi);
1720 _mov(DestHi, T_Hi);
1721 } else {
Jim Stichnotha3f57b92015-07-30 12:46:04 -07001722 Operand *NewSrc;
Jan Voungb3401d22015-05-18 09:38:21 -07001723 if (Dest->hasReg()) {
Jim Stichnotha3f57b92015-07-30 12:46:04 -07001724 // If Dest already has a physical register, then legalize the Src operand
1725 // into a Variable with the same register assignment. This especially
1726 // helps allow the use of Flex operands.
1727 NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum());
Jan Voungb3401d22015-05-18 09:38:21 -07001728 } else {
1729 // Dest could be a stack operand. Since we could potentially need
1730 // to do a Store (and store can only have Register operands),
1731 // legalize this to a register.
Jim Stichnotha3f57b92015-07-30 12:46:04 -07001732 NewSrc = legalize(Src0, Legal_Reg);
Jan Voungb3401d22015-05-18 09:38:21 -07001733 }
1734 if (isVectorType(Dest->getType())) {
1735 UnimplementedError(Func->getContext()->getFlags());
Jan Voung86ebec12015-08-09 07:58:35 -07001736 } else if (isFloatingType(Dest->getType())) {
1737 Variable *SrcR = legalizeToReg(NewSrc);
1738 _vmov(Dest, SrcR);
Jan Voungb3401d22015-05-18 09:38:21 -07001739 } else {
Jim Stichnotha3f57b92015-07-30 12:46:04 -07001740 _mov(Dest, NewSrc);
Jan Voungb3401d22015-05-18 09:38:21 -07001741 }
1742 }
Jan Voungb36ad9b2015-04-21 17:01:49 -07001743}
1744
1745void TargetARM32::lowerBr(const InstBr *Inst) {
Jan Voung3bfd99a2015-05-22 16:35:25 -07001746 if (Inst->isUnconditional()) {
1747 _br(Inst->getTargetUnconditional());
1748 return;
1749 }
1750 Operand *Cond = Inst->getCondition();
1751 // TODO(jvoung): Handle folding opportunities.
1752
Andrew Scull97f460d2015-07-21 10:07:42 -07001753 Variable *Src0R = legalizeToReg(Cond);
Jan Voung3bfd99a2015-05-22 16:35:25 -07001754 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1755 _cmp(Src0R, Zero);
Jan Voung6ec369e2015-06-30 11:03:15 -07001756 _br(Inst->getTargetTrue(), Inst->getTargetFalse(), CondARM32::NE);
Jan Voungb36ad9b2015-04-21 17:01:49 -07001757}
1758
Jan Voung3bfd99a2015-05-22 16:35:25 -07001759void TargetARM32::lowerCall(const InstCall *Instr) {
Jan Voung0fa6c5a2015-06-01 11:04:04 -07001760 MaybeLeafFunc = false;
Jan Voungb0a8c242015-06-18 15:00:14 -07001761 NeedsStackAlignment = true;
Jan Voung0fa6c5a2015-06-01 11:04:04 -07001762
Jan Voungb0a8c242015-06-18 15:00:14 -07001763 // Assign arguments to registers and stack. Also reserve stack.
1764 TargetARM32::CallingConv CC;
1765 // Pair of Arg Operand -> GPR number assignments.
1766 llvm::SmallVector<std::pair<Operand *, int32_t>,
1767 TargetARM32::CallingConv::ARM32_MAX_GPR_ARG> GPRArgs;
Jan Voung86ebec12015-08-09 07:58:35 -07001768 llvm::SmallVector<std::pair<Operand *, int32_t>,
1769 TargetARM32::CallingConv::ARM32_MAX_FP_REG_UNITS> FPArgs;
Jan Voungb0a8c242015-06-18 15:00:14 -07001770 // Pair of Arg Operand -> stack offset.
1771 llvm::SmallVector<std::pair<Operand *, int32_t>, 8> StackArgs;
1772 int32_t ParameterAreaSizeBytes = 0;
1773
1774 // Classify each argument operand according to the location where the
1775 // argument is passed.
1776 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
Jan Voungfbdd2442015-07-15 12:36:20 -07001777 Operand *Arg = legalizeUndef(Instr->getArg(i));
Jan Voungb0a8c242015-06-18 15:00:14 -07001778 Type Ty = Arg->getType();
1779 bool InRegs = false;
Jan Voung86ebec12015-08-09 07:58:35 -07001780 if (Ty == IceType_i64) {
Jan Voungb0a8c242015-06-18 15:00:14 -07001781 std::pair<int32_t, int32_t> Regs;
1782 if (CC.I64InRegs(&Regs)) {
1783 InRegs = true;
1784 Operand *Lo = loOperand(Arg);
1785 Operand *Hi = hiOperand(Arg);
1786 GPRArgs.push_back(std::make_pair(Lo, Regs.first));
1787 GPRArgs.push_back(std::make_pair(Hi, Regs.second));
1788 }
Jan Voung86ebec12015-08-09 07:58:35 -07001789 } else if (isVectorType(Ty) || isFloatingType(Ty)) {
1790 int32_t Reg;
1791 if (CC.FPInReg(Ty, &Reg)) {
1792 InRegs = true;
1793 FPArgs.push_back(std::make_pair(Arg, Reg));
1794 }
Jan Voungb0a8c242015-06-18 15:00:14 -07001795 } else {
1796 assert(Ty == IceType_i32);
1797 int32_t Reg;
1798 if (CC.I32InReg(&Reg)) {
1799 InRegs = true;
1800 GPRArgs.push_back(std::make_pair(Arg, Reg));
1801 }
1802 }
1803
1804 if (!InRegs) {
1805 ParameterAreaSizeBytes =
1806 applyStackAlignmentTy(ParameterAreaSizeBytes, Ty);
1807 StackArgs.push_back(std::make_pair(Arg, ParameterAreaSizeBytes));
1808 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
1809 }
1810 }
1811
1812 // Adjust the parameter area so that the stack is aligned. It is
1813 // assumed that the stack is already aligned at the start of the
1814 // calling sequence.
1815 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
1816
1817 // Subtract the appropriate amount for the argument area. This also
1818 // takes care of setting the stack adjustment during emission.
1819 //
1820 // TODO: If for some reason the call instruction gets dead-code
1821 // eliminated after lowering, we would need to ensure that the
1822 // pre-call and the post-call esp adjustment get eliminated as well.
1823 if (ParameterAreaSizeBytes) {
1824 Operand *SubAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes),
1825 Legal_Reg | Legal_Flex);
1826 _adjust_stack(ParameterAreaSizeBytes, SubAmount);
1827 }
1828
1829 // Copy arguments that are passed on the stack to the appropriate
1830 // stack locations.
Jan Voungf645d852015-07-09 10:35:09 -07001831 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
Jan Voungb0a8c242015-06-18 15:00:14 -07001832 for (auto &StackArg : StackArgs) {
1833 ConstantInteger32 *Loc =
1834 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackArg.second));
1835 Type Ty = StackArg.first->getType();
1836 OperandARM32Mem *Addr;
1837 constexpr bool SignExt = false;
1838 if (OperandARM32Mem::canHoldOffset(Ty, SignExt, StackArg.second)) {
1839 Addr = OperandARM32Mem::create(Func, Ty, SP, Loc);
1840 } else {
1841 Variable *NewBase = Func->makeVariable(SP->getType());
1842 lowerArithmetic(
1843 InstArithmetic::create(Func, InstArithmetic::Add, NewBase, SP, Loc));
1844 Addr = formMemoryOperand(NewBase, Ty);
1845 }
1846 lowerStore(InstStore::create(Func, StackArg.first, Addr));
1847 }
1848
1849 // Copy arguments to be passed in registers to the appropriate registers.
1850 for (auto &GPRArg : GPRArgs) {
Andrew Scull97f460d2015-07-21 10:07:42 -07001851 Variable *Reg = legalizeToReg(GPRArg.first, GPRArg.second);
Jan Voungb0a8c242015-06-18 15:00:14 -07001852 // Generate a FakeUse of register arguments so that they do not get
1853 // dead code eliminated as a result of the FakeKill of scratch
1854 // registers after the call.
1855 Context.insert(InstFakeUse::create(Func, Reg));
Jan Voung3bfd99a2015-05-22 16:35:25 -07001856 }
Jan Voung86ebec12015-08-09 07:58:35 -07001857 for (auto &FPArg : FPArgs) {
1858 Variable *Reg = legalizeToReg(FPArg.first, FPArg.second);
1859 Context.insert(InstFakeUse::create(Func, Reg));
1860 }
Jan Voung3bfd99a2015-05-22 16:35:25 -07001861
1862 // Generate the call instruction. Assign its result to a temporary
1863 // with high register allocation weight.
1864 Variable *Dest = Instr->getDest();
1865 // ReturnReg doubles as ReturnRegLo as necessary.
1866 Variable *ReturnReg = nullptr;
1867 Variable *ReturnRegHi = nullptr;
1868 if (Dest) {
1869 switch (Dest->getType()) {
1870 case IceType_NUM:
1871 llvm_unreachable("Invalid Call dest type");
1872 break;
1873 case IceType_void:
1874 break;
1875 case IceType_i1:
1876 case IceType_i8:
1877 case IceType_i16:
1878 case IceType_i32:
1879 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_r0);
1880 break;
1881 case IceType_i64:
1882 ReturnReg = makeReg(IceType_i32, RegARM32::Reg_r0);
1883 ReturnRegHi = makeReg(IceType_i32, RegARM32::Reg_r1);
1884 break;
1885 case IceType_f32:
Jan Voung86ebec12015-08-09 07:58:35 -07001886 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_s0);
1887 break;
Jan Voung3bfd99a2015-05-22 16:35:25 -07001888 case IceType_f64:
Jan Voung86ebec12015-08-09 07:58:35 -07001889 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_d0);
Jan Voung3bfd99a2015-05-22 16:35:25 -07001890 break;
1891 case IceType_v4i1:
1892 case IceType_v8i1:
1893 case IceType_v16i1:
1894 case IceType_v16i8:
1895 case IceType_v8i16:
1896 case IceType_v4i32:
1897 case IceType_v4f32:
Jan Voung86ebec12015-08-09 07:58:35 -07001898 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_q0);
Jan Voung3bfd99a2015-05-22 16:35:25 -07001899 break;
1900 }
1901 }
1902 Operand *CallTarget = Instr->getCallTarget();
Jan Voungb0a8c242015-06-18 15:00:14 -07001903 // TODO(jvoung): Handle sandboxing.
1904 // const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
1905
Jan Voung3bfd99a2015-05-22 16:35:25 -07001906 // Allow ConstantRelocatable to be left alone as a direct call,
1907 // but force other constants like ConstantInteger32 to be in
1908 // a register and make it an indirect call.
1909 if (!llvm::isa<ConstantRelocatable>(CallTarget)) {
1910 CallTarget = legalize(CallTarget, Legal_Reg);
1911 }
1912 Inst *NewCall = InstARM32Call::create(Func, ReturnReg, CallTarget);
1913 Context.insert(NewCall);
1914 if (ReturnRegHi)
1915 Context.insert(InstFakeDef::create(Func, ReturnRegHi));
1916
Jan Voungb0a8c242015-06-18 15:00:14 -07001917 // Add the appropriate offset to SP. The call instruction takes care
1918 // of resetting the stack offset during emission.
1919 if (ParameterAreaSizeBytes) {
1920 Operand *AddAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes),
1921 Legal_Reg | Legal_Flex);
Jan Voungf645d852015-07-09 10:35:09 -07001922 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
Jan Voungb0a8c242015-06-18 15:00:14 -07001923 _add(SP, SP, AddAmount);
1924 }
1925
Jan Voung3bfd99a2015-05-22 16:35:25 -07001926 // Insert a register-kill pseudo instruction.
1927 Context.insert(InstFakeKill::create(Func, NewCall));
1928
1929 // Generate a FakeUse to keep the call live if necessary.
1930 if (Instr->hasSideEffects() && ReturnReg) {
1931 Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
1932 Context.insert(FakeUse);
1933 }
1934
1935 if (!Dest)
1936 return;
1937
1938 // Assign the result of the call to Dest.
1939 if (ReturnReg) {
1940 if (ReturnRegHi) {
1941 assert(Dest->getType() == IceType_i64);
1942 split64(Dest);
1943 Variable *DestLo = Dest->getLo();
1944 Variable *DestHi = Dest->getHi();
1945 _mov(DestLo, ReturnReg);
1946 _mov(DestHi, ReturnRegHi);
1947 } else {
Jan Voung3bfd99a2015-05-22 16:35:25 -07001948 if (isFloatingType(Dest->getType()) || isVectorType(Dest->getType())) {
Jan Voung86ebec12015-08-09 07:58:35 -07001949 _vmov(Dest, ReturnReg);
Jan Voung3bfd99a2015-05-22 16:35:25 -07001950 } else {
Jan Voung86ebec12015-08-09 07:58:35 -07001951 assert(isIntegerType(Dest->getType()) &&
1952 typeWidthInBytes(Dest->getType()) <= 4);
Jan Voung3bfd99a2015-05-22 16:35:25 -07001953 _mov(Dest, ReturnReg);
1954 }
1955 }
1956 }
Jan Voungb36ad9b2015-04-21 17:01:49 -07001957}
1958
// Lowers cast instructions. Sext/Zext/Trunc are implemented for scalar
// integers (including the i64 split-register cases); the floating-point
// conversions and vector casts are still unimplemented stubs. Bitcast is
// only handled for the same-type case, which degenerates to an assignment.
void TargetARM32::lowerCast(const InstCast *Inst) {
  InstCast::OpKind CastKind = Inst->getCastKind();
  Variable *Dest = Inst->getDest();
  Operand *Src0 = legalizeUndef(Inst->getSrc(0));
  switch (CastKind) {
  default:
    Func->setError("Cast type not supported");
    return;
  case InstCast::Sext: {
    if (isVectorType(Dest->getType())) {
      UnimplementedError(Func->getContext()->getFlags());
    } else if (Dest->getType() == IceType_i64) {
      // Sign-extend into a 64-bit destination:
      // t1=sxtb src; t2= mov t1 asr #31; dst.lo=t1; dst.hi=t2
      Constant *ShiftAmt = Ctx->getConstantInt32(31);
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *T_Lo = makeReg(DestLo->getType());
      if (Src0->getType() == IceType_i32) {
        // i32 -> i64: low word is just a copy.
        Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
        _mov(T_Lo, Src0RF);
      } else if (Src0->getType() == IceType_i1) {
        // i1 -> i64: replicate bit 0 across the word via lsl/asr by 31.
        Variable *Src0R = legalizeToReg(Src0);
        _lsl(T_Lo, Src0R, ShiftAmt);
        _asr(T_Lo, T_Lo, ShiftAmt);
      } else {
        // i8/i16 -> i64: use the sxt instruction for the low word.
        Variable *Src0R = legalizeToReg(Src0);
        _sxt(T_Lo, Src0R);
      }
      _mov(DestLo, T_Lo);
      Variable *T_Hi = makeReg(DestHi->getType());
      if (Src0->getType() != IceType_i1) {
        // High word is the low word arithmetically shifted right by 31,
        // i.e. a sign-bit broadcast.
        _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, T_Lo,
                                               OperandARM32::ASR, ShiftAmt));
      } else {
        // For i1, the asr instruction is already done above.
        _mov(T_Hi, T_Lo);
      }
      _mov(DestHi, T_Hi);
    } else if (Src0->getType() == IceType_i1) {
      // GPR registers are 32-bit, so just use 31 as dst_bitwidth - 1.
      // lsl t1, src_reg, 31
      // asr t1, t1, 31
      // dst = t1
      Variable *Src0R = legalizeToReg(Src0);
      Constant *ShiftAmt = Ctx->getConstantInt32(31);
      Variable *T = makeReg(Dest->getType());
      _lsl(T, Src0R, ShiftAmt);
      _asr(T, T, ShiftAmt);
      _mov(Dest, T);
    } else {
      // t1 = sxt src; dst = t1
      Variable *Src0R = legalizeToReg(Src0);
      Variable *T = makeReg(Dest->getType());
      _sxt(T, Src0R);
      _mov(Dest, T);
    }
    break;
  }
  case InstCast::Zext: {
    if (isVectorType(Dest->getType())) {
      UnimplementedError(Func->getContext()->getFlags());
    } else if (Dest->getType() == IceType_i64) {
      // Zero-extend into a 64-bit destination:
      // t1=uxtb src; dst.lo=t1; dst.hi=0
      Constant *Zero = Ctx->getConstantZero(IceType_i32);
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *T_Lo = makeReg(DestLo->getType());
      // i32 and i1 can just take up the whole register.
      // i32 doesn't need uxt, while i1 will have an and mask later anyway.
      if (Src0->getType() == IceType_i32 || Src0->getType() == IceType_i1) {
        Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
        _mov(T_Lo, Src0RF);
      } else {
        Variable *Src0R = legalizeToReg(Src0);
        _uxt(T_Lo, Src0R);
      }
      if (Src0->getType() == IceType_i1) {
        // Mask i1 down to its single significant bit.
        Constant *One = Ctx->getConstantInt32(1);
        _and(T_Lo, T_Lo, One);
      }
      _mov(DestLo, T_Lo);
      Variable *T_Hi = makeReg(DestLo->getType());
      _mov(T_Hi, Zero);
      _mov(DestHi, T_Hi);
    } else if (Src0->getType() == IceType_i1) {
      // t = Src0; t &= 1; Dest = t
      Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
      Constant *One = Ctx->getConstantInt32(1);
      Variable *T = makeReg(Dest->getType());
      // Just use _mov instead of _uxt since all registers are 32-bit.
      // _uxt requires the source to be a register so could have required
      // a _mov from legalize anyway.
      _mov(T, Src0RF);
      _and(T, T, One);
      _mov(Dest, T);
    } else {
      // t1 = uxt src; dst = t1
      Variable *Src0R = legalizeToReg(Src0);
      Variable *T = makeReg(Dest->getType());
      _uxt(T, Src0R);
      _mov(Dest, T);
    }
    break;
  }
  case InstCast::Trunc: {
    if (isVectorType(Dest->getType())) {
      UnimplementedError(Func->getContext()->getFlags());
    } else {
      // Truncating from i64 only needs the low word; narrower integer
      // truncations are a plain register move since GPRs are 32-bit.
      if (Src0->getType() == IceType_i64)
        Src0 = loOperand(Src0);
      Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
      // t1 = trunc Src0RF; Dest = t1
      Variable *T = makeReg(Dest->getType());
      _mov(T, Src0RF);
      if (Dest->getType() == IceType_i1)
        _and(T, T, Ctx->getConstantInt1(1));
      _mov(Dest, T);
    }
    break;
  }
  case InstCast::Fptrunc:
    UnimplementedError(Func->getContext()->getFlags());
    break;
  case InstCast::Fpext: {
    UnimplementedError(Func->getContext()->getFlags());
    break;
  }
  case InstCast::Fptosi:
    UnimplementedError(Func->getContext()->getFlags());
    // Add a fake def to keep liveness consistent in the meantime.
    Context.insert(InstFakeDef::create(Func, Dest));
    break;
  case InstCast::Fptoui:
    UnimplementedError(Func->getContext()->getFlags());
    break;
  case InstCast::Sitofp:
    UnimplementedError(Func->getContext()->getFlags());
    break;
  case InstCast::Uitofp: {
    UnimplementedError(Func->getContext()->getFlags());
    break;
  }
  case InstCast::Bitcast: {
    // NOTE(review): this re-reads the raw (non-legalizeUndef'd) source,
    // shadowing the outer Src0 — presumably intentional since the same-type
    // case is re-lowered as an assignment; confirm the undef case is handled
    // by lowerAssign.
    Operand *Src0 = Inst->getSrc(0);
    if (Dest->getType() == Src0->getType()) {
      InstAssign *Assign = InstAssign::create(Func, Dest, Src0);
      lowerAssign(Assign);
      return;
    }
    UnimplementedError(Func->getContext()->getFlags());
    break;
  }
  }
}
2113
2114void TargetARM32::lowerExtractElement(const InstExtractElement *Inst) {
2115 (void)Inst;
Jan Voungb2d50842015-05-12 09:53:50 -07002116 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07002117}
2118
2119void TargetARM32::lowerFcmp(const InstFcmp *Inst) {
2120 (void)Inst;
Jan Voungb2d50842015-05-12 09:53:50 -07002121 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07002122}
2123
// Lowers an integer compare to a flag-setting cmp followed by two
// predicated moves of 1/0 into the boolean destination. i64 compares use
// the cmp/sbcs (signed) or cmp/cmp.eq (unsigned) idiom driven by
// TableIcmp64; narrower compares shift both operands left so the
// significant bits sit at the top of the 32-bit register.
void TargetARM32::lowerIcmp(const InstIcmp *Inst) {
  Variable *Dest = Inst->getDest();
  Operand *Src0 = legalizeUndef(Inst->getSrc(0));
  Operand *Src1 = legalizeUndef(Inst->getSrc(1));

  if (isVectorType(Dest->getType())) {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }

  // a=icmp cond, b, c ==>
  // GCC does:
  //   cmp b.hi, c.hi     or  cmp b.lo, c.lo
  //   cmp.eq b.lo, c.lo      sbcs t1, b.hi, c.hi
  //   mov.<C1> t, #1         mov.<C1> t, #1
  //   mov.<C2> t, #0         mov.<C2> t, #0
  //   mov a, t               mov a, t
  // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi"
  // is used for signed compares. In some cases, b and c need to be swapped
  // as well.
  //
  // LLVM does:
  // for EQ and NE:
  //   eor t1, b.hi, c.hi
  //   eor t2, b.lo, c.lo
  //   orrs t, t1, t2
  //   mov.<C> t, #1
  //   mov a, t
  //
  // that's nice in that it's just as short but has fewer dependencies
  // for better ILP at the cost of more registers.
  //
  // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with
  // two unconditional mov #0, two cmps, two conditional mov #1,
  // and one conditional reg mov. That has few dependencies for good ILP,
  // but is a longer sequence.
  //
  // So, we are going with the GCC version since it's usually better (except
  // perhaps for eq/ne). We could revisit special-casing eq/ne later.
  Constant *Zero = Ctx->getConstantZero(IceType_i32);
  Constant *One = Ctx->getConstantInt32(1);
  if (Src0->getType() == IceType_i64) {
    InstIcmp::ICond Conditon = Inst->getCondition();
    size_t Index = static_cast<size_t>(Conditon);
    assert(Index < TableIcmp64Size);
    Variable *Src0Lo, *Src0Hi;
    Operand *Src1LoRF, *Src1HiRF;
    // TableIcmp64 may require swapping the operands (e.g. to turn a ">"
    // into a "<" with the operands exchanged).
    if (TableIcmp64[Index].Swapped) {
      Src0Lo = legalizeToReg(loOperand(Src1));
      Src0Hi = legalizeToReg(hiOperand(Src1));
      Src1LoRF = legalize(loOperand(Src0), Legal_Reg | Legal_Flex);
      Src1HiRF = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
    } else {
      Src0Lo = legalizeToReg(loOperand(Src0));
      Src0Hi = legalizeToReg(hiOperand(Src0));
      Src1LoRF = legalize(loOperand(Src1), Legal_Reg | Legal_Flex);
      Src1HiRF = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex);
    }
    Variable *T = makeReg(IceType_i32);
    if (TableIcmp64[Index].IsSigned) {
      // Signed: cmp on low halves, then subtract-with-carry (setting flags)
      // on the high halves.
      Variable *ScratchReg = makeReg(IceType_i32);
      _cmp(Src0Lo, Src1LoRF);
      _sbcs(ScratchReg, Src0Hi, Src1HiRF);
      // ScratchReg isn't going to be used, but we need the
      // side-effect of setting flags from this operation.
      Context.insert(InstFakeUse::create(Func, ScratchReg));
    } else {
      // Unsigned: compare high halves; only if equal, compare low halves.
      _cmp(Src0Hi, Src1HiRF);
      _cmp(Src0Lo, Src1LoRF, CondARM32::EQ);
    }
    // Predicated materialization of the boolean result.
    _mov(T, One, TableIcmp64[Index].C1);
    _mov_nonkillable(T, Zero, TableIcmp64[Index].C2);
    _mov(Dest, T);
    return;
  }

  // a=icmp cond b, c ==>
  // GCC does:
  //   <u/s>xtb tb, b
  //   <u/s>xtb tc, c
  //   cmp tb, tc
  //   mov.C1 t, #0
  //   mov.C2 t, #1
  //   mov a, t
  // where the unsigned/sign extension is not needed for 32-bit.
  // They also have special cases for EQ and NE. E.g., for NE:
  //   <extend to tb, tc>
  //   subs t, tb, tc
  //   movne t, #1
  //   mov a, t
  //
  // LLVM does:
  //   lsl tb, b, #<N>
  //   mov t, #0
  //   cmp tb, c, lsl #<N>
  //   mov.<C> t, #1
  //   mov a, t
  //
  // the left shift is by 0, 16, or 24, which allows the comparison to focus
  // on the digits that actually matter (for 16-bit or 8-bit signed/unsigned).
  // For the unsigned case, for some reason it does similar to GCC and does
  // a uxtb first. It's not clear to me why that special-casing is needed.
  //
  // We'll go with the LLVM way for now, since it's shorter and has just as
  // few dependencies.
  int32_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType());
  assert(ShiftAmt >= 0);
  Constant *ShiftConst = nullptr;
  Variable *Src0R = nullptr;
  Variable *T = makeReg(IceType_i32);
  if (ShiftAmt) {
    // Narrow type: shift operand 0 so its significant bits are topmost.
    ShiftConst = Ctx->getConstantInt32(ShiftAmt);
    Src0R = makeReg(IceType_i32);
    _lsl(Src0R, legalizeToReg(Src0), ShiftConst);
  } else {
    Src0R = legalizeToReg(Src0);
  }
  _mov(T, Zero);
  if (ShiftAmt) {
    // Operand 1 is shifted the same amount via the flexible second operand.
    Variable *Src1R = legalizeToReg(Src1);
    OperandARM32FlexReg *Src1RShifted = OperandARM32FlexReg::create(
        Func, IceType_i32, Src1R, OperandARM32::LSL, ShiftConst);
    _cmp(Src0R, Src1RShifted);
  } else {
    Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);
    _cmp(Src0R, Src1RF);
  }
  _mov_nonkillable(T, One, getIcmp32Mapping(Inst->getCondition()));
  _mov(Dest, T);
  return;
}
2255
2256void TargetARM32::lowerInsertElement(const InstInsertElement *Inst) {
2257 (void)Inst;
Jan Voungb2d50842015-05-12 09:53:50 -07002258 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07002259}
2260
// Lowers an intrinsic call instruction. Atomic operations, Fabs, and the
// sandboxed NaClReadTP report through UnimplementedError for now; libc-like
// intrinsics (memcpy/memmove/memset, setjmp/longjmp, ctpop) are lowered to
// runtime helper calls via makeHelperCall()/lowerCall(); the rest are
// expanded inline.
void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
  switch (Instr->getIntrinsicInfo().ID) {
  case Intrinsics::AtomicCmpxchg: {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  case Intrinsics::AtomicFence:
    UnimplementedError(Func->getContext()->getFlags());
    return;
  case Intrinsics::AtomicFenceAll:
    // NOTE: FenceAll should prevent and load/store from being moved
    // across the fence (both atomic and non-atomic). The InstARM32Mfence
    // instruction is currently marked coarsely as "HasSideEffects".
    UnimplementedError(Func->getContext()->getFlags());
    return;
  case Intrinsics::AtomicIsLockFree: {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  case Intrinsics::AtomicLoad: {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  case Intrinsics::AtomicRMW:
    UnimplementedError(Func->getContext()->getFlags());
    return;
  case Intrinsics::AtomicStore: {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  case Intrinsics::Bswap: {
    Variable *Dest = Instr->getDest();
    Operand *Val = Instr->getArg(0);
    Type Ty = Val->getType();
    if (Ty == IceType_i64) {
      // i64: byte-reverse each 32-bit half with rev, then swap the halves
      // when writing the destination pair.
      Val = legalizeUndef(Val);
      Variable *Val_Lo = legalizeToReg(loOperand(Val));
      Variable *Val_Hi = legalizeToReg(hiOperand(Val));
      Variable *T_Lo = makeReg(IceType_i32);
      Variable *T_Hi = makeReg(IceType_i32);
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      _rev(T_Lo, Val_Lo);
      _rev(T_Hi, Val_Hi);
      _mov(DestLo, T_Hi);
      _mov(DestHi, T_Lo);
    } else {
      assert(Ty == IceType_i32 || Ty == IceType_i16);
      Variable *ValR = legalizeToReg(Val);
      Variable *T = makeReg(Ty);
      _rev(T, ValR);
      if (Val->getType() == IceType_i16) {
        // rev reverses all 4 bytes, so a 16-bit swap ends up in the upper
        // half of the register; shift it back down.
        Operand *Sixteen =
            legalize(Ctx->getConstantInt32(16), Legal_Reg | Legal_Flex);
        _lsr(T, T, Sixteen);
      }
      _mov(Dest, T);
    }
    return;
  }
  case Intrinsics::Ctpop: {
    Variable *Dest = Instr->getDest();
    Operand *Val = Instr->getArg(0);
    InstCall *Call = makeHelperCall(isInt32Asserting32Or64(Val->getType())
                                        ? H_call_ctpop_i32
                                        : H_call_ctpop_i64,
                                    Dest, 1);
    Call->addArg(Val);
    lowerCall(Call);
    // The popcount helpers always return 32-bit values, while the intrinsic's
    // signature matches some 64-bit platform's native instructions and
    // expect to fill a 64-bit reg. Thus, clear the upper bits of the dest
    // just in case the user doesn't do that in the IR or doesn't toss the bits
    // via truncate.
    if (Val->getType() == IceType_i64) {
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Constant *Zero = Ctx->getConstantZero(IceType_i32);
      // Passing a null T to _mov materializes a fresh temporary register
      // variable (same pattern as elsewhere in this file).
      Variable *T = nullptr;
      _mov(T, Zero);
      _mov(DestHi, T);
    }
    return;
  }
  case Intrinsics::Ctlz: {
    // The "is zero undef" parameter is ignored and we always return
    // a well-defined value.
    Operand *Val = Instr->getArg(0);
    Variable *ValLoR;
    Variable *ValHiR = nullptr;
    if (Val->getType() == IceType_i64) {
      Val = legalizeUndef(Val);
      ValLoR = legalizeToReg(loOperand(Val));
      ValHiR = legalizeToReg(hiOperand(Val));
    } else {
      ValLoR = legalizeToReg(Val);
    }
    lowerCLZ(Instr->getDest(), ValLoR, ValHiR);
    return;
  }
  case Intrinsics::Cttz: {
    // Essentially like Clz, but reverse the bits first.
    Operand *Val = Instr->getArg(0);
    Variable *ValLoR;
    Variable *ValHiR = nullptr;
    if (Val->getType() == IceType_i64) {
      Val = legalizeUndef(Val);
      ValLoR = legalizeToReg(loOperand(Val));
      ValHiR = legalizeToReg(hiOperand(Val));
      Variable *TLo = makeReg(IceType_i32);
      Variable *THi = makeReg(IceType_i32);
      _rbit(TLo, ValLoR);
      _rbit(THi, ValHiR);
      // After bit-reversal the high word's trailing zeros become the low
      // word's leading zeros, so the halves swap roles for lowerCLZ.
      ValLoR = THi;
      ValHiR = TLo;
    } else {
      ValLoR = legalizeToReg(Val);
      Variable *T = makeReg(IceType_i32);
      _rbit(T, ValLoR);
      ValLoR = T;
    }
    lowerCLZ(Instr->getDest(), ValLoR, ValHiR);
    return;
  }
  case Intrinsics::Fabs: {
    // Add a fake def to keep liveness consistent in the meantime.
    Context.insert(InstFakeDef::create(Func, Instr->getDest()));
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  case Intrinsics::Longjmp: {
    InstCall *Call = makeHelperCall(H_call_longjmp, nullptr, 2);
    Call->addArg(Instr->getArg(0));
    Call->addArg(Instr->getArg(1));
    lowerCall(Call);
    return;
  }
  case Intrinsics::Memcpy: {
    // In the future, we could potentially emit an inline memcpy/memset, etc.
    // for intrinsic calls w/ a known length.
    InstCall *Call = makeHelperCall(H_call_memcpy, nullptr, 3);
    Call->addArg(Instr->getArg(0));
    Call->addArg(Instr->getArg(1));
    Call->addArg(Instr->getArg(2));
    lowerCall(Call);
    return;
  }
  case Intrinsics::Memmove: {
    InstCall *Call = makeHelperCall(H_call_memmove, nullptr, 3);
    Call->addArg(Instr->getArg(0));
    Call->addArg(Instr->getArg(1));
    Call->addArg(Instr->getArg(2));
    lowerCall(Call);
    return;
  }
  case Intrinsics::Memset: {
    // The value operand needs to be extended to a stack slot size because the
    // PNaCl ABI requires arguments to be at least 32 bits wide.
    Operand *ValOp = Instr->getArg(1);
    assert(ValOp->getType() == IceType_i8);
    Variable *ValExt = Func->makeVariable(stackSlotType());
    lowerCast(InstCast::create(Func, InstCast::Zext, ValExt, ValOp));
    // Technically, ARM has their own __aeabi_memset, but we can use plain
    // memset too. The value and size argument need to be flipped if we ever
    // decide to use __aeabi_memset.
    InstCall *Call = makeHelperCall(H_call_memset, nullptr, 3);
    Call->addArg(Instr->getArg(0));
    Call->addArg(ValExt);
    Call->addArg(Instr->getArg(2));
    lowerCall(Call);
    return;
  }
  case Intrinsics::NaClReadTP: {
    if (Ctx->getFlags().getUseSandboxing()) {
      UnimplementedError(Func->getContext()->getFlags());
    } else {
      // Without sandboxing, read the thread pointer via the helper.
      InstCall *Call = makeHelperCall(H_call_read_tp, Instr->getDest(), 0);
      lowerCall(Call);
    }
    return;
  }
  case Intrinsics::Setjmp: {
    InstCall *Call = makeHelperCall(H_call_setjmp, Instr->getDest(), 1);
    Call->addArg(Instr->getArg(0));
    lowerCall(Call);
    return;
  }
  case Intrinsics::Sqrt: {
    // Lower to a vsqrt on a register, then vmov the result to the dest.
    Variable *Src = legalizeToReg(Instr->getArg(0));
    Variable *Dest = Instr->getDest();
    Variable *T = makeReg(Dest->getType());
    _vsqrt(T, Src);
    _vmov(Dest, T);
    return;
  }
  case Intrinsics::Stacksave: {
    // stacksave: copy the current sp into the destination.
    Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
    Variable *Dest = Instr->getDest();
    _mov(Dest, SP);
    return;
  }
  case Intrinsics::Stackrestore: {
    // stackrestore: move the saved value back into sp. Uses the nonkillable
    // form so sp is not treated as freshly defined here.
    Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
    Operand *Val = legalize(Instr->getArg(0), Legal_Reg | Legal_Flex);
    _mov_nonkillable(SP, Val);
    return;
  }
  case Intrinsics::Trap:
    _trap();
    return;
  case Intrinsics::UnknownIntrinsic:
    Func->setError("Should not be lowering UnknownIntrinsic");
    return;
  }
  return;
}
2476
// Emits a count-leading-zeros sequence into Dest. ValLoR holds the (already
// legalized) low word; ValHiR is the high word for i64 inputs and nullptr for
// i32. For i64: clz the low word into T, then, if the high word is nonzero
// (NE after the cmp), overwrite with clz of the high word; otherwise use
// T + 32. The high half of the i64 result is always zero.
void TargetARM32::lowerCLZ(Variable *Dest, Variable *ValLoR, Variable *ValHiR) {
  Type Ty = Dest->getType();
  assert(Ty == IceType_i32 || Ty == IceType_i64);
  Variable *T = makeReg(IceType_i32);
  _clz(T, ValLoR);
  if (Ty == IceType_i64) {
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    Operand *Zero =
        legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
    Operand *ThirtyTwo =
        legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex);
    _cmp(ValHiR, Zero);
    Variable *T2 = makeReg(IceType_i32);
    // Unconditionally compute the "high word was zero" answer (T + 32), then
    // conditionally replace it when the high word is nonzero.
    _add(T2, T, ThirtyTwo);
    _clz(T2, ValHiR, CondARM32::NE);
    // T2 is actually a source as well when the predicate is not AL
    // (since it may leave T2 alone). We use set_dest_nonkillable to
    // prolong the liveness of T2 as if it was used as a source.
    _set_dest_nonkillable();
    _mov(DestLo, T2);
    // A null T3 makes _mov materialize a fresh temporary register.
    Variable *T3 = nullptr;
    _mov(T3, Zero);
    _mov(DestHi, T3);
    return;
  }
  _mov(Dest, T);
  return;
}
2506
Jan Voungbefd03a2015-06-02 11:03:03 -07002507void TargetARM32::lowerLoad(const InstLoad *Load) {
2508 // A Load instruction can be treated the same as an Assign
2509 // instruction, after the source operand is transformed into an
2510 // OperandARM32Mem operand.
2511 Type Ty = Load->getDest()->getType();
2512 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty);
2513 Variable *DestLoad = Load->getDest();
2514
2515 // TODO(jvoung): handled folding opportunities. Sign and zero extension
2516 // can be folded into a load.
2517 InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0);
2518 lowerAssign(Assign);
Jan Voungb36ad9b2015-04-21 17:01:49 -07002519}
2520
2521void TargetARM32::doAddressOptLoad() {
Jan Voungb2d50842015-05-12 09:53:50 -07002522 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07002523}
2524
Qining Luaee5fa82015-08-20 14:59:03 -07002525void TargetARM32::randomlyInsertNop(float Probability,
2526 RandomNumberGenerator &RNG) {
2527 RandomNumberGeneratorWrapper RNGW(RNG);
2528 if (RNGW.getTrueWithProbability(Probability)) {
Jan Voungb2d50842015-05-12 09:53:50 -07002529 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07002530 }
2531}
2532
// Phi instructions are expected to have been lowered by prelowerPhis()
// before code lowering runs; finding one here is an internal error.
void TargetARM32::lowerPhi(const InstPhi * /*Inst*/) {
  Func->setError("Phi found in regular instruction list");
}
2536
// Lowers a return. The return value (if any) is forced into the ABI return
// register(s) used below — r0/r1 for i64, s0 for f32, d0 for f64, q0 for
// vectors, r0 otherwise — and the register is attached to the ret as a fake
// source so liveness keeps it alive until the return.
void TargetARM32::lowerRet(const InstRet *Inst) {
  Variable *Reg = nullptr;
  if (Inst->hasRetValue()) {
    Operand *Src0 = Inst->getRetValue();
    Type Ty = Src0->getType();
    if (Ty == IceType_i64) {
      // i64 is returned in the r0 (lo) / r1 (hi) pair; the fake use of r1
      // keeps the high half alive since only r0 rides on the ret.
      Src0 = legalizeUndef(Src0);
      Variable *R0 = legalizeToReg(loOperand(Src0), RegARM32::Reg_r0);
      Variable *R1 = legalizeToReg(hiOperand(Src0), RegARM32::Reg_r1);
      Reg = R0;
      Context.insert(InstFakeUse::create(Func, R1));
    } else if (Ty == IceType_f32) {
      Variable *S0 = legalizeToReg(Src0, RegARM32::Reg_s0);
      Reg = S0;
    } else if (Ty == IceType_f64) {
      Variable *D0 = legalizeToReg(Src0, RegARM32::Reg_d0);
      Reg = D0;
    } else if (isVectorType(Src0->getType())) {
      Variable *Q0 = legalizeToReg(Src0, RegARM32::Reg_q0);
      Reg = Q0;
    } else {
      Operand *Src0F = legalize(Src0, Legal_Reg | Legal_Flex);
      _mov(Reg, Src0F, CondARM32::AL, RegARM32::Reg_r0);
    }
  }
  // Add a ret instruction even if sandboxing is enabled, because
  // addEpilog explicitly looks for a ret instruction as a marker for
  // where to insert the frame removal instructions.
  // addEpilog is responsible for restoring the "lr" register as needed
  // prior to this ret instruction.
  _ret(getPhysicalRegister(RegARM32::Reg_lr), Reg);
  // Add a fake use of sp to make sure sp stays alive for the entire
  // function. Otherwise post-call sp adjustments get dead-code
  // eliminated. TODO: Are there more places where the fake use
  // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not
  // have a ret instruction.
  Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
  Context.insert(InstFakeUse::create(Func, SP));
}
2576
// Lowers a select as a compare against zero followed by predicated moves:
// the temporary is first set to the false value, then conditionally
// overwritten with the true value when the condition is nonzero (NE). i64
// selects apply the same pattern to each 32-bit half. Vector and FP selects
// are not implemented yet.
void TargetARM32::lowerSelect(const InstSelect *Inst) {
  Variable *Dest = Inst->getDest();
  Type DestTy = Dest->getType();
  Operand *SrcT = Inst->getTrueOperand();
  Operand *SrcF = Inst->getFalseOperand();
  Operand *Condition = Inst->getCondition();

  if (isVectorType(DestTy)) {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  if (isFloatingType(DestTy)) {
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  // TODO(jvoung): handle folding opportunities.
  // cmp cond, #0; mov t, SrcF; mov_cond t, SrcT; mov dest, t
  Variable *CmpOpnd0 = legalizeToReg(Condition);
  Operand *CmpOpnd1 = Ctx->getConstantZero(IceType_i32);
  _cmp(CmpOpnd0, CmpOpnd1);
  CondARM32::Cond Cond = CondARM32::NE;
  if (DestTy == IceType_i64) {
    SrcT = legalizeUndef(SrcT);
    SrcF = legalizeUndef(SrcF);
    // Set the low portion.
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *TLo = nullptr;
    Operand *SrcFLo = legalize(loOperand(SrcF), Legal_Reg | Legal_Flex);
    _mov(TLo, SrcFLo);
    Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Flex);
    // _mov_nonkillable: the predicated mov may leave TLo untouched, so TLo
    // must not be considered redefined (killed) here.
    _mov_nonkillable(TLo, SrcTLo, Cond);
    _mov(DestLo, TLo);
    // Set the high portion.
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    Variable *THi = nullptr;
    Operand *SrcFHi = legalize(hiOperand(SrcF), Legal_Reg | Legal_Flex);
    _mov(THi, SrcFHi);
    Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Flex);
    _mov_nonkillable(THi, SrcTHi, Cond);
    _mov(DestHi, THi);
    return;
  }
  Variable *T = nullptr;
  SrcF = legalize(SrcF, Legal_Reg | Legal_Flex);
  _mov(T, SrcF);
  SrcT = legalize(SrcT, Legal_Reg | Legal_Flex);
  _mov_nonkillable(T, SrcT, Cond);
  _mov(Dest, T);
}
2626
2627void TargetARM32::lowerStore(const InstStore *Inst) {
Jan Voungbefd03a2015-06-02 11:03:03 -07002628 Operand *Value = Inst->getData();
2629 Operand *Addr = Inst->getAddr();
2630 OperandARM32Mem *NewAddr = formMemoryOperand(Addr, Value->getType());
2631 Type Ty = NewAddr->getType();
2632
2633 if (Ty == IceType_i64) {
Jan Voungfbdd2442015-07-15 12:36:20 -07002634 Value = legalizeUndef(Value);
Andrew Scull97f460d2015-07-21 10:07:42 -07002635 Variable *ValueHi = legalizeToReg(hiOperand(Value));
2636 Variable *ValueLo = legalizeToReg(loOperand(Value));
Jan Voungbefd03a2015-06-02 11:03:03 -07002637 _str(ValueHi, llvm::cast<OperandARM32Mem>(hiOperand(NewAddr)));
2638 _str(ValueLo, llvm::cast<OperandARM32Mem>(loOperand(NewAddr)));
2639 } else if (isVectorType(Ty)) {
2640 UnimplementedError(Func->getContext()->getFlags());
2641 } else {
Andrew Scull97f460d2015-07-21 10:07:42 -07002642 Variable *ValueR = legalizeToReg(Value);
Jan Voungbefd03a2015-06-02 11:03:03 -07002643 _str(ValueR, NewAddr);
2644 }
Jan Voungb36ad9b2015-04-21 17:01:49 -07002645}
2646
2647void TargetARM32::doAddressOptStore() {
Jan Voungb2d50842015-05-12 09:53:50 -07002648 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07002649}
2650
2651void TargetARM32::lowerSwitch(const InstSwitch *Inst) {
Andrew Scullfdc54db2015-06-29 11:21:18 -07002652 // This implements the most naive possible lowering.
2653 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default
2654 Operand *Src0 = Inst->getComparison();
2655 SizeT NumCases = Inst->getNumCases();
2656 if (Src0->getType() == IceType_i64) {
Jan Voungfbdd2442015-07-15 12:36:20 -07002657 Src0 = legalizeUndef(Src0);
Andrew Scull97f460d2015-07-21 10:07:42 -07002658 Variable *Src0Lo = legalizeToReg(loOperand(Src0));
2659 Variable *Src0Hi = legalizeToReg(hiOperand(Src0));
Andrew Scullfdc54db2015-06-29 11:21:18 -07002660 for (SizeT I = 0; I < NumCases; ++I) {
2661 Operand *ValueLo = Ctx->getConstantInt32(Inst->getValue(I));
2662 Operand *ValueHi = Ctx->getConstantInt32(Inst->getValue(I) >> 32);
2663 ValueLo = legalize(ValueLo, Legal_Reg | Legal_Flex);
2664 ValueHi = legalize(ValueHi, Legal_Reg | Legal_Flex);
2665 _cmp(Src0Lo, ValueLo);
2666 _cmp(Src0Hi, ValueHi, CondARM32::EQ);
2667 _br(Inst->getLabel(I), CondARM32::EQ);
2668 }
2669 _br(Inst->getLabelDefault());
2670 return;
2671 }
Jan Vounge0df91f2015-06-30 08:47:06 -07002672
Andrew Scullfdc54db2015-06-29 11:21:18 -07002673 // 32 bit integer
Andrew Scull97f460d2015-07-21 10:07:42 -07002674 Variable *Src0Var = legalizeToReg(Src0);
Andrew Scullfdc54db2015-06-29 11:21:18 -07002675 for (SizeT I = 0; I < NumCases; ++I) {
2676 Operand *Value = Ctx->getConstantInt32(Inst->getValue(I));
2677 Value = legalize(Value, Legal_Reg | Legal_Flex);
2678 _cmp(Src0Var, Value);
2679 _br(Inst->getLabel(I), CondARM32::EQ);
2680 }
2681 _br(Inst->getLabelDefault());
Jan Voungb36ad9b2015-04-21 17:01:49 -07002682}
2683
// An unreachable instruction lowers to a trap.
void TargetARM32::lowerUnreachable(const InstUnreachable * /*Inst*/) {
  _trap();
}
2687
// Lowers phis for the current node using the shared 32-bit phi-lowering
// helper (which also handles splitting 64-bit phi operands).
void TargetARM32::prelowerPhis() {
  PhiLowering::prelowerPhis32Bit<TargetARM32>(this, Context.getNode(), Func);
}
2691
// Returns a register variable intended to hold an all-zeros vector of type
// Ty. Vector support is not implemented yet, so this currently only reports
// UnimplementedError; the register is still returned so callers get a
// well-formed operand.
Variable *TargetARM32::makeVectorOfZeros(Type Ty, int32_t RegNum) {
  Variable *Reg = makeReg(Ty, RegNum);
  UnimplementedError(Func->getContext()->getFlags());
  return Reg;
}
2697
2698// Helper for legalize() to emit the right code to lower an operand to a
2699// register of the appropriate type.
2700Variable *TargetARM32::copyToReg(Operand *Src, int32_t RegNum) {
2701 Type Ty = Src->getType();
2702 Variable *Reg = makeReg(Ty, RegNum);
Jan Voung86ebec12015-08-09 07:58:35 -07002703 if (isVectorType(Ty) || isFloatingType(Ty)) {
2704 _vmov(Reg, Src);
Jan Voungb3401d22015-05-18 09:38:21 -07002705 } else {
2706 // Mov's Src operand can really only be the flexible second operand type
2707 // or a register. Users should guarantee that.
2708 _mov(Reg, Src);
2709 }
2710 return Reg;
2711}
2712
// Legalizes From into a form permitted by the Allowed mask (Legal_Reg /
// Legal_Flex / Legal_Mem). If RegNum is specified, any register produced is
// pre-colored to that physical register. Handles each operand kind in turn:
// OperandARM32Mem, OperandARM32Flex, Constant, and Variable.
Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
                               int32_t RegNum) {
  Type Ty = From->getType();
  // Assert that a physical register is allowed. To date, all calls
  // to legalize() allow a physical register. Legal_Flex converts
  // registers to the right type OperandARM32FlexReg as needed.
  assert(Allowed & Legal_Reg);
  // Go through the various types of operands:
  // OperandARM32Mem, OperandARM32Flex, Constant, and Variable.
  // Given the above assertion, if type of operand is not legal
  // (e.g., OperandARM32Mem and !Legal_Mem), we can always copy
  // to a register.
  if (auto Mem = llvm::dyn_cast<OperandARM32Mem>(From)) {
    // Before doing anything with a Mem operand, we need to ensure
    // that the Base and Index components are in physical registers.
    Variable *Base = Mem->getBase();
    Variable *Index = Mem->getIndex();
    Variable *RegBase = nullptr;
    Variable *RegIndex = nullptr;
    if (Base) {
      RegBase = legalizeToReg(Base);
    }
    if (Index) {
      RegIndex = legalizeToReg(Index);
    }
    // Create a new operand if there was a change.
    if (Base != RegBase || Index != RegIndex) {
      // There is only a reg +/- reg or reg + imm form.
      // Figure out which to re-create.
      if (Mem->isRegReg()) {
        Mem = OperandARM32Mem::create(Func, Ty, RegBase, RegIndex,
                                      Mem->getShiftOp(), Mem->getShiftAmt(),
                                      Mem->getAddrMode());
      } else {
        Mem = OperandARM32Mem::create(Func, Ty, RegBase, Mem->getOffset(),
                                      Mem->getAddrMode());
      }
    }
    if (!(Allowed & Legal_Mem)) {
      // Memory is not allowed: load the value into a register, using vldr
      // for floating-point types and ldr for core types.
      Variable *Reg = makeReg(Ty, RegNum);
      if (isVectorType(Ty)) {
        UnimplementedError(Func->getContext()->getFlags());
      } else if (isFloatingType(Ty)) {
        _vldr(Reg, Mem);
      } else {
        _ldr(Reg, Mem);
      }
      From = Reg;
    } else {
      From = Mem;
    }
    return From;
  }

  if (auto Flex = llvm::dyn_cast<OperandARM32Flex>(From)) {
    if (!(Allowed & Legal_Flex)) {
      if (auto FlexReg = llvm::dyn_cast<OperandARM32FlexReg>(Flex)) {
        if (FlexReg->getShiftOp() == OperandARM32::kNoShift) {
          From = FlexReg->getReg();
          // Fall through and let From be checked as a Variable below,
          // where it may or may not need a register.
        } else {
          return copyToReg(Flex, RegNum);
        }
      } else {
        return copyToReg(Flex, RegNum);
      }
    } else {
      return From;
    }
  }

  if (llvm::isa<Constant>(From)) {
    if (llvm::isa<ConstantUndef>(From)) {
      From = legalizeUndef(From, RegNum);
      if (isVectorType(Ty))
        return From;
    }
    // There should be no constants of vector type (other than undef).
    assert(!isVectorType(Ty));
    bool CanBeFlex = Allowed & Legal_Flex;
    if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(From)) {
      uint32_t RotateAmt;
      uint32_t Immed_8;
      uint32_t Value = static_cast<uint32_t>(C32->getValue());
      // Check if the immediate will fit in a Flexible second operand,
      // if a Flexible second operand is allowed. We need to know the exact
      // value, so that rules out relocatable constants.
      // Also try the inverse and use MVN if possible.
      if (CanBeFlex &&
          OperandARM32FlexImm::canHoldImm(Value, &RotateAmt, &Immed_8)) {
        return OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt);
      } else if (CanBeFlex && OperandARM32FlexImm::canHoldImm(
                                  ~Value, &RotateAmt, &Immed_8)) {
        auto InvertedFlex =
            OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt);
        Variable *Reg = makeReg(Ty, RegNum);
        _mvn(Reg, InvertedFlex);
        return Reg;
      } else {
        // Do a movw/movt to a register.
        Variable *Reg = makeReg(Ty, RegNum);
        uint32_t UpperBits = (Value >> 16) & 0xFFFF;
        _movw(Reg,
              UpperBits != 0 ? Ctx->getConstantInt32(Value & 0xFFFF) : C32);
        if (UpperBits != 0) {
          _movt(Reg, Ctx->getConstantInt32(UpperBits));
        }
        return Reg;
      }
    } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) {
      // Relocatable constants: materialize the address with movw/movt.
      Variable *Reg = makeReg(Ty, RegNum);
      _movw(Reg, C);
      _movt(Reg, C);
      return Reg;
    } else {
      assert(isScalarFloatingType(Ty));
      // Load floats/doubles from literal pool.
      // TODO(jvoung): Allow certain immediates to be encoded directly in
      // an operand. See Table A7-18 of the ARM manual:
      // "Floating-point modified immediate constants".
      // Or, for 32-bit floating point numbers, just encode the raw bits
      // into a movw/movt pair to GPR, and vmov to an SREG, instead of using
      // a movw/movt pair to get the const-pool address then loading to SREG.
      std::string Buffer;
      llvm::raw_string_ostream StrBuf(Buffer);
      llvm::cast<Constant>(From)->emitPoolLabel(StrBuf);
      llvm::cast<Constant>(From)->setShouldBePooled(true);
      Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true);
      Variable *BaseReg = makeReg(getPointerType());
      _movw(BaseReg, Offset);
      _movt(BaseReg, Offset);
      From = formMemoryOperand(BaseReg, Ty);
      return copyToReg(From, RegNum);
    }
  }

  if (auto Var = llvm::dyn_cast<Variable>(From)) {
    // Check if the variable is guaranteed a physical register. This
    // can happen either when the variable is pre-colored or when it is
    // assigned infinite weight.
    bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg());
    // We need a new physical register for the operand if:
    //   Mem is not allowed and Var isn't guaranteed a physical
    //   register, or
    //   RegNum is required and Var->getRegNum() doesn't match.
    if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
        (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {
      From = copyToReg(From, RegNum);
    }
    return From;
  }
  llvm_unreachable("Unhandled operand kind in legalize()");

  return From;
}
2869
Jan Voungfbdd2442015-07-15 12:36:20 -07002870/// Provide a trivial wrapper to legalize() for this common usage.
Andrew Scull97f460d2015-07-21 10:07:42 -07002871Variable *TargetARM32::legalizeToReg(Operand *From, int32_t RegNum) {
Jan Voungb3401d22015-05-18 09:38:21 -07002872 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
2873}
2874
Jan Voungfbdd2442015-07-15 12:36:20 -07002875/// Legalize undef values to concrete values.
2876Operand *TargetARM32::legalizeUndef(Operand *From, int32_t RegNum) {
2877 Type Ty = From->getType();
2878 if (llvm::isa<ConstantUndef>(From)) {
2879 // Lower undefs to zero. Another option is to lower undefs to an
2880 // uninitialized register; however, using an uninitialized register
2881 // results in less predictable code.
2882 //
2883 // If in the future the implementation is changed to lower undef
2884 // values to uninitialized registers, a FakeDef will be needed:
2885 // Context.insert(InstFakeDef::create(Func, Reg));
2886 // This is in order to ensure that the live range of Reg is not
2887 // overestimated. If the constant being lowered is a 64 bit value,
2888 // then the result should be split and the lo and hi components will
2889 // need to go in uninitialized registers.
2890 if (isVectorType(Ty))
2891 return makeVectorOfZeros(Ty, RegNum);
2892 return Ctx->getConstantZero(Ty);
2893 }
2894 return From;
2895}
2896
Jan Voungbefd03a2015-06-02 11:03:03 -07002897OperandARM32Mem *TargetARM32::formMemoryOperand(Operand *Operand, Type Ty) {
2898 OperandARM32Mem *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand);
2899 // It may be the case that address mode optimization already creates
2900 // an OperandARM32Mem, so in that case it wouldn't need another level
2901 // of transformation.
2902 if (Mem) {
2903 return llvm::cast<OperandARM32Mem>(legalize(Mem));
2904 }
2905 // If we didn't do address mode optimization, then we only
2906 // have a base/offset to work with. ARM always requires a base
2907 // register, so just use that to hold the operand.
Andrew Scull97f460d2015-07-21 10:07:42 -07002908 Variable *Base = legalizeToReg(Operand);
Jan Voungbefd03a2015-06-02 11:03:03 -07002909 return OperandARM32Mem::create(
2910 Func, Ty, Base,
2911 llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32)));
2912}
2913
Jan Voungb3401d22015-05-18 09:38:21 -07002914Variable *TargetARM32::makeReg(Type Type, int32_t RegNum) {
2915 // There aren't any 64-bit integer registers for ARM32.
2916 assert(Type != IceType_i64);
2917 Variable *Reg = Func->makeVariable(Type);
2918 if (RegNum == Variable::NoRegister)
Andrew Scull11c9a322015-08-28 14:24:14 -07002919 Reg->setMustHaveReg();
Jan Voungb3401d22015-05-18 09:38:21 -07002920 else
2921 Reg->setRegNum(RegNum);
2922 return Reg;
2923}
2924
Jan Voung55500db2015-05-26 14:25:40 -07002925void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align) {
2926 assert(llvm::isPowerOf2_32(Align));
Jan Voung0fa6c5a2015-06-01 11:04:04 -07002927 uint32_t RotateAmt;
Jan Voung55500db2015-05-26 14:25:40 -07002928 uint32_t Immed_8;
2929 Operand *Mask;
2930 // Use AND or BIC to mask off the bits, depending on which immediate fits
2931 // (if it fits at all). Assume Align is usually small, in which case BIC
Jan Voung0fa6c5a2015-06-01 11:04:04 -07002932 // works better. Thus, this rounds down to the alignment.
Jan Voung55500db2015-05-26 14:25:40 -07002933 if (OperandARM32FlexImm::canHoldImm(Align - 1, &RotateAmt, &Immed_8)) {
2934 Mask = legalize(Ctx->getConstantInt32(Align - 1), Legal_Reg | Legal_Flex);
2935 _bic(Reg, Reg, Mask);
2936 } else {
2937 Mask = legalize(Ctx->getConstantInt32(-Align), Legal_Reg | Legal_Flex);
2938 _and(Reg, Reg, Mask);
2939 }
2940}
2941
Jan Voungb36ad9b2015-04-21 17:01:49 -07002942void TargetARM32::postLower() {
2943 if (Ctx->getFlags().getOptLevel() == Opt_m1)
2944 return;
Jan Voungb3401d22015-05-18 09:38:21 -07002945 inferTwoAddress();
Jan Voungb36ad9b2015-04-21 17:01:49 -07002946}
2947
// Produce a randomized permutation of the register numbers (register
// shuffling for diversification). Not yet implemented for ARM32; the
// parameters are intentionally unused until then.
void TargetARM32::makeRandomRegisterPermutation(
    llvm::SmallVectorImpl<int32_t> &Permutation,
    const llvm::SmallBitVector &ExcludeRegisters, uint64_t Salt) const {
  (void)Permutation;
  (void)ExcludeRegisters;
  (void)Salt;
  UnimplementedError(Func->getContext()->getFlags());
}
2956
Jan Voung76bb0be2015-05-14 09:26:19 -07002957void TargetARM32::emit(const ConstantInteger32 *C) const {
Jim Stichnoth20b71f52015-06-24 15:52:24 -07002958 if (!BuildDefs::dump())
Jan Voung76bb0be2015-05-14 09:26:19 -07002959 return;
2960 Ostream &Str = Ctx->getStrEmit();
2961 Str << getConstantPrefix() << C->getValue();
Jan Voungb36ad9b2015-04-21 17:01:49 -07002962}
2963
Jan Voung76bb0be2015-05-14 09:26:19 -07002964void TargetARM32::emit(const ConstantInteger64 *) const {
2965 llvm::report_fatal_error("Not expecting to emit 64-bit integers");
Jan Voungb36ad9b2015-04-21 17:01:49 -07002966}
Jan Voung76bb0be2015-05-14 09:26:19 -07002967
void TargetARM32::emit(const ConstantFloat *C) const {
  // Float constant emission is not yet implemented for ARM32.
  (void)C;
  UnimplementedError(Ctx->getFlags());
}
2972
void TargetARM32::emit(const ConstantDouble *C) const {
  // Double constant emission is not yet implemented for ARM32.
  (void)C;
  UnimplementedError(Ctx->getFlags());
}
2977
void TargetARM32::emit(const ConstantUndef *) const {
  // Undef values are expected to be lowered to concrete values (e.g. zero)
  // during legalization, so none should survive to emission.
  llvm::report_fatal_error("undef value encountered by emitter.");
}
Jan Voungb36ad9b2015-04-21 17:01:49 -07002981
// TargetDataARM32 lowers module-level data (globals, constants, jump tables)
// for the ARM32 target.
TargetDataARM32::TargetDataARM32(GlobalContext *Ctx)
    : TargetDataLowering(Ctx) {}
2984
John Porto8b1a7052015-06-17 13:20:08 -07002985void TargetDataARM32::lowerGlobals(const VariableDeclarationList &Vars,
2986 const IceString &SectionSuffix) {
Jan Voungb36ad9b2015-04-21 17:01:49 -07002987 switch (Ctx->getFlags().getOutFileType()) {
2988 case FT_Elf: {
2989 ELFObjectWriter *Writer = Ctx->getObjectWriter();
John Porto8b1a7052015-06-17 13:20:08 -07002990 Writer->writeDataSection(Vars, llvm::ELF::R_ARM_ABS32, SectionSuffix);
Jan Voungb36ad9b2015-04-21 17:01:49 -07002991 } break;
2992 case FT_Asm:
2993 case FT_Iasm: {
2994 const IceString &TranslateOnly = Ctx->getFlags().getTranslateOnly();
2995 OstreamLocker L(Ctx);
John Porto8b1a7052015-06-17 13:20:08 -07002996 for (const VariableDeclaration *Var : Vars) {
Jan Voungb36ad9b2015-04-21 17:01:49 -07002997 if (GlobalContext::matchSymbolName(Var->getName(), TranslateOnly)) {
John Porto8b1a7052015-06-17 13:20:08 -07002998 emitGlobal(*Var, SectionSuffix);
Jan Voungb36ad9b2015-04-21 17:01:49 -07002999 }
3000 }
3001 } break;
3002 }
3003}
3004
John Porto0f86d032015-06-15 07:44:27 -07003005void TargetDataARM32::lowerConstants() {
Jan Voungb36ad9b2015-04-21 17:01:49 -07003006 if (Ctx->getFlags().getDisableTranslation())
3007 return;
Jan Voungb2d50842015-05-12 09:53:50 -07003008 UnimplementedError(Ctx->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07003009}
3010
Andrew Scull86df4e92015-07-30 13:54:44 -07003011void TargetDataARM32::lowerJumpTables() {
3012 if (Ctx->getFlags().getDisableTranslation())
3013 return;
3014 UnimplementedError(Ctx->getFlags());
3015}
3016
Jan Voungfb792842015-06-11 15:27:50 -07003017TargetHeaderARM32::TargetHeaderARM32(GlobalContext *Ctx)
Jan Voung6ec369e2015-06-30 11:03:15 -07003018 : TargetHeaderLowering(Ctx), CPUFeatures(Ctx->getFlags()) {}
Jan Voungfb792842015-06-11 15:27:50 -07003019
// Emit the file-scope assembly header: unified syntax, the .cpu directive,
// and the EABI build attributes describing the ABI/FPU configuration.
void TargetHeaderARM32::lower() {
  OstreamLocker L(Ctx);
  Ostream &Str = Ctx->getStrEmit();
  Str << ".syntax unified\n";
  // Emit build attributes in format: .eabi_attribute TAG, VALUE.
  // See Sec. 2 of "Addenda to, and Errata in the ABI for the ARM architecture"
  // http://infocenter.arm.com/help/topic/com.arm.doc.ihi0045d/IHI0045D_ABI_addenda.pdf
  //
  // Tag_conformance should be emitted first in a file-scope
  // sub-subsection of the first public subsection of the attributes.
  Str << ".eabi_attribute 67, \"2.09\" @ Tag_conformance\n";
  // Chromebooks are at least A15, but do A9 for higher compat.
  // For some reason, the LLVM ARM asm parser has the .cpu directive override
  // the mattr specified on the commandline. So to test hwdiv, we need to set
  // the .cpu directive higher (can't just rely on --mattr=...).
  if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
    Str << ".cpu cortex-a15\n";
  } else {
    Str << ".cpu cortex-a9\n";
  }
  Str << ".eabi_attribute 6, 10 @ Tag_CPU_arch: ARMv7\n"
      << ".eabi_attribute 7, 65 @ Tag_CPU_arch_profile: App profile\n";
  Str << ".eabi_attribute 8, 1 @ Tag_ARM_ISA_use: Yes\n"
      << ".eabi_attribute 9, 2 @ Tag_THUMB_ISA_use: Thumb-2\n";
  Str << ".fpu neon\n"
      << ".eabi_attribute 17, 1 @ Tag_ABI_PCS_GOT_use: permit directly\n"
      << ".eabi_attribute 20, 1 @ Tag_ABI_FP_denormal\n"
      << ".eabi_attribute 21, 1 @ Tag_ABI_FP_exceptions\n"
      << ".eabi_attribute 23, 3 @ Tag_ABI_FP_number_model: IEEE 754\n"
      << ".eabi_attribute 34, 1 @ Tag_CPU_unaligned_access\n"
      << ".eabi_attribute 24, 1 @ Tag_ABI_align_needed: 8-byte\n"
      << ".eabi_attribute 25, 1 @ Tag_ABI_align_preserved: 8-byte\n"
      << ".eabi_attribute 28, 1 @ Tag_ABI_VFP_args\n"
      << ".eabi_attribute 36, 1 @ Tag_FP_HP_extension\n"
      << ".eabi_attribute 38, 1 @ Tag_ABI_FP_16bit_format\n"
      << ".eabi_attribute 42, 1 @ Tag_MPextension_use\n"
      << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n";
  // Advertise hardware integer divide only when the target supports it.
  if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
    Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n";
  }
  // Technically R9 is used for TLS with Sandboxing, and we reserve it.
  // However, for compatibility with current NaCl LLVM, don't claim that.
  Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n";
}
3064
Jan Voungb36ad9b2015-04-21 17:01:49 -07003065} // end of namespace Ice