//===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the TargetLoweringARM32 class, which consists almost
// entirely of the lowering sequence for each high-level instruction.
//
//===----------------------------------------------------------------------===//

#include "llvm/Support/MathExtras.h"

#include "IceCfg.h"
#include "IceCfgNode.h"
#include "IceClFlags.h"
#include "IceDefs.h"
#include "IceELFObjectWriter.h"
#include "IceGlobalInits.h"
#include "IceInstARM32.h"
#include "IceLiveness.h"
#include "IceOperand.h"
#include "IceRegistersARM32.h"
#include "IceTargetLoweringARM32.def"
#include "IceTargetLoweringARM32.h"
#include "IceUtils.h"

namespace Ice {

namespace {

void UnimplementedError(const ClFlags &Flags) {
  if (!Flags.getSkipUnimplemented()) {
    // Use llvm_unreachable instead of report_fatal_error, since
    // llvm_unreachable gives better stack traces.
    llvm_unreachable("Not yet implemented");
    abort();
  }
}

// The following table summarizes the logic for lowering the icmp instruction
// for i32 and narrower types. Each icmp condition has a clear mapping to an
// ARM32 conditional move instruction.
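// For instance, an equality compare (InstIcmp::Eq) maps to the ARM EQ
// condition, so getIcmp32Mapping(InstIcmp::Eq) below yields CondARM32::EQ.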

const struct TableIcmp32_ {
  CondARM32::Cond Mapping;
} TableIcmp32[] = {
#define X(val, is_signed, swapped64, C_32, C1_64, C2_64) \
  { CondARM32::C_32 } \
  ,
    ICMPARM32_TABLE
#undef X
};
const size_t TableIcmp32Size = llvm::array_lengthof(TableIcmp32);

// The following table summarizes the logic for lowering the icmp instruction
// for the i64 type. Two conditional moves are needed for setting to 1 or 0.
// The operands may need to be swapped, and there is a slight difference
// for signed vs unsigned (comparing hi vs lo first, and using cmp vs sbc).
const struct TableIcmp64_ {
  bool IsSigned;
  bool Swapped;
  CondARM32::Cond C1, C2;
} TableIcmp64[] = {
#define X(val, is_signed, swapped64, C_32, C1_64, C2_64) \
  { is_signed, swapped64, CondARM32::C1_64, CondARM32::C2_64 } \
  ,
    ICMPARM32_TABLE
#undef X
};
const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64);

CondARM32::Cond getIcmp32Mapping(InstIcmp::ICond Cond) {
  size_t Index = static_cast<size_t>(Cond);
  assert(Index < TableIcmp32Size);
  return TableIcmp32[Index].Mapping;
}

// In some cases, there are x-macros tables for both high-level and
// low-level instructions/operands that use the same enum key value.
// The tables are kept separate to maintain a proper separation
// between abstraction layers. There is a risk that the tables could
// get out of sync if enum values are reordered or if entries are
// added or deleted. The following dummy namespaces use
// static_asserts to ensure everything is kept in sync.

// Validate the enum values in ICMPARM32_TABLE.
namespace dummy1 {
// Define a temporary set of enum values based on low-level table
// entries.
enum _tmp_enum {
#define X(val, signed, swapped64, C_32, C1_64, C2_64) _tmp_##val,
  ICMPARM32_TABLE
#undef X
  _num
};
// Define a set of constants based on high-level table entries.
#define X(tag, str) static const int _table1_##tag = InstIcmp::tag;
ICEINSTICMP_TABLE
#undef X
// Define a set of constants based on low-level table entries, and
// ensure the table entry keys are consistent.
#define X(val, signed, swapped64, C_32, C1_64, C2_64) \
  static const int _table2_##val = _tmp_##val; \
  static_assert( \
      _table1_##val == _table2_##val, \
      "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE");
ICMPARM32_TABLE
#undef X
// Repeat the static asserts with respect to the high-level table
// entries in case the high-level table has extra entries.
#define X(tag, str) \
  static_assert( \
      _table1_##tag == _table2_##tag, \
      "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE");
ICEINSTICMP_TABLE
#undef X
} // end of namespace dummy1

// Stack alignment
const uint32_t ARM32_STACK_ALIGNMENT_BYTES = 16;

// Value is in bytes. Return Value adjusted to the next highest multiple
// of the stack alignment.
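// For example, with the 16-byte ARM32_STACK_ALIGNMENT_BYTES above,
// applyStackAlignment(20) returns 32, and applyStackAlignment(32) returns 32.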
uint32_t applyStackAlignment(uint32_t Value) {
  return Utils::applyAlignment(Value, ARM32_STACK_ALIGNMENT_BYTES);
}

// Value is in bytes. Return Value adjusted to the next highest multiple
// of the stack alignment required for the given type.
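// For example, an i64 (8-byte) slot starting at offset 4 is bumped to offset
// 8, while an i32 slot would stay at offset 4.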
uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) {
  // Use natural alignment, except that normally (non-NaCl) ARM only
  // aligns vectors to 8 bytes.
  // TODO(jvoung): Check this ...
  size_t typeAlignInBytes = typeWidthInBytes(Ty);
  if (isVectorType(Ty))
    typeAlignInBytes = 8;
  return Utils::applyAlignment(Value, typeAlignInBytes);
}

} // end of anonymous namespace

TargetARM32::TargetARM32(Cfg *Func) : TargetLowering(Func) {
  static_assert(
      (ARM32InstructionSet::End - ARM32InstructionSet::Begin) ==
          (TargetInstructionSet::ARM32InstructionSet_End -
           TargetInstructionSet::ARM32InstructionSet_Begin),
      "ARM32InstructionSet range different from TargetInstructionSet");
  if (Func->getContext()->getFlags().getTargetInstructionSet() !=
      TargetInstructionSet::BaseInstructionSet) {
    InstructionSet = static_cast<ARM32InstructionSet>(
        (Func->getContext()->getFlags().getTargetInstructionSet() -
         TargetInstructionSet::ARM32InstructionSet_Begin) +
        ARM32InstructionSet::Begin);
  }
  // TODO: Don't initialize IntegerRegisters and friends every time.
  // Instead, initialize in some sort of static initializer for the
  // class.
  llvm::SmallBitVector IntegerRegisters(RegARM32::Reg_NUM);
  llvm::SmallBitVector FloatRegisters(RegARM32::Reg_NUM);
  llvm::SmallBitVector VectorRegisters(RegARM32::Reg_NUM);
  llvm::SmallBitVector InvalidRegisters(RegARM32::Reg_NUM);
  ScratchRegs.resize(RegARM32::Reg_NUM);
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \
          isFP) \
  IntegerRegisters[RegARM32::val] = isInt; \
  FloatRegisters[RegARM32::val] = isFP; \
  VectorRegisters[RegARM32::val] = isFP; \
  ScratchRegs[RegARM32::val] = scratch;
  REGARM32_TABLE;
#undef X
  TypeToRegisterSet[IceType_void] = InvalidRegisters;
  TypeToRegisterSet[IceType_i1] = IntegerRegisters;
  TypeToRegisterSet[IceType_i8] = IntegerRegisters;
  TypeToRegisterSet[IceType_i16] = IntegerRegisters;
  TypeToRegisterSet[IceType_i32] = IntegerRegisters;
  TypeToRegisterSet[IceType_i64] = IntegerRegisters;
  TypeToRegisterSet[IceType_f32] = FloatRegisters;
  TypeToRegisterSet[IceType_f64] = FloatRegisters;
  TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i8] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i16] = VectorRegisters;
  TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
  TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
}

void TargetARM32::translateO2() {
  TimerMarker T(TimerStack::TT_O2, Func);

  // TODO(stichnot): share passes with X86?
  // https://code.google.com/p/nativeclient/issues/detail?id=4094

  if (!Ctx->getFlags().getPhiEdgeSplit()) {
    // Lower Phi instructions.
    Func->placePhiLoads();
    if (Func->hasError())
      return;
    Func->placePhiStores();
    if (Func->hasError())
      return;
    Func->deletePhis();
    if (Func->hasError())
      return;
    Func->dump("After Phi lowering");
  }

  // Address mode optimization.
  Func->getVMetadata()->init(VMK_SingleDefs);
  Func->doAddressOpt();

  // Argument lowering
  Func->doArgLowering();

  // Target lowering. This requires liveness analysis for some parts
  // of the lowering decisions, such as compare/branch fusing. If
  // non-lightweight liveness analysis is used, the instructions need
  // to be renumbered first. TODO: This renumbering should only be
  // necessary if we're actually calculating live intervals, which we
  // only do for register allocation.
  Func->renumberInstructions();
  if (Func->hasError())
    return;

  // TODO: It should be sufficient to use the fastest liveness
  // calculation, i.e. livenessLightweight(). However, for some
  // reason that slows down the rest of the translation. Investigate.
  Func->liveness(Liveness_Basic);
  if (Func->hasError())
    return;
  Func->dump("After ARM32 address mode opt");

  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After ARM32 codegen");

  // Register allocation. This requires instruction renumbering and
  // full liveness analysis.
  Func->renumberInstructions();
  if (Func->hasError())
    return;
  Func->liveness(Liveness_Intervals);
  if (Func->hasError())
    return;
  // Validate the live range computations. The expensive validation
  // call is deliberately only made when assertions are enabled.
  assert(Func->validateLiveness());
  // The post-codegen dump is done here, after liveness analysis and
  // associated cleanup, to make the dump cleaner and more useful.
  Func->dump("After initial ARM32 codegen");
  Func->getVMetadata()->init(VMK_All);
  regAlloc(RAK_Global);
  if (Func->hasError())
    return;
  Func->dump("After linear scan regalloc");

  if (Ctx->getFlags().getPhiEdgeSplit()) {
    Func->advancedPhiLowering();
    Func->dump("After advanced Phi lowering");
  }

  // Stack frame mapping.
  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  Func->contractEmptyNodes();
  Func->reorderNodes();

  // Branch optimization. This needs to be done just before code
  // emission. In particular, no transformations that insert or
  // reorder CfgNodes should be done after branch optimization. We go
  // ahead and do it before nop insertion to reduce the amount of work
  // needed for searching for opportunities.
  Func->doBranchOpt();
  Func->dump("After branch optimization");

  // Nop insertion
  if (Ctx->getFlags().shouldDoNopInsertion()) {
    Func->doNopInsertion();
  }
}

void TargetARM32::translateOm1() {
  TimerMarker T(TimerStack::TT_Om1, Func);

  // TODO: share passes with X86?

  Func->placePhiLoads();
  if (Func->hasError())
    return;
  Func->placePhiStores();
  if (Func->hasError())
    return;
  Func->deletePhis();
  if (Func->hasError())
    return;
  Func->dump("After Phi lowering");

  Func->doArgLowering();

  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After initial ARM32 codegen");

  regAlloc(RAK_InfOnly);
  if (Func->hasError())
    return;
  Func->dump("After regalloc of infinite-weight variables");

  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  // Nop insertion
  if (Ctx->getFlags().shouldDoNopInsertion()) {
    Func->doNopInsertion();
  }
}

bool TargetARM32::doBranchOpt(Inst *I, const CfgNode *NextNode) {
  if (InstARM32Br *Br = llvm::dyn_cast<InstARM32Br>(I)) {
    return Br->optimizeBranch(NextNode);
  }
  return false;
}

IceString TargetARM32::RegNames[] = {
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \
          isFP) \
  name,
    REGARM32_TABLE
#undef X
};

IceString TargetARM32::getRegName(SizeT RegNum, Type Ty) const {
  assert(RegNum < RegARM32::Reg_NUM);
  (void)Ty;
  return RegNames[RegNum];
}

Variable *TargetARM32::getPhysicalRegister(SizeT RegNum, Type Ty) {
  if (Ty == IceType_void)
    Ty = IceType_i32;
  if (PhysicalRegisters[Ty].empty())
    PhysicalRegisters[Ty].resize(RegARM32::Reg_NUM);
  assert(RegNum < PhysicalRegisters[Ty].size());
  Variable *Reg = PhysicalRegisters[Ty][RegNum];
  if (Reg == nullptr) {
    Reg = Func->makeVariable(Ty);
    Reg->setRegNum(RegNum);
    PhysicalRegisters[Ty][RegNum] = Reg;
    // Specially mark SP and LR as "arguments" so that they are considered
    // live upon function entry.
    if (RegNum == RegARM32::Reg_sp || RegNum == RegARM32::Reg_lr) {
      Func->addImplicitArg(Reg);
      Reg->setIgnoreLiveness();
    }
  }
  return Reg;
}

void TargetARM32::emitVariable(const Variable *Var) const {
  Ostream &Str = Ctx->getStrEmit();
  if (Var->hasReg()) {
    Str << getRegName(Var->getRegNum(), Var->getType());
    return;
  }
  if (Var->getWeight().isInf()) {
    llvm::report_fatal_error(
        "Infinite-weight Variable has no register assigned");
  }
  int32_t Offset = Var->getStackOffset();
  if (!hasFramePointer())
    Offset += getStackAdjustment();
  // TODO(jvoung): Handle out of range. Perhaps we need a scratch register
  // to materialize a larger offset.
  constexpr bool SignExt = false;
  if (!OperandARM32Mem::canHoldOffset(Var->getType(), SignExt, Offset)) {
    llvm::report_fatal_error("Illegal stack offset");
  }
  const Type FrameSPTy = IceType_i32;
  Str << "[" << getRegName(getFrameOrStackReg(), FrameSPTy);
  if (Offset != 0) {
    Str << ", " << getConstantPrefix() << Offset;
  }
  Str << "]";
}

bool TargetARM32::CallingConv::I64InRegs(std::pair<int32_t, int32_t> *Regs) {
  if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG)
    return false;
  int32_t RegLo, RegHi;
  // Always start i64 registers at an even register, so this may end
  // up padding away a register.
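  // E.g., for arguments (i32, i64), the i32 is assigned r0, r1 is skipped as
  // padding, and the i64 is passed in the r2/r3 pair.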
  if (NumGPRRegsUsed % 2 != 0) {
    ++NumGPRRegsUsed;
  }
  RegLo = RegARM32::Reg_r0 + NumGPRRegsUsed;
  ++NumGPRRegsUsed;
  RegHi = RegARM32::Reg_r0 + NumGPRRegsUsed;
  ++NumGPRRegsUsed;
  // If this bumps us past the boundary, don't allocate to a register
  // and leave any previously speculatively consumed registers as consumed.
  if (NumGPRRegsUsed > ARM32_MAX_GPR_ARG)
    return false;
  Regs->first = RegLo;
  Regs->second = RegHi;
  return true;
}

bool TargetARM32::CallingConv::I32InReg(int32_t *Reg) {
  if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG)
    return false;
  *Reg = RegARM32::Reg_r0 + NumGPRRegsUsed;
  ++NumGPRRegsUsed;
  return true;
}

void TargetARM32::lowerArguments() {
  VarList &Args = Func->getArgs();
  TargetARM32::CallingConv CC;

  // For each register argument, replace Arg in the argument list with the
  // home register. Then generate an instruction in the prolog to copy the
  // home register to the assigned location of Arg.
  Context.init(Func->getEntryNode());
  Context.setInsertPoint(Context.getCur());

  for (SizeT I = 0, E = Args.size(); I < E; ++I) {
    Variable *Arg = Args[I];
    Type Ty = Arg->getType();
    // TODO(jvoung): handle float/vector types.
    if (isVectorType(Ty)) {
      UnimplementedError(Func->getContext()->getFlags());
      continue;
    } else if (isFloatingType(Ty)) {
      UnimplementedError(Func->getContext()->getFlags());
      continue;
    } else if (Ty == IceType_i64) {
      std::pair<int32_t, int32_t> RegPair;
      if (!CC.I64InRegs(&RegPair))
        continue;
      Variable *RegisterArg = Func->makeVariable(Ty);
      Variable *RegisterLo = Func->makeVariable(IceType_i32);
      Variable *RegisterHi = Func->makeVariable(IceType_i32);
      if (ALLOW_DUMP) {
        RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
        RegisterLo->setName(Func, "home_reg_lo:" + Arg->getName(Func));
        RegisterHi->setName(Func, "home_reg_hi:" + Arg->getName(Func));
      }
      RegisterLo->setRegNum(RegPair.first);
      RegisterLo->setIsArg();
      RegisterHi->setRegNum(RegPair.second);
      RegisterHi->setIsArg();
      RegisterArg->setLoHi(RegisterLo, RegisterHi);
      RegisterArg->setIsArg();
      Arg->setIsArg(false);

      Args[I] = RegisterArg;
      Context.insert(InstAssign::create(Func, Arg, RegisterArg));
      continue;
    } else {
      assert(Ty == IceType_i32);
      int32_t RegNum;
      if (!CC.I32InReg(&RegNum))
        continue;
      Variable *RegisterArg = Func->makeVariable(Ty);
      if (ALLOW_DUMP) {
        RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
      }
      RegisterArg->setRegNum(RegNum);
      RegisterArg->setIsArg();
      Arg->setIsArg(false);

      Args[I] = RegisterArg;
      Context.insert(InstAssign::create(Func, Arg, RegisterArg));
    }
  }
}

// Helper function for addProlog().
//
// This assumes Arg is an argument passed on the stack. This sets the
// frame offset for Arg and updates InArgsSizeBytes according to Arg's
// width. For an I64 arg that has been split into Lo and Hi components,
// it calls itself recursively on the components, taking care to handle
// Lo first because of the little-endian architecture. Lastly, this
// function generates an instruction to copy Arg into its assigned
// register if applicable.
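// For example, an i64 stack argument placed at the very start of the in-args
// area is assigned offset BasicFrameOffset + 0 for its Lo half and
// BasicFrameOffset + 4 for its Hi half.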
499void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
500 size_t BasicFrameOffset,
501 size_t &InArgsSizeBytes) {
502 Variable *Lo = Arg->getLo();
503 Variable *Hi = Arg->getHi();
504 Type Ty = Arg->getType();
505 if (Lo && Hi && Ty == IceType_i64) {
506 assert(Lo->getType() != IceType_i64); // don't want infinite recursion
507 assert(Hi->getType() != IceType_i64); // don't want infinite recursion
508 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
509 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
510 return;
511 }
Jan Voungb0a8c242015-06-18 15:00:14 -0700512 InArgsSizeBytes = applyStackAlignmentTy(InArgsSizeBytes, Ty);
Jan Voung0fa6c5a2015-06-01 11:04:04 -0700513 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
514 InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
515 // If the argument variable has been assigned a register, we need to load
516 // the value from the stack slot.
517 if (Arg->hasReg()) {
518 assert(Ty != IceType_i64);
519 OperandARM32Mem *Mem = OperandARM32Mem::create(
520 Func, Ty, FramePtr, llvm::cast<ConstantInteger32>(
521 Ctx->getConstantInt32(Arg->getStackOffset())));
522 if (isVectorType(Arg->getType())) {
523 UnimplementedError(Func->getContext()->getFlags());
524 } else {
525 _ldr(Arg, Mem);
526 }
527 // This argument-copying instruction uses an explicit
528 // OperandARM32Mem operand instead of a Variable, so its
529 // fill-from-stack operation has to be tracked separately for
530 // statistics.
531 Ctx->statsUpdateFills();
532 }
533}
534
Jan Voungb36ad9b2015-04-21 17:01:49 -0700535Type TargetARM32::stackSlotType() { return IceType_i32; }
536
537void TargetARM32::addProlog(CfgNode *Node) {
Jan Voung0fa6c5a2015-06-01 11:04:04 -0700538 // Stack frame layout:
539 //
540 // +------------------------+
541 // | 1. preserved registers |
542 // +------------------------+
543 // | 2. padding |
544 // +------------------------+
545 // | 3. global spill area |
546 // +------------------------+
547 // | 4. padding |
548 // +------------------------+
549 // | 5. local spill area |
550 // +------------------------+
551 // | 6. padding |
552 // +------------------------+
553 // | 7. allocas |
554 // +------------------------+
555 //
556 // The following variables record the size in bytes of the given areas:
557 // * PreservedRegsSizeBytes: area 1
558 // * SpillAreaPaddingBytes: area 2
559 // * GlobalsSize: area 3
560 // * GlobalsAndSubsequentPaddingSize: areas 3 - 4
561 // * LocalsSpillAreaSize: area 5
562 // * SpillAreaSizeBytes: areas 2 - 6
563 // Determine stack frame offsets for each Variable without a
564 // register assignment. This can be done as one variable per stack
565 // slot. Or, do coalescing by running the register allocator again
566 // with an infinite set of registers (as a side effect, this gives
567 // variables a second chance at physical register assignment).
568 //
569 // A middle ground approach is to leverage sparsity and allocate one
570 // block of space on the frame for globals (variables with
571 // multi-block lifetime), and one block to share for locals
572 // (single-block lifetime).
573
574 Context.init(Node);
575 Context.setInsertPoint(Context.getCur());
576
577 llvm::SmallBitVector CalleeSaves =
578 getRegisterSet(RegSet_CalleeSave, RegSet_None);
579 RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
580 VarList SortedSpilledVariables;
581 size_t GlobalsSize = 0;
582 // If there is a separate locals area, this represents that area.
583 // Otherwise it counts any variable not counted by GlobalsSize.
584 SpillAreaSizeBytes = 0;
585 // If there is a separate locals area, this specifies the alignment
586 // for it.
587 uint32_t LocalsSlotsAlignmentBytes = 0;
588 // The entire spill locations area gets aligned to largest natural
589 // alignment of the variables that have a spill slot.
590 uint32_t SpillAreaAlignmentBytes = 0;
591 // For now, we don't have target-specific variables that need special
592 // treatment (no stack-slot-linked SpillVariable type).
593 std::function<bool(Variable *)> TargetVarHook =
594 [](Variable *) { return false; };
595
596 // Compute the list of spilled variables and bounds for GlobalsSize, etc.
597 getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
598 &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
599 &LocalsSlotsAlignmentBytes, TargetVarHook);
600 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
601 SpillAreaSizeBytes += GlobalsSize;
602
603 // Add push instructions for preserved registers.
604 // On ARM, "push" can push a whole list of GPRs via a bitmask (0-15).
605 // Unlike x86, ARM also has callee-saved float/vector registers.
606 // The "vpush" instruction can handle a whole list of float/vector
607 // registers, but it only handles contiguous sequences of registers
608 // by specifying the start and the length.
609 VarList GPRsToPreserve;
610 GPRsToPreserve.reserve(CalleeSaves.size());
611 uint32_t NumCallee = 0;
612 size_t PreservedRegsSizeBytes = 0;
613 // Consider FP and LR as callee-save / used as needed.
614 if (UsesFramePointer) {
615 CalleeSaves[RegARM32::Reg_fp] = true;
616 assert(RegsUsed[RegARM32::Reg_fp] == false);
617 RegsUsed[RegARM32::Reg_fp] = true;
618 }
619 if (!MaybeLeafFunc) {
620 CalleeSaves[RegARM32::Reg_lr] = true;
621 RegsUsed[RegARM32::Reg_lr] = true;
622 }
623 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
624 if (CalleeSaves[i] && RegsUsed[i]) {
625 // TODO(jvoung): do separate vpush for each floating point
626 // register segment and += 4, or 8 depending on type.
627 ++NumCallee;
628 PreservedRegsSizeBytes += 4;
629 GPRsToPreserve.push_back(getPhysicalRegister(i));
630 }
631 }
632 Ctx->statsUpdateRegistersSaved(NumCallee);
633 if (!GPRsToPreserve.empty())
634 _push(GPRsToPreserve);
635
636 // Generate "mov FP, SP" if needed.
637 if (UsesFramePointer) {
638 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
639 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
640 _mov(FP, SP);
641 // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode).
642 Context.insert(InstFakeUse::create(Func, FP));
643 }
644
645 // Align the variables area. SpillAreaPaddingBytes is the size of
646 // the region after the preserved registers and before the spill areas.
647 // LocalsSlotsPaddingBytes is the amount of padding between the globals
648 // and locals area if they are separate.
649 assert(SpillAreaAlignmentBytes <= ARM32_STACK_ALIGNMENT_BYTES);
650 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
651 uint32_t SpillAreaPaddingBytes = 0;
652 uint32_t LocalsSlotsPaddingBytes = 0;
653 alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes,
654 GlobalsSize, LocalsSlotsAlignmentBytes,
655 &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes);
656 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
657 uint32_t GlobalsAndSubsequentPaddingSize =
658 GlobalsSize + LocalsSlotsPaddingBytes;
659
660 // Align SP if necessary.
661 if (NeedsStackAlignment) {
662 uint32_t StackOffset = PreservedRegsSizeBytes;
663 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);
664 SpillAreaSizeBytes = StackSize - StackOffset;
665 }
666
667 // Generate "sub sp, SpillAreaSizeBytes"
668 if (SpillAreaSizeBytes) {
669 // Use the IP inter-procedural scratch register if needed to legalize
670 // the immediate.
671 Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
672 Legal_Reg | Legal_Flex, RegARM32::Reg_ip);
673 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
674 _sub(SP, SP, SubAmount);
675 }
676 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
677
678 resetStackAdjustment();
679
680 // Fill in stack offsets for stack args, and copy args into registers
681 // for those that were register-allocated. Args are pushed right to
682 // left, so Arg[0] is closest to the stack/frame pointer.
683 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
684 size_t BasicFrameOffset = PreservedRegsSizeBytes;
685 if (!UsesFramePointer)
686 BasicFrameOffset += SpillAreaSizeBytes;
687
688 const VarList &Args = Func->getArgs();
689 size_t InArgsSizeBytes = 0;
Jan Voungb0a8c242015-06-18 15:00:14 -0700690 TargetARM32::CallingConv CC;
Jan Voung0fa6c5a2015-06-01 11:04:04 -0700691 for (Variable *Arg : Args) {
692 Type Ty = Arg->getType();
Jan Voungb0a8c242015-06-18 15:00:14 -0700693 bool InRegs = false;
Jan Voung0fa6c5a2015-06-01 11:04:04 -0700694 // Skip arguments passed in registers.
695 if (isVectorType(Ty)) {
696 UnimplementedError(Func->getContext()->getFlags());
697 continue;
698 } else if (isFloatingType(Ty)) {
699 UnimplementedError(Func->getContext()->getFlags());
700 continue;
Jan Voungb0a8c242015-06-18 15:00:14 -0700701 } else if (Ty == IceType_i64) {
702 std::pair<int32_t, int32_t> DummyRegs;
703 InRegs = CC.I64InRegs(&DummyRegs);
704 } else {
705 assert(Ty == IceType_i32);
706 int32_t DummyReg;
707 InRegs = CC.I32InReg(&DummyReg);
Jan Voung0fa6c5a2015-06-01 11:04:04 -0700708 }
Jan Voungb0a8c242015-06-18 15:00:14 -0700709 if (!InRegs)
710 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
Jan Voung0fa6c5a2015-06-01 11:04:04 -0700711 }
712
713 // Fill in stack offsets for locals.
714 assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
715 SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
716 UsesFramePointer);
717 this->HasComputedFrame = true;
718
719 if (ALLOW_DUMP && Func->isVerbose(IceV_Frame)) {
720 OstreamLocker L(Func->getContext());
721 Ostream &Str = Func->getContext()->getStrDump();
722
723 Str << "Stack layout:\n";
724 uint32_t SPAdjustmentPaddingSize =
725 SpillAreaSizeBytes - LocalsSpillAreaSize -
726 GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes;
727 Str << " in-args = " << InArgsSizeBytes << " bytes\n"
728 << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
729 << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
730 << " globals spill area = " << GlobalsSize << " bytes\n"
731 << " globals-locals spill areas intermediate padding = "
732 << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
733 << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
734 << " SP alignment padding = " << SPAdjustmentPaddingSize << " bytes\n";
735
736 Str << "Stack details:\n"
737 << " SP adjustment = " << SpillAreaSizeBytes << " bytes\n"
738 << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
739 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
740 << " bytes\n"
741 << " is FP based = " << UsesFramePointer << "\n";
742 }
Jan Voungb36ad9b2015-04-21 17:01:49 -0700743}
744
745void TargetARM32::addEpilog(CfgNode *Node) {
Jan Voung0fa6c5a2015-06-01 11:04:04 -0700746 InstList &Insts = Node->getInsts();
747 InstList::reverse_iterator RI, E;
748 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
749 if (llvm::isa<InstARM32Ret>(*RI))
750 break;
751 }
752 if (RI == E)
753 return;
754
755 // Convert the reverse_iterator position into its corresponding
756 // (forward) iterator position.
757 InstList::iterator InsertPoint = RI.base();
758 --InsertPoint;
759 Context.init(Node);
760 Context.setInsertPoint(InsertPoint);
761
762 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
763 if (UsesFramePointer) {
764 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
765 // For late-stage liveness analysis (e.g. asm-verbose mode),
766 // adding a fake use of SP before the assignment of SP=FP keeps
767 // previous SP adjustments from being dead-code eliminated.
768 Context.insert(InstFakeUse::create(Func, SP));
769 _mov(SP, FP);
770 } else {
771 // add SP, SpillAreaSizeBytes
772 if (SpillAreaSizeBytes) {
773 // Use the IP inter-procedural scratch register if needed to legalize
774 // the immediate. It shouldn't be live at this point.
775 Operand *AddAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
776 Legal_Reg | Legal_Flex, RegARM32::Reg_ip);
777 _add(SP, SP, AddAmount);
778 }
779 }
780
781 // Add pop instructions for preserved registers.
782 llvm::SmallBitVector CalleeSaves =
783 getRegisterSet(RegSet_CalleeSave, RegSet_None);
784 VarList GPRsToRestore;
785 GPRsToRestore.reserve(CalleeSaves.size());
786 // Consider FP and LR as callee-save / used as needed.
787 if (UsesFramePointer) {
788 CalleeSaves[RegARM32::Reg_fp] = true;
789 }
790 if (!MaybeLeafFunc) {
791 CalleeSaves[RegARM32::Reg_lr] = true;
792 }
793 // Pop registers in ascending order just like push
794 // (instead of in reverse order).
795 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
796 if (CalleeSaves[i] && RegsUsed[i]) {
797 GPRsToRestore.push_back(getPhysicalRegister(i));
798 }
799 }
800 if (!GPRsToRestore.empty())
801 _pop(GPRsToRestore);
802
803 if (!Ctx->getFlags().getUseSandboxing())
804 return;
805
806 // Change the original ret instruction into a sandboxed return sequence.
807 // bundle_lock
808 // bic lr, #0xc000000f
809 // bx lr
810 // bundle_unlock
811 // This isn't just aligning to the getBundleAlignLog2Bytes(). It needs to
812 // restrict to the lower 1GB as well.
813 Operand *RetMask =
814 legalize(Ctx->getConstantInt32(0xc000000f), Legal_Reg | Legal_Flex);
815 Variable *LR = makeReg(IceType_i32, RegARM32::Reg_lr);
816 Variable *RetValue = nullptr;
817 if (RI->getSrcSize())
818 RetValue = llvm::cast<Variable>(RI->getSrc(0));
819 _bundle_lock();
820 _bic(LR, LR, RetMask);
821 _ret(LR, RetValue);
822 _bundle_unlock();
823 RI->setDeleted();
Jan Voungb36ad9b2015-04-21 17:01:49 -0700824}
825
Jan Voungb3401d22015-05-18 09:38:21 -0700826void TargetARM32::split64(Variable *Var) {
827 assert(Var->getType() == IceType_i64);
828 Variable *Lo = Var->getLo();
829 Variable *Hi = Var->getHi();
830 if (Lo) {
831 assert(Hi);
832 return;
833 }
834 assert(Hi == nullptr);
835 Lo = Func->makeVariable(IceType_i32);
836 Hi = Func->makeVariable(IceType_i32);
837 if (ALLOW_DUMP) {
838 Lo->setName(Func, Var->getName(Func) + "__lo");
839 Hi->setName(Func, Var->getName(Func) + "__hi");
840 }
841 Var->setLoHi(Lo, Hi);
842 if (Var->getIsArg()) {
843 Lo->setIsArg();
844 Hi->setIsArg();
845 }
846}
847
848Operand *TargetARM32::loOperand(Operand *Operand) {
849 assert(Operand->getType() == IceType_i64);
850 if (Operand->getType() != IceType_i64)
851 return Operand;
852 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
853 split64(Var);
854 return Var->getLo();
855 }
856 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
857 return Ctx->getConstantInt32(static_cast<uint32_t>(Const->getValue()));
858 }
859 if (OperandARM32Mem *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) {
860 // Conservatively disallow memory operands with side-effects (pre/post
861 // increment) in case of duplication.
862 assert(Mem->getAddrMode() == OperandARM32Mem::Offset ||
863 Mem->getAddrMode() == OperandARM32Mem::NegOffset);
864 if (Mem->isRegReg()) {
865 return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(),
866 Mem->getIndex(), Mem->getShiftOp(),
867 Mem->getShiftAmt(), Mem->getAddrMode());
868 } else {
869 return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(),
870 Mem->getOffset(), Mem->getAddrMode());
871 }
872 }
873 llvm_unreachable("Unsupported operand type");
874 return nullptr;
875}
876
877Operand *TargetARM32::hiOperand(Operand *Operand) {
878 assert(Operand->getType() == IceType_i64);
879 if (Operand->getType() != IceType_i64)
880 return Operand;
881 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
882 split64(Var);
883 return Var->getHi();
884 }
885 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
886 return Ctx->getConstantInt32(
887 static_cast<uint32_t>(Const->getValue() >> 32));
888 }
889 if (OperandARM32Mem *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) {
890 // Conservatively disallow memory operands with side-effects
891 // in case of duplication.
892 assert(Mem->getAddrMode() == OperandARM32Mem::Offset ||
893 Mem->getAddrMode() == OperandARM32Mem::NegOffset);
894 const Type SplitType = IceType_i32;
895 if (Mem->isRegReg()) {
896 // We have to make a temp variable T, and add 4 to either Base or Index.
897 // The Index may be shifted, so adding 4 can mean something else.
898 // Thus, prefer T := Base + 4, and use T as the new Base.
899 Variable *Base = Mem->getBase();
900 Constant *Four = Ctx->getConstantInt32(4);
901 Variable *NewBase = Func->makeVariable(Base->getType());
902 lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, NewBase,
903 Base, Four));
904 return OperandARM32Mem::create(Func, SplitType, NewBase, Mem->getIndex(),
905 Mem->getShiftOp(), Mem->getShiftAmt(),
906 Mem->getAddrMode());
907 } else {
908 Variable *Base = Mem->getBase();
909 ConstantInteger32 *Offset = Mem->getOffset();
910 assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4));
911 int32_t NextOffsetVal = Offset->getValue() + 4;
912 const bool SignExt = false;
913 if (!OperandARM32Mem::canHoldOffset(SplitType, SignExt, NextOffsetVal)) {
914 // We have to make a temp variable and add 4 to either Base or Offset.
915 // If we add 4 to Offset, this will convert a non-RegReg addressing
916 // mode into a RegReg addressing mode. Since NaCl sandboxing disallows
917 // RegReg addressing modes, prefer adding to base and replacing instead.
918 // Thus we leave the old offset alone.
919 Constant *Four = Ctx->getConstantInt32(4);
920 Variable *NewBase = Func->makeVariable(Base->getType());
921 lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add,
922 NewBase, Base, Four));
923 Base = NewBase;
924 } else {
925 Offset =
926 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal));
927 }
928 return OperandARM32Mem::create(Func, SplitType, Base, Offset,
929 Mem->getAddrMode());
930 }
931 }
932 llvm_unreachable("Unsupported operand type");
933 return nullptr;
934}
935
Jan Voungb36ad9b2015-04-21 17:01:49 -0700936llvm::SmallBitVector TargetARM32::getRegisterSet(RegSetMask Include,
937 RegSetMask Exclude) const {
938 llvm::SmallBitVector Registers(RegARM32::Reg_NUM);
939
940#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \
941 isFP) \
942 if (scratch && (Include & RegSet_CallerSave)) \
943 Registers[RegARM32::val] = true; \
944 if (preserved && (Include & RegSet_CalleeSave)) \
945 Registers[RegARM32::val] = true; \
946 if (stackptr && (Include & RegSet_StackPointer)) \
947 Registers[RegARM32::val] = true; \
948 if (frameptr && (Include & RegSet_FramePointer)) \
949 Registers[RegARM32::val] = true; \
950 if (scratch && (Exclude & RegSet_CallerSave)) \
951 Registers[RegARM32::val] = false; \
952 if (preserved && (Exclude & RegSet_CalleeSave)) \
953 Registers[RegARM32::val] = false; \
954 if (stackptr && (Exclude & RegSet_StackPointer)) \
955 Registers[RegARM32::val] = false; \
956 if (frameptr && (Exclude & RegSet_FramePointer)) \
957 Registers[RegARM32::val] = false;
958
959 REGARM32_TABLE
960
961#undef X
962
963 return Registers;
964}
965
966void TargetARM32::lowerAlloca(const InstAlloca *Inst) {
967 UsesFramePointer = true;
968 // Conservatively require the stack to be aligned. Some stack
969 // adjustment operations implemented below assume that the stack is
970 // aligned before the alloca. All the alloca code ensures that the
971 // stack alignment is preserved after the alloca. The stack alignment
972 // restriction can be relaxed in some cases.
973 NeedsStackAlignment = true;
Jan Voung55500db2015-05-26 14:25:40 -0700974
975 // TODO(stichnot): minimize the number of adjustments of SP, etc.
976 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
977 Variable *Dest = Inst->getDest();
978 uint32_t AlignmentParam = Inst->getAlignInBytes();
979 // For default align=0, set it to the real value 1, to avoid any
980 // bit-manipulation problems below.
981 AlignmentParam = std::max(AlignmentParam, 1u);
982
983 // LLVM enforces power of 2 alignment.
984 assert(llvm::isPowerOf2_32(AlignmentParam));
985 assert(llvm::isPowerOf2_32(ARM32_STACK_ALIGNMENT_BYTES));
986
987 uint32_t Alignment = std::max(AlignmentParam, ARM32_STACK_ALIGNMENT_BYTES);
988 if (Alignment > ARM32_STACK_ALIGNMENT_BYTES) {
989 alignRegisterPow2(SP, Alignment);
990 }
991 Operand *TotalSize = Inst->getSizeInBytes();
992 if (const auto *ConstantTotalSize =
993 llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
994 uint32_t Value = ConstantTotalSize->getValue();
995 Value = Utils::applyAlignment(Value, Alignment);
996 Operand *SubAmount = legalize(Ctx->getConstantInt32(Value));
997 _sub(SP, SP, SubAmount);
998 } else {
999 // Non-constant sizes need to be adjusted to the next highest
1000 // multiple of the required alignment at runtime.
1001 TotalSize = legalize(TotalSize);
1002 Variable *T = makeReg(IceType_i32);
1003 _mov(T, TotalSize);
1004 Operand *AddAmount = legalize(Ctx->getConstantInt32(Alignment - 1));
1005 _add(T, T, AddAmount);
1006 alignRegisterPow2(T, Alignment);
1007 _sub(SP, SP, T);
1008 }
1009 _mov(Dest, SP);
Jan Voungb36ad9b2015-04-21 17:01:49 -07001010}
1011
1012void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
Jan Voungb3401d22015-05-18 09:38:21 -07001013 Variable *Dest = Inst->getDest();
1014 // TODO(jvoung): Should be able to flip Src0 and Src1 if it is easier
1015 // to legalize Src0 to flex or Src1 to flex and there is a reversible
1016 // instruction. E.g., reverse subtract with immediate, register vs
1017 // register, immediate.
1018 // Or it may be the case that the operands aren't swapped, but the
1019 // bits can be flipped and a different operation applied.
1020 // E.g., use BIC (bit clear) instead of AND for some masks.
Jan Voung29719972015-05-19 11:24:51 -07001021 Operand *Src0 = Inst->getSrc(0);
1022 Operand *Src1 = Inst->getSrc(1);
Jan Voungb3401d22015-05-18 09:38:21 -07001023 if (Dest->getType() == IceType_i64) {
Jan Voung29719972015-05-19 11:24:51 -07001024 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1025 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1026 Variable *Src0RLo = legalizeToVar(loOperand(Src0));
1027 Variable *Src0RHi = legalizeToVar(hiOperand(Src0));
1028 Operand *Src1Lo = legalize(loOperand(Src1), Legal_Reg | Legal_Flex);
1029 Operand *Src1Hi = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex);
1030 Variable *T_Lo = makeReg(DestLo->getType());
1031 Variable *T_Hi = makeReg(DestHi->getType());
1032 switch (Inst->getOp()) {
1033 case InstArithmetic::_num:
1034 llvm_unreachable("Unknown arithmetic operator");
1035 break;
1036 case InstArithmetic::Add:
1037 _adds(T_Lo, Src0RLo, Src1Lo);
1038 _mov(DestLo, T_Lo);
1039 _adc(T_Hi, Src0RHi, Src1Hi);
1040 _mov(DestHi, T_Hi);
1041 break;
1042 case InstArithmetic::And:
1043 _and(T_Lo, Src0RLo, Src1Lo);
1044 _mov(DestLo, T_Lo);
1045 _and(T_Hi, Src0RHi, Src1Hi);
1046 _mov(DestHi, T_Hi);
1047 break;
1048 case InstArithmetic::Or:
1049 _orr(T_Lo, Src0RLo, Src1Lo);
1050 _mov(DestLo, T_Lo);
1051 _orr(T_Hi, Src0RHi, Src1Hi);
1052 _mov(DestHi, T_Hi);
1053 break;
1054 case InstArithmetic::Xor:
1055 _eor(T_Lo, Src0RLo, Src1Lo);
1056 _mov(DestLo, T_Lo);
1057 _eor(T_Hi, Src0RHi, Src1Hi);
1058 _mov(DestHi, T_Hi);
1059 break;
1060 case InstArithmetic::Sub:
1061 _subs(T_Lo, Src0RLo, Src1Lo);
1062 _mov(DestLo, T_Lo);
1063 _sbc(T_Hi, Src0RHi, Src1Hi);
1064 _mov(DestHi, T_Hi);
1065 break;
1066 case InstArithmetic::Mul: {
1067 // GCC 4.8 does:
1068 // a=b*c ==>
1069 // t_acc =(mul) (b.lo * c.hi)
1070 // t_acc =(mla) (c.lo * b.hi) + t_acc
1071 // t.hi,t.lo =(umull) b.lo * c.lo
1072 // t.hi += t_acc
1073 // a.lo = t.lo
1074 // a.hi = t.hi
1075 //
1076 // LLVM does:
1077 // t.hi,t.lo =(umull) b.lo * c.lo
1078 // t.hi =(mla) (b.lo * c.hi) + t.hi
1079 // t.hi =(mla) (b.hi * c.lo) + t.hi
1080 // a.lo = t.lo
1081 // a.hi = t.hi
1082 //
1083 // LLVM's lowering has fewer instructions, but more register pressure:
1084 // t.lo is live from beginning to end, while GCC delays the two-dest
1085 // instruction till the end, and kills c.hi immediately.
1086 Variable *T_Acc = makeReg(IceType_i32);
1087 Variable *T_Acc1 = makeReg(IceType_i32);
1088 Variable *T_Hi1 = makeReg(IceType_i32);
1089 Variable *Src1RLo = legalizeToVar(Src1Lo);
1090 Variable *Src1RHi = legalizeToVar(Src1Hi);
1091 _mul(T_Acc, Src0RLo, Src1RHi);
1092 _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc);
1093 _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo);
1094 _add(T_Hi, T_Hi1, T_Acc1);
1095 _mov(DestLo, T_Lo);
1096 _mov(DestHi, T_Hi);
1097 } break;
Jan Voung66c3d5e2015-06-04 17:02:31 -07001098 case InstArithmetic::Shl: {
1099 // a=b<<c ==>
1100 // GCC 4.8 does:
1101 // sub t_c1, c.lo, #32
1102 // lsl t_hi, b.hi, c.lo
1103 // orr t_hi, t_hi, b.lo, lsl t_c1
1104 // rsb t_c2, c.lo, #32
1105 // orr t_hi, t_hi, b.lo, lsr t_c2
1106 // lsl t_lo, b.lo, c.lo
1107 // a.lo = t_lo
1108 // a.hi = t_hi
1109 // Can be strength-reduced for constant-shifts, but we don't do
1110 // that for now.
1111 // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative.
1112 // On ARM, shifts only take the lower 8 bits of the shift register,
1113 // and saturate to the range 0-32, so the negative value will
1114 // saturate to 32.
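      // Worked example for c.lo = 40: t_c1 = 8 and t_c2 = -8 (a saturating,
      // full-width shift), so t_hi = (b.hi << 40 [= 0]) | (b.lo << 8) |
      // (b.lo >> -8 [= 0]) and t_lo = b.lo << 40 = 0, which matches the
      // expected result of a 64-bit shift by 40.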
      Variable *T_Hi = makeReg(IceType_i32);
      Variable *Src1RLo = legalizeToVar(Src1Lo);
      Constant *ThirtyTwo = Ctx->getConstantInt32(32);
      Variable *T_C1 = makeReg(IceType_i32);
      Variable *T_C2 = makeReg(IceType_i32);
      _sub(T_C1, Src1RLo, ThirtyTwo);
      _lsl(T_Hi, Src0RHi, Src1RLo);
      _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
                                                   OperandARM32::LSL, T_C1));
      _rsb(T_C2, Src1RLo, ThirtyTwo);
      _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
                                                   OperandARM32::LSR, T_C2));
      _mov(DestHi, T_Hi);
      Variable *T_Lo = makeReg(IceType_i32);
      // _mov seems to sometimes have better register preferencing than lsl.
      // Otherwise mov w/ lsl shifted register is a pseudo-instruction
      // that maps to lsl.
      _mov(T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
                                             OperandARM32::LSL, Src1RLo));
      _mov(DestLo, T_Lo);
    } break;
    case InstArithmetic::Lshr:
      // a=b>>c (unsigned) ==>
      // GCC 4.8 does:
      // rsb t_c1, c.lo, #32
      // lsr t_lo, b.lo, c.lo
      // orr t_lo, t_lo, b.hi, lsl t_c1
      // sub t_c2, c.lo, #32
      // orr t_lo, t_lo, b.hi, lsr t_c2
      // lsr t_hi, b.hi, c.lo
      // a.lo = t_lo
      // a.hi = t_hi
    case InstArithmetic::Ashr: {
      // a=b>>c (signed) ==> ...
      // Ashr is similar, but the sub t_c2, c.lo, #32 should set flags,
      // and the next orr should be conditioned on PLUS. The last two
      // right shifts should also be arithmetic.
      bool IsAshr = Inst->getOp() == InstArithmetic::Ashr;
      Variable *T_Lo = makeReg(IceType_i32);
      Variable *Src1RLo = legalizeToVar(Src1Lo);
      Constant *ThirtyTwo = Ctx->getConstantInt32(32);
      Variable *T_C1 = makeReg(IceType_i32);
      Variable *T_C2 = makeReg(IceType_i32);
      _rsb(T_C1, Src1RLo, ThirtyTwo);
      _lsr(T_Lo, Src0RLo, Src1RLo);
      _orr(T_Lo, T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
                                                   OperandARM32::LSL, T_C1));
      OperandARM32::ShiftKind RShiftKind;
      CondARM32::Cond Pred;
      if (IsAshr) {
        _subs(T_C2, Src1RLo, ThirtyTwo);
        RShiftKind = OperandARM32::ASR;
        Pred = CondARM32::PL;
      } else {
        _sub(T_C2, Src1RLo, ThirtyTwo);
        RShiftKind = OperandARM32::LSR;
        Pred = CondARM32::AL;
      }
      _orr(T_Lo, T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
                                                   RShiftKind, T_C2),
           Pred);
      _mov(DestLo, T_Lo);
      Variable *T_Hi = makeReg(IceType_i32);
      _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
                                             RShiftKind, Src1RLo));
      _mov(DestHi, T_Hi);
    } break;
    case InstArithmetic::Udiv:
    case InstArithmetic::Sdiv:
    case InstArithmetic::Urem:
    case InstArithmetic::Srem:
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case InstArithmetic::Fadd:
    case InstArithmetic::Fsub:
    case InstArithmetic::Fmul:
    case InstArithmetic::Fdiv:
    case InstArithmetic::Frem:
      llvm_unreachable("FP instruction with i64 type");
      break;
    }
  } else if (isVectorType(Dest->getType())) {
    UnimplementedError(Func->getContext()->getFlags());
  } else { // Dest->getType() is non-i64 scalar
    Variable *Src0R = legalizeToVar(Inst->getSrc(0));
    Src1 = legalize(Inst->getSrc(1), Legal_Reg | Legal_Flex);
    Variable *T = makeReg(Dest->getType());
    switch (Inst->getOp()) {
    case InstArithmetic::_num:
      llvm_unreachable("Unknown arithmetic operator");
      break;
    case InstArithmetic::Add: {
      _add(T, Src0R, Src1);
      _mov(Dest, T);
    } break;
    case InstArithmetic::And: {
      _and(T, Src0R, Src1);
      _mov(Dest, T);
    } break;
    case InstArithmetic::Or: {
      _orr(T, Src0R, Src1);
      _mov(Dest, T);
    } break;
    case InstArithmetic::Xor: {
      _eor(T, Src0R, Src1);
      _mov(Dest, T);
    } break;
    case InstArithmetic::Sub: {
      _sub(T, Src0R, Src1);
      _mov(Dest, T);
    } break;
    case InstArithmetic::Mul: {
      Variable *Src1R = legalizeToVar(Src1);
      _mul(T, Src0R, Src1R);
      _mov(Dest, T);
    } break;
    case InstArithmetic::Shl:
      _lsl(T, Src0R, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Lshr:
      _lsr(T, Src0R, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Ashr:
      _asr(T, Src0R, Src1);
      _mov(Dest, T);
      break;
    case InstArithmetic::Udiv:
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case InstArithmetic::Sdiv:
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case InstArithmetic::Urem:
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case InstArithmetic::Srem:
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case InstArithmetic::Fadd:
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case InstArithmetic::Fsub:
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case InstArithmetic::Fmul:
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case InstArithmetic::Fdiv:
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case InstArithmetic::Frem:
      UnimplementedError(Func->getContext()->getFlags());
      break;
    }
  }
}

void TargetARM32::lowerAssign(const InstAssign *Inst) {
  Variable *Dest = Inst->getDest();
  Operand *Src0 = Inst->getSrc(0);
  assert(Dest->getType() == Src0->getType());
  if (Dest->getType() == IceType_i64) {
    Src0 = legalize(Src0);
    Operand *Src0Lo = loOperand(Src0);
    Operand *Src0Hi = hiOperand(Src0);
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    Variable *T_Lo = nullptr, *T_Hi = nullptr;
    _mov(T_Lo, Src0Lo);
    _mov(DestLo, T_Lo);
    _mov(T_Hi, Src0Hi);
    _mov(DestHi, T_Hi);
  } else {
    Operand *SrcR;
    if (Dest->hasReg()) {
      // If Dest already has a physical register, then legalize the
      // Src operand into a Variable with the same register
      // assignment. This is mostly a workaround for advanced phi
      // lowering's ad-hoc register allocation which assumes no
      // register allocation is needed when at least one of the
      // operands is non-memory.
      // TODO(jvoung): check this for ARM.
      SrcR = legalize(Src0, Legal_Reg, Dest->getRegNum());
    } else {
      // Dest could be a stack operand. Since we could potentially need
      // to do a Store (and store can only have Register operands),
      // legalize this to a register.
      SrcR = legalize(Src0, Legal_Reg);
    }
    if (isVectorType(Dest->getType())) {
      UnimplementedError(Func->getContext()->getFlags());
    } else {
      _mov(Dest, SrcR);
    }
  }
}

void TargetARM32::lowerBr(const InstBr *Inst) {
  if (Inst->isUnconditional()) {
    _br(Inst->getTargetUnconditional());
    return;
  }
  Operand *Cond = Inst->getCondition();
  // TODO(jvoung): Handle folding opportunities.

  Variable *Src0R = legalizeToVar(Cond);
  Constant *Zero = Ctx->getConstantZero(IceType_i32);
  _cmp(Src0R, Zero);
  _br(CondARM32::NE, Inst->getTargetTrue(), Inst->getTargetFalse());
}

void TargetARM32::lowerCall(const InstCall *Instr) {
  MaybeLeafFunc = false;
  NeedsStackAlignment = true;

  // Assign arguments to registers and stack. Also reserve stack.
  TargetARM32::CallingConv CC;
  // Pair of Arg Operand -> GPR number assignments.
  llvm::SmallVector<std::pair<Operand *, int32_t>,
                    TargetARM32::CallingConv::ARM32_MAX_GPR_ARG> GPRArgs;
  // Pair of Arg Operand -> stack offset.
  llvm::SmallVector<std::pair<Operand *, int32_t>, 8> StackArgs;
  int32_t ParameterAreaSizeBytes = 0;

  // Classify each argument operand according to the location where the
  // argument is passed.
  for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
    Operand *Arg = Instr->getArg(i);
    Type Ty = Arg->getType();
    bool InRegs = false;
    if (isVectorType(Ty)) {
      UnimplementedError(Func->getContext()->getFlags());
    } else if (isFloatingType(Ty)) {
      UnimplementedError(Func->getContext()->getFlags());
    } else if (Ty == IceType_i64) {
      std::pair<int32_t, int32_t> Regs;
      if (CC.I64InRegs(&Regs)) {
        InRegs = true;
        Operand *Lo = loOperand(Arg);
        Operand *Hi = hiOperand(Arg);
        GPRArgs.push_back(std::make_pair(Lo, Regs.first));
        GPRArgs.push_back(std::make_pair(Hi, Regs.second));
      }
    } else {
      assert(Ty == IceType_i32);
      int32_t Reg;
      if (CC.I32InReg(&Reg)) {
        InRegs = true;
        GPRArgs.push_back(std::make_pair(Arg, Reg));
      }
    }

    if (!InRegs) {
      ParameterAreaSizeBytes =
          applyStackAlignmentTy(ParameterAreaSizeBytes, Ty);
      StackArgs.push_back(std::make_pair(Arg, ParameterAreaSizeBytes));
      ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
    }
  }

  // Adjust the parameter area so that the stack is aligned. It is
  // assumed that the stack is already aligned at the start of the
  // calling sequence.
  ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);

  // Subtract the appropriate amount for the argument area. This also
  // takes care of setting the stack adjustment during emission.
  //
  // TODO: If for some reason the call instruction gets dead-code
  // eliminated after lowering, we would need to ensure that the
  // pre-call and the post-call esp adjustment get eliminated as well.
  if (ParameterAreaSizeBytes) {
    Operand *SubAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes),
                                  Legal_Reg | Legal_Flex);
    _adjust_stack(ParameterAreaSizeBytes, SubAmount);
  }

  // Copy arguments that are passed on the stack to the appropriate
  // stack locations.
  Variable *SP = Func->getTarget()->getPhysicalRegister(RegARM32::Reg_sp);
  for (auto &StackArg : StackArgs) {
    ConstantInteger32 *Loc =
        llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackArg.second));
    Type Ty = StackArg.first->getType();
    OperandARM32Mem *Addr;
    constexpr bool SignExt = false;
    if (OperandARM32Mem::canHoldOffset(Ty, SignExt, StackArg.second)) {
      Addr = OperandARM32Mem::create(Func, Ty, SP, Loc);
    } else {
      Variable *NewBase = Func->makeVariable(SP->getType());
      lowerArithmetic(
          InstArithmetic::create(Func, InstArithmetic::Add, NewBase, SP, Loc));
      Addr = formMemoryOperand(NewBase, Ty);
    }
    lowerStore(InstStore::create(Func, StackArg.first, Addr));
  }

  // Copy arguments to be passed in registers to the appropriate registers.
  for (auto &GPRArg : GPRArgs) {
    Variable *Reg = legalizeToVar(GPRArg.first, GPRArg.second);
    // Generate a FakeUse of register arguments so that they do not get
    // dead code eliminated as a result of the FakeKill of scratch
    // registers after the call.
    Context.insert(InstFakeUse::create(Func, Reg));
  }

  // Generate the call instruction. Assign its result to a temporary
  // with high register allocation weight.
  Variable *Dest = Instr->getDest();
  // ReturnReg doubles as ReturnRegLo as necessary.
  Variable *ReturnReg = nullptr;
  Variable *ReturnRegHi = nullptr;
  if (Dest) {
    switch (Dest->getType()) {
    case IceType_NUM:
      llvm_unreachable("Invalid Call dest type");
      break;
    case IceType_void:
      break;
    case IceType_i1:
    case IceType_i8:
    case IceType_i16:
    case IceType_i32:
      ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_r0);
      break;
    case IceType_i64:
      ReturnReg = makeReg(IceType_i32, RegARM32::Reg_r0);
      ReturnRegHi = makeReg(IceType_i32, RegARM32::Reg_r1);
      break;
    case IceType_f32:
    case IceType_f64:
      // Use S and D regs.
      UnimplementedError(Func->getContext()->getFlags());
      break;
    case IceType_v4i1:
    case IceType_v8i1:
    case IceType_v16i1:
    case IceType_v16i8:
    case IceType_v8i16:
    case IceType_v4i32:
    case IceType_v4f32:
      // Use Q regs.
      UnimplementedError(Func->getContext()->getFlags());
      break;
    }
  }
  Operand *CallTarget = Instr->getCallTarget();
  // TODO(jvoung): Handle sandboxing.
  // const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();

  // Allow ConstantRelocatable to be left alone as a direct call,
  // but force other constants like ConstantInteger32 to be in
  // a register and make it an indirect call.
  if (!llvm::isa<ConstantRelocatable>(CallTarget)) {
    CallTarget = legalize(CallTarget, Legal_Reg);
  }
  Inst *NewCall = InstARM32Call::create(Func, ReturnReg, CallTarget);
  Context.insert(NewCall);
  if (ReturnRegHi)
    Context.insert(InstFakeDef::create(Func, ReturnRegHi));

  // Add the appropriate offset to SP. The call instruction takes care
  // of resetting the stack offset during emission.
  if (ParameterAreaSizeBytes) {
    Operand *AddAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes),
                                  Legal_Reg | Legal_Flex);
    Variable *SP = Func->getTarget()->getPhysicalRegister(RegARM32::Reg_sp);
    _add(SP, SP, AddAmount);
  }

  // Insert a register-kill pseudo instruction.
  Context.insert(InstFakeKill::create(Func, NewCall));

  // Generate a FakeUse to keep the call live if necessary.
  if (Instr->hasSideEffects() && ReturnReg) {
    Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
    Context.insert(FakeUse);
  }

  if (!Dest)
    return;

  // Assign the result of the call to Dest.
  if (ReturnReg) {
    if (ReturnRegHi) {
      assert(Dest->getType() == IceType_i64);
      split64(Dest);
      Variable *DestLo = Dest->getLo();
      Variable *DestHi = Dest->getHi();
      _mov(DestLo, ReturnReg);
      _mov(DestHi, ReturnRegHi);
    } else {
      assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
             Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
             isVectorType(Dest->getType()));
      if (isFloatingType(Dest->getType()) || isVectorType(Dest->getType())) {
        UnimplementedError(Func->getContext()->getFlags());
      } else {
        _mov(Dest, ReturnReg);
      }
    }
  }
}

void TargetARM32::lowerCast(const InstCast *Inst) {
  InstCast::OpKind CastKind = Inst->getCastKind();
  Variable *Dest = Inst->getDest();
  Operand *Src0 = Inst->getSrc(0);
  switch (CastKind) {
  default:
    Func->setError("Cast type not supported");
    return;
  case InstCast::Sext: {
    if (isVectorType(Dest->getType())) {
      UnimplementedError(Func->getContext()->getFlags());
1532 } else if (Dest->getType() == IceType_i64) {
1533 // t1 = sxtb src; t2 = t1 asr #31; dst.lo = t1; dst.hi = t2
1534 Constant *ShiftAmt = Ctx->getConstantInt32(31);
1535 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1536 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1537 Variable *T_Lo = makeReg(DestLo->getType());
1538 if (Src0->getType() == IceType_i32) {
1539 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
1540 _mov(T_Lo, Src0RF);
1541 } else if (Src0->getType() == IceType_i1) {
1542 Variable *Src0R = legalizeToVar(Src0);
1543 _lsl(T_Lo, Src0R, ShiftAmt);
1544 _asr(T_Lo, T_Lo, ShiftAmt);
1545 } else {
1546 Variable *Src0R = legalizeToVar(Src0);
1547 _sxt(T_Lo, Src0R);
1548 }
1549 _mov(DestLo, T_Lo);
1550 Variable *T_Hi = makeReg(DestHi->getType());
1551 if (Src0->getType() != IceType_i1) {
1552 _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, T_Lo,
1553 OperandARM32::ASR, ShiftAmt));
1554 } else {
1555 // For i1, the asr instruction is already done above.
1556 _mov(T_Hi, T_Lo);
1557 }
1558 _mov(DestHi, T_Hi);
1559 } else if (Src0->getType() == IceType_i1) {
1560 // GPR registers are 32-bit, so just use 31 as dst_bitwidth - 1.
1561 // lsl t1, src_reg, 31
1562 // asr t1, t1, 31
1563 // dst = t1
1564 Variable *Src0R = legalizeToVar(Src0);
1565 Constant *ShiftAmt = Ctx->getConstantInt32(31);
1566 Variable *T = makeReg(Dest->getType());
1567 _lsl(T, Src0R, ShiftAmt);
1568 _asr(T, T, ShiftAmt);
1569 _mov(Dest, T);
1570 } else {
1571 // t1 = sxt src; dst = t1
1572 Variable *Src0R = legalizeToVar(Src0);
1573 Variable *T = makeReg(Dest->getType());
1574 _sxt(T, Src0R);
1575 _mov(Dest, T);
1576 }
Jan Voungb36ad9b2015-04-21 17:01:49 -07001577 break;
1578 }
1579 case InstCast::Zext: {
Jan Voung66c3d5e2015-06-04 17:02:31 -07001580 if (isVectorType(Dest->getType())) {
1581 UnimplementedError(Func->getContext()->getFlags());
1582 } else if (Dest->getType() == IceType_i64) {
1583 // t1=uxtb src; dst.lo=t1; dst.hi=0
1584 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1585 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1586 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1587 Variable *T_Lo = makeReg(DestLo->getType());
1588 // i32 and i1 can just take up the whole register.
1589 // i32 doesn't need uxt, while i1 will have an and mask later anyway.
1590 if (Src0->getType() == IceType_i32 || Src0->getType() == IceType_i1) {
1591 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
1592 _mov(T_Lo, Src0RF);
1593 } else {
1594 Variable *Src0R = legalizeToVar(Src0);
1595 _uxt(T_Lo, Src0R);
1596 }
1597 if (Src0->getType() == IceType_i1) {
1598 Constant *One = Ctx->getConstantInt32(1);
1599 _and(T_Lo, T_Lo, One);
1600 }
1601 _mov(DestLo, T_Lo);
1602 Variable *T_Hi = makeReg(DestHi->getType());
1603 _mov(T_Hi, Zero);
1604 _mov(DestHi, T_Hi);
1605 } else if (Src0->getType() == IceType_i1) {
1606 // t = Src0; t &= 1; Dest = t
1607 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
1608 Constant *One = Ctx->getConstantInt32(1);
1609 Variable *T = makeReg(Dest->getType());
1610 // Just use _mov instead of _uxt since all registers are 32-bit.
1611 // _uxt requires the source to be a register, so it could have required
1612 // a _mov from legalize anyway.
1613 _mov(T, Src0RF);
1614 _and(T, T, One);
1615 _mov(Dest, T);
1616 } else {
1617 // t1 = uxt src; dst = t1
1618 Variable *Src0R = legalizeToVar(Src0);
1619 Variable *T = makeReg(Dest->getType());
1620 _uxt(T, Src0R);
1621 _mov(Dest, T);
1622 }
Jan Voungb36ad9b2015-04-21 17:01:49 -07001623 break;
1624 }
1625 case InstCast::Trunc: {
Jan Voung66c3d5e2015-06-04 17:02:31 -07001626 if (isVectorType(Dest->getType())) {
1627 UnimplementedError(Func->getContext()->getFlags());
1628 } else {
1629 Operand *Src0 = Inst->getSrc(0);
1630 if (Src0->getType() == IceType_i64)
1631 Src0 = loOperand(Src0);
1632 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
1633 // t1 = trunc Src0RF; Dest = t1
1634 Variable *T = makeReg(Dest->getType());
1635 _mov(T, Src0RF);
1636 if (Dest->getType() == IceType_i1)
1637 _and(T, T, Ctx->getConstantInt1(1));
1638 _mov(Dest, T);
1639 }
Jan Voungb36ad9b2015-04-21 17:01:49 -07001640 break;
1641 }
1642 case InstCast::Fptrunc:
Jan Voungb2d50842015-05-12 09:53:50 -07001643 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001644 break;
1645 case InstCast::Fpext: {
Jan Voungb2d50842015-05-12 09:53:50 -07001646 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001647 break;
1648 }
1649 case InstCast::Fptosi:
Jan Voungb2d50842015-05-12 09:53:50 -07001650 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001651 break;
1652 case InstCast::Fptoui:
Jan Voungb2d50842015-05-12 09:53:50 -07001653 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001654 break;
1655 case InstCast::Sitofp:
Jan Voungb2d50842015-05-12 09:53:50 -07001656 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001657 break;
1658 case InstCast::Uitofp: {
Jan Voungb2d50842015-05-12 09:53:50 -07001659 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001660 break;
1661 }
1662 case InstCast::Bitcast: {
Jan Voung66c3d5e2015-06-04 17:02:31 -07001663 Operand *Src0 = Inst->getSrc(0);
1664 if (Dest->getType() == Src0->getType()) {
1665 InstAssign *Assign = InstAssign::create(Func, Dest, Src0);
1666 lowerAssign(Assign);
1667 return;
1668 }
Jan Voungb2d50842015-05-12 09:53:50 -07001669 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001670 break;
1671 }
1672 }
1673}
1674
1675void TargetARM32::lowerExtractElement(const InstExtractElement *Inst) {
1676 (void)Inst;
Jan Voungb2d50842015-05-12 09:53:50 -07001677 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001678}
1679
1680void TargetARM32::lowerFcmp(const InstFcmp *Inst) {
1681 (void)Inst;
Jan Voungb2d50842015-05-12 09:53:50 -07001682 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001683}
1684
1685void TargetARM32::lowerIcmp(const InstIcmp *Inst) {
Jan Voung3bfd99a2015-05-22 16:35:25 -07001686 Variable *Dest = Inst->getDest();
1687 Operand *Src0 = Inst->getSrc(0);
1688 Operand *Src1 = Inst->getSrc(1);
1689
1690 if (isVectorType(Dest->getType())) {
1691 UnimplementedError(Func->getContext()->getFlags());
1692 return;
1693 }
1694
1695 // a=icmp cond, b, c ==>
1696 // GCC does:
1697 // cmp b.hi, c.hi or cmp b.lo, c.lo
1698 // cmp.eq b.lo, c.lo sbcs t1, b.hi, c.hi
1699 // mov.<C1> t, #1 mov.<C1> t, #1
1700 // mov.<C2> t, #0 mov.<C2> t, #0
1701 // mov a, t mov a, t
1702 // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi"
1703 // is used for signed compares. In some cases, b and c need to be swapped
1704 // as well.
1705 //
1706 // LLVM does:
1707 // for EQ and NE:
1708 // eor t1, b.hi, c.hi
1709 // eor t2, b.lo, c.lo
1710 // orrs t, t1, t2
1711 // mov.<C> t, #1
1712 // mov a, t
1713 //
1714 // that's nice in that it's just as short but has fewer dependencies
1715 // for better ILP at the cost of more registers.
1716 //
1717 // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with
1718 // two unconditional mov #0, two cmps, two conditional mov #1,
1719 // and one conditonal reg mov. That has few dependencies for good ILP,
1720 // but is a longer sequence.
1721 //
1722 // So, we are going with the GCC version since it's usually better (except
1723 // perhaps for eq/ne). We could revisit special-casing eq/ne later.
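// As a concrete sketch (the actual condition codes come from
// ICMPARM32_TABLE), "a = icmp slt i64 b, c" would lower roughly to:
// cmp b.lo, c.lo
// sbcs t1, b.hi, c.hi
// mov.lt t, #1
// mov.ge t, #0
// mov a, t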
1724 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1725 Constant *One = Ctx->getConstantInt32(1);
1726 if (Src0->getType() == IceType_i64) {
1727 InstIcmp::ICond Condition = Inst->getCondition();
1728 size_t Index = static_cast<size_t>(Condition);
1729 assert(Index < TableIcmp64Size);
1730 Variable *Src0Lo, *Src0Hi;
1731 Operand *Src1LoRF, *Src1HiRF;
1732 if (TableIcmp64[Index].Swapped) {
1733 Src0Lo = legalizeToVar(loOperand(Src1));
1734 Src0Hi = legalizeToVar(hiOperand(Src1));
1735 Src1LoRF = legalize(loOperand(Src0), Legal_Reg | Legal_Flex);
1736 Src1HiRF = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
1737 } else {
1738 Src0Lo = legalizeToVar(loOperand(Src0));
1739 Src0Hi = legalizeToVar(hiOperand(Src0));
1740 Src1LoRF = legalize(loOperand(Src1), Legal_Reg | Legal_Flex);
1741 Src1HiRF = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex);
1742 }
1743 Variable *T = makeReg(IceType_i32);
1744 if (TableIcmp64[Index].IsSigned) {
1745 Variable *ScratchReg = makeReg(IceType_i32);
1746 _cmp(Src0Lo, Src1LoRF);
1747 _sbcs(ScratchReg, Src0Hi, Src1HiRF);
1748 // ScratchReg isn't going to be used, but we need the
1749 // side-effect of setting flags from this operation.
1750 Context.insert(InstFakeUse::create(Func, ScratchReg));
1751 } else {
1752 _cmp(Src0Hi, Src1HiRF);
1753 _cmp(Src0Lo, Src1LoRF, CondARM32::EQ);
1754 }
1755 _mov(T, One, TableIcmp64[Index].C1);
1756 _mov_nonkillable(T, Zero, TableIcmp64[Index].C2);
1757 _mov(Dest, T);
1758 return;
1759 }
1760
1761 // a=icmp cond b, c ==>
1762 // GCC does:
1763 // <u/s>xtb tb, b
1764 // <u/s>xtb tc, c
1765 // cmp tb, tc
1766 // mov.C1 t, #0
1767 // mov.C2 t, #1
1768 // mov a, t
1769 // where the unsigned/sign extension is not needed for 32-bit.
1770 // They also have special cases for EQ and NE. E.g., for NE:
1771 // <extend to tb, tc>
1772 // subs t, tb, tc
1773 // movne t, #1
1774 // mov a, t
1775 //
1776 // LLVM does:
1777 // lsl tb, b, #<N>
1778 // mov t, #0
1779 // cmp tb, c, lsl #<N>
1780 // mov.<C> t, #1
1781 // mov a, t
1782 //
1783 // The left shift is by 0, 16, or 24, which allows the comparison to focus
1784 // on the bits that actually matter (for 16-bit or 8-bit signed/unsigned).
1785 // For the unsigned case, for some reason it does something similar to GCC
1786 // and does a uxtb first. It's not clear to me why that special-casing is needed.
1787 //
1788 // We'll go with the LLVM way for now, since it's shorter and has just as
1789 // few dependencies.
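// Sketch for a narrow compare (conditions again come from the icmp table):
// for "a = icmp ult i16 b, c", ShiftAmt below is 16, so the emitted code is
// roughly:
// lsl tb, b, #16
// mov t, #0
// cmp tb, c, lsl #16
// mov.lo t, #1
// mov a, t
// For i32 operands ShiftAmt is 0 and the shifts are skipped entirely.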
Jan Voung66c3d5e2015-06-04 17:02:31 -07001790 int32_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType());
1791 assert(ShiftAmt >= 0);
Jan Voung3bfd99a2015-05-22 16:35:25 -07001792 Constant *ShiftConst = nullptr;
1793 Variable *Src0R = nullptr;
1794 Variable *T = makeReg(IceType_i32);
Jan Voung66c3d5e2015-06-04 17:02:31 -07001795 if (ShiftAmt) {
1796 ShiftConst = Ctx->getConstantInt32(ShiftAmt);
Jan Voung3bfd99a2015-05-22 16:35:25 -07001797 Src0R = makeReg(IceType_i32);
1798 _lsl(Src0R, legalizeToVar(Src0), ShiftConst);
1799 } else {
1800 Src0R = legalizeToVar(Src0);
1801 }
1802 _mov(T, Zero);
Jan Voung66c3d5e2015-06-04 17:02:31 -07001803 if (ShiftAmt) {
Jan Voung3bfd99a2015-05-22 16:35:25 -07001804 Variable *Src1R = legalizeToVar(Src1);
1805 OperandARM32FlexReg *Src1RShifted = OperandARM32FlexReg::create(
1806 Func, IceType_i32, Src1R, OperandARM32::LSL, ShiftConst);
1807 _cmp(Src0R, Src1RShifted);
1808 } else {
1809 Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);
1810 _cmp(Src0R, Src1RF);
1811 }
1812 _mov_nonkillable(T, One, getIcmp32Mapping(Inst->getCondition()));
1813 _mov(Dest, T);
1814 return;
Jan Voungb36ad9b2015-04-21 17:01:49 -07001815}
1816
1817void TargetARM32::lowerInsertElement(const InstInsertElement *Inst) {
1818 (void)Inst;
Jan Voungb2d50842015-05-12 09:53:50 -07001819 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001820}
1821
1822void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
1823 switch (Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID) {
1824 case Intrinsics::AtomicCmpxchg: {
Jan Voungb2d50842015-05-12 09:53:50 -07001825 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001826 return;
1827 }
1828 case Intrinsics::AtomicFence:
Jan Voungb2d50842015-05-12 09:53:50 -07001829 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001830 return;
1831 case Intrinsics::AtomicFenceAll:
1832 // NOTE: FenceAll should prevent any load/store from being moved
1833 // across the fence (both atomic and non-atomic). The InstARM32Mfence
1834 // instruction is currently marked coarsely as "HasSideEffects".
Jan Voungb2d50842015-05-12 09:53:50 -07001835 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001836 return;
1837 case Intrinsics::AtomicIsLockFree: {
Jan Voungb2d50842015-05-12 09:53:50 -07001838 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001839 return;
1840 }
1841 case Intrinsics::AtomicLoad: {
Jan Voungb2d50842015-05-12 09:53:50 -07001842 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001843 return;
1844 }
1845 case Intrinsics::AtomicRMW:
Jan Voungb2d50842015-05-12 09:53:50 -07001846 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001847 return;
1848 case Intrinsics::AtomicStore: {
Jan Voungb2d50842015-05-12 09:53:50 -07001849 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001850 return;
1851 }
1852 case Intrinsics::Bswap: {
Jan Voungb2d50842015-05-12 09:53:50 -07001853 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001854 return;
1855 }
1856 case Intrinsics::Ctpop: {
Jan Voungb2d50842015-05-12 09:53:50 -07001857 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001858 return;
1859 }
1860 case Intrinsics::Ctlz: {
Jan Voungb2d50842015-05-12 09:53:50 -07001861 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001862 return;
1863 }
1864 case Intrinsics::Cttz: {
Jan Voungb2d50842015-05-12 09:53:50 -07001865 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001866 return;
1867 }
1868 case Intrinsics::Fabs: {
Jan Voungb2d50842015-05-12 09:53:50 -07001869 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001870 return;
1871 }
1872 case Intrinsics::Longjmp: {
1873 InstCall *Call = makeHelperCall(H_call_longjmp, nullptr, 2);
1874 Call->addArg(Instr->getArg(0));
1875 Call->addArg(Instr->getArg(1));
1876 lowerCall(Call);
1877 return;
1878 }
1879 case Intrinsics::Memcpy: {
1880 // In the future, we could potentially emit an inline memcpy/memset, etc.
1881 // for intrinsic calls w/ a known length.
1882 InstCall *Call = makeHelperCall(H_call_memcpy, nullptr, 3);
1883 Call->addArg(Instr->getArg(0));
1884 Call->addArg(Instr->getArg(1));
1885 Call->addArg(Instr->getArg(2));
1886 lowerCall(Call);
1887 return;
1888 }
1889 case Intrinsics::Memmove: {
1890 InstCall *Call = makeHelperCall(H_call_memmove, nullptr, 3);
1891 Call->addArg(Instr->getArg(0));
1892 Call->addArg(Instr->getArg(1));
1893 Call->addArg(Instr->getArg(2));
1894 lowerCall(Call);
1895 return;
1896 }
1897 case Intrinsics::Memset: {
1898 // The value operand needs to be extended to a stack slot size
1899 // because the PNaCl ABI requires arguments to be at least 32 bits
1900 // wide.
1901 Operand *ValOp = Instr->getArg(1);
1902 assert(ValOp->getType() == IceType_i8);
1903 Variable *ValExt = Func->makeVariable(stackSlotType());
1904 lowerCast(InstCast::create(Func, InstCast::Zext, ValExt, ValOp));
1905 InstCall *Call = makeHelperCall(H_call_memset, nullptr, 3);
1906 Call->addArg(Instr->getArg(0));
1907 Call->addArg(ValExt);
1908 Call->addArg(Instr->getArg(2));
1909 lowerCall(Call);
1910 return;
1911 }
1912 case Intrinsics::NaClReadTP: {
1913 if (Ctx->getFlags().getUseSandboxing()) {
Jan Voungb2d50842015-05-12 09:53:50 -07001914 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001915 } else {
1916 InstCall *Call = makeHelperCall(H_call_read_tp, Instr->getDest(), 0);
1917 lowerCall(Call);
1918 }
1919 return;
1920 }
1921 case Intrinsics::Setjmp: {
1922 InstCall *Call = makeHelperCall(H_call_setjmp, Instr->getDest(), 1);
1923 Call->addArg(Instr->getArg(0));
1924 lowerCall(Call);
1925 return;
1926 }
1927 case Intrinsics::Sqrt: {
Jan Voungb2d50842015-05-12 09:53:50 -07001928 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001929 return;
1930 }
1931 case Intrinsics::Stacksave: {
Jan Voungb2d50842015-05-12 09:53:50 -07001932 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001933 return;
1934 }
1935 case Intrinsics::Stackrestore: {
Jan Voungb2d50842015-05-12 09:53:50 -07001936 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001937 return;
1938 }
1939 case Intrinsics::Trap:
Jan Voungb2d50842015-05-12 09:53:50 -07001940 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001941 return;
1942 case Intrinsics::UnknownIntrinsic:
1943 Func->setError("Should not be lowering UnknownIntrinsic");
1944 return;
1945 }
1946 return;
1947}
1948
Jan Voungbefd03a2015-06-02 11:03:03 -07001949void TargetARM32::lowerLoad(const InstLoad *Load) {
1950 // A Load instruction can be treated the same as an Assign
1951 // instruction, after the source operand is transformed into an
1952 // OperandARM32Mem operand.
1953 Type Ty = Load->getDest()->getType();
1954 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty);
1955 Variable *DestLoad = Load->getDest();
1956
1957 // TODO(jvoung): handle folding opportunities. Sign and zero extension
1958 // can be folded into a load.
1959 InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0);
1960 lowerAssign(Assign);
Jan Voungb36ad9b2015-04-21 17:01:49 -07001961}
1962
1963void TargetARM32::doAddressOptLoad() {
Jan Voungb2d50842015-05-12 09:53:50 -07001964 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001965}
1966
1967void TargetARM32::randomlyInsertNop(float Probability) {
1968 RandomNumberGeneratorWrapper RNG(Ctx->getRNG());
1969 if (RNG.getTrueWithProbability(Probability)) {
Jan Voungb2d50842015-05-12 09:53:50 -07001970 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07001971 }
1972}
1973
1974void TargetARM32::lowerPhi(const InstPhi * /*Inst*/) {
1975 Func->setError("Phi found in regular instruction list");
1976}
1977
1978void TargetARM32::lowerRet(const InstRet *Inst) {
Jan Voungb2d50842015-05-12 09:53:50 -07001979 Variable *Reg = nullptr;
1980 if (Inst->hasRetValue()) {
Jan Voungb3401d22015-05-18 09:38:21 -07001981 Operand *Src0 = Inst->getRetValue();
1982 if (Src0->getType() == IceType_i64) {
1983 Variable *R0 = legalizeToVar(loOperand(Src0), RegARM32::Reg_r0);
1984 Variable *R1 = legalizeToVar(hiOperand(Src0), RegARM32::Reg_r1);
1985 Reg = R0;
1986 Context.insert(InstFakeUse::create(Func, R1));
1987 } else if (isScalarFloatingType(Src0->getType())) {
1988 UnimplementedError(Func->getContext()->getFlags());
1989 } else if (isVectorType(Src0->getType())) {
1990 UnimplementedError(Func->getContext()->getFlags());
1991 } else {
1992 Operand *Src0F = legalize(Src0, Legal_Reg | Legal_Flex);
Jan Voung3bfd99a2015-05-22 16:35:25 -07001993 _mov(Reg, Src0F, CondARM32::AL, RegARM32::Reg_r0);
Jan Voungb3401d22015-05-18 09:38:21 -07001994 }
Jan Voungb2d50842015-05-12 09:53:50 -07001995 }
1996 // Add a ret instruction even if sandboxing is enabled, because
1997 // addEpilog explicitly looks for a ret instruction as a marker for
1998 // where to insert the frame removal instructions.
1999 // addEpilog is responsible for restoring the "lr" register as needed
2000 // prior to this ret instruction.
2001 _ret(getPhysicalRegister(RegARM32::Reg_lr), Reg);
2002 // Add a fake use of sp to make sure sp stays alive for the entire
2003 // function. Otherwise post-call sp adjustments get dead-code
2004 // eliminated. TODO: Are there more places where the fake use
2005 // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not
2006 // have a ret instruction.
2007 Variable *SP = Func->getTarget()->getPhysicalRegister(RegARM32::Reg_sp);
2008 Context.insert(InstFakeUse::create(Func, SP));
Jan Voungb36ad9b2015-04-21 17:01:49 -07002009}
2010
2011void TargetARM32::lowerSelect(const InstSelect *Inst) {
2012 (void)Inst;
Jan Voungb2d50842015-05-12 09:53:50 -07002013 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07002014}
2015
2016void TargetARM32::lowerStore(const InstStore *Inst) {
Jan Voungbefd03a2015-06-02 11:03:03 -07002017 Operand *Value = Inst->getData();
2018 Operand *Addr = Inst->getAddr();
2019 OperandARM32Mem *NewAddr = formMemoryOperand(Addr, Value->getType());
2020 Type Ty = NewAddr->getType();
2021
2022 if (Ty == IceType_i64) {
2023 Variable *ValueHi = legalizeToVar(hiOperand(Value));
2024 Variable *ValueLo = legalizeToVar(loOperand(Value));
2025 _str(ValueHi, llvm::cast<OperandARM32Mem>(hiOperand(NewAddr)));
2026 _str(ValueLo, llvm::cast<OperandARM32Mem>(loOperand(NewAddr)));
2027 } else if (isVectorType(Ty)) {
2028 UnimplementedError(Func->getContext()->getFlags());
2029 } else {
2030 Variable *ValueR = legalizeToVar(Value);
2031 _str(ValueR, NewAddr);
2032 }
Jan Voungb36ad9b2015-04-21 17:01:49 -07002033}
2034
2035void TargetARM32::doAddressOptStore() {
Jan Voungb2d50842015-05-12 09:53:50 -07002036 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07002037}
2038
2039void TargetARM32::lowerSwitch(const InstSwitch *Inst) {
2040 (void)Inst;
Jan Voungb2d50842015-05-12 09:53:50 -07002041 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07002042}
2043
2044void TargetARM32::lowerUnreachable(const InstUnreachable * /*Inst*/) {
Jan Voungb3401d22015-05-18 09:38:21 -07002045 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07002046}
2047
2048// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to
2049// preserve integrity of liveness analysis. Undef values are also
2050// turned into zeroes, since loOperand() and hiOperand() don't expect
2051// Undef input.
2052void TargetARM32::prelowerPhis() {
Jan Voungb2d50842015-05-12 09:53:50 -07002053 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07002054}
2055
2056// Lower the pre-ordered list of assignments into mov instructions.
2057// Also has to do some ad-hoc register allocation as necessary.
2058void TargetARM32::lowerPhiAssignments(CfgNode *Node,
2059 const AssignList &Assignments) {
2060 (void)Node;
2061 (void)Assignments;
Jan Voungb2d50842015-05-12 09:53:50 -07002062 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07002063}
2064
Jan Voungb3401d22015-05-18 09:38:21 -07002065Variable *TargetARM32::makeVectorOfZeros(Type Ty, int32_t RegNum) {
2066 Variable *Reg = makeReg(Ty, RegNum);
2067 UnimplementedError(Func->getContext()->getFlags());
2068 return Reg;
2069}
2070
2071// Helper for legalize() to emit the right code to lower an operand to a
2072// register of the appropriate type.
2073Variable *TargetARM32::copyToReg(Operand *Src, int32_t RegNum) {
2074 Type Ty = Src->getType();
2075 Variable *Reg = makeReg(Ty, RegNum);
2076 if (isVectorType(Ty)) {
2077 UnimplementedError(Func->getContext()->getFlags());
2078 } else {
2079 // Mov's Src operand can really only be the flexible second operand type
2080 // or a register. Users should guarantee that.
2081 _mov(Reg, Src);
2082 }
2083 return Reg;
2084}
2085
2086Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
2087 int32_t RegNum) {
2088 // Assert that a physical register is allowed. To date, all calls
2089 // to legalize() allow a physical register. Legal_Flex converts
2090 // registers to the right type OperandARM32FlexReg as needed.
2091 assert(Allowed & Legal_Reg);
2092 // Go through the various types of operands:
2093 // OperandARM32Mem, OperandARM32Flex, Constant, and Variable.
2094 // Given the above assertion, if the type of the operand is not legal
2095 // (e.g., OperandARM32Mem and !Legal_Mem), we can always copy
2096 // to a register.
2097 if (auto Mem = llvm::dyn_cast<OperandARM32Mem>(From)) {
2098 // Before doing anything with a Mem operand, we need to ensure
2099 // that the Base and Index components are in physical registers.
2100 Variable *Base = Mem->getBase();
2101 Variable *Index = Mem->getIndex();
2102 Variable *RegBase = nullptr;
2103 Variable *RegIndex = nullptr;
2104 if (Base) {
2105 RegBase = legalizeToVar(Base);
2106 }
2107 if (Index) {
2108 RegIndex = legalizeToVar(Index);
2109 }
2110 // Create a new operand if there was a change.
2111 if (Base != RegBase || Index != RegIndex) {
2112 // There is only a reg +/- reg or reg + imm form.
2113 // Figure out which to re-create.
2114 if (Mem->isRegReg()) {
2115 Mem = OperandARM32Mem::create(Func, Mem->getType(), RegBase, RegIndex,
2116 Mem->getShiftOp(), Mem->getShiftAmt(),
2117 Mem->getAddrMode());
2118 } else {
2119 Mem = OperandARM32Mem::create(Func, Mem->getType(), RegBase,
2120 Mem->getOffset(), Mem->getAddrMode());
2121 }
2122 }
2123 if (!(Allowed & Legal_Mem)) {
2124 Type Ty = Mem->getType();
2125 Variable *Reg = makeReg(Ty, RegNum);
2126 _ldr(Reg, Mem);
2127 From = Reg;
2128 } else {
2129 From = Mem;
2130 }
2131 return From;
2132 }
2133
2134 if (auto Flex = llvm::dyn_cast<OperandARM32Flex>(From)) {
2135 if (!(Allowed & Legal_Flex)) {
2136 if (auto FlexReg = llvm::dyn_cast<OperandARM32FlexReg>(Flex)) {
2137 if (FlexReg->getShiftOp() == OperandARM32::kNoShift) {
2138 From = FlexReg->getReg();
2139 // Fall through and let From be checked as a Variable below,
2140 // where it may or may not need a register.
2141 } else {
2142 return copyToReg(Flex, RegNum);
2143 }
2144 } else {
2145 return copyToReg(Flex, RegNum);
2146 }
2147 } else {
2148 return From;
2149 }
2150 }
2151
2152 if (llvm::isa<Constant>(From)) {
2153 if (llvm::isa<ConstantUndef>(From)) {
2154 // Lower undefs to zero. Another option is to lower undefs to an
2155 // uninitialized register; however, using an uninitialized register
2156 // results in less predictable code.
2157 if (isVectorType(From->getType()))
2158 return makeVectorOfZeros(From->getType(), RegNum);
2159 From = Ctx->getConstantZero(From->getType());
2160 }
2161 // There should be no constants of vector type (other than undef).
2162 assert(!isVectorType(From->getType()));
2163 bool CanBeFlex = Allowed & Legal_Flex;
2164 if (auto C32 = llvm::dyn_cast<ConstantInteger32>(From)) {
2165 uint32_t RotateAmt;
2166 uint32_t Immed_8;
2167 uint32_t Value = static_cast<uint32_t>(C32->getValue());
2168 // Check if the immediate will fit in a Flexible second operand,
2169 // if a Flexible second operand is allowed. We need to know the exact
2170 // value, so that rules out relocatable constants.
2171 // Also try the inverse and use MVN if possible.
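// Sketch of the three outcomes (values chosen only for illustration):
// 0xFF000000 is 0xFF rotated right by 8, so it can be used directly as a
// flexible immediate; 0xFFFF00FF cannot, but its inverse 0x0000FF00 can, so
// it becomes "mvn reg, #0x0000FF00"; a value like 0x12345678 fits neither
// form and falls through to the movw/movt pair below.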
2172 if (CanBeFlex &&
2173 OperandARM32FlexImm::canHoldImm(Value, &RotateAmt, &Immed_8)) {
2174 return OperandARM32FlexImm::create(Func, From->getType(), Immed_8,
2175 RotateAmt);
2176 } else if (CanBeFlex && OperandARM32FlexImm::canHoldImm(
2177 ~Value, &RotateAmt, &Immed_8)) {
2178 auto InvertedFlex = OperandARM32FlexImm::create(Func, From->getType(),
2179 Immed_8, RotateAmt);
2180 Type Ty = From->getType();
2181 Variable *Reg = makeReg(Ty, RegNum);
2182 _mvn(Reg, InvertedFlex);
2183 return Reg;
2184 } else {
2185 // Do a movw/movt to a register.
2186 Type Ty = From->getType();
2187 Variable *Reg = makeReg(Ty, RegNum);
2188 uint32_t UpperBits = (Value >> 16) & 0xFFFF;
2189 _movw(Reg,
2190 UpperBits != 0 ? Ctx->getConstantInt32(Value & 0xFFFF) : C32);
2191 if (UpperBits != 0) {
2192 _movt(Reg, Ctx->getConstantInt32(UpperBits));
2193 }
2194 return Reg;
2195 }
2196 } else if (auto C = llvm::dyn_cast<ConstantRelocatable>(From)) {
2197 Type Ty = From->getType();
2198 Variable *Reg = makeReg(Ty, RegNum);
2199 _movw(Reg, C);
2200 _movt(Reg, C);
2201 return Reg;
2202 } else {
2203 // Load floats/doubles from literal pool.
2204 UnimplementedError(Func->getContext()->getFlags());
2205 From = copyToReg(From, RegNum);
2206 }
2207 return From;
2208 }
2209
2210 if (auto Var = llvm::dyn_cast<Variable>(From)) {
2211 // Check if the variable is guaranteed a physical register. This
2212 // can happen either when the variable is pre-colored or when it is
2213 // assigned infinite weight.
2214 bool MustHaveRegister = (Var->hasReg() || Var->getWeight().isInf());
2215 // We need a new physical register for the operand if:
2216 // Mem is not allowed and Var isn't guaranteed a physical
2217 // register, or
2218 // RegNum is required and Var->getRegNum() doesn't match.
2219 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
2220 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {
2221 From = copyToReg(From, RegNum);
2222 }
2223 return From;
2224 }
2225 llvm_unreachable("Unhandled operand kind in legalize()");
2226
2227 return From;
2228}
2229
2230// Provide a trivial wrapper to legalize() for this common usage.
2231Variable *TargetARM32::legalizeToVar(Operand *From, int32_t RegNum) {
2232 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
2233}
2234
Jan Voungbefd03a2015-06-02 11:03:03 -07002235OperandARM32Mem *TargetARM32::formMemoryOperand(Operand *Operand, Type Ty) {
2236 OperandARM32Mem *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand);
2237 // It may be the case that address mode optimization already creates
2238 // an OperandARM32Mem, so in that case it wouldn't need another level
2239 // of transformation.
2240 if (Mem) {
2241 return llvm::cast<OperandARM32Mem>(legalize(Mem));
2242 }
2243 // If we didn't do address mode optimization, then we only
2244 // have a base/offset to work with. ARM always requires a base
2245 // register, so just use that to hold the operand.
2246 Variable *Base = legalizeToVar(Operand);
2247 return OperandARM32Mem::create(
2248 Func, Ty, Base,
2249 llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32)));
2250}
2251
Jan Voungb3401d22015-05-18 09:38:21 -07002252Variable *TargetARM32::makeReg(Type Type, int32_t RegNum) {
2253 // There aren't any 64-bit integer registers for ARM32.
2254 assert(Type != IceType_i64);
2255 Variable *Reg = Func->makeVariable(Type);
2256 if (RegNum == Variable::NoRegister)
2257 Reg->setWeightInfinite();
2258 else
2259 Reg->setRegNum(RegNum);
2260 return Reg;
2261}
2262
Jan Voung55500db2015-05-26 14:25:40 -07002263void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align) {
2264 assert(llvm::isPowerOf2_32(Align));
Jan Voung0fa6c5a2015-06-01 11:04:04 -07002265 uint32_t RotateAmt;
Jan Voung55500db2015-05-26 14:25:40 -07002266 uint32_t Immed_8;
2267 Operand *Mask;
2268 // Use AND or BIC to mask off the bits, depending on which immediate fits
2269 // (if it fits at all). Assume Align is usually small, in which case BIC
Jan Voung0fa6c5a2015-06-01 11:04:04 -07002270 // works better. Thus, this rounds down to the alignment.
Jan Voung55500db2015-05-26 14:25:40 -07002271 if (OperandARM32FlexImm::canHoldImm(Align - 1, &RotateAmt, &Immed_8)) {
2272 Mask = legalize(Ctx->getConstantInt32(Align - 1), Legal_Reg | Legal_Flex);
2273 _bic(Reg, Reg, Mask);
2274 } else {
2275 Mask = legalize(Ctx->getConstantInt32(-Align), Legal_Reg | Legal_Flex);
2276 _and(Reg, Reg, Mask);
2277 }
2278}
2279
Jan Voungb36ad9b2015-04-21 17:01:49 -07002280void TargetARM32::postLower() {
2281 if (Ctx->getFlags().getOptLevel() == Opt_m1)
2282 return;
Jan Voungb3401d22015-05-18 09:38:21 -07002283 inferTwoAddress();
Jan Voungb36ad9b2015-04-21 17:01:49 -07002284}
2285
2286void TargetARM32::makeRandomRegisterPermutation(
2287 llvm::SmallVectorImpl<int32_t> &Permutation,
2288 const llvm::SmallBitVector &ExcludeRegisters) const {
2289 (void)Permutation;
2290 (void)ExcludeRegisters;
Jan Voungb2d50842015-05-12 09:53:50 -07002291 UnimplementedError(Func->getContext()->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07002292}
2293
Jan Voung76bb0be2015-05-14 09:26:19 -07002294void TargetARM32::emit(const ConstantInteger32 *C) const {
2295 if (!ALLOW_DUMP)
2296 return;
2297 Ostream &Str = Ctx->getStrEmit();
2298 Str << getConstantPrefix() << C->getValue();
Jan Voungb36ad9b2015-04-21 17:01:49 -07002299}
2300
Jan Voung76bb0be2015-05-14 09:26:19 -07002301void TargetARM32::emit(const ConstantInteger64 *) const {
2302 llvm::report_fatal_error("Not expecting to emit 64-bit integers");
Jan Voungb36ad9b2015-04-21 17:01:49 -07002303}
Jan Voung76bb0be2015-05-14 09:26:19 -07002304
2305void TargetARM32::emit(const ConstantFloat *C) const {
Jan Voungb3401d22015-05-18 09:38:21 -07002306 (void)C;
Jan Voung76bb0be2015-05-14 09:26:19 -07002307 UnimplementedError(Ctx->getFlags());
2308}
2309
2310void TargetARM32::emit(const ConstantDouble *C) const {
Jan Voungb3401d22015-05-18 09:38:21 -07002311 (void)C;
Jan Voung76bb0be2015-05-14 09:26:19 -07002312 UnimplementedError(Ctx->getFlags());
2313}
2314
2315void TargetARM32::emit(const ConstantUndef *) const {
2316 llvm::report_fatal_error("undef value encountered by emitter.");
2317}
Jan Voungb36ad9b2015-04-21 17:01:49 -07002318
2319TargetDataARM32::TargetDataARM32(GlobalContext *Ctx)
2320 : TargetDataLowering(Ctx) {}
2321
John Porto8b1a7052015-06-17 13:20:08 -07002322void TargetDataARM32::lowerGlobals(const VariableDeclarationList &Vars,
2323 const IceString &SectionSuffix) {
Jan Voungb36ad9b2015-04-21 17:01:49 -07002324 switch (Ctx->getFlags().getOutFileType()) {
2325 case FT_Elf: {
2326 ELFObjectWriter *Writer = Ctx->getObjectWriter();
John Porto8b1a7052015-06-17 13:20:08 -07002327 Writer->writeDataSection(Vars, llvm::ELF::R_ARM_ABS32, SectionSuffix);
Jan Voungb36ad9b2015-04-21 17:01:49 -07002328 } break;
2329 case FT_Asm:
2330 case FT_Iasm: {
2331 const IceString &TranslateOnly = Ctx->getFlags().getTranslateOnly();
2332 OstreamLocker L(Ctx);
John Porto8b1a7052015-06-17 13:20:08 -07002333 for (const VariableDeclaration *Var : Vars) {
Jan Voungb36ad9b2015-04-21 17:01:49 -07002334 if (GlobalContext::matchSymbolName(Var->getName(), TranslateOnly)) {
John Porto8b1a7052015-06-17 13:20:08 -07002335 emitGlobal(*Var, SectionSuffix);
Jan Voungb36ad9b2015-04-21 17:01:49 -07002336 }
2337 }
2338 } break;
2339 }
2340}
2341
John Porto0f86d032015-06-15 07:44:27 -07002342void TargetDataARM32::lowerConstants() {
Jan Voungb36ad9b2015-04-21 17:01:49 -07002343 if (Ctx->getFlags().getDisableTranslation())
2344 return;
Jan Voungb2d50842015-05-12 09:53:50 -07002345 UnimplementedError(Ctx->getFlags());
Jan Voungb36ad9b2015-04-21 17:01:49 -07002346}
2347
Jan Voungfb792842015-06-11 15:27:50 -07002348TargetHeaderARM32::TargetHeaderARM32(GlobalContext *Ctx)
2349 : TargetHeaderLowering(Ctx) {}
2350
2351void TargetHeaderARM32::lower() {
2352 OstreamLocker L(Ctx);
2353 Ostream &Str = Ctx->getStrEmit();
2354 Str << ".syntax unified\n";
2355 // Emit build attributes in format: .eabi_attribute TAG, VALUE.
2356 // See Sec. 2 of "Addenda to, and Errata in the ABI for the ARM architecture"
2357 // http://infocenter.arm.com/help/topic/com.arm.doc.ihi0045d/IHI0045D_ABI_addenda.pdf
2358 //
2359 // Tag_conformance should be emitted first in a file-scope
2360 // sub-subsection of the first public subsection of the attributes.
2361 Str << ".eabi_attribute 67, \"2.09\" @ Tag_conformance\n";
2362 // Chromebooks are at least Cortex-A15, but target Cortex-A9 for broader compatibility.
2363 Str << ".cpu cortex-a9\n"
2364 << ".eabi_attribute 6, 10 @ Tag_CPU_arch: ARMv7\n"
2365 << ".eabi_attribute 7, 65 @ Tag_CPU_arch_profile: App profile\n";
2366 Str << ".eabi_attribute 8, 1 @ Tag_ARM_ISA_use: Yes\n"
2367 << ".eabi_attribute 9, 2 @ Tag_THUMB_ISA_use: Thumb-2\n";
2368 // TODO(jvoung): check other CPU features like HW div.
2369 Str << ".fpu neon\n"
2370 << ".eabi_attribute 17, 1 @ Tag_ABI_PCS_GOT_use: permit directly\n"
2371 << ".eabi_attribute 20, 1 @ Tag_ABI_FP_denormal\n"
2372 << ".eabi_attribute 21, 1 @ Tag_ABI_FP_exceptions\n"
2373 << ".eabi_attribute 23, 3 @ Tag_ABI_FP_number_model: IEEE 754\n"
2374 << ".eabi_attribute 34, 1 @ Tag_CPU_unaligned_access\n"
2375 << ".eabi_attribute 24, 1 @ Tag_ABI_align_needed: 8-byte\n"
2376 << ".eabi_attribute 25, 1 @ Tag_ABI_align_preserved: 8-byte\n"
2377 << ".eabi_attribute 28, 1 @ Tag_ABI_VFP_args\n"
2378 << ".eabi_attribute 36, 1 @ Tag_FP_HP_extension\n"
2379 << ".eabi_attribute 38, 1 @ Tag_ABI_FP_16bit_format\n"
2380 << ".eabi_attribute 42, 1 @ Tag_MPextension_use\n"
2381 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n";
2382 // Technically R9 is used for TLS with Sandboxing, and we reserve it.
2383 // However, for compatibility with current NaCl LLVM, don't claim that.
2384 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n";
2385}
2386
Jan Voungb36ad9b2015-04-21 17:01:49 -07002387} // end of namespace Ice