//===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the TargetLoweringX86Base class, which
/// consists almost entirely of the lowering sequence for each
/// high-level instruction.
///
//===----------------------------------------------------------------------===//

#ifndef SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
#define SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H

#include "IceCfg.h"
#include "IceCfgNode.h"
#include "IceClFlags.h"
#include "IceDefs.h"
#include "IceELFObjectWriter.h"
#include "IceGlobalInits.h"
#include "IceLiveness.h"
#include "IceOperand.h"
#include "IcePhiLoweringImpl.h"
#include "IceUtils.h"
#include "llvm/Support/MathExtras.h"

#include <stack>

namespace Ice {
namespace X86Internal {

/// A helper class to ease the setting of RandomizationPoolingPaused, which
/// disables constant blinding or pooling for some translation phases.
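/// Typical usage (see translateO2() below):
///   { BoolFlagSaver B(RandomizationPoolingPaused, true); doLoadOpt(); }
/// The flag is restored to its previous value when the saver goes out of
/// scope.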
class BoolFlagSaver {
  BoolFlagSaver() = delete;
  BoolFlagSaver(const BoolFlagSaver &) = delete;
  BoolFlagSaver &operator=(const BoolFlagSaver &) = delete;

public:
  BoolFlagSaver(bool &F, bool NewValue) : OldValue(F), Flag(F) { F = NewValue; }
  ~BoolFlagSaver() { Flag = OldValue; }

private:
  const bool OldValue;
  bool &Flag;
};

template <class MachineTraits> class BoolFoldingEntry {
  BoolFoldingEntry(const BoolFoldingEntry &) = delete;

public:
  BoolFoldingEntry() = default;
  explicit BoolFoldingEntry(Inst *I);
  BoolFoldingEntry &operator=(const BoolFoldingEntry &) = default;
  /// Instr is the instruction producing the i1-type variable of interest.
  Inst *Instr = nullptr;
  /// IsComplex is the cached result of BoolFolding::hasComplexLowering(Instr).
  bool IsComplex = false;
  /// IsLiveOut is initialized conservatively to true, and is set to false when
  /// we encounter an instruction that ends Var's live range. We disable the
  /// folding optimization when Var is live beyond this basic block. Note that
  /// if liveness analysis is not performed (e.g. in Om1 mode), IsLiveOut will
  /// always be true and the folding optimization will never be performed.
  bool IsLiveOut = true;
  // NumUses counts the number of times Var is used as a source operand in the
  // basic block. If IsComplex is true and there is more than one use of Var,
  // then the folding optimization is disabled for Var.
  uint32_t NumUses = 0;
};

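/// BoolFolding tracks i1-producing instructions (icmp, fcmp, trunc) whose
/// single in-block consumer (br, select, sext, zext) can absorb them. As an
/// illustrative example:
///   b = icmp eq a, 0   ; b is i1, used only by the br, not live-out
///   br b, L1, L2
/// can be lowered as a single compare plus a conditional branch on its flags,
/// without materializing b in a register.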
template <class MachineTraits> class BoolFolding {
public:
  enum BoolFoldingProducerKind {
    PK_None,
    // TODO(jpp): PK_Icmp32 is no longer meaningful. Rename to PK_IcmpNative.
    PK_Icmp32,
    PK_Icmp64,
    PK_Fcmp,
    PK_Trunc
  };

  /// Currently the actual enum values are not used (other than CK_None), but
  /// we go ahead and produce them anyway for symmetry with the
  /// BoolFoldingProducerKind.
  enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext };

private:
  BoolFolding(const BoolFolding &) = delete;
  BoolFolding &operator=(const BoolFolding &) = delete;

public:
  BoolFolding() = default;
  static BoolFoldingProducerKind getProducerKind(const Inst *Instr);
  static BoolFoldingConsumerKind getConsumerKind(const Inst *Instr);
  static bool hasComplexLowering(const Inst *Instr);
  void init(CfgNode *Node);
  const Inst *getProducerFor(const Operand *Opnd) const;
  void dump(const Cfg *Func) const;

private:
  /// Returns true if Producers contains a valid entry for the given VarNum.
  bool containsValid(SizeT VarNum) const {
    auto Element = Producers.find(VarNum);
    return Element != Producers.end() && Element->second.Instr != nullptr;
  }
  void setInvalid(SizeT VarNum) { Producers[VarNum].Instr = nullptr; }
  /// Producers maps Variable::Number to a BoolFoldingEntry.
  std::unordered_map<SizeT, BoolFoldingEntry<MachineTraits>> Producers;
};

template <class MachineTraits>
BoolFoldingEntry<MachineTraits>::BoolFoldingEntry(Inst *I)
    : Instr(I), IsComplex(BoolFolding<MachineTraits>::hasComplexLowering(I)) {}

template <class MachineTraits>
typename BoolFolding<MachineTraits>::BoolFoldingProducerKind
BoolFolding<MachineTraits>::getProducerKind(const Inst *Instr) {
  if (llvm::isa<InstIcmp>(Instr)) {
    if (MachineTraits::Is64Bit || Instr->getSrc(0)->getType() != IceType_i64)
      return PK_Icmp32;
    return PK_None; // TODO(stichnot): actually PK_Icmp64;
  }
  return PK_None; // TODO(stichnot): remove this

  if (llvm::isa<InstFcmp>(Instr))
    return PK_Fcmp;
  if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {
    switch (Cast->getCastKind()) {
    default:
      return PK_None;
    case InstCast::Trunc:
      return PK_Trunc;
    }
  }
  return PK_None;
}

template <class MachineTraits>
typename BoolFolding<MachineTraits>::BoolFoldingConsumerKind
BoolFolding<MachineTraits>::getConsumerKind(const Inst *Instr) {
  if (llvm::isa<InstBr>(Instr))
    return CK_Br;
  if (llvm::isa<InstSelect>(Instr))
    return CK_Select;
  return CK_None; // TODO(stichnot): remove this

  if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {
    switch (Cast->getCastKind()) {
    default:
      return CK_None;
    case InstCast::Sext:
      return CK_Sext;
    case InstCast::Zext:
      return CK_Zext;
    }
  }
  return CK_None;
}

/// Returns true if the producing instruction has a "complex" lowering
/// sequence. This generally means that its lowering sequence requires more
/// than one conditional branch, namely 64-bit integer compares and some
/// floating-point compares. When this is true, and there is more than one
/// consumer, we prefer to disable the folding optimization because disabling
/// it minimizes branches.
template <class MachineTraits>
bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) {
  switch (getProducerKind(Instr)) {
  default:
    return false;
  case PK_Icmp64:
    return true;
  case PK_Fcmp:
    return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()]
               .C2 != MachineTraits::Cond::Br_None;
  }
}

template <class MachineTraits>
void BoolFolding<MachineTraits>::init(CfgNode *Node) {
  Producers.clear();
  for (Inst &Instr : Node->getInsts()) {
    // Check whether Instr is a valid producer.
    Variable *Var = Instr.getDest();
    if (!Instr.isDeleted() // only consider non-deleted instructions
        && Var             // only instructions with an actual dest var
        && Var->getType() == IceType_i1          // only bool-type dest vars
        && getProducerKind(&Instr) != PK_None) { // white-listed instructions
      Producers[Var->getIndex()] = BoolFoldingEntry<MachineTraits>(&Instr);
    }
    // Check each src variable against the map.
    for (SizeT I = 0; I < Instr.getSrcSize(); ++I) {
      Operand *Src = Instr.getSrc(I);
      SizeT NumVars = Src->getNumVars();
      for (SizeT J = 0; J < NumVars; ++J) {
        const Variable *Var = Src->getVar(J);
        SizeT VarNum = Var->getIndex();
        if (containsValid(VarNum)) {
          if (I != 0 // All valid consumers use Var as the first source operand
              || getConsumerKind(&Instr) == CK_None // must be white-listed
              || (Producers[VarNum].IsComplex && // complex can't be multi-use
                  Producers[VarNum].NumUses > 0)) {
            setInvalid(VarNum);
            continue;
          }
          ++Producers[VarNum].NumUses;
          if (Instr.isLastUse(Var)) {
            Producers[VarNum].IsLiveOut = false;
          }
        }
      }
    }
  }
  for (auto &I : Producers) {
    // Ignore entries previously marked invalid.
    if (I.second.Instr == nullptr)
      continue;
    // Disable the producer if its dest may be live beyond this block.
    if (I.second.IsLiveOut) {
      setInvalid(I.first);
      continue;
    }
    // Mark as "dead" rather than outright deleting. This is so that other
    // peephole style optimizations during or before lowering have access to
    // this instruction in undeleted form. See for example
    // tryOptimizedCmpxchgCmpBr().
    I.second.Instr->setDead();
  }
}

template <class MachineTraits>
const Inst *
BoolFolding<MachineTraits>::getProducerFor(const Operand *Opnd) const {
  auto *Var = llvm::dyn_cast<const Variable>(Opnd);
  if (Var == nullptr)
    return nullptr;
  SizeT VarNum = Var->getIndex();
  auto Element = Producers.find(VarNum);
  if (Element == Producers.end())
    return nullptr;
  return Element->second.Instr;
}

template <class MachineTraits>
void BoolFolding<MachineTraits>::dump(const Cfg *Func) const {
  if (!BuildDefs::dump() || !Func->isVerbose(IceV_Folding))
    return;
  OstreamLocker L(Func->getContext());
  Ostream &Str = Func->getContext()->getStrDump();
  for (auto &I : Producers) {
    if (I.second.Instr == nullptr)
      continue;
    Str << "Found foldable producer:\n ";
    I.second.Instr->dump(Func);
    Str << "\n";
  }
}

template <class Machine>
void TargetX86Base<Machine>::initNodeForLowering(CfgNode *Node) {
  FoldingInfo.init(Node);
  FoldingInfo.dump(Func);
}

template <class Machine>
TargetX86Base<Machine>::TargetX86Base(Cfg *Func)
    : TargetLowering(Func) {
  static_assert(
      (Traits::InstructionSet::End - Traits::InstructionSet::Begin) ==
          (TargetInstructionSet::X86InstructionSet_End -
           TargetInstructionSet::X86InstructionSet_Begin),
      "Traits::InstructionSet range different from TargetInstructionSet");
  if (Func->getContext()->getFlags().getTargetInstructionSet() !=
      TargetInstructionSet::BaseInstructionSet) {
    InstructionSet = static_cast<typename Traits::InstructionSet>(
        (Func->getContext()->getFlags().getTargetInstructionSet() -
         TargetInstructionSet::X86InstructionSet_Begin) +
        Traits::InstructionSet::Begin);
  }
  // TODO: Don't initialize IntegerRegisters and friends every time. Instead,
  // initialize in some sort of static initializer for the class.
  llvm::SmallBitVector IntegerRegisters(Traits::RegisterSet::Reg_NUM);
  llvm::SmallBitVector IntegerRegistersI8(Traits::RegisterSet::Reg_NUM);
  llvm::SmallBitVector FloatRegisters(Traits::RegisterSet::Reg_NUM);
  llvm::SmallBitVector VectorRegisters(Traits::RegisterSet::Reg_NUM);
  llvm::SmallBitVector InvalidRegisters(Traits::RegisterSet::Reg_NUM);
  ScratchRegs.resize(Traits::RegisterSet::Reg_NUM);

  Traits::initRegisterSet(&IntegerRegisters, &IntegerRegistersI8,
                          &FloatRegisters, &VectorRegisters, &ScratchRegs);

  TypeToRegisterSet[IceType_void] = InvalidRegisters;
  TypeToRegisterSet[IceType_i1] = IntegerRegistersI8;
  TypeToRegisterSet[IceType_i8] = IntegerRegistersI8;
  TypeToRegisterSet[IceType_i16] = IntegerRegisters;
  TypeToRegisterSet[IceType_i32] = IntegerRegisters;
  TypeToRegisterSet[IceType_i64] = IntegerRegisters;
  TypeToRegisterSet[IceType_f32] = FloatRegisters;
  TypeToRegisterSet[IceType_f64] = FloatRegisters;
  TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i8] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i16] = VectorRegisters;
  TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
  TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
}

template <class Machine> void TargetX86Base<Machine>::translateO2() {
  TimerMarker T(TimerStack::TT_O2, Func);

  if (!Ctx->getFlags().getPhiEdgeSplit()) {
    // Lower Phi instructions.
    Func->placePhiLoads();
    if (Func->hasError())
      return;
    Func->placePhiStores();
    if (Func->hasError())
      return;
    Func->deletePhis();
    if (Func->hasError())
      return;
    Func->dump("After Phi lowering");
  }

  // Address mode optimization.
  Func->getVMetadata()->init(VMK_SingleDefs);
  Func->doAddressOpt();

  // Find read-modify-write opportunities. Do this after address mode
  // optimization so that doAddressOpt() doesn't need to be applied to RMW
  // instructions as well.
  findRMW();
  Func->dump("After RMW transform");

  // Argument lowering
  Func->doArgLowering();

  // Target lowering. This requires liveness analysis for some parts of the
  // lowering decisions, such as compare/branch fusing. If non-lightweight
  // liveness analysis is used, the instructions need to be renumbered first.
  // TODO: This renumbering should only be necessary if we're actually
  // calculating live intervals, which we only do for register allocation.
  Func->renumberInstructions();
  if (Func->hasError())
    return;

  // TODO: It should be sufficient to use the fastest liveness calculation,
  // i.e. livenessLightweight(). However, for some reason that slows down the
  // rest of the translation. Investigate.
  Func->liveness(Liveness_Basic);
  if (Func->hasError())
    return;
  Func->dump("After x86 address mode opt");

  // Disable constant blinding or pooling for load optimization.
  {
    BoolFlagSaver B(RandomizationPoolingPaused, true);
    doLoadOpt();
  }
  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After x86 codegen");

  // Register allocation. This requires instruction renumbering and full
  // liveness analysis.
  Func->renumberInstructions();
  if (Func->hasError())
    return;
  Func->liveness(Liveness_Intervals);
  if (Func->hasError())
    return;
  // Validate the live range computations. The expensive validation call is
  // deliberately only made when assertions are enabled.
  assert(Func->validateLiveness());
  // The post-codegen dump is done here, after liveness analysis and associated
  // cleanup, to make the dump cleaner and more useful.
  Func->dump("After initial x8632 codegen");
  Func->getVMetadata()->init(VMK_All);
  regAlloc(RAK_Global);
  if (Func->hasError())
    return;
  Func->dump("After linear scan regalloc");

  if (Ctx->getFlags().getPhiEdgeSplit()) {
    Func->advancedPhiLowering();
    Func->dump("After advanced Phi lowering");
  }

  // Stack frame mapping.
  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  Func->contractEmptyNodes();
  Func->reorderNodes();

  // Shuffle basic block order if -reorder-basic-blocks is enabled.
  Func->shuffleNodes();

  // Branch optimization. This needs to be done just before code emission. In
  // particular, no transformations that insert or reorder CfgNodes should be
  // done after branch optimization. We go ahead and do it before nop insertion
  // to reduce the amount of work needed for searching for opportunities.
  Func->doBranchOpt();
  Func->dump("After branch optimization");

  // Nop insertion if -nop-insertion is enabled.
  Func->doNopInsertion();

  // Mark nodes that require sandbox alignment.
  if (Ctx->getFlags().getUseSandboxing())
    Func->markNodesForSandboxing();
}

template <class Machine> void TargetX86Base<Machine>::translateOm1() {
  TimerMarker T(TimerStack::TT_Om1, Func);

  Func->placePhiLoads();
  if (Func->hasError())
    return;
  Func->placePhiStores();
  if (Func->hasError())
    return;
  Func->deletePhis();
  if (Func->hasError())
    return;
  Func->dump("After Phi lowering");

  Func->doArgLowering();
  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After initial x8632 codegen");

  regAlloc(RAK_InfOnly);
  if (Func->hasError())
    return;
  Func->dump("After regalloc of infinite-weight variables");

  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  // Shuffle basic block order if -reorder-basic-blocks is enabled.
  Func->shuffleNodes();

  // Nop insertion if -nop-insertion is enabled.
  Func->doNopInsertion();

  // Mark nodes that require sandbox alignment.
  if (Ctx->getFlags().getUseSandboxing())
    Func->markNodesForSandboxing();
}

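/// Returns whether the arithmetic instruction is a candidate for x86
/// read-modify-write lowering. As an illustrative example, the sequence
/// "a = load addr; b = add a, 1; store b, addr" can become a single
/// memory-operand "add [addr], 1".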
inline bool canRMW(const InstArithmetic *Arith) {
  Type Ty = Arith->getDest()->getType();
  // X86 vector instructions write to a register and have no RMW option.
  if (isVectorType(Ty))
    return false;
  bool isI64 = Ty == IceType_i64;

  switch (Arith->getOp()) {
  // Not handled for lack of simple lowering:
  //   shift on i64
  //   mul, udiv, urem, sdiv, srem, frem
  // Not handled for lack of RMW instructions:
  //   fadd, fsub, fmul, fdiv (also vector types)
  default:
    return false;
  case InstArithmetic::Add:
  case InstArithmetic::Sub:
  case InstArithmetic::And:
  case InstArithmetic::Or:
  case InstArithmetic::Xor:
    return true;
  case InstArithmetic::Shl:
  case InstArithmetic::Lshr:
  case InstArithmetic::Ashr:
    return false; // TODO(stichnot): implement
    return !isI64;
  }
}

491
John Porto921856d2015-07-07 11:56:26 -0700492template <class Machine>
John Porto7e93c622015-06-23 10:58:57 -0700493bool isSameMemAddressOperand(const Operand *A, const Operand *B) {
494 if (A == B)
495 return true;
John Porto921856d2015-07-07 11:56:26 -0700496 if (auto *MemA = llvm::dyn_cast<
497 typename TargetX86Base<Machine>::Traits::X86OperandMem>(A)) {
498 if (auto *MemB = llvm::dyn_cast<
499 typename TargetX86Base<Machine>::Traits::X86OperandMem>(B)) {
John Porto7e93c622015-06-23 10:58:57 -0700500 return MemA->getBase() == MemB->getBase() &&
501 MemA->getOffset() == MemB->getOffset() &&
502 MemA->getIndex() == MemB->getIndex() &&
503 MemA->getShift() == MemB->getShift() &&
504 MemA->getSegmentRegister() == MemB->getSegmentRegister();
505 }
506 }
507 return false;
508}

template <class Machine> void TargetX86Base<Machine>::findRMW() {
  Func->dump("Before RMW");
  OstreamLocker L(Func->getContext());
  Ostream &Str = Func->getContext()->getStrDump();
  for (CfgNode *Node : Func->getNodes()) {
    // Walk through the instructions, considering each sequence of 3
    // instructions, and look for the particular RMW pattern. Note that this
    // search can be "broken" (false negatives) if there are intervening
    // deleted instructions, or intervening instructions that could be safely
    // moved out of the way to reveal an RMW pattern.
    auto E = Node->getInsts().end();
    auto I1 = E, I2 = E, I3 = Node->getInsts().begin();
    for (; I3 != E; I1 = I2, I2 = I3, ++I3) {
      // Make I3 skip over deleted instructions.
      while (I3 != E && I3->isDeleted())
        ++I3;
      if (I1 == E || I2 == E || I3 == E)
        continue;
      assert(!I1->isDeleted());
      assert(!I2->isDeleted());
      assert(!I3->isDeleted());
      if (auto *Load = llvm::dyn_cast<InstLoad>(I1)) {
        if (auto *Arith = llvm::dyn_cast<InstArithmetic>(I2)) {
          if (auto *Store = llvm::dyn_cast<InstStore>(I3)) {
            // Look for:
            //   a = Load addr
            //   b = <op> a, other
            //   Store b, addr
            // Change to:
            //   a = Load addr
            //   b = <op> a, other
            //   x = FakeDef
            //   RMW <op>, addr, other, x
            //   b = Store b, addr, x
            // Note that inferTwoAddress() makes sure setDestNonKillable() gets
            // called on the updated Store instruction, to avoid liveness
            // problems later.
            //
            // With this transformation, the Store instruction acquires a Dest
            // variable and is now subject to dead code elimination if there
            // are no more uses of "b". Variable "x" is a beacon for
            // determining whether the Store instruction gets dead-code
            // eliminated. If the Store instruction is eliminated, then it
            // must be the case that the RMW instruction ends x's live range,
            // and therefore the RMW instruction will be retained and later
            // lowered. On the other hand, if the RMW instruction does not end
            // x's live range, then the Store instruction must still be
            // present, and therefore the RMW instruction is ignored during
            // lowering because it is redundant with the Store instruction.
            //
            // Note that if "a" has further uses, the RMW transformation may
            // still trigger, resulting in two loads and one store, which is
            // worse than the original one load and one store. However, this
            // is probably rare, and caching probably keeps it just as fast.
            if (!isSameMemAddressOperand<Machine>(Load->getSourceAddress(),
                                                  Store->getAddr()))
              continue;
            Operand *ArithSrcFromLoad = Arith->getSrc(0);
            Operand *ArithSrcOther = Arith->getSrc(1);
            if (ArithSrcFromLoad != Load->getDest()) {
              if (!Arith->isCommutative() || ArithSrcOther != Load->getDest())
                continue;
              std::swap(ArithSrcFromLoad, ArithSrcOther);
            }
            if (Arith->getDest() != Store->getData())
              continue;
            if (!canRMW(Arith))
              continue;
            if (Func->isVerbose(IceV_RMW)) {
              Str << "Found RMW in " << Func->getFunctionName() << ":\n ";
              Load->dump(Func);
              Str << "\n ";
              Arith->dump(Func);
              Str << "\n ";
              Store->dump(Func);
              Str << "\n";
            }
            Variable *Beacon = Func->makeVariable(IceType_i32);
            Beacon->setWeight(0);
            Store->setRmwBeacon(Beacon);
            InstFakeDef *BeaconDef = InstFakeDef::create(Func, Beacon);
            Node->getInsts().insert(I3, BeaconDef);
            auto *RMW = Traits::Insts::FakeRMW::create(
                Func, ArithSrcOther, Store->getAddr(), Beacon, Arith->getOp());
            Node->getInsts().insert(I3, RMW);
          }
        }
      }
    }
  }
}

// Converts a ConstantInteger32 operand into its constant value, or
// MemoryOrderInvalid if the operand is not a ConstantInteger32.
inline uint64_t getConstantMemoryOrder(Operand *Opnd) {
  if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
    return Integer->getValue();
  return Intrinsics::MemoryOrderInvalid;
}

/// Determines whether the dest of a Load instruction can be folded
/// into one of the src operands of a 2-operand instruction. This is
/// true as long as the load dest matches exactly one of the binary
/// instruction's src operands. Replaces Src0 or Src1 with LoadSrc if
/// the answer is true.
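/// For example (illustrative), given "a = load addr" followed by
/// "d = add a, b", Src0 is replaced by the load's memory operand so that
/// doLoadOpt() can emit "d = add [addr], b" and delete the load.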
inline bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest,
                                      Operand *&Src0, Operand *&Src1) {
  if (Src0 == LoadDest && Src1 != LoadDest) {
    Src0 = LoadSrc;
    return true;
  }
  if (Src0 != LoadDest && Src1 == LoadDest) {
    Src1 = LoadSrc;
    return true;
  }
  return false;
}

template <class Machine> void TargetX86Base<Machine>::doLoadOpt() {
  for (CfgNode *Node : Func->getNodes()) {
    Context.init(Node);
    while (!Context.atEnd()) {
      Variable *LoadDest = nullptr;
      Operand *LoadSrc = nullptr;
      Inst *CurInst = Context.getCur();
      Inst *Next = Context.getNextInst();
      // Determine whether the current instruction is a Load instruction or
      // equivalent.
      if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) {
        // An InstLoad always qualifies.
        LoadDest = Load->getDest();
        const bool DoLegalize = false;
        LoadSrc = formMemoryOperand(Load->getSourceAddress(),
                                    LoadDest->getType(), DoLegalize);
      } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) {
        // An AtomicLoad intrinsic qualifies as long as it has a valid memory
        // ordering, and can be implemented in a single instruction (i.e., not
        // i64 on x86-32).
        Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID;
        if (ID == Intrinsics::AtomicLoad &&
            (Traits::Is64Bit || Intrin->getDest()->getType() != IceType_i64) &&
            Intrinsics::isMemoryOrderValid(
                ID, getConstantMemoryOrder(Intrin->getArg(1)))) {
          LoadDest = Intrin->getDest();
          const bool DoLegalize = false;
          LoadSrc = formMemoryOperand(Intrin->getArg(0), LoadDest->getType(),
                                      DoLegalize);
        }
      }
      // A Load instruction can be folded into the following instruction only
      // if the following instruction ends the Load's Dest variable's live
      // range.
      if (LoadDest && Next && Next->isLastUse(LoadDest)) {
        assert(LoadSrc);
        Inst *NewInst = nullptr;
        if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Next)) {
          Operand *Src0 = Arith->getSrc(0);
          Operand *Src1 = Arith->getSrc(1);
          if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
            NewInst = InstArithmetic::create(Func, Arith->getOp(),
                                             Arith->getDest(), Src0, Src1);
          }
        } else if (auto *Icmp = llvm::dyn_cast<InstIcmp>(Next)) {
          Operand *Src0 = Icmp->getSrc(0);
          Operand *Src1 = Icmp->getSrc(1);
          if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
            NewInst = InstIcmp::create(Func, Icmp->getCondition(),
                                       Icmp->getDest(), Src0, Src1);
          }
        } else if (auto *Fcmp = llvm::dyn_cast<InstFcmp>(Next)) {
          Operand *Src0 = Fcmp->getSrc(0);
          Operand *Src1 = Fcmp->getSrc(1);
          if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
            NewInst = InstFcmp::create(Func, Fcmp->getCondition(),
                                       Fcmp->getDest(), Src0, Src1);
          }
        } else if (auto *Select = llvm::dyn_cast<InstSelect>(Next)) {
          Operand *Src0 = Select->getTrueOperand();
          Operand *Src1 = Select->getFalseOperand();
          if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
            NewInst = InstSelect::create(Func, Select->getDest(),
                                         Select->getCondition(), Src0, Src1);
          }
        } else if (auto *Cast = llvm::dyn_cast<InstCast>(Next)) {
          // The load dest can always be folded into a Cast instruction.
          Variable *Src0 = llvm::dyn_cast<Variable>(Cast->getSrc(0));
          if (Src0 == LoadDest) {
            NewInst = InstCast::create(Func, Cast->getCastKind(),
                                       Cast->getDest(), LoadSrc);
          }
        }
        if (NewInst) {
          CurInst->setDeleted();
          Next->setDeleted();
          Context.insert(NewInst);
          // Update NewInst->LiveRangesEnded so that target lowering may
          // benefit. Also update NewInst->HasSideEffects.
          NewInst->spliceLivenessInfo(Next, CurInst);
        }
      }
      Context.advanceCur();
      Context.advanceNext();
    }
  }
  Func->dump("After load optimization");
}

template <class Machine>
bool TargetX86Base<Machine>::doBranchOpt(Inst *I, const CfgNode *NextNode) {
  if (auto *Br = llvm::dyn_cast<typename Traits::Insts::Br>(I)) {
    return Br->optimizeBranch(NextNode);
  }
  return false;
}

template <class Machine>
Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) {
  // Special case: never allow partial reads/writes to/from %rBP and %rSP.
  if (RegNum == Traits::RegisterSet::Reg_esp ||
      RegNum == Traits::RegisterSet::Reg_ebp)
    Ty = Traits::WordType;
  if (Ty == IceType_void)
    Ty = IceType_i32;
  if (PhysicalRegisters[Ty].empty())
    PhysicalRegisters[Ty].resize(Traits::RegisterSet::Reg_NUM);
  assert(RegNum < PhysicalRegisters[Ty].size());
  Variable *Reg = PhysicalRegisters[Ty][RegNum];
  if (Reg == nullptr) {
    Reg = Func->makeVariable(Ty);
    Reg->setRegNum(RegNum);
    PhysicalRegisters[Ty][RegNum] = Reg;
    // Specially mark esp as an "argument" so that it is considered live upon
    // function entry.
    if (RegNum == Traits::RegisterSet::Reg_esp) {
      Func->addImplicitArg(Reg);
      Reg->setIgnoreLiveness();
    }
  }
  return Reg;
}

template <class Machine>
IceString TargetX86Base<Machine>::getRegName(SizeT RegNum, Type Ty) const {
  return Traits::getRegName(RegNum, Ty);
}

template <class Machine>
void TargetX86Base<Machine>::emitVariable(const Variable *Var) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Ctx->getStrEmit();
  if (Var->hasReg()) {
    Str << "%" << getRegName(Var->getRegNum(), Var->getType());
    return;
  }
  if (Var->getWeight().isInf()) {
    llvm_unreachable("Infinite-weight Variable has no register assigned");
  }
  int32_t Offset = Var->getStackOffset();
  int32_t BaseRegNum = Var->getBaseRegNum();
  if (BaseRegNum == Variable::NoRegister) {
    BaseRegNum = getFrameOrStackReg();
    if (!hasFramePointer())
      Offset += getStackAdjustment();
  }
  if (Offset)
    Str << Offset;
  const Type FrameSPTy = Traits::WordType;
  Str << "(%" << getRegName(BaseRegNum, FrameSPTy) << ")";
}

template <class Machine>
typename TargetX86Base<Machine>::Traits::Address
TargetX86Base<Machine>::stackVarToAsmOperand(const Variable *Var) const {
  if (Var->hasReg())
    llvm_unreachable("Stack Variable has a register assigned");
  if (Var->getWeight().isInf()) {
    llvm_unreachable("Infinite-weight Variable has no register assigned");
  }
  int32_t Offset = Var->getStackOffset();
  int32_t BaseRegNum = Var->getBaseRegNum();
  if (Var->getBaseRegNum() == Variable::NoRegister) {
    BaseRegNum = getFrameOrStackReg();
    if (!hasFramePointer())
      Offset += getStackAdjustment();
  }
  return typename Traits::Address(
      Traits::RegisterSet::getEncodedGPR(BaseRegNum), Offset);
}

/// Helper function for addProlog().
///
/// This assumes Arg is an argument passed on the stack. This sets the
/// frame offset for Arg and updates InArgsSizeBytes according to Arg's
/// width. For an I64 arg that has been split into Lo and Hi components,
/// it calls itself recursively on the components, taking care to handle
/// Lo first because of the little-endian architecture. Lastly, this
/// function generates an instruction to copy Arg into its assigned
/// register if applicable.
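///
/// For example (illustrative), a split i64 arg whose Lo half is assigned
/// frame offset N gets its Hi half at offset N + 4, since each i32 half
/// occupies 4 bytes of stack.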
template <class Machine>
void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg,
                                                    Variable *FramePtr,
                                                    size_t BasicFrameOffset,
                                                    size_t &InArgsSizeBytes) {
  Variable *Lo = Arg->getLo();
  Variable *Hi = Arg->getHi();
  Type Ty = Arg->getType();
  if (!Traits::Is64Bit && Lo && Hi && Ty == IceType_i64) {
    assert(Lo->getType() != IceType_i64); // don't want infinite recursion
    assert(Hi->getType() != IceType_i64); // don't want infinite recursion
    finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    return;
  }
  if (isVectorType(Ty)) {
    InArgsSizeBytes = Traits::applyStackAlignment(InArgsSizeBytes);
  }
  Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
  InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
  if (Arg->hasReg()) {
    assert(Ty != IceType_i64 || Traits::Is64Bit);
    typename Traits::X86OperandMem *Mem = Traits::X86OperandMem::create(
        Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset()));
    if (isVectorType(Arg->getType())) {
      _movp(Arg, Mem);
    } else {
      _mov(Arg, Mem);
    }
    // This argument-copying instruction uses an explicit Traits::X86OperandMem
    // operand instead of a Variable, so its fill-from-stack operation has to
    // be tracked separately for statistics.
    Ctx->statsUpdateFills();
  }
}

template <class Machine> Type TargetX86Base<Machine>::stackSlotType() {
  return Traits::WordType;
}

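/// Splits a 64-bit Variable into 32-bit Lo and Hi components (x86-32 only).
/// For example (illustrative), an i64 variable "a" gains i32 halves named
/// "a__lo" and "a__hi", where the little-endian Lo half holds bits 0..31.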
template <class Machine>
template <typename T>
typename std::enable_if<!T::Is64Bit, void>::type
TargetX86Base<Machine>::split64(Variable *Var) {
  switch (Var->getType()) {
  default:
    return;
  case IceType_i64:
  // TODO: Only consider F64 if we need to push each half when passing as an
  // argument to a function call. Note that each half is still typed as I32.
  case IceType_f64:
    break;
  }
  Variable *Lo = Var->getLo();
  Variable *Hi = Var->getHi();
  if (Lo) {
    assert(Hi);
    return;
  }
  assert(Hi == nullptr);
  Lo = Func->makeVariable(IceType_i32);
  Hi = Func->makeVariable(IceType_i32);
  if (BuildDefs::dump()) {
    Lo->setName(Func, Var->getName(Func) + "__lo");
    Hi->setName(Func, Var->getName(Func) + "__hi");
  }
  Var->setLoHi(Lo, Hi);
  if (Var->getIsArg()) {
    Lo->setIsArg();
    Hi->setIsArg();
  }
}

template <class Machine>
template <typename T>
typename std::enable_if<!T::Is64Bit, Operand>::type *
TargetX86Base<Machine>::loOperand(Operand *Operand) {
  assert(Operand->getType() == IceType_i64 ||
         Operand->getType() == IceType_f64);
  if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
    return Operand;
  if (auto *Var = llvm::dyn_cast<Variable>(Operand)) {
    split64(Var);
    return Var->getLo();
  }
  if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
    auto *ConstInt = llvm::dyn_cast<ConstantInteger32>(
        Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue())));
    // Check if we need to blind/pool the constant.
    return legalize(ConstInt);
  }
  if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Operand)) {
    auto *MemOperand = Traits::X86OperandMem::create(
        Func, IceType_i32, Mem->getBase(), Mem->getOffset(), Mem->getIndex(),
        Mem->getShift(), Mem->getSegmentRegister());
    // Test if we should randomize or pool the offset; if so, randomize or
    // pool it, then create a mem operand with the blinded/pooled constant.
    // Otherwise, return the mem operand as an ordinary mem operand.
    return legalize(MemOperand);
  }
  llvm_unreachable("Unsupported operand type");
  return nullptr;
}

template <class Machine>
template <typename T>
typename std::enable_if<!T::Is64Bit, Operand>::type *
TargetX86Base<Machine>::hiOperand(Operand *Operand) {
  assert(Operand->getType() == IceType_i64 ||
         Operand->getType() == IceType_f64);
  if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
    return Operand;
  if (auto *Var = llvm::dyn_cast<Variable>(Operand)) {
    split64(Var);
    return Var->getHi();
  }
  if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
    auto *ConstInt = llvm::dyn_cast<ConstantInteger32>(
        Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue() >> 32)));
    // Check if we need to blind/pool the constant.
    return legalize(ConstInt);
  }
  if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Operand)) {
    Constant *Offset = Mem->getOffset();
    if (Offset == nullptr) {
      Offset = Ctx->getConstantInt32(4);
    } else if (auto *IntOffset = llvm::dyn_cast<ConstantInteger32>(Offset)) {
      Offset = Ctx->getConstantInt32(4 + IntOffset->getValue());
    } else if (auto *SymOffset = llvm::dyn_cast<ConstantRelocatable>(Offset)) {
      assert(!Utils::WouldOverflowAdd(SymOffset->getOffset(), 4));
      Offset =
          Ctx->getConstantSym(4 + SymOffset->getOffset(), SymOffset->getName(),
                              SymOffset->getSuppressMangling());
    }
    auto *MemOperand = Traits::X86OperandMem::create(
        Func, IceType_i32, Mem->getBase(), Offset, Mem->getIndex(),
        Mem->getShift(), Mem->getSegmentRegister());
    // Test if the Offset is an eligible i32 constant for randomization and
    // pooling. Blind/pool it if it is. Otherwise return as an ordinary mem
    // operand.
    return legalize(MemOperand);
  }
  llvm_unreachable("Unsupported operand type");
  return nullptr;
}

template <class Machine>
llvm::SmallBitVector
TargetX86Base<Machine>::getRegisterSet(RegSetMask Include,
                                       RegSetMask Exclude) const {
  return Traits::getRegisterSet(Include, Exclude);
}

template <class Machine>
void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) {
  IsEbpBasedFrame = true;
  // Conservatively require the stack to be aligned. Some stack adjustment
  // operations implemented below assume that the stack is aligned before the
  // alloca. All the alloca code ensures that the stack alignment is preserved
  // after the alloca. The stack alignment restriction can be relaxed in some
  // cases.
  NeedsStackAlignment = true;

  // TODO(stichnot): minimize the number of adjustments of esp, etc.
  Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
  Operand *TotalSize = legalize(Inst->getSizeInBytes());
  Variable *Dest = Inst->getDest();
  uint32_t AlignmentParam = Inst->getAlignInBytes();
  // For default align=0, set it to the real value 1, to avoid any
  // bit-manipulation problems below.
  AlignmentParam = std::max(AlignmentParam, 1u);

  // LLVM enforces power of 2 alignment.
  assert(llvm::isPowerOf2_32(AlignmentParam));
  assert(llvm::isPowerOf2_32(Traits::X86_STACK_ALIGNMENT_BYTES));

  uint32_t Alignment =
      std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES);
  if (Alignment > Traits::X86_STACK_ALIGNMENT_BYTES) {
    _and(esp, Ctx->getConstantInt32(-Alignment));
  }
  if (const auto *ConstantTotalSize =
          llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
    uint32_t Value = ConstantTotalSize->getValue();
    Value = Utils::applyAlignment(Value, Alignment);
    _sub(esp, Ctx->getConstantInt32(Value));
  } else {
    // Non-constant sizes need to be adjusted to the next highest multiple of
    // the required alignment at runtime.
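    // For example (illustrative): with Alignment = 16 and a runtime size of
    // 20, T becomes (20 + 15) & -16 = 32.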
    Variable *T = makeReg(IceType_i32);
    _mov(T, TotalSize);
    _add(T, Ctx->getConstantInt32(Alignment - 1));
    _and(T, Ctx->getConstantInt32(-Alignment));
    _sub(esp, T);
  }
  _mov(Dest, esp);
}

/// Strength-reduce scalar integer multiplication by a constant (for i32 or
/// narrower) for certain constants. The lea instruction can be used to
/// multiply by 3, 5, or 9, and the shl instruction can be used to multiply by
/// powers of 2. These can be combined such that e.g. multiplying by 100 can
/// be done as 2 lea-based multiplies by 5, combined with left-shifting by 2.
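/// For example (a sketch; exact operands depend on register allocation),
/// multiplying T by 100 = 5 * 5 * 4 emits:
///   lea T, [T + 4*T]  ; T *= 5
///   lea T, [T + 4*T]  ; T *= 5
///   shl T, 2          ; T *= 4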
template <class Machine>
bool TargetX86Base<Machine>::optimizeScalarMul(Variable *Dest, Operand *Src0,
                                               int32_t Src1) {
  // Disable this optimization for Om1 and O0, just to keep things simple
  // there.
  if (Ctx->getFlags().getOptLevel() < Opt_1)
    return false;
  Type Ty = Dest->getType();
  Variable *T = nullptr;
  if (Src1 == -1) {
    _mov(T, Src0);
    _neg(T);
    _mov(Dest, T);
    return true;
  }
  if (Src1 == 0) {
    _mov(Dest, Ctx->getConstantZero(Ty));
    return true;
  }
  if (Src1 == 1) {
    _mov(T, Src0);
    _mov(Dest, T);
    return true;
  }
  // Don't bother with the edge case where Src1 == MININT.
  if (Src1 == -Src1)
    return false;
  const bool Src1IsNegative = Src1 < 0;
  if (Src1IsNegative)
    Src1 = -Src1;
  uint32_t Count9 = 0;
  uint32_t Count5 = 0;
  uint32_t Count3 = 0;
  uint32_t Count2 = 0;
  uint32_t CountOps = 0;
  while (Src1 > 1) {
    if (Src1 % 9 == 0) {
      ++CountOps;
      ++Count9;
      Src1 /= 9;
    } else if (Src1 % 5 == 0) {
      ++CountOps;
      ++Count5;
      Src1 /= 5;
    } else if (Src1 % 3 == 0) {
      ++CountOps;
      ++Count3;
      Src1 /= 3;
    } else if (Src1 % 2 == 0) {
      if (Count2 == 0)
        ++CountOps;
      ++Count2;
      Src1 /= 2;
    } else {
      return false;
    }
  }
  // Lea optimization only works for i16 and i32 types, not i8.
  if (Ty != IceType_i16 && Ty != IceType_i32 && (Count3 || Count5 || Count9))
    return false;
  // Limit the number of lea/shl operations for a single multiply, to a
  // somewhat arbitrary choice of 3.
  const uint32_t MaxOpsForOptimizedMul = 3;
  if (CountOps > MaxOpsForOptimizedMul)
    return false;
  _mov(T, Src0);
  Constant *Zero = Ctx->getConstantZero(IceType_i32);
  for (uint32_t i = 0; i < Count9; ++i) {
    const uint16_t Shift = 3; // log2(9-1)
    _lea(T,
         Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
    _set_dest_nonkillable();
  }
  for (uint32_t i = 0; i < Count5; ++i) {
    const uint16_t Shift = 2; // log2(5-1)
    _lea(T,
         Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
    _set_dest_nonkillable();
  }
  for (uint32_t i = 0; i < Count3; ++i) {
    const uint16_t Shift = 1; // log2(3-1)
    _lea(T,
         Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
    _set_dest_nonkillable();
  }
  if (Count2) {
    _shl(T, Ctx->getConstantInt(Ty, Count2));
  }
  if (Src1IsNegative)
    _neg(T);
  _mov(Dest, T);
  return true;
}

template <class Machine>
void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
  Variable *Dest = Inst->getDest();
  Operand *Src0 = legalize(Inst->getSrc(0));
  Operand *Src1 = legalize(Inst->getSrc(1));
  if (Inst->isCommutative()) {
    if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1))
      std::swap(Src0, Src1);
    if (llvm::isa<Constant>(Src0) && !llvm::isa<Constant>(Src1))
      std::swap(Src0, Src1);
  }
  if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
    // These x86-32 helper-call-involved instructions are lowered in this
    // separate switch. This is because loOperand() and hiOperand() may insert
    // redundant instructions for constant blinding and pooling. Such
    // redundant instructions will fail liveness analysis under the -Om1
    // setting. Also, these arguments do not actually need to be processed
    // with loOperand() and hiOperand() to be used.
    switch (Inst->getOp()) {
    case InstArithmetic::Udiv: {
      const SizeT MaxSrcs = 2;
      InstCall *Call = makeHelperCall(H_udiv_i64, Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      Call->addArg(Inst->getSrc(1));
      lowerCall(Call);
      return;
    }
    case InstArithmetic::Sdiv: {
      const SizeT MaxSrcs = 2;
      InstCall *Call = makeHelperCall(H_sdiv_i64, Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      Call->addArg(Inst->getSrc(1));
      lowerCall(Call);
      return;
    }
    case InstArithmetic::Urem: {
      const SizeT MaxSrcs = 2;
      InstCall *Call = makeHelperCall(H_urem_i64, Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      Call->addArg(Inst->getSrc(1));
      lowerCall(Call);
      return;
    }
    case InstArithmetic::Srem: {
      const SizeT MaxSrcs = 2;
      InstCall *Call = makeHelperCall(H_srem_i64, Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      Call->addArg(Inst->getSrc(1));
      lowerCall(Call);
      return;
    }
    default:
      break;
    }

    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    Operand *Src0Lo = loOperand(Src0);
    Operand *Src0Hi = hiOperand(Src0);
    Operand *Src1Lo = loOperand(Src1);
    Operand *Src1Hi = hiOperand(Src1);
    Variable *T_Lo = nullptr, *T_Hi = nullptr;
    switch (Inst->getOp()) {
    case InstArithmetic::_num:
      llvm_unreachable("Unknown arithmetic operator");
      break;
    case InstArithmetic::Add:
      _mov(T_Lo, Src0Lo);
      _add(T_Lo, Src1Lo);
      _mov(DestLo, T_Lo);
      _mov(T_Hi, Src0Hi);
      _adc(T_Hi, Src1Hi);
      _mov(DestHi, T_Hi);
      break;
    case InstArithmetic::And:
      _mov(T_Lo, Src0Lo);
      _and(T_Lo, Src1Lo);
      _mov(DestLo, T_Lo);
      _mov(T_Hi, Src0Hi);
      _and(T_Hi, Src1Hi);
      _mov(DestHi, T_Hi);
      break;
    case InstArithmetic::Or:
      _mov(T_Lo, Src0Lo);
      _or(T_Lo, Src1Lo);
      _mov(DestLo, T_Lo);
      _mov(T_Hi, Src0Hi);
      _or(T_Hi, Src1Hi);
      _mov(DestHi, T_Hi);
      break;
    case InstArithmetic::Xor:
      _mov(T_Lo, Src0Lo);
      _xor(T_Lo, Src1Lo);
      _mov(DestLo, T_Lo);
      _mov(T_Hi, Src0Hi);
      _xor(T_Hi, Src1Hi);
      _mov(DestHi, T_Hi);
      break;
    case InstArithmetic::Sub:
      _mov(T_Lo, Src0Lo);
      _sub(T_Lo, Src1Lo);
      _mov(DestLo, T_Lo);
      _mov(T_Hi, Src0Hi);
      _sbb(T_Hi, Src1Hi);
      _mov(DestHi, T_Hi);
      break;
    case InstArithmetic::Mul: {
      Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
      Variable *T_4Lo = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
      Variable *T_4Hi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
      // gcc does the following:
      // a=b*c ==>
      //   t1 = b.hi; t1 *=(imul) c.lo
      //   t2 = c.hi; t2 *=(imul) b.lo
      //   t3:eax = b.lo
      //   t4.hi:edx,t4.lo:eax = t3:eax *(mul) c.lo
      //   a.lo = t4.lo
      //   t4.hi += t1
      //   t4.hi += t2
      //   a.hi = t4.hi
      // The mul instruction cannot take an immediate operand.
      Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Mem);
      _mov(T_1, Src0Hi);
      _imul(T_1, Src1Lo);
      _mov(T_2, Src1Hi);
      _imul(T_2, Src0Lo);
      _mov(T_3, Src0Lo, Traits::RegisterSet::Reg_eax);
      _mul(T_4Lo, T_3, Src1Lo);
      // The mul instruction produces two dest variables, edx:eax. We create a
      // fake definition of edx to account for this.
      Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo));
      _mov(DestLo, T_4Lo);
      _add(T_4Hi, T_1);
      _add(T_4Hi, T_2);
      _mov(DestHi, T_4Hi);
    } break;
    case InstArithmetic::Shl: {
      // TODO: Refactor the similarities between Shl, Lshr, and Ashr.
      // gcc does the following:
      // a=b<<c ==>
      //   t1:ecx = c.lo & 0xff
      //   t2 = b.lo
      //   t3 = b.hi
      //   t3 = shld t3, t2, t1
      //   t2 = shl t2, t1
      //   test t1, 0x20
      //   je L1
      //   use(t3)
      //   t3 = t2
      //   t2 = 0
      // L1:
      //   a.lo = t2
      //   a.hi = t3
      Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
      Constant *BitTest = Ctx->getConstantInt32(0x20);
      Constant *Zero = Ctx->getConstantZero(IceType_i32);
      typename Traits::Insts::Label *Label =
          Traits::Insts::Label::create(Func, this);
      _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx);
      _mov(T_2, Src0Lo);
      _mov(T_3, Src0Hi);
      _shld(T_3, T_2, T_1);
      _shl(T_2, T_1);
      _test(T_1, BitTest);
      _br(Traits::Cond::Br_e, Label);
      // T_2 and T_3 are being assigned again because of the intra-block
      // control flow, so we need the _mov_nonkillable variant to avoid
      // liveness problems.
      _mov_nonkillable(T_3, T_2);
      _mov_nonkillable(T_2, Zero);
      Context.insert(Label);
      _mov(DestLo, T_2);
      _mov(DestHi, T_3);
    } break;
    case InstArithmetic::Lshr: {
      // a=b>>c (unsigned) ==>
      //   t1:ecx = c.lo & 0xff
      //   t2 = b.lo
      //   t3 = b.hi
      //   t2 = shrd t2, t3, t1
      //   t3 = shr t3, t1
      //   test t1, 0x20
      //   je L1
      //   use(t2)
      //   t2 = t3
      //   t3 = 0
      // L1:
      //   a.lo = t2
      //   a.hi = t3
      Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
      Constant *BitTest = Ctx->getConstantInt32(0x20);
      Constant *Zero = Ctx->getConstantZero(IceType_i32);
      typename Traits::Insts::Label *Label =
          Traits::Insts::Label::create(Func, this);
      _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx);
      _mov(T_2, Src0Lo);
      _mov(T_3, Src0Hi);
      _shrd(T_2, T_3, T_1);
      _shr(T_3, T_1);
      _test(T_1, BitTest);
      _br(Traits::Cond::Br_e, Label);
      // T_2 and T_3 are being assigned again because of the intra-block
      // control flow, so we need the _mov_nonkillable variant to avoid
      // liveness problems.
      _mov_nonkillable(T_2, T_3);
      _mov_nonkillable(T_3, Zero);
      Context.insert(Label);
      _mov(DestLo, T_2);
      _mov(DestHi, T_3);
    } break;
    case InstArithmetic::Ashr: {
      // a=b>>c (signed) ==>
      //   t1:ecx = c.lo & 0xff
      //   t2 = b.lo
      //   t3 = b.hi
      //   t2 = shrd t2, t3, t1
      //   t3 = sar t3, t1
      //   test t1, 0x20
      //   je L1
      //   use(t2)
      //   t2 = t3
      //   t3 = sar t3, 0x1f
      // L1:
      //   a.lo = t2
      //   a.hi = t3
      Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
      Constant *BitTest = Ctx->getConstantInt32(0x20);
      Constant *SignExtend = Ctx->getConstantInt32(0x1f);
      typename Traits::Insts::Label *Label =
          Traits::Insts::Label::create(Func, this);
      _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx);
      _mov(T_2, Src0Lo);
      _mov(T_3, Src0Hi);
      _shrd(T_2, T_3, T_1);
      _sar(T_3, T_1);
      _test(T_1, BitTest);
      _br(Traits::Cond::Br_e, Label);
      // T_2 and T_3 are being assigned again because of the intra-block
      // control flow, so T_2 needs the _mov_nonkillable variant to avoid
      // liveness problems. T_3 doesn't need special treatment because it is
      // reassigned via _sar instead of _mov.
      _mov_nonkillable(T_2, T_3);
      _sar(T_3, SignExtend);
      Context.insert(Label);
      _mov(DestLo, T_2);
      _mov(DestHi, T_3);
    } break;
    case InstArithmetic::Fadd:
    case InstArithmetic::Fsub:
    case InstArithmetic::Fmul:
    case InstArithmetic::Fdiv:
    case InstArithmetic::Frem:
      llvm_unreachable("FP instruction with i64 type");
      break;
    case InstArithmetic::Udiv:
    case InstArithmetic::Sdiv:
    case InstArithmetic::Urem:
    case InstArithmetic::Srem:
      llvm_unreachable("Call-helper-involved instruction for i64 type \
                       should have already been handled before");
      break;
    }
    return;
  }
1373 if (isVectorType(Dest->getType())) {
1374 // TODO: Trap on integer divide and integer modulo by zero.
1375 // See: https://code.google.com/p/nativeclient/issues/detail?id=3899
John Porto921856d2015-07-07 11:56:26 -07001376 if (llvm::isa<typename Traits::X86OperandMem>(Src1))
Andrew Scull97f460d2015-07-21 10:07:42 -07001377 Src1 = legalizeToReg(Src1);
John Porto7e93c622015-06-23 10:58:57 -07001378 switch (Inst->getOp()) {
1379 case InstArithmetic::_num:
1380 llvm_unreachable("Unknown arithmetic operator");
1381 break;
1382 case InstArithmetic::Add: {
1383 Variable *T = makeReg(Dest->getType());
1384 _movp(T, Src0);
1385 _padd(T, Src1);
1386 _movp(Dest, T);
1387 } break;
1388 case InstArithmetic::And: {
1389 Variable *T = makeReg(Dest->getType());
1390 _movp(T, Src0);
1391 _pand(T, Src1);
1392 _movp(Dest, T);
1393 } break;
1394 case InstArithmetic::Or: {
1395 Variable *T = makeReg(Dest->getType());
1396 _movp(T, Src0);
1397 _por(T, Src1);
1398 _movp(Dest, T);
1399 } break;
1400 case InstArithmetic::Xor: {
1401 Variable *T = makeReg(Dest->getType());
1402 _movp(T, Src0);
1403 _pxor(T, Src1);
1404 _movp(Dest, T);
1405 } break;
1406 case InstArithmetic::Sub: {
1407 Variable *T = makeReg(Dest->getType());
1408 _movp(T, Src0);
1409 _psub(T, Src1);
1410 _movp(Dest, T);
1411 } break;
1412 case InstArithmetic::Mul: {
1413 bool TypesAreValidForPmull =
1414 Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16;
1415 bool InstructionSetIsValidForPmull =
John Porto5d0acff2015-06-30 15:29:21 -07001416 Dest->getType() == IceType_v8i16 || InstructionSet >= Traits::SSE4_1;
John Porto7e93c622015-06-23 10:58:57 -07001417 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) {
1418 Variable *T = makeReg(Dest->getType());
1419 _movp(T, Src0);
1420 _pmull(T, Src1);
1421 _movp(Dest, T);
1422 } else if (Dest->getType() == IceType_v4i32) {
1423 // Lowering sequence:
1424 // Note: The mask arguments have index 0 on the left.
1425 //
1426 // movups T1, Src0
1427 // pshufd T2, Src0, {1,0,3,0}
1428 // pshufd T3, Src1, {1,0,3,0}
1429 // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]}
1430 // pmuludq T1, Src1
1431 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]}
1432 // pmuludq T2, T3
1433 // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])}
1434 // shufps T1, T2, {0,2,0,2}
1435 // pshufd T4, T1, {0,2,1,3}
1436 // movups Dest, T4
1437
1438 // Mask that directs pshufd to create a vector with entries
1439 // Src[1, 0, 3, 0]
1440 const unsigned Constant1030 = 0x31;
1441 Constant *Mask1030 = Ctx->getConstantInt32(Constant1030);
1442 // Mask that directs shufps to create a vector with entries
1443 // Dest[0, 2], Src[0, 2]
1444 const unsigned Mask0202 = 0x88;
1445 // Mask that directs pshufd to create a vector with entries
1446 // Src[0, 2, 1, 3]
1447 const unsigned Mask0213 = 0xd8;
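        // (pshufd's imm8 selects one source lane per two bits, lane 0 in the
        // low bits: 0x31 == 0b00110001 picks lanes {1,0,3,0}, and 0xd8 ==
        // 0b11011000 picks {0,2,1,3}. For shufps, 0x88 takes lanes {0,2} of
        // the destination and lanes {0,2} of the source.)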
        Variable *T1 = makeReg(IceType_v4i32);
        Variable *T2 = makeReg(IceType_v4i32);
        Variable *T3 = makeReg(IceType_v4i32);
        Variable *T4 = makeReg(IceType_v4i32);
        _movp(T1, Src0);
        _pshufd(T2, Src0, Mask1030);
        _pshufd(T3, Src1, Mask1030);
        _pmuludq(T1, Src1);
        _pmuludq(T2, T3);
        _shufps(T1, T2, Ctx->getConstantInt32(Mask0202));
        _pshufd(T4, T1, Ctx->getConstantInt32(Mask0213));
        _movp(Dest, T4);
      } else {
        assert(Dest->getType() == IceType_v16i8);
        scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
      }
    } break;
    case InstArithmetic::Shl:
    case InstArithmetic::Lshr:
    case InstArithmetic::Ashr:
    case InstArithmetic::Udiv:
    case InstArithmetic::Urem:
    case InstArithmetic::Sdiv:
    case InstArithmetic::Srem:
      scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
      break;
    case InstArithmetic::Fadd: {
      Variable *T = makeReg(Dest->getType());
      _movp(T, Src0);
      _addps(T, Src1);
      _movp(Dest, T);
    } break;
    case InstArithmetic::Fsub: {
      Variable *T = makeReg(Dest->getType());
      _movp(T, Src0);
      _subps(T, Src1);
      _movp(Dest, T);
    } break;
    case InstArithmetic::Fmul: {
      Variable *T = makeReg(Dest->getType());
      _movp(T, Src0);
      _mulps(T, Src1);
      _movp(Dest, T);
    } break;
    case InstArithmetic::Fdiv: {
      Variable *T = makeReg(Dest->getType());
      _movp(T, Src0);
      _divps(T, Src1);
      _movp(Dest, T);
    } break;
    case InstArithmetic::Frem:
      scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
      break;
    }
    return;
  }
  Variable *T_edx = nullptr;
  Variable *T = nullptr;
  switch (Inst->getOp()) {
  case InstArithmetic::_num:
    llvm_unreachable("Unknown arithmetic operator");
    break;
  case InstArithmetic::Add:
    _mov(T, Src0);
    _add(T, Src1);
    _mov(Dest, T);
    break;
  case InstArithmetic::And:
    _mov(T, Src0);
    _and(T, Src1);
    _mov(Dest, T);
    break;
  case InstArithmetic::Or:
    _mov(T, Src0);
    _or(T, Src1);
    _mov(Dest, T);
    break;
  case InstArithmetic::Xor:
    _mov(T, Src0);
    _xor(T, Src1);
    _mov(Dest, T);
    break;
  case InstArithmetic::Sub:
    _mov(T, Src0);
    _sub(T, Src1);
    _mov(Dest, T);
    break;
  case InstArithmetic::Mul:
    if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
      if (optimizeScalarMul(Dest, Src0, C->getValue()))
        return;
    }
    // The 8-bit version of imul only allows the form "imul r/m8"
    // where T must be in eax.
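    // (The one-operand "imul r/m8" form computes %ax = %al * r/m8, which is
    // why T is pinned to %eax below and Src1 is legalized to a register or
    // memory operand, never an immediate.)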
    if (isByteSizedArithType(Dest->getType())) {
      _mov(T, Src0, Traits::RegisterSet::Reg_eax);
      Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
    } else {
      _mov(T, Src0);
    }
    _imul(T, Src1);
    _mov(Dest, T);
    break;
  case InstArithmetic::Shl:
    _mov(T, Src0);
    if (!llvm::isa<ConstantInteger32>(Src1))
      Src1 = legalizeToReg(Src1, Traits::RegisterSet::Reg_ecx);
    _shl(T, Src1);
    _mov(Dest, T);
    break;
  case InstArithmetic::Lshr:
    _mov(T, Src0);
    if (!llvm::isa<ConstantInteger32>(Src1))
      Src1 = legalizeToReg(Src1, Traits::RegisterSet::Reg_ecx);
    _shr(T, Src1);
    _mov(Dest, T);
    break;
  case InstArithmetic::Ashr:
    _mov(T, Src0);
    if (!llvm::isa<ConstantInteger32>(Src1))
      Src1 = legalizeToReg(Src1, Traits::RegisterSet::Reg_ecx);
    _sar(T, Src1);
    _mov(Dest, T);
    break;
  case InstArithmetic::Udiv:
    // div and idiv are among the few arithmetic operators that do not allow
    // an immediate operand.
    Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
    if (isByteSizedArithType(Dest->getType())) {
      // For 8-bit unsigned division we need to zero-extend %al by clearing
      // %ah. A mov $0, %ah (or xor %ah, %ah) would work just fine, except
      // that the x86-64 assembler refuses to encode %ah (encoding %spl with
      // a REX prefix instead.) Accessing %ah in 64-bit mode is "tricky": an
      // instruction can't encode %ah together with any 8-bit register other
      // than %a[lh], %b[lh], %c[lh], and %d[lh], which means the x86 target
      // lowering (and the register allocator) would have to be aware of
      // this restriction. For now, we simply zero %eax completely, and move
      // the dividend into %al.
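      // ("div r/m8" divides %ax by its operand, leaving the quotient in %al
      // and the remainder in %ah, so zeroing %eax first is what supplies the
      // zero-extended dividend.)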
      Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
      Context.insert(InstFakeDef::create(Func, T_eax));
      _xor(T_eax, T_eax);
      _mov(T, Src0, Traits::RegisterSet::Reg_eax);
      _div(T, Src1, T);
      _mov(Dest, T);
      Context.insert(InstFakeUse::create(Func, T_eax));
    } else {
      Constant *Zero = Ctx->getConstantZero(IceType_i32);
      _mov(T, Src0, Traits::RegisterSet::Reg_eax);
      _mov(T_edx, Zero, Traits::RegisterSet::Reg_edx);
      _div(T, Src1, T_edx);
      _mov(Dest, T);
    }
    break;
  case InstArithmetic::Sdiv:
    // TODO(stichnot): Enable this after doing better performance
    // and cross testing.
    if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {
      // Optimize division by constant power of 2, but not for Om1
      // or O0, just to keep things simple there.
      if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
        int32_t Divisor = C->getValue();
        uint32_t UDivisor = static_cast<uint32_t>(Divisor);
        if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {
          uint32_t LogDiv = llvm::Log2_32(UDivisor);
          Type Ty = Dest->getType();
          // LLVM does the following for dest=src/(1<<log):
          // t=src
          // sar t,typewidth-1 // -1 if src is negative, 0 if not
          // shr t,typewidth-log
          // add t,src
          // sar t,log
          // dest=t
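          // For example, for src=-9 and log=2 (divide by 4): t=-9; sar 31
          // gives -1; shr 30 gives 3; add src gives -6; sar 2 gives -2,
          // matching C's truncating division (-9/4 == -2), whereas a bare
          // "sar t,2" would give the floor value -3.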
          uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty);
          _mov(T, Src0);
          // If for some reason we are dividing by 1, just treat it
          // like an assignment.
          if (LogDiv > 0) {
            // The initial sar is unnecessary when dividing by 2.
            if (LogDiv > 1)
              _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1));
            _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv));
            _add(T, Src0);
            _sar(T, Ctx->getConstantInt(Ty, LogDiv));
          }
          _mov(Dest, T);
          return;
        }
      }
    }
    Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
    if (isByteSizedArithType(Dest->getType())) {
      _mov(T, Src0, Traits::RegisterSet::Reg_eax);
      _cbwdq(T, T);
      _idiv(T, Src1, T);
      _mov(Dest, T);
    } else {
      T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
      _mov(T, Src0, Traits::RegisterSet::Reg_eax);
      _cbwdq(T_edx, T);
      _idiv(T, Src1, T_edx);
      _mov(Dest, T);
    }
    break;
  case InstArithmetic::Urem:
    Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
    if (isByteSizedArithType(Dest->getType())) {
      Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
      Context.insert(InstFakeDef::create(Func, T_eax));
      _xor(T_eax, T_eax);
      _mov(T, Src0, Traits::RegisterSet::Reg_eax);
      _div(T, Src1, T);
      // shr $8, %eax shifts ah (i.e., the 8-bit remainder) into al. We don't
      // mov %ah, %al because it would make x86-64 codegen more complicated.
      // If this ever becomes a problem we can introduce a pseudo rem
      // instruction that returns the remainder in %al directly (and uses a
      // mov for copying %ah to %al.)
      static constexpr uint8_t AlSizeInBits = 8;
      _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits));
      _mov(Dest, T);
      Context.insert(InstFakeUse::create(Func, T_eax));
    } else {
      Constant *Zero = Ctx->getConstantZero(IceType_i32);
      T_edx = makeReg(Dest->getType(), Traits::RegisterSet::Reg_edx);
      _mov(T_edx, Zero);
      _mov(T, Src0, Traits::RegisterSet::Reg_eax);
      _div(T_edx, Src1, T);
      _mov(Dest, T_edx);
    }
    break;
  case InstArithmetic::Srem:
    // TODO(stichnot): Enable this after doing better performance
    // and cross testing.
    if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {
      // Optimize mod by constant power of 2, but not for Om1 or O0,
      // just to keep things simple there.
      if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
        int32_t Divisor = C->getValue();
        uint32_t UDivisor = static_cast<uint32_t>(Divisor);
        if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {
          uint32_t LogDiv = llvm::Log2_32(UDivisor);
          Type Ty = Dest->getType();
          // LLVM does the following for dest=src%(1<<log):
          // t=src
          // sar t,typewidth-1 // -1 if src is negative, 0 if not
          // shr t,typewidth-log
          // add t,src
          // and t, -(1<<log)
          // sub t,src
          // neg t
          // dest=t
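          // For example, for src=-9 and log=2 (mod 4): the sar/shr/add steps
          // give -6 as above; and -4 gives -8; sub src gives -8-(-9) == 1;
          // neg gives -1, matching C's remainder semantics (-9%4 == -1).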
          uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty);
          // If for some reason we are dividing by 1, just assign 0.
          if (LogDiv == 0) {
            _mov(Dest, Ctx->getConstantZero(Ty));
            return;
          }
          _mov(T, Src0);
          // The initial sar is unnecessary when dividing by 2.
          if (LogDiv > 1)
            _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1));
          _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv));
          _add(T, Src0);
          _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv)));
          _sub(T, Src0);
          _neg(T);
          _mov(Dest, T);
          return;
        }
      }
    }
    Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
    if (isByteSizedArithType(Dest->getType())) {
      _mov(T, Src0, Traits::RegisterSet::Reg_eax);
      // T is %al.
      _cbwdq(T, T);
      _idiv(T, Src1, T);
      Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
      Context.insert(InstFakeDef::create(Func, T_eax));
      // shr $8, %eax shifts ah (i.e., the 8-bit remainder) into al. We don't
      // mov %ah, %al because it would make x86-64 codegen more complicated.
      // If this ever becomes a problem we can introduce a pseudo rem
      // instruction that returns the remainder in %al directly (and uses a
      // mov for copying %ah to %al.)
      static constexpr uint8_t AlSizeInBits = 8;
      _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits));
      _mov(Dest, T);
      Context.insert(InstFakeUse::create(Func, T_eax));
    } else {
      T_edx = makeReg(Dest->getType(), Traits::RegisterSet::Reg_edx);
      _mov(T, Src0, Traits::RegisterSet::Reg_eax);
      _cbwdq(T_edx, T);
      _idiv(T_edx, Src1, T);
      _mov(Dest, T_edx);
    }
    break;
  case InstArithmetic::Fadd:
    _mov(T, Src0);
    _addss(T, Src1);
    _mov(Dest, T);
    break;
  case InstArithmetic::Fsub:
    _mov(T, Src0);
    _subss(T, Src1);
    _mov(Dest, T);
    break;
  case InstArithmetic::Fmul:
    _mov(T, Src0);
    _mulss(T, Src1);
    _mov(Dest, T);
    break;
  case InstArithmetic::Fdiv:
    _mov(T, Src0);
    _divss(T, Src1);
    _mov(Dest, T);
    break;
  case InstArithmetic::Frem: {
    const SizeT MaxSrcs = 2;
    Type Ty = Dest->getType();
    InstCall *Call = makeHelperCall(
        isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs);
    Call->addArg(Src0);
    Call->addArg(Src1);
    return lowerCall(Call);
  }
  }
}

template <class Machine>
void TargetX86Base<Machine>::lowerAssign(const InstAssign *Inst) {
  Variable *Dest = Inst->getDest();
  Operand *Src0 = Inst->getSrc(0);
  assert(Dest->getType() == Src0->getType());
  if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
    Src0 = legalize(Src0);
    Operand *Src0Lo = loOperand(Src0);
    Operand *Src0Hi = hiOperand(Src0);
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    Variable *T_Lo = nullptr, *T_Hi = nullptr;
    _mov(T_Lo, Src0Lo);
    _mov(DestLo, T_Lo);
    _mov(T_Hi, Src0Hi);
    _mov(DestHi, T_Hi);
  } else {
    Operand *Src0Legal;
    if (Dest->hasReg()) {
      // If Dest already has a physical register, then only basic legalization
      // is needed, as the source operand can be a register, immediate, or
      // memory.
      Src0Legal = legalize(Src0);
    } else {
      // If Dest could be a stack operand, then Src0Legal must be a physical
      // register or a scalar integer immediate.
      Src0Legal = legalize(Src0, Legal_Reg | Legal_Imm);
    }
    if (isVectorType(Dest->getType()))
      _movp(Dest, Src0Legal);
    else
      _mov(Dest, Src0Legal);
  }
}

template <class Machine>
void TargetX86Base<Machine>::lowerBr(const InstBr *Inst) {
  if (Inst->isUnconditional()) {
    _br(Inst->getTargetUnconditional());
    return;
  }
  Operand *Cond = Inst->getCondition();

  // Handle folding opportunities.
  if (const class Inst *Producer = FoldingInfo.getProducerFor(Cond)) {
    assert(Producer->isDeleted());
    switch (BoolFolding::getProducerKind(Producer)) {
    default:
      break;
    case BoolFolding::PK_Icmp32: {
      // TODO(stichnot): Refactor similarities between this block and
      // the corresponding code in lowerIcmp().
      auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer);
      Operand *Src0 = Producer->getSrc(0);
      Operand *Src1 = legalize(Producer->getSrc(1));
      Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1);
      _cmp(Src0RM, Src1);
      _br(Traits::getIcmp32Mapping(Cmp->getCondition()), Inst->getTargetTrue(),
          Inst->getTargetFalse());
      return;
    }
    }
  }

  Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem);
  Constant *Zero = Ctx->getConstantZero(IceType_i32);
  _cmp(Src0, Zero);
  _br(Traits::Cond::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse());
}

template <class Machine>
void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
  // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap)
  InstCast::OpKind CastKind = Inst->getCastKind();
  Variable *Dest = Inst->getDest();
  switch (CastKind) {
  default:
    Func->setError("Cast type not supported");
    return;
  case InstCast::Sext: {
    // Src0RM is the source operand legalized to physical register or memory,
    // but not immediate, since the relevant x86 native instructions don't
    // allow an immediate operand. If the operand is an immediate, we could
    // consider computing the strength-reduced result at translation time,
    // but we're unlikely to see something like that in the bitcode that
    // the optimizer wouldn't have already taken care of.
    Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
    if (isVectorType(Dest->getType())) {
      Type DestTy = Dest->getType();
      if (DestTy == IceType_v16i8) {
        // onemask = materialize(1,1,...); dst = (src & onemask) > 0
        Variable *OneMask = makeVectorOfOnes(Dest->getType());
        Variable *T = makeReg(DestTy);
        _movp(T, Src0RM);
        _pand(T, OneMask);
        Variable *Zeros = makeVectorOfZeros(Dest->getType());
        _pcmpgt(T, Zeros);
        _movp(Dest, T);
      } else {
        // width = width(elty) - 1; dest = (src << width) >> width
        SizeT ShiftAmount =
            Traits::X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) -
            1;
        Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount);
        Variable *T = makeReg(DestTy);
        _movp(T, Src0RM);
        _psll(T, ShiftConstant);
        _psra(T, ShiftConstant);
        _movp(Dest, T);
      }
    } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
      // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2
      Constant *Shift = Ctx->getConstantInt32(31);
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *T_Lo = makeReg(DestLo->getType());
      if (Src0RM->getType() == IceType_i32) {
        _mov(T_Lo, Src0RM);
      } else if (Src0RM->getType() == IceType_i1) {
        _movzx(T_Lo, Src0RM);
        _shl(T_Lo, Shift);
        _sar(T_Lo, Shift);
      } else {
        _movsx(T_Lo, Src0RM);
      }
      _mov(DestLo, T_Lo);
      Variable *T_Hi = nullptr;
      _mov(T_Hi, T_Lo);
      if (Src0RM->getType() != IceType_i1)
        // For i1, the sar instruction is already done above.
        _sar(T_Hi, Shift);
      _mov(DestHi, T_Hi);
    } else if (Src0RM->getType() == IceType_i1) {
      // t1 = src
      // shl t1, dst_bitwidth - 1
      // sar t1, dst_bitwidth - 1
      // dst = t1
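      // For example, sign-extending an i1 value of 1 to i32: shl 31 gives
      // 0x80000000, and sar 31 then smears the sign bit, giving 0xffffffff
      // (i.e., -1); an i1 value of 0 stays 0.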
      size_t DestBits =
          Traits::X86_CHAR_BIT * typeWidthInBytes(Dest->getType());
      Constant *ShiftAmount = Ctx->getConstantInt32(DestBits - 1);
      Variable *T = makeReg(Dest->getType());
      if (typeWidthInBytes(Dest->getType()) <=
          typeWidthInBytes(Src0RM->getType())) {
        _mov(T, Src0RM);
      } else {
        // Widen the source using movsx or movzx. (It doesn't matter
        // which one, since the following shl/sar overwrite the bits.)
        _movzx(T, Src0RM);
      }
      _shl(T, ShiftAmount);
      _sar(T, ShiftAmount);
      _mov(Dest, T);
    } else {
      // t1 = movsx src; dst = t1
      Variable *T = makeReg(Dest->getType());
      _movsx(T, Src0RM);
      _mov(Dest, T);
    }
    break;
  }
  case InstCast::Zext: {
    Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
    if (isVectorType(Dest->getType())) {
      // onemask = materialize(1,1,...); dest = onemask & src
      Type DestTy = Dest->getType();
      Variable *OneMask = makeVectorOfOnes(DestTy);
      Variable *T = makeReg(DestTy);
      _movp(T, Src0RM);
      _pand(T, OneMask);
      _movp(Dest, T);
    } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
      // t1=movzx src; dst.lo=t1; dst.hi=0
      Constant *Zero = Ctx->getConstantZero(IceType_i32);
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *Tmp = makeReg(DestLo->getType());
      if (Src0RM->getType() == IceType_i32) {
        _mov(Tmp, Src0RM);
      } else {
        _movzx(Tmp, Src0RM);
      }
      if (Src0RM->getType() == IceType_i1) {
        Constant *One = Ctx->getConstantInt32(1);
        _and(Tmp, One);
      }
      _mov(DestLo, Tmp);
      _mov(DestHi, Zero);
    } else if (Src0RM->getType() == IceType_i1) {
      // t = Src0RM; t &= 1; Dest = t
      Constant *One = Ctx->getConstantInt32(1);
      Type DestTy = Dest->getType();
      Variable *T = nullptr;
      if (DestTy == IceType_i8) {
        _mov(T, Src0RM);
      } else {
        assert(DestTy != IceType_i1);
        assert(Traits::Is64Bit || DestTy != IceType_i64);
        // Use 32-bit for both 16-bit and 32-bit, since 32-bit ops are
        // shorter. In x86-64 we need to widen T to 64 bits to ensure that T,
        // if written to the stack (i.e., in -Om1), will be fully
        // zero-extended.
        T = makeReg(DestTy == IceType_i64 ? IceType_i64 : IceType_i32);
        _movzx(T, Src0RM);
      }
      _and(T, One);
      _mov(Dest, T);
    } else {
      // t1 = movzx src; dst = t1
      Variable *T = makeReg(Dest->getType());
      _movzx(T, Src0RM);
      _mov(Dest, T);
    }
    break;
  }
  case InstCast::Trunc: {
    if (isVectorType(Dest->getType())) {
      // onemask = materialize(1,1,...); dst = src & onemask
      Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
      Type Src0Ty = Src0RM->getType();
      Variable *OneMask = makeVectorOfOnes(Src0Ty);
      Variable *T = makeReg(Dest->getType());
      _movp(T, Src0RM);
      _pand(T, OneMask);
      _movp(Dest, T);
    } else {
      Operand *Src0 = legalizeUndef(Inst->getSrc(0));
      if (!Traits::Is64Bit && Src0->getType() == IceType_i64)
        Src0 = loOperand(Src0);
      Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
      // t1 = trunc Src0RM; Dest = t1
      Variable *T = nullptr;
      _mov(T, Src0RM);
      if (Dest->getType() == IceType_i1)
        _and(T, Ctx->getConstantInt1(1));
      _mov(Dest, T);
    }
    break;
  }
  case InstCast::Fptrunc:
  case InstCast::Fpext: {
    Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
    // t1 = cvt Src0RM; Dest = t1
    Variable *T = makeReg(Dest->getType());
    _cvt(T, Src0RM, Traits::Insts::Cvt::Float2float);
    _mov(Dest, T);
    break;
  }
  case InstCast::Fptosi:
    if (isVectorType(Dest->getType())) {
      assert(Dest->getType() == IceType_v4i32 &&
             Inst->getSrc(0)->getType() == IceType_v4f32);
      Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
      if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
        Src0RM = legalizeToReg(Src0RM);
      Variable *T = makeReg(Dest->getType());
      _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq);
      _movp(Dest, T);
    } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
      // Use a helper for converting floating-point values to 64-bit
      // integers. SSE2 appears to have no way to convert from xmm
      // registers to something like the edx:eax register pair, and
      // gcc and clang both want to use x87 instructions complete with
      // temporary manipulation of the status word. This helper is
      // not needed for x86-64.
      split64(Dest);
      const SizeT MaxSrcs = 1;
      Type SrcType = Inst->getSrc(0)->getType();
      InstCall *Call =
          makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64
                                                           : H_fptosi_f64_i64,
                         Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
    } else {
      Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
      // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
      Variable *T_1 = nullptr;
      if (Traits::Is64Bit && Dest->getType() == IceType_i64) {
        T_1 = makeReg(IceType_i64);
      } else {
        assert(Dest->getType() != IceType_i64);
        T_1 = makeReg(IceType_i32);
      }
      // cvt() requires its integer argument to be a GPR.
      T_1->setWeightInfinite();
      Variable *T_2 = makeReg(Dest->getType());
      _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si);
      _mov(T_2, T_1); // T_1 and T_2 may have different integer types
      if (Dest->getType() == IceType_i1)
        _and(T_2, Ctx->getConstantInt1(1));
      _mov(Dest, T_2);
    }
    break;
  case InstCast::Fptoui:
    if (isVectorType(Dest->getType())) {
      assert(Dest->getType() == IceType_v4i32 &&
             Inst->getSrc(0)->getType() == IceType_v4f32);
      const SizeT MaxSrcs = 1;
      InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
    } else if (Dest->getType() == IceType_i64 ||
               (!Traits::Is64Bit && Dest->getType() == IceType_i32)) {
      // Use a helper for both x86-32 and x86-64.
      if (!Traits::Is64Bit)
        split64(Dest);
      const SizeT MaxSrcs = 1;
      Type DestType = Dest->getType();
      Type SrcType = Inst->getSrc(0)->getType();
      IceString TargetString;
      if (Traits::Is64Bit) {
        TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64
                                                         : H_fptoui_f64_i64;
      } else if (isInt32Asserting32Or64(DestType)) {
        TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32
                                                         : H_fptoui_f64_i32;
      } else {
        TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64
                                                         : H_fptoui_f64_i64;
      }
      InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
      return;
    } else {
      Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
      // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
      assert(Dest->getType() != IceType_i64);
      Variable *T_1 = nullptr;
      if (Traits::Is64Bit && Dest->getType() == IceType_i32) {
        T_1 = makeReg(IceType_i64);
      } else {
        assert(Dest->getType() != IceType_i32);
        T_1 = makeReg(IceType_i32);
      }
      T_1->setWeightInfinite();
      Variable *T_2 = makeReg(Dest->getType());
      _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si);
      _mov(T_2, T_1); // T_1 and T_2 may have different integer types
      if (Dest->getType() == IceType_i1)
        _and(T_2, Ctx->getConstantInt1(1));
      _mov(Dest, T_2);
    }
    break;
  case InstCast::Sitofp:
    if (isVectorType(Dest->getType())) {
      assert(Dest->getType() == IceType_v4f32 &&
             Inst->getSrc(0)->getType() == IceType_v4i32);
      Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
      if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
        Src0RM = legalizeToReg(Src0RM);
      Variable *T = makeReg(Dest->getType());
      _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps);
      _movp(Dest, T);
    } else if (!Traits::Is64Bit && Inst->getSrc(0)->getType() == IceType_i64) {
      // Use a helper for x86-32.
      const SizeT MaxSrcs = 1;
      Type DestType = Dest->getType();
      InstCall *Call =
          makeHelperCall(isFloat32Asserting32Or64(DestType) ? H_sitofp_i64_f32
                                                            : H_sitofp_i64_f64,
                         Dest, MaxSrcs);
      // TODO: Call the correct compiler-rt helper function.
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
      return;
    } else {
      Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
      // Sign-extend the operand.
      // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2
      Variable *T_1 = nullptr;
      if (Traits::Is64Bit && Src0RM->getType() == IceType_i64) {
        T_1 = makeReg(IceType_i64);
      } else {
        assert(Src0RM->getType() != IceType_i64);
        T_1 = makeReg(IceType_i32);
      }
      T_1->setWeightInfinite();
      Variable *T_2 = makeReg(Dest->getType());
      if (Src0RM->getType() == T_1->getType())
        _mov(T_1, Src0RM);
      else
        _movsx(T_1, Src0RM);
      _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss);
      _mov(Dest, T_2);
    }
    break;
  case InstCast::Uitofp: {
    Operand *Src0 = Inst->getSrc(0);
    if (isVectorType(Src0->getType())) {
      assert(Dest->getType() == IceType_v4f32 &&
             Src0->getType() == IceType_v4i32);
      const SizeT MaxSrcs = 1;
      InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs);
      Call->addArg(Src0);
      lowerCall(Call);
    } else if (Src0->getType() == IceType_i64 ||
               (!Traits::Is64Bit && Src0->getType() == IceType_i32)) {
      // Use a helper for x86-32 and x86-64. Also use a helper for
      // i32 on x86-32.
      const SizeT MaxSrcs = 1;
      Type DestType = Dest->getType();
      IceString TargetString;
      if (isInt32Asserting32Or64(Src0->getType())) {
        TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i32_f32
                                                          : H_uitofp_i32_f64;
      } else {
        TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i64_f32
                                                          : H_uitofp_i64_f64;
      }
      InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
      Call->addArg(Src0);
      lowerCall(Call);
      return;
    } else {
      Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
      // Zero-extend the operand.
      // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2
      Variable *T_1 = nullptr;
      if (Traits::Is64Bit && Src0RM->getType() == IceType_i32) {
        T_1 = makeReg(IceType_i64);
      } else {
        assert(Src0RM->getType() != IceType_i64);
        assert(Traits::Is64Bit || Src0RM->getType() != IceType_i32);
        T_1 = makeReg(IceType_i32);
      }
      T_1->setWeightInfinite();
      Variable *T_2 = makeReg(Dest->getType());
      if (Src0RM->getType() == T_1->getType())
        _mov(T_1, Src0RM);
      else
        _movzx(T_1, Src0RM);
      _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss);
      _mov(Dest, T_2);
    }
    break;
  }
  case InstCast::Bitcast: {
    Operand *Src0 = Inst->getSrc(0);
    if (Dest->getType() == Src0->getType()) {
      InstAssign *Assign = InstAssign::create(Func, Dest, Src0);
      lowerAssign(Assign);
      return;
    }
    switch (Dest->getType()) {
    default:
      llvm_unreachable("Unexpected Bitcast dest type");
    case IceType_i8: {
      assert(Src0->getType() == IceType_v8i1);
      InstCall *Call = makeHelperCall(H_bitcast_8xi1_i8, Dest, 1);
      Call->addArg(Src0);
      lowerCall(Call);
    } break;
    case IceType_i16: {
      assert(Src0->getType() == IceType_v16i1);
      InstCall *Call = makeHelperCall(H_bitcast_16xi1_i16, Dest, 1);
      Call->addArg(Src0);
      lowerCall(Call);
    } break;
    case IceType_i32:
    case IceType_f32: {
      Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
      Type DestType = Dest->getType();
      Type SrcType = Src0RM->getType();
      (void)DestType;
      assert((DestType == IceType_i32 && SrcType == IceType_f32) ||
             (DestType == IceType_f32 && SrcType == IceType_i32));
      // a.i32 = bitcast b.f32 ==>
      // t.f32 = b.f32
      // s.f32 = spill t.f32
      // a.i32 = s.f32
      Variable *T = nullptr;
      // TODO: Should be able to force a spill setup by calling legalize()
      // with Legal_Mem and not Legal_Reg or Legal_Imm.
      typename Traits::SpillVariable *SpillVar =
          Func->makeVariable<typename Traits::SpillVariable>(SrcType);
      SpillVar->setLinkedTo(Dest);
      Variable *Spill = SpillVar;
      Spill->setWeight(RegWeight::Zero);
      _mov(T, Src0RM);
      _mov(Spill, T);
      _mov(Dest, Spill);
    } break;
    case IceType_i64: {
      assert(Src0->getType() == IceType_f64);
      if (Traits::Is64Bit) {
        // Movd requires its fp argument (in this case, the bitcast source)
        // to be an xmm register.
        Variable *Src0R = legalizeToReg(Src0);
        Variable *T = makeReg(IceType_i64);
        _movd(T, Src0R);
        _mov(Dest, T);
      } else {
        Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
        // a.i64 = bitcast b.f64 ==>
        // s.f64 = spill b.f64
        // t_lo.i32 = lo(s.f64)
        // a_lo.i32 = t_lo.i32
        // t_hi.i32 = hi(s.f64)
        // a_hi.i32 = t_hi.i32
        Operand *SpillLo, *SpillHi;
        if (auto *Src0Var = llvm::dyn_cast<Variable>(Src0RM)) {
          typename Traits::SpillVariable *SpillVar =
              Func->makeVariable<typename Traits::SpillVariable>(IceType_f64);
          SpillVar->setLinkedTo(Src0Var);
          Variable *Spill = SpillVar;
          Spill->setWeight(RegWeight::Zero);
          _movq(Spill, Src0RM);
          SpillLo = Traits::VariableSplit::create(Func, Spill,
                                                  Traits::VariableSplit::Low);
          SpillHi = Traits::VariableSplit::create(Func, Spill,
                                                  Traits::VariableSplit::High);
        } else {
          SpillLo = loOperand(Src0RM);
          SpillHi = hiOperand(Src0RM);
        }

        Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
        Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
        Variable *T_Lo = makeReg(IceType_i32);
        Variable *T_Hi = makeReg(IceType_i32);

        _mov(T_Lo, SpillLo);
        _mov(DestLo, T_Lo);
        _mov(T_Hi, SpillHi);
        _mov(DestHi, T_Hi);
      }
    } break;
    case IceType_f64: {
      assert(Src0->getType() == IceType_i64);
      if (Traits::Is64Bit) {
        Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
        Variable *T = makeReg(IceType_f64);
        // Movd requires its fp argument (in this case, the bitcast
        // destination) to be an xmm register.
        T->setWeightInfinite();
        _movd(T, Src0RM);
        _mov(Dest, T);
      } else {
        Src0 = legalize(Src0);
        if (llvm::isa<typename Traits::X86OperandMem>(Src0)) {
          Variable *T = Func->makeVariable(Dest->getType());
          _movq(T, Src0);
          _movq(Dest, T);
          break;
        }
        // a.f64 = bitcast b.i64 ==>
        // t_lo.i32 = b_lo.i32
        // FakeDef(s.f64)
        // lo(s.f64) = t_lo.i32
        // t_hi.i32 = b_hi.i32
        // hi(s.f64) = t_hi.i32
        // a.f64 = s.f64
        typename Traits::SpillVariable *SpillVar =
            Func->makeVariable<typename Traits::SpillVariable>(IceType_f64);
        SpillVar->setLinkedTo(Dest);
        Variable *Spill = SpillVar;
        Spill->setWeight(RegWeight::Zero);

        Variable *T_Lo = nullptr, *T_Hi = nullptr;
        typename Traits::VariableSplit *SpillLo = Traits::VariableSplit::create(
            Func, Spill, Traits::VariableSplit::Low);
        typename Traits::VariableSplit *SpillHi = Traits::VariableSplit::create(
            Func, Spill, Traits::VariableSplit::High);
        _mov(T_Lo, loOperand(Src0));
        // Technically, the Spill is defined after the _store happens, but
        // SpillLo is considered a "use" of Spill so define Spill before it
        // is used.
        Context.insert(InstFakeDef::create(Func, Spill));
        _store(T_Lo, SpillLo);
        _mov(T_Hi, hiOperand(Src0));
        _store(T_Hi, SpillHi);
        _movq(Dest, Spill);
      }
    } break;
    case IceType_v8i1: {
      assert(Src0->getType() == IceType_i8);
      InstCall *Call = makeHelperCall(H_bitcast_i8_8xi1, Dest, 1);
      Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
      // Arguments to functions are required to be at least 32 bits wide.
      lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));
      Call->addArg(Src0AsI32);
      lowerCall(Call);
    } break;
    case IceType_v16i1: {
      assert(Src0->getType() == IceType_i16);
      InstCall *Call = makeHelperCall(H_bitcast_i16_16xi1, Dest, 1);
      Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
      // Arguments to functions are required to be at least 32 bits wide.
      lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));
      Call->addArg(Src0AsI32);
      lowerCall(Call);
    } break;
    case IceType_v8i16:
    case IceType_v16i8:
    case IceType_v4i32:
    case IceType_v4f32: {
      _movp(Dest, legalizeToReg(Src0));
    } break;
    }
    break;
  }
  }
}

template <class Machine>
void TargetX86Base<Machine>::lowerExtractElement(
    const InstExtractElement *Inst) {
  Operand *SourceVectNotLegalized = Inst->getSrc(0);
  ConstantInteger32 *ElementIndex =
      llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(1));
  // Only constant indices are allowed in PNaCl IR.
  assert(ElementIndex);

  unsigned Index = ElementIndex->getValue();
  Type Ty = SourceVectNotLegalized->getType();
  Type ElementTy = typeElementType(Ty);
  Type InVectorElementTy = Traits::getInVectorElementType(Ty);
  Variable *ExtractedElementR = makeReg(InVectorElementTy);

  // TODO(wala): Determine the best lowering sequences for each type.
  bool CanUsePextr = Ty == IceType_v8i16 || Ty == IceType_v8i1 ||
                     InstructionSet >= Traits::SSE4_1;
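  // (pextrw on xmm registers is available starting with SSE2, which is why
  // the v8i16/v8i1 cases don't need the SSE4.1 check; pextrb and pextrd
  // were only added in SSE4.1.)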
  if (CanUsePextr && Ty != IceType_v4f32) {
    // Use pextrb, pextrw, or pextrd.
    Constant *Mask = Ctx->getConstantInt32(Index);
    Variable *SourceVectR = legalizeToReg(SourceVectNotLegalized);
    _pextr(ExtractedElementR, SourceVectR, Mask);
  } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
    // Use pshufd and movd/movss.
    Variable *T = nullptr;
    if (Index) {
      // The shuffle only needs to occur if the element to be extracted
      // is not at the lowest index.
      Constant *Mask = Ctx->getConstantInt32(Index);
      T = makeReg(Ty);
      _pshufd(T, legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem), Mask);
    } else {
      T = legalizeToReg(SourceVectNotLegalized);
    }

    if (InVectorElementTy == IceType_i32) {
      _movd(ExtractedElementR, T);
    } else { // Ty == IceType_f32
      // TODO(wala): _movss is only used here because _mov does not
      // allow a vector source and a scalar destination. _mov should be
      // able to be used here.
      // _movss is a binary instruction, so the FakeDef is needed to
      // keep the live range analysis consistent.
      Context.insert(InstFakeDef::create(Func, ExtractedElementR));
      _movss(ExtractedElementR, T);
    }
  } else {
    assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
    // Spill the value to a stack slot and do the extraction in memory.
    //
    // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when
    // support for legalizing to mem is implemented.
    Variable *Slot = Func->makeVariable(Ty);
    Slot->setWeight(RegWeight::Zero);
    _movp(Slot, legalizeToReg(SourceVectNotLegalized));

    // Compute the location of the element in memory.
    unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
    typename Traits::X86OperandMem *Loc =
        getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
    _mov(ExtractedElementR, Loc);
  }

  if (ElementTy == IceType_i1) {
    // Truncate extracted integers to i1s if necessary.
    Variable *T = makeReg(IceType_i1);
    InstCast *Cast =
        InstCast::create(Func, InstCast::Trunc, T, ExtractedElementR);
    lowerCast(Cast);
    ExtractedElementR = T;
  }

  // Copy the element to the destination.
  Variable *Dest = Inst->getDest();
  _mov(Dest, ExtractedElementR);
}

template <class Machine>
void TargetX86Base<Machine>::lowerFcmp(const InstFcmp *Inst) {
  Operand *Src0 = Inst->getSrc(0);
  Operand *Src1 = Inst->getSrc(1);
  Variable *Dest = Inst->getDest();

  if (isVectorType(Dest->getType())) {
    InstFcmp::FCond Condition = Inst->getCondition();
    size_t Index = static_cast<size_t>(Condition);
    assert(Index < Traits::TableFcmpSize);

    if (Traits::TableFcmp[Index].SwapVectorOperands) {
      Operand *T = Src0;
      Src0 = Src1;
      Src1 = T;
    }

    Variable *T = nullptr;

    if (Condition == InstFcmp::True) {
      // makeVectorOfOnes() requires an integer vector type.
      T = makeVectorOfMinusOnes(IceType_v4i32);
    } else if (Condition == InstFcmp::False) {
      T = makeVectorOfZeros(Dest->getType());
    } else {
      Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
      Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
      if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
        Src1RM = legalizeToReg(Src1RM);

      switch (Condition) {
      default: {
        typename Traits::Cond::CmppsCond Predicate =
            Traits::TableFcmp[Index].Predicate;
        assert(Predicate != Traits::Cond::Cmpps_Invalid);
        T = makeReg(Src0RM->getType());
        _movp(T, Src0RM);
        _cmpps(T, Src1RM, Predicate);
      } break;
      case InstFcmp::One: {
        // Check both unequal and ordered.
        T = makeReg(Src0RM->getType());
        Variable *T2 = makeReg(Src0RM->getType());
        _movp(T, Src0RM);
        _cmpps(T, Src1RM, Traits::Cond::Cmpps_neq);
        _movp(T2, Src0RM);
        _cmpps(T2, Src1RM, Traits::Cond::Cmpps_ord);
        _pand(T, T2);
      } break;
      case InstFcmp::Ueq: {
        // Check both equal or unordered.
        T = makeReg(Src0RM->getType());
        Variable *T2 = makeReg(Src0RM->getType());
        _movp(T, Src0RM);
        _cmpps(T, Src1RM, Traits::Cond::Cmpps_eq);
        _movp(T2, Src0RM);
        _cmpps(T2, Src1RM, Traits::Cond::Cmpps_unord);
        _por(T, T2);
      } break;
      }
    }

    _movp(Dest, T);
    eliminateNextVectorSextInstruction(Dest);
    return;
  }

  // Lowering a = fcmp cond, b, c
  //   ucomiss b, c       /* only if C1 != Br_None */
  //                      /* but swap b,c order if SwapOperands==true */
  //   mov a, <default>
  //   j<C1> label        /* only if C1 != Br_None */
  //   j<C2> label        /* only if C2 != Br_None */
  //   FakeUse(a)         /* only if C1 != Br_None */
  //   mov a, !<default>  /* only if C1 != Br_None */
  //   label:             /* only if C1 != Br_None */
  //
  // setcc lowering when C1 != Br_None && C2 == Br_None:
  //   ucomiss b, c       /* but swap b,c order if SwapOperands==true */
  //   setcc a, C1
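  // (ucomiss sets ZF/PF/CF from the comparison and zeroes OF/SF/AF; an
  // unordered result sets ZF=PF=CF=1. That is why some conditions need two
  // branches (C1 and C2) or swapped operands to be expressed in terms of
  // the available flag tests.)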
  InstFcmp::FCond Condition = Inst->getCondition();
  size_t Index = static_cast<size_t>(Condition);
  assert(Index < Traits::TableFcmpSize);
  if (Traits::TableFcmp[Index].SwapScalarOperands)
    std::swap(Src0, Src1);
  bool HasC1 = (Traits::TableFcmp[Index].C1 != Traits::Cond::Br_None);
  bool HasC2 = (Traits::TableFcmp[Index].C2 != Traits::Cond::Br_None);
  if (HasC1) {
    Src0 = legalize(Src0);
    Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
    Variable *T = nullptr;
    _mov(T, Src0);
    _ucomiss(T, Src1RM);
    if (!HasC2) {
      assert(Traits::TableFcmp[Index].Default);
      _setcc(Dest, Traits::TableFcmp[Index].C1);
      return;
    }
  }
  Constant *Default = Ctx->getConstantInt32(Traits::TableFcmp[Index].Default);
  _mov(Dest, Default);
  if (HasC1) {
    typename Traits::Insts::Label *Label =
        Traits::Insts::Label::create(Func, this);
    _br(Traits::TableFcmp[Index].C1, Label);
    if (HasC2) {
      _br(Traits::TableFcmp[Index].C2, Label);
    }
    Constant *NonDefault =
        Ctx->getConstantInt32(!Traits::TableFcmp[Index].Default);
    _mov_nonkillable(Dest, NonDefault);
    Context.insert(Label);
  }
}

template <class Machine>
void TargetX86Base<Machine>::lowerIcmp(const InstIcmp *Inst) {
  Operand *Src0 = legalize(Inst->getSrc(0));
  Operand *Src1 = legalize(Inst->getSrc(1));
  Variable *Dest = Inst->getDest();

  if (isVectorType(Dest->getType())) {
    Type Ty = Src0->getType();
    // Promote i1 vectors to 128 bit integer vector types.
    if (typeElementType(Ty) == IceType_i1) {
      Type NewTy = IceType_NUM;
      switch (Ty) {
      default:
        llvm_unreachable("unexpected type");
        break;
      case IceType_v4i1:
        NewTy = IceType_v4i32;
        break;
      case IceType_v8i1:
        NewTy = IceType_v8i16;
        break;
      case IceType_v16i1:
        NewTy = IceType_v16i8;
        break;
      }
      Variable *NewSrc0 = Func->makeVariable(NewTy);
      Variable *NewSrc1 = Func->makeVariable(NewTy);
      lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc0, Src0));
      lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc1, Src1));
      Src0 = NewSrc0;
      Src1 = NewSrc1;
      Ty = NewTy;
    }

    InstIcmp::ICond Condition = Inst->getCondition();

    Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
    Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);

    // SSE2 only has signed comparison operations. Transform unsigned
    // inputs in a manner that allows for the use of signed comparison
    // operations by flipping the high order bits.
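    // For example, xor-ing 0x80000000 into both operands maps unsigned
    // order onto signed order: 0xffffffff (the maximum unsigned value)
    // becomes 0x7fffffff (the maximum signed value), so pcmpgt then
    // computes the unsigned comparison.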
    if (Condition == InstIcmp::Ugt || Condition == InstIcmp::Uge ||
        Condition == InstIcmp::Ult || Condition == InstIcmp::Ule) {
      Variable *T0 = makeReg(Ty);
      Variable *T1 = makeReg(Ty);
      Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty);
      _movp(T0, Src0RM);
      _pxor(T0, HighOrderBits);
      _movp(T1, Src1RM);
      _pxor(T1, HighOrderBits);
      Src0RM = T0;
      Src1RM = T1;
    }

    Variable *T = makeReg(Ty);
    switch (Condition) {
    default:
      llvm_unreachable("unexpected condition");
      break;
    case InstIcmp::Eq: {
      if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
        Src1RM = legalizeToReg(Src1RM);
      _movp(T, Src0RM);
      _pcmpeq(T, Src1RM);
    } break;
    case InstIcmp::Ne: {
      if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
        Src1RM = legalizeToReg(Src1RM);
      _movp(T, Src0RM);
      _pcmpeq(T, Src1RM);
      Variable *MinusOne = makeVectorOfMinusOnes(Ty);
      _pxor(T, MinusOne);
    } break;
    case InstIcmp::Ugt:
    case InstIcmp::Sgt: {
      if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
        Src1RM = legalizeToReg(Src1RM);
      _movp(T, Src0RM);
      _pcmpgt(T, Src1RM);
    } break;
    case InstIcmp::Uge:
    case InstIcmp::Sge: {
      // !(Src1RM > Src0RM)
      if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
        Src0RM = legalizeToReg(Src0RM);
      _movp(T, Src1RM);
      _pcmpgt(T, Src0RM);
      Variable *MinusOne = makeVectorOfMinusOnes(Ty);
      _pxor(T, MinusOne);
    } break;
    case InstIcmp::Ult:
    case InstIcmp::Slt: {
      if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
        Src0RM = legalizeToReg(Src0RM);
      _movp(T, Src1RM);
      _pcmpgt(T, Src0RM);
    } break;
    case InstIcmp::Ule:
    case InstIcmp::Sle: {
      // !(Src0RM > Src1RM)
      if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
        Src1RM = legalizeToReg(Src1RM);
      _movp(T, Src0RM);
      _pcmpgt(T, Src1RM);
      Variable *MinusOne = makeVectorOfMinusOnes(Ty);
      _pxor(T, MinusOne);
    } break;
    }

    _movp(Dest, T);
    eliminateNextVectorSextInstruction(Dest);
    return;
  }

  if (!Traits::Is64Bit && Src0->getType() == IceType_i64) {
    lowerIcmp64(Inst);
    return;
  }

  // cmp b, c
  Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1);
  _cmp(Src0RM, Src1);
  _setcc(Dest, Traits::getIcmp32Mapping(Inst->getCondition()));
}

template <typename Machine>
template <typename T>
typename std::enable_if<!T::Is64Bit, void>::type
TargetX86Base<Machine>::lowerIcmp64(const InstIcmp *Inst) {
  // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
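  // The 64-bit compare is a pair of 32-bit compares: the high words are
  // compared first, and C1/C2 decide the result when they differ; only when
  // the high words are equal does the low-word compare (C3, an unsigned
  // condition) decide. For a signed less-than, for instance, the table
  // presumably maps to jl/jg on the high words and jb on the low words.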
2702 Operand *Src0 = legalize(Inst->getSrc(0));
2703 Operand *Src1 = legalize(Inst->getSrc(1));
2704 Variable *Dest = Inst->getDest();
2705 InstIcmp::ICond Condition = Inst->getCondition();
2706 size_t Index = static_cast<size_t>(Condition);
2707 assert(Index < Traits::TableIcmp64Size);
2708 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem);
2709 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem);
2710 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
2711 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
2712 Constant *Zero = Ctx->getConstantZero(IceType_i32);
2713 Constant *One = Ctx->getConstantInt32(1);
2714 typename Traits::Insts::Label *LabelFalse =
2715 Traits::Insts::Label::create(Func, this);
2716 typename Traits::Insts::Label *LabelTrue =
2717 Traits::Insts::Label::create(Func, this);
2718 _mov(Dest, One);
2719 _cmp(Src0HiRM, Src1HiRI);
2720 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None)
2721 _br(Traits::TableIcmp64[Index].C1, LabelTrue);
2722 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None)
2723 _br(Traits::TableIcmp64[Index].C2, LabelFalse);
2724 _cmp(Src0LoRM, Src1LoRI);
2725 _br(Traits::TableIcmp64[Index].C3, LabelTrue);
2726 Context.insert(LabelFalse);
2727 _mov_nonkillable(Dest, Zero);
2728 Context.insert(LabelTrue);
2729}

template <class Machine>
void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) {
  Operand *SourceVectNotLegalized = Inst->getSrc(0);
  Operand *ElementToInsertNotLegalized = Inst->getSrc(1);
  ConstantInteger32 *ElementIndex =
      llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2));
  // Only constant indices are allowed in PNaCl IR.
  assert(ElementIndex);
  unsigned Index = ElementIndex->getValue();
  assert(Index < typeNumElements(SourceVectNotLegalized->getType()));

  Type Ty = SourceVectNotLegalized->getType();
  Type ElementTy = typeElementType(Ty);
  Type InVectorElementTy = Traits::getInVectorElementType(Ty);

  if (ElementTy == IceType_i1) {
    // Expand the element to the appropriate size for it to be inserted
    // in the vector.
    Variable *Expanded = Func->makeVariable(InVectorElementTy);
    InstCast *Cast = InstCast::create(Func, InstCast::Zext, Expanded,
                                      ElementToInsertNotLegalized);
    lowerCast(Cast);
    ElementToInsertNotLegalized = Expanded;
  }

  if (Ty == IceType_v8i16 || Ty == IceType_v8i1 ||
      InstructionSet >= Traits::SSE4_1) {
    // Use insertps, pinsrb, pinsrw, or pinsrd.
    Operand *ElementRM =
        legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem);
    Operand *SourceVectRM =
        legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
    Variable *T = makeReg(Ty);
    _movp(T, SourceVectRM);
    if (Ty == IceType_v4f32)
      _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4));
    else
      _pinsr(T, ElementRM, Ctx->getConstantInt32(Index));
    _movp(Inst->getDest(), T);
  } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
    // Use shufps or movss.
    Variable *ElementR = nullptr;
    Operand *SourceVectRM =
        legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);

    if (InVectorElementTy == IceType_f32) {
      // ElementR will be in an XMM register since it is floating point.
      ElementR = legalizeToReg(ElementToInsertNotLegalized);
    } else {
      // Copy an integer to an XMM register.
      Operand *T = legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem);
      ElementR = makeReg(Ty);
      _movd(ElementR, T);
    }

    if (Index == 0) {
      Variable *T = makeReg(Ty);
      _movp(T, SourceVectRM);
      _movss(T, ElementR);
      _movp(Inst->getDest(), T);
      return;
    }

    // shufps treats the source and destination operands as vectors of
    // four doublewords. The destination's two high doublewords are
    // selected from the source operand and the two low doublewords are
    // selected from the (original value of) the destination operand.
    // An insertelement operation can be effected with a sequence of two
    // shufps operations with appropriate masks. In all cases below,
    // Element[0] is being inserted into SourceVectOperand. Indices are
    // ordered from left to right.
    //
    // insertelement into index 1 (result is stored in ElementR):
    //   ElementR := ElementR[0, 0] SourceVectRM[0, 0]
    //   ElementR := ElementR[3, 0] SourceVectRM[2, 3]
    //
    // insertelement into index 2 (result is stored in T):
    //   T := SourceVectRM
    //   ElementR := ElementR[0, 0] T[0, 3]
    //   T := T[0, 1] ElementR[0, 3]
    //
    // insertelement into index 3 (result is stored in T):
    //   T := SourceVectRM
    //   ElementR := ElementR[0, 0] T[0, 2]
    //   T := T[0, 1] ElementR[3, 0]
    const unsigned char Mask1[3] = {0, 192, 128};
    const unsigned char Mask2[3] = {227, 196, 52};
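    // For instance, assuming the standard shufps immediate encoding (four
    // 2-bit lane selectors, low bits first), Mask2[0] == 227 == 0b11100011
    // picks ElementR lanes {3, 0} for the two low result lanes and
    // SourceVectRM lanes {2, 3} for the two high result lanes, matching the
    // "ElementR[3, 0] SourceVectRM[2, 3]" step above.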

    Constant *Mask1Constant = Ctx->getConstantInt32(Mask1[Index - 1]);
    Constant *Mask2Constant = Ctx->getConstantInt32(Mask2[Index - 1]);

    if (Index == 1) {
      _shufps(ElementR, SourceVectRM, Mask1Constant);
      _shufps(ElementR, SourceVectRM, Mask2Constant);
      _movp(Inst->getDest(), ElementR);
    } else {
      Variable *T = makeReg(Ty);
      _movp(T, SourceVectRM);
      _shufps(ElementR, T, Mask1Constant);
      _shufps(T, ElementR, Mask2Constant);
      _movp(Inst->getDest(), T);
    }
  } else {
    assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
    // Spill the value to a stack slot and perform the insertion in
    // memory.
    //
    // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when
    // support for legalizing to mem is implemented.
    Variable *Slot = Func->makeVariable(Ty);
    Slot->setWeight(RegWeight::Zero);
    _movp(Slot, legalizeToReg(SourceVectNotLegalized));

    // Compute the location of the position to insert in memory.
    unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
    typename Traits::X86OperandMem *Loc =
        getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
    _store(legalizeToReg(ElementToInsertNotLegalized), Loc);

    Variable *T = makeReg(Ty);
    _movp(T, Slot);
    _movp(Inst->getDest(), T);
  }
}

template <class Machine>
void TargetX86Base<Machine>::lowerIntrinsicCall(
    const InstIntrinsicCall *Instr) {
  switch (Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID) {
  case Intrinsics::AtomicCmpxchg: {
    if (!Intrinsics::isMemoryOrderValid(
            ID, getConstantMemoryOrder(Instr->getArg(3)),
            getConstantMemoryOrder(Instr->getArg(4)))) {
      Func->setError("Unexpected memory ordering for AtomicCmpxchg");
      return;
    }
    Variable *DestPrev = Instr->getDest();
    Operand *PtrToMem = legalize(Instr->getArg(0));
    Operand *Expected = legalize(Instr->getArg(1));
    Operand *Desired = legalize(Instr->getArg(2));
    if (tryOptimizedCmpxchgCmpBr(DestPrev, PtrToMem, Expected, Desired))
      return;
    lowerAtomicCmpxchg(DestPrev, PtrToMem, Expected, Desired);
    return;
  }
  case Intrinsics::AtomicFence:
    if (!Intrinsics::isMemoryOrderValid(
            ID, getConstantMemoryOrder(Instr->getArg(0)))) {
      Func->setError("Unexpected memory ordering for AtomicFence");
      return;
    }
    _mfence();
    return;
  case Intrinsics::AtomicFenceAll:
    // NOTE: FenceAll should prevent any load/store from being moved
    // across the fence (both atomic and non-atomic). The InstX8632Mfence
    // instruction is currently marked coarsely as "HasSideEffects".
    _mfence();
    return;
  case Intrinsics::AtomicIsLockFree: {
    // X86 is always lock free for 8/16/32/64 bit accesses.
    // TODO(jvoung): Since the result is constant when given a constant
    // byte size, this opens up DCE opportunities.
    Operand *ByteSize = Instr->getArg(0);
    Variable *Dest = Instr->getDest();
    if (ConstantInteger32 *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize)) {
      Constant *Result;
      switch (CI->getValue()) {
      default:
        // Some x86-64 processors support the cmpxchg16b instruction, which
        // can make 16-byte operations lock free (when used with the LOCK
        // prefix). However, that's not supported in 32-bit mode, so just
        // return 0 even for large sizes.
        Result = Ctx->getConstantZero(IceType_i32);
        break;
      case 1:
      case 2:
      case 4:
      case 8:
        Result = Ctx->getConstantInt32(1);
        break;
      }
      _mov(Dest, Result);
      return;
    }
    // The PNaCl ABI requires the byte size to be a compile-time constant.
    Func->setError("AtomicIsLockFree byte size should be compile-time const");
    return;
  }
  case Intrinsics::AtomicLoad: {
    // We require the memory address to be naturally aligned. Given that,
    // normal loads are atomic.
    if (!Intrinsics::isMemoryOrderValid(
            ID, getConstantMemoryOrder(Instr->getArg(1)))) {
      Func->setError("Unexpected memory ordering for AtomicLoad");
      return;
    }
    Variable *Dest = Instr->getDest();
    if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
      // Follow what GCC does and use a movq instead of what lowerLoad()
      // normally does (split the load into two).
      // Thus, this skips load/arithmetic op folding. Load/arithmetic folding
      // can't happen anyway, since this is x86-32 and integer arithmetic only
      // happens on 32-bit quantities.
      Variable *T = makeReg(IceType_f64);
      typename Traits::X86OperandMem *Addr =
          formMemoryOperand(Instr->getArg(0), IceType_f64);
      _movq(T, Addr);
      // Then cast the bits back out of the XMM register to the i64 Dest.
      InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T);
      lowerCast(Cast);
      // Make sure that the atomic load isn't elided when unused.
      Context.insert(InstFakeUse::create(Func, Dest->getLo()));
      Context.insert(InstFakeUse::create(Func, Dest->getHi()));
      return;
    }
    InstLoad *Load = InstLoad::create(Func, Dest, Instr->getArg(0));
    lowerLoad(Load);
    // Make sure the atomic load isn't elided when unused, by adding a FakeUse.
    // Since lowerLoad may fuse the load w/ an arithmetic instruction,
    // insert the FakeUse on the last-inserted instruction's dest.
    Context.insert(
        InstFakeUse::create(Func, Context.getLastInserted()->getDest()));
    return;
  }
  case Intrinsics::AtomicRMW:
    if (!Intrinsics::isMemoryOrderValid(
            ID, getConstantMemoryOrder(Instr->getArg(3)))) {
      Func->setError("Unexpected memory ordering for AtomicRMW");
      return;
    }
    lowerAtomicRMW(
        Instr->getDest(),
        static_cast<uint32_t>(
            llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()),
        Instr->getArg(1), Instr->getArg(2));
    return;
  case Intrinsics::AtomicStore: {
    if (!Intrinsics::isMemoryOrderValid(
            ID, getConstantMemoryOrder(Instr->getArg(2)))) {
      Func->setError("Unexpected memory ordering for AtomicStore");
      return;
    }
    // We require the memory address to be naturally aligned. Given that,
    // normal stores are atomic.
    // Add a fence after the store to make it visible.
    Operand *Value = Instr->getArg(0);
    Operand *Ptr = Instr->getArg(1);
    if (!Traits::Is64Bit && Value->getType() == IceType_i64) {
      // Use a movq instead of what lowerStore() normally does
      // (split the store into two), following what GCC does.
      // Cast the bits from int to an xmm register first.
      Variable *T = makeReg(IceType_f64);
      InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value);
      lowerCast(Cast);
      // Then store XMM w/ a movq.
      typename Traits::X86OperandMem *Addr =
          formMemoryOperand(Ptr, IceType_f64);
      _storeq(T, Addr);
      _mfence();
      return;
    }
    InstStore *Store = InstStore::create(Func, Value, Ptr);
    lowerStore(Store);
    _mfence();
    return;
  }
  case Intrinsics::Bswap: {
    Variable *Dest = Instr->getDest();
    Operand *Val = Instr->getArg(0);
    // In 32-bit mode, bswap only works on 32-bit arguments, and the
    // argument must be a register. Use rotate left for 16-bit bswap.
    if (!Traits::Is64Bit && Val->getType() == IceType_i64) {
      Val = legalizeUndef(Val);
      Variable *T_Lo = legalizeToReg(loOperand(Val));
      Variable *T_Hi = legalizeToReg(hiOperand(Val));
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      _bswap(T_Lo);
      _bswap(T_Hi);
      _mov(DestLo, T_Hi);
      _mov(DestHi, T_Lo);
    } else if ((Traits::Is64Bit && Val->getType() == IceType_i64) ||
               Val->getType() == IceType_i32) {
      Variable *T = legalizeToReg(Val);
      _bswap(T);
      _mov(Dest, T);
    } else {
      assert(Val->getType() == IceType_i16);
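      // Rotating a 16-bit value left by 8 swaps its two bytes (e.g.
      // 0x1234 becomes 0x3412), which is exactly a 16-bit bswap.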
      Constant *Eight = Ctx->getConstantInt16(8);
      Variable *T = nullptr;
      Val = legalize(Val);
      _mov(T, Val);
      _rol(T, Eight);
      _mov(Dest, T);
    }
    return;
  }
  case Intrinsics::Ctpop: {
    Variable *Dest = Instr->getDest();
    Variable *T = nullptr;
    Operand *Val = Instr->getArg(0);
    Type ValTy = Val->getType();
    assert(ValTy == IceType_i32 || ValTy == IceType_i64);

    if (!Traits::Is64Bit) {
      T = Dest;
    } else {
      T = makeReg(IceType_i64);
      if (ValTy == IceType_i32) {
        // In x86-64, __popcountsi2 is not defined, so we cheat a bit by
        // converting it to a 64-bit value, and using ctpop_i64. _movzx should
        // ensure we will not have any bits set on Val's upper 32 bits.
        Variable *V = makeReg(IceType_i64);
        _movzx(V, Val);
        Val = V;
      }
      ValTy = IceType_i64;
    }

    InstCall *Call = makeHelperCall(
        ValTy == IceType_i32 ? H_call_ctpop_i32 : H_call_ctpop_i64, T, 1);
    Call->addArg(Val);
    lowerCall(Call);
    // The popcount helpers always return 32-bit values, while the intrinsic's
    // signature matches the native POPCNT instruction and fills a 64-bit reg
    // (in 64-bit mode). Thus, clear the upper bits of the dest just in case
    // the user doesn't do that in the IR. If the user does that in the IR,
    // then this zeroing instruction is dead and gets optimized out.
    if (!Traits::Is64Bit) {
      assert(T == Dest);
      if (Val->getType() == IceType_i64) {
        Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
        Constant *Zero = Ctx->getConstantZero(IceType_i32);
        _mov(DestHi, Zero);
      }
    } else {
      assert(Val->getType() == IceType_i64);
      // T is 64 bit. It needs to be copied to dest. We need to:
      //
      // T_1.32 = trunc T.64 to i32
      // T_2.64 = zext T_1.32 to i64
      // Dest.<<right_size>> = T_2.<<right_size>>
      //
      // which ensures the upper 32 bits will always be cleared. Just doing a
      //
      // mov Dest.32 = trunc T.32 to i32
      //
      // is dangerous because there's a chance the compiler will optimize this
      // copy out. To use _movzx we need two new registers (one 32-, and
      // another 64-bit wide.)
      Variable *T_1 = makeReg(IceType_i32);
      _mov(T_1, T);
      Variable *T_2 = makeReg(IceType_i64);
      _movzx(T_2, T_1);
      _mov(Dest, T_2);
    }
    return;
  }
  case Intrinsics::Ctlz: {
    // The "is zero undef" parameter is ignored and we always return
    // a well-defined value.
    Operand *Val = legalize(Instr->getArg(0));
    Operand *FirstVal;
    Operand *SecondVal = nullptr;
    if (!Traits::Is64Bit && Val->getType() == IceType_i64) {
      FirstVal = loOperand(Val);
      SecondVal = hiOperand(Val);
    } else {
      FirstVal = Val;
    }
    const bool IsCttz = false;
    lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
                    SecondVal);
    return;
  }
  case Intrinsics::Cttz: {
    // The "is zero undef" parameter is ignored and we always return
    // a well-defined value.
    Operand *Val = legalize(Instr->getArg(0));
    Operand *FirstVal;
    Operand *SecondVal = nullptr;
    if (!Traits::Is64Bit && Val->getType() == IceType_i64) {
      FirstVal = hiOperand(Val);
      SecondVal = loOperand(Val);
    } else {
      FirstVal = Val;
    }
    const bool IsCttz = true;
    lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
                    SecondVal);
    return;
  }
  case Intrinsics::Fabs: {
    Operand *Src = legalize(Instr->getArg(0));
    Type Ty = Src->getType();
    Variable *Dest = Instr->getDest();
    Variable *T = makeVectorOfFabsMask(Ty);
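    // makeVectorOfFabsMask presumably yields a mask with only the sign bit
    // of each lane clear (e.g. 0x7FFFFFFF per f32 lane), so the pand below
    // clears the sign bit, which is fabs for IEEE values.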
    // The pand instruction operates on an m128 memory operand, so if
    // Src is an f32 or f64, we need to make sure it's in a register.
    if (isVectorType(Ty)) {
      if (llvm::isa<typename Traits::X86OperandMem>(Src))
        Src = legalizeToReg(Src);
    } else {
      Src = legalizeToReg(Src);
    }
    _pand(T, Src);
    if (isVectorType(Ty))
      _movp(Dest, T);
    else
      _mov(Dest, T);
    return;
  }
  case Intrinsics::Longjmp: {
    InstCall *Call = makeHelperCall(H_call_longjmp, nullptr, 2);
    Call->addArg(Instr->getArg(0));
    Call->addArg(Instr->getArg(1));
    lowerCall(Call);
    return;
  }
  case Intrinsics::Memcpy: {
    lowerMemcpy(Instr->getArg(0), Instr->getArg(1), Instr->getArg(2));
    return;
  }
  case Intrinsics::Memmove: {
    InstCall *Call = makeHelperCall(H_call_memmove, nullptr, 3);
    Call->addArg(Instr->getArg(0));
    Call->addArg(Instr->getArg(1));
    Call->addArg(Instr->getArg(2));
    lowerCall(Call);
    return;
  }
  case Intrinsics::Memset: {
    lowerMemset(Instr->getArg(0), Instr->getArg(1), Instr->getArg(2));
    return;
  }
  case Intrinsics::NaClReadTP: {
    if (Ctx->getFlags().getUseSandboxing()) {
      Operand *Src = dispatchToConcrete(&Machine::createNaClReadTPSrcOperand);
      Variable *Dest = Instr->getDest();
      Variable *T = nullptr;
      _mov(T, Src);
      _mov(Dest, T);
    } else {
      InstCall *Call = makeHelperCall(H_call_read_tp, Instr->getDest(), 0);
      lowerCall(Call);
    }
    return;
  }
  case Intrinsics::Setjmp: {
    InstCall *Call = makeHelperCall(H_call_setjmp, Instr->getDest(), 1);
    Call->addArg(Instr->getArg(0));
    lowerCall(Call);
    return;
  }
  case Intrinsics::Sqrt: {
    Operand *Src = legalize(Instr->getArg(0));
    Variable *Dest = Instr->getDest();
    Variable *T = makeReg(Dest->getType());
    _sqrtss(T, Src);
    _mov(Dest, T);
    return;
  }
  case Intrinsics::Stacksave: {
    Variable *esp =
        Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
    Variable *Dest = Instr->getDest();
    _mov(Dest, esp);
    return;
  }
  case Intrinsics::Stackrestore: {
    Variable *esp =
        Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
    _mov_nonkillable(esp, Instr->getArg(0));
    return;
  }
  case Intrinsics::Trap:
    _ud2();
    return;
  case Intrinsics::UnknownIntrinsic:
    Func->setError("Should not be lowering UnknownIntrinsic");
    return;
  }
  return;
}

template <class Machine>
void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev,
                                                Operand *Ptr, Operand *Expected,
                                                Operand *Desired) {
  if (!Traits::Is64Bit && Expected->getType() == IceType_i64) {
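    // cmpxchg8b compares edx:eax with the memory operand; on a match it
    // stores ecx:ebx, otherwise it loads the current contents into edx:eax.
    // Either way edx:eax ends up holding the previous memory value, which
    // is what DestPrev needs.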
    // Reserve the pre-colored registers first, before adding any more
    // infinite-weight variables from formMemoryOperand's legalization.
    Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
    Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
    Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
    Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx);
    _mov(T_eax, loOperand(Expected));
    _mov(T_edx, hiOperand(Expected));
    _mov(T_ebx, loOperand(Desired));
    _mov(T_ecx, hiOperand(Desired));
    typename Traits::X86OperandMem *Addr =
        formMemoryOperand(Ptr, Expected->getType());
    const bool Locked = true;
    _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
    Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev));
    _mov(DestLo, T_eax);
    _mov(DestHi, T_edx);
    return;
  }
  Variable *T_eax = makeReg(Expected->getType(), Traits::RegisterSet::Reg_eax);
  _mov(T_eax, Expected);
  typename Traits::X86OperandMem *Addr =
      formMemoryOperand(Ptr, Expected->getType());
  Variable *DesiredReg = legalizeToReg(Desired);
  const bool Locked = true;
  _cmpxchg(Addr, T_eax, DesiredReg, Locked);
  _mov(DestPrev, T_eax);
}

template <class Machine>
bool TargetX86Base<Machine>::tryOptimizedCmpxchgCmpBr(Variable *Dest,
                                                      Operand *PtrToMem,
                                                      Operand *Expected,
                                                      Operand *Desired) {
  if (Ctx->getFlags().getOptLevel() == Opt_m1)
    return false;
  // Peek ahead a few instructions and see how Dest is used.
  // It's very common to have:
  //
  // %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...)
  // [%y_phi = ...] // list of phi stores
  // %p = icmp eq i32 %x, %expected
  // br i1 %p, label %l1, label %l2
  //
  // which we can optimize into:
  //
  // %x = <cmpxchg code>
  // [%y_phi = ...] // list of phi stores
  // br eq, %l1, %l2
  InstList::iterator I = Context.getCur();
  // I is currently the InstIntrinsicCall. Peek past that.
  // This assumes that the atomic cmpxchg has not been lowered yet,
  // so that the instructions seen in the scan from "Cur" are simple.
  assert(llvm::isa<InstIntrinsicCall>(*I));
  Inst *NextInst = Context.getNextInst(I);
  if (!NextInst)
    return false;
  // There might be phi assignments right before the compare+branch, since this
  // could be a backward branch for a loop. This placement of assignments is
  // determined by placePhiStores().
  std::vector<InstAssign *> PhiAssigns;
  while (InstAssign *PhiAssign = llvm::dyn_cast<InstAssign>(NextInst)) {
    if (PhiAssign->getDest() == Dest)
      return false;
    PhiAssigns.push_back(PhiAssign);
    NextInst = Context.getNextInst(I);
    if (!NextInst)
      return false;
  }
  if (InstIcmp *NextCmp = llvm::dyn_cast<InstIcmp>(NextInst)) {
    if (!(NextCmp->getCondition() == InstIcmp::Eq &&
          ((NextCmp->getSrc(0) == Dest && NextCmp->getSrc(1) == Expected) ||
           (NextCmp->getSrc(1) == Dest && NextCmp->getSrc(0) == Expected)))) {
      return false;
    }
    NextInst = Context.getNextInst(I);
    if (!NextInst)
      return false;
    if (InstBr *NextBr = llvm::dyn_cast<InstBr>(NextInst)) {
      if (!NextBr->isUnconditional() &&
          NextCmp->getDest() == NextBr->getCondition() &&
          NextBr->isLastUse(NextCmp->getDest())) {
        lowerAtomicCmpxchg(Dest, PtrToMem, Expected, Desired);
        for (size_t i = 0; i < PhiAssigns.size(); ++i) {
          // Lower the phi assignments now, before the branch (same placement
          // as before).
          InstAssign *PhiAssign = PhiAssigns[i];
          PhiAssign->setDeleted();
          lowerAssign(PhiAssign);
          Context.advanceNext();
        }
        _br(Traits::Cond::Br_e, NextBr->getTargetTrue(),
            NextBr->getTargetFalse());
        // Skip over the old compare and branch, by deleting them.
        NextCmp->setDeleted();
        NextBr->setDeleted();
        Context.advanceNext();
        Context.advanceNext();
        return true;
      }
    }
  }
  return false;
}

template <class Machine>
void TargetX86Base<Machine>::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
                                            Operand *Ptr, Operand *Val) {
  bool NeedsCmpxchg = false;
  LowerBinOp Op_Lo = nullptr;
  LowerBinOp Op_Hi = nullptr;
  switch (Operation) {
  default:
    Func->setError("Unknown AtomicRMW operation");
    return;
  case Intrinsics::AtomicAdd: {
    if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
      // All the fall-through paths must set this to true, but use this
      // for asserting.
      NeedsCmpxchg = true;
      Op_Lo = &TargetX86Base<Machine>::_add;
      Op_Hi = &TargetX86Base<Machine>::_adc;
      break;
    }
    typename Traits::X86OperandMem *Addr =
        formMemoryOperand(Ptr, Dest->getType());
    const bool Locked = true;
    Variable *T = nullptr;
    _mov(T, Val);
    _xadd(Addr, T, Locked);
    _mov(Dest, T);
    return;
  }
  case Intrinsics::AtomicSub: {
    if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
      NeedsCmpxchg = true;
      Op_Lo = &TargetX86Base<Machine>::_sub;
      Op_Hi = &TargetX86Base<Machine>::_sbb;
      break;
    }
    typename Traits::X86OperandMem *Addr =
        formMemoryOperand(Ptr, Dest->getType());
    const bool Locked = true;
    Variable *T = nullptr;
    _mov(T, Val);
    _neg(T);
    _xadd(Addr, T, Locked);
    _mov(Dest, T);
    return;
  }
  case Intrinsics::AtomicOr:
    // TODO(jvoung): If Dest is null or dead, then some of these
    // operations do not need an "exchange", but just a locked op.
    // That appears to be "worth" it for sub, or, and, and xor.
    // xadd is probably fine vs lock add for add, and xchg is fine
    // vs an atomic store.
    NeedsCmpxchg = true;
    Op_Lo = &TargetX86Base<Machine>::_or;
    Op_Hi = &TargetX86Base<Machine>::_or;
    break;
  case Intrinsics::AtomicAnd:
    NeedsCmpxchg = true;
    Op_Lo = &TargetX86Base<Machine>::_and;
    Op_Hi = &TargetX86Base<Machine>::_and;
    break;
  case Intrinsics::AtomicXor:
    NeedsCmpxchg = true;
    Op_Lo = &TargetX86Base<Machine>::_xor;
    Op_Hi = &TargetX86Base<Machine>::_xor;
    break;
  case Intrinsics::AtomicExchange:
    if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
      NeedsCmpxchg = true;
      // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values
      // just need to be moved to the ecx and ebx registers.
      Op_Lo = nullptr;
      Op_Hi = nullptr;
      break;
    }
    typename Traits::X86OperandMem *Addr =
        formMemoryOperand(Ptr, Dest->getType());
    Variable *T = nullptr;
    _mov(T, Val);
    _xchg(Addr, T);
    _mov(Dest, T);
    return;
  }
  // Otherwise, we need a cmpxchg loop.
  (void)NeedsCmpxchg;
  assert(NeedsCmpxchg);
  expandAtomicRMWAsCmpxchg(Op_Lo, Op_Hi, Dest, Ptr, Val);
}

template <class Machine>
void TargetX86Base<Machine>::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo,
                                                      LowerBinOp Op_Hi,
                                                      Variable *Dest,
                                                      Operand *Ptr,
                                                      Operand *Val) {
  // Expand a more complex RMW operation as a cmpxchg loop:
  // For 64-bit:
  // mov eax, [ptr]
  // mov edx, [ptr + 4]
  // .LABEL:
  //   mov ebx, eax
  //   <Op_Lo> ebx, <desired_adj_lo>
  //   mov ecx, edx
  //   <Op_Hi> ecx, <desired_adj_hi>
  //   lock cmpxchg8b [ptr]
  //   jne .LABEL
  // mov <dest_lo>, eax
  // mov <dest_hi>, edx
  //
  // For 32-bit:
  // mov eax, [ptr]
  // .LABEL:
  //   mov <reg>, eax
  //   op <reg>, [desired_adj]
  //   lock cmpxchg [ptr], <reg>
  //   jne .LABEL
  // mov <dest>, eax
  //
  // If Op_{Lo,Hi} are nullptr, then just copy the value.
  Val = legalize(Val);
  Type Ty = Val->getType();
  if (!Traits::Is64Bit && Ty == IceType_i64) {
    Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
    Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
    typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
    _mov(T_eax, loOperand(Addr));
    _mov(T_edx, hiOperand(Addr));
    Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
    Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx);
    typename Traits::Insts::Label *Label =
        Traits::Insts::Label::create(Func, this);
    const bool IsXchg8b = Op_Lo == nullptr && Op_Hi == nullptr;
    if (!IsXchg8b) {
      Context.insert(Label);
      _mov(T_ebx, T_eax);
      (this->*Op_Lo)(T_ebx, loOperand(Val));
      _mov(T_ecx, T_edx);
      (this->*Op_Hi)(T_ecx, hiOperand(Val));
    } else {
      // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi.
      // It just needs the Val loaded into ebx and ecx.
      // That can also be done before the loop.
      _mov(T_ebx, loOperand(Val));
      _mov(T_ecx, hiOperand(Val));
      Context.insert(Label);
    }
    const bool Locked = true;
    _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
    _br(Traits::Cond::Br_ne, Label);
    if (!IsXchg8b) {
      // If Val is a variable, model the extended live range of Val through
      // the end of the loop, since it will be re-used by the loop.
      if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
        Variable *ValLo = llvm::cast<Variable>(loOperand(ValVar));
        Variable *ValHi = llvm::cast<Variable>(hiOperand(ValVar));
        Context.insert(InstFakeUse::create(Func, ValLo));
        Context.insert(InstFakeUse::create(Func, ValHi));
      }
    } else {
      // For xchg, the loop is slightly smaller and ebx/ecx are used.
      Context.insert(InstFakeUse::create(Func, T_ebx));
      Context.insert(InstFakeUse::create(Func, T_ecx));
    }
    // The address base (if any) is also reused in the loop.
    if (Variable *Base = Addr->getBase())
      Context.insert(InstFakeUse::create(Func, Base));
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    _mov(DestLo, T_eax);
    _mov(DestHi, T_edx);
    return;
  }
  typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
  Variable *T_eax = makeReg(Ty, Traits::RegisterSet::Reg_eax);
  _mov(T_eax, Addr);
  typename Traits::Insts::Label *Label =
      Traits::Insts::Label::create(Func, this);
  Context.insert(Label);
  // We want to pick a different register for T than Eax, so don't use
  // _mov(T == nullptr, T_eax).
  Variable *T = makeReg(Ty);
  _mov(T, T_eax);
  (this->*Op_Lo)(T, Val);
  const bool Locked = true;
  _cmpxchg(Addr, T_eax, T, Locked);
  _br(Traits::Cond::Br_ne, Label);
  // If Val is a variable, model the extended live range of Val through
  // the end of the loop, since it will be re-used by the loop.
  if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
    Context.insert(InstFakeUse::create(Func, ValVar));
  }
  // The address base (if any) is also reused in the loop.
  if (Variable *Base = Addr->getBase())
    Context.insert(InstFakeUse::create(Func, Base));
  _mov(Dest, T_eax);
}

/// Lowers count {trailing, leading} zeros intrinsic.
///
/// We could do constant folding here, but that should have
/// been done by the front-end/middle-end optimizations.
template <class Machine>
void TargetX86Base<Machine>::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest,
                                             Operand *FirstVal,
                                             Operand *SecondVal) {
  // TODO(jvoung): Determine if the user CPU supports LZCNT (BMI).
  // Then the instructions will handle the Val == 0 case much more simply
  // and won't require conversion from bit position to number of zeros.
  //
  // Otherwise:
  // bsr IF_NOT_ZERO, Val
  // mov T_DEST, 63
  // cmovne T_DEST, IF_NOT_ZERO
  // xor T_DEST, 31
  // mov DEST, T_DEST
  //
  // NOTE: T_DEST must be a register because cmov requires its dest to be a
  // register. Also, bsf and bsr require their dest to be a register.
  //
  // The xor DEST, 31 converts a bit position to # of leading zeroes.
  // E.g., for 000... 00001100, bsr will say that the most significant bit
  // set is at position 3, while the number of leading zeros is 28. Xor is
  // like (31 - N) for N <= 31, and converts 63 to 32 (for the all-zeros case).
  //
  // Similar for 64-bit, but start w/ speculating that the upper 32 bits
  // are all zero, and compute the result for that case (checking the lower
  // 32 bits). Then actually compute the result for the upper bits and
  // cmov in the result from the lower computation if the earlier speculation
  // was correct.
  //
  // Cttz is similar, but uses bsf instead, and doesn't require the xor
  // bit position conversion, and the speculation is reversed.
  assert(Ty == IceType_i32 || Ty == IceType_i64);
  Variable *T = makeReg(IceType_i32);
  Operand *FirstValRM = legalize(FirstVal, Legal_Mem | Legal_Reg);
  if (Cttz) {
    _bsf(T, FirstValRM);
  } else {
    _bsr(T, FirstValRM);
  }
  Variable *T_Dest = makeReg(IceType_i32);
  Constant *ThirtyTwo = Ctx->getConstantInt32(32);
  Constant *ThirtyOne = Ctx->getConstantInt32(31);
  if (Cttz) {
    _mov(T_Dest, ThirtyTwo);
  } else {
    Constant *SixtyThree = Ctx->getConstantInt32(63);
    _mov(T_Dest, SixtyThree);
  }
  _cmov(T_Dest, T, Traits::Cond::Br_ne);
  if (!Cttz) {
    _xor(T_Dest, ThirtyOne);
  }
  if (Traits::Is64Bit || Ty == IceType_i32) {
    _mov(Dest, T_Dest);
    return;
  }
  _add(T_Dest, ThirtyTwo);
  Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
  Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
  // Will be using "test" on this, so we need a registerized variable.
  Variable *SecondVar = legalizeToReg(SecondVal);
  Variable *T_Dest2 = makeReg(IceType_i32);
  if (Cttz) {
    _bsf(T_Dest2, SecondVar);
  } else {
    _bsr(T_Dest2, SecondVar);
    _xor(T_Dest2, ThirtyOne);
  }
  _test(SecondVar, SecondVar);
  _cmov(T_Dest2, T_Dest, Traits::Cond::Br_e);
  _mov(DestLo, T_Dest2);
  _mov(DestHi, Ctx->getConstantZero(IceType_i32));
}

template <class Machine>
void TargetX86Base<Machine>::lowerMemcpy(Operand *Dest, Operand *Src,
                                         Operand *Count) {
  // There is a load and store for each chunk in the unroll
  constexpr uint32_t UNROLL_LIMIT = 8;
  constexpr uint32_t BytesPerStorep = 16;
  constexpr uint32_t BytesPerStoreq = 8;
  constexpr uint32_t BytesPerStorei32 = 4;
  constexpr uint32_t BytesPerStorei16 = 2;
  constexpr uint32_t BytesPerStorei8 = 1;

  // Check if the operands are constants
  const auto *CountConst = llvm::dyn_cast<const ConstantInteger32>(Count);
  const bool IsCountConst = CountConst != nullptr;
  const uint32_t CountValue = IsCountConst ? CountConst->getValue() : 0;

  if (IsCountConst && CountValue <= BytesPerStorep * UNROLL_LIMIT) {
    // Unlikely, but nothing to do if it does happen
    if (CountValue == 0)
      return;

    Variable *SrcBase = legalizeToReg(Src);
    Variable *DestBase = legalizeToReg(Dest);

    auto lowerCopy = [this, DestBase, SrcBase](Type Ty, uint32_t OffsetAmt) {
      Constant *Offset = OffsetAmt ? Ctx->getConstantInt32(OffsetAmt) : nullptr;
      // TODO(ascull): this or add nullptr test to _movp, _movq
      Variable *Data = makeReg(Ty);

      // TODO(ascull): is 64-bit better with vector or scalar movq?
      auto *SrcMem = Traits::X86OperandMem::create(Func, Ty, SrcBase, Offset);
      if (isVectorType(Ty))
        _movp(Data, SrcMem);
      else if (Ty == IceType_f64)
        _movq(Data, SrcMem);
      else
        _mov(Data, SrcMem);

      auto *DestMem = Traits::X86OperandMem::create(Func, Ty, DestBase, Offset);
      if (isVectorType(Ty))
        _storep(Data, DestMem);
      else if (Ty == IceType_f64)
        _storeq(Data, DestMem);
      else
        _store(Data, DestMem);
    };

    // Lowers the assignment to the remaining bytes. Assumes the original size
    // was large enough to allow for overlaps.
    auto lowerLeftOvers = [this, lowerCopy, CountValue](uint32_t Size) {
      if (Size > BytesPerStoreq) {
        lowerCopy(IceType_v16i8, CountValue - BytesPerStorep);
      } else if (Size > BytesPerStorei32) {
        lowerCopy(IceType_f64, CountValue - BytesPerStoreq);
      } else if (Size > BytesPerStorei16) {
        lowerCopy(IceType_i32, CountValue - BytesPerStorei32);
      } else if (Size > BytesPerStorei8) {
        lowerCopy(IceType_i16, CountValue - BytesPerStorei16);
      } else if (Size == BytesPerStorei8) {
        lowerCopy(IceType_i8, CountValue - BytesPerStorei8);
      }
    };
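    // As a sketch of the unrolled output: a 21-byte memcpy becomes one
    // 16-byte vector copy at offset 0 followed by an overlapping 8-byte
    // copy at offset 13, so bytes 13-15 are simply written twice.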

    if (CountValue >= BytesPerStorep) {
      // Use large vector operations
      for (uint32_t N = CountValue & 0xFFFFFFF0; N != 0;) {
        N -= BytesPerStorep;
        lowerCopy(IceType_v16i8, N);
      }
      lowerLeftOvers(CountValue & 0xF);
      return;
    }

    // Too small to use large vector operations so use small ones instead
    if (CountValue >= BytesPerStoreq) {
      lowerCopy(IceType_f64, 0);
      lowerLeftOvers(CountValue - BytesPerStoreq);
      return;
    }

    // Too small for vector operations so use scalar ones
    if (CountValue >= BytesPerStorei32) {
      lowerCopy(IceType_i32, 0);
      lowerLeftOvers(CountValue - BytesPerStorei32);
      return;
    }

    // 3 is the awkward size as it is too small for the vector or 32-bit
    // operations and will not work with lowerLeftOvers as there is no valid
    // overlap.
    if (CountValue == 3) {
      lowerCopy(IceType_i16, 0);
      lowerCopy(IceType_i8, 2);
      return;
    }

    // 1 or 2 can be done in a single scalar copy
    lowerLeftOvers(CountValue);
    return;
  }

  // Fall back on a function call
  InstCall *Call = makeHelperCall(H_call_memcpy, nullptr, 3);
  Call->addArg(Dest);
  Call->addArg(Src);
  Call->addArg(Count);
  lowerCall(Call);
}

template <class Machine>
void TargetX86Base<Machine>::lowerMemset(Operand *Dest, Operand *Val,
                                         Operand *Count) {
  constexpr uint32_t UNROLL_LIMIT = 16;
  constexpr uint32_t BytesPerStorep = 16;
  constexpr uint32_t BytesPerStoreq = 8;
  constexpr uint32_t BytesPerStorei32 = 4;
  constexpr uint32_t BytesPerStorei16 = 2;
  constexpr uint32_t BytesPerStorei8 = 1;
  assert(Val->getType() == IceType_i8);

  // Check if the operands are constants
  const auto *CountConst = llvm::dyn_cast<const ConstantInteger32>(Count);
  const auto *ValConst = llvm::dyn_cast<const ConstantInteger32>(Val);
  const bool IsCountConst = CountConst != nullptr;
  const bool IsValConst = ValConst != nullptr;
  const uint32_t CountValue = IsCountConst ? CountConst->getValue() : 0;
  const uint32_t ValValue = IsValConst ? ValConst->getValue() : 0;

  // Unlikely, but nothing to do if it does happen
  if (IsCountConst && CountValue == 0)
    return;

  // TODO(ascull): if the count is constant but val is not it would be possible
  // to inline by spreading the value across 4 bytes and accessing subregs e.g.
  // eax, ax and al.
  if (IsCountConst && IsValConst) {
    Variable *Base = nullptr;
    const uint32_t SpreadValue =
        (ValValue << 24) | (ValValue << 16) | (ValValue << 8) | ValValue;
    Variable *VecReg = nullptr;

    auto lowerSet = [this, &Base, SpreadValue, &VecReg](Type Ty,
                                                        uint32_t OffsetAmt) {
      assert(Base != nullptr);
      Constant *Offset = OffsetAmt ? Ctx->getConstantInt32(OffsetAmt) : nullptr;

      // TODO(ascull): is 64-bit better with vector or scalar movq?
      auto *Mem = Traits::X86OperandMem::create(Func, Ty, Base, Offset);
      if (isVectorType(Ty)) {
        assert(VecReg != nullptr);
        _storep(VecReg, Mem);
      } else if (Ty == IceType_i64) {
        assert(VecReg != nullptr);
        _storeq(VecReg, Mem);
      } else {
        _store(Ctx->getConstantInt(Ty, SpreadValue), Mem);
      }
    };

    // Lowers the assignment to the remaining bytes. Assumes the original size
    // was large enough to allow for overlaps.
    auto lowerLeftOvers = [this, lowerSet, CountValue](uint32_t Size) {
      if (Size > BytesPerStoreq) {
        lowerSet(IceType_v16i8, CountValue - BytesPerStorep);
      } else if (Size > BytesPerStorei32) {
        lowerSet(IceType_i64, CountValue - BytesPerStoreq);
      } else if (Size > BytesPerStorei16) {
        lowerSet(IceType_i32, CountValue - BytesPerStorei32);
      } else if (Size > BytesPerStorei8) {
        lowerSet(IceType_i16, CountValue - BytesPerStorei16);
      } else if (Size == BytesPerStorei8) {
        lowerSet(IceType_i8, CountValue - BytesPerStorei8);
      }
    };
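    // As a sketch, assuming makeVectorOfZeros uses the usual pxor idiom: a
    // 21-byte memset of zero becomes roughly
    //   pxor xmm0, xmm0
    //   movups [base], xmm0
    //   movq [base + 13], xmm0
    // with the 8-byte tail overlapping the vector store.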

    // When the value is zero it can be loaded into a vector register cheaply
    // using the xor trick.
    if (ValValue == 0 && CountValue >= BytesPerStoreq &&
        CountValue <= BytesPerStorep * UNROLL_LIMIT) {
      Base = legalizeToReg(Dest);
      VecReg = makeVectorOfZeros(IceType_v16i8);

      // Too small to use large vector operations so use small ones instead
      if (CountValue < BytesPerStorep) {
        lowerSet(IceType_i64, 0);
        lowerLeftOvers(CountValue - BytesPerStoreq);
        return;
      }

      // Use large vector operations
      for (uint32_t N = CountValue & 0xFFFFFFF0; N != 0;) {
        N -= 16;
        lowerSet(IceType_v16i8, N);
      }
      lowerLeftOvers(CountValue & 0xF);
      return;
    }

    // TODO(ascull): load val into reg and select subregs e.g. eax, ax, al?
    if (CountValue <= BytesPerStorei32 * UNROLL_LIMIT) {
      Base = legalizeToReg(Dest);
      // 3 is the awkward size as it is too small for the vector or 32-bit
      // operations and will not work with lowerLeftOvers as there is no valid
      // overlap.
      if (CountValue == 3) {
        lowerSet(IceType_i16, 0);
        lowerSet(IceType_i8, 2);
        return;
      }

      // TODO(ascull): 64-bit can do better with 64-bit mov
      for (uint32_t N = CountValue & 0xFFFFFFFC; N != 0;) {
        N -= 4;
        lowerSet(IceType_i32, N);
      }
      lowerLeftOvers(CountValue & 0x3);
      return;
    }
  }

  // Fall back on calling the memset function. The value operand needs to be
  // extended to a stack slot size because the PNaCl ABI requires arguments to
  // be at least 32 bits wide.
  Operand *ValExt;
  if (IsValConst) {
    ValExt = Ctx->getConstantInt(stackSlotType(), ValValue);
  } else {
    Variable *ValExtVar = Func->makeVariable(stackSlotType());
    lowerCast(InstCast::create(Func, InstCast::Zext, ValExtVar, Val));
    ValExt = ValExtVar;
  }
  InstCall *Call = makeHelperCall(H_call_memset, nullptr, 3);
  Call->addArg(Dest);
  Call->addArg(ValExt);
  Call->addArg(Count);
  lowerCall(Call);
}

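/// With sandboxing enabled, lowerIndirectJump wraps the jump in a
/// bundle-locked region, roughly (assuming 32-byte bundles):
///   and %target, 0xFFFFFFE0
///   jmp *%target
/// so the masked target can never land in the middle of a bundle.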
template <class Machine>
void TargetX86Base<Machine>::lowerIndirectJump(Variable *Target) {
  const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
  if (NeedSandboxing) {
    _bundle_lock();
    const SizeT BundleSize =
        1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
    _and(Target, Ctx->getConstantInt32(~(BundleSize - 1)));
  }
  _jmp(Target);
  if (NeedSandboxing)
    _bundle_unlock();
}

inline bool isAdd(const Inst *Inst) {
  if (const InstArithmetic *Arith =
          llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) {
    return (Arith->getOp() == InstArithmetic::Add);
  }
  return false;
}

inline void dumpAddressOpt(const Cfg *Func, const Variable *Base,
                           const Variable *Index, uint16_t Shift,
                           int32_t Offset, const Inst *Reason) {
  if (!BuildDefs::dump())
    return;
  if (!Func->isVerbose(IceV_AddrOpt))
    return;
  OstreamLocker L(Func->getContext());
  Ostream &Str = Func->getContext()->getStrDump();
  Str << "Instruction: ";
  Reason->dumpDecorated(Func);
  Str << " results in Base=";
  if (Base)
    Base->dump(Func);
  else
    Str << "<null>";
  Str << ", Index=";
  if (Index)
    Index->dump(Func);
  else
    Str << "<null>";
  Str << ", Shift=" << Shift << ", Offset=" << Offset << "\n";
}

inline bool matchTransitiveAssign(const VariablesMetadata *VMetadata,
                                  Variable *&Var, const Inst *&Reason) {
  // Var originates from Var=SrcVar ==>
  //   set Var:=SrcVar
  if (Var == nullptr)
    return false;
  if (const Inst *VarAssign = VMetadata->getSingleDefinition(Var)) {
    assert(!VMetadata->isMultiDef(Var));
    if (llvm::isa<InstAssign>(VarAssign)) {
      Operand *SrcOp = VarAssign->getSrc(0);
      assert(SrcOp);
      if (Variable *SrcVar = llvm::dyn_cast<Variable>(SrcOp)) {
        if (!VMetadata->isMultiDef(SrcVar) &&
            // TODO: ensure SrcVar stays single-BB
            true) {
          Var = SrcVar;
          Reason = VarAssign;
          return true;
        }
      }
    }
  }
  return false;
}

inline bool matchCombinedBaseIndex(const VariablesMetadata *VMetadata,
                                   Variable *&Base, Variable *&Index,
                                   uint16_t &Shift, const Inst *&Reason) {
  // Index==nullptr && Base is Base=Var1+Var2 ==>
  //   set Base=Var1, Index=Var2, Shift=0
  if (Base == nullptr)
    return false;
  if (Index != nullptr)
    return false;
  const Inst *BaseInst = VMetadata->getSingleDefinition(Base);
  if (BaseInst == nullptr)
    return false;
  assert(!VMetadata->isMultiDef(Base));
  if (BaseInst->getSrcSize() < 2)
    return false;
  if (Variable *Var1 = llvm::dyn_cast<Variable>(BaseInst->getSrc(0))) {
    if (VMetadata->isMultiDef(Var1))
      return false;
    if (Variable *Var2 = llvm::dyn_cast<Variable>(BaseInst->getSrc(1))) {
      if (VMetadata->isMultiDef(Var2))
        return false;
      if (isAdd(BaseInst) &&
          // TODO: ensure Var1 and Var2 stay single-BB
          true) {
        Base = Var1;
        Index = Var2;
        Shift = 0; // should already have been 0
        Reason = BaseInst;
        return true;
      }
    }
  }
  return false;
}

inline bool matchShiftedIndex(const VariablesMetadata *VMetadata,
                              Variable *&Index, uint16_t &Shift,
                              const Inst *&Reason) {
  // Index is Index=Var*Const && log2(Const)+Shift<=3 ==>
  //   Index=Var, Shift+=log2(Const)
  if (Index == nullptr)
    return false;
  const Inst *IndexInst = VMetadata->getSingleDefinition(Index);
  if (IndexInst == nullptr)
    return false;
  assert(!VMetadata->isMultiDef(Index));
  if (IndexInst->getSrcSize() < 2)
    return false;
  if (const InstArithmetic *ArithInst =
          llvm::dyn_cast<InstArithmetic>(IndexInst)) {
    if (Variable *Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(0))) {
      if (ConstantInteger32 *Const =
              llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(1))) {
        if (ArithInst->getOp() == InstArithmetic::Mul &&
            !VMetadata->isMultiDef(Var) && Const->getType() == IceType_i32) {
          uint64_t Mult = Const->getValue();
          uint32_t LogMult;
          switch (Mult) {
          case 1:
            LogMult = 0;
            break;
          case 2:
            LogMult = 1;
            break;
          case 4:
            LogMult = 2;
            break;
          case 8:
            LogMult = 3;
            break;
          default:
            return false;
          }
          if (Shift + LogMult <= 3) {
            Index = Var;
            Shift += LogMult;
            Reason = IndexInst;
            return true;
          }
        }
      }
    }
  }
  return false;
}

inline bool matchOffsetBase(const VariablesMetadata *VMetadata, Variable *&Base,
                            int32_t &Offset, const Inst *&Reason) {
  // Base is Base=Var+Const || Base is Base=Const+Var ==>
  //   set Base=Var, Offset+=Const
  // Base is Base=Var-Const ==>
  //   set Base=Var, Offset-=Const
  if (Base == nullptr)
    return false;
  const Inst *BaseInst = VMetadata->getSingleDefinition(Base);
  if (BaseInst == nullptr)
    return false;
  assert(!VMetadata->isMultiDef(Base));
  if (const InstArithmetic *ArithInst =
          llvm::dyn_cast<const InstArithmetic>(BaseInst)) {
    if (ArithInst->getOp() != InstArithmetic::Add &&
        ArithInst->getOp() != InstArithmetic::Sub)
      return false;
    bool IsAdd = ArithInst->getOp() == InstArithmetic::Add;
    Variable *Var = nullptr;
    ConstantInteger32 *Const = nullptr;
    if (Variable *VariableOperand =
            llvm::dyn_cast<Variable>(ArithInst->getSrc(0))) {
      Var = VariableOperand;
      Const = llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(1));
    } else if (IsAdd) {
      Const = llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(0));
      Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(1));
    }
    if (Var == nullptr || Const == nullptr || VMetadata->isMultiDef(Var))
      return false;
    int32_t MoreOffset = IsAdd ? Const->getValue() : -Const->getValue();
    if (Utils::WouldOverflowAdd(Offset, MoreOffset))
      return false;
    Base = Var;
    Offset += MoreOffset;
    Reason = BaseInst;
    return true;
  }
  return false;
}

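/// As a concrete example of the matchers above: given, roughly,
///   t1 = i * 4
///   t2 = b + t1
///   t3 = t2 + 12
///   v = load t3
/// computeAddressOpt folds the chain into the single addressing-mode
/// operand [b + i*4 + 12], assuming each temporary has a single definition
/// and is not live across basic blocks.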
inline void computeAddressOpt(Cfg *Func, const Inst *Instr, Variable *&Base,
                              Variable *&Index, uint16_t &Shift,
                              int32_t &Offset) {
  Func->resetCurrentNode();
  if (Func->isVerbose(IceV_AddrOpt)) {
    OstreamLocker L(Func->getContext());
    Ostream &Str = Func->getContext()->getStrDump();
    Str << "\nStarting computeAddressOpt for instruction:\n ";
    Instr->dumpDecorated(Func);
  }
  (void)Offset; // TODO: pattern-match for non-zero offsets.
  if (Base == nullptr)
    return;
  // If the Base has more than one use or is live across multiple
  // blocks, then don't go further. Alternatively (?), never consider
  // a transformation that would change a variable that is currently
  // *not* live across basic block boundaries into one that *is*.
  if (Func->getVMetadata()->isMultiBlock(Base) /* || Base->getUseCount() > 1*/)
    return;

  const VariablesMetadata *VMetadata = Func->getVMetadata();
  bool Continue = true;
  while (Continue) {
    const Inst *Reason = nullptr;
    if (matchTransitiveAssign(VMetadata, Base, Reason) ||
        matchTransitiveAssign(VMetadata, Index, Reason) ||
        matchCombinedBaseIndex(VMetadata, Base, Index, Shift, Reason) ||
        matchShiftedIndex(VMetadata, Index, Shift, Reason) ||
        matchOffsetBase(VMetadata, Base, Offset, Reason)) {
      dumpAddressOpt(Func, Base, Index, Shift, Offset, Reason);
    } else {
      Continue = false;
    }

    // TODO: Handle these additional patterns:
    //
    // Index is Index=Var<<Const && Const+Shift<=3 ==>
    //   Index=Var, Shift+=Const

    // Index is Index=Const*Var && log2(Const)+Shift<=3 ==>
    //   Index=Var, Shift+=log2(Const)

    // Index && Shift==0 && Base is Base=Var*Const && log2(Const)+Shift<=3 ==>
    //   swap(Index,Base)
    // Similar for Base=Const*Var and Base=Var<<Const

    // Index is Index=Var+Const ==>
    //   set Index=Var, Offset+=(Const<<Shift)

    // Index is Index=Const+Var ==>
    //   set Index=Var, Offset+=(Const<<Shift)

    // Index is Index=Var-Const ==>
    //   set Index=Var, Offset-=(Const<<Shift)

    // TODO: consider overflow issues with respect to Offset.
    // TODO: handle symbolic constants.
  }
}

template <class Machine>
void TargetX86Base<Machine>::lowerLoad(const InstLoad *Load) {
  // A Load instruction can be treated the same as an Assign instruction, after
  // the source operand is transformed into a Traits::X86OperandMem operand.
  // Note that the address mode optimization already creates a
  // Traits::X86OperandMem operand, so it doesn't need another level of
  // transformation.
  Variable *DestLoad = Load->getDest();
  Type Ty = DestLoad->getType();
  Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty);
  InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0);
  lowerAssign(Assign);
}
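
// A rough sketch of the effect (names hypothetical): for an i32 `%d = load
// %p`, the assignment path typically ends up as a single `mov %d, [%p]` once
// legalize() has put %p in a register or folded it into the memory operand.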

template <class Machine> void TargetX86Base<Machine>::doAddressOptLoad() {
  Inst *Inst = Context.getCur();
  Variable *Dest = Inst->getDest();
  Operand *Addr = Inst->getSrc(0);
  Variable *Index = nullptr;
  uint16_t Shift = 0;
  int32_t Offset = 0; // TODO: make Constant
  // Vanilla ICE load instructions should not use the segment registers, and
  // computeAddressOpt only works at the level of Variables and Constants, not
  // other Traits::X86OperandMem, so there should be no mention of segment
  // registers there either.
  const typename Traits::X86OperandMem::SegmentRegisters SegmentReg =
      Traits::X86OperandMem::DefaultSegment;
  Variable *Base = llvm::dyn_cast<Variable>(Addr);
  computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);
  if (Base && Addr != Base) {
    Inst->setDeleted();
    Constant *OffsetOp = Ctx->getConstantInt32(Offset);
    Addr = Traits::X86OperandMem::create(Func, Dest->getType(), Base, OffsetOp,
                                         Index, Shift, SegmentReg);
    Context.insert(InstLoad::create(Func, Dest, Addr));
  }
}

template <class Machine>
void TargetX86Base<Machine>::randomlyInsertNop(float Probability) {
  RandomNumberGeneratorWrapper RNG(Ctx->getRNG());
  if (RNG.getTrueWithProbability(Probability)) {
    _nop(RNG(Traits::X86_NUM_NOP_VARIANTS));
  }
}

template <class Machine>
void TargetX86Base<Machine>::lowerPhi(const InstPhi * /*Inst*/) {
  Func->setError("Phi found in regular instruction list");
}

template <class Machine>
void TargetX86Base<Machine>::lowerSelect(const InstSelect *Inst) {
  Variable *Dest = Inst->getDest();
  Type DestTy = Dest->getType();
  Operand *SrcT = Inst->getTrueOperand();
  Operand *SrcF = Inst->getFalseOperand();
  Operand *Condition = Inst->getCondition();

  if (isVectorType(DestTy)) {
    Type SrcTy = SrcT->getType();
    Variable *T = makeReg(SrcTy);
    Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);
    Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);
    if (InstructionSet >= Traits::SSE4_1) {
      // TODO(wala): If the condition operand is a constant, use blendps
      // or pblendw.
      //
      // Use blendvps or pblendvb to implement select.
      if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||
          SrcTy == IceType_v4f32) {
        Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
        Variable *xmm0 = makeReg(IceType_v4i32, Traits::RegisterSet::Reg_xmm0);
        _movp(xmm0, ConditionRM);
        _psll(xmm0, Ctx->getConstantInt8(31));
        _movp(T, SrcFRM);
        _blendvps(T, SrcTRM, xmm0);
        _movp(Dest, T);
      } else {
        assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16);
        Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16
                                                              : IceType_v16i8;
        Variable *xmm0 = makeReg(SignExtTy, Traits::RegisterSet::Reg_xmm0);
        lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition));
        _movp(T, SrcFRM);
        _pblendvb(T, SrcTRM, xmm0);
        _movp(Dest, T);
      }
      return;
    }
    // Lower select without Traits::SSE4.1:
    // a=d?b:c ==>
    //   if elementtype(d) != i1:
    //      d=sext(d);
    //   a=(b&d)|(c&~d);
    Variable *T2 = makeReg(SrcTy);
    // Sign extend the condition operand if applicable.
    if (SrcTy == IceType_v4f32) {
      // The sext operation takes only integer arguments.
      Variable *T3 = Func->makeVariable(IceType_v4i32);
      lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition));
      _movp(T, T3);
    } else if (typeElementType(SrcTy) != IceType_i1) {
      lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition));
    } else {
      Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
      _movp(T, ConditionRM);
    }
    _movp(T2, T);
    _pand(T, SrcTRM);
    _pandn(T2, SrcFRM);
    _por(T, T2);
    _movp(Dest, T);

    return;
  }

  typename Traits::Cond::BrCond Cond = Traits::Cond::Br_ne;
  Operand *CmpOpnd0 = nullptr;
  Operand *CmpOpnd1 = nullptr;
  // Handle folding opportunities.
  if (const class Inst *Producer = FoldingInfo.getProducerFor(Condition)) {
    assert(Producer->isDeleted());
    switch (BoolFolding::getProducerKind(Producer)) {
    default:
      break;
    case BoolFolding::PK_Icmp32: {
      auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer);
      Cond = Traits::getIcmp32Mapping(Cmp->getCondition());
      CmpOpnd1 = legalize(Producer->getSrc(1));
      CmpOpnd0 = legalizeSrc0ForCmp(Producer->getSrc(0), CmpOpnd1);
    } break;
    }
  }
  if (CmpOpnd0 == nullptr) {
    CmpOpnd0 = legalize(Condition, Legal_Reg | Legal_Mem);
    CmpOpnd1 = Ctx->getConstantZero(IceType_i32);
  }
  assert(CmpOpnd0);
  assert(CmpOpnd1);

  _cmp(CmpOpnd0, CmpOpnd1);
  if (typeWidthInBytes(DestTy) == 1 || isFloatingType(DestTy)) {
    // The cmov instruction doesn't allow 8-bit or FP operands, so
    // we need explicit control flow.
    // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1:
    typename Traits::Insts::Label *Label =
        Traits::Insts::Label::create(Func, this);
    SrcT = legalize(SrcT, Legal_Reg | Legal_Imm);
    _mov(Dest, SrcT);
    _br(Cond, Label);
    SrcF = legalize(SrcF, Legal_Reg | Legal_Imm);
    _mov_nonkillable(Dest, SrcF);
    Context.insert(Label);
    return;
  }
  // mov t, SrcF; cmov_cond t, SrcT; mov dest, t
  // But if SrcT is immediate, we might be able to do better, as
  // the cmov instruction doesn't allow an immediate operand:
  // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t
  if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) {
    std::swap(SrcT, SrcF);
    Cond = InstX86Base<Machine>::getOppositeCondition(Cond);
  }
  if (!Traits::Is64Bit && DestTy == IceType_i64) {
    SrcT = legalizeUndef(SrcT);
    SrcF = legalizeUndef(SrcF);
    // Set the low portion.
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *TLo = nullptr;
    Operand *SrcFLo = legalize(loOperand(SrcF));
    _mov(TLo, SrcFLo);
    Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Mem);
    _cmov(TLo, SrcTLo, Cond);
    _mov(DestLo, TLo);
    // Set the high portion.
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    Variable *THi = nullptr;
    Operand *SrcFHi = legalize(hiOperand(SrcF));
    _mov(THi, SrcFHi);
    Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Mem);
    _cmov(THi, SrcTHi, Cond);
    _mov(DestHi, THi);
    return;
  }

  assert(DestTy == IceType_i16 || DestTy == IceType_i32 ||
         (Traits::Is64Bit && DestTy == IceType_i64));
  Variable *T = nullptr;
  SrcF = legalize(SrcF);
  _mov(T, SrcF);
  SrcT = legalize(SrcT, Legal_Reg | Legal_Mem);
  _cmov(T, SrcT, Cond);
  _mov(Dest, T);
}
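
// For illustration (a hypothetical i64 select on x86-32, names arbitrary):
// `a = select i1 %d, i64 %b, i64 %c` with the condition folded into a
// preceding cmp might lower roughly to:
//   cmp      e, f
//   mov      t.lo, c.lo
//   cmovCOND t.lo, b.lo
//   mov      a.lo, t.lo
//   mov      t.hi, c.hi
//   cmovCOND t.hi, b.hi
//   mov      a.hi, t.hi
// i.e. the same cmov pattern applied independently to each 32-bit half.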

template <class Machine>
void TargetX86Base<Machine>::lowerStore(const InstStore *Inst) {
  Operand *Value = Inst->getData();
  Operand *Addr = Inst->getAddr();
  typename Traits::X86OperandMem *NewAddr =
      formMemoryOperand(Addr, Value->getType());
  Type Ty = NewAddr->getType();

  if (!Traits::Is64Bit && Ty == IceType_i64) {
    Value = legalizeUndef(Value);
    Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm);
    Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm);
    _store(ValueHi,
           llvm::cast<typename Traits::X86OperandMem>(hiOperand(NewAddr)));
    _store(ValueLo,
           llvm::cast<typename Traits::X86OperandMem>(loOperand(NewAddr)));
  } else if (isVectorType(Ty)) {
    _storep(legalizeToReg(Value), NewAddr);
  } else {
    Value = legalize(Value, Legal_Reg | Legal_Imm);
    _store(Value, NewAddr);
  }
}

template <class Machine> void TargetX86Base<Machine>::doAddressOptStore() {
  InstStore *Inst = llvm::cast<InstStore>(Context.getCur());
  Operand *Data = Inst->getData();
  Operand *Addr = Inst->getAddr();
  Variable *Index = nullptr;
  uint16_t Shift = 0;
  int32_t Offset = 0; // TODO: make Constant
  Variable *Base = llvm::dyn_cast<Variable>(Addr);
  // Vanilla ICE store instructions should not use the segment registers, and
  // computeAddressOpt only works at the level of Variables and Constants, not
  // other Traits::X86OperandMem, so there should be no mention of segment
  // registers there either.
  const typename Traits::X86OperandMem::SegmentRegisters SegmentReg =
      Traits::X86OperandMem::DefaultSegment;
  computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);
  if (Base && Addr != Base) {
    Inst->setDeleted();
    Constant *OffsetOp = Ctx->getConstantInt32(Offset);
    Addr = Traits::X86OperandMem::create(Func, Data->getType(), Base, OffsetOp,
                                         Index, Shift, SegmentReg);
    InstStore *NewStore = InstStore::create(Func, Data, Addr);
    if (Inst->getDest())
      NewStore->setRmwBeacon(Inst->getRmwBeacon());
    Context.insert(NewStore);
  }
}

template <class Machine>
Operand *TargetX86Base<Machine>::lowerCmpRange(Operand *Comparison,
                                               uint64_t Min, uint64_t Max) {
  // TODO(ascull): 64-bit should not reach here but only because it is not
  // implemented yet. This should be able to handle the 64-bit case.
  assert(Traits::Is64Bit || Comparison->getType() != IceType_i64);
  // Subtracting 0 is a nop, so don't do it.
  if (Min != 0) {
    // Avoid clobbering the comparison by copying it.
    Variable *T = nullptr;
    _mov(T, Comparison);
    _sub(T, Ctx->getConstantInt32(Min));
    Comparison = T;
  }

  _cmp(Comparison, Ctx->getConstantInt32(Max - Min));

  return Comparison;
}
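
// A worked example (hypothetical values): for a case range [10, 13], the
// sequence is `sub t, 10; cmp t, 3`. After the subtract, any input below 10
// wraps around to a large unsigned value, so a single unsigned compare
// against Max - Min covers both bounds: `ja` rejects everything outside the
// range and `jbe` accepts everything inside it.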

template <class Machine>
void TargetX86Base<Machine>::lowerCaseCluster(const CaseCluster &Case,
                                              Operand *Comparison,
                                              bool DoneCmp,
                                              CfgNode *DefaultTarget) {
  switch (Case.getKind()) {
  case CaseCluster::JumpTable: {
    typename Traits::Insts::Label *SkipJumpTable;

    Operand *RangeIndex =
        lowerCmpRange(Comparison, Case.getLow(), Case.getHigh());
    if (DefaultTarget == nullptr) {
      // Skip over the jump table logic if the comparison is not in range and
      // there is no default.
      SkipJumpTable = Traits::Insts::Label::create(Func, this);
      _br(Traits::Cond::Br_a, SkipJumpTable);
    } else {
      _br(Traits::Cond::Br_a, DefaultTarget);
    }

    InstJumpTable *JumpTable = Case.getJumpTable();
    Context.insert(JumpTable);

    // Make sure the index is a register of the same width as the base.
    Variable *Index;
    if (RangeIndex->getType() != getPointerType()) {
      Index = makeReg(getPointerType());
      _movzx(Index, RangeIndex);
    } else {
      Index = legalizeToReg(RangeIndex);
    }

    constexpr RelocOffsetT RelocOffset = 0;
    constexpr bool SuppressMangling = true;
    IceString MangledName = Ctx->mangleName(Func->getFunctionName());
    Constant *Base = Ctx->getConstantSym(
        RelocOffset, InstJumpTable::makeName(MangledName, JumpTable->getId()),
        SuppressMangling);
    Constant *Offset = nullptr;
    uint16_t Shift = typeWidthInBytesLog2(getPointerType());
    // TODO(ascull): remove need for legalize by allowing null base in memop
    auto *TargetInMemory = Traits::X86OperandMem::create(
        Func, getPointerType(), legalizeToReg(Base), Offset, Index, Shift);
    Variable *Target = nullptr;
    _mov(Target, TargetInMemory);
    lowerIndirectJump(Target);

    if (DefaultTarget == nullptr)
      Context.insert(SkipJumpTable);
    return;
  }
  case CaseCluster::Range: {
    if (Case.isUnitRange()) {
      // Single item.
      if (!DoneCmp) {
        Constant *Value = Ctx->getConstantInt32(Case.getLow());
        _cmp(Comparison, Value);
      }
      _br(Traits::Cond::Br_e, Case.getTarget());
    } else if (DoneCmp && Case.isPairRange()) {
      // Range of two items with the first item already compared against.
      _br(Traits::Cond::Br_e, Case.getTarget());
      Constant *Value = Ctx->getConstantInt32(Case.getHigh());
      _cmp(Comparison, Value);
      _br(Traits::Cond::Br_e, Case.getTarget());
    } else {
      // Range.
      lowerCmpRange(Comparison, Case.getLow(), Case.getHigh());
      _br(Traits::Cond::Br_be, Case.getTarget());
    }
    if (DefaultTarget != nullptr)
      _br(DefaultTarget);
    return;
  }
  }
}
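
// For illustration, the jump-table path above emits roughly the following
// (labels and registers hypothetical) for a cluster covering [Low, High]:
//   sub   t, Low                     ; from lowerCmpRange
//   cmp   t, High - Low
//   ja    <default or skip label>
//   movzx idx, t                     ; widen index to pointer width if needed
//   mov   target, [JT_base + idx * PtrSize]
//   jmp   target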

template <class Machine>
void TargetX86Base<Machine>::lowerSwitch(const InstSwitch *Inst) {
  // Group cases together and navigate through them with a binary search.
  CaseClusterArray CaseClusters = CaseCluster::clusterizeSwitch(Func, Inst);
  Operand *Src0 = Inst->getComparison();
  CfgNode *DefaultTarget = Inst->getLabelDefault();

  assert(CaseClusters.size() != 0); // Should always be at least one.

  if (!Traits::Is64Bit && Src0->getType() == IceType_i64) {
    Src0 = legalize(Src0); // get Base/Index into physical registers
    Operand *Src0Lo = loOperand(Src0);
    Operand *Src0Hi = hiOperand(Src0);
    if (CaseClusters.back().getHigh() > UINT32_MAX) {
      // TODO(ascull): handle the 64-bit case properly (currently a naive
      // version). This might be handled by a higher level lowering of
      // switches.
      SizeT NumCases = Inst->getNumCases();
      if (NumCases >= 2) {
        Src0Lo = legalizeToReg(Src0Lo);
        Src0Hi = legalizeToReg(Src0Hi);
      } else {
        Src0Lo = legalize(Src0Lo, Legal_Reg | Legal_Mem);
        Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem);
      }
      for (SizeT I = 0; I < NumCases; ++I) {
        Constant *ValueLo = Ctx->getConstantInt32(Inst->getValue(I));
        Constant *ValueHi = Ctx->getConstantInt32(Inst->getValue(I) >> 32);
        typename Traits::Insts::Label *Label =
            Traits::Insts::Label::create(Func, this);
        _cmp(Src0Lo, ValueLo);
        _br(Traits::Cond::Br_ne, Label);
        _cmp(Src0Hi, ValueHi);
        _br(Traits::Cond::Br_e, Inst->getLabel(I));
        Context.insert(Label);
      }
      _br(Inst->getLabelDefault());
      return;
    } else {
      // All the values are 32-bit so just check the operand is too and then
      // fall through to the 32-bit implementation. This is a common case.
      Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem);
      Constant *Zero = Ctx->getConstantInt32(0);
      _cmp(Src0Hi, Zero);
      _br(Traits::Cond::Br_ne, DefaultTarget);
      Src0 = Src0Lo;
    }
  }

  // 32-bit lowering

  if (CaseClusters.size() == 1) {
    // Jump straight to default if needed. Currently a common case as jump
    // tables occur on their own.
    constexpr bool DoneCmp = false;
    lowerCaseCluster(CaseClusters.front(), Src0, DoneCmp, DefaultTarget);
    return;
  }

  // Going to be used multiple times, so get it into a register early.
  Variable *Comparison = legalizeToReg(Src0);

  // A span is over the clusters.
  struct SearchSpan {
    SearchSpan(SizeT Begin, SizeT Size, typename Traits::Insts::Label *Label)
        : Begin(Begin), Size(Size), Label(Label) {}

    SizeT Begin;
    SizeT Size;
    typename Traits::Insts::Label *Label;
  };
  // The stack will only grow to the height of the tree, so 12 should be
  // plenty.
  std::stack<SearchSpan, llvm::SmallVector<SearchSpan, 12>> SearchSpanStack;
  SearchSpanStack.emplace(0, CaseClusters.size(), nullptr);
  bool DoneCmp = false;

  while (!SearchSpanStack.empty()) {
    SearchSpan Span = SearchSpanStack.top();
    SearchSpanStack.pop();

    if (Span.Label != nullptr)
      Context.insert(Span.Label);

    switch (Span.Size) {
    case 0:
      llvm::report_fatal_error("Invalid SearchSpan size");
      break;

    case 1:
      lowerCaseCluster(CaseClusters[Span.Begin], Comparison, DoneCmp,
                       SearchSpanStack.empty() ? nullptr : DefaultTarget);
      DoneCmp = false;
      break;

    case 2: {
      const CaseCluster *CaseA = &CaseClusters[Span.Begin];
      const CaseCluster *CaseB = &CaseClusters[Span.Begin + 1];

      // Placing a range last may allow register clobbering during the range
      // test. That means there is no need to clone the register. If it is a
      // unit range the comparison may have already been done in the binary
      // search (DoneCmp) and so it should be placed first. If this is a range
      // of two items and the comparison with the low value has already been
      // done, comparing with the other element is cheaper than a range test.
      // If the low end of the range is zero then there is no subtraction and
      // nothing to be gained.
      if (!CaseA->isUnitRange() &&
          !(CaseA->getLow() == 0 || (DoneCmp && CaseA->isPairRange()))) {
        std::swap(CaseA, CaseB);
        DoneCmp = false;
      }

      lowerCaseCluster(*CaseA, Comparison, DoneCmp);
      DoneCmp = false;
      lowerCaseCluster(*CaseB, Comparison, DoneCmp,
                       SearchSpanStack.empty() ? nullptr : DefaultTarget);
    } break;

    default:
      // Pick the middle item and branch b or ae.
      SizeT PivotIndex = Span.Begin + (Span.Size / 2);
      const CaseCluster &Pivot = CaseClusters[PivotIndex];
      Constant *Value = Ctx->getConstantInt32(Pivot.getLow());
      typename Traits::Insts::Label *Label =
          Traits::Insts::Label::create(Func, this);
      _cmp(Comparison, Value);
      // TODO(ascull): does it always have to be far?
      _br(Traits::Cond::Br_b, Label, Traits::Insts::Br::Far);
      // Lower the left and (pivot+right) sides, falling through to the right.
      SearchSpanStack.emplace(Span.Begin, Span.Size / 2, Label);
      SearchSpanStack.emplace(PivotIndex, Span.Size - (Span.Size / 2), nullptr);
      DoneCmp = true;
      break;
    }
  }

  _br(DefaultTarget);
}
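
// A worked example (hypothetical case values): for clusters {1}, {5}, {10},
// {20,21}, the loop above picks {10} as the pivot and emits `cmp t, 10;
// jb L_left`, then falls through to lower {10} and {20,21} (with DoneCmp set,
// so {10} can reuse the pivot compare) while L_left handles {1} and {5}. This
// gives O(log n) compares before each leaf's equality or range test.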

template <class Machine>
void TargetX86Base<Machine>::scalarizeArithmetic(InstArithmetic::OpKind Kind,
                                                 Variable *Dest, Operand *Src0,
                                                 Operand *Src1) {
  assert(isVectorType(Dest->getType()));
  Type Ty = Dest->getType();
  Type ElementTy = typeElementType(Ty);
  SizeT NumElements = typeNumElements(Ty);

  Operand *T = Ctx->getConstantUndef(Ty);
  for (SizeT I = 0; I < NumElements; ++I) {
    Constant *Index = Ctx->getConstantInt32(I);

    // Extract the next two inputs.
    Variable *Op0 = Func->makeVariable(ElementTy);
    lowerExtractElement(InstExtractElement::create(Func, Op0, Src0, Index));
    Variable *Op1 = Func->makeVariable(ElementTy);
    lowerExtractElement(InstExtractElement::create(Func, Op1, Src1, Index));

    // Perform the arithmetic as a scalar operation.
    Variable *Res = Func->makeVariable(ElementTy);
    lowerArithmetic(InstArithmetic::create(Func, Kind, Res, Op0, Op1));

    // Insert the result into position.
    Variable *DestT = Func->makeVariable(Ty);
    lowerInsertElement(InstInsertElement::create(Func, DestT, T, Res, Index));
    T = DestT;
  }

  lowerAssign(InstAssign::create(Func, Dest, T));
}
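
// For illustration (hypothetical IR): scalarizing `%d = udiv <4 x i32> %a, %b`
// unrolls into four extract/op/insert triples, e.g. for element 0:
//   %a0 = extractelement <4 x i32> %a, i32 0
//   %b0 = extractelement <4 x i32> %b, i32 0
//   %r0 = udiv i32 %a0, %b0
//   %t0 = insertelement <4 x i32> undef, i32 %r0, i32 0
// and the result of the final insertelement is assigned to %d.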

/// The following pattern occurs often in lowered C and C++ code:
///
/// %cmp = fcmp/icmp pred <n x ty> %src0, %src1
/// %cmp.ext = sext <n x i1> %cmp to <n x ty>
///
/// We can eliminate the sext operation by copying the result of pcmpeqd,
/// pcmpgtd, or cmpps (which produce sign extended results) to the result
/// of the sext operation.
template <class Machine>
void TargetX86Base<Machine>::eliminateNextVectorSextInstruction(
    Variable *SignExtendedResult) {
  if (InstCast *NextCast =
          llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) {
    if (NextCast->getCastKind() == InstCast::Sext &&
        NextCast->getSrc(0) == SignExtendedResult) {
      NextCast->setDeleted();
      _movp(NextCast->getDest(), legalizeToReg(SignExtendedResult));
      // Skip over the instruction.
      Context.advanceNext();
    }
  }
}

template <class Machine>
void TargetX86Base<Machine>::lowerUnreachable(
    const InstUnreachable * /*Inst*/) {
  _ud2();
}

template <class Machine>
void TargetX86Base<Machine>::lowerRMW(
    const typename Traits::Insts::FakeRMW *RMW) {
  // If the beacon variable's live range does not end in this instruction,
  // then it must end in the modified Store instruction that follows. This
  // means that the original Store instruction is still there, either because
  // the value being stored is used beyond the Store instruction, or because
  // dead code elimination did not happen. In either case, we cancel RMW
  // lowering (and the caller deletes the RMW instruction).
  if (!RMW->isLastUse(RMW->getBeacon()))
    return;
  Operand *Src = RMW->getData();
  Type Ty = Src->getType();
  typename Traits::X86OperandMem *Addr = formMemoryOperand(RMW->getAddr(), Ty);
  if (!Traits::Is64Bit && Ty == IceType_i64) {
    Src = legalizeUndef(Src);
    Operand *SrcLo = legalize(loOperand(Src), Legal_Reg | Legal_Imm);
    Operand *SrcHi = legalize(hiOperand(Src), Legal_Reg | Legal_Imm);
    typename Traits::X86OperandMem *AddrLo =
        llvm::cast<typename Traits::X86OperandMem>(loOperand(Addr));
    typename Traits::X86OperandMem *AddrHi =
        llvm::cast<typename Traits::X86OperandMem>(hiOperand(Addr));
    switch (RMW->getOp()) {
    default:
      // TODO(stichnot): Implement other arithmetic operators.
      break;
    case InstArithmetic::Add:
      _add_rmw(AddrLo, SrcLo);
      _adc_rmw(AddrHi, SrcHi);
      return;
    case InstArithmetic::Sub:
      _sub_rmw(AddrLo, SrcLo);
      _sbb_rmw(AddrHi, SrcHi);
      return;
    case InstArithmetic::And:
      _and_rmw(AddrLo, SrcLo);
      _and_rmw(AddrHi, SrcHi);
      return;
    case InstArithmetic::Or:
      _or_rmw(AddrLo, SrcLo);
      _or_rmw(AddrHi, SrcHi);
      return;
    case InstArithmetic::Xor:
      _xor_rmw(AddrLo, SrcLo);
      _xor_rmw(AddrHi, SrcHi);
      return;
    }
  } else {
    // x86-32: i8, i16, i32
    // x86-64: i8, i16, i32, i64
    switch (RMW->getOp()) {
    default:
      // TODO(stichnot): Implement other arithmetic operators.
      break;
    case InstArithmetic::Add:
      Src = legalize(Src, Legal_Reg | Legal_Imm);
      _add_rmw(Addr, Src);
      return;
    case InstArithmetic::Sub:
      Src = legalize(Src, Legal_Reg | Legal_Imm);
      _sub_rmw(Addr, Src);
      return;
    case InstArithmetic::And:
      Src = legalize(Src, Legal_Reg | Legal_Imm);
      _and_rmw(Addr, Src);
      return;
    case InstArithmetic::Or:
      Src = legalize(Src, Legal_Reg | Legal_Imm);
      _or_rmw(Addr, Src);
      return;
    case InstArithmetic::Xor:
      Src = legalize(Src, Legal_Reg | Legal_Imm);
      _xor_rmw(Addr, Src);
      return;
    }
  }
  llvm::report_fatal_error("Couldn't lower RMW instruction");
}
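
// For illustration (hypothetical IR): a pattern like
//   %v = load i32* %p;  %s = add i32 %v, 1;  store i32 %s, i32* %p
// that has been rewritten into a FakeRMW lowers to a single
// memory-destination instruction, roughly `add dword ptr [%p], 1`, and on
// x86-32 an i64 add becomes an add/adc pair over the two 32-bit halves.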

template <class Machine>
void TargetX86Base<Machine>::lowerOther(const Inst *Instr) {
  if (const auto *RMW =
          llvm::dyn_cast<typename Traits::Insts::FakeRMW>(Instr)) {
    lowerRMW(RMW);
  } else {
    TargetLowering::lowerOther(Instr);
  }
}

/// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to
/// preserve integrity of liveness analysis. Undef values are also
/// turned into zeroes, since loOperand() and hiOperand() don't expect
/// Undef input.
template <class Machine> void TargetX86Base<Machine>::prelowerPhis() {
  if (Traits::Is64Bit) {
    // On x86-64 we don't need to prelower phis -- the architecture can handle
    // 64-bit integers natively.
    return;
  }

  // Pause constant blinding or pooling; it will be done later, during the phi
  // lowering assignments.
  BoolFlagSaver B(RandomizationPoolingPaused, true);
  PhiLowering::prelowerPhis32Bit<TargetX86Base<Machine>>(
      this, Context.getNode(), Func);
}

// There is no support for loading or emitting vector constants, so the
// vector values returned from makeVectorOfZeros, makeVectorOfOnes,
// etc. are initialized with register operations.
//
// TODO(wala): Add limited support for vector constants so that
// complex initialization in registers is unnecessary.

template <class Machine>
Variable *TargetX86Base<Machine>::makeVectorOfZeros(Type Ty, int32_t RegNum) {
  Variable *Reg = makeReg(Ty, RegNum);
  // Insert a FakeDef, since otherwise the live range of Reg might
  // be overestimated.
  Context.insert(InstFakeDef::create(Func, Reg));
  _pxor(Reg, Reg);
  return Reg;
}

template <class Machine>
Variable *TargetX86Base<Machine>::makeVectorOfMinusOnes(Type Ty,
                                                        int32_t RegNum) {
  Variable *MinusOnes = makeReg(Ty, RegNum);
  // Insert a FakeDef so the live range of MinusOnes is not overestimated.
  Context.insert(InstFakeDef::create(Func, MinusOnes));
  _pcmpeq(MinusOnes, MinusOnes);
  return MinusOnes;
}

template <class Machine>
Variable *TargetX86Base<Machine>::makeVectorOfOnes(Type Ty, int32_t RegNum) {
  Variable *Dest = makeVectorOfZeros(Ty, RegNum);
  Variable *MinusOne = makeVectorOfMinusOnes(Ty);
  _psub(Dest, MinusOne);
  return Dest;
}

template <class Machine>
Variable *TargetX86Base<Machine>::makeVectorOfHighOrderBits(Type Ty,
                                                            int32_t RegNum) {
  assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 ||
         Ty == IceType_v16i8);
  if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) {
    Variable *Reg = makeVectorOfOnes(Ty, RegNum);
    SizeT Shift =
        typeWidthInBytes(typeElementType(Ty)) * Traits::X86_CHAR_BIT - 1;
    _psll(Reg, Ctx->getConstantInt8(Shift));
    return Reg;
  } else {
    // SSE has no left shift operation for vectors of 8 bit integers.
    const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080;
    Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK);
    Variable *Reg = makeReg(Ty, RegNum);
    _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem));
    _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8));
    return Reg;
  }
}
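
// For illustration, the register-only constant recipes above (instruction
// choices per the helpers, registers hypothetical):
//   zeros:           pxor   x, x                       ; 0 ^ 0 = 0
//   minus ones:      pcmpeq m, m                       ; x == x => all 1 bits
//   ones:            pxor x, x; pcmpeq m, m; psub x, m ; 0 - (-1) = 1
//   high-order bits: ones shifted left by (elt_bits - 1) via psll, except for
//                    v16i8, which broadcasts 0x80808080 with movd + pshufd.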

/// Construct a mask in a register that can be and'ed with a
/// floating-point value to mask off its sign bit. The value will be
/// <4 x 0x7fffffff> for f32 and v4f32, and <2 x 0x7fffffffffffffff>
/// for f64. Construct it as a vector of ones logically right shifted
/// one bit. TODO(stichnot): Fix the wala TODO above, to represent
/// vector constants in memory.
template <class Machine>
Variable *TargetX86Base<Machine>::makeVectorOfFabsMask(Type Ty,
                                                       int32_t RegNum) {
  Variable *Reg = makeVectorOfMinusOnes(Ty, RegNum);
  _psrl(Reg, Ctx->getConstantInt8(1));
  return Reg;
}

template <class Machine>
typename TargetX86Base<Machine>::Traits::X86OperandMem *
TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot,
                                                     uint32_t Offset) {
  // Ensure that Slot is a stack slot.
  assert(Slot->getWeight().isZero());
  assert(Slot->getRegNum() == Variable::NoRegister);
  // Compute the location of Slot in memory.
  // TODO(wala,stichnot): lea should not be required. The address of
  // the stack slot is known at compile time (although not until after
  // addProlog()).
  const Type PointerType = IceType_i32;
  Variable *Loc = makeReg(PointerType);
  _lea(Loc, Slot);
  Constant *ConstantOffset = Ctx->getConstantInt32(Offset);
  return Traits::X86OperandMem::create(Func, Ty, Loc, ConstantOffset);
}

/// Helper for legalize() to emit the right code to lower an operand to a
/// register of the appropriate type.
template <class Machine>
Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) {
  Type Ty = Src->getType();
  Variable *Reg = makeReg(Ty, RegNum);
  if (isVectorType(Ty)) {
    _movp(Reg, Src);
  } else {
    _mov(Reg, Src);
  }
  return Reg;
}

template <class Machine>
Operand *TargetX86Base<Machine>::legalize(Operand *From, LegalMask Allowed,
                                          int32_t RegNum) {
  Type Ty = From->getType();
  // Assert that a physical register is allowed. To date, all calls
  // to legalize() allow a physical register. If a physical register
  // needs to be explicitly disallowed, then new code will need to be
  // written to force a spill.
  assert(Allowed & Legal_Reg);
  // If we're asking for a specific physical register, make sure we're
  // not allowing any other operand kinds. (This could be future
  // work, e.g. allow the shl shift amount to be either an immediate
  // or in ecx.)
  assert(RegNum == Variable::NoRegister || Allowed == Legal_Reg);

  if (auto Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(From)) {
    // Before doing anything with a Mem operand, we need to ensure
    // that the Base and Index components are in physical registers.
    Variable *Base = Mem->getBase();
    Variable *Index = Mem->getIndex();
    Variable *RegBase = nullptr;
    Variable *RegIndex = nullptr;
    if (Base) {
      RegBase = legalizeToReg(Base);
    }
    if (Index) {
      RegIndex = legalizeToReg(Index);
    }
    if (Base != RegBase || Index != RegIndex) {
      Mem = Traits::X86OperandMem::create(Func, Ty, RegBase, Mem->getOffset(),
                                          RegIndex, Mem->getShift(),
                                          Mem->getSegmentRegister());
    }

    // For all Memory Operands, we do randomization/pooling here.
    From = randomizeOrPoolImmediate(Mem);

    if (!(Allowed & Legal_Mem)) {
      From = copyToReg(From, RegNum);
    }
    return From;
  }
  if (auto *Const = llvm::dyn_cast<Constant>(From)) {
    if (llvm::isa<ConstantUndef>(Const)) {
      From = legalizeUndef(Const, RegNum);
      if (isVectorType(Ty))
        return From;
      Const = llvm::cast<Constant>(From);
    }
    // There should be no constants of vector type (other than undef).
    assert(!isVectorType(Ty));

    // If the operand is a 64 bit constant integer, we need to legalize it to
    // a register in x86-64.
    if (Traits::Is64Bit) {
      if (llvm::isa<ConstantInteger64>(Const)) {
        Variable *V = copyToReg(Const, RegNum);
        V->setWeightInfinite();
        return V;
      }
    }

    // If the operand is a 32 bit constant integer, we should check
    // whether we need to randomize it or pool it.
    if (ConstantInteger32 *C = llvm::dyn_cast<ConstantInteger32>(Const)) {
      Operand *NewConst = randomizeOrPoolImmediate(C, RegNum);
      if (NewConst != Const) {
        return NewConst;
      }
    }

    // Convert a scalar floating point constant into an explicit
    // memory operand.
    if (isScalarFloatingType(Ty)) {
      Variable *Base = nullptr;
      std::string Buffer;
      llvm::raw_string_ostream StrBuf(Buffer);
      llvm::cast<Constant>(From)->emitPoolLabel(StrBuf);
      llvm::cast<Constant>(From)->setShouldBePooled(true);
      Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true);
      From = Traits::X86OperandMem::create(Func, Ty, Base, Offset);
    }
    bool NeedsReg = false;
    if (!(Allowed & Legal_Imm) && !isScalarFloatingType(Ty))
      // Immediate specifically not allowed.
      NeedsReg = true;
    if (!(Allowed & Legal_Mem) && isScalarFloatingType(Ty))
      // On x86, FP constants are lowered to mem operands.
      NeedsReg = true;
    if (NeedsReg) {
      From = copyToReg(From, RegNum);
    }
    return From;
  }
  if (auto Var = llvm::dyn_cast<Variable>(From)) {
    // Check if the variable is guaranteed a physical register. This
    // can happen either when the variable is pre-colored or when it is
    // assigned infinite weight.
    bool MustHaveRegister = (Var->hasReg() || Var->getWeight().isInf());
    // We need a new physical register for the operand if:
    //   Mem is not allowed and Var isn't guaranteed a physical
    //   register, or
    //   RegNum is required and Var->getRegNum() doesn't match.
    if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
        (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {
      From = copyToReg(From, RegNum);
    }
    return From;
  }
  llvm_unreachable("Unhandled operand kind in legalize()");
  return From;
}

/// Provide a trivial wrapper to legalize() for this common usage.
template <class Machine>
Variable *TargetX86Base<Machine>::legalizeToReg(Operand *From, int32_t RegNum) {
  return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
}

/// Legalize undef values to concrete values.
template <class Machine>
Operand *TargetX86Base<Machine>::legalizeUndef(Operand *From, int32_t RegNum) {
  Type Ty = From->getType();
  if (llvm::isa<ConstantUndef>(From)) {
    // Lower undefs to zero. Another option is to lower undefs to an
    // uninitialized register; however, using an uninitialized register
    // results in less predictable code.
    //
    // If in the future the implementation is changed to lower undef
    // values to uninitialized registers, a FakeDef will be needed:
    //   Context.insert(InstFakeDef::create(Func, Reg));
    // This is in order to ensure that the live range of Reg is not
    // overestimated. If the constant being lowered is a 64 bit value,
    // then the result should be split and the lo and hi components will
    // need to go in uninitialized registers.
    if (isVectorType(Ty))
      return makeVectorOfZeros(Ty, RegNum);
    return Ctx->getConstantZero(Ty);
  }
  return From;
}

/// For the cmp instruction, if Src1 is an immediate, or known to be a
/// physical register, we can allow Src0 to be a memory operand.
/// Otherwise, Src0 must be copied into a physical register.
/// (Actually, either Src0 or Src1 can be chosen for the physical
/// register, but unfortunately we have to commit to one or the other
/// before register allocation.)
template <class Machine>
Operand *TargetX86Base<Machine>::legalizeSrc0ForCmp(Operand *Src0,
                                                    Operand *Src1) {
  bool IsSrc1ImmOrReg = false;
  if (llvm::isa<Constant>(Src1)) {
    IsSrc1ImmOrReg = true;
  } else if (auto *Var = llvm::dyn_cast<Variable>(Src1)) {
    if (Var->hasReg())
      IsSrc1ImmOrReg = true;
  }
  return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg);
}

template <class Machine>
typename TargetX86Base<Machine>::Traits::X86OperandMem *
TargetX86Base<Machine>::formMemoryOperand(Operand *Opnd, Type Ty,
                                          bool DoLegalize) {
  auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Opnd);
  // It may be the case that address mode optimization already creates a
  // Traits::X86OperandMem, so in that case it wouldn't need another level of
  // transformation.
  if (!Mem) {
    Variable *Base = llvm::dyn_cast<Variable>(Opnd);
    Constant *Offset = llvm::dyn_cast<Constant>(Opnd);
    assert(Base || Offset);
    if (Offset) {
      // During memory operand building, we do not blind or pool the constant
      // offset; we will work on the whole memory operand as one entity later,
      // which saves one instruction. By turning blinding and pooling off, we
      // guarantee legalize(Offset) will return a Constant*.
      {
        BoolFlagSaver B(RandomizationPoolingPaused, true);

        Offset = llvm::cast<Constant>(legalize(Offset));
      }

      assert(llvm::isa<ConstantInteger32>(Offset) ||
             llvm::isa<ConstantRelocatable>(Offset));
    }
    Mem = Traits::X86OperandMem::create(Func, Ty, Base, Offset);
  }
  // Do legalization, which contains randomization/pooling,
  // or do randomization/pooling.
  return llvm::cast<typename Traits::X86OperandMem>(
      DoLegalize ? legalize(Mem) : randomizeOrPoolImmediate(Mem));
}

template <class Machine>
Variable *TargetX86Base<Machine>::makeReg(Type Type, int32_t RegNum) {
  // There aren't any 64-bit integer registers for x86-32.
  assert(Traits::Is64Bit || Type != IceType_i64);
  Variable *Reg = Func->makeVariable(Type);
  if (RegNum == Variable::NoRegister)
    Reg->setWeightInfinite();
  else
    Reg->setRegNum(RegNum);
  return Reg;
}

template <class Machine> void TargetX86Base<Machine>::postLower() {
  if (Ctx->getFlags().getOptLevel() == Opt_m1)
    return;
  inferTwoAddress();
}

template <class Machine>
void TargetX86Base<Machine>::makeRandomRegisterPermutation(
    llvm::SmallVectorImpl<int32_t> &Permutation,
    const llvm::SmallBitVector &ExcludeRegisters) const {
  Traits::makeRandomRegisterPermutation(Ctx, Func, Permutation,
                                        ExcludeRegisters);
}

template <class Machine>
void TargetX86Base<Machine>::emit(const ConstantInteger32 *C) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Ctx->getStrEmit();
  Str << getConstantPrefix() << C->getValue();
}

template <class Machine>
void TargetX86Base<Machine>::emit(const ConstantInteger64 *C) const {
  if (!Traits::Is64Bit) {
    llvm::report_fatal_error("Not expecting to emit 64-bit integers");
  } else {
    if (!BuildDefs::dump())
      return;
    Ostream &Str = Ctx->getStrEmit();
    Str << getConstantPrefix() << C->getValue();
  }
}

template <class Machine>
void TargetX86Base<Machine>::emit(const ConstantFloat *C) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Ctx->getStrEmit();
  C->emitPoolLabel(Str);
}

template <class Machine>
void TargetX86Base<Machine>::emit(const ConstantDouble *C) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Ctx->getStrEmit();
  C->emitPoolLabel(Str);
}

template <class Machine>
void TargetX86Base<Machine>::emit(const ConstantUndef *) const {
  llvm::report_fatal_error("undef value encountered by emitter.");
}

/// Randomize or pool an Immediate.
template <class Machine>
Operand *TargetX86Base<Machine>::randomizeOrPoolImmediate(Constant *Immediate,
                                                          int32_t RegNum) {
  assert(llvm::isa<ConstantInteger32>(Immediate) ||
         llvm::isa<ConstantRelocatable>(Immediate));
  if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None ||
      RandomizationPoolingPaused == true) {
    // Immediates randomization/pooling is off or paused.
    return Immediate;
  }
  if (Immediate->shouldBeRandomizedOrPooled(Ctx)) {
    Ctx->statsUpdateRPImms();
    if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() ==
        RPI_Randomize) {
      // Blind the constant.
      // FROM:
      //   imm
      // TO:
      //   insert: mov imm+cookie, Reg
      //   insert: lea -cookie[Reg], Reg
      //   => Reg
      // If we have already assigned a physical register, we must come from
      // advancedPhiLowering()=>lowerAssign(). In this case we should reuse
      // the assigned register as this assignment is the start of its use-def
      // chain. So we add the RegNum argument here.
      // Note we use the 'lea' instruction instead of 'xor' to avoid affecting
      // the flags.
      Variable *Reg = makeReg(IceType_i32, RegNum);
      ConstantInteger32 *Integer = llvm::cast<ConstantInteger32>(Immediate);
      uint32_t Value = Integer->getValue();
      uint32_t Cookie = Ctx->getRandomizationCookie();
      _mov(Reg, Ctx->getConstantInt(IceType_i32, Cookie + Value));
      Constant *Offset = Ctx->getConstantInt(IceType_i32, 0 - Cookie);
      _lea(Reg, Traits::X86OperandMem::create(Func, IceType_i32, Reg, Offset,
                                              nullptr, 0));
      // Make sure liveness analysis won't kill this variable, otherwise a
      // liveness assertion will be triggered.
      _set_dest_nonkillable();
      if (Immediate->getType() != IceType_i32) {
        Variable *TruncReg = makeReg(Immediate->getType(), RegNum);
        _mov(TruncReg, Reg);
        return TruncReg;
      }
      return Reg;
    }
    if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool) {
      // Pool the constant.
      // FROM:
      //   imm
      // TO:
      //   insert: mov $label, Reg
      //   => Reg
      assert(Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool);
      Immediate->setShouldBePooled(true);
      // If we have already assigned a physical register, we must come from
      // advancedPhiLowering()=>lowerAssign(). In this case we should reuse
      // the assigned register as this assignment is the start of its use-def
      // chain. So we add the RegNum argument here.
      Variable *Reg = makeReg(Immediate->getType(), RegNum);
      IceString Label;
      llvm::raw_string_ostream Label_stream(Label);
      Immediate->emitPoolLabel(Label_stream);
      const RelocOffsetT Offset = 0;
      const bool SuppressMangling = true;
      Constant *Symbol =
          Ctx->getConstantSym(Offset, Label_stream.str(), SuppressMangling);
      typename Traits::X86OperandMem *MemOperand =
          Traits::X86OperandMem::create(Func, Immediate->getType(), nullptr,
                                        Symbol);
      _mov(Reg, MemOperand);
      return Reg;
    }
    assert("Unsupported -randomize-pool-immediates option" && false);
  }
  // The constant Immediate is not eligible for blinding/pooling.
  return Immediate;
}
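
// A worked example of the blinding arithmetic above (hypothetical values):
// with imm = 0x1234 and cookie = 0xABCD, the emitted pair is
//   mov reg, 0xBE01          ; 0x1234 + 0xABCD
//   lea reg, [reg - 0xABCD]  ; recovers 0x1234 without touching the flags
// so the original immediate never appears literally in the instruction
// stream.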

template <class Machine>
typename TargetX86Base<Machine>::Traits::X86OperandMem *
TargetX86Base<Machine>::randomizeOrPoolImmediate(
    typename Traits::X86OperandMem *MemOperand, int32_t RegNum) {
  assert(MemOperand);
  if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None ||
      RandomizationPoolingPaused == true) {
    // Immediates randomization/pooling is turned off.
    return MemOperand;
  }

  // If this memory operand is already a randomized one, we do
  // not randomize it again.
  if (MemOperand->getRandomized())
    return MemOperand;

  if (Constant *C = llvm::dyn_cast_or_null<Constant>(MemOperand->getOffset())) {
    if (C->shouldBeRandomizedOrPooled(Ctx)) {
      // The offset of this mem operand should be blinded or pooled.
      Ctx->statsUpdateRPImms();
      if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() ==
          RPI_Randomize) {
        // Blind the constant offset.
        // FROM:
        //   offset[base, index, shift]
        // TO:
        //   insert: lea offset+cookie[base], RegTemp
        //   => -cookie[RegTemp, index, shift]
        uint32_t Value =
            llvm::dyn_cast<ConstantInteger32>(MemOperand->getOffset())
                ->getValue();
        uint32_t Cookie = Ctx->getRandomizationCookie();
        Constant *Mask1 = Ctx->getConstantInt(
            MemOperand->getOffset()->getType(), Cookie + Value);
        Constant *Mask2 =
            Ctx->getConstantInt(MemOperand->getOffset()->getType(), 0 - Cookie);

        typename Traits::X86OperandMem *TempMemOperand =
            Traits::X86OperandMem::create(Func, MemOperand->getType(),
                                          MemOperand->getBase(), Mask1);
        // If we have already assigned a physical register, we must come from
        // advancedPhiLowering()=>lowerAssign(). In this case we should reuse
        // the assigned register as this assignment is the start of its
        // use-def chain. So we add the RegNum argument here.
        Variable *RegTemp = makeReg(MemOperand->getOffset()->getType(), RegNum);
        _lea(RegTemp, TempMemOperand);
        // As the source operand doesn't use the dest register, we don't need
        // to add _set_dest_nonkillable(). But if we use the same Dest Reg,
        // that is, with RegNum assigned, we should add
        // _set_dest_nonkillable().
        if (RegNum != Variable::NoRegister)
          _set_dest_nonkillable();

        typename Traits::X86OperandMem *NewMemOperand =
            Traits::X86OperandMem::create(Func, MemOperand->getType(), RegTemp,
                                          Mask2, MemOperand->getIndex(),
                                          MemOperand->getShift(),
                                          MemOperand->getSegmentRegister());

        // Label this memory operand as randomized, so we won't randomize it
        // again in case we call legalize() multiple times on this memory
        // operand.
        NewMemOperand->setRandomized(true);
        return NewMemOperand;
      }
      if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool) {
        // Pool the constant offset.
        // FROM:
        //   offset[base, index, shift]
        // TO:
        //   insert: mov $label, RegTemp
        //   insert: lea [base, RegTemp], RegTemp
        //   => [RegTemp, index, shift]
        assert(Ctx->getFlags().getRandomizeAndPoolImmediatesOption() ==
               RPI_Pool);
        // Memory operands should never exist as source operands in phi
        // lowering assignments, so there is no need to reuse any registers
        // here. For phi lowering, we should not ask for new physical
        // registers in general. However, if we do meet a memory operand
        // during phi lowering, we should not blind or pool the immediates
        // for now.
        if (RegNum != Variable::NoRegister)
          return MemOperand;
        Variable *RegTemp = makeReg(IceType_i32);
        IceString Label;
        llvm::raw_string_ostream Label_stream(Label);
        MemOperand->getOffset()->emitPoolLabel(Label_stream);
        MemOperand->getOffset()->setShouldBePooled(true);
        const RelocOffsetT SymOffset = 0;
        bool SuppressMangling = true;
        Constant *Symbol = Ctx->getConstantSym(SymOffset, Label_stream.str(),
                                               SuppressMangling);
        typename Traits::X86OperandMem *SymbolOperand =
            Traits::X86OperandMem::create(
                Func, MemOperand->getOffset()->getType(), nullptr, Symbol);
        _mov(RegTemp, SymbolOperand);
        // If we have a base variable here, we should add the lea instruction
        // to add the value of the base variable to RegTemp. If there is no
        // base variable, we won't need this lea instruction.
        if (MemOperand->getBase()) {
          typename Traits::X86OperandMem *CalculateOperand =
              Traits::X86OperandMem::create(
                  Func, MemOperand->getType(), MemOperand->getBase(), nullptr,
                  RegTemp, 0, MemOperand->getSegmentRegister());
          _lea(RegTemp, CalculateOperand);
          _set_dest_nonkillable();
        }
        typename Traits::X86OperandMem *NewMemOperand =
            Traits::X86OperandMem::create(Func, MemOperand->getType(), RegTemp,
                                          nullptr, MemOperand->getIndex(),
                                          MemOperand->getShift(),
                                          MemOperand->getSegmentRegister());
        return NewMemOperand;
      }
      assert("Unsupported -randomize-pool-immediates option" && false);
    }
  }
  // The offset is not eligible for blinding or pooling; return the original
  // mem operand.
  return MemOperand;
}

} // end of namespace X86Internal
} // end of namespace Ice

#endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H