//===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the TargetLoweringX86Base class, which consists almost
/// entirely of the lowering sequence for each high-level instruction.
///
//===----------------------------------------------------------------------===//

#ifndef SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
#define SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H

#include "IceCfg.h"
#include "IceCfgNode.h"
#include "IceClFlags.h"
#include "IceDefs.h"
#include "IceELFObjectWriter.h"
#include "IceGlobalInits.h"
#include "IceInstVarIter.h"
#include "IceLiveness.h"
#include "IceOperand.h"
#include "IcePhiLoweringImpl.h"
#include "IceUtils.h"
#include "llvm/Support/MathExtras.h"

#include <stack>

namespace Ice {
namespace X86Internal {

/// A helper class to ease the setting of RandomizationPoolingPause, used to
/// disable constant blinding or pooling for some translation phases.
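/// A typical use (a sketch; the real call site appears in translateO2()
/// below) overrides the flag for the duration of a scope and restores the
/// previous value when the saver is destroyed:
///   { BoolFlagSaver B(RandomizationPoolingPaused, true); doLoadOpt(); }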
class BoolFlagSaver {
  BoolFlagSaver() = delete;
  BoolFlagSaver(const BoolFlagSaver &) = delete;
  BoolFlagSaver &operator=(const BoolFlagSaver &) = delete;

public:
  BoolFlagSaver(bool &F, bool NewValue) : OldValue(F), Flag(F) { F = NewValue; }
  ~BoolFlagSaver() { Flag = OldValue; }

private:
  const bool OldValue;
  bool &Flag;
};

template <class MachineTraits> class BoolFoldingEntry {
  BoolFoldingEntry(const BoolFoldingEntry &) = delete;

public:
  BoolFoldingEntry() = default;
  explicit BoolFoldingEntry(Inst *I);
  BoolFoldingEntry &operator=(const BoolFoldingEntry &) = default;
  /// Instr is the instruction producing the i1-type variable of interest.
  Inst *Instr = nullptr;
  /// IsComplex is the cached result of BoolFolding::hasComplexLowering(Instr).
  bool IsComplex = false;
  /// IsLiveOut is initialized conservatively to true, and is set to false when
  /// we encounter an instruction that ends Var's live range. We disable the
  /// folding optimization when Var is live beyond this basic block. Note that
  /// if liveness analysis is not performed (e.g. in Om1 mode), IsLiveOut will
  /// always be true and the folding optimization will never be performed.
  bool IsLiveOut = true;
  // NumUses counts the number of times Var is used as a source operand in the
  // basic block. If IsComplex is true and there is more than one use of Var,
  // then the folding optimization is disabled for Var.
  uint32_t NumUses = 0;
};

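// The point of the BoolFolding machinery is to let an i1 producer be folded
// directly into its consumer during lowering. For example (illustrative
// only):
//   %cmp = icmp slt i32 %a, %b
//   br i1 %cmp, label %T, label %F
// can be emitted as a fused cmp/jl pair, instead of materializing %cmp into
// a register with setcc and testing it again at the branch.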
template <class MachineTraits> class BoolFolding {
public:
  enum BoolFoldingProducerKind {
    PK_None,
    // TODO(jpp): PK_Icmp32 is no longer meaningful. Rename to PK_IcmpNative.
    PK_Icmp32,
    PK_Icmp64,
    PK_Fcmp,
    PK_Trunc
  };

  /// Currently the actual enum values are not used (other than CK_None), but
  /// we go ahead and produce them anyway for symmetry with the
  /// BoolFoldingProducerKind.
  enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext };

private:
  BoolFolding(const BoolFolding &) = delete;
  BoolFolding &operator=(const BoolFolding &) = delete;

public:
  BoolFolding() = default;
  static BoolFoldingProducerKind getProducerKind(const Inst *Instr);
  static BoolFoldingConsumerKind getConsumerKind(const Inst *Instr);
  static bool hasComplexLowering(const Inst *Instr);
  void init(CfgNode *Node);
  const Inst *getProducerFor(const Operand *Opnd) const;
  void dump(const Cfg *Func) const;

private:
  /// Returns true if Producers contains a valid entry for the given VarNum.
  bool containsValid(SizeT VarNum) const {
    auto Element = Producers.find(VarNum);
    return Element != Producers.end() && Element->second.Instr != nullptr;
  }
  void setInvalid(SizeT VarNum) { Producers[VarNum].Instr = nullptr; }
  /// Producers maps Variable::Number to a BoolFoldingEntry.
  std::unordered_map<SizeT, BoolFoldingEntry<MachineTraits>> Producers;
};

template <class MachineTraits>
BoolFoldingEntry<MachineTraits>::BoolFoldingEntry(Inst *I)
    : Instr(I), IsComplex(BoolFolding<MachineTraits>::hasComplexLowering(I)) {}

template <class MachineTraits>
typename BoolFolding<MachineTraits>::BoolFoldingProducerKind
BoolFolding<MachineTraits>::getProducerKind(const Inst *Instr) {
  if (llvm::isa<InstIcmp>(Instr)) {
    if (MachineTraits::Is64Bit || Instr->getSrc(0)->getType() != IceType_i64)
      return PK_Icmp32;
    return PK_Icmp64;
  }
  return PK_None; // TODO(stichnot): remove this

  if (llvm::isa<InstFcmp>(Instr))
    return PK_Fcmp;
  if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {
    switch (Cast->getCastKind()) {
    default:
      return PK_None;
    case InstCast::Trunc:
      return PK_Trunc;
    }
  }
  return PK_None;
}

template <class MachineTraits>
typename BoolFolding<MachineTraits>::BoolFoldingConsumerKind
BoolFolding<MachineTraits>::getConsumerKind(const Inst *Instr) {
  if (llvm::isa<InstBr>(Instr))
    return CK_Br;
  if (llvm::isa<InstSelect>(Instr))
    return CK_Select;
  return CK_None; // TODO(stichnot): remove this

  if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {
    switch (Cast->getCastKind()) {
    default:
      return CK_None;
    case InstCast::Sext:
      return CK_Sext;
    case InstCast::Zext:
      return CK_Zext;
    }
  }
  return CK_None;
}

/// Returns true if the producing instruction has a "complex" lowering
/// sequence. This generally means that its lowering sequence requires more
/// than one conditional branch, namely 64-bit integer compares and some
/// floating-point compares. When this is true, and there is more than one
/// consumer, we prefer to disable the folding optimization because that
/// minimizes the total number of branches.
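/// For example (an illustration, assuming the usual x86 fcmp table layout):
/// an ordered-equal fcmp must branch on both ZF and PF, since ucomiss sets
/// PF for unordered operands, so its TableFcmp entry carries a second branch
/// condition (C2 != Br_None) and it counts as "complex" here.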
template <class MachineTraits>
bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) {
  switch (getProducerKind(Instr)) {
  default:
    return false;
  case PK_Icmp64:
    return true;
  case PK_Fcmp:
    return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()]
               .C2 != MachineTraits::Cond::Br_None;
  }
}

template <class MachineTraits>
void BoolFolding<MachineTraits>::init(CfgNode *Node) {
  Producers.clear();
  for (Inst &Instr : Node->getInsts()) {
    // Check whether Instr is a valid producer.
    Variable *Var = Instr.getDest();
    if (!Instr.isDeleted() // only consider non-deleted instructions
        && Var             // only instructions with an actual dest var
        && Var->getType() == IceType_i1          // only bool-type dest vars
        && getProducerKind(&Instr) != PK_None) { // white-listed instructions
      Producers[Var->getIndex()] = BoolFoldingEntry<MachineTraits>(&Instr);
    }
    // Check each src variable against the map.
    FOREACH_VAR_IN_INST(Var, Instr) {
      SizeT VarNum = Var->getIndex();
      if (containsValid(VarNum)) {
        if (IndexOfVarOperandInInst(Var) !=
                0 // All valid consumers use Var as the first source operand
            ||
            getConsumerKind(&Instr) == CK_None // must be white-listed
            ||
            (getConsumerKind(&Instr) != CK_Br && // Icmp64 only folds in branch
             getProducerKind(Producers[VarNum].Instr) != PK_Icmp32) ||
            (Producers[VarNum].IsComplex && // complex can't be multi-use
             Producers[VarNum].NumUses > 0)) {
          setInvalid(VarNum);
          continue;
        }
        ++Producers[VarNum].NumUses;
        if (Instr.isLastUse(Var)) {
          Producers[VarNum].IsLiveOut = false;
        }
      }
    }
  }
  for (auto &I : Producers) {
    // Ignore entries previously marked invalid.
    if (I.second.Instr == nullptr)
      continue;
    // Disable the producer if its dest may be live beyond this block.
    if (I.second.IsLiveOut) {
      setInvalid(I.first);
      continue;
    }
    // Mark as "dead" rather than outright deleting. This is so that other
    // peephole style optimizations during or before lowering have access to
    // this instruction in undeleted form. See for example
    // tryOptimizedCmpxchgCmpBr().
    I.second.Instr->setDead();
  }
}

template <class MachineTraits>
const Inst *
BoolFolding<MachineTraits>::getProducerFor(const Operand *Opnd) const {
  auto *Var = llvm::dyn_cast<const Variable>(Opnd);
  if (Var == nullptr)
    return nullptr;
  SizeT VarNum = Var->getIndex();
  auto Element = Producers.find(VarNum);
  if (Element == Producers.end())
    return nullptr;
  return Element->second.Instr;
}

template <class MachineTraits>
void BoolFolding<MachineTraits>::dump(const Cfg *Func) const {
  if (!BuildDefs::dump() || !Func->isVerbose(IceV_Folding))
    return;
  OstreamLocker L(Func->getContext());
  Ostream &Str = Func->getContext()->getStrDump();
  for (auto &I : Producers) {
    if (I.second.Instr == nullptr)
      continue;
    Str << "Found foldable producer:\n  ";
    I.second.Instr->dump(Func);
    Str << "\n";
  }
}

template <class Machine>
void TargetX86Base<Machine>::initNodeForLowering(CfgNode *Node) {
  FoldingInfo.init(Node);
  FoldingInfo.dump(Func);
}

template <class Machine>
TargetX86Base<Machine>::TargetX86Base(Cfg *Func)
    : TargetLowering(Func) {
  static_assert(
      (Traits::InstructionSet::End - Traits::InstructionSet::Begin) ==
          (TargetInstructionSet::X86InstructionSet_End -
           TargetInstructionSet::X86InstructionSet_Begin),
      "Traits::InstructionSet range different from TargetInstructionSet");
  if (Func->getContext()->getFlags().getTargetInstructionSet() !=
      TargetInstructionSet::BaseInstructionSet) {
    InstructionSet = static_cast<typename Traits::InstructionSet>(
        (Func->getContext()->getFlags().getTargetInstructionSet() -
         TargetInstructionSet::X86InstructionSet_Begin) +
        Traits::InstructionSet::Begin);
  }
  // TODO: Don't initialize IntegerRegisters and friends every time. Instead,
  // initialize in some sort of static initializer for the class.
  Traits::initRegisterSet(&TypeToRegisterSet, &RegisterAliases, &ScratchRegs);
}

template <class Machine> void TargetX86Base<Machine>::translateO2() {
  TimerMarker T(TimerStack::TT_O2, Func);

  if (!Ctx->getFlags().getPhiEdgeSplit()) {
    // Lower Phi instructions.
    Func->placePhiLoads();
    if (Func->hasError())
      return;
    Func->placePhiStores();
    if (Func->hasError())
      return;
    Func->deletePhis();
    if (Func->hasError())
      return;
    Func->dump("After Phi lowering");
  }

  // Run this early so it can be used to focus optimizations on potentially hot
  // code.
  // TODO(stichnot,ascull): currently only used for regalloc, not for the
  // expensive high-level optimizations that could be focused on potentially
  // hot code.
  Func->computeLoopNestDepth();
  Func->dump("After loop nest depth analysis");

  // Address mode optimization.
  Func->getVMetadata()->init(VMK_SingleDefs);
  Func->doAddressOpt();

  // Find read-modify-write opportunities. Do this after address mode
  // optimization so that doAddressOpt() doesn't need to be applied to RMW
  // instructions as well.
  findRMW();
  Func->dump("After RMW transform");

  // Argument lowering.
  Func->doArgLowering();

  // Target lowering. This requires liveness analysis for some parts of the
  // lowering decisions, such as compare/branch fusing. If non-lightweight
  // liveness analysis is used, the instructions need to be renumbered first.
  // TODO: This renumbering should only be necessary if we're actually
  // calculating live intervals, which we only do for register allocation.
  Func->renumberInstructions();
  if (Func->hasError())
    return;

  // TODO: It should be sufficient to use the fastest liveness calculation,
  // i.e. livenessLightweight(). However, for some reason that slows down the
  // rest of the translation. Investigate.
  Func->liveness(Liveness_Basic);
  if (Func->hasError())
    return;
  Func->dump("After x86 address mode opt");

  // Disable constant blinding or pooling for load optimization.
  {
    BoolFlagSaver B(RandomizationPoolingPaused, true);
    doLoadOpt();
  }
  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After x86 codegen");

  // Register allocation. This requires instruction renumbering and full
  // liveness analysis. Loops must be identified before liveness so variable
  // use weights are correct.
  Func->renumberInstructions();
  if (Func->hasError())
    return;
  Func->liveness(Liveness_Intervals);
  if (Func->hasError())
    return;
  // Validate the live range computations. The expensive validation call is
  // deliberately only made when assertions are enabled.
  assert(Func->validateLiveness());
  // The post-codegen dump is done here, after liveness analysis and associated
  // cleanup, to make the dump cleaner and more useful.
  Func->dump("After initial x8632 codegen");
  Func->getVMetadata()->init(VMK_All);
  regAlloc(RAK_Global);
  if (Func->hasError())
    return;
  Func->dump("After linear scan regalloc");

  if (Ctx->getFlags().getPhiEdgeSplit()) {
    Func->advancedPhiLowering();
    Func->dump("After advanced Phi lowering");
  }

  // Stack frame mapping.
  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  Func->contractEmptyNodes();
  Func->reorderNodes();

  // Shuffle basic block order if -reorder-basic-blocks is enabled.
  Func->shuffleNodes();

  // Branch optimization. This needs to be done just before code emission. In
  // particular, no transformations that insert or reorder CfgNodes should be
  // done after branch optimization. We go ahead and do it before nop insertion
  // to reduce the amount of work needed for searching for opportunities.
  Func->doBranchOpt();
  Func->dump("After branch optimization");

  // Nop insertion if -nop-insertion is enabled.
  Func->doNopInsertion();

  // Mark nodes that require sandbox alignment.
  if (Ctx->getFlags().getUseSandboxing())
    Func->markNodesForSandboxing();
}
406
template <class Machine> void TargetX86Base<Machine>::translateOm1() {
  TimerMarker T(TimerStack::TT_Om1, Func);

  Func->placePhiLoads();
  if (Func->hasError())
    return;
  Func->placePhiStores();
  if (Func->hasError())
    return;
  Func->deletePhis();
  if (Func->hasError())
    return;
  Func->dump("After Phi lowering");

  Func->doArgLowering();
  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After initial x8632 codegen");

  regAlloc(RAK_InfOnly);
  if (Func->hasError())
    return;
  Func->dump("After regalloc of infinite-weight variables");

  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  // Shuffle basic block order if -reorder-basic-blocks is enabled.
  Func->shuffleNodes();

  // Nop insertion if -nop-insertion is enabled.
  Func->doNopInsertion();

  // Mark nodes that require sandbox alignment.
  if (Ctx->getFlags().getUseSandboxing())
    Func->markNodesForSandboxing();
}

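/// Returns true if the given arithmetic operation can be lowered as an x86
/// read-modify-write instruction that operates directly on memory (e.g. an
/// add of the form "add [addr], reg"), which is what the FakeRMW
/// transformation in findRMW() below ultimately targets.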
inline bool canRMW(const InstArithmetic *Arith) {
  Type Ty = Arith->getDest()->getType();
  // X86 vector instructions write to a register and have no RMW option.
  if (isVectorType(Ty))
    return false;
  bool isI64 = Ty == IceType_i64;

  switch (Arith->getOp()) {
  // Not handled for lack of simple lowering:
  //   shift on i64
  //   mul, udiv, urem, sdiv, srem, frem
  // Not handled for lack of RMW instructions:
  //   fadd, fsub, fmul, fdiv (also vector types)
  default:
    return false;
  case InstArithmetic::Add:
  case InstArithmetic::Sub:
  case InstArithmetic::And:
  case InstArithmetic::Or:
  case InstArithmetic::Xor:
    return true;
  case InstArithmetic::Shl:
  case InstArithmetic::Lshr:
  case InstArithmetic::Ashr:
    return false; // TODO(stichnot): implement
    return !isI64;
  }
}

template <class Machine>
bool isSameMemAddressOperand(const Operand *A, const Operand *B) {
  if (A == B)
    return true;
  if (auto *MemA = llvm::dyn_cast<
          typename TargetX86Base<Machine>::Traits::X86OperandMem>(A)) {
    if (auto *MemB = llvm::dyn_cast<
            typename TargetX86Base<Machine>::Traits::X86OperandMem>(B)) {
      return MemA->getBase() == MemB->getBase() &&
             MemA->getOffset() == MemB->getOffset() &&
             MemA->getIndex() == MemB->getIndex() &&
             MemA->getShift() == MemB->getShift() &&
             MemA->getSegmentRegister() == MemB->getSegmentRegister();
    }
  }
  return false;
}

template <class Machine> void TargetX86Base<Machine>::findRMW() {
  Func->dump("Before RMW");
  if (Func->isVerbose(IceV_RMW))
    Func->getContext()->lockStr();
  for (CfgNode *Node : Func->getNodes()) {
    // Walk through the instructions, considering each sequence of 3
    // instructions, and look for the particular RMW pattern. Note that this
    // search can be "broken" (false negatives) if there are intervening
    // deleted instructions, or intervening instructions that could be safely
    // moved out of the way to reveal an RMW pattern.
    auto E = Node->getInsts().end();
    auto I1 = E, I2 = E, I3 = Node->getInsts().begin();
    for (; I3 != E; I1 = I2, I2 = I3, ++I3) {
      // Make I3 skip over deleted instructions.
      while (I3 != E && I3->isDeleted())
        ++I3;
      if (I1 == E || I2 == E || I3 == E)
        continue;
      assert(!I1->isDeleted());
      assert(!I2->isDeleted());
      assert(!I3->isDeleted());
      auto *Load = llvm::dyn_cast<InstLoad>(I1);
      auto *Arith = llvm::dyn_cast<InstArithmetic>(I2);
      auto *Store = llvm::dyn_cast<InstStore>(I3);
      if (!Load || !Arith || !Store)
        continue;
      // Look for:
      //   a = Load addr
      //   b = <op> a, other
      //   Store b, addr
      // Change to:
      //   a = Load addr
      //   b = <op> a, other
      //   x = FakeDef
      //   RMW <op>, addr, other, x
      //   b = Store b, addr, x
      // Note that inferTwoAddress() makes sure setDestRedefined() gets called
      // on the updated Store instruction, to avoid liveness problems later.
      //
      // With this transformation, the Store instruction acquires a Dest
      // variable and is now subject to dead code elimination if there are no
      // more uses of "b". Variable "x" is a beacon for determining whether the
      // Store instruction gets dead-code eliminated. If the Store instruction
      // is eliminated, then it must be the case that the RMW instruction ends
      // x's live range, and therefore the RMW instruction will be retained and
      // later lowered. On the other hand, if the RMW instruction does not end
      // x's live range, then the Store instruction must still be present, and
      // therefore the RMW instruction is ignored during lowering because it is
      // redundant with the Store instruction.
      //
      // Note that if "a" has further uses, the RMW transformation may still
      // trigger, resulting in two loads and one store, which is worse than the
      // original one load and one store. However, this is probably rare, and
      // caching probably keeps it just as fast.
      if (!isSameMemAddressOperand<Machine>(Load->getSourceAddress(),
                                            Store->getAddr()))
        continue;
      Operand *ArithSrcFromLoad = Arith->getSrc(0);
      Operand *ArithSrcOther = Arith->getSrc(1);
      if (ArithSrcFromLoad != Load->getDest()) {
        if (!Arith->isCommutative() || ArithSrcOther != Load->getDest())
          continue;
        std::swap(ArithSrcFromLoad, ArithSrcOther);
      }
      if (Arith->getDest() != Store->getData())
        continue;
      if (!canRMW(Arith))
        continue;
      if (Func->isVerbose(IceV_RMW)) {
        Ostream &Str = Func->getContext()->getStrDump();
        Str << "Found RMW in " << Func->getFunctionName() << ":\n  ";
        Load->dump(Func);
        Str << "\n  ";
        Arith->dump(Func);
        Str << "\n  ";
        Store->dump(Func);
        Str << "\n";
      }
      Variable *Beacon = Func->makeVariable(IceType_i32);
      Beacon->setMustNotHaveReg();
      Store->setRmwBeacon(Beacon);
      InstFakeDef *BeaconDef = InstFakeDef::create(Func, Beacon);
      Node->getInsts().insert(I3, BeaconDef);
      auto *RMW = Traits::Insts::FakeRMW::create(
          Func, ArithSrcOther, Store->getAddr(), Beacon, Arith->getOp());
      Node->getInsts().insert(I3, RMW);
    }
  }
  if (Func->isVerbose(IceV_RMW))
    Func->getContext()->unlockStr();
}

// Converts a ConstantInteger32 operand into its constant value, or
// MemoryOrderInvalid if the operand is not a ConstantInteger32.
inline uint64_t getConstantMemoryOrder(Operand *Opnd) {
  if (auto *Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
    return Integer->getValue();
  return Intrinsics::MemoryOrderInvalid;
}

/// Determines whether the dest of a Load instruction can be folded into one of
/// the src operands of a 2-operand instruction. This is true as long as the
/// load dest matches exactly one of the binary instruction's src operands.
/// Replaces Src0 or Src1 with LoadSrc if the answer is true.
inline bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest,
                                      Operand *&Src0, Operand *&Src1) {
  if (Src0 == LoadDest && Src1 != LoadDest) {
    Src0 = LoadSrc;
    return true;
  }
  if (Src0 != LoadDest && Src1 == LoadDest) {
    Src1 = LoadSrc;
    return true;
  }
  return false;
}

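/// For example (illustrative only), doLoadOpt() rewrites
///   a = load addr
///   d = add a, b     ; no further uses of "a"
/// into a single
///   d = add load(addr), b
/// so that lowering can fold the load into the add's memory operand instead
/// of first loading into a register.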
template <class Machine> void TargetX86Base<Machine>::doLoadOpt() {
  for (CfgNode *Node : Func->getNodes()) {
    Context.init(Node);
    while (!Context.atEnd()) {
      Variable *LoadDest = nullptr;
      Operand *LoadSrc = nullptr;
      Inst *CurInst = Context.getCur();
      Inst *Next = Context.getNextInst();
      // Determine whether the current instruction is a Load instruction or
      // equivalent.
      if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) {
        // An InstLoad always qualifies.
        LoadDest = Load->getDest();
        constexpr bool DoLegalize = false;
        LoadSrc = formMemoryOperand(Load->getSourceAddress(),
                                    LoadDest->getType(), DoLegalize);
      } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) {
        // An AtomicLoad intrinsic qualifies as long as it has a valid memory
        // ordering, and can be implemented in a single instruction (i.e., not
        // i64 on x86-32).
        Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID;
        if (ID == Intrinsics::AtomicLoad &&
            (Traits::Is64Bit || Intrin->getDest()->getType() != IceType_i64) &&
            Intrinsics::isMemoryOrderValid(
                ID, getConstantMemoryOrder(Intrin->getArg(1)))) {
          LoadDest = Intrin->getDest();
          constexpr bool DoLegalize = false;
          LoadSrc = formMemoryOperand(Intrin->getArg(0), LoadDest->getType(),
                                      DoLegalize);
        }
      }
      // A Load instruction can be folded into the following instruction only
      // if the following instruction ends the Load's Dest variable's live
      // range.
      if (LoadDest && Next && Next->isLastUse(LoadDest)) {
        assert(LoadSrc);
        Inst *NewInst = nullptr;
        if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Next)) {
          Operand *Src0 = Arith->getSrc(0);
          Operand *Src1 = Arith->getSrc(1);
          if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
            NewInst = InstArithmetic::create(Func, Arith->getOp(),
                                             Arith->getDest(), Src0, Src1);
          }
        } else if (auto *Icmp = llvm::dyn_cast<InstIcmp>(Next)) {
          Operand *Src0 = Icmp->getSrc(0);
          Operand *Src1 = Icmp->getSrc(1);
          if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
            NewInst = InstIcmp::create(Func, Icmp->getCondition(),
                                       Icmp->getDest(), Src0, Src1);
          }
        } else if (auto *Fcmp = llvm::dyn_cast<InstFcmp>(Next)) {
          Operand *Src0 = Fcmp->getSrc(0);
          Operand *Src1 = Fcmp->getSrc(1);
          if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
            NewInst = InstFcmp::create(Func, Fcmp->getCondition(),
                                       Fcmp->getDest(), Src0, Src1);
          }
        } else if (auto *Select = llvm::dyn_cast<InstSelect>(Next)) {
          Operand *Src0 = Select->getTrueOperand();
          Operand *Src1 = Select->getFalseOperand();
          if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
            NewInst = InstSelect::create(Func, Select->getDest(),
                                         Select->getCondition(), Src0, Src1);
          }
        } else if (auto *Cast = llvm::dyn_cast<InstCast>(Next)) {
          // The load dest can always be folded into a Cast instruction.
          Variable *Src0 = llvm::dyn_cast<Variable>(Cast->getSrc(0));
          if (Src0 == LoadDest) {
            NewInst = InstCast::create(Func, Cast->getCastKind(),
                                       Cast->getDest(), LoadSrc);
          }
        }
        if (NewInst) {
          CurInst->setDeleted();
          Next->setDeleted();
          Context.insert(NewInst);
          // Update NewInst->LiveRangesEnded so that target lowering may
          // benefit. Also update NewInst->HasSideEffects.
          NewInst->spliceLivenessInfo(Next, CurInst);
        }
      }
      Context.advanceCur();
      Context.advanceNext();
    }
  }
  Func->dump("After load optimization");
}

template <class Machine>
bool TargetX86Base<Machine>::doBranchOpt(Inst *I, const CfgNode *NextNode) {
  if (auto *Br = llvm::dyn_cast<typename Traits::Insts::Br>(I)) {
    return Br->optimizeBranch(NextNode);
  }
  return false;
}

template <class Machine>
Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) {
  // Special case: never allow partial reads/writes to/from %rBP and %rSP.
  if (RegNum == Traits::RegisterSet::Reg_esp ||
      RegNum == Traits::RegisterSet::Reg_ebp)
    Ty = Traits::WordType;
  if (Ty == IceType_void)
    Ty = IceType_i32;
  if (PhysicalRegisters[Ty].empty())
    PhysicalRegisters[Ty].resize(Traits::RegisterSet::Reg_NUM);
  assert(RegNum < PhysicalRegisters[Ty].size());
  Variable *Reg = PhysicalRegisters[Ty][RegNum];
  if (Reg == nullptr) {
    Reg = Func->makeVariable(Ty);
    Reg->setRegNum(RegNum);
    PhysicalRegisters[Ty][RegNum] = Reg;
    // Specially mark a named physical register as an "argument" so that it is
    // considered live upon function entry. Otherwise it's possible to get
    // liveness validation errors for saving callee-save registers.
    Func->addImplicitArg(Reg);
    // Don't bother tracking the live range of a named physical register.
    Reg->setIgnoreLiveness();
  }
  return Reg;
}

template <class Machine>
IceString TargetX86Base<Machine>::getRegName(SizeT RegNum, Type) const {
  return Traits::getRegName(RegNum);
}

template <class Machine>
void TargetX86Base<Machine>::emitVariable(const Variable *Var) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Ctx->getStrEmit();
  if (Var->hasReg()) {
    Str << "%" << getRegName(Var->getRegNum(), Var->getType());
    return;
  }
  if (Var->mustHaveReg()) {
    llvm_unreachable("Infinite-weight Variable has no register assigned");
  }
  const int32_t Offset = Var->getStackOffset();
  int32_t OffsetAdj = 0;
  int32_t BaseRegNum = Var->getBaseRegNum();
  if (BaseRegNum == Variable::NoRegister) {
    BaseRegNum = getFrameOrStackReg();
    if (!hasFramePointer())
      OffsetAdj = getStackAdjustment();
  }
  // Print in the form "OffsetAdj+Offset(%reg)", taking care that:
  //   - OffsetAdj may be 0
  //   - Offset is never printed when it is 0
  //   - Offset may be positive or symbolic, so a "+" might be needed

  // Only print nonzero OffsetAdj.
  if (OffsetAdj) {
    Str << OffsetAdj;
  }
  const bool DecorateAsm = Func->getContext()->getFlags().getDecorateAsm();
  // Only print Offset when it is nonzero, regardless of DecorateAsm.
  if (Offset) {
    if (OffsetAdj && (DecorateAsm || Offset > 0)) {
      Str << "+";
    }
    if (DecorateAsm) {
      Str << Var->getSymbolicStackOffset(Func);
    } else {
      Str << Offset;
    }
  }
  const Type FrameSPTy = Traits::WordType;
  Str << "(%" << getRegName(BaseRegNum, FrameSPTy) << ")";
}

template <class Machine>
typename TargetX86Base<Machine>::Traits::Address
TargetX86Base<Machine>::stackVarToAsmOperand(const Variable *Var) const {
  if (Var->hasReg())
    llvm_unreachable("Stack Variable has a register assigned");
  if (Var->mustHaveReg()) {
    llvm_unreachable("Infinite-weight Variable has no register assigned");
  }
  int32_t Offset = Var->getStackOffset();
  int32_t BaseRegNum = Var->getBaseRegNum();
  if (Var->getBaseRegNum() == Variable::NoRegister) {
    BaseRegNum = getFrameOrStackReg();
    if (!hasFramePointer())
      Offset += getStackAdjustment();
  }
  return typename Traits::Address(Traits::getEncodedGPR(BaseRegNum), Offset,
                                  AssemblerFixup::NoFixup);
}

/// Helper function for addProlog().
///
/// This assumes Arg is an argument passed on the stack. This sets the frame
/// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an
/// I64 arg that has been split into Lo and Hi components, it calls itself
/// recursively on the components, taking care to handle Lo first because of
/// the little-endian architecture. Lastly, this function generates an
/// instruction to copy Arg into its assigned register if applicable.
template <class Machine>
void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg,
                                                    Variable *FramePtr,
                                                    size_t BasicFrameOffset,
                                                    size_t StackAdjBytes,
                                                    size_t &InArgsSizeBytes) {
  if (!Traits::Is64Bit) {
    if (auto *Arg64On32 = llvm::dyn_cast<Variable64On32>(Arg)) {
      Variable *Lo = Arg64On32->getLo();
      Variable *Hi = Arg64On32->getHi();
      finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, StackAdjBytes,
                             InArgsSizeBytes);
      finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, StackAdjBytes,
                             InArgsSizeBytes);
      return;
    }
  }
  Type Ty = Arg->getType();
  if (isVectorType(Ty)) {
    InArgsSizeBytes = Traits::applyStackAlignment(InArgsSizeBytes);
  }
  Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
  InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
  if (Arg->hasReg()) {
    assert(Ty != IceType_i64 || Traits::Is64Bit);
    typename Traits::X86OperandMem *Mem = Traits::X86OperandMem::create(
        Func, Ty, FramePtr,
        Ctx->getConstantInt32(Arg->getStackOffset() + StackAdjBytes));
    if (isVectorType(Arg->getType())) {
      _movp(Arg, Mem);
    } else {
      _mov(Arg, Mem);
    }
    // This argument-copying instruction uses an explicit Traits::X86OperandMem
    // operand instead of a Variable, so its fill-from-stack operation has to
    // be tracked separately for statistics.
    Ctx->statsUpdateFills();
  }
}

template <class Machine> Type TargetX86Base<Machine>::stackSlotType() {
  return Traits::WordType;
}

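/// loOperand() and hiOperand() split a 64-bit operand into its two 32-bit
/// halves for 32-bit targets. For example (illustrative only), a
/// ConstantInteger64 0x1122334455667788 yields lo 0x55667788 and hi
/// 0x11223344, and a memory operand at [addr] yields [addr] and [addr+4].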
John Porto1d235422015-08-12 12:37:53 -0700856template <class Machine>
857template <typename T>
John Porto1d235422015-08-12 12:37:53 -0700858typename std::enable_if<!T::Is64Bit, Operand>::type *
859TargetX86Base<Machine>::loOperand(Operand *Operand) {
John Porto7e93c622015-06-23 10:58:57 -0700860 assert(Operand->getType() == IceType_i64 ||
861 Operand->getType() == IceType_f64);
862 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
863 return Operand;
Andrew Scull6d47bcd2015-09-17 17:10:05 -0700864 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand))
865 return Var64On32->getLo();
Jan Voungfbdd2442015-07-15 12:36:20 -0700866 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
867 auto *ConstInt = llvm::dyn_cast<ConstantInteger32>(
John Porto7e93c622015-06-23 10:58:57 -0700868 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue())));
Jan Voungfbdd2442015-07-15 12:36:20 -0700869 // Check if we need to blind/pool the constant.
John Porto7e93c622015-06-23 10:58:57 -0700870 return legalize(ConstInt);
871 }
John Porto921856d2015-07-07 11:56:26 -0700872 if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Operand)) {
873 auto *MemOperand = Traits::X86OperandMem::create(
John Porto7e93c622015-06-23 10:58:57 -0700874 Func, IceType_i32, Mem->getBase(), Mem->getOffset(), Mem->getIndex(),
875 Mem->getShift(), Mem->getSegmentRegister());
876 // Test if we should randomize or pool the offset, if so randomize it or
877 // pool it then create mem operand with the blinded/pooled constant.
878 // Otherwise, return the mem operand as ordinary mem operand.
879 return legalize(MemOperand);
880 }
881 llvm_unreachable("Unsupported operand type");
882 return nullptr;
883}
884
template <class Machine>
template <typename T>
typename std::enable_if<!T::Is64Bit, Operand>::type *
TargetX86Base<Machine>::hiOperand(Operand *Operand) {
  assert(Operand->getType() == IceType_i64 ||
         Operand->getType() == IceType_f64);
  if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
    return Operand;
  if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand))
    return Var64On32->getHi();
  if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
    auto *ConstInt = llvm::dyn_cast<ConstantInteger32>(
        Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue() >> 32)));
    // Check if we need to blind/pool the constant.
    return legalize(ConstInt);
  }
  if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Operand)) {
    Constant *Offset = Mem->getOffset();
    if (Offset == nullptr) {
      Offset = Ctx->getConstantInt32(4);
    } else if (auto *IntOffset = llvm::dyn_cast<ConstantInteger32>(Offset)) {
      Offset = Ctx->getConstantInt32(4 + IntOffset->getValue());
    } else if (auto *SymOffset = llvm::dyn_cast<ConstantRelocatable>(Offset)) {
      assert(!Utils::WouldOverflowAdd(SymOffset->getOffset(), 4));
      Offset =
          Ctx->getConstantSym(4 + SymOffset->getOffset(), SymOffset->getName(),
                              SymOffset->getSuppressMangling());
    }
    auto *MemOperand = Traits::X86OperandMem::create(
        Func, IceType_i32, Mem->getBase(), Offset, Mem->getIndex(),
        Mem->getShift(), Mem->getSegmentRegister());
    // Test if the Offset is an eligible i32 constant for randomization and
    // pooling. Blind/pool it if it is. Otherwise return as an ordinary mem
    // operand.
    return legalize(MemOperand);
  }
  llvm_unreachable("Unsupported operand type");
  return nullptr;
}

template <class Machine>
llvm::SmallBitVector
TargetX86Base<Machine>::getRegisterSet(RegSetMask Include,
                                       RegSetMask Exclude) const {
  return Traits::getRegisterSet(Include, Exclude);
}

template <class Machine>
void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) {
  if (!Inst->getKnownFrameOffset())
    IsEbpBasedFrame = true;
  // Conservatively require the stack to be aligned. Some stack adjustment
  // operations implemented below assume that the stack is aligned before the
  // alloca. All the alloca code ensures that the stack alignment is preserved
  // after the alloca. The stack alignment restriction can be relaxed in some
  // cases.
  NeedsStackAlignment = true;

  // TODO(stichnot): minimize the number of adjustments of esp, etc.
  Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
  Operand *TotalSize = legalize(Inst->getSizeInBytes());
  Variable *Dest = Inst->getDest();
  uint32_t AlignmentParam = Inst->getAlignInBytes();
  // For default align=0, set it to the real value 1, to avoid any
  // bit-manipulation problems below.
  AlignmentParam = std::max(AlignmentParam, 1u);

  // LLVM enforces power of 2 alignment.
  assert(llvm::isPowerOf2_32(AlignmentParam));
  assert(llvm::isPowerOf2_32(Traits::X86_STACK_ALIGNMENT_BYTES));

  uint32_t Alignment =
      std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES);
  if (Alignment > Traits::X86_STACK_ALIGNMENT_BYTES) {
    _and(esp, Ctx->getConstantInt32(-Alignment));
  }
  if (const auto *ConstantTotalSize =
          llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
    uint32_t Value = ConstantTotalSize->getValue();
    Value = Utils::applyAlignment(Value, Alignment);
    if (Inst->getKnownFrameOffset()) {
      _adjust_stack(Value);
      FixedAllocaSizeBytes += Value;
    } else {
      _sub(esp, Ctx->getConstantInt32(Value));
    }
  } else {
    // Non-constant sizes need to be adjusted to the next highest multiple of
    // the required alignment at runtime.
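    // That is, T = (TotalSize + Alignment - 1) & -Alignment; the add/and pair
    // below works because -Alignment == ~(Alignment - 1) when Alignment is a
    // power of 2.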
    Variable *T = makeReg(IceType_i32);
    _mov(T, TotalSize);
    _add(T, Ctx->getConstantInt32(Alignment - 1));
    _and(T, Ctx->getConstantInt32(-Alignment));
    _sub(esp, T);
  }
  _mov(Dest, esp);
}

/// Strength-reduce scalar integer multiplication by a constant (for i32 or
/// narrower) for certain constants. The lea instruction can be used to
/// multiply by 3, 5, or 9, and the shl instruction can be used to multiply by
/// powers of 2. These can be combined such that e.g. multiplying by 100 can be
/// done as 2 lea-based multiplies by 5, combined with left-shifting by 2.
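/// Concretely (a worked illustration of the loop below): 100 factors as
/// 5 * 5 * 2 * 2, so Count5 == 2 and Count2 == 2, and the emitted sequence is
/// two "lea (%T,%T,4)" instructions (each computing T + 4*T == 5*T) followed
/// by one "shl $2", for a total of 3 counted operations.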
template <class Machine>
bool TargetX86Base<Machine>::optimizeScalarMul(Variable *Dest, Operand *Src0,
                                               int32_t Src1) {
  // Disable this optimization for Om1 and O0, just to keep things simple
  // there.
  if (Ctx->getFlags().getOptLevel() < Opt_1)
    return false;
  Type Ty = Dest->getType();
  Variable *T = nullptr;
  if (Src1 == -1) {
    _mov(T, Src0);
    _neg(T);
    _mov(Dest, T);
    return true;
  }
  if (Src1 == 0) {
    _mov(Dest, Ctx->getConstantZero(Ty));
    return true;
  }
  if (Src1 == 1) {
    _mov(T, Src0);
    _mov(Dest, T);
    return true;
  }
  // Don't bother with the edge case where Src1 == MININT.
  if (Src1 == -Src1)
    return false;
  const bool Src1IsNegative = Src1 < 0;
  if (Src1IsNegative)
    Src1 = -Src1;
  uint32_t Count9 = 0;
  uint32_t Count5 = 0;
  uint32_t Count3 = 0;
  uint32_t Count2 = 0;
  uint32_t CountOps = 0;
  while (Src1 > 1) {
    if (Src1 % 9 == 0) {
      ++CountOps;
      ++Count9;
      Src1 /= 9;
    } else if (Src1 % 5 == 0) {
      ++CountOps;
      ++Count5;
      Src1 /= 5;
    } else if (Src1 % 3 == 0) {
      ++CountOps;
      ++Count3;
      Src1 /= 3;
    } else if (Src1 % 2 == 0) {
      if (Count2 == 0)
        ++CountOps;
      ++Count2;
      Src1 /= 2;
    } else {
      return false;
    }
  }
  // Lea optimization only works for i16 and i32 types, not i8.
  if (Ty != IceType_i16 && Ty != IceType_i32 && (Count3 || Count5 || Count9))
    return false;
  // Limit the number of lea/shl operations for a single multiply, to a
  // somewhat arbitrary choice of 3.
  constexpr uint32_t MaxOpsForOptimizedMul = 3;
  if (CountOps > MaxOpsForOptimizedMul)
    return false;
  _mov(T, Src0);
  Constant *Zero = Ctx->getConstantZero(IceType_i32);
  for (uint32_t i = 0; i < Count9; ++i) {
    constexpr uint16_t Shift = 3; // log2(9-1)
    _lea(T,
         Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
  }
  for (uint32_t i = 0; i < Count5; ++i) {
    constexpr uint16_t Shift = 2; // log2(5-1)
    _lea(T,
         Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
  }
  for (uint32_t i = 0; i < Count3; ++i) {
    constexpr uint16_t Shift = 1; // log2(3-1)
    _lea(T,
         Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
  }
  if (Count2) {
    _shl(T, Ctx->getConstantInt(Ty, Count2));
  }
  if (Src1IsNegative)
    _neg(T);
  _mov(Dest, T);
  return true;
}

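/// Lowers a 64-bit shift on a 32-bit target using the x86 double-shift
/// instructions: "shld t3, t2, n" computes t3 = (t3 << n) | (t2 >> (32 - n)),
/// shifting bits from t2 into t3, and shrd is the mirror image. The comments
/// in each case below spell out the exact instruction sequences.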
template <class Machine>
void TargetX86Base<Machine>::lowerShift64(InstArithmetic::OpKind Op,
                                          Operand *Src0Lo, Operand *Src0Hi,
                                          Operand *Src1Lo, Variable *DestLo,
                                          Variable *DestHi) {
  // TODO: Refactor the similarities between Shl, Lshr, and Ashr.
  Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
  Constant *Zero = Ctx->getConstantZero(IceType_i32);
  Constant *SignExtend = Ctx->getConstantInt32(0x1f);
  if (auto *ConstantShiftAmount = llvm::dyn_cast<ConstantInteger32>(Src1Lo)) {
    uint32_t ShiftAmount = ConstantShiftAmount->getValue();
    if (ShiftAmount > 32) {
      Constant *ReducedShift = Ctx->getConstantInt32(ShiftAmount - 32);
      switch (Op) {
      default:
        assert(0 && "non-shift op");
        break;
      case InstArithmetic::Shl: {
        // a=b<<c ==>
        //   t2 = b.lo
        //   t2 = shl t2, ShiftAmount-32
        //   t3 = t2
        //   t2 = 0
        _mov(T_2, Src0Lo);
        _shl(T_2, ReducedShift);
        _mov(DestHi, T_2);
        _mov(DestLo, Zero);
      } break;
      case InstArithmetic::Lshr: {
        // a=b>>c (unsigned) ==>
        //   t2 = b.hi
        //   t2 = shr t2, ShiftAmount-32
        //   a.lo = t2
        //   a.hi = 0
        _mov(T_2, Src0Hi);
        _shr(T_2, ReducedShift);
        _mov(DestLo, T_2);
        _mov(DestHi, Zero);
      } break;
      case InstArithmetic::Ashr: {
        // a=b>>c (signed) ==>
        //   t3 = b.hi
        //   t3 = sar t3, 0x1f
        //   t2 = b.hi
        //   t2 = shrd t2, t3, ShiftAmount-32
        //   a.lo = t2
        //   a.hi = t3
        _mov(T_3, Src0Hi);
        _sar(T_3, SignExtend);
        _mov(T_2, Src0Hi);
        _shrd(T_2, T_3, ReducedShift);
        _mov(DestLo, T_2);
        _mov(DestHi, T_3);
      } break;
      }
    } else if (ShiftAmount == 32) {
      switch (Op) {
      default:
        assert(0 && "non-shift op");
        break;
      case InstArithmetic::Shl: {
        // a=b<<c ==>
        //   t2 = b.lo
        //   a.hi = t2
        //   a.lo = 0
        _mov(T_2, Src0Lo);
        _mov(DestHi, T_2);
        _mov(DestLo, Zero);
      } break;
      case InstArithmetic::Lshr: {
        // a=b>>c (unsigned) ==>
        //   t2 = b.hi
        //   a.lo = t2
        //   a.hi = 0
        _mov(T_2, Src0Hi);
        _mov(DestLo, T_2);
        _mov(DestHi, Zero);
      } break;
      case InstArithmetic::Ashr: {
        // a=b>>c (signed) ==>
        //   t2 = b.hi
        //   a.lo = t2
        //   t3 = b.hi
        //   t3 = sar t3, 0x1f
        //   a.hi = t3
        _mov(T_2, Src0Hi);
        _mov(DestLo, T_2);
        _mov(T_3, Src0Hi);
        _sar(T_3, SignExtend);
        _mov(DestHi, T_3);
      } break;
      }
    } else {
      // COMMON PREFIX OF: a=b SHIFT_OP c ==>
      //   t2 = b.lo
      //   t3 = b.hi
      _mov(T_2, Src0Lo);
      _mov(T_3, Src0Hi);
      switch (Op) {
      default:
        assert(0 && "non-shift op");
        break;
      case InstArithmetic::Shl: {
        // a=b<<c ==>
        //   t3 = shld t3, t2, ShiftAmount
        //   t2 = shl t2, ShiftAmount
        _shld(T_3, T_2, ConstantShiftAmount);
        _shl(T_2, ConstantShiftAmount);
      } break;
      case InstArithmetic::Lshr: {
        // a=b>>c (unsigned) ==>
        //   t2 = shrd t2, t3, ShiftAmount
        //   t3 = shr t3, ShiftAmount
        _shrd(T_2, T_3, ConstantShiftAmount);
        _shr(T_3, ConstantShiftAmount);
      } break;
      case InstArithmetic::Ashr: {
        // a=b>>c (signed) ==>
        //   t2 = shrd t2, t3, ShiftAmount
        //   t3 = sar t3, ShiftAmount
        _shrd(T_2, T_3, ConstantShiftAmount);
        _sar(T_3, ConstantShiftAmount);
      } break;
      }
      // COMMON SUFFIX OF: a=b SHIFT_OP c ==>
      //   a.lo = t2
      //   a.hi = t3
      _mov(DestLo, T_2);
      _mov(DestHi, T_3);
    }
  } else {
    // NON-CONSTANT CASES.
    Constant *BitTest = Ctx->getConstantInt32(0x20);
    typename Traits::Insts::Label *Label =
        Traits::Insts::Label::create(Func, this);
    // COMMON PREFIX OF: a=b SHIFT_OP c ==>
    //   t1:ecx = c.lo & 0xff
    //   t2 = b.lo
    //   t3 = b.hi
    T_1 = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl);
    _mov(T_1, Src1Lo);
    _mov(T_2, Src0Lo);
    _mov(T_3, Src0Hi);
    switch (Op) {
    default:
      assert(0 && "non-shift op");
      break;
    case InstArithmetic::Shl: {
      // a=b<<c ==>
      //   t3 = shld t3, t2, t1
      //   t2 = shl t2, t1
      //   test t1, 0x20
      //   je L1
      //   use(t3)
      //   t3 = t2
      //   t2 = 0
      _shld(T_3, T_2, T_1);
      _shl(T_2, T_1);
      _test(T_1, BitTest);
      _br(Traits::Cond::Br_e, Label);
      // T_2 and T_3 are being assigned again because of the intra-block
      // control flow, so we need the _mov_redefined variant to avoid liveness
      // problems.
      _mov_redefined(T_3, T_2);
      _mov_redefined(T_2, Zero);
    } break;
    case InstArithmetic::Lshr: {
      // a=b>>c (unsigned) ==>
      //   t2 = shrd t2, t3, t1
      //   t3 = shr t3, t1
      //   test t1, 0x20
      //   je L1
      //   use(t2)
      //   t2 = t3
      //   t3 = 0
      _shrd(T_2, T_3, T_1);
      _shr(T_3, T_1);
      _test(T_1, BitTest);
      _br(Traits::Cond::Br_e, Label);
      // T_2 and T_3 are being assigned again because of the intra-block
      // control flow, so we need the _mov_redefined variant to avoid liveness
      // problems.
      _mov_redefined(T_2, T_3);
      _mov_redefined(T_3, Zero);
    } break;
    case InstArithmetic::Ashr: {
      // a=b>>c (signed) ==>
      //   t2 = shrd t2, t3, t1
      //   t3 = sar t3, t1
      //   test t1, 0x20
      //   je L1
      //   use(t2)
      //   t2 = t3
      //   t3 = sar t3, 0x1f
      Constant *SignExtend = Ctx->getConstantInt32(0x1f);
      _shrd(T_2, T_3, T_1);
      _sar(T_3, T_1);
      _test(T_1, BitTest);
      _br(Traits::Cond::Br_e, Label);
      // T_2 and T_3 are being assigned again because of the intra-block
      // control flow, so T_2 needs the _mov_redefined variant to avoid
      // liveness problems. T_3 doesn't need special treatment because it is
      // reassigned via _sar instead of _mov.
      _mov_redefined(T_2, T_3);
      _sar(T_3, SignExtend);
    } break;
    }
    // COMMON SUFFIX OF: a=b SHIFT_OP c ==>
    // L1:
    //   a.lo = t2
    //   a.hi = t3
    Context.insert(Label);
    _mov(DestLo, T_2);
    _mov(DestHi, T_3);
  }
}

template <class Machine>
void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
  Variable *Dest = Inst->getDest();
  Operand *Src0 = legalize(Inst->getSrc(0));
  Operand *Src1 = legalize(Inst->getSrc(1));
  if (Inst->isCommutative()) {
    uint32_t SwapCount = 0;
    if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1)) {
      std::swap(Src0, Src1);
      ++SwapCount;
    }
    if (llvm::isa<Constant>(Src0) && !llvm::isa<Constant>(Src1)) {
      std::swap(Src0, Src1);
      ++SwapCount;
    }
    // Improve two-address code patterns by avoiding a copy to the dest
    // register when one of the source operands ends its lifetime here.
    if (!Inst->isLastUse(Src0) && Inst->isLastUse(Src1)) {
      std::swap(Src0, Src1);
      ++SwapCount;
    }
    assert(SwapCount <= 1);
    (void)SwapCount;
  }
  if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
    // These x86-32 helper-call-involved instructions are lowered in this
    // separate switch. This is because loOperand() and hiOperand() may insert
    // redundant instructions for constant blinding and pooling. Such redundant
    // instructions will fail liveness analysis under the -Om1 setting. In any
    // case, these arguments do not need to be processed with loOperand() and
    // hiOperand() to be used.
    switch (Inst->getOp()) {
    case InstArithmetic::Udiv: {
      constexpr SizeT MaxSrcs = 2;
      InstCall *Call = makeHelperCall(H_udiv_i64, Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      Call->addArg(Inst->getSrc(1));
      lowerCall(Call);
      return;
    }
    case InstArithmetic::Sdiv: {
      constexpr SizeT MaxSrcs = 2;
      InstCall *Call = makeHelperCall(H_sdiv_i64, Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      Call->addArg(Inst->getSrc(1));
      lowerCall(Call);
      return;
    }
    case InstArithmetic::Urem: {
      constexpr SizeT MaxSrcs = 2;
      InstCall *Call = makeHelperCall(H_urem_i64, Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      Call->addArg(Inst->getSrc(1));
      lowerCall(Call);
      return;
    }
    case InstArithmetic::Srem: {
      constexpr SizeT MaxSrcs = 2;
      InstCall *Call = makeHelperCall(H_srem_i64, Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      Call->addArg(Inst->getSrc(1));
      lowerCall(Call);
      return;
    }
    default:
      break;
    }

    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    Operand *Src0Lo = loOperand(Src0);
    Operand *Src0Hi = hiOperand(Src0);
    Operand *Src1Lo = loOperand(Src1);
    Operand *Src1Hi = hiOperand(Src1);
    Variable *T_Lo = nullptr, *T_Hi = nullptr;
    switch (Inst->getOp()) {
    case InstArithmetic::_num:
      llvm_unreachable("Unknown arithmetic operator");
      break;
    case InstArithmetic::Add:
1374 _mov(T_Lo, Src0Lo);
1375 _add(T_Lo, Src1Lo);
1376 _mov(DestLo, T_Lo);
1377 _mov(T_Hi, Src0Hi);
1378 _adc(T_Hi, Src1Hi);
1379 _mov(DestHi, T_Hi);
1380 break;
1381 case InstArithmetic::And:
1382 _mov(T_Lo, Src0Lo);
1383 _and(T_Lo, Src1Lo);
1384 _mov(DestLo, T_Lo);
1385 _mov(T_Hi, Src0Hi);
1386 _and(T_Hi, Src1Hi);
1387 _mov(DestHi, T_Hi);
1388 break;
1389 case InstArithmetic::Or:
1390 _mov(T_Lo, Src0Lo);
1391 _or(T_Lo, Src1Lo);
1392 _mov(DestLo, T_Lo);
1393 _mov(T_Hi, Src0Hi);
1394 _or(T_Hi, Src1Hi);
1395 _mov(DestHi, T_Hi);
1396 break;
1397 case InstArithmetic::Xor:
1398 _mov(T_Lo, Src0Lo);
1399 _xor(T_Lo, Src1Lo);
1400 _mov(DestLo, T_Lo);
1401 _mov(T_Hi, Src0Hi);
1402 _xor(T_Hi, Src1Hi);
1403 _mov(DestHi, T_Hi);
1404 break;
1405 case InstArithmetic::Sub:
1406 _mov(T_Lo, Src0Lo);
1407 _sub(T_Lo, Src1Lo);
1408 _mov(DestLo, T_Lo);
1409 _mov(T_Hi, Src0Hi);
1410 _sbb(T_Hi, Src1Hi);
1411 _mov(DestHi, T_Hi);
1412 break;
1413 case InstArithmetic::Mul: {
1414 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
John Porto5d0acff2015-06-30 15:29:21 -07001415 Variable *T_4Lo = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
1416 Variable *T_4Hi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
John Porto7e93c622015-06-23 10:58:57 -07001417 // gcc does the following:
1418 // a=b*c ==>
1419 // t1 = b.hi; t1 *=(imul) c.lo
1420 // t2 = c.hi; t2 *=(imul) b.lo
1421 // t3:eax = b.lo
1422 // t4.hi:edx,t4.lo:eax = t3:eax *(mul) c.lo
1423 // a.lo = t4.lo
1424 // t4.hi += t1
1425 // t4.hi += t2
1426 // a.hi = t4.hi
1427 // The mul instruction cannot take an immediate operand.
1428 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Mem);
1429 _mov(T_1, Src0Hi);
1430 _imul(T_1, Src1Lo);
1431 _mov(T_2, Src1Hi);
1432 _imul(T_2, Src0Lo);
John Porto5d0acff2015-06-30 15:29:21 -07001433 _mov(T_3, Src0Lo, Traits::RegisterSet::Reg_eax);
John Porto7e93c622015-06-23 10:58:57 -07001434 _mul(T_4Lo, T_3, Src1Lo);
Andrew Scull57e12682015-09-16 11:30:19 -07001435 // The mul instruction produces two dest variables, edx:eax. We create a
1436 // fake definition of edx to account for this.
John Porto7e93c622015-06-23 10:58:57 -07001437 Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo));
1438 _mov(DestLo, T_4Lo);
1439 _add(T_4Hi, T_1);
1440 _add(T_4Hi, T_2);
1441 _mov(DestHi, T_4Hi);
1442 } break;
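    // Why the Mul sequence above is correct: writing b = b.hi*2^32 + b.lo and
    // c = c.hi*2^32 + c.lo, the product modulo 2^64 is
    //   b*c = (b.hi*c.lo + b.lo*c.hi)*2^32 + b.lo*c.lo  (mod 2^64)
    // because the b.hi*c.hi term is scaled by 2^64 and drops out. The mul
    // instruction supplies the full 64-bit b.lo*c.lo product in edx:eax, so
    // the two imul products only need to be added into the high half.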
    case InstArithmetic::Shl:
    case InstArithmetic::Lshr:
    case InstArithmetic::Ashr:
      lowerShift64(Inst->getOp(), Src0Lo, Src0Hi, Src1Lo, DestLo, DestHi);
      break;
    case InstArithmetic::Fadd:
    case InstArithmetic::Fsub:
    case InstArithmetic::Fmul:
    case InstArithmetic::Fdiv:
    case InstArithmetic::Frem:
      llvm_unreachable("FP instruction with i64 type");
      break;
    case InstArithmetic::Udiv:
    case InstArithmetic::Sdiv:
    case InstArithmetic::Urem:
    case InstArithmetic::Srem:
      llvm_unreachable("Call-helper-involved instruction for i64 type "
                       "should have already been handled before");
      break;
    }
    return;
  }
  if (isVectorType(Dest->getType())) {
    // TODO: Trap on integer divide and integer modulo by zero. See:
    // https://code.google.com/p/nativeclient/issues/detail?id=3899
    if (llvm::isa<typename Traits::X86OperandMem>(Src1))
      Src1 = legalizeToReg(Src1);
    switch (Inst->getOp()) {
    case InstArithmetic::_num:
      llvm_unreachable("Unknown arithmetic operator");
      break;
    case InstArithmetic::Add: {
      Variable *T = makeReg(Dest->getType());
      _movp(T, Src0);
      _padd(T, Src1);
      _movp(Dest, T);
    } break;
    case InstArithmetic::And: {
      Variable *T = makeReg(Dest->getType());
      _movp(T, Src0);
      _pand(T, Src1);
      _movp(Dest, T);
    } break;
    case InstArithmetic::Or: {
      Variable *T = makeReg(Dest->getType());
      _movp(T, Src0);
      _por(T, Src1);
      _movp(Dest, T);
    } break;
    case InstArithmetic::Xor: {
      Variable *T = makeReg(Dest->getType());
      _movp(T, Src0);
      _pxor(T, Src1);
      _movp(Dest, T);
    } break;
    case InstArithmetic::Sub: {
      Variable *T = makeReg(Dest->getType());
      _movp(T, Src0);
      _psub(T, Src1);
      _movp(Dest, T);
    } break;
    case InstArithmetic::Mul: {
      bool TypesAreValidForPmull =
          Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16;
      bool InstructionSetIsValidForPmull =
          Dest->getType() == IceType_v8i16 || InstructionSet >= Traits::SSE4_1;
      if (TypesAreValidForPmull && InstructionSetIsValidForPmull) {
        Variable *T = makeReg(Dest->getType());
        _movp(T, Src0);
        _pmull(T, Src0 == Src1 ? T : Src1);
        _movp(Dest, T);
      } else if (Dest->getType() == IceType_v4i32) {
        // Lowering sequence:
        // Note: The mask arguments have index 0 on the left.
        //
        //   movups  T1, Src0
        //   pshufd  T2, Src0, {1,0,3,0}
        //   pshufd  T3, Src1, {1,0,3,0}
        //   # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]}
        //   pmuludq T1, Src1
        //   # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]}
        //   pmuludq T2, T3
        //   # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])}
        //   shufps  T1, T2, {0,2,0,2}
        //   pshufd  T4, T1, {0,2,1,3}
        //   movups  Dest, T4

        // Mask that directs pshufd to create a vector with entries
        // Src[1, 0, 3, 0]
        constexpr unsigned Constant1030 = 0x31;
        Constant *Mask1030 = Ctx->getConstantInt32(Constant1030);
        // Mask that directs shufps to create a vector with entries
        // Dest[0, 2], Src[0, 2]
        constexpr unsigned Mask0202 = 0x88;
        // Mask that directs pshufd to create a vector with entries
        // Src[0, 2, 1, 3]
        constexpr unsigned Mask0213 = 0xd8;
        Variable *T1 = makeReg(IceType_v4i32);
        Variable *T2 = makeReg(IceType_v4i32);
        Variable *T3 = makeReg(IceType_v4i32);
        Variable *T4 = makeReg(IceType_v4i32);
        _movp(T1, Src0);
        _pshufd(T2, Src0, Mask1030);
        _pshufd(T3, Src1, Mask1030);
        _pmuludq(T1, Src1);
        _pmuludq(T2, T3);
        _shufps(T1, T2, Ctx->getConstantInt32(Mask0202));
        _pshufd(T4, T1, Ctx->getConstantInt32(Mask0213));
        _movp(Dest, T4);
      } else if (Dest->getType() == IceType_v16i8) {
        scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
      } else {
        llvm::report_fatal_error("Invalid vector multiply type");
      }
    } break;
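    // The v4i32 sequence above works because pmuludq multiplies only the
    // even-numbered 32-bit lanes of its operands, producing two 64-bit
    // products per instruction. Shuffling the odd lanes {1,3} into even
    // positions first (mask {1,0,3,0}) lets the second pmuludq compute
    // products 1 and 3; shufps then gathers the four low 32-bit halves, and
    // the final pshufd restores lane order {0,1,2,3}.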
    case InstArithmetic::Shl:
    case InstArithmetic::Lshr:
    case InstArithmetic::Ashr:
    case InstArithmetic::Udiv:
    case InstArithmetic::Urem:
    case InstArithmetic::Sdiv:
    case InstArithmetic::Srem:
      scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
      break;
    case InstArithmetic::Fadd: {
      Variable *T = makeReg(Dest->getType());
      _movp(T, Src0);
      _addps(T, Src1);
      _movp(Dest, T);
    } break;
    case InstArithmetic::Fsub: {
      Variable *T = makeReg(Dest->getType());
      _movp(T, Src0);
      _subps(T, Src1);
      _movp(Dest, T);
    } break;
    case InstArithmetic::Fmul: {
      Variable *T = makeReg(Dest->getType());
      _movp(T, Src0);
      _mulps(T, Src0 == Src1 ? T : Src1);
      _movp(Dest, T);
    } break;
    case InstArithmetic::Fdiv: {
      Variable *T = makeReg(Dest->getType());
      _movp(T, Src0);
      _divps(T, Src1);
      _movp(Dest, T);
    } break;
    case InstArithmetic::Frem:
      scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
      break;
    }
    return;
  }
  Variable *T_edx = nullptr;
  Variable *T = nullptr;
  switch (Inst->getOp()) {
  case InstArithmetic::_num:
    llvm_unreachable("Unknown arithmetic operator");
    break;
  case InstArithmetic::Add:
    _mov(T, Src0);
    _add(T, Src1);
    _mov(Dest, T);
    break;
  case InstArithmetic::And:
    _mov(T, Src0);
    _and(T, Src1);
    _mov(Dest, T);
    break;
  case InstArithmetic::Or:
    _mov(T, Src0);
    _or(T, Src1);
    _mov(Dest, T);
    break;
  case InstArithmetic::Xor:
    _mov(T, Src0);
    _xor(T, Src1);
    _mov(Dest, T);
    break;
  case InstArithmetic::Sub:
    _mov(T, Src0);
    _sub(T, Src1);
    _mov(Dest, T);
    break;
  case InstArithmetic::Mul:
    if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
      if (optimizeScalarMul(Dest, Src0, C->getValue()))
        return;
    }
    // The 8-bit version of imul only allows the form "imul r/m8", where T
    // must be in al.
    if (isByteSizedArithType(Dest->getType())) {
      _mov(T, Src0, Traits::RegisterSet::Reg_al);
      Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
      _imul(T, Src0 == Src1 ? T : Src1);
      _mov(Dest, T);
    } else if (auto *ImmConst = llvm::dyn_cast<ConstantInteger32>(Src1)) {
      T = makeReg(Dest->getType());
      _imul_imm(T, Src0, ImmConst);
      _mov(Dest, T);
    } else {
      _mov(T, Src0);
      _imul(T, Src0 == Src1 ? T : Src1);
      _mov(Dest, T);
    }
    break;
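  // Note on the three multiply forms above: "imul r/m8" implicitly widens al
  // into ax, which is why T is pinned to al; the two-operand "imul r, r/m"
  // form handles the 16- and 32-bit register cases; and the immediate form
  // used by _imul_imm folds a constant multiplier without a separate load.
  // optimizeScalarMul (defined elsewhere, not shown here) presumably
  // strength-reduces multiplication by suitable constants into shift/add
  // sequences before any imul is emitted.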
  case InstArithmetic::Shl:
    _mov(T, Src0);
    if (!llvm::isa<ConstantInteger32>(Src1)) {
      Variable *Cl = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl);
      _mov(Cl, Src1);
      Src1 = Cl;
    }
    _shl(T, Src1);
    _mov(Dest, T);
    break;
  case InstArithmetic::Lshr:
    _mov(T, Src0);
    if (!llvm::isa<ConstantInteger32>(Src1)) {
      Variable *Cl = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl);
      _mov(Cl, Src1);
      Src1 = Cl;
    }
    _shr(T, Src1);
    _mov(Dest, T);
    break;
  case InstArithmetic::Ashr:
    _mov(T, Src0);
    if (!llvm::isa<ConstantInteger32>(Src1)) {
      Variable *Cl = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl);
      _mov(Cl, Src1);
      Src1 = Cl;
    }
    _sar(T, Src1);
    _mov(Dest, T);
    break;
  case InstArithmetic::Udiv:
    // div and idiv are among the few arithmetic operators that do not allow
    // an immediate operand.
    Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
    if (isByteSizedArithType(Dest->getType())) {
      // For 8-bit unsigned division we need to zero-extend al into ah. A
      // "mov $0, %ah" (or "xor %ah, %ah") would work just fine, except that
      // the x86-64 assembler refuses to encode %ah (it encodes %spl with a
      // REX prefix instead). Accessing %ah in 64-bit mode is "tricky", as
      // %ah can only be encoded alongside the 8-bit registers %a[lh],
      // %b[lh], %c[lh], and %d[lh], which means the X86 target lowering (and
      // the register allocator) would have to be aware of this restriction.
      // For now, we simply zero %eax completely, and move the dividend into
      // %al.
      Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
      Context.insert(InstFakeDef::create(Func, T_eax));
      _xor(T_eax, T_eax);
      _mov(T, Src0, Traits::RegisterSet::Reg_al);
      _div(T, Src1, T);
      _mov(Dest, T);
      Context.insert(InstFakeUse::create(Func, T_eax));
    } else {
      Type Ty = Dest->getType();
      uint32_t Eax = Traits::RegisterSet::Reg_eax;
      uint32_t Edx = Traits::RegisterSet::Reg_edx;
      switch (Ty) {
      default:
        llvm_unreachable("Bad type for udiv");
      // fallthrough
      case IceType_i32:
        break;
      case IceType_i16:
        Eax = Traits::RegisterSet::Reg_ax;
        Edx = Traits::RegisterSet::Reg_dx;
        break;
      }
      Constant *Zero = Ctx->getConstantZero(Ty);
      _mov(T, Src0, Eax);
      _mov(T_edx, Zero, Edx);
      _div(T, Src1, T_edx);
      _mov(Dest, T);
    }
    break;
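  // Register-convention recap for the div lowering above: with 8-bit
  // operands, div divides ax by r/m8 and leaves the quotient in al and the
  // remainder in ah; with 16/32-bit operands it divides dx:ax or edx:eax and
  // leaves the quotient in ax/eax and the remainder in dx/edx. That is why
  // the wide path zeroes edx (or dx) before dividing.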
  case InstArithmetic::Sdiv:
    // TODO(stichnot): Enable this after doing better performance and cross
    // testing.
    if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {
      // Optimize division by constant power of 2, but not for Om1 or O0,
      // just to keep things simple there.
      if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
        int32_t Divisor = C->getValue();
        uint32_t UDivisor = static_cast<uint32_t>(Divisor);
        if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {
          uint32_t LogDiv = llvm::Log2_32(UDivisor);
          Type Ty = Dest->getType();
          // LLVM does the following for dest=src/(1<<log):
          //   t=src
          //   sar t,typewidth-1  // -1 if src is negative, 0 if not
          //   shr t,typewidth-log
          //   add t,src
          //   sar t,log
          //   dest=t
          uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty);
          _mov(T, Src0);
          // If for some reason we are dividing by 1, just treat it like an
          // assignment.
          if (LogDiv > 0) {
            // The initial sar is unnecessary when dividing by 2.
            if (LogDiv > 1)
              _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1));
            _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv));
            _add(T, Src0);
            _sar(T, Ctx->getConstantInt(Ty, LogDiv));
          }
          _mov(Dest, T);
          return;
        }
      }
    }
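    // Worked example of the commented sequence for dest=src/4 (log=2,
    // typewidth=32) with src=-5: sar 31 gives t=-1; shr 30 turns that into
    // the bias t=3 (and leaves t=0 for non-negative src); add gives t=-2;
    // sar 2 gives t=-1, matching truncating division -5/4 == -1. For src=+5
    // the bias is 0 and the result is 1.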
    Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
    switch (Type Ty = Dest->getType()) {
    default:
      llvm_unreachable("Bad type for sdiv");
    // fallthrough
    case IceType_i32:
      T_edx = makeReg(Ty, Traits::RegisterSet::Reg_edx);
      _mov(T, Src0, Traits::RegisterSet::Reg_eax);
      break;
    case IceType_i16:
      T_edx = makeReg(Ty, Traits::RegisterSet::Reg_dx);
      _mov(T, Src0, Traits::RegisterSet::Reg_ax);
      break;
    case IceType_i8:
      T_edx = makeReg(IceType_i16, Traits::RegisterSet::Reg_ax);
      _mov(T, Src0, Traits::RegisterSet::Reg_al);
      break;
    }
    _cbwdq(T_edx, T);
    _idiv(T, Src1, T_edx);
    _mov(Dest, T);
    break;
  case InstArithmetic::Urem:
    Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
    if (isByteSizedArithType(Dest->getType())) {
      Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
      Context.insert(InstFakeDef::create(Func, T_eax));
      _xor(T_eax, T_eax);
      _mov(T, Src0, Traits::RegisterSet::Reg_al);
      _div(T, Src1, T);
      // "shr $8, %eax" shifts ah (i.e., the 8-bit remainder) into al. We
      // don't "mov %ah, %al" because it would make x86-64 codegen more
      // complicated. If this ever becomes a problem we can introduce a
      // pseudo rem instruction that returns the remainder in %al directly
      // (and uses a mov for copying %ah to %al.)
      static constexpr uint8_t AlSizeInBits = 8;
      _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits));
      _mov(Dest, T);
      Context.insert(InstFakeUse::create(Func, T_eax));
    } else {
      Type Ty = Dest->getType();
      uint32_t Eax = Traits::RegisterSet::Reg_eax;
      uint32_t Edx = Traits::RegisterSet::Reg_edx;
      switch (Ty) {
      default:
        llvm_unreachable("Bad type for urem");
      // fallthrough
      case IceType_i32:
        break;
      case IceType_i16:
        Eax = Traits::RegisterSet::Reg_ax;
        Edx = Traits::RegisterSet::Reg_dx;
        break;
      }
      Constant *Zero = Ctx->getConstantZero(Ty);
      T_edx = makeReg(Dest->getType(), Edx);
      _mov(T_edx, Zero);
      _mov(T, Src0, Eax);
      _div(T_edx, Src1, T);
      _mov(Dest, T_edx);
    }
    break;
  case InstArithmetic::Srem:
    // TODO(stichnot): Enable this after doing better performance and cross
    // testing.
    if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {
      // Optimize mod by constant power of 2, but not for Om1 or O0, just to
      // keep things simple there.
      if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
        int32_t Divisor = C->getValue();
        uint32_t UDivisor = static_cast<uint32_t>(Divisor);
        if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {
          uint32_t LogDiv = llvm::Log2_32(UDivisor);
          Type Ty = Dest->getType();
          // LLVM does the following for dest=src%(1<<log):
          //   t=src
          //   sar t,typewidth-1  // -1 if src is negative, 0 if not
          //   shr t,typewidth-log
          //   add t,src
          //   and t, -(1<<log)
          //   sub t,src
          //   neg t
          //   dest=t
          uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty);
          // If for some reason we are dividing by 1, just assign 0.
          if (LogDiv == 0) {
            _mov(Dest, Ctx->getConstantZero(Ty));
            return;
          }
          _mov(T, Src0);
          // The initial sar is unnecessary when dividing by 2.
          if (LogDiv > 1)
            _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1));
          _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv));
          _add(T, Src0);
          _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv)));
          _sub(T, Src0);
          _neg(T);
          _mov(Dest, T);
          return;
        }
      }
    }
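    // Worked example of the commented sequence for dest=src%4 (log=2,
    // typewidth=32) with src=-5: the sar/shr/add steps bias t to -2 as in
    // the sdiv case; "and -4" rounds t down to -4, which equals 4*(-5/4);
    // sub gives -4-(-5) = 1; neg yields -1, matching C's -5%4 == -1 (the
    // remainder takes the sign of the dividend).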
    Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
    switch (Type Ty = Dest->getType()) {
    default:
      llvm_unreachable("Bad type for srem");
    // fallthrough
    case IceType_i32:
      T_edx = makeReg(Ty, Traits::RegisterSet::Reg_edx);
      _mov(T, Src0, Traits::RegisterSet::Reg_eax);
      _cbwdq(T_edx, T);
      _idiv(T_edx, Src1, T);
      _mov(Dest, T_edx);
      break;
    case IceType_i16:
      T_edx = makeReg(Ty, Traits::RegisterSet::Reg_dx);
      _mov(T, Src0, Traits::RegisterSet::Reg_ax);
      _cbwdq(T_edx, T);
      _idiv(T_edx, Src1, T);
      _mov(Dest, T_edx);
      break;
    case IceType_i8:
      T_edx = makeReg(IceType_i16, Traits::RegisterSet::Reg_ax);
      // TODO(stichnot): Use register ah for T_edx, and remove the _shr().
      // T_edx = makeReg(Ty, Traits::RegisterSet::Reg_ah);
      _mov(T, Src0, Traits::RegisterSet::Reg_al);
      _cbwdq(T_edx, T);
      _idiv(T_edx, Src1, T);
      static constexpr uint8_t AlSizeInBits = 8;
      _shr(T_edx, Ctx->getConstantInt8(AlSizeInBits));
      _mov(Dest, T_edx);
      break;
    }
    break;
  case InstArithmetic::Fadd:
    _mov(T, Src0);
    _addss(T, Src1);
    _mov(Dest, T);
    break;
  case InstArithmetic::Fsub:
    _mov(T, Src0);
    _subss(T, Src1);
    _mov(Dest, T);
    break;
  case InstArithmetic::Fmul:
    _mov(T, Src0);
    _mulss(T, Src0 == Src1 ? T : Src1);
    _mov(Dest, T);
    break;
  case InstArithmetic::Fdiv:
    _mov(T, Src0);
    _divss(T, Src1);
    _mov(Dest, T);
    break;
  case InstArithmetic::Frem: {
    constexpr SizeT MaxSrcs = 2;
    Type Ty = Dest->getType();
    InstCall *Call = makeHelperCall(
        isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs);
    Call->addArg(Src0);
    Call->addArg(Src1);
    return lowerCall(Call);
  }
  }
}

template <class Machine>
void TargetX86Base<Machine>::lowerAssign(const InstAssign *Inst) {
  Variable *Dest = Inst->getDest();
  Operand *Src0 = Inst->getSrc(0);
  assert(Dest->getType() == Src0->getType());
  if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
    Src0 = legalize(Src0);
    Operand *Src0Lo = loOperand(Src0);
    Operand *Src0Hi = hiOperand(Src0);
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    Variable *T_Lo = nullptr, *T_Hi = nullptr;
    _mov(T_Lo, Src0Lo);
    _mov(DestLo, T_Lo);
    _mov(T_Hi, Src0Hi);
    _mov(DestHi, T_Hi);
  } else {
    Operand *Src0Legal;
    if (Dest->hasReg()) {
      // If Dest already has a physical register, then only basic
      // legalization is needed, as the source operand can be a register,
      // immediate, or memory.
      Src0Legal = legalize(Src0);
    } else {
      // If Dest could be a stack operand, then Src0Legal must be a physical
      // register or a scalar integer immediate.
      Src0Legal = legalize(Src0, Legal_Reg | Legal_Imm);
    }
    if (isVectorType(Dest->getType()))
      _movp(Dest, Src0Legal);
    else
      _mov(Dest, Src0Legal);
  }
}

template <class Machine>
void TargetX86Base<Machine>::lowerBr(const InstBr *Inst) {
  if (Inst->isUnconditional()) {
    _br(Inst->getTargetUnconditional());
    return;
  }
  Operand *Cond = Inst->getCondition();

  // Handle folding opportunities.
  if (const class Inst *Producer = FoldingInfo.getProducerFor(Cond)) {
    assert(Producer->isDeleted());
    switch (BoolFolding::getProducerKind(Producer)) {
    default:
      break;
    case BoolFolding::PK_Icmp32:
    case BoolFolding::PK_Icmp64: {
      lowerIcmpAndBr(llvm::dyn_cast<InstIcmp>(Producer), Inst);
      return;
    }
    }
  }

  Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem);
  Constant *Zero = Ctx->getConstantZero(IceType_i32);
  _cmp(Src0, Zero);
  _br(Traits::Cond::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse());
}
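// For the unfolded path above, the emitted pattern is "cmp cond, 0; jne
// target_true; jmp target_false", with branches to the fall-through block
// typically cleaned up by later branch optimization. The folded path lets
// the icmp producer drive the conditional branch directly, avoiding
// materializing the i1 condition into a register.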

template <class Machine>
void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
  // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap)
  InstCast::OpKind CastKind = Inst->getCastKind();
  Variable *Dest = Inst->getDest();
  switch (CastKind) {
  default:
    Func->setError("Cast type not supported");
    return;
  case InstCast::Sext: {
    // Src0RM is the source operand legalized to physical register or memory,
    // but not immediate, since the relevant x86 native instructions don't
    // allow an immediate operand. If the operand is an immediate, we could
    // consider computing the strength-reduced result at translation time,
    // but we're unlikely to see something like that in the bitcode that the
    // optimizer wouldn't have already taken care of.
    Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
    if (isVectorType(Dest->getType())) {
      Type DestTy = Dest->getType();
      if (DestTy == IceType_v16i8) {
        // onemask = materialize(1,1,...); dst = (src & onemask) > 0
        Variable *OneMask = makeVectorOfOnes(Dest->getType());
        Variable *T = makeReg(DestTy);
        _movp(T, Src0RM);
        _pand(T, OneMask);
        Variable *Zeros = makeVectorOfZeros(Dest->getType());
        _pcmpgt(T, Zeros);
        _movp(Dest, T);
      } else {
        // width = width(elty) - 1; dest = (src << width) >> width
        SizeT ShiftAmount =
            Traits::X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) -
            1;
        Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount);
        Variable *T = makeReg(DestTy);
        _movp(T, Src0RM);
        _psll(T, ShiftConstant);
        _psra(T, ShiftConstant);
        _movp(Dest, T);
      }
    } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
      // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2
      Constant *Shift = Ctx->getConstantInt32(31);
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *T_Lo = makeReg(DestLo->getType());
      if (Src0RM->getType() == IceType_i32) {
        _mov(T_Lo, Src0RM);
      } else if (Src0RM->getType() == IceType_i1) {
        _movzx(T_Lo, Src0RM);
        _shl(T_Lo, Shift);
        _sar(T_Lo, Shift);
      } else {
        _movsx(T_Lo, Src0RM);
      }
      _mov(DestLo, T_Lo);
      Variable *T_Hi = nullptr;
      _mov(T_Hi, T_Lo);
      if (Src0RM->getType() != IceType_i1)
        // For i1, the sar instruction is already done above.
        _sar(T_Hi, Shift);
      _mov(DestHi, T_Hi);
    } else if (Src0RM->getType() == IceType_i1) {
      // t1 = src
      // shl t1, dst_bitwidth - 1
      // sar t1, dst_bitwidth - 1
      // dst = t1
      size_t DestBits =
          Traits::X86_CHAR_BIT * typeWidthInBytes(Dest->getType());
      Constant *ShiftAmount = Ctx->getConstantInt32(DestBits - 1);
      Variable *T = makeReg(Dest->getType());
      if (typeWidthInBytes(Dest->getType()) <=
          typeWidthInBytes(Src0RM->getType())) {
        _mov(T, Src0RM);
      } else {
        // Widen the source using movsx or movzx. (It doesn't matter which
        // one, since the following shl/sar overwrite the bits.)
        _movzx(T, Src0RM);
      }
      _shl(T, ShiftAmount);
      _sar(T, ShiftAmount);
      _mov(Dest, T);
    } else {
      // t1 = movsx src; dst = t1
      Variable *T = makeReg(Dest->getType());
      _movsx(T, Src0RM);
      _mov(Dest, T);
    }
    break;
  }
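  // Example of the shl/sar trick above with a 32-bit destination: an i1
  // value of 1 becomes 1 << 31 = 0x80000000, and the arithmetic shift right
  // by 31 smears the sign bit, yielding 0xffffffff (-1); an i1 value of 0
  // stays 0. The psll/psra pair implements the same idea per-element for the
  // non-v16i8 vector cases.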
  case InstCast::Zext: {
    Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
    if (isVectorType(Dest->getType())) {
      // onemask = materialize(1,1,...); dest = onemask & src
      Type DestTy = Dest->getType();
      Variable *OneMask = makeVectorOfOnes(DestTy);
      Variable *T = makeReg(DestTy);
      _movp(T, Src0RM);
      _pand(T, OneMask);
      _movp(Dest, T);
    } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
      // t1=movzx src; dst.lo=t1; dst.hi=0
      Constant *Zero = Ctx->getConstantZero(IceType_i32);
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *Tmp = makeReg(DestLo->getType());
      if (Src0RM->getType() == IceType_i32) {
        _mov(Tmp, Src0RM);
      } else {
        _movzx(Tmp, Src0RM);
      }
      _mov(DestLo, Tmp);
      _mov(DestHi, Zero);
    } else if (Src0RM->getType() == IceType_i1) {
      // t = Src0RM; Dest = t
      Type DestTy = Dest->getType();
      Variable *T = nullptr;
      if (DestTy == IceType_i8) {
        _mov(T, Src0RM);
      } else {
        assert(DestTy != IceType_i1);
        assert(Traits::Is64Bit || DestTy != IceType_i64);
        // Use 32-bit for both 16-bit and 32-bit, since 32-bit ops are
        // shorter. In x86-64 we need to widen T to 64 bits to ensure that T,
        // if written to the stack (i.e., in -Om1), will be fully
        // zero-extended.
        T = makeReg(DestTy == IceType_i64 ? IceType_i64 : IceType_i32);
        _movzx(T, Src0RM);
      }
      _mov(Dest, T);
    } else {
      // t1 = movzx src; dst = t1
      Variable *T = makeReg(Dest->getType());
      _movzx(T, Src0RM);
      _mov(Dest, T);
    }
    break;
  }
  case InstCast::Trunc: {
    if (isVectorType(Dest->getType())) {
      // onemask = materialize(1,1,...); dst = src & onemask
      Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
      Type Src0Ty = Src0RM->getType();
      Variable *OneMask = makeVectorOfOnes(Src0Ty);
      Variable *T = makeReg(Dest->getType());
      _movp(T, Src0RM);
      _pand(T, OneMask);
      _movp(Dest, T);
    } else {
      Operand *Src0 = legalizeUndef(Inst->getSrc(0));
      if (!Traits::Is64Bit && Src0->getType() == IceType_i64)
        Src0 = loOperand(Src0);
      Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
      // t1 = trunc Src0RM; Dest = t1
      Variable *T = nullptr;
      _mov(T, Src0RM);
      if (Dest->getType() == IceType_i1)
        _and(T, Ctx->getConstantInt1(1));
      _mov(Dest, T);
    }
    break;
  }
  case InstCast::Fptrunc:
  case InstCast::Fpext: {
    Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
    // t1 = cvt Src0RM; Dest = t1
    Variable *T = makeReg(Dest->getType());
    _cvt(T, Src0RM, Traits::Insts::Cvt::Float2float);
    _mov(Dest, T);
    break;
  }
  case InstCast::Fptosi:
    if (isVectorType(Dest->getType())) {
      assert(Dest->getType() == IceType_v4i32 &&
             Inst->getSrc(0)->getType() == IceType_v4f32);
      Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
      if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
        Src0RM = legalizeToReg(Src0RM);
      Variable *T = makeReg(Dest->getType());
      _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq);
      _movp(Dest, T);
    } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
      constexpr SizeT MaxSrcs = 1;
      Type SrcType = Inst->getSrc(0)->getType();
      InstCall *Call =
          makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64
                                                           : H_fptosi_f64_i64,
                         Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
    } else {
      Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
      // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
      Variable *T_1 = nullptr;
      if (Traits::Is64Bit && Dest->getType() == IceType_i64) {
        T_1 = makeReg(IceType_i64);
      } else {
        assert(Dest->getType() != IceType_i64);
        T_1 = makeReg(IceType_i32);
      }
      // cvt() requires its integer argument to be a GPR.
      Variable *T_2 = makeReg(Dest->getType());
      _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si);
      _mov(T_2, T_1); // T_1 and T_2 may have different integer types
      if (Dest->getType() == IceType_i1)
        _and(T_2, Ctx->getConstantInt1(1));
      _mov(Dest, T_2);
    }
    break;
  case InstCast::Fptoui:
    if (isVectorType(Dest->getType())) {
      assert(Dest->getType() == IceType_v4i32 &&
             Inst->getSrc(0)->getType() == IceType_v4f32);
      constexpr SizeT MaxSrcs = 1;
      InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
    } else if (Dest->getType() == IceType_i64 ||
               (!Traits::Is64Bit && Dest->getType() == IceType_i32)) {
      // Use a helper for both x86-32 and x86-64.
      constexpr SizeT MaxSrcs = 1;
      Type DestType = Dest->getType();
      Type SrcType = Inst->getSrc(0)->getType();
      IceString TargetString;
      if (Traits::Is64Bit) {
        TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64
                                                         : H_fptoui_f64_i64;
      } else if (isInt32Asserting32Or64(DestType)) {
        TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32
                                                         : H_fptoui_f64_i32;
      } else {
        TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64
                                                         : H_fptoui_f64_i64;
      }
      InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
      return;
    } else {
      Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
      // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
      assert(Dest->getType() != IceType_i64);
      Variable *T_1 = nullptr;
      if (Traits::Is64Bit && Dest->getType() == IceType_i32) {
        T_1 = makeReg(IceType_i64);
      } else {
        assert(Dest->getType() != IceType_i32);
        T_1 = makeReg(IceType_i32);
      }
      Variable *T_2 = makeReg(Dest->getType());
      _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si);
      _mov(T_2, T_1); // T_1 and T_2 may have different integer types
      if (Dest->getType() == IceType_i1)
        _and(T_2, Ctx->getConstantInt1(1));
      _mov(Dest, T_2);
    }
    break;
  case InstCast::Sitofp:
    if (isVectorType(Dest->getType())) {
      assert(Dest->getType() == IceType_v4f32 &&
             Inst->getSrc(0)->getType() == IceType_v4i32);
      Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
      if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
        Src0RM = legalizeToReg(Src0RM);
      Variable *T = makeReg(Dest->getType());
      _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps);
      _movp(Dest, T);
    } else if (!Traits::Is64Bit && Inst->getSrc(0)->getType() == IceType_i64) {
      // Use a helper for x86-32.
      constexpr SizeT MaxSrcs = 1;
      Type DestType = Dest->getType();
      InstCall *Call =
          makeHelperCall(isFloat32Asserting32Or64(DestType) ? H_sitofp_i64_f32
                                                            : H_sitofp_i64_f64,
                         Dest, MaxSrcs);
      // TODO: Call the correct compiler-rt helper function.
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
      return;
    } else {
      Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
      // Sign-extend the operand.
      // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2
      Variable *T_1 = nullptr;
      if (Traits::Is64Bit && Src0RM->getType() == IceType_i64) {
        T_1 = makeReg(IceType_i64);
      } else {
        assert(Src0RM->getType() != IceType_i64);
        T_1 = makeReg(IceType_i32);
      }
      Variable *T_2 = makeReg(Dest->getType());
      if (Src0RM->getType() == T_1->getType())
        _mov(T_1, Src0RM);
      else
        _movsx(T_1, Src0RM);
      _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss);
      _mov(Dest, T_2);
    }
    break;
  case InstCast::Uitofp: {
    Operand *Src0 = Inst->getSrc(0);
    if (isVectorType(Src0->getType())) {
      assert(Dest->getType() == IceType_v4f32 &&
             Src0->getType() == IceType_v4i32);
      constexpr SizeT MaxSrcs = 1;
      InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs);
      Call->addArg(Src0);
      lowerCall(Call);
    } else if (Src0->getType() == IceType_i64 ||
               (!Traits::Is64Bit && Src0->getType() == IceType_i32)) {
      // Use a helper for x86-32 and x86-64. Also use a helper for i32 on
      // x86-32.
      constexpr SizeT MaxSrcs = 1;
      Type DestType = Dest->getType();
      IceString TargetString;
      if (isInt32Asserting32Or64(Src0->getType())) {
        TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i32_f32
                                                          : H_uitofp_i32_f64;
      } else {
        TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i64_f32
                                                          : H_uitofp_i64_f64;
      }
      InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
      Call->addArg(Src0);
      lowerCall(Call);
      return;
    } else {
      Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
      // Zero-extend the operand.
      // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2
      Variable *T_1 = nullptr;
      if (Traits::Is64Bit && Src0RM->getType() == IceType_i32) {
        T_1 = makeReg(IceType_i64);
      } else {
        assert(Src0RM->getType() != IceType_i64);
        assert(Traits::Is64Bit || Src0RM->getType() != IceType_i32);
        T_1 = makeReg(IceType_i32);
      }
      Variable *T_2 = makeReg(Dest->getType());
      if (Src0RM->getType() == T_1->getType())
        _mov(T_1, Src0RM);
      else
        _movzx(T_1, Src0RM);
      _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss);
      _mov(Dest, T_2);
    }
    break;
  }
  case InstCast::Bitcast: {
    Operand *Src0 = Inst->getSrc(0);
    if (Dest->getType() == Src0->getType()) {
      InstAssign *Assign = InstAssign::create(Func, Dest, Src0);
      lowerAssign(Assign);
      return;
    }
    switch (Dest->getType()) {
    default:
      llvm_unreachable("Unexpected Bitcast dest type");
    case IceType_i8: {
      assert(Src0->getType() == IceType_v8i1);
      InstCall *Call = makeHelperCall(H_bitcast_8xi1_i8, Dest, 1);
      Call->addArg(Src0);
      lowerCall(Call);
    } break;
    case IceType_i16: {
      assert(Src0->getType() == IceType_v16i1);
      InstCall *Call = makeHelperCall(H_bitcast_16xi1_i16, Dest, 1);
      Call->addArg(Src0);
      lowerCall(Call);
    } break;
    case IceType_i32:
    case IceType_f32: {
      Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
      Type DestType = Dest->getType();
      Type SrcType = Src0RM->getType();
      (void)DestType;
      assert((DestType == IceType_i32 && SrcType == IceType_f32) ||
             (DestType == IceType_f32 && SrcType == IceType_i32));
      // a.i32 = bitcast b.f32 ==>
      //   t.f32 = b.f32
      //   s.f32 = spill t.f32
      //   a.i32 = s.f32
      Variable *T = nullptr;
      // TODO: Should be able to force a spill setup by calling legalize()
      // with Legal_Mem and not Legal_Reg or Legal_Imm.
      typename Traits::SpillVariable *SpillVar =
          Func->makeVariable<typename Traits::SpillVariable>(SrcType);
      SpillVar->setLinkedTo(Dest);
      Variable *Spill = SpillVar;
      Spill->setMustNotHaveReg();
      _mov(T, Src0RM);
      _mov(Spill, T);
      _mov(Dest, Spill);
    } break;
    case IceType_i64: {
      assert(Src0->getType() == IceType_f64);
      if (Traits::Is64Bit) {
        // Movd requires its fp argument (in this case, the bitcast source)
        // to be an xmm register.
        Variable *Src0R = legalizeToReg(Src0);
        Variable *T = makeReg(IceType_i64);
        _movd(T, Src0R);
        _mov(Dest, T);
      } else {
        Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
        // a.i64 = bitcast b.f64 ==>
        //   s.f64 = spill b.f64
        //   t_lo.i32 = lo(s.f64)
        //   a_lo.i32 = t_lo.i32
        //   t_hi.i32 = hi(s.f64)
        //   a_hi.i32 = t_hi.i32
        Operand *SpillLo, *SpillHi;
        if (auto *Src0Var = llvm::dyn_cast<Variable>(Src0RM)) {
          typename Traits::SpillVariable *SpillVar =
              Func->makeVariable<typename Traits::SpillVariable>(IceType_f64);
          SpillVar->setLinkedTo(Src0Var);
          Variable *Spill = SpillVar;
          Spill->setMustNotHaveReg();
          _movq(Spill, Src0RM);
          SpillLo = Traits::VariableSplit::create(Func, Spill,
                                                  Traits::VariableSplit::Low);
          SpillHi = Traits::VariableSplit::create(Func, Spill,
                                                  Traits::VariableSplit::High);
        } else {
          SpillLo = loOperand(Src0RM);
          SpillHi = hiOperand(Src0RM);
        }

        Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
        Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
        Variable *T_Lo = makeReg(IceType_i32);
        Variable *T_Hi = makeReg(IceType_i32);

        _mov(T_Lo, SpillLo);
        _mov(DestLo, T_Lo);
        _mov(T_Hi, SpillHi);
        _mov(DestHi, T_Hi);
      }
    } break;
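    // On x86-32 there is no single instruction that moves 64 bits between an
    // xmm register and a pair of GPRs, so the f64<->i64 bitcasts here and
    // below round-trip the value through a stack slot (the SpillVariable)
    // and access it in 32-bit halves via VariableSplit.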
    case IceType_f64: {
      assert(Src0->getType() == IceType_i64);
      if (Traits::Is64Bit) {
        Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
        Variable *T = makeReg(IceType_f64);
        // Movd requires its fp argument (in this case, the bitcast
        // destination) to be an xmm register.
        _movd(T, Src0RM);
        _mov(Dest, T);
      } else {
        Src0 = legalize(Src0);
        if (llvm::isa<typename Traits::X86OperandMem>(Src0)) {
          Variable *T = Func->makeVariable(Dest->getType());
          _movq(T, Src0);
          _movq(Dest, T);
          break;
        }
        // a.f64 = bitcast b.i64 ==>
        //   t_lo.i32 = b_lo.i32
        //   FakeDef(s.f64)
        //   lo(s.f64) = t_lo.i32
        //   t_hi.i32 = b_hi.i32
        //   hi(s.f64) = t_hi.i32
        //   a.f64 = s.f64
        typename Traits::SpillVariable *SpillVar =
            Func->makeVariable<typename Traits::SpillVariable>(IceType_f64);
        SpillVar->setLinkedTo(Dest);
        Variable *Spill = SpillVar;
        Spill->setMustNotHaveReg();

        Variable *T_Lo = nullptr, *T_Hi = nullptr;
        typename Traits::VariableSplit *SpillLo = Traits::VariableSplit::create(
            Func, Spill, Traits::VariableSplit::Low);
        typename Traits::VariableSplit *SpillHi = Traits::VariableSplit::create(
            Func, Spill, Traits::VariableSplit::High);
        _mov(T_Lo, loOperand(Src0));
        // Technically, the Spill is defined after the _store happens, but
        // SpillLo is considered a "use" of Spill so define Spill before it
        // is used.
        Context.insert(InstFakeDef::create(Func, Spill));
        _store(T_Lo, SpillLo);
        _mov(T_Hi, hiOperand(Src0));
        _store(T_Hi, SpillHi);
        _movq(Dest, Spill);
      }
    } break;
    case IceType_v8i1: {
      assert(Src0->getType() == IceType_i8);
      InstCall *Call = makeHelperCall(H_bitcast_i8_8xi1, Dest, 1);
      Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
      // Arguments to functions are required to be at least 32 bits wide.
      lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));
      Call->addArg(Src0AsI32);
      lowerCall(Call);
    } break;
    case IceType_v16i1: {
      assert(Src0->getType() == IceType_i16);
      InstCall *Call = makeHelperCall(H_bitcast_i16_16xi1, Dest, 1);
      Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
      // Arguments to functions are required to be at least 32 bits wide.
      lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));
      Call->addArg(Src0AsI32);
      lowerCall(Call);
    } break;
    case IceType_v8i16:
    case IceType_v16i8:
    case IceType_v4i32:
    case IceType_v4f32: {
      _movp(Dest, legalizeToReg(Src0));
    } break;
    }
    break;
  }
  }
}

template <class Machine>
void TargetX86Base<Machine>::lowerExtractElement(
    const InstExtractElement *Inst) {
  Operand *SourceVectNotLegalized = Inst->getSrc(0);
  ConstantInteger32 *ElementIndex =
      llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(1));
  // Only constant indices are allowed in PNaCl IR.
  assert(ElementIndex);

  unsigned Index = ElementIndex->getValue();
  Type Ty = SourceVectNotLegalized->getType();
  Type ElementTy = typeElementType(Ty);
  Type InVectorElementTy = Traits::getInVectorElementType(Ty);

  // TODO(wala): Determine the best lowering sequences for each type.
  bool CanUsePextr = Ty == IceType_v8i16 || Ty == IceType_v8i1 ||
                     (InstructionSet >= Traits::SSE4_1 && Ty != IceType_v4f32);
  Variable *ExtractedElementR =
      makeReg(CanUsePextr ? IceType_i32 : InVectorElementTy);
  if (CanUsePextr) {
    // Use pextrb, pextrw, or pextrd. The "b" and "w" versions clear the
    // upper bits of the destination register, so we represent this by always
    // extracting into an i32 register. The _mov into Dest below will do
    // truncation as necessary.
    Constant *Mask = Ctx->getConstantInt32(Index);
    Variable *SourceVectR = legalizeToReg(SourceVectNotLegalized);
    _pextr(ExtractedElementR, SourceVectR, Mask);
  } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
    // Use pshufd and movd/movss.
    Variable *T = nullptr;
    if (Index) {
      // The shuffle only needs to occur if the element to be extracted is
      // not at the lowest index.
      Constant *Mask = Ctx->getConstantInt32(Index);
      T = makeReg(Ty);
      _pshufd(T, legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem), Mask);
    } else {
      T = legalizeToReg(SourceVectNotLegalized);
    }

    if (InVectorElementTy == IceType_i32) {
      _movd(ExtractedElementR, T);
    } else { // Ty == IceType_f32
      // TODO(wala): _movss is only used here because _mov does not allow a
      // vector source and a scalar destination. _mov should be able to be
      // used here.
      // _movss is a binary instruction, so the FakeDef is needed to keep the
      // live range analysis consistent.
      Context.insert(InstFakeDef::create(Func, ExtractedElementR));
      _movss(ExtractedElementR, T);
    }
  } else {
    assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
    // Spill the value to a stack slot and do the extraction in memory.
    //
    // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when
    // support for legalizing to mem is implemented.
    Variable *Slot = Func->makeVariable(Ty);
    Slot->setMustNotHaveReg();
    _movp(Slot, legalizeToReg(SourceVectNotLegalized));

    // Compute the location of the element in memory.
    unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
    typename Traits::X86OperandMem *Loc =
        getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
    _mov(ExtractedElementR, Loc);
  }

  if (ElementTy == IceType_i1) {
    // Truncate extracted integers to i1s if necessary.
    Variable *T = makeReg(IceType_i1);
    InstCast *Cast =
        InstCast::create(Func, InstCast::Trunc, T, ExtractedElementR);
    lowerCast(Cast);
    ExtractedElementR = T;
  }

  // Copy the element to the destination.
  Variable *Dest = Inst->getDest();
  _mov(Dest, ExtractedElementR);
}
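// Example: extracting element 2 of a v4i32 with SSE4.1 available emits a
// single "pextrd r32, xmm, 2". Without SSE4.1, the same extraction becomes
// "pshufd T, src, 2" (moving element 2 into lane 0) followed by
// "movd r32, T".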
2583
2584template <class Machine>
2585void TargetX86Base<Machine>::lowerFcmp(const InstFcmp *Inst) {
2586 Operand *Src0 = Inst->getSrc(0);
2587 Operand *Src1 = Inst->getSrc(1);
2588 Variable *Dest = Inst->getDest();
2589
2590 if (isVectorType(Dest->getType())) {
2591 InstFcmp::FCond Condition = Inst->getCondition();
2592 size_t Index = static_cast<size_t>(Condition);
2593 assert(Index < Traits::TableFcmpSize);
2594
2595 if (Traits::TableFcmp[Index].SwapVectorOperands) {
2596 Operand *T = Src0;
2597 Src0 = Src1;
2598 Src1 = T;
2599 }
2600
2601 Variable *T = nullptr;
2602
2603 if (Condition == InstFcmp::True) {
2604 // makeVectorOfOnes() requires an integer vector type.
2605 T = makeVectorOfMinusOnes(IceType_v4i32);
2606 } else if (Condition == InstFcmp::False) {
2607 T = makeVectorOfZeros(Dest->getType());
2608 } else {
2609 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2610 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
John Porto921856d2015-07-07 11:56:26 -07002611 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
Andrew Scull97f460d2015-07-21 10:07:42 -07002612 Src1RM = legalizeToReg(Src1RM);
John Porto7e93c622015-06-23 10:58:57 -07002613
2614 switch (Condition) {
2615 default: {
John Porto5d0acff2015-06-30 15:29:21 -07002616 typename Traits::Cond::CmppsCond Predicate =
2617 Traits::TableFcmp[Index].Predicate;
2618 assert(Predicate != Traits::Cond::Cmpps_Invalid);
John Porto7e93c622015-06-23 10:58:57 -07002619 T = makeReg(Src0RM->getType());
2620 _movp(T, Src0RM);
2621 _cmpps(T, Src1RM, Predicate);
2622 } break;
2623 case InstFcmp::One: {
2624 // Check both unequal and ordered.
2625 T = makeReg(Src0RM->getType());
2626 Variable *T2 = makeReg(Src0RM->getType());
2627 _movp(T, Src0RM);
John Porto5d0acff2015-06-30 15:29:21 -07002628 _cmpps(T, Src1RM, Traits::Cond::Cmpps_neq);
John Porto7e93c622015-06-23 10:58:57 -07002629 _movp(T2, Src0RM);
John Porto5d0acff2015-06-30 15:29:21 -07002630 _cmpps(T2, Src1RM, Traits::Cond::Cmpps_ord);
John Porto7e93c622015-06-23 10:58:57 -07002631 _pand(T, T2);
2632 } break;
2633 case InstFcmp::Ueq: {
2634 // Check for either equal or unordered; "por" combines the two masks.
2635 T = makeReg(Src0RM->getType());
2636 Variable *T2 = makeReg(Src0RM->getType());
2637 _movp(T, Src0RM);
John Porto5d0acff2015-06-30 15:29:21 -07002638 _cmpps(T, Src1RM, Traits::Cond::Cmpps_eq);
John Porto7e93c622015-06-23 10:58:57 -07002639 _movp(T2, Src0RM);
John Porto5d0acff2015-06-30 15:29:21 -07002640 _cmpps(T2, Src1RM, Traits::Cond::Cmpps_unord);
John Porto7e93c622015-06-23 10:58:57 -07002641 _por(T, T2);
2642 } break;
2643 }
2644 }
2645
2646 _movp(Dest, T);
2647 eliminateNextVectorSextInstruction(Dest);
2648 return;
2649 }
2650
2651 // Lowering a = fcmp cond, b, c
2652 // ucomiss b, c /* only if C1 != Br_None */
2653 // /* but swap b,c order if SwapScalarOperands==true */
2654 // mov a, <default>
2655 // j<C1> label /* only if C1 != Br_None */
2656 // j<C2> label /* only if C2 != Br_None */
2657 // FakeUse(a) /* only if C1 != Br_None */
2658 // mov a, !<default> /* only if C1 != Br_None */
2659 // label: /* only if C1 != Br_None */
2660 //
2661 // setcc lowering when C1 != Br_None && C2 == Br_None:
2662 // ucomiss b, c /* but swap b,c order if SwapScalarOperands==true */
2663 // setcc a, C1
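//
// Illustrative instance (a sketch, not emitted verbatim; the exact entries
// live in TableFcmp): assuming Olt swaps its operands and maps to the
// unsigned "above" condition with no C2, "a = fcmp olt b, c" lowers as:
//   ucomiss c, b
//   seta a
// The swap is needed because ucomiss sets CF for both "less than" and
// "unordered", so testing "above" on the swapped operands is the
// single-condition encoding that excludes NaNs.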
2664 InstFcmp::FCond Condition = Inst->getCondition();
2665 size_t Index = static_cast<size_t>(Condition);
2666 assert(Index < Traits::TableFcmpSize);
2667 if (Traits::TableFcmp[Index].SwapScalarOperands)
2668 std::swap(Src0, Src1);
John Porto5d0acff2015-06-30 15:29:21 -07002669 bool HasC1 = (Traits::TableFcmp[Index].C1 != Traits::Cond::Br_None);
2670 bool HasC2 = (Traits::TableFcmp[Index].C2 != Traits::Cond::Br_None);
John Porto7e93c622015-06-23 10:58:57 -07002671 if (HasC1) {
2672 Src0 = legalize(Src0);
2673 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
2674 Variable *T = nullptr;
2675 _mov(T, Src0);
2676 _ucomiss(T, Src1RM);
2677 if (!HasC2) {
2678 assert(Traits::TableFcmp[Index].Default);
2679 _setcc(Dest, Traits::TableFcmp[Index].C1);
2680 return;
2681 }
2682 }
Jim Stichnoth1fb030c2015-10-15 11:10:38 -07002683 Constant *Default =
2684 Ctx->getConstantInt(Dest->getType(), Traits::TableFcmp[Index].Default);
John Porto7e93c622015-06-23 10:58:57 -07002685 _mov(Dest, Default);
2686 if (HasC1) {
John Porto921856d2015-07-07 11:56:26 -07002687 typename Traits::Insts::Label *Label =
2688 Traits::Insts::Label::create(Func, this);
John Porto7e93c622015-06-23 10:58:57 -07002689 _br(Traits::TableFcmp[Index].C1, Label);
2690 if (HasC2) {
2691 _br(Traits::TableFcmp[Index].C2, Label);
2692 }
2693 Constant *NonDefault =
Jim Stichnoth1fb030c2015-10-15 11:10:38 -07002694 Ctx->getConstantInt(Dest->getType(), !Traits::TableFcmp[Index].Default);
Jim Stichnoth230d4102015-09-25 17:40:32 -07002695 _mov_redefined(Dest, NonDefault);
John Porto7e93c622015-06-23 10:58:57 -07002696 Context.insert(Label);
2697 }
2698}
2699
David Sehr5c875422015-10-15 10:38:53 -07002700inline bool isZero(const Operand *Opnd) {
2701 if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Opnd))
2702 return C64->getValue() == 0;
2703 if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(Opnd))
2704 return C32->getValue() == 0;
2705 return false;
2706}
2707
John Porto7e93c622015-06-23 10:58:57 -07002708template <class Machine>
2709void TargetX86Base<Machine>::lowerIcmp(const InstIcmp *Inst) {
David Sehrd9810252015-10-16 13:23:17 -07002710 constexpr InstBr *Br = nullptr;
2711 lowerIcmpAndBr(Inst, Br);
2712}
2713
2714template <class Machine>
2715void TargetX86Base<Machine>::lowerIcmpAndBr(const InstIcmp *Icmp,
2716 const InstBr *Br) {
2717 Operand *Src0 = legalize(Icmp->getSrc(0));
2718 Operand *Src1 = legalize(Icmp->getSrc(1));
2719 Variable *Dest = Icmp->getDest();
John Porto7e93c622015-06-23 10:58:57 -07002720
2721 if (isVectorType(Dest->getType())) {
David Sehrd9810252015-10-16 13:23:17 -07002722 if (Br)
2723 llvm::report_fatal_error("vector compare/branch cannot be folded");
John Porto7e93c622015-06-23 10:58:57 -07002724 Type Ty = Src0->getType();
2725 // Promote i1 vectors to 128 bit integer vector types.
2726 if (typeElementType(Ty) == IceType_i1) {
2727 Type NewTy = IceType_NUM;
2728 switch (Ty) {
2729 default:
2730 llvm_unreachable("unexpected type");
2731 break;
2732 case IceType_v4i1:
2733 NewTy = IceType_v4i32;
2734 break;
2735 case IceType_v8i1:
2736 NewTy = IceType_v8i16;
2737 break;
2738 case IceType_v16i1:
2739 NewTy = IceType_v16i8;
2740 break;
2741 }
John Porto5aeed952015-07-21 13:39:09 -07002742 Variable *NewSrc0 = Func->makeVariable(NewTy);
2743 Variable *NewSrc1 = Func->makeVariable(NewTy);
John Porto7e93c622015-06-23 10:58:57 -07002744 lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc0, Src0));
2745 lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc1, Src1));
2746 Src0 = NewSrc0;
2747 Src1 = NewSrc1;
2748 Ty = NewTy;
2749 }
2750
David Sehrd9810252015-10-16 13:23:17 -07002751 InstIcmp::ICond Condition = Icmp->getCondition();
John Porto7e93c622015-06-23 10:58:57 -07002752
2753 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2754 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
2755
Andrew Scull57e12682015-09-16 11:30:19 -07002756 // SSE2 only has signed comparison operations. Transform unsigned inputs in
2757 // a manner that allows for the use of signed comparison operations by
2758 // flipping the high order bits.
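// A sketch of the identity being used:
//   a <u b <==> (a ^ 0x80000000) <s (b ^ 0x80000000)
// e.g. 0xFFFFFFFF <u 0x00000001 is false, and after flipping the sign bits
// it becomes 0x7FFFFFFF <s 0x80000001, which is also false.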
John Porto7e93c622015-06-23 10:58:57 -07002759 if (Condition == InstIcmp::Ugt || Condition == InstIcmp::Uge ||
2760 Condition == InstIcmp::Ult || Condition == InstIcmp::Ule) {
2761 Variable *T0 = makeReg(Ty);
2762 Variable *T1 = makeReg(Ty);
2763 Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty);
2764 _movp(T0, Src0RM);
2765 _pxor(T0, HighOrderBits);
2766 _movp(T1, Src1RM);
2767 _pxor(T1, HighOrderBits);
2768 Src0RM = T0;
2769 Src1RM = T1;
2770 }
2771
2772 Variable *T = makeReg(Ty);
2773 switch (Condition) {
2774 default:
2775 llvm_unreachable("unexpected condition");
2776 break;
2777 case InstIcmp::Eq: {
John Porto921856d2015-07-07 11:56:26 -07002778 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
Andrew Scull97f460d2015-07-21 10:07:42 -07002779 Src1RM = legalizeToReg(Src1RM);
John Porto7e93c622015-06-23 10:58:57 -07002780 _movp(T, Src0RM);
2781 _pcmpeq(T, Src1RM);
2782 } break;
2783 case InstIcmp::Ne: {
John Porto921856d2015-07-07 11:56:26 -07002784 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
Andrew Scull97f460d2015-07-21 10:07:42 -07002785 Src1RM = legalizeToReg(Src1RM);
John Porto7e93c622015-06-23 10:58:57 -07002786 _movp(T, Src0RM);
2787 _pcmpeq(T, Src1RM);
2788 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
2789 _pxor(T, MinusOne);
2790 } break;
2791 case InstIcmp::Ugt:
2792 case InstIcmp::Sgt: {
John Porto921856d2015-07-07 11:56:26 -07002793 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
Andrew Scull97f460d2015-07-21 10:07:42 -07002794 Src1RM = legalizeToReg(Src1RM);
John Porto7e93c622015-06-23 10:58:57 -07002795 _movp(T, Src0RM);
2796 _pcmpgt(T, Src1RM);
2797 } break;
2798 case InstIcmp::Uge:
2799 case InstIcmp::Sge: {
2800 // !(Src1RM > Src0RM)
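// (the pxor with an all-ones vector below implements the bitwise NOT)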
John Porto921856d2015-07-07 11:56:26 -07002801 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
Andrew Scull97f460d2015-07-21 10:07:42 -07002802 Src0RM = legalizeToReg(Src0RM);
John Porto7e93c622015-06-23 10:58:57 -07002803 _movp(T, Src1RM);
2804 _pcmpgt(T, Src0RM);
2805 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
2806 _pxor(T, MinusOne);
2807 } break;
2808 case InstIcmp::Ult:
2809 case InstIcmp::Slt: {
John Porto921856d2015-07-07 11:56:26 -07002810 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
Andrew Scull97f460d2015-07-21 10:07:42 -07002811 Src0RM = legalizeToReg(Src0RM);
John Porto7e93c622015-06-23 10:58:57 -07002812 _movp(T, Src1RM);
2813 _pcmpgt(T, Src0RM);
2814 } break;
2815 case InstIcmp::Ule:
2816 case InstIcmp::Sle: {
2817 // !(Src0RM > Src1RM)
John Porto921856d2015-07-07 11:56:26 -07002818 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
Andrew Scull97f460d2015-07-21 10:07:42 -07002819 Src1RM = legalizeToReg(Src1RM);
John Porto7e93c622015-06-23 10:58:57 -07002820 _movp(T, Src0RM);
2821 _pcmpgt(T, Src1RM);
2822 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
2823 _pxor(T, MinusOne);
2824 } break;
2825 }
2826
2827 _movp(Dest, T);
2828 eliminateNextVectorSextInstruction(Dest);
2829 return;
2830 }
2831
John Porto1d235422015-08-12 12:37:53 -07002832 if (!Traits::Is64Bit && Src0->getType() == IceType_i64) {
David Sehrd9810252015-10-16 13:23:17 -07002833 lowerIcmp64(Icmp, Br);
John Porto7e93c622015-06-23 10:58:57 -07002834 return;
2835 }
2836
2837 // cmp b, c
David Sehr5c875422015-10-15 10:38:53 -07002838 if (isZero(Src1)) {
David Sehrd9810252015-10-16 13:23:17 -07002839 switch (Icmp->getCondition()) {
David Sehr5c875422015-10-15 10:38:53 -07002840 default:
2841 break;
2842 case InstIcmp::Uge:
David Sehrd9810252015-10-16 13:23:17 -07002843 movOrBr(true, Dest, Br);
David Sehr5c875422015-10-15 10:38:53 -07002844 return;
2845 case InstIcmp::Ult:
David Sehrd9810252015-10-16 13:23:17 -07002846 movOrBr(false, Dest, Br);
David Sehr5c875422015-10-15 10:38:53 -07002847 return;
2848 }
2849 }
John Porto7e93c622015-06-23 10:58:57 -07002850 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1);
2851 _cmp(Src0RM, Src1);
David Sehrd9810252015-10-16 13:23:17 -07002852 setccOrBr(Traits::getIcmp32Mapping(Icmp->getCondition()), Dest, Br);
John Porto7e93c622015-06-23 10:58:57 -07002853}
2854
John Porto1d235422015-08-12 12:37:53 -07002855template <typename Machine>
2856template <typename T>
2857typename std::enable_if<!T::Is64Bit, void>::type
David Sehrd9810252015-10-16 13:23:17 -07002858TargetX86Base<Machine>::lowerIcmp64(const InstIcmp *Icmp, const InstBr *Br) {
John Porto1d235422015-08-12 12:37:53 -07002859 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
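//
// As a sketch of the general 64-bit case (the actual conditions come from
// TableIcmp64), "a = icmp ult, b, c" expands along the lines of:
//   mov a, 1
//   cmp b.hi, c.hi
//   jb .LTrue       ; high halves already decide "below"
//   ja .LFalse      ; high halves already decide "not below"
//   cmp b.lo, c.lo
//   jb .LTrue
// .LFalse:
//   mov a, 0
// .LTrue:
// where .LTrue/.LFalse are illustrative label names only.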
David Sehrd9810252015-10-16 13:23:17 -07002860 Operand *Src0 = legalize(Icmp->getSrc(0));
2861 Operand *Src1 = legalize(Icmp->getSrc(1));
2862 Variable *Dest = Icmp->getDest();
2863 InstIcmp::ICond Condition = Icmp->getCondition();
John Porto1d235422015-08-12 12:37:53 -07002864 size_t Index = static_cast<size_t>(Condition);
2865 assert(Index < Traits::TableIcmp64Size);
David Sehr5c875422015-10-15 10:38:53 -07002866 Operand *Src0LoRM = nullptr;
2867 Operand *Src0HiRM = nullptr;
2868 // Legalize the portions of Src0 that are going to be needed.
2869 if (isZero(Src1)) {
2870 switch (Condition) {
2871 default:
2872 llvm_unreachable("unexpected condition");
2873 break;
2874 // These two are not optimized, so we fall through to the general case,
2875 // which needs the upper and lower halves legalized.
2876 case InstIcmp::Sgt:
2877 case InstIcmp::Sle:
Jim Stichnoth1fb030c2015-10-15 11:10:38 -07002878 // These four compare after performing an "or" of the high and low halves,
2879 // so they need both halves legalized.
David Sehr5c875422015-10-15 10:38:53 -07002880 case InstIcmp::Eq:
2881 case InstIcmp::Ule:
2882 case InstIcmp::Ne:
2883 case InstIcmp::Ugt:
2884 Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem);
2885 // These two test only the high half's sign bit, so they need only
2886 // the upper half legalized.
2887 case InstIcmp::Sge:
2888 case InstIcmp::Slt:
2889 Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem);
2890 break;
2891
2892 // These two move constants and hence need no legalization.
2893 case InstIcmp::Uge:
2894 case InstIcmp::Ult:
2895 break;
2896 }
2897 } else {
2898 Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem);
2899 Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem);
2900 }
2901 // Optimize comparisons with zero.
2902 if (isZero(Src1)) {
2903 Constant *SignMask = Ctx->getConstantInt32(0x80000000);
2904 Variable *Temp = nullptr;
2905 switch (Condition) {
2906 default:
2907 llvm_unreachable("unexpected condition");
2908 break;
2909 case InstIcmp::Eq:
2910 case InstIcmp::Ule:
David Sehraa0b1a12015-10-27 16:55:40 -07002911 // Mov Src0HiRM first, because it was legalized most recently, and will
2912 // sometimes avoid a move before the OR.
2913 _mov(Temp, Src0HiRM);
2914 _or(Temp, Src0LoRM);
David Sehr5c875422015-10-15 10:38:53 -07002915 Context.insert(InstFakeUse::create(Func, Temp));
David Sehrd9810252015-10-16 13:23:17 -07002916 setccOrBr(Traits::Cond::Br_e, Dest, Br);
David Sehr5c875422015-10-15 10:38:53 -07002917 return;
2918 case InstIcmp::Ne:
2919 case InstIcmp::Ugt:
David Sehraa0b1a12015-10-27 16:55:40 -07002920 // Mov Src0HiRM first, because it was legalized most recently, and will
2921 // sometimes avoid a move before the OR.
2922 _mov(Temp, Src0HiRM);
2923 _or(Temp, Src0LoRM);
David Sehr5c875422015-10-15 10:38:53 -07002924 Context.insert(InstFakeUse::create(Func, Temp));
David Sehrd9810252015-10-16 13:23:17 -07002925 setccOrBr(Traits::Cond::Br_ne, Dest, Br);
David Sehr5c875422015-10-15 10:38:53 -07002926 return;
2927 case InstIcmp::Uge:
David Sehrd9810252015-10-16 13:23:17 -07002928 movOrBr(true, Dest, Br);
David Sehr5c875422015-10-15 10:38:53 -07002929 return;
2930 case InstIcmp::Ult:
David Sehrd9810252015-10-16 13:23:17 -07002931 movOrBr(false, Dest, Br);
David Sehr5c875422015-10-15 10:38:53 -07002932 return;
2933 case InstIcmp::Sgt:
2934 break;
2935 case InstIcmp::Sge:
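// With Src1 == 0, "sge" holds exactly when the sign bit of the high half
// is clear; "test" against SignMask sets ZF in that case, so Br_e selects
// the true result. (Slt below is the inverse and uses Br_ne.)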
2936 _test(Src0HiRM, SignMask);
David Sehrd9810252015-10-16 13:23:17 -07002937 setccOrBr(Traits::Cond::Br_e, Dest, Br);
David Sehr5c875422015-10-15 10:38:53 -07002938 return;
2939 case InstIcmp::Slt:
2940 _test(Src0HiRM, SignMask);
David Sehrd9810252015-10-16 13:23:17 -07002941 setccOrBr(Traits::Cond::Br_ne, Dest, Br);
David Sehr5c875422015-10-15 10:38:53 -07002942 return;
2943 case InstIcmp::Sle:
2944 break;
2945 }
2946 }
2947 // Handle general compares.
2948 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
2949 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
David Sehrd9810252015-10-16 13:23:17 -07002950 if (Br == nullptr) {
2951 Constant *Zero = Ctx->getConstantInt(Dest->getType(), 0);
2952 Constant *One = Ctx->getConstantInt(Dest->getType(), 1);
2953 typename Traits::Insts::Label *LabelFalse =
2954 Traits::Insts::Label::create(Func, this);
2955 typename Traits::Insts::Label *LabelTrue =
2956 Traits::Insts::Label::create(Func, this);
2957 _mov(Dest, One);
2958 _cmp(Src0HiRM, Src1HiRI);
2959 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None)
2960 _br(Traits::TableIcmp64[Index].C1, LabelTrue);
2961 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None)
2962 _br(Traits::TableIcmp64[Index].C2, LabelFalse);
2963 _cmp(Src0LoRM, Src1LoRI);
2964 _br(Traits::TableIcmp64[Index].C3, LabelTrue);
2965 Context.insert(LabelFalse);
2966 _mov_redefined(Dest, Zero);
2967 Context.insert(LabelTrue);
2968 } else {
2969 _cmp(Src0HiRM, Src1HiRI);
2970 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None)
2971 _br(Traits::TableIcmp64[Index].C1, Br->getTargetTrue());
2972 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None)
2973 _br(Traits::TableIcmp64[Index].C2, Br->getTargetFalse());
2974 _cmp(Src0LoRM, Src1LoRI);
2975 _br(Traits::TableIcmp64[Index].C3, Br->getTargetTrue(),
2976 Br->getTargetFalse());
2977 }
2978}
2979
2980template <class Machine>
2981void TargetX86Base<Machine>::setccOrBr(typename Traits::Cond::BrCond Condition,
2982 Variable *Dest, const InstBr *Br) {
2983 if (Br == nullptr) {
2984 _setcc(Dest, Condition);
2985 } else {
2986 _br(Condition, Br->getTargetTrue(), Br->getTargetFalse());
2987 }
2988}
2989
2990template <class Machine>
2991void TargetX86Base<Machine>::movOrBr(bool IcmpResult, Variable *Dest,
2992 const InstBr *Br) {
2993 if (Br == nullptr) {
2994 _mov(Dest, Ctx->getConstantInt(Dest->getType(), (IcmpResult ? 1 : 0)));
2995 } else {
2996 // TODO(sehr,stichnot): This could be done with a single unconditional
2997 // branch instruction, but Subzero doesn't yet know how to handle the
2998 // resulting control-flow graph changes. Fix that to eliminate the mov and cmp.
2999 _mov(Dest, Ctx->getConstantInt(Dest->getType(), (IcmpResult ? 1 : 0)));
3000 _cmp(Dest, Ctx->getConstantInt(Dest->getType(), 0));
3001 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse());
3002 }
John Porto1d235422015-08-12 12:37:53 -07003003}
3004
John Porto7e93c622015-06-23 10:58:57 -07003005template <class Machine>
3006void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) {
3007 Operand *SourceVectNotLegalized = Inst->getSrc(0);
3008 Operand *ElementToInsertNotLegalized = Inst->getSrc(1);
3009 ConstantInteger32 *ElementIndex =
3010 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2));
3011 // Only constant indices are allowed in PNaCl IR.
3012 assert(ElementIndex);
3013 unsigned Index = ElementIndex->getValue();
3014 assert(Index < typeNumElements(SourceVectNotLegalized->getType()));
3015
3016 Type Ty = SourceVectNotLegalized->getType();
3017 Type ElementTy = typeElementType(Ty);
3018 Type InVectorElementTy = Traits::getInVectorElementType(Ty);
3019
3020 if (ElementTy == IceType_i1) {
Andrew Scull57e12682015-09-16 11:30:19 -07003021 // Expand the element to the appropriate size for it to be inserted in the
3022 // vector.
John Porto5aeed952015-07-21 13:39:09 -07003023 Variable *Expanded = Func->makeVariable(InVectorElementTy);
John Porto7e93c622015-06-23 10:58:57 -07003024 InstCast *Cast = InstCast::create(Func, InstCast::Zext, Expanded,
3025 ElementToInsertNotLegalized);
3026 lowerCast(Cast);
3027 ElementToInsertNotLegalized = Expanded;
3028 }
3029
3030 if (Ty == IceType_v8i16 || Ty == IceType_v8i1 ||
John Porto5d0acff2015-06-30 15:29:21 -07003031 InstructionSet >= Traits::SSE4_1) {
John Porto7e93c622015-06-23 10:58:57 -07003032 // Use insertps, pinsrb, pinsrw, or pinsrd.
3033 Operand *ElementRM =
3034 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem);
3035 Operand *SourceVectRM =
3036 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
3037 Variable *T = makeReg(Ty);
3038 _movp(T, SourceVectRM);
3039 if (Ty == IceType_v4f32)
3040 _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4));
3041 else
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07003042 // TODO(stichnot): For the pinsrb and pinsrw instructions, when the source
3043 // operand is a register, it must be a full r32 register like eax, and not
3044 // ax/al/ah. For filetype=asm, InstX86Pinsr<Machine>::emit() compensates
3045 // for the use of r16 and r8 by converting them through getBaseReg(),
3046 // while emitIAS() validates that the original and base register encodings
3047 // are the same. But for an "interior" register like ah, it should
3048 // probably be copied into an r32 via movzx so that the types work out.
John Porto7e93c622015-06-23 10:58:57 -07003049 _pinsr(T, ElementRM, Ctx->getConstantInt32(Index));
3050 _movp(Inst->getDest(), T);
3051 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
3052 // Use shufps or movss.
3053 Variable *ElementR = nullptr;
3054 Operand *SourceVectRM =
3055 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
3056
3057 if (InVectorElementTy == IceType_f32) {
3058 // ElementR will be in an XMM register since it is floating point.
Andrew Scull97f460d2015-07-21 10:07:42 -07003059 ElementR = legalizeToReg(ElementToInsertNotLegalized);
John Porto7e93c622015-06-23 10:58:57 -07003060 } else {
3061 // Copy an integer to an XMM register.
3062 Operand *T = legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem);
3063 ElementR = makeReg(Ty);
3064 _movd(ElementR, T);
3065 }
3066
3067 if (Index == 0) {
3068 Variable *T = makeReg(Ty);
3069 _movp(T, SourceVectRM);
3070 _movss(T, ElementR);
3071 _movp(Inst->getDest(), T);
3072 return;
3073 }
3074
Andrew Scull57e12682015-09-16 11:30:19 -07003075 // shufps treats the source and destination operands as vectors of four
3076 // doublewords. The destination's two high doublewords are selected from
3077 // the source operand and the two low doublewords are selected from the
3078 // (original value of) the destination operand. An insertelement operation
3079 // can be effected with a sequence of two shufps operations with
3080 // appropriate masks. In all cases below, Element[0] is being inserted into
3081 // SourceVectOperand. Indices are ordered from left to right.
John Porto7e93c622015-06-23 10:58:57 -07003082 //
3083 // insertelement into index 1 (result is stored in ElementR):
3084 // ElementR := ElementR[0, 0] SourceVectRM[0, 0]
3085 // ElementR := ElementR[3, 0] SourceVectRM[2, 3]
3086 //
3087 // insertelement into index 2 (result is stored in T):
3088 // T := SourceVectRM
3089 // ElementR := ElementR[0, 0] T[0, 3]
3090 // T := T[0, 1] ElementR[0, 3]
3091 //
3092 // insertelement into index 3 (result is stored in T):
3093 // T := SourceVectRM
3094 // ElementR := ElementR[0, 0] T[0, 2]
3095 // T := T[0, 1] ElementR[3, 0]
3096 const unsigned char Mask1[3] = {0, 192, 128};
3097 const unsigned char Mask2[3] = {227, 196, 52};
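// Decoding sketch of the shufps imm8 encoding used above: each two-bit
// field, from least to most significant, selects doubleword 0..3; the low
// two fields pick from the first (dest) operand and the high two from the
// second (src) operand. E.g. Mask2[0] == 227 == 0b11100011 encodes the
// selectors (3, 0, 2, 3), i.e. ElementR[3, 0] SourceVectRM[2, 3] in the
// left-to-right notation of the comment above.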
3098
3099 Constant *Mask1Constant = Ctx->getConstantInt32(Mask1[Index - 1]);
3100 Constant *Mask2Constant = Ctx->getConstantInt32(Mask2[Index - 1]);
3101
3102 if (Index == 1) {
3103 _shufps(ElementR, SourceVectRM, Mask1Constant);
3104 _shufps(ElementR, SourceVectRM, Mask2Constant);
3105 _movp(Inst->getDest(), ElementR);
3106 } else {
3107 Variable *T = makeReg(Ty);
3108 _movp(T, SourceVectRM);
3109 _shufps(ElementR, T, Mask1Constant);
3110 _shufps(T, ElementR, Mask2Constant);
3111 _movp(Inst->getDest(), T);
3112 }
3113 } else {
3114 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
Andrew Scull57e12682015-09-16 11:30:19 -07003115 // Spill the value to a stack slot and perform the insertion in memory.
John Porto7e93c622015-06-23 10:58:57 -07003116 //
Andrew Scull57e12682015-09-16 11:30:19 -07003117 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when support
3118 // for legalizing to mem is implemented.
John Porto5aeed952015-07-21 13:39:09 -07003119 Variable *Slot = Func->makeVariable(Ty);
Andrew Scull11c9a322015-08-28 14:24:14 -07003120 Slot->setMustNotHaveReg();
Andrew Scull97f460d2015-07-21 10:07:42 -07003121 _movp(Slot, legalizeToReg(SourceVectNotLegalized));
John Porto7e93c622015-06-23 10:58:57 -07003122
3123 // Compute the location of the position to insert in memory.
3124 unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
John Porto921856d2015-07-07 11:56:26 -07003125 typename Traits::X86OperandMem *Loc =
John Porto7e93c622015-06-23 10:58:57 -07003126 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
Andrew Scull97f460d2015-07-21 10:07:42 -07003127 _store(legalizeToReg(ElementToInsertNotLegalized), Loc);
John Porto7e93c622015-06-23 10:58:57 -07003128
3129 Variable *T = makeReg(Ty);
3130 _movp(T, Slot);
3131 _movp(Inst->getDest(), T);
3132 }
3133}
3134
3135template <class Machine>
3136void TargetX86Base<Machine>::lowerIntrinsicCall(
3137 const InstIntrinsicCall *Instr) {
3138 switch (Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID) {
3139 case Intrinsics::AtomicCmpxchg: {
3140 if (!Intrinsics::isMemoryOrderValid(
3141 ID, getConstantMemoryOrder(Instr->getArg(3)),
3142 getConstantMemoryOrder(Instr->getArg(4)))) {
3143 Func->setError("Unexpected memory ordering for AtomicCmpxchg");
3144 return;
3145 }
3146 Variable *DestPrev = Instr->getDest();
Jan Voungfbdd2442015-07-15 12:36:20 -07003147 Operand *PtrToMem = legalize(Instr->getArg(0));
3148 Operand *Expected = legalize(Instr->getArg(1));
3149 Operand *Desired = legalize(Instr->getArg(2));
John Porto7e93c622015-06-23 10:58:57 -07003150 if (tryOptimizedCmpxchgCmpBr(DestPrev, PtrToMem, Expected, Desired))
3151 return;
3152 lowerAtomicCmpxchg(DestPrev, PtrToMem, Expected, Desired);
3153 return;
3154 }
3155 case Intrinsics::AtomicFence:
3156 if (!Intrinsics::isMemoryOrderValid(
3157 ID, getConstantMemoryOrder(Instr->getArg(0)))) {
3158 Func->setError("Unexpected memory ordering for AtomicFence");
3159 return;
3160 }
3161 _mfence();
3162 return;
3163 case Intrinsics::AtomicFenceAll:
Andrew Scull57e12682015-09-16 11:30:19 -07003164 // NOTE: FenceAll should prevent any load/store from being moved across the
3165 // fence (both atomic and non-atomic). The InstX8632Mfence instruction is
3166 // currently marked coarsely as "HasSideEffects".
John Porto7e93c622015-06-23 10:58:57 -07003167 _mfence();
3168 return;
3169 case Intrinsics::AtomicIsLockFree: {
3170 // X86 is always lock free for 8/16/32/64 bit accesses.
Andrew Scull57e12682015-09-16 11:30:19 -07003171 // TODO(jvoung): Since the result is constant when given a constant byte
3172 // size, this opens up DCE opportunities.
John Porto7e93c622015-06-23 10:58:57 -07003173 Operand *ByteSize = Instr->getArg(0);
3174 Variable *Dest = Instr->getDest();
3175 if (ConstantInteger32 *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize)) {
3176 Constant *Result;
3177 switch (CI->getValue()) {
3178 default:
Andrew Scull57e12682015-09-16 11:30:19 -07003179 // Some x86-64 processors support the cmpxchg16b instruction, which can
3180 // make 16-byte operations lock free (when used with the LOCK prefix).
3181 // However, that's not supported in 32-bit mode, so just return 0 even
3182 // for large sizes.
John Porto7e93c622015-06-23 10:58:57 -07003183 Result = Ctx->getConstantZero(IceType_i32);
3184 break;
3185 case 1:
3186 case 2:
3187 case 4:
3188 case 8:
3189 Result = Ctx->getConstantInt32(1);
3190 break;
3191 }
3192 _mov(Dest, Result);
3193 return;
3194 }
3195 // The PNaCl ABI requires the byte size to be a compile-time constant.
3196 Func->setError("AtomicIsLockFree byte size should be compile-time const");
3197 return;
3198 }
3199 case Intrinsics::AtomicLoad: {
Andrew Scull57e12682015-09-16 11:30:19 -07003200 // We require the memory address to be naturally aligned, and given that,
3201 // normal loads are atomic.
John Porto7e93c622015-06-23 10:58:57 -07003202 if (!Intrinsics::isMemoryOrderValid(
3203 ID, getConstantMemoryOrder(Instr->getArg(1)))) {
3204 Func->setError("Unexpected memory ordering for AtomicLoad");
3205 return;
3206 }
3207 Variable *Dest = Instr->getDest();
Andrew Scull6d47bcd2015-09-17 17:10:05 -07003208 if (!Traits::Is64Bit) {
3209 if (auto *Dest64On32 = llvm::dyn_cast<Variable64On32>(Dest)) {
3210 // Follow what GCC does and use a movq instead of what lowerLoad()
3211 // normally does (split the load into two). Thus, this skips
3212 // load/arithmetic op folding. Load/arithmetic folding can't happen
3213 // anyway, since this is x86-32 and integer arithmetic only happens on
3214 // 32-bit quantities.
3215 Variable *T = makeReg(IceType_f64);
3216 typename Traits::X86OperandMem *Addr =
3217 formMemoryOperand(Instr->getArg(0), IceType_f64);
3218 _movq(T, Addr);
3219 // Then cast the bits back out of the XMM register to the i64 Dest.
3220 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T);
3221 lowerCast(Cast);
3222 // Make sure that the atomic load isn't elided when unused.
3223 Context.insert(InstFakeUse::create(Func, Dest64On32->getLo()));
3224 Context.insert(InstFakeUse::create(Func, Dest64On32->getHi()));
3225 return;
3226 }
John Porto7e93c622015-06-23 10:58:57 -07003227 }
3228 InstLoad *Load = InstLoad::create(Func, Dest, Instr->getArg(0));
3229 lowerLoad(Load);
3230 // Make sure the atomic load isn't elided when unused, by adding a FakeUse.
Andrew Scull57e12682015-09-16 11:30:19 -07003231 // Since lowerLoad may fuse the load w/ an arithmetic instruction, insert
3232 // the FakeUse on the last-inserted instruction's dest.
John Porto7e93c622015-06-23 10:58:57 -07003233 Context.insert(
3234 InstFakeUse::create(Func, Context.getLastInserted()->getDest()));
3235 return;
3236 }
3237 case Intrinsics::AtomicRMW:
3238 if (!Intrinsics::isMemoryOrderValid(
3239 ID, getConstantMemoryOrder(Instr->getArg(3)))) {
3240 Func->setError("Unexpected memory ordering for AtomicRMW");
3241 return;
3242 }
Jim Stichnoth20b71f52015-06-24 15:52:24 -07003243 lowerAtomicRMW(
3244 Instr->getDest(),
3245 static_cast<uint32_t>(
3246 llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()),
3247 Instr->getArg(1), Instr->getArg(2));
John Porto7e93c622015-06-23 10:58:57 -07003248 return;
3249 case Intrinsics::AtomicStore: {
3250 if (!Intrinsics::isMemoryOrderValid(
3251 ID, getConstantMemoryOrder(Instr->getArg(2)))) {
3252 Func->setError("Unexpected memory ordering for AtomicStore");
3253 return;
3254 }
Andrew Scull57e12682015-09-16 11:30:19 -07003255 // We require the memory address to be naturally aligned, and given that,
3256 // normal stores are atomic. Add a fence after the store to make it
3257 // visible.
John Porto7e93c622015-06-23 10:58:57 -07003258 Operand *Value = Instr->getArg(0);
3259 Operand *Ptr = Instr->getArg(1);
John Porto1d235422015-08-12 12:37:53 -07003260 if (!Traits::Is64Bit && Value->getType() == IceType_i64) {
Andrew Scull57e12682015-09-16 11:30:19 -07003261 // Use a movq instead of what lowerStore() normally does (split the store
3262 // into two), following what GCC does. Cast the bits from int into an
3263 // xmm register first.
John Porto7e93c622015-06-23 10:58:57 -07003264 Variable *T = makeReg(IceType_f64);
3265 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value);
3266 lowerCast(Cast);
3267 // Then store XMM w/ a movq.
John Porto921856d2015-07-07 11:56:26 -07003268 typename Traits::X86OperandMem *Addr =
3269 formMemoryOperand(Ptr, IceType_f64);
John Porto7e93c622015-06-23 10:58:57 -07003270 _storeq(T, Addr);
3271 _mfence();
3272 return;
3273 }
3274 InstStore *Store = InstStore::create(Func, Value, Ptr);
3275 lowerStore(Store);
3276 _mfence();
3277 return;
3278 }
3279 case Intrinsics::Bswap: {
3280 Variable *Dest = Instr->getDest();
3281 Operand *Val = Instr->getArg(0);
Andrew Scull57e12682015-09-16 11:30:19 -07003282 // In 32-bit mode, bswap only works on 32-bit arguments, and the argument
3283 // must be a register. Use rotate left for 16-bit bswap.
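// (For i16, "rol ax, 8" exchanges the low and high bytes, which is exactly
// a 16-bit byte swap.)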
John Porto1d235422015-08-12 12:37:53 -07003284 if (!Traits::Is64Bit && Val->getType() == IceType_i64) {
Jan Voungfbdd2442015-07-15 12:36:20 -07003285 Val = legalizeUndef(Val);
Andrew Scull97f460d2015-07-21 10:07:42 -07003286 Variable *T_Lo = legalizeToReg(loOperand(Val));
3287 Variable *T_Hi = legalizeToReg(hiOperand(Val));
John Porto7e93c622015-06-23 10:58:57 -07003288 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
3289 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3290 _bswap(T_Lo);
3291 _bswap(T_Hi);
3292 _mov(DestLo, T_Hi);
3293 _mov(DestHi, T_Lo);
John Porto1d235422015-08-12 12:37:53 -07003294 } else if ((Traits::Is64Bit && Val->getType() == IceType_i64) ||
3295 Val->getType() == IceType_i32) {
Andrew Scull97f460d2015-07-21 10:07:42 -07003296 Variable *T = legalizeToReg(Val);
John Porto7e93c622015-06-23 10:58:57 -07003297 _bswap(T);
3298 _mov(Dest, T);
3299 } else {
3300 assert(Val->getType() == IceType_i16);
John Porto7e93c622015-06-23 10:58:57 -07003301 Constant *Eight = Ctx->getConstantInt16(8);
3302 Variable *T = nullptr;
Jan Voungfbdd2442015-07-15 12:36:20 -07003303 Val = legalize(Val);
John Porto7e93c622015-06-23 10:58:57 -07003304 _mov(T, Val);
3305 _rol(T, Eight);
3306 _mov(Dest, T);
3307 }
3308 return;
3309 }
3310 case Intrinsics::Ctpop: {
3311 Variable *Dest = Instr->getDest();
John Porto1d235422015-08-12 12:37:53 -07003312 Variable *T = nullptr;
John Porto7e93c622015-06-23 10:58:57 -07003313 Operand *Val = Instr->getArg(0);
John Porto1d235422015-08-12 12:37:53 -07003314 Type ValTy = Val->getType();
3315 assert(ValTy == IceType_i32 || ValTy == IceType_i64);
3316
3317 if (!Traits::Is64Bit) {
3318 T = Dest;
3319 } else {
3320 T = makeReg(IceType_i64);
3321 if (ValTy == IceType_i32) {
3322 // In x86-64, __popcountsi2 is not defined, so we cheat a bit by
3323 // converting it to a 64-bit value, and using ctpop_i64. _movzx should
3324 // ensure we will not have any bits set on Val's upper 32 bits.
3325 Variable *V = makeReg(IceType_i64);
3326 _movzx(V, Val);
3327 Val = V;
3328 }
3329 ValTy = IceType_i64;
3330 }
3331
3332 InstCall *Call = makeHelperCall(
3333 ValTy == IceType_i32 ? H_call_ctpop_i32 : H_call_ctpop_i64, T, 1);
John Porto7e93c622015-06-23 10:58:57 -07003334 Call->addArg(Val);
3335 lowerCall(Call);
3336 // The popcount helpers always return 32-bit values, while the intrinsic's
3337 // signature matches the native POPCNT instruction and fills a 64-bit reg
3338 // (in 64-bit mode). Thus, clear the upper bits of the dest just in case
3339 // the user doesn't do that in the IR. If the user does that in the IR,
3340 // then this zero'ing instruction is dead and gets optimized out.
John Porto1d235422015-08-12 12:37:53 -07003341 if (!Traits::Is64Bit) {
3342 assert(T == Dest);
3343 if (Val->getType() == IceType_i64) {
3344 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3345 Constant *Zero = Ctx->getConstantZero(IceType_i32);
3346 _mov(DestHi, Zero);
3347 }
3348 } else {
3349 assert(Val->getType() == IceType_i64);
3350 // T is 64 bit. It needs to be copied to dest. We need to:
3351 //
3352 // T_1.32 = trunc T.64 to i32
3353 // T_2.64 = zext T_1.32 to i64
3354 // Dest.<<right_size>> = T_2.<<right_size>>
3355 //
3356 // which ensures the upper 32 bits will always be cleared. Just doing a
3357 //
3358 // mov Dest.32 = trunc T.32 to i32
3359 //
3360 // is dangerous because there's a chance the compiler will optimize this
3361 // copy out. To use _movzx we need two new registers (one 32-, and
3362 // another 64-bit wide.)
3363 Variable *T_1 = makeReg(IceType_i32);
3364 _mov(T_1, T);
3365 Variable *T_2 = makeReg(IceType_i64);
3366 _movzx(T_2, T_1);
3367 _mov(Dest, T_2);
John Porto7e93c622015-06-23 10:58:57 -07003368 }
3369 return;
3370 }
3371 case Intrinsics::Ctlz: {
Andrew Scull57e12682015-09-16 11:30:19 -07003372 // The "is zero undef" parameter is ignored and we always return a
3373 // well-defined value.
John Porto7e93c622015-06-23 10:58:57 -07003374 Operand *Val = legalize(Instr->getArg(0));
3375 Operand *FirstVal;
3376 Operand *SecondVal = nullptr;
John Porto1d235422015-08-12 12:37:53 -07003377 if (!Traits::Is64Bit && Val->getType() == IceType_i64) {
John Porto7e93c622015-06-23 10:58:57 -07003378 FirstVal = loOperand(Val);
3379 SecondVal = hiOperand(Val);
3380 } else {
3381 FirstVal = Val;
3382 }
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07003383 constexpr bool IsCttz = false;
John Porto7e93c622015-06-23 10:58:57 -07003384 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
3385 SecondVal);
3386 return;
3387 }
3388 case Intrinsics::Cttz: {
Andrew Scull57e12682015-09-16 11:30:19 -07003389 // The "is zero undef" parameter is ignored and we always return a
3390 // well-defined value.
John Porto7e93c622015-06-23 10:58:57 -07003391 Operand *Val = legalize(Instr->getArg(0));
3392 Operand *FirstVal;
3393 Operand *SecondVal = nullptr;
John Porto1d235422015-08-12 12:37:53 -07003394 if (!Traits::Is64Bit && Val->getType() == IceType_i64) {
John Porto7e93c622015-06-23 10:58:57 -07003395 FirstVal = hiOperand(Val);
3396 SecondVal = loOperand(Val);
3397 } else {
3398 FirstVal = Val;
3399 }
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07003400 constexpr bool IsCttz = true;
John Porto7e93c622015-06-23 10:58:57 -07003401 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
3402 SecondVal);
3403 return;
3404 }
3405 case Intrinsics::Fabs: {
3406 Operand *Src = legalize(Instr->getArg(0));
3407 Type Ty = Src->getType();
3408 Variable *Dest = Instr->getDest();
3409 Variable *T = makeVectorOfFabsMask(Ty);
Andrew Scull57e12682015-09-16 11:30:19 -07003410 // The pand instruction operates on an m128 memory operand, so if Src is an
3411 // f32 or f64, we need to make sure it's in a register.
John Porto7e93c622015-06-23 10:58:57 -07003412 if (isVectorType(Ty)) {
John Porto921856d2015-07-07 11:56:26 -07003413 if (llvm::isa<typename Traits::X86OperandMem>(Src))
Andrew Scull97f460d2015-07-21 10:07:42 -07003414 Src = legalizeToReg(Src);
John Porto7e93c622015-06-23 10:58:57 -07003415 } else {
Andrew Scull97f460d2015-07-21 10:07:42 -07003416 Src = legalizeToReg(Src);
John Porto7e93c622015-06-23 10:58:57 -07003417 }
3418 _pand(T, Src);
3419 if (isVectorType(Ty))
3420 _movp(Dest, T);
3421 else
3422 _mov(Dest, T);
3423 return;
3424 }
3425 case Intrinsics::Longjmp: {
3426 InstCall *Call = makeHelperCall(H_call_longjmp, nullptr, 2);
3427 Call->addArg(Instr->getArg(0));
3428 Call->addArg(Instr->getArg(1));
3429 lowerCall(Call);
3430 return;
3431 }
3432 case Intrinsics::Memcpy: {
Andrew Scull9df4a372015-08-07 09:19:35 -07003433 lowerMemcpy(Instr->getArg(0), Instr->getArg(1), Instr->getArg(2));
John Porto7e93c622015-06-23 10:58:57 -07003434 return;
3435 }
3436 case Intrinsics::Memmove: {
Andrew Scullcfa628b2015-08-20 14:23:05 -07003437 lowerMemmove(Instr->getArg(0), Instr->getArg(1), Instr->getArg(2));
John Porto7e93c622015-06-23 10:58:57 -07003438 return;
3439 }
3440 case Intrinsics::Memset: {
Andrew Scull713dbde2015-08-04 14:25:27 -07003441 lowerMemset(Instr->getArg(0), Instr->getArg(1), Instr->getArg(2));
John Porto7e93c622015-06-23 10:58:57 -07003442 return;
3443 }
3444 case Intrinsics::NaClReadTP: {
3445 if (Ctx->getFlags().getUseSandboxing()) {
John Porto5aeed952015-07-21 13:39:09 -07003446 Operand *Src = dispatchToConcrete(&Machine::createNaClReadTPSrcOperand);
John Porto7e93c622015-06-23 10:58:57 -07003447 Variable *Dest = Instr->getDest();
3448 Variable *T = nullptr;
3449 _mov(T, Src);
3450 _mov(Dest, T);
3451 } else {
3452 InstCall *Call = makeHelperCall(H_call_read_tp, Instr->getDest(), 0);
3453 lowerCall(Call);
3454 }
3455 return;
3456 }
3457 case Intrinsics::Setjmp: {
3458 InstCall *Call = makeHelperCall(H_call_setjmp, Instr->getDest(), 1);
3459 Call->addArg(Instr->getArg(0));
3460 lowerCall(Call);
3461 return;
3462 }
3463 case Intrinsics::Sqrt: {
3464 Operand *Src = legalize(Instr->getArg(0));
3465 Variable *Dest = Instr->getDest();
3466 Variable *T = makeReg(Dest->getType());
3467 _sqrtss(T, Src);
3468 _mov(Dest, T);
3469 return;
3470 }
3471 case Intrinsics::Stacksave: {
John Porto5d0acff2015-06-30 15:29:21 -07003472 Variable *esp =
3473 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
John Porto7e93c622015-06-23 10:58:57 -07003474 Variable *Dest = Instr->getDest();
3475 _mov(Dest, esp);
3476 return;
3477 }
3478 case Intrinsics::Stackrestore: {
John Porto5d0acff2015-06-30 15:29:21 -07003479 Variable *esp =
3480 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
Jim Stichnoth230d4102015-09-25 17:40:32 -07003481 _mov_redefined(esp, Instr->getArg(0));
John Porto7e93c622015-06-23 10:58:57 -07003482 return;
3483 }
3484 case Intrinsics::Trap:
3485 _ud2();
3486 return;
3487 case Intrinsics::UnknownIntrinsic:
3488 Func->setError("Should not be lowering UnknownIntrinsic");
3489 return;
3490 }
3491 return;
3492}
3493
3494template <class Machine>
3495void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev,
3496 Operand *Ptr, Operand *Expected,
3497 Operand *Desired) {
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07003498 Type Ty = Expected->getType();
3499 if (!Traits::Is64Bit && Ty == IceType_i64) {
John Porto7e93c622015-06-23 10:58:57 -07003500 // Reserve the pre-colored registers first, before adding any more
3501 // infinite-weight variables from formMemoryOperand's legalization.
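// As background: lock cmpxchg8b compares edx:eax with the 64-bit memory
// operand; if they are equal it stores ecx:ebx there, otherwise it loads
// the memory operand into edx:eax. Either way, edx:eax ends up holding the
// previous memory contents, which is what DestPrev receives below.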
John Porto5d0acff2015-06-30 15:29:21 -07003502 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
3503 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
3504 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
3505 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx);
John Porto7e93c622015-06-23 10:58:57 -07003506 _mov(T_eax, loOperand(Expected));
3507 _mov(T_edx, hiOperand(Expected));
3508 _mov(T_ebx, loOperand(Desired));
3509 _mov(T_ecx, hiOperand(Desired));
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07003510 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
3511 constexpr bool Locked = true;
John Porto7e93c622015-06-23 10:58:57 -07003512 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
3513 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev));
3514 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev));
3515 _mov(DestLo, T_eax);
3516 _mov(DestHi, T_edx);
3517 return;
3518 }
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07003519 int32_t Eax;
3520 switch (Ty) {
3521 default:
3522 llvm_unreachable("Bad type for cmpxchg");
3523 // fallthrough
3524 case IceType_i32:
3525 Eax = Traits::RegisterSet::Reg_eax;
3526 break;
3527 case IceType_i16:
3528 Eax = Traits::RegisterSet::Reg_ax;
3529 break;
3530 case IceType_i8:
3531 Eax = Traits::RegisterSet::Reg_al;
3532 break;
3533 }
3534 Variable *T_eax = makeReg(Ty, Eax);
John Porto7e93c622015-06-23 10:58:57 -07003535 _mov(T_eax, Expected);
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07003536 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
Andrew Scull97f460d2015-07-21 10:07:42 -07003537 Variable *DesiredReg = legalizeToReg(Desired);
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07003538 constexpr bool Locked = true;
John Porto7e93c622015-06-23 10:58:57 -07003539 _cmpxchg(Addr, T_eax, DesiredReg, Locked);
3540 _mov(DestPrev, T_eax);
3541}
3542
3543template <class Machine>
3544bool TargetX86Base<Machine>::tryOptimizedCmpxchgCmpBr(Variable *Dest,
3545 Operand *PtrToMem,
3546 Operand *Expected,
3547 Operand *Desired) {
3548 if (Ctx->getFlags().getOptLevel() == Opt_m1)
3549 return false;
3550 // Peek ahead a few instructions and see how Dest is used.
3551 // It's very common to have:
3552 //
3553 // %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...)
3554 // [%y_phi = ...] // list of phi stores
3555 // %p = icmp eq i32 %x, %expected
3556 // br i1 %p, label %l1, label %l2
3557 //
3558 // which we can optimize into:
3559 //
3560 // %x = <cmpxchg code>
3561 // [%y_phi = ...] // list of phi stores
3562 // br eq, %l1, %l2
3563 InstList::iterator I = Context.getCur();
3564 // I is currently the InstIntrinsicCall. Peek past that.
3565 // This assumes that the atomic cmpxchg has not been lowered yet,
3566 // so that the instructions seen in the scan from "Cur" are simple.
3567 assert(llvm::isa<InstIntrinsicCall>(*I));
3568 Inst *NextInst = Context.getNextInst(I);
3569 if (!NextInst)
3570 return false;
3571 // There might be phi assignments right before the compare+branch, since this
3572 // could be a backward branch for a loop. This placement of assignments is
3573 // determined by placePhiStores().
3574 std::vector<InstAssign *> PhiAssigns;
3575 while (InstAssign *PhiAssign = llvm::dyn_cast<InstAssign>(NextInst)) {
3576 if (PhiAssign->getDest() == Dest)
3577 return false;
3578 PhiAssigns.push_back(PhiAssign);
3579 NextInst = Context.getNextInst(I);
3580 if (!NextInst)
3581 return false;
3582 }
3583 if (InstIcmp *NextCmp = llvm::dyn_cast<InstIcmp>(NextInst)) {
3584 if (!(NextCmp->getCondition() == InstIcmp::Eq &&
3585 ((NextCmp->getSrc(0) == Dest && NextCmp->getSrc(1) == Expected) ||
3586 (NextCmp->getSrc(1) == Dest && NextCmp->getSrc(0) == Expected)))) {
3587 return false;
3588 }
3589 NextInst = Context.getNextInst(I);
3590 if (!NextInst)
3591 return false;
3592 if (InstBr *NextBr = llvm::dyn_cast<InstBr>(NextInst)) {
3593 if (!NextBr->isUnconditional() &&
3594 NextCmp->getDest() == NextBr->getCondition() &&
3595 NextBr->isLastUse(NextCmp->getDest())) {
3596 lowerAtomicCmpxchg(Dest, PtrToMem, Expected, Desired);
3597 for (size_t i = 0; i < PhiAssigns.size(); ++i) {
3598 // Lower the phi assignments now, before the branch (same placement
3599 // as before).
3600 InstAssign *PhiAssign = PhiAssigns[i];
3601 PhiAssign->setDeleted();
3602 lowerAssign(PhiAssign);
3603 Context.advanceNext();
3604 }
John Porto5d0acff2015-06-30 15:29:21 -07003605 _br(Traits::Cond::Br_e, NextBr->getTargetTrue(),
3606 NextBr->getTargetFalse());
John Porto7e93c622015-06-23 10:58:57 -07003607 // Skip over the old compare and branch, by deleting them.
3608 NextCmp->setDeleted();
3609 NextBr->setDeleted();
3610 Context.advanceNext();
3611 Context.advanceNext();
3612 return true;
3613 }
3614 }
3615 }
3616 return false;
3617}
3618
3619template <class Machine>
3620void TargetX86Base<Machine>::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
3621 Operand *Ptr, Operand *Val) {
3622 bool NeedsCmpxchg = false;
3623 LowerBinOp Op_Lo = nullptr;
3624 LowerBinOp Op_Hi = nullptr;
3625 switch (Operation) {
3626 default:
3627 Func->setError("Unknown AtomicRMW operation");
3628 return;
3629 case Intrinsics::AtomicAdd: {
John Porto1d235422015-08-12 12:37:53 -07003630 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
John Porto7e93c622015-06-23 10:58:57 -07003631 // All the fall-through paths must set this to true, but use this
3632 // for asserting.
3633 NeedsCmpxchg = true;
3634 Op_Lo = &TargetX86Base<Machine>::_add;
3635 Op_Hi = &TargetX86Base<Machine>::_adc;
3636 break;
3637 }
John Porto921856d2015-07-07 11:56:26 -07003638 typename Traits::X86OperandMem *Addr =
3639 formMemoryOperand(Ptr, Dest->getType());
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07003640 constexpr bool Locked = true;
John Porto7e93c622015-06-23 10:58:57 -07003641 Variable *T = nullptr;
3642 _mov(T, Val);
3643 _xadd(Addr, T, Locked);
3644 _mov(Dest, T);
3645 return;
3646 }
3647 case Intrinsics::AtomicSub: {
John Porto1d235422015-08-12 12:37:53 -07003648 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
John Porto7e93c622015-06-23 10:58:57 -07003649 NeedsCmpxchg = true;
3650 Op_Lo = &TargetX86Base<Machine>::_sub;
3651 Op_Hi = &TargetX86Base<Machine>::_sbb;
3652 break;
3653 }
John Porto921856d2015-07-07 11:56:26 -07003654 typename Traits::X86OperandMem *Addr =
3655 formMemoryOperand(Ptr, Dest->getType());
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07003656 constexpr bool Locked = true;
John Porto7e93c622015-06-23 10:58:57 -07003657 Variable *T = nullptr;
3658 _mov(T, Val);
3659 _neg(T);
3660 _xadd(Addr, T, Locked);
3661 _mov(Dest, T);
3662 return;
3663 }
3664 case Intrinsics::AtomicOr:
3665 // TODO(jvoung): If Dest is null or dead, then some of these
3666 // operations do not need an "exchange", but just a locked op.
3667 // That appears to be worthwhile for sub, or, and, and xor;
3668 // xadd is probably fine vs. lock add for add, and xchg is fine
3669 // vs. an atomic store.
3670 NeedsCmpxchg = true;
3671 Op_Lo = &TargetX86Base<Machine>::_or;
3672 Op_Hi = &TargetX86Base<Machine>::_or;
3673 break;
3674 case Intrinsics::AtomicAnd:
3675 NeedsCmpxchg = true;
3676 Op_Lo = &TargetX86Base<Machine>::_and;
3677 Op_Hi = &TargetX86Base<Machine>::_and;
3678 break;
3679 case Intrinsics::AtomicXor:
3680 NeedsCmpxchg = true;
3681 Op_Lo = &TargetX86Base<Machine>::_xor;
3682 Op_Hi = &TargetX86Base<Machine>::_xor;
3683 break;
3684 case Intrinsics::AtomicExchange:
John Porto1d235422015-08-12 12:37:53 -07003685 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
John Porto7e93c622015-06-23 10:58:57 -07003686 NeedsCmpxchg = true;
3687 // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values
3688 // just need to be moved to the ecx and ebx registers.
3689 Op_Lo = nullptr;
3690 Op_Hi = nullptr;
3691 break;
3692 }
John Porto921856d2015-07-07 11:56:26 -07003693 typename Traits::X86OperandMem *Addr =
3694 formMemoryOperand(Ptr, Dest->getType());
John Porto7e93c622015-06-23 10:58:57 -07003695 Variable *T = nullptr;
3696 _mov(T, Val);
3697 _xchg(Addr, T);
3698 _mov(Dest, T);
3699 return;
3700 }
3701 // Otherwise, we need a cmpxchg loop.
3702 (void)NeedsCmpxchg;
3703 assert(NeedsCmpxchg);
3704 expandAtomicRMWAsCmpxchg(Op_Lo, Op_Hi, Dest, Ptr, Val);
3705}
3706
3707template <class Machine>
3708void TargetX86Base<Machine>::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo,
3709 LowerBinOp Op_Hi,
3710 Variable *Dest,
3711 Operand *Ptr,
3712 Operand *Val) {
3713 // Expand a more complex RMW operation as a cmpxchg loop:
3714 // For 64-bit:
3715 // mov eax, [ptr]
3716 // mov edx, [ptr + 4]
3717 // .LABEL:
3718 // mov ebx, eax
3719 // <Op_Lo> ebx, <desired_adj_lo>
3720 // mov ecx, edx
3721 // <Op_Hi> ecx, <desired_adj_hi>
3722 // lock cmpxchg8b [ptr]
3723 // jne .LABEL
3724 // mov <dest_lo>, eax
3725 // mov <dest_hi>, edx
3726 //
3727 // For 32-bit:
3728 // mov eax, [ptr]
3729 // .LABEL:
3730 // mov <reg>, eax
3731 // op <reg>, [desired_adj]
3732 // lock cmpxchg [ptr], <reg>
3733 // jne .LABEL
3734 // mov <dest>, eax
3735 //
3736 // If Op_{Lo,Hi} are nullptr, then just copy the value.
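//
// For instance, AtomicAdd on a 64-bit value passes Op_Lo = _add and
// Op_Hi = _adc, so each loop iteration computes the candidate 64-bit sum
// with an add/adc pair before attempting the lock cmpxchg8b.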
3737 Val = legalize(Val);
3738 Type Ty = Val->getType();
John Porto1d235422015-08-12 12:37:53 -07003739 if (!Traits::Is64Bit && Ty == IceType_i64) {
John Porto5d0acff2015-06-30 15:29:21 -07003740 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
3741 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
John Porto921856d2015-07-07 11:56:26 -07003742 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
John Porto7e93c622015-06-23 10:58:57 -07003743 _mov(T_eax, loOperand(Addr));
3744 _mov(T_edx, hiOperand(Addr));
John Porto5d0acff2015-06-30 15:29:21 -07003745 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
3746 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx);
John Porto921856d2015-07-07 11:56:26 -07003747 typename Traits::Insts::Label *Label =
3748 Traits::Insts::Label::create(Func, this);
John Porto7e93c622015-06-23 10:58:57 -07003749 const bool IsXchg8b = Op_Lo == nullptr && Op_Hi == nullptr;
3750 if (!IsXchg8b) {
3751 Context.insert(Label);
3752 _mov(T_ebx, T_eax);
3753 (this->*Op_Lo)(T_ebx, loOperand(Val));
3754 _mov(T_ecx, T_edx);
3755 (this->*Op_Hi)(T_ecx, hiOperand(Val));
3756 } else {
3757 // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi.
3758 // It just needs the Val loaded into ebx and ecx.
3759 // That can also be done before the loop.
3760 _mov(T_ebx, loOperand(Val));
3761 _mov(T_ecx, hiOperand(Val));
3762 Context.insert(Label);
3763 }
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07003764 constexpr bool Locked = true;
John Porto7e93c622015-06-23 10:58:57 -07003765 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
John Porto5d0acff2015-06-30 15:29:21 -07003766 _br(Traits::Cond::Br_ne, Label);
John Porto7e93c622015-06-23 10:58:57 -07003767 if (!IsXchg8b) {
3768 // If Val is a variable, model the extended live range of Val through
3769 // the end of the loop, since it will be re-used by the loop.
3770 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
3771 Variable *ValLo = llvm::cast<Variable>(loOperand(ValVar));
3772 Variable *ValHi = llvm::cast<Variable>(hiOperand(ValVar));
3773 Context.insert(InstFakeUse::create(Func, ValLo));
3774 Context.insert(InstFakeUse::create(Func, ValHi));
3775 }
3776 } else {
3777 // For xchg, the loop is slightly smaller and ebx/ecx are used.
3778 Context.insert(InstFakeUse::create(Func, T_ebx));
3779 Context.insert(InstFakeUse::create(Func, T_ecx));
3780 }
3781 // The address base (if any) is also reused in the loop.
3782 if (Variable *Base = Addr->getBase())
3783 Context.insert(InstFakeUse::create(Func, Base));
3784 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
3785 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3786 _mov(DestLo, T_eax);
3787 _mov(DestHi, T_edx);
3788 return;
3789 }
John Porto921856d2015-07-07 11:56:26 -07003790 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07003791 int32_t Eax;
3792 switch (Ty) {
3793 default:
3794 llvm_unreachable("Bad type for atomicRMW");
3795 // fallthrough
3796 case IceType_i32:
3797 Eax = Traits::RegisterSet::Reg_eax;
3798 break;
3799 case IceType_i16:
3800 Eax = Traits::RegisterSet::Reg_ax;
3801 break;
3802 case IceType_i8:
3803 Eax = Traits::RegisterSet::Reg_al;
3804 break;
3805 }
3806 Variable *T_eax = makeReg(Ty, Eax);
John Porto7e93c622015-06-23 10:58:57 -07003807 _mov(T_eax, Addr);
John Porto921856d2015-07-07 11:56:26 -07003808 typename Traits::Insts::Label *Label =
3809 Traits::Insts::Label::create(Func, this);
John Porto7e93c622015-06-23 10:58:57 -07003810 Context.insert(Label);
3811 // We want to pick a different register for T than Eax, so don't use
3812 // _mov(T == nullptr, T_eax).
3813 Variable *T = makeReg(Ty);
3814 _mov(T, T_eax);
3815 (this->*Op_Lo)(T, Val);
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07003816 constexpr bool Locked = true;
John Porto7e93c622015-06-23 10:58:57 -07003817 _cmpxchg(Addr, T_eax, T, Locked);
John Porto5d0acff2015-06-30 15:29:21 -07003818 _br(Traits::Cond::Br_ne, Label);
John Porto7e93c622015-06-23 10:58:57 -07003819 // If Val is a variable, model the extended live range of Val through
3820 // the end of the loop, since it will be re-used by the loop.
3821 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
3822 Context.insert(InstFakeUse::create(Func, ValVar));
3823 }
3824 // The address base (if any) is also reused in the loop.
3825 if (Variable *Base = Addr->getBase())
3826 Context.insert(InstFakeUse::create(Func, Base));
3827 _mov(Dest, T_eax);
3828}
3829
Andrew Scull9612d322015-07-06 14:53:25 -07003830/// Lowers count {trailing, leading} zeros intrinsic.
3831///
3832/// We could do constant folding here, but that should have
3833/// been done by the front-end/middle-end optimizations.
John Porto7e93c622015-06-23 10:58:57 -07003834template <class Machine>
3835void TargetX86Base<Machine>::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest,
3836 Operand *FirstVal,
3837 Operand *SecondVal) {
3838 // TODO(jvoung): Determine if the user CPU supports LZCNT (BMI).
3839 // Then the instructions will handle the Val == 0 case much more simply
3840 // and won't require conversion from bit position to number of zeros.
3841 //
3842 // Otherwise:
3843 // bsr IF_NOT_ZERO, Val
3844 // mov T_DEST, 63
3845 // cmovne T_DEST, IF_NOT_ZERO
3846 // xor T_DEST, 31
3847 // mov DEST, T_DEST
3848 //
3849 // NOTE: T_DEST must be a register because cmov requires its dest to be a
3850 // register. Also, bsf and bsr require their dest to be a register.
3851 //
3852 // The xor DEST, 31 converts a bit position to # of leading zeroes.
3853 // E.g., for 000... 00001100, bsr will say that the most significant bit
3854 // set is at position 3, while the number of leading zeros is 28. Xor with
3855 // 31 computes (31 - N) for N <= 31, and converts 63 to 32 (all-zeros case).
3856 //
3857 // Similar for 64-bit, but start w/ speculating that the upper 32 bits
3858 // are all zero, and compute the result for that case (checking the lower
3859 // 32 bits). Then actually compute the result for the upper bits and
3860 // cmov in the result from the lower computation if the earlier speculation
3861 // was correct.
3862 //
3863 // Cttz is similar, but uses bsf instead, doesn't require the xor
3864 // bit-position conversion, and reverses the speculation.
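//
// A sketch of the 64-bit ctlz expansion described above (x86-32), with
// illustrative temporary names:
//   bsr t1, lo(val)
//   mov t_dest, 63
//   cmovne t_dest, t1
//   xor t_dest, 31
//   add t_dest, 32        ; result if the upper half is all zero
//   bsr t2, hi(val)
//   xor t2, 31
//   test hi(val), hi(val)
//   cmove t2, t_dest      ; upper half really was zero: keep speculation
//   mov dest.lo, t2
//   mov dest.hi, 0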
3865 assert(Ty == IceType_i32 || Ty == IceType_i64);
3866 Variable *T = makeReg(IceType_i32);
3867 Operand *FirstValRM = legalize(FirstVal, Legal_Mem | Legal_Reg);
3868 if (Cttz) {
3869 _bsf(T, FirstValRM);
3870 } else {
3871 _bsr(T, FirstValRM);
3872 }
3873 Variable *T_Dest = makeReg(IceType_i32);
3874 Constant *ThirtyTwo = Ctx->getConstantInt32(32);
3875 Constant *ThirtyOne = Ctx->getConstantInt32(31);
3876 if (Cttz) {
3877 _mov(T_Dest, ThirtyTwo);
3878 } else {
3879 Constant *SixtyThree = Ctx->getConstantInt32(63);
3880 _mov(T_Dest, SixtyThree);
3881 }
John Porto5d0acff2015-06-30 15:29:21 -07003882 _cmov(T_Dest, T, Traits::Cond::Br_ne);
John Porto7e93c622015-06-23 10:58:57 -07003883 if (!Cttz) {
3884 _xor(T_Dest, ThirtyOne);
3885 }
John Porto1d235422015-08-12 12:37:53 -07003886 if (Traits::Is64Bit || Ty == IceType_i32) {
John Porto7e93c622015-06-23 10:58:57 -07003887 _mov(Dest, T_Dest);
3888 return;
3889 }
3890 _add(T_Dest, ThirtyTwo);
3891 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
3892 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3893 // Will be using "test" on this, so we need a registerized variable.
Andrew Scull97f460d2015-07-21 10:07:42 -07003894 Variable *SecondVar = legalizeToReg(SecondVal);
John Porto7e93c622015-06-23 10:58:57 -07003895 Variable *T_Dest2 = makeReg(IceType_i32);
3896 if (Cttz) {
3897 _bsf(T_Dest2, SecondVar);
3898 } else {
3899 _bsr(T_Dest2, SecondVar);
3900 _xor(T_Dest2, ThirtyOne);
3901 }
3902 _test(SecondVar, SecondVar);
John Porto5d0acff2015-06-30 15:29:21 -07003903 _cmov(T_Dest2, T_Dest, Traits::Cond::Br_e);
John Porto7e93c622015-06-23 10:58:57 -07003904 _mov(DestLo, T_Dest2);
3905 _mov(DestHi, Ctx->getConstantZero(IceType_i32));
3906}
3907
Andrew Scull86df4e92015-07-30 13:54:44 -07003908template <class Machine>
Andrew Scullcfa628b2015-08-20 14:23:05 -07003909void TargetX86Base<Machine>::typedLoad(Type Ty, Variable *Dest, Variable *Base,
3910 Constant *Offset) {
3911 auto *Mem = Traits::X86OperandMem::create(Func, Ty, Base, Offset);
3912
3913 if (isVectorType(Ty))
3914 _movp(Dest, Mem);
3915 else if (Ty == IceType_f64)
3916 _movq(Dest, Mem);
3917 else
3918 _mov(Dest, Mem);
3919}
3920
3921template <class Machine>
3922void TargetX86Base<Machine>::typedStore(Type Ty, Variable *Value,
3923 Variable *Base, Constant *Offset) {
3924 auto *Mem = Traits::X86OperandMem::create(Func, Ty, Base, Offset);
3925
3926 if (isVectorType(Ty))
3927 _storep(Value, Mem);
3928 else if (Ty == IceType_f64)
3929 _storeq(Value, Mem);
3930 else
3931 _store(Value, Mem);
3932}
3933
3934template <class Machine>
3935void TargetX86Base<Machine>::copyMemory(Type Ty, Variable *Dest, Variable *Src,
3936 int32_t OffsetAmt) {
3937 Constant *Offset = OffsetAmt ? Ctx->getConstantInt32(OffsetAmt) : nullptr;
3938 // TODO(ascull): this or add nullptr test to _movp, _movq
3939 Variable *Data = makeReg(Ty);
3940
3941 typedLoad(Ty, Data, Src, Offset);
3942 typedStore(Ty, Data, Dest, Offset);
3943}
3944
3945template <class Machine>
Andrew Scull9df4a372015-08-07 09:19:35 -07003946void TargetX86Base<Machine>::lowerMemcpy(Operand *Dest, Operand *Src,
3947 Operand *Count) {
3948 // There is a load and store for each chunk in the unroll
Andrew Scull9df4a372015-08-07 09:19:35 -07003949 constexpr uint32_t BytesPerStorep = 16;
Andrew Scull9df4a372015-08-07 09:19:35 -07003950
3951 // Check if the operands are constants
3952 const auto *CountConst = llvm::dyn_cast<const ConstantInteger32>(Count);
3953 const bool IsCountConst = CountConst != nullptr;
3954 const uint32_t CountValue = IsCountConst ? CountConst->getValue() : 0;
3955
Andrew Scullcfa628b2015-08-20 14:23:05 -07003956 if (shouldOptimizeMemIntrins() && IsCountConst &&
3957 CountValue <= BytesPerStorep * Traits::MEMCPY_UNROLL_LIMIT) {
Andrew Scull9df4a372015-08-07 09:19:35 -07003958    // Unlikely, but if the count is zero there is nothing to do
3959 if (CountValue == 0)
3960 return;
3961
3962 Variable *SrcBase = legalizeToReg(Src);
3963 Variable *DestBase = legalizeToReg(Dest);
3964
Andrew Scullcfa628b2015-08-20 14:23:05 -07003965 // Find the largest type that can be used and use it as much as possible in
3966 // reverse order. Then handle any remainder with overlapping copies. Since
3967 // the remainder will be at the end, there will be reduced pressure on the
3968 // memory unit as the accesses to the same memory are far apart.
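    // Worked sketch, assuming the usual power-of-two copy widths: for
    // CountValue == 13 the loop below copies bytes [0,8) with one 8-byte
    // move, leaving RemainingBytes == 5; the leftover is then widened back
    // to 8 bytes and copied at Offset == 13 - 8 == 5, i.e. bytes [5,13),
    // harmlessly re-copying bytes [5,8).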
3969 Type Ty = largestTypeInSize(CountValue);
3970 uint32_t TyWidth = typeWidthInBytes(Ty);
Andrew Scull9df4a372015-08-07 09:19:35 -07003971
Andrew Scullcfa628b2015-08-20 14:23:05 -07003972 uint32_t RemainingBytes = CountValue;
3973 int32_t Offset = (CountValue & ~(TyWidth - 1)) - TyWidth;
3974 while (RemainingBytes >= TyWidth) {
3975 copyMemory(Ty, DestBase, SrcBase, Offset);
3976 RemainingBytes -= TyWidth;
3977 Offset -= TyWidth;
Andrew Scull9df4a372015-08-07 09:19:35 -07003978 }
3979
Andrew Scullcfa628b2015-08-20 14:23:05 -07003980 if (RemainingBytes == 0)
Andrew Scull9df4a372015-08-07 09:19:35 -07003981 return;
Andrew Scull9df4a372015-08-07 09:19:35 -07003982
Andrew Scullcfa628b2015-08-20 14:23:05 -07003983 // Lower the remaining bytes. Adjust to larger types in order to make use
3984 // of overlaps in the copies.
3985 Type LeftOverTy = firstTypeThatFitsSize(RemainingBytes);
3986 Offset = CountValue - typeWidthInBytes(LeftOverTy);
3987 copyMemory(LeftOverTy, DestBase, SrcBase, Offset);
Andrew Scull9df4a372015-08-07 09:19:35 -07003988 return;
3989 }
3990
3991 // Fall back on a function call
3992 InstCall *Call = makeHelperCall(H_call_memcpy, nullptr, 3);
3993 Call->addArg(Dest);
3994 Call->addArg(Src);
3995 Call->addArg(Count);
3996 lowerCall(Call);
3997}
3998
3999template <class Machine>
Andrew Scullcfa628b2015-08-20 14:23:05 -07004000void TargetX86Base<Machine>::lowerMemmove(Operand *Dest, Operand *Src,
4001 Operand *Count) {
4002 // There is a load and store for each chunk in the unroll
4003 constexpr uint32_t BytesPerStorep = 16;
4004
4005 // Check if the operands are constants
4006 const auto *CountConst = llvm::dyn_cast<const ConstantInteger32>(Count);
4007 const bool IsCountConst = CountConst != nullptr;
4008 const uint32_t CountValue = IsCountConst ? CountConst->getValue() : 0;
4009
4010 if (shouldOptimizeMemIntrins() && IsCountConst &&
4011 CountValue <= BytesPerStorep * Traits::MEMMOVE_UNROLL_LIMIT) {
4012    // Unlikely, but if the count is zero there is nothing to do
4013 if (CountValue == 0)
4014 return;
4015
4016 Variable *SrcBase = legalizeToReg(Src);
4017 Variable *DestBase = legalizeToReg(Dest);
4018
4019 std::tuple<Type, Constant *, Variable *>
4020 Moves[Traits::MEMMOVE_UNROLL_LIMIT];
4021 Constant *Offset;
4022 Variable *Reg;
4023
4024    // Copy the data into registers, as the source and destination could overlap,
Andrew Scull57e12682015-09-16 11:30:19 -07004025    // so make sure not to clobber the memory. This also means overlapping
4026    // moves can be used, as we are taking a safe snapshot of the memory.
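    // A sketch of the schedule this produces for a 13-byte memmove (assumed
    // sizes; all loads are issued before any store):
    //   load 8 bytes from [src+0]   ; bytes [0,8)
    //   load 8 bytes from [src+5]   ; bytes [5,13), overlapping [5,8)
    //   store 8 bytes to [dst+0]
    //   store 8 bytes to [dst+5]
    // so even fully overlapping src/dst ranges see a consistent snapshot.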
Andrew Scullcfa628b2015-08-20 14:23:05 -07004027 Type Ty = largestTypeInSize(CountValue);
4028 uint32_t TyWidth = typeWidthInBytes(Ty);
4029
4030 uint32_t RemainingBytes = CountValue;
4031 int32_t OffsetAmt = (CountValue & ~(TyWidth - 1)) - TyWidth;
4032 size_t N = 0;
4033 while (RemainingBytes >= TyWidth) {
4034 assert(N <= Traits::MEMMOVE_UNROLL_LIMIT);
4035 Offset = Ctx->getConstantInt32(OffsetAmt);
4036 Reg = makeReg(Ty);
4037 typedLoad(Ty, Reg, SrcBase, Offset);
4038 RemainingBytes -= TyWidth;
4039 OffsetAmt -= TyWidth;
4040 Moves[N++] = std::make_tuple(Ty, Offset, Reg);
4041 }
4042
4043 if (RemainingBytes != 0) {
4044 // Lower the remaining bytes. Adjust to larger types in order to make use
4045 // of overlaps in the copies.
4046 assert(N <= Traits::MEMMOVE_UNROLL_LIMIT);
4047 Ty = firstTypeThatFitsSize(RemainingBytes);
4048 Offset = Ctx->getConstantInt32(CountValue - typeWidthInBytes(Ty));
4049 Reg = makeReg(Ty);
4050 typedLoad(Ty, Reg, SrcBase, Offset);
4051 Moves[N++] = std::make_tuple(Ty, Offset, Reg);
4052 }
4053
4054 // Copy the data out into the destination memory
4055 for (size_t i = 0; i < N; ++i) {
4056 std::tie(Ty, Offset, Reg) = Moves[i];
4057 typedStore(Ty, Reg, DestBase, Offset);
4058 }
4059
4060 return;
4061 }
4062
4063 // Fall back on a function call
4064 InstCall *Call = makeHelperCall(H_call_memmove, nullptr, 3);
4065 Call->addArg(Dest);
4066 Call->addArg(Src);
4067 Call->addArg(Count);
4068 lowerCall(Call);
4069}
4070
4071template <class Machine>
Andrew Scull713dbde2015-08-04 14:25:27 -07004072void TargetX86Base<Machine>::lowerMemset(Operand *Dest, Operand *Val,
4073 Operand *Count) {
Andrew Scull9df4a372015-08-07 09:19:35 -07004074 constexpr uint32_t BytesPerStorep = 16;
4075 constexpr uint32_t BytesPerStoreq = 8;
4076 constexpr uint32_t BytesPerStorei32 = 4;
Andrew Scull713dbde2015-08-04 14:25:27 -07004077 assert(Val->getType() == IceType_i8);
4078
4079 // Check if the operands are constants
4080 const auto *CountConst = llvm::dyn_cast<const ConstantInteger32>(Count);
4081 const auto *ValConst = llvm::dyn_cast<const ConstantInteger32>(Val);
4082 const bool IsCountConst = CountConst != nullptr;
4083 const bool IsValConst = ValConst != nullptr;
4084 const uint32_t CountValue = IsCountConst ? CountConst->getValue() : 0;
4085 const uint32_t ValValue = IsValConst ? ValConst->getValue() : 0;
4086
4087  // Unlikely, but if the count is zero there is nothing to do
4088 if (IsCountConst && CountValue == 0)
4089 return;
4090
4091 // TODO(ascull): if the count is constant but val is not it would be possible
4092 // to inline by spreading the value across 4 bytes and accessing subregs e.g.
4093 // eax, ax and al.
Andrew Scullcfa628b2015-08-20 14:23:05 -07004094 if (shouldOptimizeMemIntrins() && IsCountConst && IsValConst) {
Andrew Scull9df4a372015-08-07 09:19:35 -07004095 Variable *Base = nullptr;
Andrew Scullcfa628b2015-08-20 14:23:05 -07004096 Variable *VecReg = nullptr;
Andrew Scull9df4a372015-08-07 09:19:35 -07004097 const uint32_t SpreadValue =
4098 (ValValue << 24) | (ValValue << 16) | (ValValue << 8) | ValValue;
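    // e.g. ValValue == 0xAB gives SpreadValue == 0xABABABAB, so a single
    // 32-bit store writes four copies of the byte.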
Andrew Scull713dbde2015-08-04 14:25:27 -07004099
Andrew Scull9df4a372015-08-07 09:19:35 -07004100 auto lowerSet = [this, &Base, SpreadValue, &VecReg](Type Ty,
Jim Stichnoth992f91d2015-08-10 11:18:38 -07004101 uint32_t OffsetAmt) {
Andrew Scull9df4a372015-08-07 09:19:35 -07004102 assert(Base != nullptr);
4103 Constant *Offset = OffsetAmt ? Ctx->getConstantInt32(OffsetAmt) : nullptr;
Andrew Scull713dbde2015-08-04 14:25:27 -07004104
Andrew Scull9df4a372015-08-07 09:19:35 -07004105 // TODO(ascull): is 64-bit better with vector or scalar movq?
4106 auto *Mem = Traits::X86OperandMem::create(Func, Ty, Base, Offset);
4107 if (isVectorType(Ty)) {
Andrew Scull713dbde2015-08-04 14:25:27 -07004108 assert(VecReg != nullptr);
Andrew Scull713dbde2015-08-04 14:25:27 -07004109 _storep(VecReg, Mem);
Andrew Scullcfa628b2015-08-20 14:23:05 -07004110 } else if (Ty == IceType_f64) {
Andrew Scull713dbde2015-08-04 14:25:27 -07004111 assert(VecReg != nullptr);
Andrew Scull713dbde2015-08-04 14:25:27 -07004112 _storeq(VecReg, Mem);
Andrew Scull9df4a372015-08-07 09:19:35 -07004113 } else {
4114 _store(Ctx->getConstantInt(Ty, SpreadValue), Mem);
Andrew Scull713dbde2015-08-04 14:25:27 -07004115 }
4116 };
4117
Andrew Scullcfa628b2015-08-20 14:23:05 -07004118 // Find the largest type that can be used and use it as much as possible in
4119 // reverse order. Then handle any remainder with overlapping copies. Since
4120    // the remainder will be at the end, there will be reduced pressure on the
4121    // memory unit as the accesses to the same memory are far apart.
4122 Type Ty;
Andrew Scull9df4a372015-08-07 09:19:35 -07004123 if (ValValue == 0 && CountValue >= BytesPerStoreq &&
Andrew Scullcfa628b2015-08-20 14:23:05 -07004124 CountValue <= BytesPerStorep * Traits::MEMCPY_UNROLL_LIMIT) {
4125 // When the value is zero it can be loaded into a vector register cheaply
4126 // using the xor trick.
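      // The "xor trick" is the usual zero-fill idiom: makeVectorOfZeros is
      // expected to emit something like "pxor xmm0, xmm0", zeroing every lane
      // with a single register-only instruction and no memory traffic.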
Andrew Scull9df4a372015-08-07 09:19:35 -07004127 Base = legalizeToReg(Dest);
4128 VecReg = makeVectorOfZeros(IceType_v16i8);
Andrew Scullcfa628b2015-08-20 14:23:05 -07004129 Ty = largestTypeInSize(CountValue);
4130 } else if (CountValue <= BytesPerStorei32 * Traits::MEMCPY_UNROLL_LIMIT) {
4131 // When the value is non-zero or the count is small we can't use vector
4132      // instructions, so we are limited to 32-bit stores.
4133 Base = legalizeToReg(Dest);
4134 constexpr uint32_t MaxSize = 4;
4135 Ty = largestTypeInSize(CountValue, MaxSize);
Andrew Scull713dbde2015-08-04 14:25:27 -07004136 }
4137
Andrew Scullcfa628b2015-08-20 14:23:05 -07004138 if (Base) {
4139 uint32_t TyWidth = typeWidthInBytes(Ty);
4140
4141 uint32_t RemainingBytes = CountValue;
4142 uint32_t Offset = (CountValue & ~(TyWidth - 1)) - TyWidth;
4143 while (RemainingBytes >= TyWidth) {
4144 lowerSet(Ty, Offset);
4145 RemainingBytes -= TyWidth;
4146 Offset -= TyWidth;
Andrew Scull713dbde2015-08-04 14:25:27 -07004147 }
Andrew Scull9df4a372015-08-07 09:19:35 -07004148
Andrew Scullcfa628b2015-08-20 14:23:05 -07004149 if (RemainingBytes == 0)
4150 return;
4151
4152 // Lower the remaining bytes. Adjust to larger types in order to make use
4153 // of overlaps in the copies.
4154 Type LeftOverTy = firstTypeThatFitsSize(RemainingBytes);
4155 Offset = CountValue - typeWidthInBytes(LeftOverTy);
4156 lowerSet(LeftOverTy, Offset);
Andrew Scull713dbde2015-08-04 14:25:27 -07004157 return;
4158 }
4159 }
4160
4161 // Fall back on calling the memset function. The value operand needs to be
4162 // extended to a stack slot size because the PNaCl ABI requires arguments to
4163 // be at least 32 bits wide.
4164 Operand *ValExt;
4165 if (IsValConst) {
4166 ValExt = Ctx->getConstantInt(stackSlotType(), ValValue);
4167 } else {
4168 Variable *ValExtVar = Func->makeVariable(stackSlotType());
4169 lowerCast(InstCast::create(Func, InstCast::Zext, ValExtVar, Val));
4170 ValExt = ValExtVar;
4171 }
4172 InstCall *Call = makeHelperCall(H_call_memset, nullptr, 3);
4173 Call->addArg(Dest);
4174 Call->addArg(ValExt);
4175 Call->addArg(Count);
4176 lowerCall(Call);
4177}
4178
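/// A sketch of the sandboxed indirect jump below, assuming 32-byte bundles
/// (so BundleSize == 32 and the mask is ~31 == 0xFFFFFFE0):
///   and target, 0xFFFFFFE0
///   jmp target
/// emitted inside one bundle-locked region, forcing the jump target to a
/// bundle boundary as Native Client sandboxing requires.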
4179template <class Machine>
Andrew Scull86df4e92015-07-30 13:54:44 -07004180void TargetX86Base<Machine>::lowerIndirectJump(Variable *Target) {
4181 const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
4182 if (NeedSandboxing) {
4183 _bundle_lock();
4184 const SizeT BundleSize =
4185 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
4186 _and(Target, Ctx->getConstantInt32(~(BundleSize - 1)));
4187 }
4188 _jmp(Target);
4189 if (NeedSandboxing)
4190 _bundle_unlock();
4191}
4192
John Porto5aeed952015-07-21 13:39:09 -07004193inline bool isAdd(const Inst *Inst) {
David Sehraa0b1a12015-10-27 16:55:40 -07004194 if (auto *Arith = llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) {
John Porto7e93c622015-06-23 10:58:57 -07004195 return (Arith->getOp() == InstArithmetic::Add);
4196 }
4197 return false;
4198}
4199
David Sehraa0b1a12015-10-27 16:55:40 -07004200inline void dumpAddressOpt(const Cfg *Func,
4201 const ConstantRelocatable *Relocatable,
4202 int32_t Offset, const Variable *Base,
John Porto5aeed952015-07-21 13:39:09 -07004203 const Variable *Index, uint16_t Shift,
David Sehraa0b1a12015-10-27 16:55:40 -07004204 const Inst *Reason) {
Jim Stichnoth20b71f52015-06-24 15:52:24 -07004205 if (!BuildDefs::dump())
John Porto7e93c622015-06-23 10:58:57 -07004206 return;
4207 if (!Func->isVerbose(IceV_AddrOpt))
4208 return;
4209 OstreamLocker L(Func->getContext());
4210 Ostream &Str = Func->getContext()->getStrDump();
4211 Str << "Instruction: ";
4212 Reason->dumpDecorated(Func);
4213 Str << " results in Base=";
4214 if (Base)
4215 Base->dump(Func);
4216 else
4217 Str << "<null>";
4218 Str << ", Index=";
4219 if (Index)
4220 Index->dump(Func);
4221 else
4222 Str << "<null>";
David Sehraa0b1a12015-10-27 16:55:40 -07004223 Str << ", Shift=" << Shift << ", Offset=" << Offset
4224 << ", Relocatable=" << Relocatable << "\n";
John Porto7e93c622015-06-23 10:58:57 -07004225}
4226
David Sehraa0b1a12015-10-27 16:55:40 -07004227inline bool matchAssign(const VariablesMetadata *VMetadata, Variable *&Var,
4228 ConstantRelocatable *&Relocatable, int32_t &Offset,
4229 const Inst *&Reason) {
Andrew Scull57e12682015-09-16 11:30:19 -07004230 // Var originates from Var=SrcVar ==> set Var:=SrcVar
John Porto7e93c622015-06-23 10:58:57 -07004231 if (Var == nullptr)
4232 return false;
4233 if (const Inst *VarAssign = VMetadata->getSingleDefinition(Var)) {
4234 assert(!VMetadata->isMultiDef(Var));
4235 if (llvm::isa<InstAssign>(VarAssign)) {
4236 Operand *SrcOp = VarAssign->getSrc(0);
4237 assert(SrcOp);
David Sehraa0b1a12015-10-27 16:55:40 -07004238 if (auto *SrcVar = llvm::dyn_cast<Variable>(SrcOp)) {
John Porto7e93c622015-06-23 10:58:57 -07004239 if (!VMetadata->isMultiDef(SrcVar) &&
4240 // TODO: ensure SrcVar stays single-BB
4241 true) {
4242 Var = SrcVar;
4243 Reason = VarAssign;
4244 return true;
4245 }
David Sehraa0b1a12015-10-27 16:55:40 -07004246 } else if (auto *Const = llvm::dyn_cast<ConstantInteger32>(SrcOp)) {
4247 int32_t MoreOffset = Const->getValue();
4248 if (Utils::WouldOverflowAdd(Offset, MoreOffset))
4249 return false;
4250 Var = nullptr;
4251 Offset += MoreOffset;
4252 Reason = VarAssign;
4253 return true;
4254 } else if (auto *AddReloc = llvm::dyn_cast<ConstantRelocatable>(SrcOp)) {
4255 if (Relocatable == nullptr) {
4256 Var = nullptr;
4257 Relocatable = AddReloc;
4258 Reason = VarAssign;
4259 return true;
4260 }
John Porto7e93c622015-06-23 10:58:57 -07004261 }
4262 }
4263 }
4264 return false;
4265}
4266
John Porto5aeed952015-07-21 13:39:09 -07004267inline bool matchCombinedBaseIndex(const VariablesMetadata *VMetadata,
4268 Variable *&Base, Variable *&Index,
4269 uint16_t &Shift, const Inst *&Reason) {
John Porto7e93c622015-06-23 10:58:57 -07004270 // Index==nullptr && Base is Base=Var1+Var2 ==>
4271 // set Base=Var1, Index=Var2, Shift=0
4272 if (Base == nullptr)
4273 return false;
4274 if (Index != nullptr)
4275 return false;
David Sehraa0b1a12015-10-27 16:55:40 -07004276 auto *BaseInst = VMetadata->getSingleDefinition(Base);
John Porto7e93c622015-06-23 10:58:57 -07004277 if (BaseInst == nullptr)
4278 return false;
4279 assert(!VMetadata->isMultiDef(Base));
4280 if (BaseInst->getSrcSize() < 2)
4281 return false;
David Sehraa0b1a12015-10-27 16:55:40 -07004282 if (auto *Var1 = llvm::dyn_cast<Variable>(BaseInst->getSrc(0))) {
John Porto7e93c622015-06-23 10:58:57 -07004283 if (VMetadata->isMultiDef(Var1))
4284 return false;
David Sehraa0b1a12015-10-27 16:55:40 -07004285 if (auto *Var2 = llvm::dyn_cast<Variable>(BaseInst->getSrc(1))) {
John Porto7e93c622015-06-23 10:58:57 -07004286 if (VMetadata->isMultiDef(Var2))
4287 return false;
4288 if (isAdd(BaseInst) &&
4289 // TODO: ensure Var1 and Var2 stay single-BB
4290 true) {
4291 Base = Var1;
4292 Index = Var2;
4293 Shift = 0; // should already have been 0
4294 Reason = BaseInst;
4295 return true;
4296 }
4297 }
4298 }
4299 return false;
4300}
4301
John Porto5aeed952015-07-21 13:39:09 -07004302inline bool matchShiftedIndex(const VariablesMetadata *VMetadata,
4303 Variable *&Index, uint16_t &Shift,
4304 const Inst *&Reason) {
John Porto7e93c622015-06-23 10:58:57 -07004305 // Index is Index=Var*Const && log2(Const)+Shift<=3 ==>
4306 // Index=Var, Shift+=log2(Const)
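  // e.g. starting from (Index == x, Shift == 1) where x = y * 4:
  // log2(4) == 2 and 1 + 2 <= 3, so matching yields (Index == y, Shift == 3),
  // an overall scale of 8.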
4307 if (Index == nullptr)
4308 return false;
David Sehraa0b1a12015-10-27 16:55:40 -07004309 auto *IndexInst = VMetadata->getSingleDefinition(Index);
John Porto7e93c622015-06-23 10:58:57 -07004310 if (IndexInst == nullptr)
4311 return false;
4312 assert(!VMetadata->isMultiDef(Index));
4313 if (IndexInst->getSrcSize() < 2)
4314 return false;
David Sehraa0b1a12015-10-27 16:55:40 -07004315 if (auto *ArithInst = llvm::dyn_cast<InstArithmetic>(IndexInst)) {
4316 if (auto *Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(0))) {
4317 if (auto *Const =
John Porto7e93c622015-06-23 10:58:57 -07004318 llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(1))) {
David Sehraa0b1a12015-10-27 16:55:40 -07004319 if (VMetadata->isMultiDef(Var) || Const->getType() != IceType_i32)
4320 return false;
4321 switch (ArithInst->getOp()) {
4322 default:
4323 return false;
4324 case InstArithmetic::Mul: {
4325 uint32_t Mult = Const->getValue();
John Porto7e93c622015-06-23 10:58:57 -07004326 uint32_t LogMult;
4327 switch (Mult) {
4328 case 1:
4329 LogMult = 0;
4330 break;
4331 case 2:
4332 LogMult = 1;
4333 break;
4334 case 4:
4335 LogMult = 2;
4336 break;
4337 case 8:
4338 LogMult = 3;
4339 break;
4340 default:
4341 return false;
4342 }
4343 if (Shift + LogMult <= 3) {
4344 Index = Var;
4345 Shift += LogMult;
4346 Reason = IndexInst;
4347 return true;
4348 }
4349 }
David Sehraa0b1a12015-10-27 16:55:40 -07004350 case InstArithmetic::Shl: {
4351 uint32_t ShiftAmount = Const->getValue();
4352 switch (ShiftAmount) {
4353 case 0:
4354 case 1:
4355 case 2:
4356 case 3:
4357 break;
4358 default:
4359 return false;
4360 }
4361 if (Shift + ShiftAmount <= 3) {
4362 Index = Var;
4363 Shift += ShiftAmount;
4364 Reason = IndexInst;
4365 return true;
4366 }
4367 }
4368 }
John Porto7e93c622015-06-23 10:58:57 -07004369 }
4370 }
4371 }
4372 return false;
4373}
4374
John Porto5aeed952015-07-21 13:39:09 -07004375inline bool matchOffsetBase(const VariablesMetadata *VMetadata, Variable *&Base,
David Sehraa0b1a12015-10-27 16:55:40 -07004376 ConstantRelocatable *&Relocatable, int32_t &Offset,
4377 const Inst *&Reason) {
John Porto7e93c622015-06-23 10:58:57 -07004378 // Base is Base=Var+Const || Base is Base=Const+Var ==>
4379 // set Base=Var, Offset+=Const
4380 // Base is Base=Var-Const ==>
4381 // set Base=Var, Offset-=Const
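  // e.g. if Base == b where b = a + 12 and Offset == 4, matching yields
  // Base == a and Offset == 16; for b = a - 12 it would yield Offset == -8.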
David Sehraa0b1a12015-10-27 16:55:40 -07004382 if (Base == nullptr) {
John Porto7e93c622015-06-23 10:58:57 -07004383 return false;
David Sehraa0b1a12015-10-27 16:55:40 -07004384 }
John Porto7e93c622015-06-23 10:58:57 -07004385 const Inst *BaseInst = VMetadata->getSingleDefinition(Base);
David Sehraa0b1a12015-10-27 16:55:40 -07004386 if (BaseInst == nullptr) {
John Porto7e93c622015-06-23 10:58:57 -07004387 return false;
David Sehraa0b1a12015-10-27 16:55:40 -07004388 }
John Porto7e93c622015-06-23 10:58:57 -07004389 assert(!VMetadata->isMultiDef(Base));
David Sehraa0b1a12015-10-27 16:55:40 -07004390 if (auto *ArithInst = llvm::dyn_cast<const InstArithmetic>(BaseInst)) {
John Porto7e93c622015-06-23 10:58:57 -07004391 if (ArithInst->getOp() != InstArithmetic::Add &&
4392 ArithInst->getOp() != InstArithmetic::Sub)
4393 return false;
4394 bool IsAdd = ArithInst->getOp() == InstArithmetic::Add;
David Sehraa0b1a12015-10-27 16:55:40 -07004395 Operand *Src0 = ArithInst->getSrc(0);
4396 Operand *Src1 = ArithInst->getSrc(1);
4397 auto *Var0 = llvm::dyn_cast<Variable>(Src0);
4398 auto *Var1 = llvm::dyn_cast<Variable>(Src1);
4399 auto *Const0 = llvm::dyn_cast<ConstantInteger32>(Src0);
4400 auto *Const1 = llvm::dyn_cast<ConstantInteger32>(Src1);
4401 auto *Reloc0 = llvm::dyn_cast<ConstantRelocatable>(Src0);
4402 auto *Reloc1 = llvm::dyn_cast<ConstantRelocatable>(Src1);
4403 Variable *NewBase = nullptr;
4404 int32_t NewOffset = Offset;
4405 ConstantRelocatable *NewRelocatable = Relocatable;
4406 if (Var0 && Var1)
4407 // TODO(sehr): merge base/index splitting into here.
4408 return false;
4409 if (!IsAdd && Var1)
4410 return false;
4411 if (Var0)
4412 NewBase = Var0;
4413 else if (Var1)
4414 NewBase = Var1;
4415 // Don't know how to add/subtract two relocatables.
4416 if ((Relocatable && (Reloc0 || Reloc1)) || (Reloc0 && Reloc1))
4417 return false;
4418 // Don't know how to subtract a relocatable.
4419 if (!IsAdd && Reloc1)
4420 return false;
4421 // Incorporate ConstantRelocatables.
4422 if (Reloc0)
4423 NewRelocatable = Reloc0;
4424 else if (Reloc1)
4425 NewRelocatable = Reloc1;
4426 // Compute the updated constant offset.
4427 if (Const0) {
4428 int32_t MoreOffset = IsAdd ? Const0->getValue() : -Const0->getValue();
4429 if (Utils::WouldOverflowAdd(NewOffset, MoreOffset))
4430 return false;
4431 NewOffset += MoreOffset;
John Porto7e93c622015-06-23 10:58:57 -07004432 }
David Sehraa0b1a12015-10-27 16:55:40 -07004433 if (Const1) {
4434 int32_t MoreOffset = IsAdd ? Const1->getValue() : -Const1->getValue();
4435 if (Utils::WouldOverflowAdd(NewOffset, MoreOffset))
4436 return false;
4437 NewOffset += MoreOffset;
4438 }
4439 // Update the computed address parameters once we are sure optimization
4440 // is valid.
4441 Base = NewBase;
4442 Offset = NewOffset;
4443 Relocatable = NewRelocatable;
John Porto7e93c622015-06-23 10:58:57 -07004444 Reason = BaseInst;
4445 return true;
4446 }
4447 return false;
4448}
4449
David Sehraa0b1a12015-10-27 16:55:40 -07004450// Builds information for a canonical address expression:
4451// <Relocatable + Offset>(Base, Index, Shift)
4452// On entry:
4453 //   Relocatable == nullptr,
4454// Offset == 0,
4455// Base is a Variable,
4456// Index == nullptr,
4457// Shift == 0
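// As a worked sketch (hypothetical IR, not from any particular test):
//   t1 = a + 8; t2 = i * 4; addr = t1 + t2; load v, addr
// folds, via the matchers above, to Base == a, Index == i, Shift == 2,
// Offset == 8, i.e. the single x86 addressing mode 8(%a,%i,4).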
4458inline bool computeAddressOpt(Cfg *Func, const Inst *Instr,
4459 ConstantRelocatable *&Relocatable,
4460 int32_t &Offset, Variable *&Base,
4461 Variable *&Index, uint16_t &Shift) {
4462 bool AddressWasOptimized = false;
John Porto7e93c622015-06-23 10:58:57 -07004463 Func->resetCurrentNode();
4464 if (Func->isVerbose(IceV_AddrOpt)) {
4465 OstreamLocker L(Func->getContext());
4466 Ostream &Str = Func->getContext()->getStrDump();
4467 Str << "\nStarting computeAddressOpt for instruction:\n ";
4468 Instr->dumpDecorated(Func);
4469 }
John Porto7e93c622015-06-23 10:58:57 -07004470 if (Base == nullptr)
David Sehraa0b1a12015-10-27 16:55:40 -07004471 return AddressWasOptimized;
Andrew Scull57e12682015-09-16 11:30:19 -07004472 // If the Base has more than one use or is live across multiple blocks, then
4473 // don't go further. Alternatively (?), never consider a transformation that
4474 // would change a variable that is currently *not* live across basic block
4475 // boundaries into one that *is*.
John Porto7e93c622015-06-23 10:58:57 -07004476 if (Func->getVMetadata()->isMultiBlock(Base) /* || Base->getUseCount() > 1*/)
David Sehraa0b1a12015-10-27 16:55:40 -07004477 return AddressWasOptimized;
John Porto7e93c622015-06-23 10:58:57 -07004478
Jim Stichnothad2989b2015-09-15 10:21:42 -07004479 const bool MockBounds = Func->getContext()->getFlags().getMockBoundsCheck();
John Porto7e93c622015-06-23 10:58:57 -07004480 const VariablesMetadata *VMetadata = Func->getVMetadata();
David Sehraa0b1a12015-10-27 16:55:40 -07004481 const Inst *Reason = nullptr;
4482 do {
4483 if (Reason) {
4484 dumpAddressOpt(Func, Relocatable, Offset, Base, Index, Shift, Reason);
4485 AddressWasOptimized = true;
4486 Reason = nullptr;
John Porto7e93c622015-06-23 10:58:57 -07004487 }
David Sehraa0b1a12015-10-27 16:55:40 -07004488 // Update Base and Index to follow through assignments to definitions.
4489 if (matchAssign(VMetadata, Base, Relocatable, Offset, Reason)) {
4490 // Assignments of Base from a Relocatable or ConstantInt32 can result
4491 // in Base becoming nullptr. To avoid code duplication in this loop we
4492 // prefer that Base be non-nullptr if possible.
4493 if ((Base == nullptr) && (Index != nullptr) && (Shift == 0))
4494 std::swap(Base, Index);
4495 continue;
4496 }
4497 if (matchAssign(VMetadata, Index, Relocatable, Offset, Reason))
4498 continue;
John Porto7e93c622015-06-23 10:58:57 -07004499
David Sehraa0b1a12015-10-27 16:55:40 -07004500 if (!MockBounds) {
4501 // Transition from:
4502 // <Relocatable + Offset>(Base) to
4503 // <Relocatable + Offset>(Base, Index)
4504 if (matchCombinedBaseIndex(VMetadata, Base, Index, Shift, Reason))
4505 continue;
4506 // Recognize multiply/shift and update Shift amount.
4507 // Index becomes Index=Var<<Const && Const+Shift<=3 ==>
4508 // Index=Var, Shift+=Const
4509 // Index becomes Index=Const*Var && log2(Const)+Shift<=3 ==>
4510 // Index=Var, Shift+=log2(Const)
4511 if (matchShiftedIndex(VMetadata, Index, Shift, Reason))
4512 continue;
4513 // If Shift is zero, the choice of Base and Index was purely arbitrary.
4514 // Recognize multiply/shift and set Shift amount.
4515 // Shift==0 && Base is Base=Var*Const && log2(Const)+Shift<=3 ==>
4516 // swap(Index,Base)
4517 // Similar for Base=Const*Var and Base=Var<<Const
4518 if ((Shift == 0) && matchShiftedIndex(VMetadata, Base, Shift, Reason)) {
4519 std::swap(Base, Index);
4520 continue;
4521 }
4522 }
4523 // Update Offset to reflect additions/subtractions with constants and
4524 // relocatables.
John Porto7e93c622015-06-23 10:58:57 -07004525 // TODO: consider overflow issues with respect to Offset.
4526 // TODO: handle symbolic constants.
David Sehraa0b1a12015-10-27 16:55:40 -07004527 if (matchOffsetBase(VMetadata, Base, Relocatable, Offset, Reason))
4528 continue;
4529 // TODO(sehr, stichnot): Handle updates of Index with Shift != 0.
4530 // Index is Index=Var+Const ==>
4531 // set Index=Var, Offset+=(Const<<Shift)
4532 // Index is Index=Const+Var ==>
4533 // set Index=Var, Offset+=(Const<<Shift)
4534 // Index is Index=Var-Const ==>
4535 // set Index=Var, Offset-=(Const<<Shift)
4536 break;
4537 } while (Reason);
4538 return AddressWasOptimized;
John Porto7e93c622015-06-23 10:58:57 -07004539}
4540
Jim Stichnothad2989b2015-09-15 10:21:42 -07004541/// Add a mock bounds check on the memory address before using it as a load or
4542/// store operand. The basic idea is that given a memory operand [reg], we
4543/// would first add bounds-check code something like:
4544///
4545/// cmp reg, <lb>
4546/// jl out_of_line_error
4547/// cmp reg, <ub>
4548/// jg out_of_line_error
4549///
4550/// In reality, the specific code will depend on how <lb> and <ub> are
4551/// represented, e.g. an immediate, a global, or a function argument.
4552///
4553/// As such, we need to enforce that the memory operand does not have the form
4554/// [reg1+reg2], because then there is no simple cmp instruction that would
4555/// suffice. However, we consider [reg+offset] to be OK because the offset is
4556/// usually small, and so <ub> could have a safety buffer built in and then we
4557/// could instead branch to a custom out_of_line_error that does the precise
4558/// check and jumps back if it turns out OK.
4559///
4560/// For the purpose of mocking the bounds check, we'll do something like this:
4561///
4562/// cmp reg, 0
4563/// je label
4564/// cmp reg, 1
4565/// je label
4566/// label:
4567///
4568/// Also note that we don't need to add a bounds check to a dereference of a
4569/// simple global variable address.
4570template <class Machine>
4571void TargetX86Base<Machine>::doMockBoundsCheck(Operand *Opnd) {
4572 if (!Ctx->getFlags().getMockBoundsCheck())
4573 return;
4574 if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Opnd)) {
4575 if (Mem->getIndex()) {
4576 llvm::report_fatal_error("doMockBoundsCheck: Opnd contains index reg");
4577 }
4578 Opnd = Mem->getBase();
4579 }
4580 // At this point Opnd could be nullptr, or Variable, or Constant, or perhaps
4581 // something else. We only care if it is Variable.
4582 auto *Var = llvm::dyn_cast_or_null<Variable>(Opnd);
4583 if (Var == nullptr)
4584 return;
4585 // We use lowerStore() to copy out-args onto the stack. This creates a memory
4586 // operand with the stack pointer as the base register. Don't do bounds
4587 // checks on that.
4588 if (Var->getRegNum() == Traits::RegisterSet::Reg_esp)
4589 return;
4590
4591 typename Traits::Insts::Label *Label =
4592 Traits::Insts::Label::create(Func, this);
4593 _cmp(Opnd, Ctx->getConstantZero(IceType_i32));
4594 _br(Traits::Cond::Br_e, Label);
4595 _cmp(Opnd, Ctx->getConstantInt32(1));
4596 _br(Traits::Cond::Br_e, Label);
4597 Context.insert(Label);
4598}
4599
John Porto7e93c622015-06-23 10:58:57 -07004600template <class Machine>
4601void TargetX86Base<Machine>::lowerLoad(const InstLoad *Load) {
John Porto921856d2015-07-07 11:56:26 -07004602 // A Load instruction can be treated the same as an Assign instruction, after
4603 // the source operand is transformed into an Traits::X86OperandMem operand.
4604 // Note that the address mode optimization already creates an
4605 // Traits::X86OperandMem operand, so it doesn't need another level of
4606 // transformation.
John Porto7e93c622015-06-23 10:58:57 -07004607 Variable *DestLoad = Load->getDest();
4608 Type Ty = DestLoad->getType();
4609 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty);
Jim Stichnothad2989b2015-09-15 10:21:42 -07004610 doMockBoundsCheck(Src0);
John Porto7e93c622015-06-23 10:58:57 -07004611 InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0);
4612 lowerAssign(Assign);
4613}
4614
4615template <class Machine> void TargetX86Base<Machine>::doAddressOptLoad() {
4616 Inst *Inst = Context.getCur();
4617 Variable *Dest = Inst->getDest();
4618 Operand *Addr = Inst->getSrc(0);
4619 Variable *Index = nullptr;
David Sehraa0b1a12015-10-27 16:55:40 -07004620 ConstantRelocatable *Relocatable = nullptr;
John Porto7e93c622015-06-23 10:58:57 -07004621 uint16_t Shift = 0;
David Sehraa0b1a12015-10-27 16:55:40 -07004622 int32_t Offset = 0;
John Porto921856d2015-07-07 11:56:26 -07004623 // Vanilla ICE load instructions should not use the segment registers, and
4624 // computeAddressOpt only works at the level of Variables and Constants, not
4625 // other Traits::X86OperandMem, so there should be no mention of segment
John Porto7e93c622015-06-23 10:58:57 -07004626 // registers there either.
John Porto921856d2015-07-07 11:56:26 -07004627 const typename Traits::X86OperandMem::SegmentRegisters SegmentReg =
4628 Traits::X86OperandMem::DefaultSegment;
David Sehraa0b1a12015-10-27 16:55:40 -07004629 auto *Base = llvm::dyn_cast<Variable>(Addr);
4630 if (computeAddressOpt(Func, Inst, Relocatable, Offset, Base, Index, Shift)) {
John Porto7e93c622015-06-23 10:58:57 -07004631 Inst->setDeleted();
David Sehraa0b1a12015-10-27 16:55:40 -07004632 Constant *OffsetOp = nullptr;
4633 if (Relocatable == nullptr) {
4634 OffsetOp = Ctx->getConstantInt32(Offset);
4635 } else {
4636 OffsetOp = Ctx->getConstantSym(Relocatable->getOffset() + Offset,
4637 Relocatable->getName(),
4638 Relocatable->getSuppressMangling());
4639 }
John Porto921856d2015-07-07 11:56:26 -07004640 Addr = Traits::X86OperandMem::create(Func, Dest->getType(), Base, OffsetOp,
4641 Index, Shift, SegmentReg);
John Porto7e93c622015-06-23 10:58:57 -07004642 Context.insert(InstLoad::create(Func, Dest, Addr));
4643 }
4644}
4645
4646template <class Machine>
Qining Luaee5fa82015-08-20 14:59:03 -07004647void TargetX86Base<Machine>::randomlyInsertNop(float Probability,
4648 RandomNumberGenerator &RNG) {
4649 RandomNumberGeneratorWrapper RNGW(RNG);
4650 if (RNGW.getTrueWithProbability(Probability)) {
4651 _nop(RNGW(Traits::X86_NUM_NOP_VARIANTS));
John Porto7e93c622015-06-23 10:58:57 -07004652 }
4653}
4654
4655template <class Machine>
4656void TargetX86Base<Machine>::lowerPhi(const InstPhi * /*Inst*/) {
4657 Func->setError("Phi found in regular instruction list");
4658}
4659
4660template <class Machine>
John Porto7e93c622015-06-23 10:58:57 -07004661void TargetX86Base<Machine>::lowerSelect(const InstSelect *Inst) {
4662 Variable *Dest = Inst->getDest();
4663 Type DestTy = Dest->getType();
4664 Operand *SrcT = Inst->getTrueOperand();
4665 Operand *SrcF = Inst->getFalseOperand();
4666 Operand *Condition = Inst->getCondition();
4667
4668 if (isVectorType(DestTy)) {
4669 Type SrcTy = SrcT->getType();
4670 Variable *T = makeReg(SrcTy);
4671 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);
4672 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);
John Porto5d0acff2015-06-30 15:29:21 -07004673 if (InstructionSet >= Traits::SSE4_1) {
Andrew Scull57e12682015-09-16 11:30:19 -07004674 // TODO(wala): If the condition operand is a constant, use blendps or
4675 // pblendw.
John Porto7e93c622015-06-23 10:58:57 -07004676 //
4677 // Use blendvps or pblendvb to implement select.
4678 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||
4679 SrcTy == IceType_v4f32) {
4680 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
John Porto5d0acff2015-06-30 15:29:21 -07004681 Variable *xmm0 = makeReg(IceType_v4i32, Traits::RegisterSet::Reg_xmm0);
John Porto7e93c622015-06-23 10:58:57 -07004682 _movp(xmm0, ConditionRM);
4683 _psll(xmm0, Ctx->getConstantInt8(31));
4684 _movp(T, SrcFRM);
4685 _blendvps(T, SrcTRM, xmm0);
4686 _movp(Dest, T);
4687 } else {
4688 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16);
4689 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16
4690 : IceType_v16i8;
John Porto5d0acff2015-06-30 15:29:21 -07004691 Variable *xmm0 = makeReg(SignExtTy, Traits::RegisterSet::Reg_xmm0);
John Porto7e93c622015-06-23 10:58:57 -07004692 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition));
4693 _movp(T, SrcFRM);
4694 _pblendvb(T, SrcTRM, xmm0);
4695 _movp(Dest, T);
4696 }
4697 return;
4698 }
John Porto5d0acff2015-06-30 15:29:21 -07004699 // Lower select without Traits::SSE4.1:
John Porto7e93c622015-06-23 10:58:57 -07004700 // a=d?b:c ==>
4701 // if elementtype(d) != i1:
4702 // d=sext(d);
4703 // a=(b&d)|(c&~d);
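    // At the bit level this is a per-lane select: in a lane where the
    // sign-extended condition d is all-ones, (b & d) | (c & ~d) == b; where
    // d is all-zeros it yields c.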
4704 Variable *T2 = makeReg(SrcTy);
4705 // Sign extend the condition operand if applicable.
4706 if (SrcTy == IceType_v4f32) {
4707 // The sext operation takes only integer arguments.
John Porto5aeed952015-07-21 13:39:09 -07004708 Variable *T3 = Func->makeVariable(IceType_v4i32);
John Porto7e93c622015-06-23 10:58:57 -07004709 lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition));
4710 _movp(T, T3);
4711 } else if (typeElementType(SrcTy) != IceType_i1) {
4712 lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition));
4713 } else {
4714 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
4715 _movp(T, ConditionRM);
4716 }
4717 _movp(T2, T);
4718 _pand(T, SrcTRM);
4719 _pandn(T2, SrcFRM);
4720 _por(T, T2);
4721 _movp(Dest, T);
4722
4723 return;
4724 }
4725
John Porto5d0acff2015-06-30 15:29:21 -07004726 typename Traits::Cond::BrCond Cond = Traits::Cond::Br_ne;
John Porto7e93c622015-06-23 10:58:57 -07004727 Operand *CmpOpnd0 = nullptr;
4728 Operand *CmpOpnd1 = nullptr;
4729 // Handle folding opportunities.
4730 if (const class Inst *Producer = FoldingInfo.getProducerFor(Condition)) {
4731 assert(Producer->isDeleted());
4732 switch (BoolFolding::getProducerKind(Producer)) {
4733 default:
4734 break;
4735 case BoolFolding::PK_Icmp32: {
4736 auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer);
4737 Cond = Traits::getIcmp32Mapping(Cmp->getCondition());
4738 CmpOpnd1 = legalize(Producer->getSrc(1));
4739 CmpOpnd0 = legalizeSrc0ForCmp(Producer->getSrc(0), CmpOpnd1);
4740 } break;
4741 }
4742 }
4743 if (CmpOpnd0 == nullptr) {
4744 CmpOpnd0 = legalize(Condition, Legal_Reg | Legal_Mem);
4745 CmpOpnd1 = Ctx->getConstantZero(IceType_i32);
4746 }
4747 assert(CmpOpnd0);
4748 assert(CmpOpnd1);
4749
4750 _cmp(CmpOpnd0, CmpOpnd1);
4751 if (typeWidthInBytes(DestTy) == 1 || isFloatingType(DestTy)) {
Andrew Scull57e12682015-09-16 11:30:19 -07004752 // The cmov instruction doesn't allow 8-bit or FP operands, so we need
4753 // explicit control flow.
John Porto7e93c622015-06-23 10:58:57 -07004754 // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1:
John Porto921856d2015-07-07 11:56:26 -07004755 typename Traits::Insts::Label *Label =
4756 Traits::Insts::Label::create(Func, this);
John Porto7e93c622015-06-23 10:58:57 -07004757 SrcT = legalize(SrcT, Legal_Reg | Legal_Imm);
4758 _mov(Dest, SrcT);
4759 _br(Cond, Label);
4760 SrcF = legalize(SrcF, Legal_Reg | Legal_Imm);
Jim Stichnoth230d4102015-09-25 17:40:32 -07004761 _mov_redefined(Dest, SrcF);
John Porto7e93c622015-06-23 10:58:57 -07004762 Context.insert(Label);
4763 return;
4764 }
4765 // mov t, SrcF; cmov_cond t, SrcT; mov dest, t
Andrew Scull57e12682015-09-16 11:30:19 -07004766 // But if SrcT is immediate, we might be able to do better, as the cmov
4767 // instruction doesn't allow an immediate operand:
John Porto7e93c622015-06-23 10:58:57 -07004768 // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t
4769 if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) {
4770 std::swap(SrcT, SrcF);
John Porto921856d2015-07-07 11:56:26 -07004771 Cond = InstX86Base<Machine>::getOppositeCondition(Cond);
John Porto7e93c622015-06-23 10:58:57 -07004772 }
John Porto1d235422015-08-12 12:37:53 -07004773 if (!Traits::Is64Bit && DestTy == IceType_i64) {
Jan Voungfbdd2442015-07-15 12:36:20 -07004774 SrcT = legalizeUndef(SrcT);
4775 SrcF = legalizeUndef(SrcF);
John Porto7e93c622015-06-23 10:58:57 -07004776 // Set the low portion.
4777 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
4778 Variable *TLo = nullptr;
4779 Operand *SrcFLo = legalize(loOperand(SrcF));
4780 _mov(TLo, SrcFLo);
4781 Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Mem);
4782 _cmov(TLo, SrcTLo, Cond);
4783 _mov(DestLo, TLo);
4784 // Set the high portion.
4785 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
4786 Variable *THi = nullptr;
4787 Operand *SrcFHi = legalize(hiOperand(SrcF));
4788 _mov(THi, SrcFHi);
4789 Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Mem);
4790 _cmov(THi, SrcTHi, Cond);
4791 _mov(DestHi, THi);
4792 return;
4793 }
4794
John Porto1d235422015-08-12 12:37:53 -07004795 assert(DestTy == IceType_i16 || DestTy == IceType_i32 ||
4796 (Traits::Is64Bit && DestTy == IceType_i64));
John Porto7e93c622015-06-23 10:58:57 -07004797 Variable *T = nullptr;
4798 SrcF = legalize(SrcF);
4799 _mov(T, SrcF);
4800 SrcT = legalize(SrcT, Legal_Reg | Legal_Mem);
4801 _cmov(T, SrcT, Cond);
4802 _mov(Dest, T);
4803}
4804
4805template <class Machine>
4806void TargetX86Base<Machine>::lowerStore(const InstStore *Inst) {
4807 Operand *Value = Inst->getData();
4808 Operand *Addr = Inst->getAddr();
John Porto921856d2015-07-07 11:56:26 -07004809 typename Traits::X86OperandMem *NewAddr =
4810 formMemoryOperand(Addr, Value->getType());
Jim Stichnothad2989b2015-09-15 10:21:42 -07004811 doMockBoundsCheck(NewAddr);
John Porto7e93c622015-06-23 10:58:57 -07004812 Type Ty = NewAddr->getType();
4813
John Porto1d235422015-08-12 12:37:53 -07004814 if (!Traits::Is64Bit && Ty == IceType_i64) {
Jan Voungfbdd2442015-07-15 12:36:20 -07004815 Value = legalizeUndef(Value);
John Porto7e93c622015-06-23 10:58:57 -07004816 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm);
4817 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm);
John Porto921856d2015-07-07 11:56:26 -07004818 _store(ValueHi,
4819 llvm::cast<typename Traits::X86OperandMem>(hiOperand(NewAddr)));
4820 _store(ValueLo,
4821 llvm::cast<typename Traits::X86OperandMem>(loOperand(NewAddr)));
John Porto7e93c622015-06-23 10:58:57 -07004822 } else if (isVectorType(Ty)) {
Andrew Scull97f460d2015-07-21 10:07:42 -07004823 _storep(legalizeToReg(Value), NewAddr);
John Porto7e93c622015-06-23 10:58:57 -07004824 } else {
4825 Value = legalize(Value, Legal_Reg | Legal_Imm);
4826 _store(Value, NewAddr);
4827 }
4828}
4829
4830template <class Machine> void TargetX86Base<Machine>::doAddressOptStore() {
4831 InstStore *Inst = llvm::cast<InstStore>(Context.getCur());
4832 Operand *Data = Inst->getData();
4833 Operand *Addr = Inst->getAddr();
4834 Variable *Index = nullptr;
David Sehraa0b1a12015-10-27 16:55:40 -07004835 ConstantRelocatable *Relocatable = nullptr;
John Porto7e93c622015-06-23 10:58:57 -07004836 uint16_t Shift = 0;
David Sehraa0b1a12015-10-27 16:55:40 -07004837 int32_t Offset = 0;
4838 auto *Base = llvm::dyn_cast<Variable>(Addr);
John Porto921856d2015-07-07 11:56:26 -07004839 // Vanilla ICE store instructions should not use the segment registers, and
4840 // computeAddressOpt only works at the level of Variables and Constants, not
4841 // other Traits::X86OperandMem, so there should be no mention of segment
John Porto7e93c622015-06-23 10:58:57 -07004842 // registers there either.
John Porto921856d2015-07-07 11:56:26 -07004843 const typename Traits::X86OperandMem::SegmentRegisters SegmentReg =
4844 Traits::X86OperandMem::DefaultSegment;
David Sehraa0b1a12015-10-27 16:55:40 -07004845 if (computeAddressOpt(Func, Inst, Relocatable, Offset, Base, Index, Shift)) {
John Porto7e93c622015-06-23 10:58:57 -07004846 Inst->setDeleted();
David Sehraa0b1a12015-10-27 16:55:40 -07004847 Constant *OffsetOp = nullptr;
4848 if (Relocatable == nullptr) {
4849 OffsetOp = Ctx->getConstantInt32(Offset);
4850 } else {
4851 OffsetOp = Ctx->getConstantSym(Relocatable->getOffset() + Offset,
4852 Relocatable->getName(),
4853 Relocatable->getSuppressMangling());
4854 }
John Porto921856d2015-07-07 11:56:26 -07004855 Addr = Traits::X86OperandMem::create(Func, Data->getType(), Base, OffsetOp,
4856 Index, Shift, SegmentReg);
John Porto7e93c622015-06-23 10:58:57 -07004857 InstStore *NewStore = InstStore::create(Func, Data, Addr);
4858 if (Inst->getDest())
4859 NewStore->setRmwBeacon(Inst->getRmwBeacon());
4860 Context.insert(NewStore);
4861 }
4862}
4863
4864template <class Machine>
Andrew Scull87f80c12015-07-20 10:19:16 -07004865Operand *TargetX86Base<Machine>::lowerCmpRange(Operand *Comparison,
4866 uint64_t Min, uint64_t Max) {
4867  // TODO(ascull): 64-bit should not reach here, but only because it is not
4868 // implemented yet. This should be able to handle the 64-bit case.
John Porto1d235422015-08-12 12:37:53 -07004869 assert(Traits::Is64Bit || Comparison->getType() != IceType_i64);
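  // The range test below is the usual unsigned-compare trick:
  // Min <= x && x <= Max holds iff (x - Min) <=u (Max - Min); e.g.
  // 5 <= x <= 10 becomes (x - 5) <=u 5, one sub plus one cmp.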
Andrew Scull87f80c12015-07-20 10:19:16 -07004870 // Subtracting 0 is a nop so don't do it
4871 if (Min != 0) {
4872 // Avoid clobbering the comparison by copying it
4873 Variable *T = nullptr;
4874 _mov(T, Comparison);
4875 _sub(T, Ctx->getConstantInt32(Min));
4876 Comparison = T;
4877 }
4878
4879 _cmp(Comparison, Ctx->getConstantInt32(Max - Min));
4880
4881 return Comparison;
4882}
4883
4884template <class Machine>
4885void TargetX86Base<Machine>::lowerCaseCluster(const CaseCluster &Case,
4886 Operand *Comparison, bool DoneCmp,
Andrew Scull86df4e92015-07-30 13:54:44 -07004887 CfgNode *DefaultTarget) {
Andrew Scull87f80c12015-07-20 10:19:16 -07004888 switch (Case.getKind()) {
4889 case CaseCluster::JumpTable: {
4890 typename Traits::Insts::Label *SkipJumpTable;
4891
4892 Operand *RangeIndex =
4893 lowerCmpRange(Comparison, Case.getLow(), Case.getHigh());
Andrew Scull86df4e92015-07-30 13:54:44 -07004894 if (DefaultTarget == nullptr) {
Andrew Scull87f80c12015-07-20 10:19:16 -07004895      // Skip over the jump table logic if the comparison is not in range and no default exists
4896 SkipJumpTable = Traits::Insts::Label::create(Func, this);
4897 _br(Traits::Cond::Br_a, SkipJumpTable);
Andrew Scull86df4e92015-07-30 13:54:44 -07004898 } else {
4899 _br(Traits::Cond::Br_a, DefaultTarget);
John Porto7e93c622015-06-23 10:58:57 -07004900 }
Andrew Scull87f80c12015-07-20 10:19:16 -07004901
4902 InstJumpTable *JumpTable = Case.getJumpTable();
4903 Context.insert(JumpTable);
4904
4905 // Make sure the index is a register of the same width as the base
4906 Variable *Index;
4907 if (RangeIndex->getType() != getPointerType()) {
4908 Index = makeReg(getPointerType());
4909 _movzx(Index, RangeIndex);
4910 } else {
Andrew Scull97f460d2015-07-21 10:07:42 -07004911 Index = legalizeToReg(RangeIndex);
Andrew Scull87f80c12015-07-20 10:19:16 -07004912 }
4913
4914 constexpr RelocOffsetT RelocOffset = 0;
4915 constexpr bool SuppressMangling = true;
Andrew Scull86df4e92015-07-30 13:54:44 -07004916 IceString MangledName = Ctx->mangleName(Func->getFunctionName());
4917 Constant *Base = Ctx->getConstantSym(
4918 RelocOffset, InstJumpTable::makeName(MangledName, JumpTable->getId()),
4919 SuppressMangling);
Andrew Scull87f80c12015-07-20 10:19:16 -07004920 Constant *Offset = nullptr;
4921 uint16_t Shift = typeWidthInBytesLog2(getPointerType());
4922 // TODO(ascull): remove need for legalize by allowing null base in memop
Andrew Scull86df4e92015-07-30 13:54:44 -07004923 auto *TargetInMemory = Traits::X86OperandMem::create(
Andrew Scull97f460d2015-07-21 10:07:42 -07004924 Func, getPointerType(), legalizeToReg(Base), Offset, Index, Shift);
Andrew Scull87f80c12015-07-20 10:19:16 -07004925 Variable *Target = nullptr;
Andrew Scull86df4e92015-07-30 13:54:44 -07004926 _mov(Target, TargetInMemory);
4927 lowerIndirectJump(Target);
Andrew Scull87f80c12015-07-20 10:19:16 -07004928
Andrew Scull86df4e92015-07-30 13:54:44 -07004929 if (DefaultTarget == nullptr)
Andrew Scull87f80c12015-07-20 10:19:16 -07004930 Context.insert(SkipJumpTable);
4931 return;
4932 }
4933 case CaseCluster::Range: {
Andrew Scull86df4e92015-07-30 13:54:44 -07004934 if (Case.isUnitRange()) {
Andrew Scull87f80c12015-07-20 10:19:16 -07004935 // Single item
Andrew Scull86df4e92015-07-30 13:54:44 -07004936 if (!DoneCmp) {
4937 Constant *Value = Ctx->getConstantInt32(Case.getLow());
Andrew Scull87f80c12015-07-20 10:19:16 -07004938 _cmp(Comparison, Value);
Andrew Scull86df4e92015-07-30 13:54:44 -07004939 }
4940 _br(Traits::Cond::Br_e, Case.getTarget());
4941 } else if (DoneCmp && Case.isPairRange()) {
4942      // Range of two items, with the first item already compared
4943 _br(Traits::Cond::Br_e, Case.getTarget());
4944 Constant *Value = Ctx->getConstantInt32(Case.getHigh());
4945 _cmp(Comparison, Value);
4946 _br(Traits::Cond::Br_e, Case.getTarget());
Andrew Scull87f80c12015-07-20 10:19:16 -07004947 } else {
4948 // Range
4949 lowerCmpRange(Comparison, Case.getLow(), Case.getHigh());
Andrew Scull86df4e92015-07-30 13:54:44 -07004950 _br(Traits::Cond::Br_be, Case.getTarget());
Andrew Scull87f80c12015-07-20 10:19:16 -07004951 }
Andrew Scull86df4e92015-07-30 13:54:44 -07004952 if (DefaultTarget != nullptr)
4953 _br(DefaultTarget);
Andrew Scull87f80c12015-07-20 10:19:16 -07004954 return;
4955 }
4956 }
4957}
4958
4959template <class Machine>
4960void TargetX86Base<Machine>::lowerSwitch(const InstSwitch *Inst) {
Andrew Scull87f80c12015-07-20 10:19:16 -07004961 // Group cases together and navigate through them with a binary search
4962 CaseClusterArray CaseClusters = CaseCluster::clusterizeSwitch(Func, Inst);
4963 Operand *Src0 = Inst->getComparison();
Andrew Scull86df4e92015-07-30 13:54:44 -07004964 CfgNode *DefaultTarget = Inst->getLabelDefault();
Andrew Scull87f80c12015-07-20 10:19:16 -07004965
4966 assert(CaseClusters.size() != 0); // Should always be at least one
4967
John Porto1d235422015-08-12 12:37:53 -07004968 if (!Traits::Is64Bit && Src0->getType() == IceType_i64) {
Andrew Scull87f80c12015-07-20 10:19:16 -07004969 Src0 = legalize(Src0); // get Base/Index into physical registers
4970 Operand *Src0Lo = loOperand(Src0);
4971 Operand *Src0Hi = hiOperand(Src0);
4972 if (CaseClusters.back().getHigh() > UINT32_MAX) {
4973 // TODO(ascull): handle 64-bit case properly (currently naive version)
4974 // This might be handled by a higher level lowering of switches.
4975 SizeT NumCases = Inst->getNumCases();
4976 if (NumCases >= 2) {
Andrew Scull97f460d2015-07-21 10:07:42 -07004977 Src0Lo = legalizeToReg(Src0Lo);
4978 Src0Hi = legalizeToReg(Src0Hi);
Andrew Scull87f80c12015-07-20 10:19:16 -07004979 } else {
4980 Src0Lo = legalize(Src0Lo, Legal_Reg | Legal_Mem);
4981 Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem);
4982 }
4983 for (SizeT I = 0; I < NumCases; ++I) {
4984 Constant *ValueLo = Ctx->getConstantInt32(Inst->getValue(I));
4985 Constant *ValueHi = Ctx->getConstantInt32(Inst->getValue(I) >> 32);
4986 typename Traits::Insts::Label *Label =
4987 Traits::Insts::Label::create(Func, this);
4988 _cmp(Src0Lo, ValueLo);
4989 _br(Traits::Cond::Br_ne, Label);
4990 _cmp(Src0Hi, ValueHi);
4991 _br(Traits::Cond::Br_e, Inst->getLabel(I));
4992 Context.insert(Label);
4993 }
4994 _br(Inst->getLabelDefault());
4995 return;
4996 } else {
4997 // All the values are 32-bit so just check the operand is too and then
4998 // fall through to the 32-bit implementation. This is a common case.
4999 Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem);
5000 Constant *Zero = Ctx->getConstantInt32(0);
5001 _cmp(Src0Hi, Zero);
Andrew Scull86df4e92015-07-30 13:54:44 -07005002 _br(Traits::Cond::Br_ne, DefaultTarget);
Andrew Scull87f80c12015-07-20 10:19:16 -07005003 Src0 = Src0Lo;
5004 }
John Porto7e93c622015-06-23 10:58:57 -07005005 }
5006
Andrew Scull87f80c12015-07-20 10:19:16 -07005007 // 32-bit lowering
5008
5009 if (CaseClusters.size() == 1) {
5010 // Jump straight to default if needed. Currently a common case as jump
5011 // tables occur on their own.
5012 constexpr bool DoneCmp = false;
Andrew Scull86df4e92015-07-30 13:54:44 -07005013 lowerCaseCluster(CaseClusters.front(), Src0, DoneCmp, DefaultTarget);
Andrew Scull87f80c12015-07-20 10:19:16 -07005014 return;
5015 }
5016
5017 // Going to be using multiple times so get it in a register early
Andrew Scull97f460d2015-07-21 10:07:42 -07005018 Variable *Comparison = legalizeToReg(Src0);
Andrew Scull87f80c12015-07-20 10:19:16 -07005019
5020  // A span is a contiguous range over the clusters
5021 struct SearchSpan {
5022 SearchSpan(SizeT Begin, SizeT Size, typename Traits::Insts::Label *Label)
5023 : Begin(Begin), Size(Size), Label(Label) {}
5024
5025 SizeT Begin;
5026 SizeT Size;
5027 typename Traits::Insts::Label *Label;
5028 };
Andrew Scull8447bba2015-07-23 11:41:18 -07005029  // The stack will only grow to the height of the tree, so 12 should be plenty
5030 std::stack<SearchSpan, llvm::SmallVector<SearchSpan, 12>> SearchSpanStack;
Andrew Scull87f80c12015-07-20 10:19:16 -07005031 SearchSpanStack.emplace(0, CaseClusters.size(), nullptr);
5032 bool DoneCmp = false;
5033
5034 while (!SearchSpanStack.empty()) {
5035 SearchSpan Span = SearchSpanStack.top();
5036 SearchSpanStack.pop();
5037
5038 if (Span.Label != nullptr)
5039 Context.insert(Span.Label);
5040
5041 switch (Span.Size) {
5042 case 0:
5043 llvm::report_fatal_error("Invalid SearchSpan size");
5044 break;
5045
5046 case 1:
5047 lowerCaseCluster(CaseClusters[Span.Begin], Comparison, DoneCmp,
Andrew Scull86df4e92015-07-30 13:54:44 -07005048 SearchSpanStack.empty() ? nullptr : DefaultTarget);
Andrew Scull87f80c12015-07-20 10:19:16 -07005049 DoneCmp = false;
5050 break;
5051
Andrew Scull86df4e92015-07-30 13:54:44 -07005052 case 2: {
5053 const CaseCluster *CaseA = &CaseClusters[Span.Begin];
5054 const CaseCluster *CaseB = &CaseClusters[Span.Begin + 1];
5055
5056 // Placing a range last may allow register clobbering during the range
5057 // test. That means there is no need to clone the register. If it is a
5058 // unit range the comparison may have already been done in the binary
5059 // search (DoneCmp) and so it should be placed first. If this is a range
5060 // of two items and the comparison with the low value has already been
5061 // done, comparing with the other element is cheaper than a range test.
5062 // If the low end of the range is zero then there is no subtraction and
5063 // nothing to be gained.
5064 if (!CaseA->isUnitRange() &&
5065 !(CaseA->getLow() == 0 || (DoneCmp && CaseA->isPairRange()))) {
5066 std::swap(CaseA, CaseB);
5067 DoneCmp = false;
5068 }
5069
5070 lowerCaseCluster(*CaseA, Comparison, DoneCmp);
Andrew Scull87f80c12015-07-20 10:19:16 -07005071 DoneCmp = false;
Andrew Scull86df4e92015-07-30 13:54:44 -07005072 lowerCaseCluster(*CaseB, Comparison, DoneCmp,
5073 SearchSpanStack.empty() ? nullptr : DefaultTarget);
5074 } break;
Andrew Scull87f80c12015-07-20 10:19:16 -07005075
5076 default:
5077 // Pick the middle item and branch b or ae
5078 SizeT PivotIndex = Span.Begin + (Span.Size / 2);
5079 const CaseCluster &Pivot = CaseClusters[PivotIndex];
5080 Constant *Value = Ctx->getConstantInt32(Pivot.getLow());
Andrew Scull87f80c12015-07-20 10:19:16 -07005081 typename Traits::Insts::Label *Label =
5082 Traits::Insts::Label::create(Func, this);
5083 _cmp(Comparison, Value);
Andrew Scull86df4e92015-07-30 13:54:44 -07005084      // TODO(ascull): does it always have to be far?
5085 _br(Traits::Cond::Br_b, Label, Traits::Insts::Br::Far);
Andrew Scull87f80c12015-07-20 10:19:16 -07005086 // Lower the left and (pivot+right) sides, falling through to the right
5087 SearchSpanStack.emplace(Span.Begin, Span.Size / 2, Label);
5088 SearchSpanStack.emplace(PivotIndex, Span.Size - (Span.Size / 2), nullptr);
5089 DoneCmp = true;
5090 break;
5091 }
5092 }
5093
Andrew Scull86df4e92015-07-30 13:54:44 -07005094 _br(DefaultTarget);
John Porto7e93c622015-06-23 10:58:57 -07005095}
5096
5097template <class Machine>
5098void TargetX86Base<Machine>::scalarizeArithmetic(InstArithmetic::OpKind Kind,
5099 Variable *Dest, Operand *Src0,
5100 Operand *Src1) {
5101 assert(isVectorType(Dest->getType()));
5102 Type Ty = Dest->getType();
5103 Type ElementTy = typeElementType(Ty);
5104 SizeT NumElements = typeNumElements(Ty);
5105
5106 Operand *T = Ctx->getConstantUndef(Ty);
5107 for (SizeT I = 0; I < NumElements; ++I) {
5108 Constant *Index = Ctx->getConstantInt32(I);
5109
5110 // Extract the next two inputs.
John Porto5aeed952015-07-21 13:39:09 -07005111 Variable *Op0 = Func->makeVariable(ElementTy);
John Porto7e93c622015-06-23 10:58:57 -07005112 lowerExtractElement(InstExtractElement::create(Func, Op0, Src0, Index));
John Porto5aeed952015-07-21 13:39:09 -07005113 Variable *Op1 = Func->makeVariable(ElementTy);
John Porto7e93c622015-06-23 10:58:57 -07005114 lowerExtractElement(InstExtractElement::create(Func, Op1, Src1, Index));
5115
5116 // Perform the arithmetic as a scalar operation.
John Porto5aeed952015-07-21 13:39:09 -07005117 Variable *Res = Func->makeVariable(ElementTy);
John Porto7e93c622015-06-23 10:58:57 -07005118 lowerArithmetic(InstArithmetic::create(Func, Kind, Res, Op0, Op1));
5119
5120 // Insert the result into position.
John Porto5aeed952015-07-21 13:39:09 -07005121 Variable *DestT = Func->makeVariable(Ty);
John Porto7e93c622015-06-23 10:58:57 -07005122 lowerInsertElement(InstInsertElement::create(Func, DestT, T, Res, Index));
5123 T = DestT;
5124 }
5125
5126 lowerAssign(InstAssign::create(Func, Dest, T));
5127}
5128
Andrew Scull9612d322015-07-06 14:53:25 -07005129/// The following pattern occurs often in lowered C and C++ code:
5130///
5131/// %cmp = fcmp/icmp pred <n x ty> %src0, %src1
5132/// %cmp.ext = sext <n x i1> %cmp to <n x ty>
5133///
5134/// We can eliminate the sext operation by copying the result of pcmpeqd,
Andrew Scull57e12682015-09-16 11:30:19 -07005135/// pcmpgtd, or cmpps (which produce sign extended results) to the result of the
5136/// sext operation.
John Porto7e93c622015-06-23 10:58:57 -07005137template <class Machine>
5138void TargetX86Base<Machine>::eliminateNextVectorSextInstruction(
5139 Variable *SignExtendedResult) {
5140 if (InstCast *NextCast =
5141 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) {
5142 if (NextCast->getCastKind() == InstCast::Sext &&
5143 NextCast->getSrc(0) == SignExtendedResult) {
5144 NextCast->setDeleted();
Andrew Scull97f460d2015-07-21 10:07:42 -07005145 _movp(NextCast->getDest(), legalizeToReg(SignExtendedResult));
John Porto7e93c622015-06-23 10:58:57 -07005146 // Skip over the instruction.
5147 Context.advanceNext();
5148 }
5149 }
5150}
5151
5152template <class Machine>
5153void TargetX86Base<Machine>::lowerUnreachable(
5154 const InstUnreachable * /*Inst*/) {
5155 _ud2();
5156}
5157
template <class Machine>
void TargetX86Base<Machine>::lowerRMW(
    const typename Traits::Insts::FakeRMW *RMW) {
  // If the beacon variable's live range does not end in this instruction, then
  // it must end in the modified Store instruction that follows. This means
  // that the original Store instruction is still there, either because the
  // value being stored is used beyond the Store instruction, or because dead
  // code elimination did not happen. In either case, we cancel RMW lowering
  // (and the caller deletes the RMW instruction).
  if (!RMW->isLastUse(RMW->getBeacon()))
    return;
  Operand *Src = RMW->getData();
  Type Ty = Src->getType();
  typename Traits::X86OperandMem *Addr = formMemoryOperand(RMW->getAddr(), Ty);
  doMockBoundsCheck(Addr);
  if (!Traits::Is64Bit && Ty == IceType_i64) {
    Src = legalizeUndef(Src);
    Operand *SrcLo = legalize(loOperand(Src), Legal_Reg | Legal_Imm);
    Operand *SrcHi = legalize(hiOperand(Src), Legal_Reg | Legal_Imm);
    typename Traits::X86OperandMem *AddrLo =
        llvm::cast<typename Traits::X86OperandMem>(loOperand(Addr));
    typename Traits::X86OperandMem *AddrHi =
        llvm::cast<typename Traits::X86OperandMem>(hiOperand(Addr));
    switch (RMW->getOp()) {
    default:
      // TODO(stichnot): Implement other arithmetic operators.
      break;
    case InstArithmetic::Add:
      _add_rmw(AddrLo, SrcLo);
      _adc_rmw(AddrHi, SrcHi);
      return;
    case InstArithmetic::Sub:
      _sub_rmw(AddrLo, SrcLo);
      _sbb_rmw(AddrHi, SrcHi);
      return;
    case InstArithmetic::And:
      _and_rmw(AddrLo, SrcLo);
      _and_rmw(AddrHi, SrcHi);
      return;
    case InstArithmetic::Or:
      _or_rmw(AddrLo, SrcLo);
      _or_rmw(AddrHi, SrcHi);
      return;
    case InstArithmetic::Xor:
      _xor_rmw(AddrLo, SrcLo);
      _xor_rmw(AddrHi, SrcHi);
      return;
    }
  } else {
    // x86-32: i8, i16, i32
    // x86-64: i8, i16, i32, i64
    switch (RMW->getOp()) {
    default:
      // TODO(stichnot): Implement other arithmetic operators.
      break;
    case InstArithmetic::Add:
      Src = legalize(Src, Legal_Reg | Legal_Imm);
      _add_rmw(Addr, Src);
      return;
    case InstArithmetic::Sub:
      Src = legalize(Src, Legal_Reg | Legal_Imm);
      _sub_rmw(Addr, Src);
      return;
    case InstArithmetic::And:
      Src = legalize(Src, Legal_Reg | Legal_Imm);
      _and_rmw(Addr, Src);
      return;
    case InstArithmetic::Or:
      Src = legalize(Src, Legal_Reg | Legal_Imm);
      _or_rmw(Addr, Src);
      return;
    case InstArithmetic::Xor:
      Src = legalize(Src, Legal_Reg | Legal_Imm);
      _xor_rmw(Addr, Src);
      return;
    }
  }
  llvm::report_fatal_error("Couldn't lower RMW instruction");
}
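
// As an illustrative sketch (operand names are hypothetical), a 64-bit
// `*Addr += Src` RMW on x86-32 lowers to an add/adc pair on the split memory
// operand:
//   add DWORD PTR [AddrLo], SrcLo
//   adc DWORD PTR [AddrHi], SrcHi
// while the natively supported widths use a single read-modify-write form,
// e.g. `add DWORD PTR [Addr], Src`.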

template <class Machine>
void TargetX86Base<Machine>::lowerOther(const Inst *Instr) {
  if (const auto *RMW =
          llvm::dyn_cast<typename Traits::Insts::FakeRMW>(Instr)) {
    lowerRMW(RMW);
  } else {
    TargetLowering::lowerOther(Instr);
  }
}

/// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to
/// preserve the integrity of liveness analysis. Undef values are also turned
/// into zeroes, since loOperand() and hiOperand() don't expect Undef input.
template <class Machine> void TargetX86Base<Machine>::prelowerPhis() {
  if (Traits::Is64Bit) {
    // On x86-64 we don't need to prelower phis -- the architecture can handle
    // 64-bit integers natively.
    return;
  }

  // Pause constant blinding or pooling; blinding or pooling will be done
  // later, during phi lowering assignments.
  BoolFlagSaver B(RandomizationPoolingPaused, true);
  PhiLowering::prelowerPhis32Bit<TargetX86Base<Machine>>(
      this, Context.getNode(), Func);
}
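
// For illustration, on x86-32 an i64 phi such as
//   %x = phi i64 [ %a, %B1 ], [ undef, %B2 ]
// is prelowered roughly into two i32 phis over the lo/hi halves (the .lo/.hi
// names here are illustrative, and undef becomes zero), so liveness analysis
// sees the split operands:
//   %x.lo = phi i32 [ %a.lo, %B1 ], [ 0, %B2 ]
//   %x.hi = phi i32 [ %a.hi, %B1 ], [ 0, %B2 ]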

// There is no support for loading or emitting vector constants, so the vector
// values returned from makeVectorOfZeros, makeVectorOfOnes, etc. are
// initialized with register operations.
//
// TODO(wala): Add limited support for vector constants so that complex
// initialization in registers is unnecessary.

template <class Machine>
Variable *TargetX86Base<Machine>::makeVectorOfZeros(Type Ty, int32_t RegNum) {
  Variable *Reg = makeReg(Ty, RegNum);
  // Insert a FakeDef, since otherwise the live range of Reg might be
  // overestimated.
  Context.insert(InstFakeDef::create(Func, Reg));
  _pxor(Reg, Reg);
  return Reg;
}

template <class Machine>
Variable *TargetX86Base<Machine>::makeVectorOfMinusOnes(Type Ty,
                                                        int32_t RegNum) {
  Variable *MinusOnes = makeReg(Ty, RegNum);
  // Insert a FakeDef so the live range of MinusOnes is not overestimated.
  Context.insert(InstFakeDef::create(Func, MinusOnes));
  _pcmpeq(MinusOnes, MinusOnes);
  return MinusOnes;
}

template <class Machine>
Variable *TargetX86Base<Machine>::makeVectorOfOnes(Type Ty, int32_t RegNum) {
  Variable *Dest = makeVectorOfZeros(Ty, RegNum);
  Variable *MinusOne = makeVectorOfMinusOnes(Ty);
  _psub(Dest, MinusOne);
  return Dest;
}
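
// A sketch of the sequences these helpers emit, for e.g. v4i32 (register
// names are illustrative):
//   pxor    xmm0, xmm0        ; makeVectorOfZeros: <0, 0, 0, 0>
//   pcmpeqd xmm1, xmm1        ; makeVectorOfMinusOnes: <-1, -1, -1, -1>
//   psubd   xmm0, xmm1        ; makeVectorOfOnes: 0 - (-1) == 1 in each lane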

template <class Machine>
Variable *TargetX86Base<Machine>::makeVectorOfHighOrderBits(Type Ty,
                                                            int32_t RegNum) {
  assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 ||
         Ty == IceType_v16i8);
  if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) {
    Variable *Reg = makeVectorOfOnes(Ty, RegNum);
    SizeT Shift =
        typeWidthInBytes(typeElementType(Ty)) * Traits::X86_CHAR_BIT - 1;
    _psll(Reg, Ctx->getConstantInt8(Shift));
    return Reg;
  } else {
    // SSE has no left shift operation for vectors of 8-bit integers.
    constexpr uint32_t HIGH_ORDER_BITS_MASK = 0x80808080;
    Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK);
    Variable *Reg = makeReg(Ty, RegNum);
    _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem));
    _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8));
    return Reg;
  }
}
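
// For example (a sketch), for v4i32 each all-ones lane is shifted left by 31,
// leaving only the sign bit set:
//   pslld  xmm0, 31           ; each lane becomes 0x80000000
// For v16i8, the 0x80808080 mask is broadcast instead:
//   movd   xmm0, [mask]       ; low dword = 0x80808080 (mask is illustrative)
//   pshufd xmm0, xmm0, 0      ; replicate to all four dwords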

/// Construct a mask in a register that can be and'ed with a floating-point
/// value to mask off its sign bit. The value will be <4 x 0x7fffffff> for f32
/// and v4f32, and <2 x 0x7fffffffffffffff> for f64. Construct it as a vector
/// of ones logically right shifted one bit.
// TODO(stichnot): Fix the wala TODO above, to represent vector constants in
// memory.
template <class Machine>
Variable *TargetX86Base<Machine>::makeVectorOfFabsMask(Type Ty,
                                                       int32_t RegNum) {
  Variable *Reg = makeVectorOfMinusOnes(Ty, RegNum);
  _psrl(Reg, Ctx->getConstantInt8(1));
  return Reg;
}
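
// Sketch of the emitted pattern (mnemonics illustrative): an all-ones
// register shifted right by one clears the top bit of each lane, e.g. for
// f64 lanes:
//   pcmpeqd xmm0, xmm0        ; all lanes 0xffffffffffffffff
//   psrlq   xmm0, 1           ; all lanes 0x7fffffffffffffff
// and'ing this mask with a float clears its sign bit, implementing fabs.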

template <class Machine>
typename TargetX86Base<Machine>::Traits::X86OperandMem *
TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot,
                                                     uint32_t Offset) {
  // Ensure that Slot is a stack slot.
  assert(Slot->mustNotHaveReg());
  assert(Slot->getRegNum() == Variable::NoRegister);
  // Compute the location of Slot in memory.
  // TODO(wala,stichnot): lea should not be required. The address of the stack
  // slot is known at compile time (although not until after addProlog()).
  constexpr Type PointerType = IceType_i32;
  Variable *Loc = makeReg(PointerType);
  _lea(Loc, Slot);
  Constant *ConstantOffset = Ctx->getConstantInt32(Offset);
  return Traits::X86OperandMem::create(Func, Ty, Loc, ConstantOffset);
}
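
// Illustrative result (register and slot locations hypothetical): for a slot
// at [esp+12] and Offset == 4, this emits
//   lea eax, [esp+12]
// and returns the memory operand `Ty [eax+4]`.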

/// Helper for legalize() to emit the right code to lower an operand to a
/// register of the appropriate type.
template <class Machine>
Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) {
  Type Ty = Src->getType();
  Variable *Reg = makeReg(Ty, RegNum);
  if (isVectorType(Ty)) {
    _movp(Reg, Src);
  } else {
    _mov(Reg, Src);
  }
  return Reg;
}

template <class Machine>
Operand *TargetX86Base<Machine>::legalize(Operand *From, LegalMask Allowed,
                                          int32_t RegNum) {
  Type Ty = From->getType();
  // Assert that a physical register is allowed. To date, all calls to
  // legalize() allow a physical register. If a physical register needs to be
  // explicitly disallowed, then new code will need to be written to force a
  // spill.
  assert(Allowed & Legal_Reg);
  // If we're asking for a specific physical register, make sure we're not
  // allowing any other operand kinds. (This could be future work, e.g. allow
  // the shl shift amount to be either an immediate or in ecx.)
  assert(RegNum == Variable::NoRegister || Allowed == Legal_Reg);

  // Substitute with an available infinite-weight variable if possible. Only do
  // this when we are not asking for a specific register, and when the
  // substitution is not locked to a specific register, and when the types
  // match, in order to capture the vast majority of opportunities and avoid
  // corner cases in the lowering.
  if (RegNum == Variable::NoRegister) {
    if (Variable *Subst = getContext().availabilityGet(From)) {
      // At this point we know there is a potential substitution available.
      if (Subst->mustHaveReg() && !Subst->hasReg()) {
        // At this point we know the substitution will have a register.
        if (From->getType() == Subst->getType()) {
          // At this point we know the substitution's register is compatible.
          return Subst;
        }
      }
    }
  }

  if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(From)) {
    // Before doing anything with a Mem operand, we need to ensure that the
    // Base and Index components are in physical registers.
    Variable *Base = Mem->getBase();
    Variable *Index = Mem->getIndex();
    Variable *RegBase = nullptr;
    Variable *RegIndex = nullptr;
    if (Base) {
      RegBase = legalizeToReg(Base);
    }
    if (Index) {
      RegIndex = legalizeToReg(Index);
    }
    if (Base != RegBase || Index != RegIndex) {
      Mem = Traits::X86OperandMem::create(Func, Ty, RegBase, Mem->getOffset(),
                                          RegIndex, Mem->getShift(),
                                          Mem->getSegmentRegister());
    }

    // For all memory operands, we do randomization/pooling here.
    From = randomizeOrPoolImmediate(Mem);

    if (!(Allowed & Legal_Mem)) {
      From = copyToReg(From, RegNum);
    }
    return From;
  }
  if (auto *Const = llvm::dyn_cast<Constant>(From)) {
    if (llvm::isa<ConstantUndef>(Const)) {
      From = legalizeUndef(Const, RegNum);
      if (isVectorType(Ty))
        return From;
      Const = llvm::cast<Constant>(From);
    }
    // There should be no constants of vector type (other than undef).
    assert(!isVectorType(Ty));

    // If the operand is a 64-bit constant integer, we need to legalize it to a
    // register in x86-64.
    if (Traits::Is64Bit) {
      if (llvm::isa<ConstantInteger64>(Const)) {
        Variable *V = copyToReg(Const, RegNum);
        return V;
      }
    }

    // If the operand is a 32-bit constant integer, we should check whether we
    // need to randomize it or pool it.
    if (ConstantInteger32 *C = llvm::dyn_cast<ConstantInteger32>(Const)) {
      Operand *NewConst = randomizeOrPoolImmediate(C, RegNum);
      if (NewConst != Const) {
        return NewConst;
      }
    }

    // Convert a scalar floating point constant into an explicit memory
    // operand.
    if (isScalarFloatingType(Ty)) {
      Variable *Base = nullptr;
      std::string Buffer;
      llvm::raw_string_ostream StrBuf(Buffer);
      llvm::cast<Constant>(From)->emitPoolLabel(StrBuf, Ctx);
      llvm::cast<Constant>(From)->setShouldBePooled(true);
      Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true);
      From = Traits::X86OperandMem::create(Func, Ty, Base, Offset);
    }
    bool NeedsReg = false;
    if (!(Allowed & Legal_Imm) && !isScalarFloatingType(Ty))
      // Immediate specifically not allowed.
      NeedsReg = true;
    if (!(Allowed & Legal_Mem) && isScalarFloatingType(Ty))
      // On x86, FP constants are lowered to mem operands.
      NeedsReg = true;
    if (NeedsReg) {
      From = copyToReg(From, RegNum);
    }
    return From;
  }
  if (auto *Var = llvm::dyn_cast<Variable>(From)) {
    // Check if the variable is guaranteed a physical register. This can happen
    // either when the variable is pre-colored or when it is assigned infinite
    // weight.
    bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg());
    // We need a new physical register for the operand if:
    //   Mem is not allowed and Var isn't guaranteed a physical register, or
    //   RegNum is required and Var->getRegNum() doesn't match.
    if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
        (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {
      From = copyToReg(From, RegNum);
    }
    return From;
  }
  llvm_unreachable("Unhandled operand kind in legalize()");
  return From;
}
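
// Typical usage, taken from the lowerings above (a sketch): ask for the
// loosest set of operand kinds the instruction can encode, and legalize()
// returns an equivalent operand in one of those forms:
//   Operand *SrcLo = legalize(loOperand(Src), Legal_Reg | Legal_Imm);
//   Variable *Reg  = llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));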

/// Provide a trivial wrapper to legalize() for this common usage.
template <class Machine>
Variable *TargetX86Base<Machine>::legalizeToReg(Operand *From, int32_t RegNum) {
  return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
}

/// Legalize undef values to concrete values.
template <class Machine>
Operand *TargetX86Base<Machine>::legalizeUndef(Operand *From, int32_t RegNum) {
  Type Ty = From->getType();
  if (llvm::isa<ConstantUndef>(From)) {
    // Lower undefs to zero. Another option is to lower undefs to an
    // uninitialized register; however, using an uninitialized register results
    // in less predictable code.
    //
    // If in the future the implementation is changed to lower undef values to
    // uninitialized registers, a FakeDef will be needed:
    //   Context.insert(InstFakeDef::create(Func, Reg));
    // This is in order to ensure that the live range of Reg is not
    // overestimated. If the constant being lowered is a 64-bit value, then
    // the result should be split and the lo and hi components will need to go
    // in uninitialized registers.
    if (isVectorType(Ty))
      return makeVectorOfZeros(Ty, RegNum);
    return Ctx->getConstantZero(Ty);
  }
  return From;
}

/// For the cmp instruction, if Src1 is an immediate, or known to be a physical
/// register, we can allow Src0 to be a memory operand. Otherwise, Src0 must be
/// copied into a physical register. (Actually, either Src0 or Src1 can be
/// chosen for the physical register, but unfortunately we have to commit to
/// one or the other before register allocation.)
template <class Machine>
Operand *TargetX86Base<Machine>::legalizeSrc0ForCmp(Operand *Src0,
                                                    Operand *Src1) {
  bool IsSrc1ImmOrReg = false;
  if (llvm::isa<Constant>(Src1)) {
    IsSrc1ImmOrReg = true;
  } else if (auto *Var = llvm::dyn_cast<Variable>(Src1)) {
    if (Var->hasReg())
      IsSrc1ImmOrReg = true;
  }
  return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg);
}
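
// Rationale, sketched: x86 cmp allows at most one memory operand. If Src1
// will be an immediate or a register, Src0 may stay in memory, e.g.
//   cmp DWORD PTR [ebp-8], 7     ; Src1 is an immediate (operands illustrative)
// Otherwise Src0 is forced into a register so that Src1 remains free to be a
// memory operand if it needs to be.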

template <class Machine>
typename TargetX86Base<Machine>::Traits::X86OperandMem *
TargetX86Base<Machine>::formMemoryOperand(Operand *Opnd, Type Ty,
                                          bool DoLegalize) {
  auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Opnd);
  // It may be the case that address mode optimization already creates a
  // Traits::X86OperandMem, so in that case it wouldn't need another level of
  // transformation.
  if (!Mem) {
    Variable *Base = llvm::dyn_cast<Variable>(Opnd);
    Constant *Offset = llvm::dyn_cast<Constant>(Opnd);
    assert(Base || Offset);
    if (Offset) {
      // During memory operand building, we do not blind or pool the constant
      // offset; we will work on the whole memory operand as one entity later,
      // which saves one instruction. By turning blinding and pooling off, we
      // guarantee legalize(Offset) will return a Constant*.
      {
        BoolFlagSaver B(RandomizationPoolingPaused, true);

        Offset = llvm::cast<Constant>(legalize(Offset));
      }

      assert(llvm::isa<ConstantInteger32>(Offset) ||
             llvm::isa<ConstantRelocatable>(Offset));
    }
    Mem = Traits::X86OperandMem::create(Func, Ty, Base, Offset);
  }
  // Do legalization, which contains randomization/pooling, or do
  // randomization/pooling directly.
  return llvm::cast<typename Traits::X86OperandMem>(
      DoLegalize ? legalize(Mem) : randomizeOrPoolImmediate(Mem));
}

template <class Machine>
Variable *TargetX86Base<Machine>::makeReg(Type Type, int32_t RegNum) {
  // There aren't any 64-bit integer registers for x86-32.
  assert(Traits::Is64Bit || Type != IceType_i64);
  Variable *Reg = Func->makeVariable(Type);
  if (RegNum == Variable::NoRegister)
    Reg->setMustHaveReg();
  else
    Reg->setRegNum(RegNum);
  return Reg;
}

template <class Machine>
const Type TargetX86Base<Machine>::TypeForSize[] = {
    IceType_i8, IceType_i16, IceType_i32,
    (Traits::Is64Bit ? IceType_i64 : IceType_f64), IceType_v16i8};

template <class Machine>
Type TargetX86Base<Machine>::largestTypeInSize(uint32_t Size,
                                               uint32_t MaxSize) {
  assert(Size != 0);
  uint32_t TyIndex = llvm::findLastSet(Size, llvm::ZB_Undefined);
  uint32_t MaxIndex = MaxSize == NoSizeLimit
                          ? llvm::array_lengthof(TypeForSize) - 1
                          : llvm::findLastSet(MaxSize, llvm::ZB_Undefined);
  return TypeForSize[std::min(TyIndex, MaxIndex)];
}

template <class Machine>
Type TargetX86Base<Machine>::firstTypeThatFitsSize(uint32_t Size,
                                                   uint32_t MaxSize) {
  assert(Size != 0);
  uint32_t TyIndex = llvm::findLastSet(Size, llvm::ZB_Undefined);
  if (!llvm::isPowerOf2_32(Size))
    ++TyIndex;
  uint32_t MaxIndex = MaxSize == NoSizeLimit
                          ? llvm::array_lengthof(TypeForSize) - 1
                          : llvm::findLastSet(MaxSize, llvm::ZB_Undefined);
  return TypeForSize[std::min(TyIndex, MaxIndex)];
}
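
// Worked example (a sketch; the 8-byte entry is i64 or f64 depending on
// Traits::Is64Bit): for Size == 6 and no MaxSize limit,
//   largestTypeInSize(6)     -> IceType_i32, since the highest set bit of 6
//                               is bit 2, so the largest type fitting in 6
//                               bytes is the 4-byte entry;
//   firstTypeThatFitsSize(6) -> the 8-byte entry, since 6 is not a power of
//                               two and the index is rounded up.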

template <class Machine> void TargetX86Base<Machine>::postLower() {
  if (Ctx->getFlags().getOptLevel() == Opt_m1)
    return;
  markRedefinitions();
  Context.availabilityUpdate();
}

template <class Machine>
void TargetX86Base<Machine>::makeRandomRegisterPermutation(
    llvm::SmallVectorImpl<int32_t> &Permutation,
    const llvm::SmallBitVector &ExcludeRegisters, uint64_t Salt) const {
  Traits::makeRandomRegisterPermutation(Ctx, Func, Permutation,
                                        ExcludeRegisters, Salt);
}

template <class Machine>
void TargetX86Base<Machine>::emit(const ConstantInteger32 *C) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Ctx->getStrEmit();
  Str << getConstantPrefix() << C->getValue();
}

template <class Machine>
void TargetX86Base<Machine>::emit(const ConstantInteger64 *C) const {
  if (!Traits::Is64Bit) {
    llvm::report_fatal_error("Not expecting to emit 64-bit integers");
  } else {
    if (!BuildDefs::dump())
      return;
    Ostream &Str = Ctx->getStrEmit();
    Str << getConstantPrefix() << C->getValue();
  }
}

template <class Machine>
void TargetX86Base<Machine>::emit(const ConstantFloat *C) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Ctx->getStrEmit();
  C->emitPoolLabel(Str, Ctx);
}

template <class Machine>
void TargetX86Base<Machine>::emit(const ConstantDouble *C) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Ctx->getStrEmit();
  C->emitPoolLabel(Str, Ctx);
}

template <class Machine>
void TargetX86Base<Machine>::emit(const ConstantUndef *) const {
  llvm::report_fatal_error("undef value encountered by emitter.");
}

/// Randomize or pool an Immediate.
template <class Machine>
Operand *TargetX86Base<Machine>::randomizeOrPoolImmediate(Constant *Immediate,
                                                          int32_t RegNum) {
  assert(llvm::isa<ConstantInteger32>(Immediate) ||
         llvm::isa<ConstantRelocatable>(Immediate));
  if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None ||
      RandomizationPoolingPaused == true) {
    // Immediate randomization/pooling is off or paused.
    return Immediate;
  }
  if (Immediate->shouldBeRandomizedOrPooled(Ctx)) {
    Ctx->statsUpdateRPImms();
    if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() ==
        RPI_Randomize) {
      // Blind the constant.
      // FROM:
      //   imm
      // TO:
      //   insert: mov imm+cookie, Reg
      //   insert: lea -cookie[Reg], Reg
      //   => Reg
      // If we have already assigned a physical register, we must come from
      // advancedPhiLowering()=>lowerAssign(). In this case we should reuse the
      // assigned register as this assignment is the start of its use-def
      // chain. So we add the RegNum argument here. Note we use the 'lea'
      // instruction instead of 'xor' to avoid affecting the flags.
      Variable *Reg = makeReg(IceType_i32, RegNum);
      ConstantInteger32 *Integer = llvm::cast<ConstantInteger32>(Immediate);
      uint32_t Value = Integer->getValue();
      uint32_t Cookie = Func->getConstantBlindingCookie();
      _mov(Reg, Ctx->getConstantInt(IceType_i32, Cookie + Value));
      Constant *Offset = Ctx->getConstantInt(IceType_i32, 0 - Cookie);
      _lea(Reg, Traits::X86OperandMem::create(Func, IceType_i32, Reg, Offset,
                                              nullptr, 0));
      if (Immediate->getType() != IceType_i32) {
        Variable *TruncReg = makeReg(Immediate->getType(), RegNum);
        _mov(TruncReg, Reg);
        return TruncReg;
      }
      return Reg;
    }
    if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool) {
      // Pool the constant.
      // FROM:
      //   imm
      // TO:
      //   insert: mov $label, Reg
      //   => Reg
      assert(Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool);
      Immediate->setShouldBePooled(true);
      // If we have already assigned a physical register, we must come from
      // advancedPhiLowering()=>lowerAssign(). In this case we should reuse the
      // assigned register as this assignment is the start of its use-def
      // chain. So we add the RegNum argument here.
      Variable *Reg = makeReg(Immediate->getType(), RegNum);
      IceString Label;
      llvm::raw_string_ostream Label_stream(Label);
      Immediate->emitPoolLabel(Label_stream, Ctx);
      constexpr RelocOffsetT Offset = 0;
      constexpr bool SuppressMangling = true;
      Constant *Symbol =
          Ctx->getConstantSym(Offset, Label_stream.str(), SuppressMangling);
      typename Traits::X86OperandMem *MemOperand =
          Traits::X86OperandMem::create(Func, Immediate->getType(), nullptr,
                                        Symbol);
      _mov(Reg, MemOperand);
      return Reg;
    }
    assert("Unsupported -randomize-pool-immediates option" && false);
  }
  // The constant Immediate is not eligible for blinding/pooling.
  return Immediate;
}
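
// Numeric sketch of the blinding transform (values hypothetical): with
// Value == 0x1000 and Cookie == 0x5EED, the emitted sequence
//   mov eax, 0x6EED              ; Value + Cookie
//   lea eax, [eax - 0x5EED]      ; subtract the cookie back out
// leaves eax == 0x1000 without the raw immediate appearing in the code, and
// without clobbering EFLAGS the way a sub or xor would.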

template <class Machine>
typename TargetX86Base<Machine>::Traits::X86OperandMem *
TargetX86Base<Machine>::randomizeOrPoolImmediate(
    typename Traits::X86OperandMem *MemOperand, int32_t RegNum) {
  assert(MemOperand);
  if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None ||
      RandomizationPoolingPaused == true) {
    // Immediate randomization/pooling is turned off.
    return MemOperand;
  }

  // If this memory operand is already a randomized one, we do not randomize it
  // again.
  if (MemOperand->getRandomized())
    return MemOperand;

  if (Constant *C = llvm::dyn_cast_or_null<Constant>(MemOperand->getOffset())) {
    if (C->shouldBeRandomizedOrPooled(Ctx)) {
      // The offset of this mem operand should be blinded or pooled.
      Ctx->statsUpdateRPImms();
      if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() ==
          RPI_Randomize) {
        // Blind the constant offset.
        // FROM:
        //   offset[base, index, shift]
        // TO:
        //   insert: lea offset+cookie[base], RegTemp
        //   => -cookie[RegTemp, index, shift]
        uint32_t Value =
            llvm::dyn_cast<ConstantInteger32>(MemOperand->getOffset())
                ->getValue();
        uint32_t Cookie = Func->getConstantBlindingCookie();
        Constant *Mask1 = Ctx->getConstantInt(
            MemOperand->getOffset()->getType(), Cookie + Value);
        Constant *Mask2 =
            Ctx->getConstantInt(MemOperand->getOffset()->getType(), 0 - Cookie);

        typename Traits::X86OperandMem *TempMemOperand =
            Traits::X86OperandMem::create(Func, MemOperand->getType(),
                                          MemOperand->getBase(), Mask1);
        // If we have already assigned a physical register, we must come from
        // advancedPhiLowering()=>lowerAssign(). In this case we should reuse
        // the assigned register as this assignment is the start of its
        // use-def chain. So we add the RegNum argument here.
        Variable *RegTemp = makeReg(MemOperand->getOffset()->getType(), RegNum);
        _lea(RegTemp, TempMemOperand);

        typename Traits::X86OperandMem *NewMemOperand =
            Traits::X86OperandMem::create(Func, MemOperand->getType(), RegTemp,
                                          Mask2, MemOperand->getIndex(),
                                          MemOperand->getShift(),
                                          MemOperand->getSegmentRegister());

        // Label this memory operand as randomized, so we won't randomize it
        // again in case we call legalize() multiple times on this memory
        // operand.
        NewMemOperand->setRandomized(true);
        return NewMemOperand;
      }
      if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool) {
        // Pool the constant offset.
        // FROM:
        //   offset[base, index, shift]
        // TO:
        //   insert: mov $label, RegTemp
        //   insert: lea [base, RegTemp], RegTemp
        //   => [RegTemp, index, shift]
        assert(Ctx->getFlags().getRandomizeAndPoolImmediatesOption() ==
               RPI_Pool);
        // Memory operands should never exist as source operands in phi
        // lowering assignments, so there is no need to reuse any registers
        // here. For phi lowering, we should not ask for new physical registers
        // in general. However, if we do meet a memory operand during phi
        // lowering, we should not blind or pool the immediates for now.
        if (RegNum != Variable::NoRegister)
          return MemOperand;
        Variable *RegTemp = makeReg(IceType_i32);
        IceString Label;
        llvm::raw_string_ostream Label_stream(Label);
        MemOperand->getOffset()->emitPoolLabel(Label_stream, Ctx);
        MemOperand->getOffset()->setShouldBePooled(true);
        constexpr RelocOffsetT SymOffset = 0;
        constexpr bool SuppressMangling = true;
        Constant *Symbol = Ctx->getConstantSym(SymOffset, Label_stream.str(),
                                               SuppressMangling);
        typename Traits::X86OperandMem *SymbolOperand =
            Traits::X86OperandMem::create(
                Func, MemOperand->getOffset()->getType(), nullptr, Symbol);
        _mov(RegTemp, SymbolOperand);
        // If we have a base variable here, we should add the lea instruction
        // to add the value of the base variable to RegTemp. If there is no
        // base variable, we won't need this lea instruction.
        if (MemOperand->getBase()) {
          typename Traits::X86OperandMem *CalculateOperand =
              Traits::X86OperandMem::create(
                  Func, MemOperand->getType(), MemOperand->getBase(), nullptr,
                  RegTemp, 0, MemOperand->getSegmentRegister());
          _lea(RegTemp, CalculateOperand);
        }
        typename Traits::X86OperandMem *NewMemOperand =
            Traits::X86OperandMem::create(Func, MemOperand->getType(), RegTemp,
                                          nullptr, MemOperand->getIndex(),
                                          MemOperand->getShift(),
                                          MemOperand->getSegmentRegister());
        return NewMemOperand;
      }
      assert("Unsupported -randomize-pool-immediates option" && false);
    }
  }
  // The offset is not eligible for blinding or pooling; return the original
  // mem operand.
  return MemOperand;
}

} // end of namespace X86Internal
} // end of namespace Ice

#endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H