blob: 4927e170598d69eded4aa851733b30ae89b114c8 [file] [log] [blame]
John Porto7e93c622015-06-23 10:58:57 -07001//===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==//
2//
3// The Subzero Code Generator
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
Andrew Scull9612d322015-07-06 14:53:25 -07009///
10/// \file
Jim Stichnoth92a6e5b2015-12-02 16:52:44 -080011/// \brief Implements the TargetLoweringX86Base class, which consists almost
Andrew Scull57e12682015-09-16 11:30:19 -070012/// entirely of the lowering sequence for each high-level instruction.
Andrew Scull9612d322015-07-06 14:53:25 -070013///
John Porto7e93c622015-06-23 10:58:57 -070014//===----------------------------------------------------------------------===//
15
16#ifndef SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
17#define SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
18
John Porto7e93c622015-06-23 10:58:57 -070019#include "IceCfg.h"
20#include "IceCfgNode.h"
21#include "IceClFlags.h"
22#include "IceDefs.h"
23#include "IceELFObjectWriter.h"
24#include "IceGlobalInits.h"
John Portoec3f5652015-08-31 15:07:09 -070025#include "IceInstVarIter.h"
John Porto7e93c622015-06-23 10:58:57 -070026#include "IceLiveness.h"
27#include "IceOperand.h"
Jan Voung53483692015-07-16 10:47:46 -070028#include "IcePhiLoweringImpl.h"
John Porto7e93c622015-06-23 10:58:57 -070029#include "IceUtils.h"
John Porto4a566862016-01-04 09:33:41 -080030#include "IceInstX86Base.h"
John Porto67f8de92015-06-25 10:14:17 -070031#include "llvm/Support/MathExtras.h"
John Porto7e93c622015-06-23 10:58:57 -070032
Andrew Scull87f80c12015-07-20 10:19:16 -070033#include <stack>
34
John Porto7e93c622015-06-23 10:58:57 -070035namespace Ice {
namespace X86 {
/// PoolTypeConverter maps a primitive constant type onto the pieces needed to
/// emit it into a read-only constant pool: the Subzero constant class that
/// represents it (IceType), an unsigned integer type wide enough to hold its
/// raw bit pattern (PrimitiveIntType), the Ice type tag (Ty), and emission
/// strings (TypeName/AsmTag/PrintfString) whose definitions live out-of-line.
template <typename T> struct PoolTypeConverter {};

/// f32 constants, pooled as raw 32-bit patterns.
template <> struct PoolTypeConverter<float> {
  using PrimitiveIntType = uint32_t;
  using IceType = ConstantFloat;
  static const Type Ty = IceType_f32;
  static const char *TypeName;
  static const char *AsmTag;
  static const char *PrintfString;
};

/// f64 constants, pooled as raw 64-bit patterns.
template <> struct PoolTypeConverter<double> {
  using PrimitiveIntType = uint64_t;
  using IceType = ConstantDouble;
  static const Type Ty = IceType_f64;
  static const char *TypeName;
  static const char *AsmTag;
  static const char *PrintfString;
};

// Add converter for int type constant pooling
template <> struct PoolTypeConverter<uint32_t> {
  using PrimitiveIntType = uint32_t;
  using IceType = ConstantInteger32;
  static const Type Ty = IceType_i32;
  static const char *TypeName;
  static const char *AsmTag;
  static const char *PrintfString;
};

// Add converter for int type constant pooling.  Note that i16 and i8 widths
// still use ConstantInteger32 with a 32-bit primitive representation.
template <> struct PoolTypeConverter<uint16_t> {
  using PrimitiveIntType = uint32_t;
  using IceType = ConstantInteger32;
  static const Type Ty = IceType_i16;
  static const char *TypeName;
  static const char *AsmTag;
  static const char *PrintfString;
};

// Add converter for int type constant pooling
template <> struct PoolTypeConverter<uint8_t> {
  using PrimitiveIntType = uint32_t;
  using IceType = ConstantInteger32;
  static const Type Ty = IceType_i8;
  static const char *TypeName;
  static const char *AsmTag;
  static const char *PrintfString;
};
} // end of namespace X86
87
John Porto4a566862016-01-04 09:33:41 -080088namespace X86NAMESPACE {
John Porto7e93c622015-06-23 10:58:57 -070089
Eric Holkd6cf6b32016-02-17 11:09:48 -080090using Utils::BoolFlagSaver;
John Porto7e93c622015-06-23 10:58:57 -070091
/// Bookkeeping for one producer of an i1-typed variable, used by BoolFolding
/// to decide whether the producer can be folded into its consumer (e.g. an
/// icmp folded into the branch that uses it).
template <typename Traits> class BoolFoldingEntry {
  BoolFoldingEntry(const BoolFoldingEntry &) = delete;

public:
  BoolFoldingEntry() = default;
  explicit BoolFoldingEntry(Inst *I);
  BoolFoldingEntry &operator=(const BoolFoldingEntry &) = default;
  /// Instr is the instruction producing the i1-type variable of interest.
  Inst *Instr = nullptr;
  /// IsComplex is the cached result of BoolFolding::hasComplexLowering(Instr).
  bool IsComplex = false;
  /// IsLiveOut is initialized conservatively to true, and is set to false when
  /// we encounter an instruction that ends Var's live range. We disable the
  /// folding optimization when Var is live beyond this basic block. Note that
  /// if liveness analysis is not performed (e.g. in Om1 mode), IsLiveOut will
  /// always be true and the folding optimization will never be performed.
  bool IsLiveOut = true;
  // NumUses counts the number of times Var is used as a source operand in the
  // basic block. If IsComplex is true and there is more than one use of Var,
  // then the folding optimization is disabled for Var.
  uint32_t NumUses = 0;
};
114
/// BoolFolding tracks, per basic block, which single-use i1 producers (icmp,
/// fcmp, flag-setting arithmetic) can be folded directly into their consumer
/// (branch or select), avoiding the materialization of a bool in a register.
template <typename Traits> class BoolFolding {
public:
  enum BoolFoldingProducerKind {
    PK_None,
    // TODO(jpp): PK_Icmp32 is no longer meaningful. Rename to PK_IcmpNative.
    PK_Icmp32,
    PK_Icmp64,
    PK_Fcmp,
    PK_Trunc,
    PK_Arith // A flag-setting arithmetic instruction.
  };

  /// Currently the actual enum values are not used (other than CK_None), but we
  /// go ahead and produce them anyway for symmetry with the
  /// BoolFoldingProducerKind.
  enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext };

private:
  BoolFolding(const BoolFolding &) = delete;
  BoolFolding &operator=(const BoolFolding &) = delete;

public:
  BoolFolding() = default;
  static BoolFoldingProducerKind getProducerKind(const Inst *Instr);
  static BoolFoldingConsumerKind getConsumerKind(const Inst *Instr);
  static bool hasComplexLowering(const Inst *Instr);
  static bool isValidFolding(BoolFoldingProducerKind ProducerKind,
                             BoolFoldingConsumerKind ConsumerKind);
  void init(CfgNode *Node);
  const Inst *getProducerFor(const Operand *Opnd) const;
  void dump(const Cfg *Func) const;

private:
  /// Returns true if Producers contains a valid entry for the given VarNum.
  bool containsValid(SizeT VarNum) const {
    auto Element = Producers.find(VarNum);
    return Element != Producers.end() && Element->second.Instr != nullptr;
  }
  /// Invalidates an entry without erasing it, so later lookups see nullptr.
  void setInvalid(SizeT VarNum) { Producers[VarNum].Instr = nullptr; }
  /// Producers maps Variable::Number to a BoolFoldingEntry.
  CfgUnorderedMap<SizeT, BoolFoldingEntry<Traits>> Producers;
};
157
John Porto4a566862016-01-04 09:33:41 -0800158template <typename Traits>
159BoolFoldingEntry<Traits>::BoolFoldingEntry(Inst *I)
160 : Instr(I), IsComplex(BoolFolding<Traits>::hasComplexLowering(I)) {}
John Porto7e93c622015-06-23 10:58:57 -0700161
John Porto4a566862016-01-04 09:33:41 -0800162template <typename Traits>
163typename BoolFolding<Traits>::BoolFoldingProducerKind
164BoolFolding<Traits>::getProducerKind(const Inst *Instr) {
John Porto7e93c622015-06-23 10:58:57 -0700165 if (llvm::isa<InstIcmp>(Instr)) {
John Porto4a566862016-01-04 09:33:41 -0800166 if (Traits::Is64Bit || Instr->getSrc(0)->getType() != IceType_i64)
John Porto7e93c622015-06-23 10:58:57 -0700167 return PK_Icmp32;
David Sehrd9810252015-10-16 13:23:17 -0700168 return PK_Icmp64;
John Porto7e93c622015-06-23 10:58:57 -0700169 }
John Porto7e93c622015-06-23 10:58:57 -0700170 if (llvm::isa<InstFcmp>(Instr))
171 return PK_Fcmp;
David Sehrdaf096c2015-11-11 10:56:58 -0800172 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) {
John Porto4a566862016-01-04 09:33:41 -0800173 if (Traits::Is64Bit || Arith->getSrc(0)->getType() != IceType_i64) {
David Sehrdaf096c2015-11-11 10:56:58 -0800174 switch (Arith->getOp()) {
175 default:
176 return PK_None;
177 case InstArithmetic::And:
178 case InstArithmetic::Or:
179 return PK_Arith;
180 }
181 }
182 }
183 return PK_None; // TODO(stichnot): remove this
184
John Porto7e93c622015-06-23 10:58:57 -0700185 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {
186 switch (Cast->getCastKind()) {
187 default:
188 return PK_None;
189 case InstCast::Trunc:
190 return PK_Trunc;
191 }
192 }
193 return PK_None;
194}
195
John Porto4a566862016-01-04 09:33:41 -0800196template <typename Traits>
197typename BoolFolding<Traits>::BoolFoldingConsumerKind
198BoolFolding<Traits>::getConsumerKind(const Inst *Instr) {
John Porto7e93c622015-06-23 10:58:57 -0700199 if (llvm::isa<InstBr>(Instr))
200 return CK_Br;
201 if (llvm::isa<InstSelect>(Instr))
202 return CK_Select;
203 return CK_None; // TODO(stichnot): remove this
204
205 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {
206 switch (Cast->getCastKind()) {
207 default:
208 return CK_None;
209 case InstCast::Sext:
210 return CK_Sext;
211 case InstCast::Zext:
212 return CK_Zext;
213 }
214 }
215 return CK_None;
216}
217
John Porto921856d2015-07-07 11:56:26 -0700218/// Returns true if the producing instruction has a "complex" lowering sequence.
219/// This generally means that its lowering sequence requires more than one
220/// conditional branch, namely 64-bit integer compares and some floating-point
Andrew Scull57e12682015-09-16 11:30:19 -0700221/// compares. When this is true, and there is more than one consumer, we prefer
John Porto921856d2015-07-07 11:56:26 -0700222/// to disable the folding optimization because it minimizes branches.
John Porto4a566862016-01-04 09:33:41 -0800223template <typename Traits>
224bool BoolFolding<Traits>::hasComplexLowering(const Inst *Instr) {
John Porto7e93c622015-06-23 10:58:57 -0700225 switch (getProducerKind(Instr)) {
226 default:
227 return false;
228 case PK_Icmp64:
John Porto4a566862016-01-04 09:33:41 -0800229 return !Traits::Is64Bit;
John Porto7e93c622015-06-23 10:58:57 -0700230 case PK_Fcmp:
John Porto4a566862016-01-04 09:33:41 -0800231 return Traits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()].C2 !=
232 Traits::Cond::Br_None;
John Porto7e93c622015-06-23 10:58:57 -0700233 }
234}
235
John Porto4a566862016-01-04 09:33:41 -0800236template <typename Traits>
237bool BoolFolding<Traits>::isValidFolding(
238 typename BoolFolding<Traits>::BoolFoldingProducerKind ProducerKind,
239 typename BoolFolding<Traits>::BoolFoldingConsumerKind ConsumerKind) {
David Sehre3984282015-12-15 17:34:55 -0800240 switch (ProducerKind) {
241 default:
242 return false;
243 case PK_Icmp32:
244 case PK_Icmp64:
245 case PK_Fcmp:
246 return (ConsumerKind == CK_Br) || (ConsumerKind == CK_Select);
247 case PK_Arith:
248 return ConsumerKind == CK_Br;
249 }
250}
251
/// Scans one basic block, recording every eligible i1 producer and then
/// invalidating any entry whose uses make folding unprofitable or incorrect.
/// Surviving producers are marked dead (not deleted) so lowering of the
/// consumer can regenerate them inline.
template <typename Traits> void BoolFolding<Traits>::init(CfgNode *Node) {
  Producers.clear();
  for (Inst &Instr : Node->getInsts()) {
    // Check whether Instr is a valid producer.
    Variable *Var = Instr.getDest();
    if (!Instr.isDeleted() // only consider non-deleted instructions
        && Var             // only instructions with an actual dest var
        && Var->getType() == IceType_i1          // only bool-type dest vars
        && getProducerKind(&Instr) != PK_None) { // white-listed instructions
      Producers[Var->getIndex()] = BoolFoldingEntry<Traits>(&Instr);
    }
    // Check each src variable against the map.
    FOREACH_VAR_IN_INST(Var, Instr) {
      SizeT VarNum = Var->getIndex();
      if (!containsValid(VarNum))
        continue;
      // All valid consumers use Var as the first source operand
      if (IndexOfVarOperandInInst(Var) != 0) {
        setInvalid(VarNum);
        continue;
      }
      // Consumer instructions must be white-listed
      typename BoolFolding<Traits>::BoolFoldingConsumerKind ConsumerKind =
          getConsumerKind(&Instr);
      if (ConsumerKind == CK_None) {
        setInvalid(VarNum);
        continue;
      }
      typename BoolFolding<Traits>::BoolFoldingProducerKind ProducerKind =
          getProducerKind(Producers[VarNum].Instr);
      if (!isValidFolding(ProducerKind, ConsumerKind)) {
        setInvalid(VarNum);
        continue;
      }
      // Avoid creating multiple copies of complex producer instructions.
      if (Producers[VarNum].IsComplex && Producers[VarNum].NumUses > 0) {
        setInvalid(VarNum);
        continue;
      }
      ++Producers[VarNum].NumUses;
      if (Instr.isLastUse(Var)) {
        Producers[VarNum].IsLiveOut = false;
      }
    }
  }
  for (auto &I : Producers) {
    // Ignore entries previously marked invalid.
    if (I.second.Instr == nullptr)
      continue;
    // Disable the producer if its dest may be live beyond this block.
    if (I.second.IsLiveOut) {
      setInvalid(I.first);
      continue;
    }
    // Mark as "dead" rather than outright deleting. This is so that other
    // peephole style optimizations during or before lowering have access to
    // this instruction in undeleted form. See for example
    // tryOptimizedCmpxchgCmpBr().
    I.second.Instr->setDead();
  }
}
313
John Porto4a566862016-01-04 09:33:41 -0800314template <typename Traits>
315const Inst *BoolFolding<Traits>::getProducerFor(const Operand *Opnd) const {
John Porto7e93c622015-06-23 10:58:57 -0700316 auto *Var = llvm::dyn_cast<const Variable>(Opnd);
317 if (Var == nullptr)
318 return nullptr;
319 SizeT VarNum = Var->getIndex();
320 auto Element = Producers.find(VarNum);
321 if (Element == Producers.end())
322 return nullptr;
323 return Element->second.Instr;
324}
325
John Porto4a566862016-01-04 09:33:41 -0800326template <typename Traits>
327void BoolFolding<Traits>::dump(const Cfg *Func) const {
Jim Stichnoth20b71f52015-06-24 15:52:24 -0700328 if (!BuildDefs::dump() || !Func->isVerbose(IceV_Folding))
John Porto7e93c622015-06-23 10:58:57 -0700329 return;
330 OstreamLocker L(Func->getContext());
331 Ostream &Str = Func->getContext()->getStrDump();
332 for (auto &I : Producers) {
333 if (I.second.Instr == nullptr)
334 continue;
335 Str << "Found foldable producer:\n ";
336 I.second.Instr->dump(Func);
337 Str << "\n";
338 }
339}
340
/// Per-node lowering setup: recomputes the bool-folding producer/consumer
/// bookkeeping for Node and dumps it when verbose folding output is enabled.
template <typename TraitsType>
void TargetX86Base<TraitsType>::initNodeForLowering(CfgNode *Node) {
  FoldingInfo.init(Node);
  FoldingInfo.dump(Func);
}
346
/// Constructs the target lowering object. Sandboxing is only needed for NaCl.
/// If a non-default instruction set was requested on the command line, the
/// flag value (expressed in the target-independent enum) is rebased into the
/// Traits-specific InstructionSet enum range.
template <typename TraitsType>
TargetX86Base<TraitsType>::TargetX86Base(Cfg *Func)
    : TargetLowering(Func), NeedSandboxing(SandboxingType == ST_NaCl) {
  // The two enums must span ranges of equal size for the rebasing arithmetic
  // below to be meaningful.
  static_assert(
      (Traits::InstructionSet::End - Traits::InstructionSet::Begin) ==
          (TargetInstructionSet::X86InstructionSet_End -
           TargetInstructionSet::X86InstructionSet_Begin),
      "Traits::InstructionSet range different from TargetInstructionSet");
  if (getFlags().getTargetInstructionSet() !=
      TargetInstructionSet::BaseInstructionSet) {
    InstructionSet = static_cast<InstructionSetEnum>(
        (getFlags().getTargetInstructionSet() -
         TargetInstructionSet::X86InstructionSet_Begin) +
        Traits::InstructionSet::Begin);
  }
}
363
/// One-time, process-wide initialization: sets the register-number limit,
/// builds the type->register-set tables (keeping an unfiltered copy before
/// applying command-line register exclusions), and selects the fixup kinds
/// (PC-relative always; absolute becomes GOTOFF under non-SFI mode).
template <typename TraitsType>
void TargetX86Base<TraitsType>::staticInit(GlobalContext *Ctx) {
  RegNumT::setLimit(Traits::RegisterSet::Reg_NUM);
  Traits::initRegisterSet(getFlags(), &TypeToRegisterSet, &RegisterAliases);
  // Snapshot the unfiltered table before filterTypeToRegisterSet mutates it.
  for (size_t i = 0; i < TypeToRegisterSet.size(); ++i)
    TypeToRegisterSetUnfiltered[i] = TypeToRegisterSet[i];
  filterTypeToRegisterSet(Ctx, Traits::RegisterSet::Reg_NUM,
                          TypeToRegisterSet.data(), TypeToRegisterSet.size(),
                          Traits::getRegName, getRegClassName);
  PcRelFixup = Traits::FK_PcRel;
  AbsFixup = getFlags().getUseNonsfi() ? Traits::FK_Gotoff : Traits::FK_Abs;
}
376
Jim Stichnoth467ffe52016-03-29 15:01:06 -0700377template <typename TraitsType>
378bool TargetX86Base<TraitsType>::shouldBePooled(const Constant *C) {
379 if (auto *ConstFloat = llvm::dyn_cast<ConstantFloat>(C)) {
380 return !Utils::isPositiveZero(ConstFloat->getValue());
381 }
382 if (auto *ConstDouble = llvm::dyn_cast<ConstantDouble>(C)) {
383 return !Utils::isPositiveZero(ConstDouble->getValue());
384 }
Karl Schimpfd4699942016-04-02 09:55:31 -0700385 if (getFlags().getRandomizeAndPoolImmediatesOption() != RPI_Pool) {
Jim Stichnoth467ffe52016-03-29 15:01:06 -0700386 return false;
387 }
388 return C->shouldBeRandomizedOrPooled();
389}
390
/// Full -O2 translation pipeline for one function. Phase order matters
/// throughout: address-mode opt precedes RMW detection, liveness precedes
/// register allocation, and branch optimization must come after any pass that
/// inserts or reorders CfgNodes.
template <typename TraitsType> void TargetX86Base<TraitsType>::translateO2() {
  TimerMarker T(TimerStack::TT_O2, Func);

  if (SandboxingType != ST_None) {
    initRebasePtr();
  }

  genTargetHelperCalls();
  Func->dump("After target helper call insertion");

  // Merge Alloca instructions, and lay out the stack.
  static constexpr bool SortAndCombineAllocas = true;
  Func->processAllocas(SortAndCombineAllocas);
  Func->dump("After Alloca processing");

  if (!getFlags().getEnablePhiEdgeSplit()) {
    // Lower Phi instructions.
    Func->placePhiLoads();
    if (Func->hasError())
      return;
    Func->placePhiStores();
    if (Func->hasError())
      return;
    Func->deletePhis();
    if (Func->hasError())
      return;
    Func->dump("After Phi lowering");
  }

  // Run this early so it can be used to focus optimizations on potentially hot
  // code.
  // TODO(stichnot,ascull): currently only used for regalloc not
  // expensive high level optimizations which could be focused on potentially
  // hot code.
  Func->computeLoopNestDepth();
  Func->dump("After loop nest depth analysis");

  // Address mode optimization.
  Func->getVMetadata()->init(VMK_SingleDefs);
  Func->doAddressOpt();

  // Find read-modify-write opportunities. Do this after address mode
  // optimization so that doAddressOpt() doesn't need to be applied to RMW
  // instructions as well.
  findRMW();
  Func->dump("After RMW transform");

  // Argument lowering
  Func->doArgLowering();

  // Target lowering. This requires liveness analysis for some parts of the
  // lowering decisions, such as compare/branch fusing. If non-lightweight
  // liveness analysis is used, the instructions need to be renumbered first
  // TODO: This renumbering should only be necessary if we're actually
  // calculating live intervals, which we only do for register allocation.
  Func->renumberInstructions();
  if (Func->hasError())
    return;

  // TODO: It should be sufficient to use the fastest liveness calculation,
  // i.e. livenessLightweight(). However, for some reason that slows down the
  // rest of the translation. Investigate.
  Func->liveness(Liveness_Basic);
  if (Func->hasError())
    return;
  Func->dump("After x86 address mode opt");

  // Disable constant blinding or pooling for load optimization.
  {
    BoolFlagSaver B(RandomizationPoolingPaused, true);
    doLoadOpt();
  }
  Func->genCode();
  if (Func->hasError())
    return;
  if (SandboxingType != ST_None) {
    initSandbox();
  }
  Func->dump("After x86 codegen");

  // Register allocation. This requires instruction renumbering and full
  // liveness analysis. Loops must be identified before liveness so variable
  // use weights are correct.
  Func->renumberInstructions();
  if (Func->hasError())
    return;
  Func->liveness(Liveness_Intervals);
  if (Func->hasError())
    return;
  // Validate the live range computations. The expensive validation call is
  // deliberately only made when assertions are enabled.
  assert(Func->validateLiveness());
  // The post-codegen dump is done here, after liveness analysis and associated
  // cleanup, to make the dump cleaner and more useful.
  Func->dump("After initial x8632 codegen");
  Func->getVMetadata()->init(VMK_All);
  regAlloc(RAK_Global);
  if (Func->hasError())
    return;
  Func->dump("After linear scan regalloc");

  if (getFlags().getEnablePhiEdgeSplit()) {
    Func->advancedPhiLowering();
    Func->dump("After advanced Phi lowering");
  }

  // Stack frame mapping.
  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  Func->contractEmptyNodes();
  Func->reorderNodes();

  // Shuffle basic block order if -reorder-basic-blocks is enabled.
  Func->shuffleNodes();

  // Branch optimization. This needs to be done just before code emission. In
  // particular, no transformations that insert or reorder CfgNodes should be
  // done after branch optimization. We go ahead and do it before nop insertion
  // to reduce the amount of work needed for searching for opportunities.
  Func->doBranchOpt();
  Func->dump("After branch optimization");

  // Nop insertion if -nop-insertion is enabled.
  Func->doNopInsertion();

  // Mark nodes that require sandbox alignment
  if (NeedSandboxing) {
    Func->markNodesForSandboxing();
  }
}
524
/// Minimal -Om1 translation pipeline: no address-mode/RMW/load optimization,
/// no alloca merging, and register allocation only for infinite-weight
/// variables. Phase order mirrors translateO2 where the phases overlap.
template <typename TraitsType> void TargetX86Base<TraitsType>::translateOm1() {
  TimerMarker T(TimerStack::TT_Om1, Func);

  if (SandboxingType != ST_None) {
    initRebasePtr();
  }

  genTargetHelperCalls();

  // Do not merge Alloca instructions, and lay out the stack.
  static constexpr bool SortAndCombineAllocas = false;
  Func->processAllocas(SortAndCombineAllocas);
  Func->dump("After Alloca processing");

  Func->placePhiLoads();
  if (Func->hasError())
    return;
  Func->placePhiStores();
  if (Func->hasError())
    return;
  Func->deletePhis();
  if (Func->hasError())
    return;
  Func->dump("After Phi lowering");

  Func->doArgLowering();
  Func->genCode();
  if (Func->hasError())
    return;
  if (SandboxingType != ST_None) {
    initSandbox();
  }
  Func->dump("After initial x8632 codegen");

  regAlloc(RAK_InfOnly);
  if (Func->hasError())
    return;
  Func->dump("After regalloc of infinite-weight variables");

  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  // Shuffle basic block order if -reorder-basic-blocks is enabled.
  Func->shuffleNodes();

  // Nop insertion if -nop-insertion is enabled.
  Func->doNopInsertion();

  // Mark nodes that require sandbox alignment
  if (NeedSandboxing)
    Func->markNodesForSandboxing();
}
579
John Porto5aeed952015-07-21 13:39:09 -0700580inline bool canRMW(const InstArithmetic *Arith) {
John Porto7e93c622015-06-23 10:58:57 -0700581 Type Ty = Arith->getDest()->getType();
John Porto921856d2015-07-07 11:56:26 -0700582 // X86 vector instructions write to a register and have no RMW option.
John Porto7e93c622015-06-23 10:58:57 -0700583 if (isVectorType(Ty))
584 return false;
585 bool isI64 = Ty == IceType_i64;
586
587 switch (Arith->getOp()) {
588 // Not handled for lack of simple lowering:
589 // shift on i64
590 // mul, udiv, urem, sdiv, srem, frem
591 // Not handled for lack of RMW instructions:
592 // fadd, fsub, fmul, fdiv (also vector types)
593 default:
594 return false;
595 case InstArithmetic::Add:
596 case InstArithmetic::Sub:
597 case InstArithmetic::And:
598 case InstArithmetic::Or:
599 case InstArithmetic::Xor:
600 return true;
601 case InstArithmetic::Shl:
602 case InstArithmetic::Lshr:
603 case InstArithmetic::Ashr:
604 return false; // TODO(stichnot): implement
605 return !isI64;
606 }
607}
608
John Porto4a566862016-01-04 09:33:41 -0800609template <typename TraitsType>
John Porto7e93c622015-06-23 10:58:57 -0700610bool isSameMemAddressOperand(const Operand *A, const Operand *B) {
611 if (A == B)
612 return true;
John Porto4a566862016-01-04 09:33:41 -0800613 if (auto *MemA =
614 llvm::dyn_cast<typename TargetX86Base<TraitsType>::X86OperandMem>(
615 A)) {
616 if (auto *MemB =
617 llvm::dyn_cast<typename TargetX86Base<TraitsType>::X86OperandMem>(
618 B)) {
John Porto7e93c622015-06-23 10:58:57 -0700619 return MemA->getBase() == MemB->getBase() &&
620 MemA->getOffset() == MemB->getOffset() &&
621 MemA->getIndex() == MemB->getIndex() &&
622 MemA->getShift() == MemB->getShift() &&
623 MemA->getSegmentRegister() == MemB->getSegmentRegister();
624 }
625 }
626 return false;
627}
628
/// Scans every node for the three-instruction Load/Arith/Store RMW pattern and
/// annotates matches with a FakeDef "beacon" plus an InstX86FakeRMW pseudo
/// instruction, deferring the final keep-or-ignore decision to lowering.
template <typename TraitsType> void TargetX86Base<TraitsType>::findRMW() {
  TimerMarker _(TimerStack::TT_findRMW, Func);
  Func->dump("Before RMW");
  if (Func->isVerbose(IceV_RMW))
    Func->getContext()->lockStr();
  for (CfgNode *Node : Func->getNodes()) {
    // Walk through the instructions, considering each sequence of 3
    // instructions, and look for the particular RMW pattern. Note that this
    // search can be "broken" (false negatives) if there are intervening
    // deleted instructions, or intervening instructions that could be safely
    // moved out of the way to reveal an RMW pattern.
    auto E = Node->getInsts().end();
    auto I1 = E, I2 = E, I3 = Node->getInsts().begin();
    for (; I3 != E; I1 = I2, I2 = I3, ++I3) {
      // Make I3 skip over deleted instructions.
      while (I3 != E && I3->isDeleted())
        ++I3;
      if (I1 == E || I2 == E || I3 == E)
        continue;
      assert(!I1->isDeleted());
      assert(!I2->isDeleted());
      assert(!I3->isDeleted());
      auto *Load = llvm::dyn_cast<InstLoad>(I1);
      auto *Arith = llvm::dyn_cast<InstArithmetic>(I2);
      auto *Store = llvm::dyn_cast<InstStore>(I3);
      if (!Load || !Arith || !Store)
        continue;
      // Look for:
      //   a = Load addr
      //   b = <op> a, other
      //   Store b, addr
      // Change to:
      //   a = Load addr
      //   b = <op> a, other
      //   x = FakeDef
      //   RMW <op>, addr, other, x
      //   b = Store b, addr, x
      // Note that inferTwoAddress() makes sure setDestRedefined() gets called
      // on the updated Store instruction, to avoid liveness problems later.
      //
      // With this transformation, the Store instruction acquires a Dest
      // variable and is now subject to dead code elimination if there are no
      // more uses of "b". Variable "x" is a beacon for determining whether the
      // Store instruction gets dead-code eliminated. If the Store instruction
      // is eliminated, then it must be the case that the RMW instruction ends
      // x's live range, and therefore the RMW instruction will be retained and
      // later lowered. On the other hand, if the RMW instruction does not end
      // x's live range, then the Store instruction must still be present, and
      // therefore the RMW instruction is ignored during lowering because it is
      // redundant with the Store instruction.
      //
      // Note that if "a" has further uses, the RMW transformation may still
      // trigger, resulting in two loads and one store, which is worse than the
      // original one load and one store. However, this is probably rare, and
      // caching probably keeps it just as fast.
      if (!isSameMemAddressOperand<TraitsType>(Load->getSourceAddress(),
                                               Store->getAddr()))
        continue;
      Operand *ArithSrcFromLoad = Arith->getSrc(0);
      Operand *ArithSrcOther = Arith->getSrc(1);
      // The loaded value must feed the arithmetic; for commutative ops it may
      // appear as either source operand.
      if (ArithSrcFromLoad != Load->getDest()) {
        if (!Arith->isCommutative() || ArithSrcOther != Load->getDest())
          continue;
        std::swap(ArithSrcFromLoad, ArithSrcOther);
      }
      if (Arith->getDest() != Store->getData())
        continue;
      if (!canRMW(Arith))
        continue;
      if (Func->isVerbose(IceV_RMW)) {
        Ostream &Str = Func->getContext()->getStrDump();
        Str << "Found RMW in " << Func->getFunctionName() << ":\n ";
        Load->dump(Func);
        Str << "\n ";
        Arith->dump(Func);
        Str << "\n ";
        Store->dump(Func);
        Str << "\n";
      }
      Variable *Beacon = Func->makeVariable(IceType_i32);
      Beacon->setMustNotHaveReg();
      Store->setRmwBeacon(Beacon);
      auto *BeaconDef = InstFakeDef::create(Func, Beacon);
      Node->getInsts().insert(I3, BeaconDef);
      auto *RMW = InstX86FakeRMW::create(Func, ArithSrcOther, Store->getAddr(),
                                         Beacon, Arith->getOp());
      Node->getInsts().insert(I3, RMW);
    }
  }
  if (Func->isVerbose(IceV_RMW))
    Func->getContext()->unlockStr();
}
721
722// Converts a ConstantInteger32 operand into its constant value, or
723// MemoryOrderInvalid if the operand is not a ConstantInteger32.
John Porto5aeed952015-07-21 13:39:09 -0700724inline uint64_t getConstantMemoryOrder(Operand *Opnd) {
Jim Stichnoth5bff61c2015-10-28 09:26:00 -0700725 if (auto *Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
John Porto7e93c622015-06-23 10:58:57 -0700726 return Integer->getValue();
727 return Intrinsics::MemoryOrderInvalid;
728}
729
Andrew Scull57e12682015-09-16 11:30:19 -0700730/// Determines whether the dest of a Load instruction can be folded into one of
731/// the src operands of a 2-operand instruction. This is true as long as the
732/// load dest matches exactly one of the binary instruction's src operands.
733/// Replaces Src0 or Src1 with LoadSrc if the answer is true.
John Porto5aeed952015-07-21 13:39:09 -0700734inline bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest,
735 Operand *&Src0, Operand *&Src1) {
John Porto7e93c622015-06-23 10:58:57 -0700736 if (Src0 == LoadDest && Src1 != LoadDest) {
737 Src0 = LoadSrc;
738 return true;
739 }
740 if (Src0 != LoadDest && Src1 == LoadDest) {
741 Src1 = LoadSrc;
742 return true;
743 }
744 return false;
745}
746
John Porto4a566862016-01-04 09:33:41 -0800747template <typename TraitsType> void TargetX86Base<TraitsType>::doLoadOpt() {
Jim Stichnothb88d8c82016-03-11 15:33:00 -0800748 TimerMarker _(TimerStack::TT_loadOpt, Func);
John Porto7e93c622015-06-23 10:58:57 -0700749 for (CfgNode *Node : Func->getNodes()) {
750 Context.init(Node);
751 while (!Context.atEnd()) {
752 Variable *LoadDest = nullptr;
753 Operand *LoadSrc = nullptr;
754 Inst *CurInst = Context.getCur();
755 Inst *Next = Context.getNextInst();
Andrew Scull57e12682015-09-16 11:30:19 -0700756 // Determine whether the current instruction is a Load instruction or
757 // equivalent.
John Porto7e93c622015-06-23 10:58:57 -0700758 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) {
759 // An InstLoad always qualifies.
760 LoadDest = Load->getDest();
Jim Stichnoth5bff61c2015-10-28 09:26:00 -0700761 constexpr bool DoLegalize = false;
John Porto7e93c622015-06-23 10:58:57 -0700762 LoadSrc = formMemoryOperand(Load->getSourceAddress(),
763 LoadDest->getType(), DoLegalize);
764 } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) {
Andrew Scull57e12682015-09-16 11:30:19 -0700765 // An AtomicLoad intrinsic qualifies as long as it has a valid memory
766 // ordering, and can be implemented in a single instruction (i.e., not
767 // i64 on x86-32).
John Porto7e93c622015-06-23 10:58:57 -0700768 Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID;
769 if (ID == Intrinsics::AtomicLoad &&
John Porto1d235422015-08-12 12:37:53 -0700770 (Traits::Is64Bit || Intrin->getDest()->getType() != IceType_i64) &&
John Porto7e93c622015-06-23 10:58:57 -0700771 Intrinsics::isMemoryOrderValid(
772 ID, getConstantMemoryOrder(Intrin->getArg(1)))) {
773 LoadDest = Intrin->getDest();
Jim Stichnoth5bff61c2015-10-28 09:26:00 -0700774 constexpr bool DoLegalize = false;
John Porto7e93c622015-06-23 10:58:57 -0700775 LoadSrc = formMemoryOperand(Intrin->getArg(0), LoadDest->getType(),
776 DoLegalize);
777 }
778 }
Andrew Scull57e12682015-09-16 11:30:19 -0700779 // A Load instruction can be folded into the following instruction only
780 // if the following instruction ends the Load's Dest variable's live
781 // range.
John Porto7e93c622015-06-23 10:58:57 -0700782 if (LoadDest && Next && Next->isLastUse(LoadDest)) {
783 assert(LoadSrc);
784 Inst *NewInst = nullptr;
785 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Next)) {
786 Operand *Src0 = Arith->getSrc(0);
787 Operand *Src1 = Arith->getSrc(1);
788 if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
789 NewInst = InstArithmetic::create(Func, Arith->getOp(),
790 Arith->getDest(), Src0, Src1);
791 }
792 } else if (auto *Icmp = llvm::dyn_cast<InstIcmp>(Next)) {
793 Operand *Src0 = Icmp->getSrc(0);
794 Operand *Src1 = Icmp->getSrc(1);
795 if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
796 NewInst = InstIcmp::create(Func, Icmp->getCondition(),
797 Icmp->getDest(), Src0, Src1);
798 }
799 } else if (auto *Fcmp = llvm::dyn_cast<InstFcmp>(Next)) {
800 Operand *Src0 = Fcmp->getSrc(0);
801 Operand *Src1 = Fcmp->getSrc(1);
802 if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
803 NewInst = InstFcmp::create(Func, Fcmp->getCondition(),
804 Fcmp->getDest(), Src0, Src1);
805 }
806 } else if (auto *Select = llvm::dyn_cast<InstSelect>(Next)) {
807 Operand *Src0 = Select->getTrueOperand();
808 Operand *Src1 = Select->getFalseOperand();
809 if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
810 NewInst = InstSelect::create(Func, Select->getDest(),
811 Select->getCondition(), Src0, Src1);
812 }
813 } else if (auto *Cast = llvm::dyn_cast<InstCast>(Next)) {
Andrew Scull57e12682015-09-16 11:30:19 -0700814 // The load dest can always be folded into a Cast instruction.
Jim Stichnoth54f3d512015-12-11 09:53:00 -0800815 auto *Src0 = llvm::dyn_cast<Variable>(Cast->getSrc(0));
John Porto7e93c622015-06-23 10:58:57 -0700816 if (Src0 == LoadDest) {
817 NewInst = InstCast::create(Func, Cast->getCastKind(),
818 Cast->getDest(), LoadSrc);
819 }
820 }
821 if (NewInst) {
822 CurInst->setDeleted();
823 Next->setDeleted();
824 Context.insert(NewInst);
Andrew Scull57e12682015-09-16 11:30:19 -0700825 // Update NewInst->LiveRangesEnded so that target lowering may
826 // benefit. Also update NewInst->HasSideEffects.
John Porto7e93c622015-06-23 10:58:57 -0700827 NewInst->spliceLivenessInfo(Next, CurInst);
828 }
829 }
830 Context.advanceCur();
831 Context.advanceNext();
832 }
833 }
834 Func->dump("After load optimization");
835}
836
John Porto4a566862016-01-04 09:33:41 -0800837template <typename TraitsType>
838bool TargetX86Base<TraitsType>::doBranchOpt(Inst *I, const CfgNode *NextNode) {
839 if (auto *Br = llvm::dyn_cast<InstX86Br>(I)) {
John Porto7e93c622015-06-23 10:58:57 -0700840 return Br->optimizeBranch(NextNode);
841 }
842 return false;
843}
844
John Porto4a566862016-01-04 09:33:41 -0800845template <typename TraitsType>
Jim Stichnoth8aa39662016-02-10 11:20:30 -0800846Variable *TargetX86Base<TraitsType>::getPhysicalRegister(RegNumT RegNum,
John Porto4a566862016-01-04 09:33:41 -0800847 Type Ty) {
John Porto7e93c622015-06-23 10:58:57 -0700848 if (Ty == IceType_void)
849 Ty = IceType_i32;
850 if (PhysicalRegisters[Ty].empty())
John Porto5d0acff2015-06-30 15:29:21 -0700851 PhysicalRegisters[Ty].resize(Traits::RegisterSet::Reg_NUM);
Jim Stichnoth8aa39662016-02-10 11:20:30 -0800852 assert(unsigned(RegNum) < PhysicalRegisters[Ty].size());
John Porto7e93c622015-06-23 10:58:57 -0700853 Variable *Reg = PhysicalRegisters[Ty][RegNum];
854 if (Reg == nullptr) {
John Porto5aeed952015-07-21 13:39:09 -0700855 Reg = Func->makeVariable(Ty);
John Porto7e93c622015-06-23 10:58:57 -0700856 Reg->setRegNum(RegNum);
857 PhysicalRegisters[Ty][RegNum] = Reg;
Jim Stichnoth69660552015-09-18 06:41:02 -0700858 // Specially mark a named physical register as an "argument" so that it is
859 // considered live upon function entry. Otherwise it's possible to get
860 // liveness validation errors for saving callee-save registers.
861 Func->addImplicitArg(Reg);
862 // Don't bother tracking the live range of a named physical register.
863 Reg->setIgnoreLiveness();
John Porto7e93c622015-06-23 10:58:57 -0700864 }
Jim Stichnoth8aa39662016-02-10 11:20:30 -0800865 assert(Traits::getGprForType(Ty, RegNum) == RegNum);
John Porto7e93c622015-06-23 10:58:57 -0700866 return Reg;
867}
868
John Porto4a566862016-01-04 09:33:41 -0800869template <typename TraitsType>
Jim Stichnoth467ffe52016-03-29 15:01:06 -0700870const char *TargetX86Base<TraitsType>::getRegName(RegNumT RegNum,
871 Type Ty) const {
John Porto008f4ce2015-12-24 13:22:18 -0800872 return Traits::getRegName(Traits::getGprForType(Ty, RegNum));
John Porto7e93c622015-06-23 10:58:57 -0700873}
874
John Porto4a566862016-01-04 09:33:41 -0800875template <typename TraitsType>
876void TargetX86Base<TraitsType>::emitVariable(const Variable *Var) const {
Jan Voung28068ad2015-07-31 12:58:46 -0700877 if (!BuildDefs::dump())
878 return;
John Porto7e93c622015-06-23 10:58:57 -0700879 Ostream &Str = Ctx->getStrEmit();
880 if (Var->hasReg()) {
John Porto56958cb2016-01-14 09:18:18 -0800881 const bool Is64BitSandboxing = Traits::Is64Bit && NeedSandboxing;
882 const Type VarType = (Var->isRematerializable() && Is64BitSandboxing)
883 ? IceType_i64
884 : Var->getType();
885 Str << "%" << getRegName(Var->getRegNum(), VarType);
John Porto7e93c622015-06-23 10:58:57 -0700886 return;
887 }
Andrew Scull11c9a322015-08-28 14:24:14 -0700888 if (Var->mustHaveReg()) {
Jim Stichnotha91c3412016-04-05 15:31:43 -0700889 llvm::report_fatal_error("Infinite-weight Variable (" + Var->getName() +
Jim Stichnoth45bec542016-02-05 10:26:09 -0800890 ") has no register assigned - function " +
891 Func->getFunctionName());
John Porto7e93c622015-06-23 10:58:57 -0700892 }
Jim Stichnoth238b4c12015-10-01 07:46:38 -0700893 const int32_t Offset = Var->getStackOffset();
Jim Stichnoth8aa39662016-02-10 11:20:30 -0800894 auto BaseRegNum = Var->getBaseRegNum();
Reed Kotler5fa0a5f2016-02-15 20:01:24 -0800895 if (BaseRegNum.hasNoValue())
Jan Voung28068ad2015-07-31 12:58:46 -0700896 BaseRegNum = getFrameOrStackReg();
David Sehr26217e32015-11-26 13:03:50 -0800897 // Print in the form "Offset(%reg)", taking care that:
Jim Stichnoth238b4c12015-10-01 07:46:38 -0700898 // - Offset is never printed when it is 0
Jim Stichnoth238b4c12015-10-01 07:46:38 -0700899
Karl Schimpfd4699942016-04-02 09:55:31 -0700900 const bool DecorateAsm = getFlags().getDecorateAsm();
Jim Stichnoth238b4c12015-10-01 07:46:38 -0700901 // Only print Offset when it is nonzero, regardless of DecorateAsm.
902 if (Offset) {
Jim Stichnoth238b4c12015-10-01 07:46:38 -0700903 if (DecorateAsm) {
Jim Stichnotha91c3412016-04-05 15:31:43 -0700904 Str << Var->getSymbolicStackOffset();
Jim Stichnoth238b4c12015-10-01 07:46:38 -0700905 } else {
906 Str << Offset;
907 }
908 }
John Porto1d235422015-08-12 12:37:53 -0700909 const Type FrameSPTy = Traits::WordType;
Jan Voung28068ad2015-07-31 12:58:46 -0700910 Str << "(%" << getRegName(BaseRegNum, FrameSPTy) << ")";
John Porto7e93c622015-06-23 10:58:57 -0700911}
912
John Porto4a566862016-01-04 09:33:41 -0800913template <typename TraitsType>
914typename TargetX86Base<TraitsType>::X86Address
915TargetX86Base<TraitsType>::stackVarToAsmOperand(const Variable *Var) const {
John Porto7e93c622015-06-23 10:58:57 -0700916 if (Var->hasReg())
Jim Stichnoth8ff4b282016-01-04 15:39:06 -0800917 llvm::report_fatal_error("Stack Variable has a register assigned");
Andrew Scull11c9a322015-08-28 14:24:14 -0700918 if (Var->mustHaveReg()) {
Jim Stichnotha91c3412016-04-05 15:31:43 -0700919 llvm::report_fatal_error("Infinite-weight Variable (" + Var->getName() +
Jim Stichnoth45bec542016-02-05 10:26:09 -0800920 ") has no register assigned - function " +
921 Func->getFunctionName());
John Porto7e93c622015-06-23 10:58:57 -0700922 }
923 int32_t Offset = Var->getStackOffset();
Jim Stichnoth8aa39662016-02-10 11:20:30 -0800924 auto BaseRegNum = Var->getBaseRegNum();
Reed Kotler5fa0a5f2016-02-15 20:01:24 -0800925 if (Var->getBaseRegNum().hasNoValue())
Jan Voung28068ad2015-07-31 12:58:46 -0700926 BaseRegNum = getFrameOrStackReg();
John Porto4a566862016-01-04 09:33:41 -0800927 return X86Address(Traits::getEncodedGPR(BaseRegNum), Offset,
928 AssemblerFixup::NoFixup);
John Porto7e93c622015-06-23 10:58:57 -0700929}
930
David Sehrb9a404d2016-01-21 08:09:27 -0800931template <typename TraitsType>
932void TargetX86Base<TraitsType>::addProlog(CfgNode *Node) {
933 // Stack frame layout:
934 //
935 // +------------------------+
936 // | 1. return address |
937 // +------------------------+
938 // | 2. preserved registers |
939 // +------------------------+
940 // | 3. padding |
941 // +------------------------+
942 // | 4. global spill area |
943 // +------------------------+
944 // | 5. padding |
945 // +------------------------+
946 // | 6. local spill area |
947 // +------------------------+
948 // | 7. padding |
949 // +------------------------+
950 // | 8. allocas |
951 // +------------------------+
952 // | 9. padding |
953 // +------------------------+
954 // | 10. out args |
955 // +------------------------+ <--- StackPointer
956 //
957 // The following variables record the size in bytes of the given areas:
958 // * X86_RET_IP_SIZE_BYTES: area 1
959 // * PreservedRegsSizeBytes: area 2
960 // * SpillAreaPaddingBytes: area 3
961 // * GlobalsSize: area 4
962 // * GlobalsAndSubsequentPaddingSize: areas 4 - 5
963 // * LocalsSpillAreaSize: area 6
964 // * SpillAreaSizeBytes: areas 3 - 10
965 // * maxOutArgsSizeBytes(): area 10
966
967 // Determine stack frame offsets for each Variable without a register
968 // assignment. This can be done as one variable per stack slot. Or, do
969 // coalescing by running the register allocator again with an infinite set of
970 // registers (as a side effect, this gives variables a second chance at
971 // physical register assignment).
972 //
973 // A middle ground approach is to leverage sparsity and allocate one block of
974 // space on the frame for globals (variables with multi-block lifetime), and
975 // one block to share for locals (single-block lifetime).
976
977 Context.init(Node);
978 Context.setInsertPoint(Context.getCur());
979
John Portoe82b5602016-02-24 15:58:55 -0800980 SmallBitVector CalleeSaves = getRegisterSet(RegSet_CalleeSave, RegSet_None);
981 RegsUsed = SmallBitVector(CalleeSaves.size());
David Sehrb9a404d2016-01-21 08:09:27 -0800982 VarList SortedSpilledVariables, VariablesLinkedToSpillSlots;
983 size_t GlobalsSize = 0;
984 // If there is a separate locals area, this represents that area. Otherwise
985 // it counts any variable not counted by GlobalsSize.
986 SpillAreaSizeBytes = 0;
987 // If there is a separate locals area, this specifies the alignment for it.
988 uint32_t LocalsSlotsAlignmentBytes = 0;
989 // The entire spill locations area gets aligned to largest natural alignment
990 // of the variables that have a spill slot.
991 uint32_t SpillAreaAlignmentBytes = 0;
992 // A spill slot linked to a variable with a stack slot should reuse that
993 // stack slot.
994 std::function<bool(Variable *)> TargetVarHook =
995 [&VariablesLinkedToSpillSlots](Variable *Var) {
996 if (auto *SpillVar =
997 llvm::dyn_cast<typename Traits::SpillVariable>(Var)) {
998 assert(Var->mustNotHaveReg());
999 if (SpillVar->getLinkedTo() && !SpillVar->getLinkedTo()->hasReg()) {
1000 VariablesLinkedToSpillSlots.push_back(Var);
1001 return true;
1002 }
1003 }
1004 return false;
1005 };
1006
1007 // Compute the list of spilled variables and bounds for GlobalsSize, etc.
1008 getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
1009 &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
1010 &LocalsSlotsAlignmentBytes, TargetVarHook);
1011 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
1012 SpillAreaSizeBytes += GlobalsSize;
1013
1014 // Add push instructions for preserved registers.
1015 uint32_t NumCallee = 0;
1016 size_t PreservedRegsSizeBytes = 0;
John Portoe82b5602016-02-24 15:58:55 -08001017 SmallBitVector Pushed(CalleeSaves.size());
Jim Stichnoth8aa39662016-02-10 11:20:30 -08001018 for (RegNumT i : RegNumBVIter(CalleeSaves)) {
1019 const auto Canonical = Traits::getBaseReg(i);
David Sehrb9a404d2016-01-21 08:09:27 -08001020 assert(Canonical == Traits::getBaseReg(Canonical));
Jim Stichnoth8aa39662016-02-10 11:20:30 -08001021 if (RegsUsed[i]) {
David Sehrb9a404d2016-01-21 08:09:27 -08001022 Pushed[Canonical] = true;
1023 }
1024 }
Jim Stichnoth8aa39662016-02-10 11:20:30 -08001025 for (RegNumT RegNum : RegNumBVIter(Pushed)) {
1026 assert(RegNum == Traits::getBaseReg(RegNum));
David Sehrb9a404d2016-01-21 08:09:27 -08001027 ++NumCallee;
1028 PreservedRegsSizeBytes += typeWidthInBytes(Traits::WordType);
Jim Stichnoth8aa39662016-02-10 11:20:30 -08001029 _push_reg(getPhysicalRegister(RegNum, Traits::WordType));
David Sehrb9a404d2016-01-21 08:09:27 -08001030 }
1031 Ctx->statsUpdateRegistersSaved(NumCallee);
1032
1033 // Generate "push frameptr; mov frameptr, stackptr"
1034 if (IsEbpBasedFrame) {
1035 assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))
1036 .count() == 0);
1037 PreservedRegsSizeBytes += typeWidthInBytes(Traits::WordType);
1038 _link_bp();
1039 }
1040
1041 // Align the variables area. SpillAreaPaddingBytes is the size of the region
1042 // after the preserved registers and before the spill areas.
1043 // LocalsSlotsPaddingBytes is the amount of padding between the globals and
1044 // locals area if they are separate.
1045 assert(SpillAreaAlignmentBytes <= Traits::X86_STACK_ALIGNMENT_BYTES);
1046 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
1047 uint32_t SpillAreaPaddingBytes = 0;
1048 uint32_t LocalsSlotsPaddingBytes = 0;
1049 alignStackSpillAreas(Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes,
1050 SpillAreaAlignmentBytes, GlobalsSize,
1051 LocalsSlotsAlignmentBytes, &SpillAreaPaddingBytes,
1052 &LocalsSlotsPaddingBytes);
1053 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
1054 uint32_t GlobalsAndSubsequentPaddingSize =
1055 GlobalsSize + LocalsSlotsPaddingBytes;
1056
1057 // Functions returning scalar floating point types may need to convert values
1058 // from an in-register xmm value to the top of the x87 floating point stack.
1059 // This is done by a movp[sd] and an fld[sd]. Ensure there is enough scratch
1060 // space on the stack for this.
1061 const Type ReturnType = Func->getReturnType();
1062 if (!Traits::X86_PASS_SCALAR_FP_IN_XMM) {
1063 if (isScalarFloatingType(ReturnType)) {
1064 // Avoid misaligned double-precicion load/store.
1065 NeedsStackAlignment = true;
1066 SpillAreaSizeBytes =
1067 std::max(typeWidthInBytesOnStack(ReturnType), SpillAreaSizeBytes);
1068 }
1069 }
1070
1071 // Align esp if necessary.
1072 if (NeedsStackAlignment) {
1073 uint32_t StackOffset =
1074 Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
1075 uint32_t StackSize =
1076 Traits::applyStackAlignment(StackOffset + SpillAreaSizeBytes);
1077 StackSize = Traits::applyStackAlignment(StackSize + maxOutArgsSizeBytes());
1078 SpillAreaSizeBytes = StackSize - StackOffset;
1079 } else {
1080 SpillAreaSizeBytes += maxOutArgsSizeBytes();
1081 }
1082
1083 // Combine fixed allocations into SpillAreaSizeBytes if we are emitting the
1084 // fixed allocations in the prolog.
1085 if (PrologEmitsFixedAllocas)
1086 SpillAreaSizeBytes += FixedAllocaSizeBytes;
1087 if (SpillAreaSizeBytes) {
1088 // Generate "sub stackptr, SpillAreaSizeBytes"
1089 _sub_sp(Ctx->getConstantInt32(SpillAreaSizeBytes));
1090 // If the fixed allocas are aligned more than the stack frame, align the
1091 // stack pointer accordingly.
1092 if (PrologEmitsFixedAllocas &&
1093 FixedAllocaAlignBytes > Traits::X86_STACK_ALIGNMENT_BYTES) {
1094 assert(IsEbpBasedFrame);
1095 _and(getPhysicalRegister(getStackReg(), Traits::WordType),
1096 Ctx->getConstantInt32(-FixedAllocaAlignBytes));
1097 }
1098 }
1099
1100 // Account for known-frame-offset alloca instructions that were not already
1101 // combined into the prolog.
1102 if (!PrologEmitsFixedAllocas)
1103 SpillAreaSizeBytes += FixedAllocaSizeBytes;
1104
1105 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
1106
1107 // Fill in stack offsets for stack args, and copy args into registers for
1108 // those that were register-allocated. Args are pushed right to left, so
1109 // Arg[0] is closest to the stack/frame pointer.
1110 Variable *FramePtr =
1111 getPhysicalRegister(getFrameOrStackReg(), Traits::WordType);
1112 size_t BasicFrameOffset =
1113 PreservedRegsSizeBytes + Traits::X86_RET_IP_SIZE_BYTES;
1114 if (!IsEbpBasedFrame)
1115 BasicFrameOffset += SpillAreaSizeBytes;
1116
1117 emitGetIP(Node);
1118
1119 const VarList &Args = Func->getArgs();
1120 size_t InArgsSizeBytes = 0;
1121 unsigned NumXmmArgs = 0;
1122 unsigned NumGPRArgs = 0;
1123 for (Variable *Arg : Args) {
1124 // Skip arguments passed in registers.
1125 if (isVectorType(Arg->getType())) {
Reed Kotler5fa0a5f2016-02-15 20:01:24 -08001126 if (Traits::getRegisterForXmmArgNum(NumXmmArgs).hasValue()) {
David Sehrb9a404d2016-01-21 08:09:27 -08001127 ++NumXmmArgs;
1128 continue;
1129 }
1130 } else if (isScalarFloatingType(Arg->getType())) {
1131 if (Traits::X86_PASS_SCALAR_FP_IN_XMM &&
Reed Kotler5fa0a5f2016-02-15 20:01:24 -08001132 Traits::getRegisterForXmmArgNum(NumXmmArgs).hasValue()) {
David Sehrb9a404d2016-01-21 08:09:27 -08001133 ++NumXmmArgs;
1134 continue;
1135 }
1136 } else {
1137 assert(isScalarIntegerType(Arg->getType()));
Reed Kotler5fa0a5f2016-02-15 20:01:24 -08001138 if (Traits::getRegisterForGprArgNum(Traits::WordType, NumGPRArgs)
1139 .hasValue()) {
David Sehrb9a404d2016-01-21 08:09:27 -08001140 ++NumGPRArgs;
1141 continue;
1142 }
1143 }
1144 // For esp-based frames where the allocas are done outside the prolog, the
1145 // esp value may not stabilize to its home value until after all the
1146 // fixed-size alloca instructions have executed. In this case, a stack
1147 // adjustment is needed when accessing in-args in order to copy them into
1148 // registers.
1149 size_t StackAdjBytes = 0;
1150 if (!IsEbpBasedFrame && !PrologEmitsFixedAllocas)
1151 StackAdjBytes -= FixedAllocaSizeBytes;
1152 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, StackAdjBytes,
1153 InArgsSizeBytes);
1154 }
1155
1156 // Fill in stack offsets for locals.
1157 assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
1158 SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
1159 IsEbpBasedFrame);
1160 // Assign stack offsets to variables that have been linked to spilled
1161 // variables.
1162 for (Variable *Var : VariablesLinkedToSpillSlots) {
1163 Variable *Linked =
1164 (llvm::cast<typename Traits::SpillVariable>(Var))->getLinkedTo();
1165 Var->setStackOffset(Linked->getStackOffset());
1166 }
1167 this->HasComputedFrame = true;
1168
1169 if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
1170 OstreamLocker L(Func->getContext());
1171 Ostream &Str = Func->getContext()->getStrDump();
1172
1173 Str << "Stack layout:\n";
1174 uint32_t EspAdjustmentPaddingSize =
1175 SpillAreaSizeBytes - LocalsSpillAreaSize -
1176 GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes -
1177 maxOutArgsSizeBytes();
1178 Str << " in-args = " << InArgsSizeBytes << " bytes\n"
1179 << " return address = " << Traits::X86_RET_IP_SIZE_BYTES << " bytes\n"
1180 << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
1181 << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
1182 << " globals spill area = " << GlobalsSize << " bytes\n"
1183 << " globals-locals spill areas intermediate padding = "
1184 << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
1185 << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
1186 << " esp alignment padding = " << EspAdjustmentPaddingSize
1187 << " bytes\n";
1188
1189 Str << "Stack details:\n"
1190 << " esp adjustment = " << SpillAreaSizeBytes << " bytes\n"
1191 << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
1192 << " outgoing args size = " << maxOutArgsSizeBytes() << " bytes\n"
1193 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
1194 << " bytes\n"
1195 << " is ebp based = " << IsEbpBasedFrame << "\n";
1196 }
1197}
1198
Andrew Scull9612d322015-07-06 14:53:25 -07001199/// Helper function for addProlog().
1200///
Andrew Scull57e12682015-09-16 11:30:19 -07001201/// This assumes Arg is an argument passed on the stack. This sets the frame
1202/// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an
1203/// I64 arg that has been split into Lo and Hi components, it calls itself
1204/// recursively on the components, taking care to handle Lo first because of the
1205/// little-endian architecture. Lastly, this function generates an instruction
1206/// to copy Arg into its assigned register if applicable.
John Porto4a566862016-01-04 09:33:41 -08001207template <typename TraitsType>
1208void TargetX86Base<TraitsType>::finishArgumentLowering(
1209 Variable *Arg, Variable *FramePtr, size_t BasicFrameOffset,
1210 size_t StackAdjBytes, size_t &InArgsSizeBytes) {
Andrew Scull6d47bcd2015-09-17 17:10:05 -07001211 if (!Traits::Is64Bit) {
1212 if (auto *Arg64On32 = llvm::dyn_cast<Variable64On32>(Arg)) {
1213 Variable *Lo = Arg64On32->getLo();
1214 Variable *Hi = Arg64On32->getHi();
Jim Stichnoth55f931f2015-09-23 16:33:08 -07001215 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, StackAdjBytes,
1216 InArgsSizeBytes);
1217 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, StackAdjBytes,
1218 InArgsSizeBytes);
Andrew Scull6d47bcd2015-09-17 17:10:05 -07001219 return;
1220 }
John Porto7e93c622015-06-23 10:58:57 -07001221 }
Andrew Scull6d47bcd2015-09-17 17:10:05 -07001222 Type Ty = Arg->getType();
John Porto7e93c622015-06-23 10:58:57 -07001223 if (isVectorType(Ty)) {
1224 InArgsSizeBytes = Traits::applyStackAlignment(InArgsSizeBytes);
1225 }
1226 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
1227 InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
1228 if (Arg->hasReg()) {
John Porto1d235422015-08-12 12:37:53 -07001229 assert(Ty != IceType_i64 || Traits::Is64Bit);
John Porto4a566862016-01-04 09:33:41 -08001230 auto *Mem = X86OperandMem::create(
Jim Stichnoth55f931f2015-09-23 16:33:08 -07001231 Func, Ty, FramePtr,
1232 Ctx->getConstantInt32(Arg->getStackOffset() + StackAdjBytes));
John Porto7e93c622015-06-23 10:58:57 -07001233 if (isVectorType(Arg->getType())) {
1234 _movp(Arg, Mem);
1235 } else {
1236 _mov(Arg, Mem);
1237 }
John Porto4a566862016-01-04 09:33:41 -08001238 // This argument-copying instruction uses an explicit X86OperandMem
Andrew Scull57e12682015-09-16 11:30:19 -07001239 // operand instead of a Variable, so its fill-from-stack operation has to
1240 // be tracked separately for statistics.
John Porto7e93c622015-06-23 10:58:57 -07001241 Ctx->statsUpdateFills();
1242 }
1243}
1244
David Sehrb9a404d2016-01-21 08:09:27 -08001245template <typename TraitsType>
1246void TargetX86Base<TraitsType>::addEpilog(CfgNode *Node) {
1247 InstList &Insts = Node->getInsts();
1248 InstList::reverse_iterator RI, E;
1249 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
1250 if (llvm::isa<typename Traits::Insts::Ret>(*RI))
1251 break;
1252 }
1253 if (RI == E)
1254 return;
1255
1256 // Convert the reverse_iterator position into its corresponding (forward)
1257 // iterator position.
1258 InstList::iterator InsertPoint = RI.base();
1259 --InsertPoint;
1260 Context.init(Node);
1261 Context.setInsertPoint(InsertPoint);
1262
1263 if (IsEbpBasedFrame) {
1264 _unlink_bp();
1265 } else {
1266 // add stackptr, SpillAreaSizeBytes
1267 if (SpillAreaSizeBytes != 0) {
1268 _add_sp(Ctx->getConstantInt32(SpillAreaSizeBytes));
1269 }
1270 }
1271
1272 // Add pop instructions for preserved registers.
John Portoe82b5602016-02-24 15:58:55 -08001273 SmallBitVector CalleeSaves = getRegisterSet(RegSet_CalleeSave, RegSet_None);
1274 SmallBitVector Popped(CalleeSaves.size());
David Sehrb9a404d2016-01-21 08:09:27 -08001275 for (int32_t i = CalleeSaves.size() - 1; i >= 0; --i) {
Jim Stichnoth8aa39662016-02-10 11:20:30 -08001276 const auto RegNum = RegNumT::fromInt(i);
1277 if (RegNum == getFrameReg() && IsEbpBasedFrame)
David Sehrb9a404d2016-01-21 08:09:27 -08001278 continue;
Jim Stichnoth8aa39662016-02-10 11:20:30 -08001279 const RegNumT Canonical = Traits::getBaseReg(RegNum);
David Sehrb9a404d2016-01-21 08:09:27 -08001280 if (CalleeSaves[i] && RegsUsed[i]) {
1281 Popped[Canonical] = true;
1282 }
1283 }
1284 for (int32_t i = Popped.size() - 1; i >= 0; --i) {
1285 if (!Popped[i])
1286 continue;
Jim Stichnoth8aa39662016-02-10 11:20:30 -08001287 const auto RegNum = RegNumT::fromInt(i);
1288 assert(RegNum == Traits::getBaseReg(RegNum));
1289 _pop(getPhysicalRegister(RegNum, Traits::WordType));
David Sehrb9a404d2016-01-21 08:09:27 -08001290 }
1291
1292 if (!NeedSandboxing) {
1293 return;
1294 }
1295 emitSandboxedReturn();
1296 if (RI->getSrcSize()) {
1297 auto *RetValue = llvm::cast<Variable>(RI->getSrc(0));
1298 Context.insert<InstFakeUse>(RetValue);
1299 }
1300 RI->setDeleted();
1301}
1302
John Porto4a566862016-01-04 09:33:41 -08001303template <typename TraitsType> Type TargetX86Base<TraitsType>::stackSlotType() {
John Porto1d235422015-08-12 12:37:53 -07001304 return Traits::WordType;
John Porto7e93c622015-06-23 10:58:57 -07001305}
1306
John Porto4a566862016-01-04 09:33:41 -08001307template <typename TraitsType>
John Porto1d235422015-08-12 12:37:53 -07001308template <typename T>
John Porto1d235422015-08-12 12:37:53 -07001309typename std::enable_if<!T::Is64Bit, Operand>::type *
John Porto4a566862016-01-04 09:33:41 -08001310TargetX86Base<TraitsType>::loOperand(Operand *Operand) {
John Porto7e93c622015-06-23 10:58:57 -07001311 assert(Operand->getType() == IceType_i64 ||
1312 Operand->getType() == IceType_f64);
1313 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
1314 return Operand;
Andrew Scull6d47bcd2015-09-17 17:10:05 -07001315 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand))
1316 return Var64On32->getLo();
Jan Voungfbdd2442015-07-15 12:36:20 -07001317 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
1318 auto *ConstInt = llvm::dyn_cast<ConstantInteger32>(
John Porto7e93c622015-06-23 10:58:57 -07001319 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue())));
Jan Voungfbdd2442015-07-15 12:36:20 -07001320 // Check if we need to blind/pool the constant.
John Porto7e93c622015-06-23 10:58:57 -07001321 return legalize(ConstInt);
1322 }
John Porto4a566862016-01-04 09:33:41 -08001323 if (auto *Mem = llvm::dyn_cast<X86OperandMem>(Operand)) {
1324 auto *MemOperand = X86OperandMem::create(
John Porto7e93c622015-06-23 10:58:57 -07001325 Func, IceType_i32, Mem->getBase(), Mem->getOffset(), Mem->getIndex(),
John Porto56958cb2016-01-14 09:18:18 -08001326 Mem->getShift(), Mem->getSegmentRegister(), Mem->getIsRebased());
John Porto7e93c622015-06-23 10:58:57 -07001327 // Test if we should randomize or pool the offset, if so randomize it or
1328 // pool it then create mem operand with the blinded/pooled constant.
1329 // Otherwise, return the mem operand as ordinary mem operand.
1330 return legalize(MemOperand);
1331 }
1332 llvm_unreachable("Unsupported operand type");
1333 return nullptr;
1334}
1335
/// Returns an operand representing the high 32-bit half of a 64-bit operand
/// (i64 or f64) when targeting x86-32 (hence the !T::Is64Bit enable_if).
/// Variables split as Variable64On32 yield their hi half; 64-bit integer
/// constants are shifted right by 32 and truncated; memory operands are
/// rebuilt with their offset advanced by 4 bytes. Constants and memory
/// operands go through legalize() so they can be blinded/pooled if required.
template <typename TraitsType>
template <typename T>
typename std::enable_if<!T::Is64Bit, Operand>::type *
TargetX86Base<TraitsType>::hiOperand(Operand *Operand) {
  assert(Operand->getType() == IceType_i64 ||
         Operand->getType() == IceType_f64);
  if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
    return Operand;
  if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand))
    return Var64On32->getHi();
  if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
    // The arithmetic shift by 32 selects the high 32 bits of the constant.
    auto *ConstInt = llvm::dyn_cast<ConstantInteger32>(
        Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue() >> 32)));
    // Check if we need to blind/pool the constant.
    return legalize(ConstInt);
  }
  if (auto *Mem = llvm::dyn_cast<X86OperandMem>(Operand)) {
    // The high half lives 4 bytes above the low half, so add 4 to whatever
    // form the offset currently takes (absent, immediate, or relocatable).
    Constant *Offset = Mem->getOffset();
    if (Offset == nullptr) {
      Offset = Ctx->getConstantInt32(4);
    } else if (auto *IntOffset = llvm::dyn_cast<ConstantInteger32>(Offset)) {
      Offset = Ctx->getConstantInt32(4 + IntOffset->getValue());
    } else if (auto *SymOffset = llvm::dyn_cast<ConstantRelocatable>(Offset)) {
      assert(!Utils::WouldOverflowAdd(SymOffset->getOffset(), 4));
      Offset =
          Ctx->getConstantSym(4 + SymOffset->getOffset(), SymOffset->getName());
    }
    auto *MemOperand = X86OperandMem::create(
        Func, IceType_i32, Mem->getBase(), Offset, Mem->getIndex(),
        Mem->getShift(), Mem->getSegmentRegister(), Mem->getIsRebased());
    // Test if the Offset is an eligible i32 constant for randomization and
    // pooling. Blind/pool it if it is. Otherwise, return as an ordinary mem
    // operand.
    return legalize(MemOperand);
  }
  llvm_unreachable("Unsupported operand type");
  return nullptr;
}
1374
John Porto4a566862016-01-04 09:33:41 -08001375template <typename TraitsType>
John Portoe82b5602016-02-24 15:58:55 -08001376SmallBitVector
John Porto4a566862016-01-04 09:33:41 -08001377TargetX86Base<TraitsType>::getRegisterSet(RegSetMask Include,
1378 RegSetMask Exclude) const {
Karl Schimpfd4699942016-04-02 09:55:31 -07001379 return Traits::getRegisterSet(getFlags(), Include, Exclude);
John Porto7e93c622015-06-23 10:58:57 -07001380}
1381
/// Lowers an alloca instruction: adjusts the stack pointer (or records a
/// fixed-frame-offset allocation), keeping the stack aligned, and assigns the
/// resulting address (past the out-args area) to the instruction's dest.
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerAlloca(const InstAlloca *Instr) {
  // Conservatively require the stack to be aligned. Some stack adjustment
  // operations implemented below assume that the stack is aligned before the
  // alloca. All the alloca code ensures that the stack alignment is preserved
  // after the alloca. The stack alignment restriction can be relaxed in some
  // cases.
  NeedsStackAlignment = true;

  // For default align=0, set it to the real value 1, to avoid any
  // bit-manipulation problems below.
  const uint32_t AlignmentParam = std::max(1u, Instr->getAlignInBytes());

  // LLVM enforces power of 2 alignment.
  assert(llvm::isPowerOf2_32(AlignmentParam));
  assert(llvm::isPowerOf2_32(Traits::X86_STACK_ALIGNMENT_BYTES));

  // Effective alignment is at least the target's stack alignment.
  const uint32_t Alignment =
      std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES);
  const bool OverAligned = Alignment > Traits::X86_STACK_ALIGNMENT_BYTES;
  const bool OptM1 = Func->getOptLevel() == Opt_m1;
  const bool AllocaWithKnownOffset = Instr->getKnownFrameOffset();
  // A frame pointer is required whenever the alloca's frame offset cannot be
  // fixed at compile time (over-alignment, unknown offset, or -Om1).
  const bool UseFramePointer =
      hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1;

  if (UseFramePointer)
    setHasFramePointer();

  Variable *esp = getPhysicalRegister(getStackReg(), Traits::WordType);
  if (OverAligned) {
    // Align esp down to the requested alignment (-Alignment is the mask).
    _and(esp, Ctx->getConstantInt32(-Alignment));
  }

  Variable *Dest = Instr->getDest();
  Operand *TotalSize = legalize(Instr->getSizeInBytes());

  if (const auto *ConstantTotalSize =
          llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
    // Constant size: round it up to the alignment at compile time.
    const uint32_t Value =
        Utils::applyAlignment(ConstantTotalSize->getValue(), Alignment);
    if (UseFramePointer) {
      _sub_sp(Ctx->getConstantInt32(Value));
    } else {
      // If we don't need a Frame Pointer, this alloca has a known offset to the
      // stack pointer. We don't need to adjust the stack pointer, nor assign
      // any value to Dest, as Dest is rematerializable.
      assert(Dest->isRematerializable());
      FixedAllocaSizeBytes += Value;
      Context.insert<InstFakeDef>(Dest);
    }
  } else {
    // Non-constant sizes need to be adjusted to the next highest multiple of
    // the required alignment at runtime.
    Variable *T = nullptr;
    if (Traits::Is64Bit && TotalSize->getType() != IceType_i64 &&
        !NeedSandboxing) {
      // On x86-64 (non-sandboxed) widen the size to 64 bits before adjusting
      // the 64-bit stack pointer.
      T = makeReg(IceType_i64);
      _movzx(T, TotalSize);
    } else {
      T = makeReg(IceType_i32);
      _mov(T, TotalSize);
    }
    // Round T up to the next multiple of Alignment.
    _add(T, Ctx->getConstantInt32(Alignment - 1));
    _and(T, Ctx->getConstantInt32(-Alignment));
    _sub_sp(T);
  }
  // Add enough to the returned address to account for the out args area.
  uint32_t OutArgsSize = maxOutArgsSizeBytes();
  if (OutArgsSize > 0) {
    Variable *T = makeReg(IceType_i32);
    auto *CalculateOperand = X86OperandMem::create(
        Func, IceType_void, esp, Ctx->getConstantInt(IceType_i32, OutArgsSize));
    _lea(T, CalculateOperand);
    _mov(Dest, T);
  } else {
    _mov(Dest, esp);
  }
}
1460
/// Rewrites the function's incoming arguments that arrive in registers:
/// each register-passed argument is replaced in the argument list by a new
/// "home register" variable pinned to the ABI register, and a prolog copy
/// from the home register to the original variable is inserted. Vector and
/// (when the ABI allows) scalar FP arguments consume XMM slots; scalar
/// integer arguments consume GPR slots. The loop stops early once both
/// register classes are exhausted.
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerArguments() {
  VarList &Args = Func->getArgs();
  unsigned NumXmmArgs = 0;
  bool XmmSlotsRemain = true;
  unsigned NumGprArgs = 0;
  bool GprSlotsRemain = true;

  Context.init(Func->getEntryNode());
  Context.setInsertPoint(Context.getCur());

  for (SizeT i = 0, End = Args.size();
       i < End && (XmmSlotsRemain || GprSlotsRemain); ++i) {
    Variable *Arg = Args[i];
    Type Ty = Arg->getType();
    Variable *RegisterArg = nullptr;
    RegNumT RegNum;
    if (isVectorType(Ty)) {
      RegNum = Traits::getRegisterForXmmArgNum(NumXmmArgs);
      if (RegNum.hasNoValue()) {
        // Out of XMM registers; remaining vector args stay on the stack.
        XmmSlotsRemain = false;
        continue;
      }
      ++NumXmmArgs;
      RegisterArg = Func->makeVariable(Ty);
    } else if (isScalarFloatingType(Ty)) {
      if (!Traits::X86_PASS_SCALAR_FP_IN_XMM) {
        // This ABI passes scalar FP on the stack; leave the argument alone.
        continue;
      }
      RegNum = Traits::getRegisterForXmmArgNum(NumXmmArgs);
      if (RegNum.hasNoValue()) {
        XmmSlotsRemain = false;
        continue;
      }
      ++NumXmmArgs;
      RegisterArg = Func->makeVariable(Ty);
    } else if (isScalarIntegerType(Ty)) {
      RegNum = Traits::getRegisterForGprArgNum(Ty, NumGprArgs);
      if (RegNum.hasNoValue()) {
        GprSlotsRemain = false;
        continue;
      }
      ++NumGprArgs;
      RegisterArg = Func->makeVariable(Ty);
    }
    // Every path that reaches here assigned both RegNum and RegisterArg.
    assert(RegNum.hasValue());
    assert(RegisterArg != nullptr);
    // Replace Arg in the argument list with the home register. Then generate
    // an instruction in the prolog to copy the home register to the assigned
    // location of Arg.
    if (BuildDefs::dump())
      RegisterArg->setName(Func, "home_reg:" + Arg->getName());
    RegisterArg->setRegNum(RegNum);
    RegisterArg->setIsArg();
    Arg->setIsArg(false);

    Args[i] = RegisterArg;
    Context.insert<InstAssign>(Arg, RegisterArg);
  }
}
1521
/// Strength-reduce scalar integer multiplication by a constant (for i32 or
/// narrower) for certain constants. The lea instruction can be used to multiply
/// by 3, 5, or 9, and the lsh instruction can be used to multiply by powers of
/// 2. These can be combined such that e.g. multiplying by 100 can be done as 2
/// lea-based multiplies by 5, combined with left-shifting by 2.
///
/// Returns true (and emits the lowered sequence into Dest) when Src1 factors
/// entirely into 9s, 5s, 3s, and 2s within the operation budget; returns
/// false to let the caller fall back to a regular multiply.
template <typename TraitsType>
bool TargetX86Base<TraitsType>::optimizeScalarMul(Variable *Dest, Operand *Src0,
                                                  int32_t Src1) {
  // Disable this optimization for Om1 and O0, just to keep things simple
  // there.
  if (Func->getOptLevel() < Opt_1)
    return false;
  Type Ty = Dest->getType();
  // Trivial multipliers: -1 is a negate, 0 is a zero, 1 is a copy.
  if (Src1 == -1) {
    Variable *T = nullptr;
    _mov(T, Src0);
    _neg(T);
    _mov(Dest, T);
    return true;
  }
  if (Src1 == 0) {
    _mov(Dest, Ctx->getConstantZero(Ty));
    return true;
  }
  if (Src1 == 1) {
    Variable *T = nullptr;
    _mov(T, Src0);
    _mov(Dest, T);
    return true;
  }
  // Don't bother with the edge case where Src1 == MININT.
  if (Src1 == -Src1)
    return false;
  const bool Src1IsNegative = Src1 < 0;
  if (Src1IsNegative)
    Src1 = -Src1;
  // Factor |Src1| into 9s, 5s, 3s, and 2s, counting the instructions needed
  // (one lea per 9/5/3 factor; all the 2s fold into a single shl).
  uint32_t Count9 = 0;
  uint32_t Count5 = 0;
  uint32_t Count3 = 0;
  uint32_t Count2 = 0;
  uint32_t CountOps = 0;
  while (Src1 > 1) {
    if (Src1 % 9 == 0) {
      ++CountOps;
      ++Count9;
      Src1 /= 9;
    } else if (Src1 % 5 == 0) {
      ++CountOps;
      ++Count5;
      Src1 /= 5;
    } else if (Src1 % 3 == 0) {
      ++CountOps;
      ++Count3;
      Src1 /= 3;
    } else if (Src1 % 2 == 0) {
      if (Count2 == 0)
        ++CountOps;
      ++Count2;
      Src1 /= 2;
    } else {
      // A prime factor other than 2/3/5/9 remains; give up.
      return false;
    }
  }
  // The lea-based multiplies are only emitted when Ty is i32 (or i64 on
  // x86-64); for other (narrower) types, bail out if any 3/5/9 factors were
  // counted.
  if (Ty != IceType_i32 && !(Traits::Is64Bit && Ty == IceType_i64) &&
      (Count3 || Count5 || Count9))
    return false;
  // Limit the number of lea/shl operations for a single multiply, to a
  // somewhat arbitrary choice of 3.
  constexpr uint32_t MaxOpsForOptimizedMul = 3;
  if (CountOps > MaxOpsForOptimizedMul)
    return false;
  Variable *T = makeReg(Traits::WordType);
  if (typeWidthInBytes(Src0->getType()) < typeWidthInBytes(T->getType())) {
    _movzx(T, Src0);
  } else {
    _mov(T, Src0);
  }
  Constant *Zero = Ctx->getConstantZero(IceType_i32);
  // Each lea computes T = T + (T << Shift), i.e. T * 9, T * 5, or T * 3.
  for (uint32_t i = 0; i < Count9; ++i) {
    constexpr uint16_t Shift = 3; // log2(9-1)
    _lea(T, X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
  }
  for (uint32_t i = 0; i < Count5; ++i) {
    constexpr uint16_t Shift = 2; // log2(5-1)
    _lea(T, X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
  }
  for (uint32_t i = 0; i < Count3; ++i) {
    constexpr uint16_t Shift = 1; // log2(3-1)
    _lea(T, X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
  }
  if (Count2) {
    _shl(T, Ctx->getConstantInt(Ty, Count2));
  }
  if (Src1IsNegative)
    _neg(T);
  _mov(Dest, T);
  return true;
}
1621
/// Lowers a 64-bit shift (Shl/Lshr/Ashr) on x86-32, where the value lives in
/// a lo/hi 32-bit register pair. Constant shift amounts are specialized for
/// the >32, ==32, and <32 cases; non-constant amounts use shld/shrd plus a
/// runtime test of bit 5 of the count with a conditional fix-up. Only the low
/// 32 bits of the shift amount (Src1Lo) are consulted.
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerShift64(InstArithmetic::OpKind Op,
                                             Operand *Src0Lo, Operand *Src0Hi,
                                             Operand *Src1Lo, Variable *DestLo,
                                             Variable *DestHi) {
  // TODO: Refactor the similarities between Shl, Lshr, and Ashr.
  Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
  Constant *Zero = Ctx->getConstantZero(IceType_i32);
  Constant *SignExtend = Ctx->getConstantInt32(0x1f);
  if (auto *ConstantShiftAmount = llvm::dyn_cast<ConstantInteger32>(Src1Lo)) {
    uint32_t ShiftAmount = ConstantShiftAmount->getValue();
    if (ShiftAmount > 32) {
      // More than a full word of shift: one half is shifted by (amount - 32)
      // and the other half becomes 0 (or the sign for Ashr).
      Constant *ReducedShift = Ctx->getConstantInt32(ShiftAmount - 32);
      switch (Op) {
      default:
        assert(0 && "non-shift op");
        break;
      case InstArithmetic::Shl: {
        // a=b<<c ==>
        //   t2 = b.lo
        //   t2 = shl t2, ShiftAmount-32
        //   t3 = t2
        //   t2 = 0
        _mov(T_2, Src0Lo);
        _shl(T_2, ReducedShift);
        _mov(DestHi, T_2);
        _mov(DestLo, Zero);
      } break;
      case InstArithmetic::Lshr: {
        // a=b>>c (unsigned) ==>
        //   t2 = b.hi
        //   t2 = shr t2, ShiftAmount-32
        //   a.lo = t2
        //   a.hi = 0
        _mov(T_2, Src0Hi);
        _shr(T_2, ReducedShift);
        _mov(DestLo, T_2);
        _mov(DestHi, Zero);
      } break;
      case InstArithmetic::Ashr: {
        // a=b>>c (signed) ==>
        //   t3 = b.hi
        //   t3 = sar t3, 0x1f
        //   t2 = b.hi
        //   t2 = shrd t2, t3, ShiftAmount-32
        //   a.lo = t2
        //   a.hi = t3
        _mov(T_3, Src0Hi);
        _sar(T_3, SignExtend);
        _mov(T_2, Src0Hi);
        _shrd(T_2, T_3, ReducedShift);
        _mov(DestLo, T_2);
        _mov(DestHi, T_3);
      } break;
      }
    } else if (ShiftAmount == 32) {
      // Exactly one full word: pure register moves, no shifting needed.
      switch (Op) {
      default:
        assert(0 && "non-shift op");
        break;
      case InstArithmetic::Shl: {
        // a=b<<c ==>
        //   t2 = b.lo
        //   a.hi = t2
        //   a.lo = 0
        _mov(T_2, Src0Lo);
        _mov(DestHi, T_2);
        _mov(DestLo, Zero);
      } break;
      case InstArithmetic::Lshr: {
        // a=b>>c (unsigned) ==>
        //   t2 = b.hi
        //   a.lo = t2
        //   a.hi = 0
        _mov(T_2, Src0Hi);
        _mov(DestLo, T_2);
        _mov(DestHi, Zero);
      } break;
      case InstArithmetic::Ashr: {
        // a=b>>c (signed) ==>
        //   t2 = b.hi
        //   a.lo = t2
        //   t3 = b.hi
        //   t3 = sar t3, 0x1f
        //   a.hi = t3
        _mov(T_2, Src0Hi);
        _mov(DestLo, T_2);
        _mov(T_3, Src0Hi);
        _sar(T_3, SignExtend);
        _mov(DestHi, T_3);
      } break;
      }
    } else {
      // Shift amount < 32: use the double-precision shifts (shld/shrd) so
      // bits crossing the word boundary are carried between halves.
      // COMMON PREFIX OF: a=b SHIFT_OP c ==>
      //   t2 = b.lo
      //   t3 = b.hi
      _mov(T_2, Src0Lo);
      _mov(T_3, Src0Hi);
      switch (Op) {
      default:
        assert(0 && "non-shift op");
        break;
      case InstArithmetic::Shl: {
        // a=b<<c ==>
        //   t3 = shld t3, t2, ShiftAmount
        //   t2 = shl t2, ShiftAmount
        _shld(T_3, T_2, ConstantShiftAmount);
        _shl(T_2, ConstantShiftAmount);
      } break;
      case InstArithmetic::Lshr: {
        // a=b>>c (unsigned) ==>
        //   t2 = shrd t2, t3, ShiftAmount
        //   t3 = shr t3, ShiftAmount
        _shrd(T_2, T_3, ConstantShiftAmount);
        _shr(T_3, ConstantShiftAmount);
      } break;
      case InstArithmetic::Ashr: {
        // a=b>>c (signed) ==>
        //   t2 = shrd t2, t3, ShiftAmount
        //   t3 = sar t3, ShiftAmount
        _shrd(T_2, T_3, ConstantShiftAmount);
        _sar(T_3, ConstantShiftAmount);
      } break;
      }
      // COMMON SUFFIX OF: a=b SHIFT_OP c ==>
      //   a.lo = t2
      //   a.hi = t3
      _mov(DestLo, T_2);
      _mov(DestHi, T_3);
    }
  } else {
    // NON-CONSTANT CASES.
    Constant *BitTest = Ctx->getConstantInt32(0x20);
    InstX86Label *Label = InstX86Label::create(Func, this);
    // COMMON PREFIX OF: a=b SHIFT_OP c ==>
    //   t1:ecx = c.lo & 0xff
    //   t2 = b.lo
    //   t3 = b.hi
    T_1 = copyToReg8(Src1Lo, Traits::RegisterSet::Reg_cl);
    _mov(T_2, Src0Lo);
    _mov(T_3, Src0Hi);
    switch (Op) {
    default:
      assert(0 && "non-shift op");
      break;
    case InstArithmetic::Shl: {
      // a=b<<c ==>
      //   t3 = shld t3, t2, t1
      //   t2 = shl t2, t1
      //   test t1, 0x20
      //   je L1
      //   use(t3)
      //   t3 = t2
      //   t2 = 0
      _shld(T_3, T_2, T_1);
      _shl(T_2, T_1);
      _test(T_1, BitTest);
      _br(Traits::Cond::Br_e, Label);
      // T_2 and T_3 are being assigned again because of the intra-block control
      // flow, so we need to use _redefined to avoid liveness problems.
      _redefined(_mov(T_3, T_2));
      _redefined(_mov(T_2, Zero));
    } break;
    case InstArithmetic::Lshr: {
      // a=b>>c (unsigned) ==>
      //   t2 = shrd t2, t3, t1
      //   t3 = shr t3, t1
      //   test t1, 0x20
      //   je L1
      //   use(t2)
      //   t2 = t3
      //   t3 = 0
      _shrd(T_2, T_3, T_1);
      _shr(T_3, T_1);
      _test(T_1, BitTest);
      _br(Traits::Cond::Br_e, Label);
      // T_2 and T_3 are being assigned again because of the intra-block control
      // flow, so we need to use _redefined to avoid liveness problems.
      _redefined(_mov(T_2, T_3));
      _redefined(_mov(T_3, Zero));
    } break;
    case InstArithmetic::Ashr: {
      // a=b>>c (signed) ==>
      //   t2 = shrd t2, t3, t1
      //   t3 = sar t3, t1
      //   test t1, 0x20
      //   je L1
      //   use(t2)
      //   t2 = t3
      //   t3 = sar t3, 0x1f
      Constant *SignExtend = Ctx->getConstantInt32(0x1f);
      _shrd(T_2, T_3, T_1);
      _sar(T_3, T_1);
      _test(T_1, BitTest);
      _br(Traits::Cond::Br_e, Label);
      // T_2 and T_3 are being assigned again because of the intra-block control
      // flow, so T_2 needs to use _redefined to avoid liveness problems. T_3
      // doesn't need special treatment because it is reassigned via _sar
      // instead of _mov.
      _redefined(_mov(T_2, T_3));
      _sar(T_3, SignExtend);
    } break;
    }
    // COMMON SUFFIX OF: a=b SHIFT_OP c ==>
    // L1:
    //   a.lo = t2
    //   a.hi = t3
    Context.insert(Label);
    _mov(DestLo, T_2);
    _mov(DestHi, T_3);
  }
}
1834
John Porto4a566862016-01-04 09:33:41 -08001835template <typename TraitsType>
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08001836void TargetX86Base<TraitsType>::lowerArithmetic(const InstArithmetic *Instr) {
1837 Variable *Dest = Instr->getDest();
Jim Stichnoth3607b6c2015-11-13 14:28:23 -08001838 if (Dest->isRematerializable()) {
John Porto1d937a82015-12-17 06:19:34 -08001839 Context.insert<InstFakeDef>(Dest);
Jim Stichnoth3607b6c2015-11-13 14:28:23 -08001840 return;
1841 }
Jim Stichnothc59288b2015-11-09 11:38:40 -08001842 Type Ty = Dest->getType();
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08001843 Operand *Src0 = legalize(Instr->getSrc(0));
1844 Operand *Src1 = legalize(Instr->getSrc(1));
1845 if (Instr->isCommutative()) {
David Sehr487bad02015-10-06 17:41:26 -07001846 uint32_t SwapCount = 0;
1847 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1)) {
John Porto7e93c622015-06-23 10:58:57 -07001848 std::swap(Src0, Src1);
David Sehr487bad02015-10-06 17:41:26 -07001849 ++SwapCount;
1850 }
1851 if (llvm::isa<Constant>(Src0) && !llvm::isa<Constant>(Src1)) {
John Porto7e93c622015-06-23 10:58:57 -07001852 std::swap(Src0, Src1);
David Sehr487bad02015-10-06 17:41:26 -07001853 ++SwapCount;
1854 }
1855 // Improve two-address code patterns by avoiding a copy to the dest
1856 // register when one of the source operands ends its lifetime here.
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08001857 if (!Instr->isLastUse(Src0) && Instr->isLastUse(Src1)) {
David Sehr487bad02015-10-06 17:41:26 -07001858 std::swap(Src0, Src1);
1859 ++SwapCount;
1860 }
1861 assert(SwapCount <= 1);
Karl Schimpfa313a122015-10-08 10:40:57 -07001862 (void)SwapCount;
John Porto7e93c622015-06-23 10:58:57 -07001863 }
Jim Stichnothc59288b2015-11-09 11:38:40 -08001864 if (!Traits::Is64Bit && Ty == IceType_i64) {
John Porto1d235422015-08-12 12:37:53 -07001865 // These x86-32 helper-call-involved instructions are lowered in this
Andrew Scull57e12682015-09-16 11:30:19 -07001866 // separate switch. This is because loOperand() and hiOperand() may insert
1867 // redundant instructions for constant blinding and pooling. Such redundant
1868 // instructions will fail liveness analysis under -Om1 setting. And,
1869 // actually these arguments do not need to be processed with loOperand()
1870 // and hiOperand() to be used.
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08001871 switch (Instr->getOp()) {
David Sehr26217e32015-11-26 13:03:50 -08001872 case InstArithmetic::Udiv:
1873 case InstArithmetic::Sdiv:
1874 case InstArithmetic::Urem:
1875 case InstArithmetic::Srem:
1876 llvm::report_fatal_error("Helper call was expected");
John Porto7e93c622015-06-23 10:58:57 -07001877 return;
John Porto7e93c622015-06-23 10:58:57 -07001878 default:
1879 break;
1880 }
1881
Jim Stichnoth54f3d512015-12-11 09:53:00 -08001882 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
1883 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
John Porto7e93c622015-06-23 10:58:57 -07001884 Operand *Src0Lo = loOperand(Src0);
1885 Operand *Src0Hi = hiOperand(Src0);
1886 Operand *Src1Lo = loOperand(Src1);
1887 Operand *Src1Hi = hiOperand(Src1);
1888 Variable *T_Lo = nullptr, *T_Hi = nullptr;
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08001889 switch (Instr->getOp()) {
John Porto7e93c622015-06-23 10:58:57 -07001890 case InstArithmetic::_num:
1891 llvm_unreachable("Unknown arithmetic operator");
1892 break;
1893 case InstArithmetic::Add:
1894 _mov(T_Lo, Src0Lo);
1895 _add(T_Lo, Src1Lo);
1896 _mov(DestLo, T_Lo);
1897 _mov(T_Hi, Src0Hi);
1898 _adc(T_Hi, Src1Hi);
1899 _mov(DestHi, T_Hi);
1900 break;
1901 case InstArithmetic::And:
1902 _mov(T_Lo, Src0Lo);
1903 _and(T_Lo, Src1Lo);
1904 _mov(DestLo, T_Lo);
1905 _mov(T_Hi, Src0Hi);
1906 _and(T_Hi, Src1Hi);
1907 _mov(DestHi, T_Hi);
1908 break;
1909 case InstArithmetic::Or:
1910 _mov(T_Lo, Src0Lo);
1911 _or(T_Lo, Src1Lo);
1912 _mov(DestLo, T_Lo);
1913 _mov(T_Hi, Src0Hi);
1914 _or(T_Hi, Src1Hi);
1915 _mov(DestHi, T_Hi);
1916 break;
1917 case InstArithmetic::Xor:
1918 _mov(T_Lo, Src0Lo);
1919 _xor(T_Lo, Src1Lo);
1920 _mov(DestLo, T_Lo);
1921 _mov(T_Hi, Src0Hi);
1922 _xor(T_Hi, Src1Hi);
1923 _mov(DestHi, T_Hi);
1924 break;
1925 case InstArithmetic::Sub:
1926 _mov(T_Lo, Src0Lo);
1927 _sub(T_Lo, Src1Lo);
1928 _mov(DestLo, T_Lo);
1929 _mov(T_Hi, Src0Hi);
1930 _sbb(T_Hi, Src1Hi);
1931 _mov(DestHi, T_Hi);
1932 break;
1933 case InstArithmetic::Mul: {
1934 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
John Porto5d0acff2015-06-30 15:29:21 -07001935 Variable *T_4Lo = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
1936 Variable *T_4Hi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
John Porto7e93c622015-06-23 10:58:57 -07001937 // gcc does the following:
1938 // a=b*c ==>
1939 // t1 = b.hi; t1 *=(imul) c.lo
1940 // t2 = c.hi; t2 *=(imul) b.lo
1941 // t3:eax = b.lo
1942 // t4.hi:edx,t4.lo:eax = t3:eax *(mul) c.lo
1943 // a.lo = t4.lo
1944 // t4.hi += t1
1945 // t4.hi += t2
1946 // a.hi = t4.hi
1947 // The mul instruction cannot take an immediate operand.
1948 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Mem);
1949 _mov(T_1, Src0Hi);
1950 _imul(T_1, Src1Lo);
John Porto5d0acff2015-06-30 15:29:21 -07001951 _mov(T_3, Src0Lo, Traits::RegisterSet::Reg_eax);
John Porto7e93c622015-06-23 10:58:57 -07001952 _mul(T_4Lo, T_3, Src1Lo);
Andrew Scull57e12682015-09-16 11:30:19 -07001953 // The mul instruction produces two dest variables, edx:eax. We create a
1954 // fake definition of edx to account for this.
John Porto1d937a82015-12-17 06:19:34 -08001955 Context.insert<InstFakeDef>(T_4Hi, T_4Lo);
Jim Stichnoth28df6ba2016-02-05 15:43:24 -08001956 Context.insert<InstFakeUse>(T_4Hi);
John Porto7e93c622015-06-23 10:58:57 -07001957 _mov(DestLo, T_4Lo);
1958 _add(T_4Hi, T_1);
Jim Stichnothb40595a2016-01-29 06:14:31 -08001959 _mov(T_2, Src1Hi);
1960 _imul(T_2, Src0Lo);
John Porto7e93c622015-06-23 10:58:57 -07001961 _add(T_4Hi, T_2);
1962 _mov(DestHi, T_4Hi);
1963 } break;
David Sehr188eae52015-09-24 11:42:55 -07001964 case InstArithmetic::Shl:
1965 case InstArithmetic::Lshr:
1966 case InstArithmetic::Ashr:
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08001967 lowerShift64(Instr->getOp(), Src0Lo, Src0Hi, Src1Lo, DestLo, DestHi);
David Sehr188eae52015-09-24 11:42:55 -07001968 break;
John Porto7e93c622015-06-23 10:58:57 -07001969 case InstArithmetic::Fadd:
1970 case InstArithmetic::Fsub:
1971 case InstArithmetic::Fmul:
1972 case InstArithmetic::Fdiv:
1973 case InstArithmetic::Frem:
1974 llvm_unreachable("FP instruction with i64 type");
1975 break;
1976 case InstArithmetic::Udiv:
1977 case InstArithmetic::Sdiv:
1978 case InstArithmetic::Urem:
1979 case InstArithmetic::Srem:
1980 llvm_unreachable("Call-helper-involved instruction for i64 type \
1981 should have already been handled before");
1982 break;
1983 }
1984 return;
1985 }
Jim Stichnothc59288b2015-11-09 11:38:40 -08001986 if (isVectorType(Ty)) {
Andrew Scull57e12682015-09-16 11:30:19 -07001987 // TODO: Trap on integer divide and integer modulo by zero. See:
1988 // https://code.google.com/p/nativeclient/issues/detail?id=3899
John Porto4a566862016-01-04 09:33:41 -08001989 if (llvm::isa<X86OperandMem>(Src1))
Andrew Scull97f460d2015-07-21 10:07:42 -07001990 Src1 = legalizeToReg(Src1);
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08001991 switch (Instr->getOp()) {
John Porto7e93c622015-06-23 10:58:57 -07001992 case InstArithmetic::_num:
1993 llvm_unreachable("Unknown arithmetic operator");
1994 break;
1995 case InstArithmetic::Add: {
Jim Stichnothc59288b2015-11-09 11:38:40 -08001996 Variable *T = makeReg(Ty);
John Porto7e93c622015-06-23 10:58:57 -07001997 _movp(T, Src0);
1998 _padd(T, Src1);
1999 _movp(Dest, T);
2000 } break;
2001 case InstArithmetic::And: {
Jim Stichnothc59288b2015-11-09 11:38:40 -08002002 Variable *T = makeReg(Ty);
John Porto7e93c622015-06-23 10:58:57 -07002003 _movp(T, Src0);
2004 _pand(T, Src1);
2005 _movp(Dest, T);
2006 } break;
2007 case InstArithmetic::Or: {
Jim Stichnothc59288b2015-11-09 11:38:40 -08002008 Variable *T = makeReg(Ty);
John Porto7e93c622015-06-23 10:58:57 -07002009 _movp(T, Src0);
2010 _por(T, Src1);
2011 _movp(Dest, T);
2012 } break;
2013 case InstArithmetic::Xor: {
Jim Stichnothc59288b2015-11-09 11:38:40 -08002014 Variable *T = makeReg(Ty);
John Porto7e93c622015-06-23 10:58:57 -07002015 _movp(T, Src0);
2016 _pxor(T, Src1);
2017 _movp(Dest, T);
2018 } break;
2019 case InstArithmetic::Sub: {
Jim Stichnothc59288b2015-11-09 11:38:40 -08002020 Variable *T = makeReg(Ty);
John Porto7e93c622015-06-23 10:58:57 -07002021 _movp(T, Src0);
2022 _psub(T, Src1);
2023 _movp(Dest, T);
2024 } break;
2025 case InstArithmetic::Mul: {
Jim Stichnothc59288b2015-11-09 11:38:40 -08002026 bool TypesAreValidForPmull = Ty == IceType_v4i32 || Ty == IceType_v8i16;
John Porto7e93c622015-06-23 10:58:57 -07002027 bool InstructionSetIsValidForPmull =
Jim Stichnothc59288b2015-11-09 11:38:40 -08002028 Ty == IceType_v8i16 || InstructionSet >= Traits::SSE4_1;
John Porto7e93c622015-06-23 10:58:57 -07002029 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) {
Jim Stichnothc59288b2015-11-09 11:38:40 -08002030 Variable *T = makeReg(Ty);
John Porto7e93c622015-06-23 10:58:57 -07002031 _movp(T, Src0);
Jim Stichnothebbb5912015-10-05 15:12:09 -07002032 _pmull(T, Src0 == Src1 ? T : Src1);
John Porto7e93c622015-06-23 10:58:57 -07002033 _movp(Dest, T);
Jim Stichnothc59288b2015-11-09 11:38:40 -08002034 } else if (Ty == IceType_v4i32) {
John Porto7e93c622015-06-23 10:58:57 -07002035 // Lowering sequence:
2036 // Note: The mask arguments have index 0 on the left.
2037 //
2038 // movups T1, Src0
2039 // pshufd T2, Src0, {1,0,3,0}
2040 // pshufd T3, Src1, {1,0,3,0}
2041 // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]}
2042 // pmuludq T1, Src1
2043 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]}
2044 // pmuludq T2, T3
2045 // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])}
2046 // shufps T1, T2, {0,2,0,2}
2047 // pshufd T4, T1, {0,2,1,3}
2048 // movups Dest, T4
2049
2050 // Mask that directs pshufd to create a vector with entries
2051 // Src[1, 0, 3, 0]
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07002052 constexpr unsigned Constant1030 = 0x31;
John Porto7e93c622015-06-23 10:58:57 -07002053 Constant *Mask1030 = Ctx->getConstantInt32(Constant1030);
2054 // Mask that directs shufps to create a vector with entries
2055 // Dest[0, 2], Src[0, 2]
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07002056 constexpr unsigned Mask0202 = 0x88;
John Porto7e93c622015-06-23 10:58:57 -07002057 // Mask that directs pshufd to create a vector with entries
2058 // Src[0, 2, 1, 3]
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07002059 constexpr unsigned Mask0213 = 0xd8;
John Porto7e93c622015-06-23 10:58:57 -07002060 Variable *T1 = makeReg(IceType_v4i32);
2061 Variable *T2 = makeReg(IceType_v4i32);
2062 Variable *T3 = makeReg(IceType_v4i32);
2063 Variable *T4 = makeReg(IceType_v4i32);
2064 _movp(T1, Src0);
2065 _pshufd(T2, Src0, Mask1030);
2066 _pshufd(T3, Src1, Mask1030);
2067 _pmuludq(T1, Src1);
2068 _pmuludq(T2, T3);
2069 _shufps(T1, T2, Ctx->getConstantInt32(Mask0202));
2070 _pshufd(T4, T1, Ctx->getConstantInt32(Mask0213));
2071 _movp(Dest, T4);
Jim Stichnothc59288b2015-11-09 11:38:40 -08002072 } else if (Ty == IceType_v16i8) {
David Sehr26217e32015-11-26 13:03:50 -08002073 llvm::report_fatal_error("Scalarized operation was expected");
Jim Stichnothebbb5912015-10-05 15:12:09 -07002074 } else {
2075 llvm::report_fatal_error("Invalid vector multiply type");
John Porto7e93c622015-06-23 10:58:57 -07002076 }
2077 } break;
2078 case InstArithmetic::Shl:
2079 case InstArithmetic::Lshr:
2080 case InstArithmetic::Ashr:
2081 case InstArithmetic::Udiv:
2082 case InstArithmetic::Urem:
2083 case InstArithmetic::Sdiv:
2084 case InstArithmetic::Srem:
David Sehr26217e32015-11-26 13:03:50 -08002085 llvm::report_fatal_error("Scalarized operation was expected");
John Porto7e93c622015-06-23 10:58:57 -07002086 break;
2087 case InstArithmetic::Fadd: {
Jim Stichnothc59288b2015-11-09 11:38:40 -08002088 Variable *T = makeReg(Ty);
John Porto7e93c622015-06-23 10:58:57 -07002089 _movp(T, Src0);
2090 _addps(T, Src1);
2091 _movp(Dest, T);
2092 } break;
2093 case InstArithmetic::Fsub: {
Jim Stichnothc59288b2015-11-09 11:38:40 -08002094 Variable *T = makeReg(Ty);
John Porto7e93c622015-06-23 10:58:57 -07002095 _movp(T, Src0);
2096 _subps(T, Src1);
2097 _movp(Dest, T);
2098 } break;
2099 case InstArithmetic::Fmul: {
Jim Stichnothc59288b2015-11-09 11:38:40 -08002100 Variable *T = makeReg(Ty);
John Porto7e93c622015-06-23 10:58:57 -07002101 _movp(T, Src0);
Jim Stichnothebbb5912015-10-05 15:12:09 -07002102 _mulps(T, Src0 == Src1 ? T : Src1);
John Porto7e93c622015-06-23 10:58:57 -07002103 _movp(Dest, T);
2104 } break;
2105 case InstArithmetic::Fdiv: {
Jim Stichnothc59288b2015-11-09 11:38:40 -08002106 Variable *T = makeReg(Ty);
John Porto7e93c622015-06-23 10:58:57 -07002107 _movp(T, Src0);
2108 _divps(T, Src1);
2109 _movp(Dest, T);
2110 } break;
2111 case InstArithmetic::Frem:
David Sehr26217e32015-11-26 13:03:50 -08002112 llvm::report_fatal_error("Scalarized operation was expected");
John Porto7e93c622015-06-23 10:58:57 -07002113 break;
2114 }
2115 return;
2116 }
2117 Variable *T_edx = nullptr;
2118 Variable *T = nullptr;
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08002119 switch (Instr->getOp()) {
John Porto7e93c622015-06-23 10:58:57 -07002120 case InstArithmetic::_num:
2121 llvm_unreachable("Unknown arithmetic operator");
2122 break;
2123 case InstArithmetic::Add:
2124 _mov(T, Src0);
2125 _add(T, Src1);
2126 _mov(Dest, T);
2127 break;
2128 case InstArithmetic::And:
2129 _mov(T, Src0);
2130 _and(T, Src1);
2131 _mov(Dest, T);
2132 break;
2133 case InstArithmetic::Or:
2134 _mov(T, Src0);
2135 _or(T, Src1);
2136 _mov(Dest, T);
2137 break;
2138 case InstArithmetic::Xor:
2139 _mov(T, Src0);
2140 _xor(T, Src1);
2141 _mov(Dest, T);
2142 break;
2143 case InstArithmetic::Sub:
2144 _mov(T, Src0);
2145 _sub(T, Src1);
2146 _mov(Dest, T);
2147 break;
2148 case InstArithmetic::Mul:
2149 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
2150 if (optimizeScalarMul(Dest, Src0, C->getValue()))
2151 return;
2152 }
Andrew Scull57e12682015-09-16 11:30:19 -07002153 // The 8-bit version of imul only allows the form "imul r/m8" where T must
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07002154 // be in al.
Jim Stichnothc59288b2015-11-09 11:38:40 -08002155 if (isByteSizedArithType(Ty)) {
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07002156 _mov(T, Src0, Traits::RegisterSet::Reg_al);
John Porto7e93c622015-06-23 10:58:57 -07002157 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
David Sehre11f8782015-10-06 10:26:57 -07002158 _imul(T, Src0 == Src1 ? T : Src1);
2159 _mov(Dest, T);
2160 } else if (auto *ImmConst = llvm::dyn_cast<ConstantInteger32>(Src1)) {
Jim Stichnothc59288b2015-11-09 11:38:40 -08002161 T = makeReg(Ty);
David Sehre11f8782015-10-06 10:26:57 -07002162 _imul_imm(T, Src0, ImmConst);
2163 _mov(Dest, T);
John Porto7e93c622015-06-23 10:58:57 -07002164 } else {
2165 _mov(T, Src0);
David Sehre11f8782015-10-06 10:26:57 -07002166 _imul(T, Src0 == Src1 ? T : Src1);
2167 _mov(Dest, T);
John Porto7e93c622015-06-23 10:58:57 -07002168 }
John Porto7e93c622015-06-23 10:58:57 -07002169 break;
2170 case InstArithmetic::Shl:
2171 _mov(T, Src0);
Jim Stichnothc59288b2015-11-09 11:38:40 -08002172 if (!llvm::isa<ConstantInteger32>(Src1))
2173 Src1 = copyToReg8(Src1, Traits::RegisterSet::Reg_cl);
John Porto7e93c622015-06-23 10:58:57 -07002174 _shl(T, Src1);
2175 _mov(Dest, T);
2176 break;
2177 case InstArithmetic::Lshr:
2178 _mov(T, Src0);
Jim Stichnothc59288b2015-11-09 11:38:40 -08002179 if (!llvm::isa<ConstantInteger32>(Src1))
2180 Src1 = copyToReg8(Src1, Traits::RegisterSet::Reg_cl);
John Porto7e93c622015-06-23 10:58:57 -07002181 _shr(T, Src1);
2182 _mov(Dest, T);
2183 break;
2184 case InstArithmetic::Ashr:
2185 _mov(T, Src0);
Jim Stichnothc59288b2015-11-09 11:38:40 -08002186 if (!llvm::isa<ConstantInteger32>(Src1))
2187 Src1 = copyToReg8(Src1, Traits::RegisterSet::Reg_cl);
John Porto7e93c622015-06-23 10:58:57 -07002188 _sar(T, Src1);
2189 _mov(Dest, T);
2190 break;
Jim Stichnothc59288b2015-11-09 11:38:40 -08002191 case InstArithmetic::Udiv: {
John Porto7e93c622015-06-23 10:58:57 -07002192 // div and idiv are the few arithmetic operators that do not allow
2193 // immediates as the operand.
2194 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
Jim Stichnoth8aa39662016-02-10 11:20:30 -08002195 RegNumT Eax;
2196 RegNumT Edx;
Jim Stichnothc59288b2015-11-09 11:38:40 -08002197 switch (Ty) {
2198 default:
John Porto3c275ce2015-12-22 08:14:00 -08002199 llvm::report_fatal_error("Bad type for udiv");
2200 case IceType_i64:
2201 Eax = Traits::getRaxOrDie();
2202 Edx = Traits::getRdxOrDie();
John Porto008f4ce2015-12-24 13:22:18 -08002203 break;
Jim Stichnothc59288b2015-11-09 11:38:40 -08002204 case IceType_i32:
John Porto3c275ce2015-12-22 08:14:00 -08002205 Eax = Traits::RegisterSet::Reg_eax;
2206 Edx = Traits::RegisterSet::Reg_edx;
Jim Stichnothc59288b2015-11-09 11:38:40 -08002207 break;
2208 case IceType_i16:
2209 Eax = Traits::RegisterSet::Reg_ax;
2210 Edx = Traits::RegisterSet::Reg_dx;
2211 break;
2212 case IceType_i8:
2213 Eax = Traits::RegisterSet::Reg_al;
2214 Edx = Traits::RegisterSet::Reg_ah;
2215 break;
John Porto7e93c622015-06-23 10:58:57 -07002216 }
John Porto008f4ce2015-12-24 13:22:18 -08002217 T_edx = makeReg(Ty, Edx);
Jim Stichnothc59288b2015-11-09 11:38:40 -08002218 _mov(T, Src0, Eax);
John Porto008f4ce2015-12-24 13:22:18 -08002219 _mov(T_edx, Ctx->getConstantZero(Ty));
Jim Stichnothc59288b2015-11-09 11:38:40 -08002220 _div(T, Src1, T_edx);
2221 _mov(Dest, T);
2222 } break;
John Porto7e93c622015-06-23 10:58:57 -07002223 case InstArithmetic::Sdiv:
Andrew Scull57e12682015-09-16 11:30:19 -07002224 // TODO(stichnot): Enable this after doing better performance and cross
2225 // testing.
Jim Stichnothdd6dcfa2016-04-18 12:52:09 -07002226 if (false && Func->getOptLevel() >= Opt_1) {
Andrew Scull57e12682015-09-16 11:30:19 -07002227 // Optimize division by constant power of 2, but not for Om1 or O0, just
2228 // to keep things simple there.
John Porto7e93c622015-06-23 10:58:57 -07002229 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08002230 const int32_t Divisor = C->getValue();
2231 const uint32_t UDivisor = Divisor;
John Porto7e93c622015-06-23 10:58:57 -07002232 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {
2233 uint32_t LogDiv = llvm::Log2_32(UDivisor);
John Porto7e93c622015-06-23 10:58:57 -07002234 // LLVM does the following for dest=src/(1<<log):
2235 // t=src
2236 // sar t,typewidth-1 // -1 if src is negative, 0 if not
2237 // shr t,typewidth-log
2238 // add t,src
2239 // sar t,log
2240 // dest=t
2241 uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty);
2242 _mov(T, Src0);
Andrew Scull57e12682015-09-16 11:30:19 -07002243 // If for some reason we are dividing by 1, just treat it like an
2244 // assignment.
John Porto7e93c622015-06-23 10:58:57 -07002245 if (LogDiv > 0) {
2246 // The initial sar is unnecessary when dividing by 2.
2247 if (LogDiv > 1)
2248 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1));
2249 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv));
2250 _add(T, Src0);
2251 _sar(T, Ctx->getConstantInt(Ty, LogDiv));
2252 }
2253 _mov(Dest, T);
2254 return;
2255 }
2256 }
2257 }
2258 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
Jim Stichnothc59288b2015-11-09 11:38:40 -08002259 switch (Ty) {
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07002260 default:
John Porto3c275ce2015-12-22 08:14:00 -08002261 llvm::report_fatal_error("Bad type for sdiv");
2262 case IceType_i64:
2263 T_edx = makeReg(Ty, Traits::getRdxOrDie());
2264 _mov(T, Src0, Traits::getRaxOrDie());
2265 break;
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07002266 case IceType_i32:
2267 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_edx);
John Porto5d0acff2015-06-30 15:29:21 -07002268 _mov(T, Src0, Traits::RegisterSet::Reg_eax);
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07002269 break;
2270 case IceType_i16:
2271 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_dx);
2272 _mov(T, Src0, Traits::RegisterSet::Reg_ax);
2273 break;
2274 case IceType_i8:
2275 T_edx = makeReg(IceType_i16, Traits::RegisterSet::Reg_ax);
2276 _mov(T, Src0, Traits::RegisterSet::Reg_al);
2277 break;
John Porto7e93c622015-06-23 10:58:57 -07002278 }
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07002279 _cbwdq(T_edx, T);
2280 _idiv(T, Src1, T_edx);
2281 _mov(Dest, T);
John Porto7e93c622015-06-23 10:58:57 -07002282 break;
Jim Stichnothc59288b2015-11-09 11:38:40 -08002283 case InstArithmetic::Urem: {
John Porto7e93c622015-06-23 10:58:57 -07002284 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
Jim Stichnoth8aa39662016-02-10 11:20:30 -08002285 RegNumT Eax;
2286 RegNumT Edx;
Jim Stichnothc59288b2015-11-09 11:38:40 -08002287 switch (Ty) {
2288 default:
John Porto3c275ce2015-12-22 08:14:00 -08002289 llvm::report_fatal_error("Bad type for urem");
2290 case IceType_i64:
2291 Eax = Traits::getRaxOrDie();
2292 Edx = Traits::getRdxOrDie();
2293 break;
Jim Stichnothc59288b2015-11-09 11:38:40 -08002294 case IceType_i32:
John Porto3c275ce2015-12-22 08:14:00 -08002295 Eax = Traits::RegisterSet::Reg_eax;
2296 Edx = Traits::RegisterSet::Reg_edx;
Jim Stichnothc59288b2015-11-09 11:38:40 -08002297 break;
2298 case IceType_i16:
2299 Eax = Traits::RegisterSet::Reg_ax;
2300 Edx = Traits::RegisterSet::Reg_dx;
2301 break;
2302 case IceType_i8:
2303 Eax = Traits::RegisterSet::Reg_al;
2304 Edx = Traits::RegisterSet::Reg_ah;
2305 break;
John Porto7e93c622015-06-23 10:58:57 -07002306 }
Jim Stichnothc59288b2015-11-09 11:38:40 -08002307 T_edx = makeReg(Ty, Edx);
2308 _mov(T_edx, Ctx->getConstantZero(Ty));
2309 _mov(T, Src0, Eax);
2310 _div(T_edx, Src1, T);
Jim Stichnoth2655d962016-04-21 05:38:15 -07002311 if (Ty == IceType_i8) {
2312 // Register ah must be moved into one of {al,bl,cl,dl} before it can be
2313 // moved into a general 8-bit register.
2314 auto *T_AhRcvr = makeReg(Ty);
2315 T_AhRcvr->setRegClass(RCX86_IsAhRcvr);
2316 _mov(T_AhRcvr, T_edx);
2317 T_edx = T_AhRcvr;
2318 }
Jim Stichnothc59288b2015-11-09 11:38:40 -08002319 _mov(Dest, T_edx);
2320 } break;
2321 case InstArithmetic::Srem: {
Andrew Scull57e12682015-09-16 11:30:19 -07002322 // TODO(stichnot): Enable this after doing better performance and cross
2323 // testing.
Jim Stichnothdd6dcfa2016-04-18 12:52:09 -07002324 if (false && Func->getOptLevel() >= Opt_1) {
Andrew Scull57e12682015-09-16 11:30:19 -07002325 // Optimize mod by constant power of 2, but not for Om1 or O0, just to
2326 // keep things simple there.
John Porto7e93c622015-06-23 10:58:57 -07002327 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08002328 const int32_t Divisor = C->getValue();
2329 const uint32_t UDivisor = Divisor;
John Porto7e93c622015-06-23 10:58:57 -07002330 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {
2331 uint32_t LogDiv = llvm::Log2_32(UDivisor);
John Porto7e93c622015-06-23 10:58:57 -07002332 // LLVM does the following for dest=src%(1<<log):
2333 // t=src
2334 // sar t,typewidth-1 // -1 if src is negative, 0 if not
2335 // shr t,typewidth-log
2336 // add t,src
2337 // and t, -(1<<log)
2338 // sub t,src
2339 // neg t
2340 // dest=t
2341 uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty);
2342 // If for some reason we are dividing by 1, just assign 0.
2343 if (LogDiv == 0) {
2344 _mov(Dest, Ctx->getConstantZero(Ty));
2345 return;
2346 }
2347 _mov(T, Src0);
2348 // The initial sar is unnecessary when dividing by 2.
2349 if (LogDiv > 1)
2350 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1));
2351 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv));
2352 _add(T, Src0);
2353 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv)));
2354 _sub(T, Src0);
2355 _neg(T);
2356 _mov(Dest, T);
2357 return;
2358 }
2359 }
2360 }
2361 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
Jim Stichnoth8aa39662016-02-10 11:20:30 -08002362 RegNumT Eax;
2363 RegNumT Edx;
Jim Stichnothc59288b2015-11-09 11:38:40 -08002364 switch (Ty) {
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07002365 default:
John Porto3c275ce2015-12-22 08:14:00 -08002366 llvm::report_fatal_error("Bad type for srem");
2367 case IceType_i64:
2368 Eax = Traits::getRaxOrDie();
2369 Edx = Traits::getRdxOrDie();
2370 break;
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07002371 case IceType_i32:
John Porto3c275ce2015-12-22 08:14:00 -08002372 Eax = Traits::RegisterSet::Reg_eax;
2373 Edx = Traits::RegisterSet::Reg_edx;
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07002374 break;
2375 case IceType_i16:
Jim Stichnothc59288b2015-11-09 11:38:40 -08002376 Eax = Traits::RegisterSet::Reg_ax;
2377 Edx = Traits::RegisterSet::Reg_dx;
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07002378 break;
2379 case IceType_i8:
Jim Stichnothc59288b2015-11-09 11:38:40 -08002380 Eax = Traits::RegisterSet::Reg_al;
2381 Edx = Traits::RegisterSet::Reg_ah;
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07002382 break;
John Porto7e93c622015-06-23 10:58:57 -07002383 }
Jim Stichnothc59288b2015-11-09 11:38:40 -08002384 T_edx = makeReg(Ty, Edx);
2385 _mov(T, Src0, Eax);
2386 _cbwdq(T_edx, T);
2387 _idiv(T_edx, Src1, T);
Jim Stichnoth2655d962016-04-21 05:38:15 -07002388 if (Ty == IceType_i8) {
2389 // Register ah must be moved into one of {al,bl,cl,dl} before it can be
2390 // moved into a general 8-bit register.
2391 auto *T_AhRcvr = makeReg(Ty);
2392 T_AhRcvr->setRegClass(RCX86_IsAhRcvr);
2393 _mov(T_AhRcvr, T_edx);
2394 T_edx = T_AhRcvr;
2395 }
Jim Stichnothc59288b2015-11-09 11:38:40 -08002396 _mov(Dest, T_edx);
2397 } break;
John Porto7e93c622015-06-23 10:58:57 -07002398 case InstArithmetic::Fadd:
2399 _mov(T, Src0);
2400 _addss(T, Src1);
2401 _mov(Dest, T);
2402 break;
2403 case InstArithmetic::Fsub:
2404 _mov(T, Src0);
2405 _subss(T, Src1);
2406 _mov(Dest, T);
2407 break;
2408 case InstArithmetic::Fmul:
2409 _mov(T, Src0);
Jim Stichnothebbb5912015-10-05 15:12:09 -07002410 _mulss(T, Src0 == Src1 ? T : Src1);
John Porto7e93c622015-06-23 10:58:57 -07002411 _mov(Dest, T);
2412 break;
2413 case InstArithmetic::Fdiv:
2414 _mov(T, Src0);
2415 _divss(T, Src1);
2416 _mov(Dest, T);
2417 break;
David Sehr26217e32015-11-26 13:03:50 -08002418 case InstArithmetic::Frem:
2419 llvm::report_fatal_error("Helper call was expected");
2420 break;
John Porto7e93c622015-06-23 10:58:57 -07002421 }
2422}
2423
John Porto4a566862016-01-04 09:33:41 -08002424template <typename TraitsType>
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08002425void TargetX86Base<TraitsType>::lowerAssign(const InstAssign *Instr) {
2426 Variable *Dest = Instr->getDest();
Jim Stichnoth3607b6c2015-11-13 14:28:23 -08002427 if (Dest->isRematerializable()) {
John Porto1d937a82015-12-17 06:19:34 -08002428 Context.insert<InstFakeDef>(Dest);
Jim Stichnoth3607b6c2015-11-13 14:28:23 -08002429 return;
2430 }
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08002431 Operand *Src = Instr->getSrc(0);
David Sehre3984282015-12-15 17:34:55 -08002432 assert(Dest->getType() == Src->getType());
2433 lowerMove(Dest, Src, false);
John Porto7e93c622015-06-23 10:58:57 -07002434}
2435
John Porto4a566862016-01-04 09:33:41 -08002436template <typename TraitsType>
2437void TargetX86Base<TraitsType>::lowerBr(const InstBr *Br) {
David Sehre3984282015-12-15 17:34:55 -08002438 if (Br->isUnconditional()) {
2439 _br(Br->getTargetUnconditional());
John Porto7e93c622015-06-23 10:58:57 -07002440 return;
2441 }
David Sehre3984282015-12-15 17:34:55 -08002442 Operand *Cond = Br->getCondition();
John Porto7e93c622015-06-23 10:58:57 -07002443
2444 // Handle folding opportunities.
David Sehre3984282015-12-15 17:34:55 -08002445 if (const Inst *Producer = FoldingInfo.getProducerFor(Cond)) {
John Porto7e93c622015-06-23 10:58:57 -07002446 assert(Producer->isDeleted());
Jim Stichnothcaeaa272016-01-10 12:53:44 -08002447 switch (BoolFolding<Traits>::getProducerKind(Producer)) {
John Porto7e93c622015-06-23 10:58:57 -07002448 default:
2449 break;
Jim Stichnothcaeaa272016-01-10 12:53:44 -08002450 case BoolFolding<Traits>::PK_Icmp32:
2451 case BoolFolding<Traits>::PK_Icmp64: {
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08002452 lowerIcmpAndConsumer(llvm::cast<InstIcmp>(Producer), Br);
John Porto7e93c622015-06-23 10:58:57 -07002453 return;
2454 }
Jim Stichnothcaeaa272016-01-10 12:53:44 -08002455 case BoolFolding<Traits>::PK_Fcmp: {
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08002456 lowerFcmpAndConsumer(llvm::cast<InstFcmp>(Producer), Br);
David Sehrdaf096c2015-11-11 10:56:58 -08002457 return;
2458 }
Jim Stichnothcaeaa272016-01-10 12:53:44 -08002459 case BoolFolding<Traits>::PK_Arith: {
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08002460 lowerArithAndConsumer(llvm::cast<InstArithmetic>(Producer), Br);
David Sehrdaf096c2015-11-11 10:56:58 -08002461 return;
2462 }
John Porto7e93c622015-06-23 10:58:57 -07002463 }
2464 }
John Porto7e93c622015-06-23 10:58:57 -07002465 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem);
2466 Constant *Zero = Ctx->getConstantZero(IceType_i32);
2467 _cmp(Src0, Zero);
David Sehre3984282015-12-15 17:34:55 -08002468 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse());
John Porto7e93c622015-06-23 10:58:57 -07002469}
2470
David Sehr0c68bef2016-01-20 10:00:23 -08002471// constexprMax returns a (constexpr) max(S0, S1), and it is used for defining
2472// OperandList in lowerCall. std::max() is supposed to work, but it doesn't.
2473inline constexpr SizeT constexprMax(SizeT S0, SizeT S1) {
2474 return S0 < S1 ? S1 : S0;
2475}
2476
// Lowers a call instruction: classifies arguments into XMM registers, GPRs,
// and stack slots; materializes the argument moves/stores; emits the call;
// and moves the return value (if any) from its ABI-mandated location into
// Instr's dest. The dest's external interface follows the common x86 calling
// convention described below.
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerCall(const InstCall *Instr) {
  // Common x86 calling convention lowering:
  //
  // * At the point before the call, the stack must be aligned to 16 bytes.
  //
  // * Non-register arguments are pushed onto the stack in right-to-left order,
  // such that the left-most argument ends up on the top of the stack at the
  // lowest memory address.
  //
  // * Stack arguments of vector type are aligned to start at the next highest
  // multiple of 16 bytes. Other stack arguments are aligned to the next word
  // size boundary (4 or 8 bytes, respectively).
  NeedsStackAlignment = true;

  using OperandList =
      llvm::SmallVector<Operand *, constexprMax(Traits::X86_MAX_XMM_ARGS,
                                                Traits::X86_MAX_GPR_ARGS)>;
  OperandList XmmArgs;
  // GprArgs records the signature type alongside the operand so the
  // assignment loop below can assert that they agree.
  CfgVector<std::pair<const Type, Operand *>> GprArgs;
  OperandList StackArgs, StackArgLocations;
  uint32_t ParameterAreaSizeBytes = 0;

  // Classify each argument operand according to the location where the argument
  // is passed. XmmArgs.size()/GprArgs.size() double as the count of register
  // slots of each class consumed so far.
  for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
    Operand *Arg = Instr->getArg(i);
    const Type Ty = Arg->getType();
    // The PNaCl ABI requires the width of arguments to be at least 32 bits.
    assert(typeWidthInBytes(Ty) >= 4);
    if (isVectorType(Ty) &&
        Traits::getRegisterForXmmArgNum(XmmArgs.size()).hasValue()) {
      XmmArgs.push_back(Arg);
    } else if (isScalarFloatingType(Ty) && Traits::X86_PASS_SCALAR_FP_IN_XMM &&
               Traits::getRegisterForXmmArgNum(XmmArgs.size()).hasValue()) {
      XmmArgs.push_back(Arg);
    } else if (isScalarIntegerType(Ty) &&
               Traits::getRegisterForGprArgNum(Ty, GprArgs.size()).hasValue()) {
      GprArgs.emplace_back(Ty, Arg);
    } else {
      // Place on stack. Record the esp-relative address now, while
      // ParameterAreaSizeBytes holds this argument's offset.
      StackArgs.push_back(Arg);
      if (isVectorType(Arg->getType())) {
        ParameterAreaSizeBytes =
            Traits::applyStackAlignment(ParameterAreaSizeBytes);
      }
      Variable *esp = getPhysicalRegister(getStackReg(), Traits::WordType);
      Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
      StackArgLocations.push_back(
          Traits::X86OperandMem::create(Func, Ty, esp, Loc));
      ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
    }
  }
  // Ensure there is enough space for the fstp/movs for floating returns.
  Variable *Dest = Instr->getDest();
  const Type DestTy = Dest ? Dest->getType() : IceType_void;
  if (!Traits::X86_PASS_SCALAR_FP_IN_XMM) {
    if (isScalarFloatingType(DestTy)) {
      ParameterAreaSizeBytes =
          std::max(static_cast<size_t>(ParameterAreaSizeBytes),
                   typeWidthInBytesOnStack(DestTy));
    }
  }
  // Adjust the parameter area so that the stack is aligned. It is assumed that
  // the stack is already aligned at the start of the calling sequence.
  ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);
  assert(ParameterAreaSizeBytes <= maxOutArgsSizeBytes());
  // Copy arguments that are passed on the stack to the appropriate stack
  // locations.
  for (SizeT i = 0, NumStackArgs = StackArgs.size(); i < NumStackArgs; ++i) {
    lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
  }
  // Copy arguments to be passed in registers to the appropriate registers.
  for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
    Variable *Reg =
        legalizeToReg(XmmArgs[i], Traits::getRegisterForXmmArgNum(i));
    // Generate a FakeUse of register arguments so that they do not get dead
    // code eliminated as a result of the FakeKill of scratch registers after
    // the call.
    Context.insert<InstFakeUse>(Reg);
  }
  // Materialize moves for arguments passed in GPRs.
  for (SizeT i = 0, NumGprArgs = GprArgs.size(); i < NumGprArgs; ++i) {
    const Type SignatureTy = GprArgs[i].first;
    Operand *Arg = GprArgs[i].second;
    Variable *Reg =
        legalizeToReg(Arg, Traits::getRegisterForGprArgNum(Arg->getType(), i));
    assert(SignatureTy == IceType_i64 || SignatureTy == IceType_i32);
    assert(SignatureTy == Arg->getType());
    (void)SignatureTy;
    Context.insert<InstFakeUse>(Reg);
  }
  // Generate the call instruction. Assign its result to a temporary with high
  // register allocation weight.
  // ReturnReg doubles as ReturnRegLo as necessary.
  Variable *ReturnReg = nullptr;
  Variable *ReturnRegHi = nullptr;
  if (Dest) {
    switch (DestTy) {
    case IceType_NUM:
    case IceType_void:
    case IceType_i1:
    case IceType_i8:
    case IceType_i16:
      // Sub-32-bit integer dests are invalid per the PNaCl ABI width rule
      // asserted above.
      llvm::report_fatal_error("Invalid Call dest type");
      break;
    case IceType_i32:
      ReturnReg = makeReg(DestTy, Traits::RegisterSet::Reg_eax);
      break;
    case IceType_i64:
      if (Traits::Is64Bit) {
        ReturnReg = makeReg(IceType_i64, Traits::getRaxOrDie());
      } else {
        // On 32-bit targets an i64 return is split across eax:edx.
        ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
        ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
      }
      break;
    case IceType_f32:
    case IceType_f64:
      if (!Traits::X86_PASS_SCALAR_FP_IN_XMM) {
        // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with
        // the fstp instruction.
        break;
      }
      // Fallthrough intended.
    case IceType_v4i1:
    case IceType_v8i1:
    case IceType_v16i1:
    case IceType_v16i8:
    case IceType_v8i16:
    case IceType_v4i32:
    case IceType_v4f32:
      ReturnReg = makeReg(DestTy, Traits::RegisterSet::Reg_xmm0);
      break;
    }
  }
  // Emit the call to the function.
  Operand *CallTarget =
      legalize(Instr->getCallTarget(), Legal_Reg | Legal_Imm | Legal_AddrAbs);
  Inst *NewCall = emitCallToTarget(CallTarget, ReturnReg);
  // Keep the upper return register live on 32-bit platform.
  if (ReturnRegHi)
    Context.insert<InstFakeDef>(ReturnRegHi);
  // Mark the call as killing all the caller-save registers.
  Context.insert<InstFakeKill>(NewCall);
  // Handle x86-32 floating point returns.
  if (Dest != nullptr && isScalarFloatingType(Dest->getType()) &&
      !Traits::X86_PASS_SCALAR_FP_IN_XMM) {
    // Special treatment for an FP function which returns its result in st(0).
    // If Dest ends up being a physical xmm register, the fstp emit code will
    // route st(0) through the space reserved in the function argument area
    // we allocated.
    _fstp(Dest);
    // Create a fake use of Dest in case it actually isn't used, because st(0)
    // still needs to be popped.
    Context.insert<InstFakeUse>(Dest);
  }
  // Generate a FakeUse to keep the call live if necessary.
  if (Instr->hasSideEffects() && ReturnReg) {
    Context.insert<InstFakeUse>(ReturnReg);
  }
  // Process the return value, if any.
  if (Dest == nullptr)
    return;
  // Assign the result of the call to Dest.
  if (isVectorType(DestTy)) {
    assert(ReturnReg && "Vector type requires a return register");
    _movp(Dest, ReturnReg);
  } else if (isScalarFloatingType(DestTy)) {
    // When FP is returned via st(0), the _fstp above already stored it.
    if (Traits::X86_PASS_SCALAR_FP_IN_XMM) {
      assert(ReturnReg && "FP type requires a return register");
      _mov(Dest, ReturnReg);
    }
  } else {
    assert(isScalarIntegerType(DestTy));
    assert(ReturnReg && "Integer type requires a return register");
    if (DestTy == IceType_i64 && !Traits::Is64Bit) {
      assert(ReturnRegHi && "64-bit type requires two return registers");
      auto *Dest64On32 = llvm::cast<Variable64On32>(Dest);
      Variable *DestLo = Dest64On32->getLo();
      Variable *DestHi = Dest64On32->getHi();
      _mov(DestLo, ReturnReg);
      _mov(DestHi, ReturnRegHi);
    } else {
      _mov(Dest, ReturnReg);
    }
  }
}
2665
John Porto4a566862016-01-04 09:33:41 -08002666template <typename TraitsType>
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08002667void TargetX86Base<TraitsType>::lowerCast(const InstCast *Instr) {
John Porto7e93c622015-06-23 10:58:57 -07002668 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap)
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08002669 InstCast::OpKind CastKind = Instr->getCastKind();
2670 Variable *Dest = Instr->getDest();
Jim Stichnothc59288b2015-11-09 11:38:40 -08002671 Type DestTy = Dest->getType();
John Porto7e93c622015-06-23 10:58:57 -07002672 switch (CastKind) {
2673 default:
2674 Func->setError("Cast type not supported");
2675 return;
2676 case InstCast::Sext: {
2677 // Src0RM is the source operand legalized to physical register or memory,
2678 // but not immediate, since the relevant x86 native instructions don't
Andrew Scull57e12682015-09-16 11:30:19 -07002679 // allow an immediate operand. If the operand is an immediate, we could
2680 // consider computing the strength-reduced result at translation time, but
2681 // we're unlikely to see something like that in the bitcode that the
2682 // optimizer wouldn't have already taken care of.
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08002683 Operand *Src0RM = legalize(Instr->getSrc(0), Legal_Reg | Legal_Mem);
Jim Stichnothc59288b2015-11-09 11:38:40 -08002684 if (isVectorType(DestTy)) {
John Porto7e93c622015-06-23 10:58:57 -07002685 if (DestTy == IceType_v16i8) {
2686 // onemask = materialize(1,1,...); dst = (src & onemask) > 0
Jim Stichnothc59288b2015-11-09 11:38:40 -08002687 Variable *OneMask = makeVectorOfOnes(DestTy);
John Porto7e93c622015-06-23 10:58:57 -07002688 Variable *T = makeReg(DestTy);
2689 _movp(T, Src0RM);
2690 _pand(T, OneMask);
Jim Stichnothc59288b2015-11-09 11:38:40 -08002691 Variable *Zeros = makeVectorOfZeros(DestTy);
John Porto7e93c622015-06-23 10:58:57 -07002692 _pcmpgt(T, Zeros);
2693 _movp(Dest, T);
2694 } else {
Andrew Scull9612d322015-07-06 14:53:25 -07002695 /// width = width(elty) - 1; dest = (src << width) >> width
John Porto7e93c622015-06-23 10:58:57 -07002696 SizeT ShiftAmount =
2697 Traits::X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) -
2698 1;
2699 Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount);
2700 Variable *T = makeReg(DestTy);
2701 _movp(T, Src0RM);
2702 _psll(T, ShiftConstant);
2703 _psra(T, ShiftConstant);
2704 _movp(Dest, T);
2705 }
Jim Stichnothc59288b2015-11-09 11:38:40 -08002706 } else if (!Traits::Is64Bit && DestTy == IceType_i64) {
John Porto7e93c622015-06-23 10:58:57 -07002707 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2
2708 Constant *Shift = Ctx->getConstantInt32(31);
Jim Stichnoth54f3d512015-12-11 09:53:00 -08002709 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
2710 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
John Porto7e93c622015-06-23 10:58:57 -07002711 Variable *T_Lo = makeReg(DestLo->getType());
2712 if (Src0RM->getType() == IceType_i32) {
2713 _mov(T_Lo, Src0RM);
2714 } else if (Src0RM->getType() == IceType_i1) {
2715 _movzx(T_Lo, Src0RM);
2716 _shl(T_Lo, Shift);
2717 _sar(T_Lo, Shift);
2718 } else {
2719 _movsx(T_Lo, Src0RM);
2720 }
2721 _mov(DestLo, T_Lo);
2722 Variable *T_Hi = nullptr;
2723 _mov(T_Hi, T_Lo);
2724 if (Src0RM->getType() != IceType_i1)
2725 // For i1, the sar instruction is already done above.
2726 _sar(T_Hi, Shift);
2727 _mov(DestHi, T_Hi);
2728 } else if (Src0RM->getType() == IceType_i1) {
2729 // t1 = src
2730 // shl t1, dst_bitwidth - 1
2731 // sar t1, dst_bitwidth - 1
2732 // dst = t1
Jim Stichnothc59288b2015-11-09 11:38:40 -08002733 size_t DestBits = Traits::X86_CHAR_BIT * typeWidthInBytes(DestTy);
John Porto7e93c622015-06-23 10:58:57 -07002734 Constant *ShiftAmount = Ctx->getConstantInt32(DestBits - 1);
Jim Stichnothc59288b2015-11-09 11:38:40 -08002735 Variable *T = makeReg(DestTy);
2736 if (typeWidthInBytes(DestTy) <= typeWidthInBytes(Src0RM->getType())) {
John Porto7e93c622015-06-23 10:58:57 -07002737 _mov(T, Src0RM);
2738 } else {
Andrew Scull57e12682015-09-16 11:30:19 -07002739 // Widen the source using movsx or movzx. (It doesn't matter which one,
2740 // since the following shl/sar overwrite the bits.)
John Porto7e93c622015-06-23 10:58:57 -07002741 _movzx(T, Src0RM);
2742 }
2743 _shl(T, ShiftAmount);
2744 _sar(T, ShiftAmount);
2745 _mov(Dest, T);
2746 } else {
2747 // t1 = movsx src; dst = t1
Jim Stichnothc59288b2015-11-09 11:38:40 -08002748 Variable *T = makeReg(DestTy);
John Porto7e93c622015-06-23 10:58:57 -07002749 _movsx(T, Src0RM);
2750 _mov(Dest, T);
2751 }
2752 break;
2753 }
2754 case InstCast::Zext: {
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08002755 Operand *Src0RM = legalize(Instr->getSrc(0), Legal_Reg | Legal_Mem);
Jim Stichnothc59288b2015-11-09 11:38:40 -08002756 if (isVectorType(DestTy)) {
John Porto7e93c622015-06-23 10:58:57 -07002757 // onemask = materialize(1,1,...); dest = onemask & src
John Porto7e93c622015-06-23 10:58:57 -07002758 Variable *OneMask = makeVectorOfOnes(DestTy);
2759 Variable *T = makeReg(DestTy);
2760 _movp(T, Src0RM);
2761 _pand(T, OneMask);
2762 _movp(Dest, T);
Jim Stichnothc59288b2015-11-09 11:38:40 -08002763 } else if (!Traits::Is64Bit && DestTy == IceType_i64) {
John Porto7e93c622015-06-23 10:58:57 -07002764 // t1=movzx src; dst.lo=t1; dst.hi=0
2765 Constant *Zero = Ctx->getConstantZero(IceType_i32);
Jim Stichnoth54f3d512015-12-11 09:53:00 -08002766 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
2767 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
John Porto7e93c622015-06-23 10:58:57 -07002768 Variable *Tmp = makeReg(DestLo->getType());
2769 if (Src0RM->getType() == IceType_i32) {
2770 _mov(Tmp, Src0RM);
2771 } else {
2772 _movzx(Tmp, Src0RM);
2773 }
John Porto7e93c622015-06-23 10:58:57 -07002774 _mov(DestLo, Tmp);
2775 _mov(DestHi, Zero);
2776 } else if (Src0RM->getType() == IceType_i1) {
Jim Stichnoth485d0772015-10-09 06:52:19 -07002777 // t = Src0RM; Dest = t
John Porto1d235422015-08-12 12:37:53 -07002778 Variable *T = nullptr;
John Porto7e93c622015-06-23 10:58:57 -07002779 if (DestTy == IceType_i8) {
John Porto7e93c622015-06-23 10:58:57 -07002780 _mov(T, Src0RM);
2781 } else {
John Porto1d235422015-08-12 12:37:53 -07002782 assert(DestTy != IceType_i1);
2783 assert(Traits::Is64Bit || DestTy != IceType_i64);
John Porto7e93c622015-06-23 10:58:57 -07002784 // Use 32-bit for both 16-bit and 32-bit, since 32-bit ops are shorter.
John Porto1d235422015-08-12 12:37:53 -07002785 // In x86-64 we need to widen T to 64-bits to ensure that T -- if
2786 // written to the stack (i.e., in -Om1) will be fully zero-extended.
2787 T = makeReg(DestTy == IceType_i64 ? IceType_i64 : IceType_i32);
John Porto7e93c622015-06-23 10:58:57 -07002788 _movzx(T, Src0RM);
2789 }
John Porto7e93c622015-06-23 10:58:57 -07002790 _mov(Dest, T);
2791 } else {
2792 // t1 = movzx src; dst = t1
Jim Stichnothc59288b2015-11-09 11:38:40 -08002793 Variable *T = makeReg(DestTy);
John Porto7e93c622015-06-23 10:58:57 -07002794 _movzx(T, Src0RM);
2795 _mov(Dest, T);
2796 }
2797 break;
2798 }
2799 case InstCast::Trunc: {
Jim Stichnothc59288b2015-11-09 11:38:40 -08002800 if (isVectorType(DestTy)) {
John Porto7e93c622015-06-23 10:58:57 -07002801 // onemask = materialize(1,1,...); dst = src & onemask
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08002802 Operand *Src0RM = legalize(Instr->getSrc(0), Legal_Reg | Legal_Mem);
John Porto7e93c622015-06-23 10:58:57 -07002803 Type Src0Ty = Src0RM->getType();
2804 Variable *OneMask = makeVectorOfOnes(Src0Ty);
Jim Stichnothc59288b2015-11-09 11:38:40 -08002805 Variable *T = makeReg(DestTy);
John Porto7e93c622015-06-23 10:58:57 -07002806 _movp(T, Src0RM);
2807 _pand(T, OneMask);
2808 _movp(Dest, T);
Jim Stichnothc59288b2015-11-09 11:38:40 -08002809 } else if (DestTy == IceType_i1 || DestTy == IceType_i8) {
2810 // Make sure we truncate from and into valid registers.
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08002811 Operand *Src0 = legalizeUndef(Instr->getSrc(0));
Jim Stichnothc59288b2015-11-09 11:38:40 -08002812 if (!Traits::Is64Bit && Src0->getType() == IceType_i64)
2813 Src0 = loOperand(Src0);
2814 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2815 Variable *T = copyToReg8(Src0RM);
2816 if (DestTy == IceType_i1)
2817 _and(T, Ctx->getConstantInt1(1));
2818 _mov(Dest, T);
John Porto7e93c622015-06-23 10:58:57 -07002819 } else {
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08002820 Operand *Src0 = legalizeUndef(Instr->getSrc(0));
John Porto1d235422015-08-12 12:37:53 -07002821 if (!Traits::Is64Bit && Src0->getType() == IceType_i64)
John Porto7e93c622015-06-23 10:58:57 -07002822 Src0 = loOperand(Src0);
2823 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2824 // t1 = trunc Src0RM; Dest = t1
Jim Stichnothc59288b2015-11-09 11:38:40 -08002825 Variable *T = makeReg(DestTy);
John Porto7e93c622015-06-23 10:58:57 -07002826 _mov(T, Src0RM);
John Porto7e93c622015-06-23 10:58:57 -07002827 _mov(Dest, T);
2828 }
2829 break;
2830 }
2831 case InstCast::Fptrunc:
2832 case InstCast::Fpext: {
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08002833 Operand *Src0RM = legalize(Instr->getSrc(0), Legal_Reg | Legal_Mem);
John Porto7e93c622015-06-23 10:58:57 -07002834 // t1 = cvt Src0RM; Dest = t1
Jim Stichnothc59288b2015-11-09 11:38:40 -08002835 Variable *T = makeReg(DestTy);
John Porto921856d2015-07-07 11:56:26 -07002836 _cvt(T, Src0RM, Traits::Insts::Cvt::Float2float);
John Porto7e93c622015-06-23 10:58:57 -07002837 _mov(Dest, T);
2838 break;
2839 }
2840 case InstCast::Fptosi:
Jim Stichnothc59288b2015-11-09 11:38:40 -08002841 if (isVectorType(DestTy)) {
2842 assert(DestTy == IceType_v4i32 &&
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08002843 Instr->getSrc(0)->getType() == IceType_v4f32);
2844 Operand *Src0RM = legalize(Instr->getSrc(0), Legal_Reg | Legal_Mem);
John Porto4a566862016-01-04 09:33:41 -08002845 if (llvm::isa<X86OperandMem>(Src0RM))
Andrew Scull97f460d2015-07-21 10:07:42 -07002846 Src0RM = legalizeToReg(Src0RM);
Jim Stichnothc59288b2015-11-09 11:38:40 -08002847 Variable *T = makeReg(DestTy);
John Porto921856d2015-07-07 11:56:26 -07002848 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq);
John Porto7e93c622015-06-23 10:58:57 -07002849 _movp(Dest, T);
Jim Stichnothc59288b2015-11-09 11:38:40 -08002850 } else if (!Traits::Is64Bit && DestTy == IceType_i64) {
David Sehr26217e32015-11-26 13:03:50 -08002851 llvm::report_fatal_error("Helper call was expected");
John Porto7e93c622015-06-23 10:58:57 -07002852 } else {
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08002853 Operand *Src0RM = legalize(Instr->getSrc(0), Legal_Reg | Legal_Mem);
John Porto7e93c622015-06-23 10:58:57 -07002854 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
John Porto1d235422015-08-12 12:37:53 -07002855 Variable *T_1 = nullptr;
Jim Stichnothc59288b2015-11-09 11:38:40 -08002856 if (Traits::Is64Bit && DestTy == IceType_i64) {
John Porto1d235422015-08-12 12:37:53 -07002857 T_1 = makeReg(IceType_i64);
2858 } else {
Jim Stichnothc59288b2015-11-09 11:38:40 -08002859 assert(DestTy != IceType_i64);
John Porto1d235422015-08-12 12:37:53 -07002860 T_1 = makeReg(IceType_i32);
2861 }
2862 // cvt() requires its integer argument to be a GPR.
Jim Stichnothc59288b2015-11-09 11:38:40 -08002863 Variable *T_2 = makeReg(DestTy);
2864 if (isByteSizedType(DestTy)) {
2865 assert(T_1->getType() == IceType_i32);
2866 T_1->setRegClass(RCX86_Is32To8);
2867 T_2->setRegClass(RCX86_IsTrunc8Rcvr);
2868 }
John Porto921856d2015-07-07 11:56:26 -07002869 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si);
John Porto7e93c622015-06-23 10:58:57 -07002870 _mov(T_2, T_1); // T_1 and T_2 may have different integer types
Jim Stichnothc59288b2015-11-09 11:38:40 -08002871 if (DestTy == IceType_i1)
John Porto7e93c622015-06-23 10:58:57 -07002872 _and(T_2, Ctx->getConstantInt1(1));
2873 _mov(Dest, T_2);
2874 }
2875 break;
2876 case InstCast::Fptoui:
Jim Stichnothc59288b2015-11-09 11:38:40 -08002877 if (isVectorType(DestTy)) {
David Sehr26217e32015-11-26 13:03:50 -08002878 llvm::report_fatal_error("Helper call was expected");
Jim Stichnothc59288b2015-11-09 11:38:40 -08002879 } else if (DestTy == IceType_i64 ||
2880 (!Traits::Is64Bit && DestTy == IceType_i32)) {
David Sehr26217e32015-11-26 13:03:50 -08002881 llvm::report_fatal_error("Helper call was expected");
John Porto7e93c622015-06-23 10:58:57 -07002882 } else {
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08002883 Operand *Src0RM = legalize(Instr->getSrc(0), Legal_Reg | Legal_Mem);
John Porto7e93c622015-06-23 10:58:57 -07002884 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
Jim Stichnothc59288b2015-11-09 11:38:40 -08002885 assert(DestTy != IceType_i64);
John Porto1d235422015-08-12 12:37:53 -07002886 Variable *T_1 = nullptr;
Jim Stichnothc59288b2015-11-09 11:38:40 -08002887 if (Traits::Is64Bit && DestTy == IceType_i32) {
John Porto1d235422015-08-12 12:37:53 -07002888 T_1 = makeReg(IceType_i64);
2889 } else {
Jim Stichnothc59288b2015-11-09 11:38:40 -08002890 assert(DestTy != IceType_i32);
John Porto1d235422015-08-12 12:37:53 -07002891 T_1 = makeReg(IceType_i32);
2892 }
Jim Stichnothc59288b2015-11-09 11:38:40 -08002893 Variable *T_2 = makeReg(DestTy);
2894 if (isByteSizedType(DestTy)) {
2895 assert(T_1->getType() == IceType_i32);
2896 T_1->setRegClass(RCX86_Is32To8);
2897 T_2->setRegClass(RCX86_IsTrunc8Rcvr);
2898 }
John Porto921856d2015-07-07 11:56:26 -07002899 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si);
John Porto7e93c622015-06-23 10:58:57 -07002900 _mov(T_2, T_1); // T_1 and T_2 may have different integer types
Jim Stichnothc59288b2015-11-09 11:38:40 -08002901 if (DestTy == IceType_i1)
John Porto7e93c622015-06-23 10:58:57 -07002902 _and(T_2, Ctx->getConstantInt1(1));
2903 _mov(Dest, T_2);
2904 }
2905 break;
2906 case InstCast::Sitofp:
Jim Stichnothc59288b2015-11-09 11:38:40 -08002907 if (isVectorType(DestTy)) {
2908 assert(DestTy == IceType_v4f32 &&
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08002909 Instr->getSrc(0)->getType() == IceType_v4i32);
2910 Operand *Src0RM = legalize(Instr->getSrc(0), Legal_Reg | Legal_Mem);
John Porto4a566862016-01-04 09:33:41 -08002911 if (llvm::isa<X86OperandMem>(Src0RM))
Andrew Scull97f460d2015-07-21 10:07:42 -07002912 Src0RM = legalizeToReg(Src0RM);
Jim Stichnothc59288b2015-11-09 11:38:40 -08002913 Variable *T = makeReg(DestTy);
John Porto921856d2015-07-07 11:56:26 -07002914 _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps);
John Porto7e93c622015-06-23 10:58:57 -07002915 _movp(Dest, T);
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08002916 } else if (!Traits::Is64Bit && Instr->getSrc(0)->getType() == IceType_i64) {
David Sehr26217e32015-11-26 13:03:50 -08002917 llvm::report_fatal_error("Helper call was expected");
John Porto7e93c622015-06-23 10:58:57 -07002918 } else {
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08002919 Operand *Src0RM = legalize(Instr->getSrc(0), Legal_Reg | Legal_Mem);
John Porto7e93c622015-06-23 10:58:57 -07002920 // Sign-extend the operand.
2921 // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2
John Porto1d235422015-08-12 12:37:53 -07002922 Variable *T_1 = nullptr;
2923 if (Traits::Is64Bit && Src0RM->getType() == IceType_i64) {
2924 T_1 = makeReg(IceType_i64);
2925 } else {
2926 assert(Src0RM->getType() != IceType_i64);
2927 T_1 = makeReg(IceType_i32);
2928 }
Jim Stichnothc59288b2015-11-09 11:38:40 -08002929 Variable *T_2 = makeReg(DestTy);
John Porto1d235422015-08-12 12:37:53 -07002930 if (Src0RM->getType() == T_1->getType())
John Porto7e93c622015-06-23 10:58:57 -07002931 _mov(T_1, Src0RM);
2932 else
2933 _movsx(T_1, Src0RM);
John Porto921856d2015-07-07 11:56:26 -07002934 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss);
John Porto7e93c622015-06-23 10:58:57 -07002935 _mov(Dest, T_2);
2936 }
2937 break;
2938 case InstCast::Uitofp: {
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08002939 Operand *Src0 = Instr->getSrc(0);
John Porto7e93c622015-06-23 10:58:57 -07002940 if (isVectorType(Src0->getType())) {
David Sehr26217e32015-11-26 13:03:50 -08002941 llvm::report_fatal_error("Helper call was expected");
John Porto7e93c622015-06-23 10:58:57 -07002942 } else if (Src0->getType() == IceType_i64 ||
John Porto1d235422015-08-12 12:37:53 -07002943 (!Traits::Is64Bit && Src0->getType() == IceType_i32)) {
David Sehr26217e32015-11-26 13:03:50 -08002944 llvm::report_fatal_error("Helper call was expected");
John Porto7e93c622015-06-23 10:58:57 -07002945 } else {
2946 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2947 // Zero-extend the operand.
2948 // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2
John Porto1d235422015-08-12 12:37:53 -07002949 Variable *T_1 = nullptr;
2950 if (Traits::Is64Bit && Src0RM->getType() == IceType_i32) {
2951 T_1 = makeReg(IceType_i64);
2952 } else {
2953 assert(Src0RM->getType() != IceType_i64);
2954 assert(Traits::Is64Bit || Src0RM->getType() != IceType_i32);
2955 T_1 = makeReg(IceType_i32);
2956 }
Jim Stichnothc59288b2015-11-09 11:38:40 -08002957 Variable *T_2 = makeReg(DestTy);
John Porto1d235422015-08-12 12:37:53 -07002958 if (Src0RM->getType() == T_1->getType())
John Porto7e93c622015-06-23 10:58:57 -07002959 _mov(T_1, Src0RM);
2960 else
2961 _movzx(T_1, Src0RM);
John Porto921856d2015-07-07 11:56:26 -07002962 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss);
John Porto7e93c622015-06-23 10:58:57 -07002963 _mov(Dest, T_2);
2964 }
2965 break;
2966 }
2967 case InstCast::Bitcast: {
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08002968 Operand *Src0 = Instr->getSrc(0);
Jim Stichnothc59288b2015-11-09 11:38:40 -08002969 if (DestTy == Src0->getType()) {
Jim Stichnoth54f3d512015-12-11 09:53:00 -08002970 auto *Assign = InstAssign::create(Func, Dest, Src0);
John Porto7e93c622015-06-23 10:58:57 -07002971 lowerAssign(Assign);
2972 return;
2973 }
Jim Stichnothc59288b2015-11-09 11:38:40 -08002974 switch (DestTy) {
John Porto7e93c622015-06-23 10:58:57 -07002975 default:
2976 llvm_unreachable("Unexpected Bitcast dest type");
2977 case IceType_i8: {
David Sehr26217e32015-11-26 13:03:50 -08002978 llvm::report_fatal_error("Helper call was expected");
John Porto7e93c622015-06-23 10:58:57 -07002979 } break;
2980 case IceType_i16: {
David Sehr26217e32015-11-26 13:03:50 -08002981 llvm::report_fatal_error("Helper call was expected");
John Porto7e93c622015-06-23 10:58:57 -07002982 } break;
2983 case IceType_i32:
2984 case IceType_f32: {
2985 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
John Porto7e93c622015-06-23 10:58:57 -07002986 Type SrcType = Src0RM->getType();
Jim Stichnothc59288b2015-11-09 11:38:40 -08002987 assert((DestTy == IceType_i32 && SrcType == IceType_f32) ||
2988 (DestTy == IceType_f32 && SrcType == IceType_i32));
John Porto7e93c622015-06-23 10:58:57 -07002989 // a.i32 = bitcast b.f32 ==>
2990 // t.f32 = b.f32
2991 // s.f32 = spill t.f32
2992 // a.i32 = s.f32
2993 Variable *T = nullptr;
2994 // TODO: Should be able to force a spill setup by calling legalize() with
2995 // Legal_Mem and not Legal_Reg or Legal_Imm.
John Porto4a566862016-01-04 09:33:41 -08002996 SpillVariable *SpillVar = Func->makeVariable<SpillVariable>(SrcType);
John Porto7e93c622015-06-23 10:58:57 -07002997 SpillVar->setLinkedTo(Dest);
2998 Variable *Spill = SpillVar;
Andrew Scull11c9a322015-08-28 14:24:14 -07002999 Spill->setMustNotHaveReg();
John Porto7e93c622015-06-23 10:58:57 -07003000 _mov(T, Src0RM);
3001 _mov(Spill, T);
3002 _mov(Dest, Spill);
3003 } break;
3004 case IceType_i64: {
John Porto1d235422015-08-12 12:37:53 -07003005 assert(Src0->getType() == IceType_f64);
3006 if (Traits::Is64Bit) {
John Porto1d235422015-08-12 12:37:53 -07003007 Variable *Src0R = legalizeToReg(Src0);
3008 Variable *T = makeReg(IceType_i64);
3009 _movd(T, Src0R);
3010 _mov(Dest, T);
John Porto7e93c622015-06-23 10:58:57 -07003011 } else {
John Porto1d235422015-08-12 12:37:53 -07003012 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
3013 // a.i64 = bitcast b.f64 ==>
3014 // s.f64 = spill b.f64
3015 // t_lo.i32 = lo(s.f64)
3016 // a_lo.i32 = t_lo.i32
3017 // t_hi.i32 = hi(s.f64)
3018 // a_hi.i32 = t_hi.i32
3019 Operand *SpillLo, *SpillHi;
3020 if (auto *Src0Var = llvm::dyn_cast<Variable>(Src0RM)) {
John Porto4a566862016-01-04 09:33:41 -08003021 SpillVariable *SpillVar =
3022 Func->makeVariable<SpillVariable>(IceType_f64);
John Porto1d235422015-08-12 12:37:53 -07003023 SpillVar->setLinkedTo(Src0Var);
3024 Variable *Spill = SpillVar;
Andrew Scull11c9a322015-08-28 14:24:14 -07003025 Spill->setMustNotHaveReg();
John Porto1d235422015-08-12 12:37:53 -07003026 _movq(Spill, Src0RM);
3027 SpillLo = Traits::VariableSplit::create(Func, Spill,
3028 Traits::VariableSplit::Low);
3029 SpillHi = Traits::VariableSplit::create(Func, Spill,
3030 Traits::VariableSplit::High);
3031 } else {
3032 SpillLo = loOperand(Src0RM);
3033 SpillHi = hiOperand(Src0RM);
3034 }
3035
Jim Stichnoth54f3d512015-12-11 09:53:00 -08003036 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3037 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
John Porto1d235422015-08-12 12:37:53 -07003038 Variable *T_Lo = makeReg(IceType_i32);
3039 Variable *T_Hi = makeReg(IceType_i32);
3040
3041 _mov(T_Lo, SpillLo);
3042 _mov(DestLo, T_Lo);
3043 _mov(T_Hi, SpillHi);
3044 _mov(DestHi, T_Hi);
John Porto7e93c622015-06-23 10:58:57 -07003045 }
John Porto7e93c622015-06-23 10:58:57 -07003046 } break;
3047 case IceType_f64: {
John Porto7e93c622015-06-23 10:58:57 -07003048 assert(Src0->getType() == IceType_i64);
John Porto1d235422015-08-12 12:37:53 -07003049 if (Traits::Is64Bit) {
3050 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
3051 Variable *T = makeReg(IceType_f64);
John Porto1d235422015-08-12 12:37:53 -07003052 _movd(T, Src0RM);
3053 _mov(Dest, T);
3054 } else {
3055 Src0 = legalize(Src0);
John Porto4a566862016-01-04 09:33:41 -08003056 if (llvm::isa<X86OperandMem>(Src0)) {
Jim Stichnothc59288b2015-11-09 11:38:40 -08003057 Variable *T = Func->makeVariable(DestTy);
John Porto1d235422015-08-12 12:37:53 -07003058 _movq(T, Src0);
3059 _movq(Dest, T);
3060 break;
3061 }
3062 // a.f64 = bitcast b.i64 ==>
3063 // t_lo.i32 = b_lo.i32
3064 // FakeDef(s.f64)
3065 // lo(s.f64) = t_lo.i32
3066 // t_hi.i32 = b_hi.i32
3067 // hi(s.f64) = t_hi.i32
3068 // a.f64 = s.f64
John Porto4a566862016-01-04 09:33:41 -08003069 SpillVariable *SpillVar =
3070 Func->makeVariable<SpillVariable>(IceType_f64);
John Porto1d235422015-08-12 12:37:53 -07003071 SpillVar->setLinkedTo(Dest);
3072 Variable *Spill = SpillVar;
Andrew Scull11c9a322015-08-28 14:24:14 -07003073 Spill->setMustNotHaveReg();
John Porto7e93c622015-06-23 10:58:57 -07003074
John Porto1d235422015-08-12 12:37:53 -07003075 Variable *T_Lo = nullptr, *T_Hi = nullptr;
Jim Stichnoth54f3d512015-12-11 09:53:00 -08003076 auto *SpillLo = Traits::VariableSplit::create(
John Porto1d235422015-08-12 12:37:53 -07003077 Func, Spill, Traits::VariableSplit::Low);
Jim Stichnoth54f3d512015-12-11 09:53:00 -08003078 auto *SpillHi = Traits::VariableSplit::create(
John Porto1d235422015-08-12 12:37:53 -07003079 Func, Spill, Traits::VariableSplit::High);
3080 _mov(T_Lo, loOperand(Src0));
3081 // Technically, the Spill is defined after the _store happens, but
Andrew Scull57e12682015-09-16 11:30:19 -07003082 // SpillLo is considered a "use" of Spill so define Spill before it is
3083 // used.
John Porto1d937a82015-12-17 06:19:34 -08003084 Context.insert<InstFakeDef>(Spill);
John Porto1d235422015-08-12 12:37:53 -07003085 _store(T_Lo, SpillLo);
3086 _mov(T_Hi, hiOperand(Src0));
3087 _store(T_Hi, SpillHi);
3088 _movq(Dest, Spill);
3089 }
John Porto7e93c622015-06-23 10:58:57 -07003090 } break;
3091 case IceType_v8i1: {
David Sehr26217e32015-11-26 13:03:50 -08003092 llvm::report_fatal_error("Helper call was expected");
John Porto7e93c622015-06-23 10:58:57 -07003093 } break;
3094 case IceType_v16i1: {
David Sehr26217e32015-11-26 13:03:50 -08003095 llvm::report_fatal_error("Helper call was expected");
John Porto7e93c622015-06-23 10:58:57 -07003096 } break;
3097 case IceType_v8i16:
3098 case IceType_v16i8:
3099 case IceType_v4i32:
3100 case IceType_v4f32: {
Andrew Scull97f460d2015-07-21 10:07:42 -07003101 _movp(Dest, legalizeToReg(Src0));
John Porto7e93c622015-06-23 10:58:57 -07003102 } break;
3103 }
3104 break;
3105 }
3106 }
3107}
3108
John Porto4a566862016-01-04 09:33:41 -08003109template <typename TraitsType>
3110void TargetX86Base<TraitsType>::lowerExtractElement(
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08003111 const InstExtractElement *Instr) {
3112 Operand *SourceVectNotLegalized = Instr->getSrc(0);
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08003113 auto *ElementIndex = llvm::dyn_cast<ConstantInteger32>(Instr->getSrc(1));
John Porto7e93c622015-06-23 10:58:57 -07003114 // Only constant indices are allowed in PNaCl IR.
3115 assert(ElementIndex);
3116
3117 unsigned Index = ElementIndex->getValue();
3118 Type Ty = SourceVectNotLegalized->getType();
3119 Type ElementTy = typeElementType(Ty);
3120 Type InVectorElementTy = Traits::getInVectorElementType(Ty);
John Porto7e93c622015-06-23 10:58:57 -07003121
3122 // TODO(wala): Determine the best lowering sequences for each type.
3123 bool CanUsePextr = Ty == IceType_v8i16 || Ty == IceType_v8i1 ||
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07003124 (InstructionSet >= Traits::SSE4_1 && Ty != IceType_v4f32);
3125 Variable *ExtractedElementR =
3126 makeReg(CanUsePextr ? IceType_i32 : InVectorElementTy);
3127 if (CanUsePextr) {
3128 // Use pextrb, pextrw, or pextrd. The "b" and "w" versions clear the upper
3129 // bits of the destination register, so we represent this by always
3130 // extracting into an i32 register. The _mov into Dest below will do
3131 // truncation as necessary.
John Porto7e93c622015-06-23 10:58:57 -07003132 Constant *Mask = Ctx->getConstantInt32(Index);
Andrew Scull97f460d2015-07-21 10:07:42 -07003133 Variable *SourceVectR = legalizeToReg(SourceVectNotLegalized);
John Porto7e93c622015-06-23 10:58:57 -07003134 _pextr(ExtractedElementR, SourceVectR, Mask);
3135 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
3136 // Use pshufd and movd/movss.
3137 Variable *T = nullptr;
3138 if (Index) {
Andrew Scull57e12682015-09-16 11:30:19 -07003139 // The shuffle only needs to occur if the element to be extracted is not
3140 // at the lowest index.
John Porto7e93c622015-06-23 10:58:57 -07003141 Constant *Mask = Ctx->getConstantInt32(Index);
3142 T = makeReg(Ty);
3143 _pshufd(T, legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem), Mask);
3144 } else {
Andrew Scull97f460d2015-07-21 10:07:42 -07003145 T = legalizeToReg(SourceVectNotLegalized);
John Porto7e93c622015-06-23 10:58:57 -07003146 }
3147
3148 if (InVectorElementTy == IceType_i32) {
3149 _movd(ExtractedElementR, T);
3150 } else { // Ty == IceType_f32
Andrew Scull57e12682015-09-16 11:30:19 -07003151 // TODO(wala): _movss is only used here because _mov does not allow a
3152 // vector source and a scalar destination. _mov should be able to be
3153 // used here.
3154 // _movss is a binary instruction, so the FakeDef is needed to keep the
3155 // live range analysis consistent.
John Porto1d937a82015-12-17 06:19:34 -08003156 Context.insert<InstFakeDef>(ExtractedElementR);
John Porto7e93c622015-06-23 10:58:57 -07003157 _movss(ExtractedElementR, T);
3158 }
3159 } else {
3160 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
3161 // Spill the value to a stack slot and do the extraction in memory.
3162 //
Andrew Scull57e12682015-09-16 11:30:19 -07003163 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when support
3164 // for legalizing to mem is implemented.
John Porto5aeed952015-07-21 13:39:09 -07003165 Variable *Slot = Func->makeVariable(Ty);
Andrew Scull11c9a322015-08-28 14:24:14 -07003166 Slot->setMustNotHaveReg();
Andrew Scull97f460d2015-07-21 10:07:42 -07003167 _movp(Slot, legalizeToReg(SourceVectNotLegalized));
John Porto7e93c622015-06-23 10:58:57 -07003168
3169 // Compute the location of the element in memory.
3170 unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
John Porto4a566862016-01-04 09:33:41 -08003171 X86OperandMem *Loc =
John Porto7e93c622015-06-23 10:58:57 -07003172 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
3173 _mov(ExtractedElementR, Loc);
3174 }
3175
3176 if (ElementTy == IceType_i1) {
3177 // Truncate extracted integers to i1s if necessary.
3178 Variable *T = makeReg(IceType_i1);
3179 InstCast *Cast =
3180 InstCast::create(Func, InstCast::Trunc, T, ExtractedElementR);
3181 lowerCast(Cast);
3182 ExtractedElementR = T;
3183 }
3184
3185 // Copy the element to the destination.
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08003186 Variable *Dest = Instr->getDest();
John Porto7e93c622015-06-23 10:58:57 -07003187 _mov(Dest, ExtractedElementR);
3188}
3189
John Porto4a566862016-01-04 09:33:41 -08003190template <typename TraitsType>
3191void TargetX86Base<TraitsType>::lowerFcmp(const InstFcmp *Fcmp) {
David Sehre3984282015-12-15 17:34:55 -08003192 Variable *Dest = Fcmp->getDest();
3193
3194 if (isVectorType(Dest->getType())) {
3195 lowerFcmpVector(Fcmp);
3196 } else {
3197 constexpr Inst *Consumer = nullptr;
3198 lowerFcmpAndConsumer(Fcmp, Consumer);
3199 }
David Sehrdaf096c2015-11-11 10:56:58 -08003200}
3201
John Porto4a566862016-01-04 09:33:41 -08003202template <typename TraitsType>
3203void TargetX86Base<TraitsType>::lowerFcmpAndConsumer(const InstFcmp *Fcmp,
3204 const Inst *Consumer) {
David Sehre3984282015-12-15 17:34:55 -08003205 Operand *Src0 = Fcmp->getSrc(0);
3206 Operand *Src1 = Fcmp->getSrc(1);
3207 Variable *Dest = Fcmp->getDest();
John Porto7e93c622015-06-23 10:58:57 -07003208
David Sehre3984282015-12-15 17:34:55 -08003209 if (isVectorType(Dest->getType()))
3210 llvm::report_fatal_error("Vector compare/branch cannot be folded");
John Porto7e93c622015-06-23 10:58:57 -07003211
David Sehre3984282015-12-15 17:34:55 -08003212 if (Consumer != nullptr) {
3213 if (auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) {
3214 if (lowerOptimizeFcmpSelect(Fcmp, Select))
3215 return;
John Porto7e93c622015-06-23 10:58:57 -07003216 }
John Porto7e93c622015-06-23 10:58:57 -07003217 }
3218
3219 // Lowering a = fcmp cond, b, c
3220 // ucomiss b, c /* only if C1 != Br_None */
3221 // /* but swap b,c order if SwapOperands==true */
3222 // mov a, <default>
3223 // j<C1> label /* only if C1 != Br_None */
3224 // j<C2> label /* only if C2 != Br_None */
3225 // FakeUse(a) /* only if C1 != Br_None */
3226 // mov a, !<default> /* only if C1 != Br_None */
3227 // label: /* only if C1 != Br_None */
3228 //
3229 // setcc lowering when C1 != Br_None && C2 == Br_None:
3230 // ucomiss b, c /* but swap b,c order if SwapOperands==true */
3231 // setcc a, C1
David Sehre3984282015-12-15 17:34:55 -08003232 InstFcmp::FCond Condition = Fcmp->getCondition();
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08003233 assert(Condition < Traits::TableFcmpSize);
3234 if (Traits::TableFcmp[Condition].SwapScalarOperands)
John Porto7e93c622015-06-23 10:58:57 -07003235 std::swap(Src0, Src1);
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08003236 const bool HasC1 = (Traits::TableFcmp[Condition].C1 != Traits::Cond::Br_None);
3237 const bool HasC2 = (Traits::TableFcmp[Condition].C2 != Traits::Cond::Br_None);
John Porto7e93c622015-06-23 10:58:57 -07003238 if (HasC1) {
3239 Src0 = legalize(Src0);
3240 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
3241 Variable *T = nullptr;
3242 _mov(T, Src0);
3243 _ucomiss(T, Src1RM);
3244 if (!HasC2) {
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08003245 assert(Traits::TableFcmp[Condition].Default);
3246 setccOrConsumer(Traits::TableFcmp[Condition].C1, Dest, Consumer);
John Porto7e93c622015-06-23 10:58:57 -07003247 return;
3248 }
3249 }
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08003250 int32_t IntDefault = Traits::TableFcmp[Condition].Default;
David Sehre3984282015-12-15 17:34:55 -08003251 if (Consumer == nullptr) {
David Sehrdaf096c2015-11-11 10:56:58 -08003252 Constant *Default = Ctx->getConstantInt(Dest->getType(), IntDefault);
3253 _mov(Dest, Default);
3254 if (HasC1) {
John Porto4a566862016-01-04 09:33:41 -08003255 InstX86Label *Label = InstX86Label::create(Func, this);
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08003256 _br(Traits::TableFcmp[Condition].C1, Label);
David Sehrdaf096c2015-11-11 10:56:58 -08003257 if (HasC2) {
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08003258 _br(Traits::TableFcmp[Condition].C2, Label);
David Sehrdaf096c2015-11-11 10:56:58 -08003259 }
3260 Constant *NonDefault = Ctx->getConstantInt(Dest->getType(), !IntDefault);
David Sehre3984282015-12-15 17:34:55 -08003261 _redefined(_mov(Dest, NonDefault));
David Sehrdaf096c2015-11-11 10:56:58 -08003262 Context.insert(Label);
John Porto7e93c622015-06-23 10:58:57 -07003263 }
David Sehre3984282015-12-15 17:34:55 -08003264 return;
3265 }
3266 if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) {
David Sehrdaf096c2015-11-11 10:56:58 -08003267 CfgNode *TrueSucc = Br->getTargetTrue();
3268 CfgNode *FalseSucc = Br->getTargetFalse();
3269 if (IntDefault != 0)
3270 std::swap(TrueSucc, FalseSucc);
3271 if (HasC1) {
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08003272 _br(Traits::TableFcmp[Condition].C1, FalseSucc);
David Sehrdaf096c2015-11-11 10:56:58 -08003273 if (HasC2) {
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08003274 _br(Traits::TableFcmp[Condition].C2, FalseSucc);
David Sehrdaf096c2015-11-11 10:56:58 -08003275 }
3276 _br(TrueSucc);
3277 return;
3278 }
3279 _br(FalseSucc);
David Sehre3984282015-12-15 17:34:55 -08003280 return;
John Porto7e93c622015-06-23 10:58:57 -07003281 }
David Sehre3984282015-12-15 17:34:55 -08003282 if (auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) {
3283 Operand *SrcT = Select->getTrueOperand();
3284 Operand *SrcF = Select->getFalseOperand();
3285 Variable *SelectDest = Select->getDest();
3286 if (IntDefault != 0)
3287 std::swap(SrcT, SrcF);
3288 lowerMove(SelectDest, SrcF, false);
3289 if (HasC1) {
John Porto4a566862016-01-04 09:33:41 -08003290 InstX86Label *Label = InstX86Label::create(Func, this);
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08003291 _br(Traits::TableFcmp[Condition].C1, Label);
David Sehre3984282015-12-15 17:34:55 -08003292 if (HasC2) {
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08003293 _br(Traits::TableFcmp[Condition].C2, Label);
David Sehre3984282015-12-15 17:34:55 -08003294 }
3295 static constexpr bool IsRedefinition = true;
3296 lowerMove(SelectDest, SrcT, IsRedefinition);
3297 Context.insert(Label);
3298 }
3299 return;
3300 }
3301 llvm::report_fatal_error("Unexpected consumer type");
3302}
3303
John Porto4a566862016-01-04 09:33:41 -08003304template <typename TraitsType>
3305void TargetX86Base<TraitsType>::lowerFcmpVector(const InstFcmp *Fcmp) {
David Sehre3984282015-12-15 17:34:55 -08003306 Operand *Src0 = Fcmp->getSrc(0);
3307 Operand *Src1 = Fcmp->getSrc(1);
3308 Variable *Dest = Fcmp->getDest();
3309
3310 if (!isVectorType(Dest->getType()))
3311 llvm::report_fatal_error("Expected vector compare");
3312
3313 InstFcmp::FCond Condition = Fcmp->getCondition();
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08003314 assert(Condition < Traits::TableFcmpSize);
David Sehre3984282015-12-15 17:34:55 -08003315
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08003316 if (Traits::TableFcmp[Condition].SwapVectorOperands)
David Sehre3984282015-12-15 17:34:55 -08003317 std::swap(Src0, Src1);
3318
3319 Variable *T = nullptr;
3320
3321 if (Condition == InstFcmp::True) {
3322 // makeVectorOfOnes() requires an integer vector type.
3323 T = makeVectorOfMinusOnes(IceType_v4i32);
3324 } else if (Condition == InstFcmp::False) {
3325 T = makeVectorOfZeros(Dest->getType());
3326 } else {
3327 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
3328 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
John Porto4a566862016-01-04 09:33:41 -08003329 if (llvm::isa<X86OperandMem>(Src1RM))
David Sehre3984282015-12-15 17:34:55 -08003330 Src1RM = legalizeToReg(Src1RM);
3331
3332 switch (Condition) {
3333 default: {
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08003334 const CmppsCond Predicate = Traits::TableFcmp[Condition].Predicate;
David Sehre3984282015-12-15 17:34:55 -08003335 assert(Predicate != Traits::Cond::Cmpps_Invalid);
3336 T = makeReg(Src0RM->getType());
3337 _movp(T, Src0RM);
3338 _cmpps(T, Src1RM, Predicate);
3339 } break;
3340 case InstFcmp::One: {
3341 // Check both unequal and ordered.
3342 T = makeReg(Src0RM->getType());
3343 Variable *T2 = makeReg(Src0RM->getType());
3344 _movp(T, Src0RM);
3345 _cmpps(T, Src1RM, Traits::Cond::Cmpps_neq);
3346 _movp(T2, Src0RM);
3347 _cmpps(T2, Src1RM, Traits::Cond::Cmpps_ord);
3348 _pand(T, T2);
3349 } break;
3350 case InstFcmp::Ueq: {
3351 // Check both equal or unordered.
3352 T = makeReg(Src0RM->getType());
3353 Variable *T2 = makeReg(Src0RM->getType());
3354 _movp(T, Src0RM);
3355 _cmpps(T, Src1RM, Traits::Cond::Cmpps_eq);
3356 _movp(T2, Src0RM);
3357 _cmpps(T2, Src1RM, Traits::Cond::Cmpps_unord);
3358 _por(T, T2);
3359 } break;
3360 }
3361 }
3362
3363 assert(T != nullptr);
3364 _movp(Dest, T);
3365 eliminateNextVectorSextInstruction(Dest);
John Porto7e93c622015-06-23 10:58:57 -07003366}
3367
David Sehr5c875422015-10-15 10:38:53 -07003368inline bool isZero(const Operand *Opnd) {
3369 if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Opnd))
3370 return C64->getValue() == 0;
3371 if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(Opnd))
3372 return C32->getValue() == 0;
3373 return false;
3374}
3375
John Porto4a566862016-01-04 09:33:41 -08003376template <typename TraitsType>
3377void TargetX86Base<TraitsType>::lowerIcmpAndConsumer(const InstIcmp *Icmp,
3378 const Inst *Consumer) {
David Sehrd9810252015-10-16 13:23:17 -07003379 Operand *Src0 = legalize(Icmp->getSrc(0));
3380 Operand *Src1 = legalize(Icmp->getSrc(1));
3381 Variable *Dest = Icmp->getDest();
John Porto7e93c622015-06-23 10:58:57 -07003382
David Sehre3984282015-12-15 17:34:55 -08003383 if (isVectorType(Dest->getType()))
3384 llvm::report_fatal_error("Vector compare/branch cannot be folded");
John Porto7e93c622015-06-23 10:58:57 -07003385
John Porto1d235422015-08-12 12:37:53 -07003386 if (!Traits::Is64Bit && Src0->getType() == IceType_i64) {
David Sehre3984282015-12-15 17:34:55 -08003387 lowerIcmp64(Icmp, Consumer);
John Porto7e93c622015-06-23 10:58:57 -07003388 return;
3389 }
3390
3391 // cmp b, c
David Sehr5c875422015-10-15 10:38:53 -07003392 if (isZero(Src1)) {
David Sehrd9810252015-10-16 13:23:17 -07003393 switch (Icmp->getCondition()) {
David Sehr5c875422015-10-15 10:38:53 -07003394 default:
3395 break;
3396 case InstIcmp::Uge:
David Sehre3984282015-12-15 17:34:55 -08003397 movOrConsumer(true, Dest, Consumer);
David Sehr5c875422015-10-15 10:38:53 -07003398 return;
3399 case InstIcmp::Ult:
David Sehre3984282015-12-15 17:34:55 -08003400 movOrConsumer(false, Dest, Consumer);
David Sehr5c875422015-10-15 10:38:53 -07003401 return;
3402 }
3403 }
John Porto7e93c622015-06-23 10:58:57 -07003404 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1);
3405 _cmp(Src0RM, Src1);
David Sehre3984282015-12-15 17:34:55 -08003406 setccOrConsumer(Traits::getIcmp32Mapping(Icmp->getCondition()), Dest,
3407 Consumer);
3408}
3409
John Porto4a566862016-01-04 09:33:41 -08003410template <typename TraitsType>
3411void TargetX86Base<TraitsType>::lowerIcmpVector(const InstIcmp *Icmp) {
David Sehre3984282015-12-15 17:34:55 -08003412 Operand *Src0 = legalize(Icmp->getSrc(0));
3413 Operand *Src1 = legalize(Icmp->getSrc(1));
3414 Variable *Dest = Icmp->getDest();
3415
3416 if (!isVectorType(Dest->getType()))
3417 llvm::report_fatal_error("Expected a vector compare");
3418
3419 Type Ty = Src0->getType();
3420 // Promote i1 vectors to 128 bit integer vector types.
3421 if (typeElementType(Ty) == IceType_i1) {
3422 Type NewTy = IceType_NUM;
3423 switch (Ty) {
3424 default:
3425 llvm::report_fatal_error("unexpected type");
3426 break;
3427 case IceType_v4i1:
3428 NewTy = IceType_v4i32;
3429 break;
3430 case IceType_v8i1:
3431 NewTy = IceType_v8i16;
3432 break;
3433 case IceType_v16i1:
3434 NewTy = IceType_v16i8;
3435 break;
3436 }
3437 Variable *NewSrc0 = Func->makeVariable(NewTy);
3438 Variable *NewSrc1 = Func->makeVariable(NewTy);
3439 lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc0, Src0));
3440 lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc1, Src1));
3441 Src0 = NewSrc0;
3442 Src1 = NewSrc1;
3443 Ty = NewTy;
3444 }
3445
3446 InstIcmp::ICond Condition = Icmp->getCondition();
3447
3448 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
3449 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
3450
3451 // SSE2 only has signed comparison operations. Transform unsigned inputs in
3452 // a manner that allows for the use of signed comparison operations by
3453 // flipping the high order bits.
3454 if (Condition == InstIcmp::Ugt || Condition == InstIcmp::Uge ||
3455 Condition == InstIcmp::Ult || Condition == InstIcmp::Ule) {
3456 Variable *T0 = makeReg(Ty);
3457 Variable *T1 = makeReg(Ty);
3458 Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty);
3459 _movp(T0, Src0RM);
3460 _pxor(T0, HighOrderBits);
3461 _movp(T1, Src1RM);
3462 _pxor(T1, HighOrderBits);
3463 Src0RM = T0;
3464 Src1RM = T1;
3465 }
3466
3467 Variable *T = makeReg(Ty);
3468 switch (Condition) {
3469 default:
3470 llvm_unreachable("unexpected condition");
3471 break;
3472 case InstIcmp::Eq: {
John Porto4a566862016-01-04 09:33:41 -08003473 if (llvm::isa<X86OperandMem>(Src1RM))
David Sehre3984282015-12-15 17:34:55 -08003474 Src1RM = legalizeToReg(Src1RM);
3475 _movp(T, Src0RM);
3476 _pcmpeq(T, Src1RM);
3477 } break;
3478 case InstIcmp::Ne: {
John Porto4a566862016-01-04 09:33:41 -08003479 if (llvm::isa<X86OperandMem>(Src1RM))
David Sehre3984282015-12-15 17:34:55 -08003480 Src1RM = legalizeToReg(Src1RM);
3481 _movp(T, Src0RM);
3482 _pcmpeq(T, Src1RM);
3483 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
3484 _pxor(T, MinusOne);
3485 } break;
3486 case InstIcmp::Ugt:
3487 case InstIcmp::Sgt: {
John Porto4a566862016-01-04 09:33:41 -08003488 if (llvm::isa<X86OperandMem>(Src1RM))
David Sehre3984282015-12-15 17:34:55 -08003489 Src1RM = legalizeToReg(Src1RM);
3490 _movp(T, Src0RM);
3491 _pcmpgt(T, Src1RM);
3492 } break;
3493 case InstIcmp::Uge:
3494 case InstIcmp::Sge: {
3495 // !(Src1RM > Src0RM)
John Porto4a566862016-01-04 09:33:41 -08003496 if (llvm::isa<X86OperandMem>(Src0RM))
David Sehre3984282015-12-15 17:34:55 -08003497 Src0RM = legalizeToReg(Src0RM);
3498 _movp(T, Src1RM);
3499 _pcmpgt(T, Src0RM);
3500 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
3501 _pxor(T, MinusOne);
3502 } break;
3503 case InstIcmp::Ult:
3504 case InstIcmp::Slt: {
John Porto4a566862016-01-04 09:33:41 -08003505 if (llvm::isa<X86OperandMem>(Src0RM))
David Sehre3984282015-12-15 17:34:55 -08003506 Src0RM = legalizeToReg(Src0RM);
3507 _movp(T, Src1RM);
3508 _pcmpgt(T, Src0RM);
3509 } break;
3510 case InstIcmp::Ule:
3511 case InstIcmp::Sle: {
3512 // !(Src0RM > Src1RM)
John Porto4a566862016-01-04 09:33:41 -08003513 if (llvm::isa<X86OperandMem>(Src1RM))
David Sehre3984282015-12-15 17:34:55 -08003514 Src1RM = legalizeToReg(Src1RM);
3515 _movp(T, Src0RM);
3516 _pcmpgt(T, Src1RM);
3517 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
3518 _pxor(T, MinusOne);
3519 } break;
3520 }
3521
3522 _movp(Dest, T);
3523 eliminateNextVectorSextInstruction(Dest);
John Porto7e93c622015-06-23 10:58:57 -07003524}
3525
John Porto4a566862016-01-04 09:33:41 -08003526template <typename TraitsType>
John Porto1d235422015-08-12 12:37:53 -07003527template <typename T>
3528typename std::enable_if<!T::Is64Bit, void>::type
John Porto4a566862016-01-04 09:33:41 -08003529TargetX86Base<TraitsType>::lowerIcmp64(const InstIcmp *Icmp,
3530 const Inst *Consumer) {
John Porto1d235422015-08-12 12:37:53 -07003531 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
David Sehrd9810252015-10-16 13:23:17 -07003532 Operand *Src0 = legalize(Icmp->getSrc(0));
3533 Operand *Src1 = legalize(Icmp->getSrc(1));
3534 Variable *Dest = Icmp->getDest();
3535 InstIcmp::ICond Condition = Icmp->getCondition();
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08003536 assert(Condition < Traits::TableIcmp64Size);
David Sehr5c875422015-10-15 10:38:53 -07003537 Operand *Src0LoRM = nullptr;
3538 Operand *Src0HiRM = nullptr;
3539 // Legalize the portions of Src0 that are going to be needed.
3540 if (isZero(Src1)) {
3541 switch (Condition) {
3542 default:
3543 llvm_unreachable("unexpected condition");
3544 break;
3545 // These two are not optimized, so we fall through to the general case,
3546 // which needs the upper and lower halves legalized.
3547 case InstIcmp::Sgt:
3548 case InstIcmp::Sle:
Jim Stichnoth1fb030c2015-10-15 11:10:38 -07003549 // These four compare after performing an "or" of the high and low half, so
3550 // they need the upper and lower halves legalized.
David Sehr5c875422015-10-15 10:38:53 -07003551 case InstIcmp::Eq:
3552 case InstIcmp::Ule:
3553 case InstIcmp::Ne:
3554 case InstIcmp::Ugt:
3555 Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem);
3556 // These two test only the high half's sign bit, so they need only
3557 // the upper half legalized.
3558 case InstIcmp::Sge:
3559 case InstIcmp::Slt:
3560 Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem);
3561 break;
3562
3563 // These two move constants and hence need no legalization.
3564 case InstIcmp::Uge:
3565 case InstIcmp::Ult:
3566 break;
3567 }
3568 } else {
3569 Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem);
3570 Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem);
3571 }
3572 // Optimize comparisons with zero.
3573 if (isZero(Src1)) {
3574 Constant *SignMask = Ctx->getConstantInt32(0x80000000);
3575 Variable *Temp = nullptr;
3576 switch (Condition) {
3577 default:
3578 llvm_unreachable("unexpected condition");
3579 break;
3580 case InstIcmp::Eq:
3581 case InstIcmp::Ule:
David Sehraa0b1a12015-10-27 16:55:40 -07003582 // Mov Src0HiRM first, because it was legalized most recently, and will
3583 // sometimes avoid a move before the OR.
3584 _mov(Temp, Src0HiRM);
3585 _or(Temp, Src0LoRM);
John Porto1d937a82015-12-17 06:19:34 -08003586 Context.insert<InstFakeUse>(Temp);
David Sehre3984282015-12-15 17:34:55 -08003587 setccOrConsumer(Traits::Cond::Br_e, Dest, Consumer);
David Sehr5c875422015-10-15 10:38:53 -07003588 return;
3589 case InstIcmp::Ne:
3590 case InstIcmp::Ugt:
David Sehraa0b1a12015-10-27 16:55:40 -07003591 // Mov Src0HiRM first, because it was legalized most recently, and will
3592 // sometimes avoid a move before the OR.
3593 _mov(Temp, Src0HiRM);
3594 _or(Temp, Src0LoRM);
John Porto1d937a82015-12-17 06:19:34 -08003595 Context.insert<InstFakeUse>(Temp);
David Sehre3984282015-12-15 17:34:55 -08003596 setccOrConsumer(Traits::Cond::Br_ne, Dest, Consumer);
David Sehr5c875422015-10-15 10:38:53 -07003597 return;
3598 case InstIcmp::Uge:
David Sehre3984282015-12-15 17:34:55 -08003599 movOrConsumer(true, Dest, Consumer);
David Sehr5c875422015-10-15 10:38:53 -07003600 return;
3601 case InstIcmp::Ult:
David Sehre3984282015-12-15 17:34:55 -08003602 movOrConsumer(false, Dest, Consumer);
David Sehr5c875422015-10-15 10:38:53 -07003603 return;
3604 case InstIcmp::Sgt:
3605 break;
3606 case InstIcmp::Sge:
3607 _test(Src0HiRM, SignMask);
David Sehre3984282015-12-15 17:34:55 -08003608 setccOrConsumer(Traits::Cond::Br_e, Dest, Consumer);
David Sehr5c875422015-10-15 10:38:53 -07003609 return;
3610 case InstIcmp::Slt:
3611 _test(Src0HiRM, SignMask);
David Sehre3984282015-12-15 17:34:55 -08003612 setccOrConsumer(Traits::Cond::Br_ne, Dest, Consumer);
David Sehr5c875422015-10-15 10:38:53 -07003613 return;
3614 case InstIcmp::Sle:
3615 break;
3616 }
3617 }
3618 // Handle general compares.
3619 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
3620 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
David Sehre3984282015-12-15 17:34:55 -08003621 if (Consumer == nullptr) {
David Sehrd9810252015-10-16 13:23:17 -07003622 Constant *Zero = Ctx->getConstantInt(Dest->getType(), 0);
3623 Constant *One = Ctx->getConstantInt(Dest->getType(), 1);
John Porto4a566862016-01-04 09:33:41 -08003624 InstX86Label *LabelFalse = InstX86Label::create(Func, this);
3625 InstX86Label *LabelTrue = InstX86Label::create(Func, this);
David Sehrd9810252015-10-16 13:23:17 -07003626 _mov(Dest, One);
3627 _cmp(Src0HiRM, Src1HiRI);
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08003628 if (Traits::TableIcmp64[Condition].C1 != Traits::Cond::Br_None)
3629 _br(Traits::TableIcmp64[Condition].C1, LabelTrue);
3630 if (Traits::TableIcmp64[Condition].C2 != Traits::Cond::Br_None)
3631 _br(Traits::TableIcmp64[Condition].C2, LabelFalse);
David Sehrd9810252015-10-16 13:23:17 -07003632 _cmp(Src0LoRM, Src1LoRI);
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08003633 _br(Traits::TableIcmp64[Condition].C3, LabelTrue);
David Sehrd9810252015-10-16 13:23:17 -07003634 Context.insert(LabelFalse);
David Sehre3984282015-12-15 17:34:55 -08003635 _redefined(_mov(Dest, Zero));
David Sehrd9810252015-10-16 13:23:17 -07003636 Context.insert(LabelTrue);
David Sehre3984282015-12-15 17:34:55 -08003637 return;
3638 }
3639 if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) {
David Sehrd9810252015-10-16 13:23:17 -07003640 _cmp(Src0HiRM, Src1HiRI);
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08003641 if (Traits::TableIcmp64[Condition].C1 != Traits::Cond::Br_None)
3642 _br(Traits::TableIcmp64[Condition].C1, Br->getTargetTrue());
3643 if (Traits::TableIcmp64[Condition].C2 != Traits::Cond::Br_None)
3644 _br(Traits::TableIcmp64[Condition].C2, Br->getTargetFalse());
David Sehrd9810252015-10-16 13:23:17 -07003645 _cmp(Src0LoRM, Src1LoRI);
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08003646 _br(Traits::TableIcmp64[Condition].C3, Br->getTargetTrue(),
David Sehrd9810252015-10-16 13:23:17 -07003647 Br->getTargetFalse());
David Sehre3984282015-12-15 17:34:55 -08003648 return;
David Sehrd9810252015-10-16 13:23:17 -07003649 }
David Sehre3984282015-12-15 17:34:55 -08003650 if (auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) {
3651 Operand *SrcT = Select->getTrueOperand();
3652 Operand *SrcF = Select->getFalseOperand();
3653 Variable *SelectDest = Select->getDest();
John Porto4a566862016-01-04 09:33:41 -08003654 InstX86Label *LabelFalse = InstX86Label::create(Func, this);
3655 InstX86Label *LabelTrue = InstX86Label::create(Func, this);
David Sehre3984282015-12-15 17:34:55 -08003656 lowerMove(SelectDest, SrcT, false);
3657 _cmp(Src0HiRM, Src1HiRI);
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08003658 if (Traits::TableIcmp64[Condition].C1 != Traits::Cond::Br_None)
3659 _br(Traits::TableIcmp64[Condition].C1, LabelTrue);
3660 if (Traits::TableIcmp64[Condition].C2 != Traits::Cond::Br_None)
3661 _br(Traits::TableIcmp64[Condition].C2, LabelFalse);
David Sehre3984282015-12-15 17:34:55 -08003662 _cmp(Src0LoRM, Src1LoRI);
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08003663 _br(Traits::TableIcmp64[Condition].C3, LabelTrue);
David Sehre3984282015-12-15 17:34:55 -08003664 Context.insert(LabelFalse);
3665 static constexpr bool IsRedefinition = true;
3666 lowerMove(SelectDest, SrcF, IsRedefinition);
3667 Context.insert(LabelTrue);
3668 return;
3669 }
3670 llvm::report_fatal_error("Unexpected consumer type");
David Sehrd9810252015-10-16 13:23:17 -07003671}
3672
John Porto4a566862016-01-04 09:33:41 -08003673template <typename TraitsType>
3674void TargetX86Base<TraitsType>::setccOrConsumer(BrCond Condition,
3675 Variable *Dest,
3676 const Inst *Consumer) {
David Sehre3984282015-12-15 17:34:55 -08003677 if (Consumer == nullptr) {
David Sehrd9810252015-10-16 13:23:17 -07003678 _setcc(Dest, Condition);
David Sehre3984282015-12-15 17:34:55 -08003679 return;
David Sehrd9810252015-10-16 13:23:17 -07003680 }
David Sehre3984282015-12-15 17:34:55 -08003681 if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) {
3682 _br(Condition, Br->getTargetTrue(), Br->getTargetFalse());
3683 return;
3684 }
3685 if (const auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) {
3686 Operand *SrcT = Select->getTrueOperand();
3687 Operand *SrcF = Select->getFalseOperand();
3688 Variable *SelectDest = Select->getDest();
3689 lowerSelectMove(SelectDest, Condition, SrcT, SrcF);
3690 return;
3691 }
3692 llvm::report_fatal_error("Unexpected consumer type");
David Sehrd9810252015-10-16 13:23:17 -07003693}
3694
John Porto4a566862016-01-04 09:33:41 -08003695template <typename TraitsType>
3696void TargetX86Base<TraitsType>::movOrConsumer(bool IcmpResult, Variable *Dest,
3697 const Inst *Consumer) {
David Sehre3984282015-12-15 17:34:55 -08003698 if (Consumer == nullptr) {
David Sehrd9810252015-10-16 13:23:17 -07003699 _mov(Dest, Ctx->getConstantInt(Dest->getType(), (IcmpResult ? 1 : 0)));
David Sehre3984282015-12-15 17:34:55 -08003700 return;
3701 }
3702 if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) {
David Sehrd9810252015-10-16 13:23:17 -07003703 // TODO(sehr,stichnot): This could be done with a single unconditional
3704 // branch instruction, but subzero doesn't know how to handle the resulting
3705 // control flow graph changes now. Make it do so to eliminate mov and cmp.
3706 _mov(Dest, Ctx->getConstantInt(Dest->getType(), (IcmpResult ? 1 : 0)));
3707 _cmp(Dest, Ctx->getConstantInt(Dest->getType(), 0));
3708 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse());
David Sehre3984282015-12-15 17:34:55 -08003709 return;
David Sehrd9810252015-10-16 13:23:17 -07003710 }
David Sehre3984282015-12-15 17:34:55 -08003711 if (const auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) {
3712 Operand *Src = nullptr;
3713 if (IcmpResult) {
3714 Src = legalize(Select->getTrueOperand(), Legal_Reg | Legal_Imm);
3715 } else {
3716 Src = legalize(Select->getFalseOperand(), Legal_Reg | Legal_Imm);
3717 }
3718 Variable *SelectDest = Select->getDest();
3719 lowerMove(SelectDest, Src, false);
3720 return;
3721 }
3722 llvm::report_fatal_error("Unexpected consumer type");
John Porto1d235422015-08-12 12:37:53 -07003723}
3724
John Porto4a566862016-01-04 09:33:41 -08003725template <typename TraitsType>
3726void TargetX86Base<TraitsType>::lowerArithAndConsumer(
3727 const InstArithmetic *Arith, const Inst *Consumer) {
David Sehrdaf096c2015-11-11 10:56:58 -08003728 Variable *T = nullptr;
3729 Operand *Src0 = legalize(Arith->getSrc(0));
3730 Operand *Src1 = legalize(Arith->getSrc(1));
3731 Variable *Dest = Arith->getDest();
3732 switch (Arith->getOp()) {
3733 default:
3734 llvm_unreachable("arithmetic operator not AND or OR");
3735 break;
3736 case InstArithmetic::And:
3737 _mov(T, Src0);
3738 // Test cannot have an address in the second position. Since T is
3739 // guaranteed to be a register and Src1 could be a memory load, ensure
3740 // that the second argument is a register.
3741 if (llvm::isa<Constant>(Src1))
3742 _test(T, Src1);
3743 else
3744 _test(Src1, T);
3745 break;
3746 case InstArithmetic::Or:
3747 _mov(T, Src0);
3748 _or(T, Src1);
3749 break;
3750 }
David Sehre3984282015-12-15 17:34:55 -08003751
3752 if (Consumer == nullptr) {
3753 llvm::report_fatal_error("Expected a consumer instruction");
3754 }
3755 if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) {
John Porto1d937a82015-12-17 06:19:34 -08003756 Context.insert<InstFakeUse>(T);
3757 Context.insert<InstFakeDef>(Dest);
David Sehre3984282015-12-15 17:34:55 -08003758 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse());
3759 return;
3760 }
3761 llvm::report_fatal_error("Unexpected consumer type");
David Sehrdaf096c2015-11-11 10:56:58 -08003762}
3763
John Porto4a566862016-01-04 09:33:41 -08003764template <typename TraitsType>
3765void TargetX86Base<TraitsType>::lowerInsertElement(
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08003766 const InstInsertElement *Instr) {
3767 Operand *SourceVectNotLegalized = Instr->getSrc(0);
3768 Operand *ElementToInsertNotLegalized = Instr->getSrc(1);
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08003769 auto *ElementIndex = llvm::dyn_cast<ConstantInteger32>(Instr->getSrc(2));
John Porto7e93c622015-06-23 10:58:57 -07003770 // Only constant indices are allowed in PNaCl IR.
3771 assert(ElementIndex);
3772 unsigned Index = ElementIndex->getValue();
3773 assert(Index < typeNumElements(SourceVectNotLegalized->getType()));
3774
3775 Type Ty = SourceVectNotLegalized->getType();
3776 Type ElementTy = typeElementType(Ty);
3777 Type InVectorElementTy = Traits::getInVectorElementType(Ty);
3778
3779 if (ElementTy == IceType_i1) {
Andrew Scull57e12682015-09-16 11:30:19 -07003780 // Expand the element to the appropriate size for it to be inserted in the
3781 // vector.
John Porto5aeed952015-07-21 13:39:09 -07003782 Variable *Expanded = Func->makeVariable(InVectorElementTy);
Jim Stichnoth54f3d512015-12-11 09:53:00 -08003783 auto *Cast = InstCast::create(Func, InstCast::Zext, Expanded,
3784 ElementToInsertNotLegalized);
John Porto7e93c622015-06-23 10:58:57 -07003785 lowerCast(Cast);
3786 ElementToInsertNotLegalized = Expanded;
3787 }
3788
3789 if (Ty == IceType_v8i16 || Ty == IceType_v8i1 ||
John Porto5d0acff2015-06-30 15:29:21 -07003790 InstructionSet >= Traits::SSE4_1) {
John Porto7e93c622015-06-23 10:58:57 -07003791 // Use insertps, pinsrb, pinsrw, or pinsrd.
3792 Operand *ElementRM =
3793 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem);
3794 Operand *SourceVectRM =
3795 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
3796 Variable *T = makeReg(Ty);
3797 _movp(T, SourceVectRM);
Jim Stichnothc59288b2015-11-09 11:38:40 -08003798 if (Ty == IceType_v4f32) {
John Porto7e93c622015-06-23 10:58:57 -07003799 _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4));
Jim Stichnothc59288b2015-11-09 11:38:40 -08003800 } else {
3801 // For the pinsrb and pinsrw instructions, when the source operand is a
3802 // register, it must be a full r32 register like eax, and not ax/al/ah.
John Porto4a566862016-01-04 09:33:41 -08003803 // For filetype=asm, InstX86Pinsr<TraitsType>::emit() compensates for
3804 // the use
Jim Stichnothc59288b2015-11-09 11:38:40 -08003805 // of r16 and r8 by converting them through getBaseReg(), while emitIAS()
3806 // validates that the original and base register encodings are the same.
3807 if (ElementRM->getType() == IceType_i8 &&
3808 llvm::isa<Variable>(ElementRM)) {
3809 // Don't use ah/bh/ch/dh for pinsrb.
3810 ElementRM = copyToReg8(ElementRM);
3811 }
John Porto7e93c622015-06-23 10:58:57 -07003812 _pinsr(T, ElementRM, Ctx->getConstantInt32(Index));
Jim Stichnothc59288b2015-11-09 11:38:40 -08003813 }
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08003814 _movp(Instr->getDest(), T);
John Porto7e93c622015-06-23 10:58:57 -07003815 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
3816 // Use shufps or movss.
3817 Variable *ElementR = nullptr;
3818 Operand *SourceVectRM =
3819 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
3820
3821 if (InVectorElementTy == IceType_f32) {
3822 // ElementR will be in an XMM register since it is floating point.
Andrew Scull97f460d2015-07-21 10:07:42 -07003823 ElementR = legalizeToReg(ElementToInsertNotLegalized);
John Porto7e93c622015-06-23 10:58:57 -07003824 } else {
3825 // Copy an integer to an XMM register.
3826 Operand *T = legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem);
3827 ElementR = makeReg(Ty);
3828 _movd(ElementR, T);
3829 }
3830
3831 if (Index == 0) {
3832 Variable *T = makeReg(Ty);
3833 _movp(T, SourceVectRM);
3834 _movss(T, ElementR);
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08003835 _movp(Instr->getDest(), T);
John Porto7e93c622015-06-23 10:58:57 -07003836 return;
3837 }
3838
Andrew Scull57e12682015-09-16 11:30:19 -07003839 // shufps treats the source and destination operands as vectors of four
3840 // doublewords. The destination's two high doublewords are selected from
3841 // the source operand and the two low doublewords are selected from the
3842 // (original value of) the destination operand. An insertelement operation
3843 // can be effected with a sequence of two shufps operations with
3844 // appropriate masks. In all cases below, Element[0] is being inserted into
3845 // SourceVectOperand. Indices are ordered from left to right.
John Porto7e93c622015-06-23 10:58:57 -07003846 //
3847 // insertelement into index 1 (result is stored in ElementR):
3848 // ElementR := ElementR[0, 0] SourceVectRM[0, 0]
3849 // ElementR := ElementR[3, 0] SourceVectRM[2, 3]
3850 //
3851 // insertelement into index 2 (result is stored in T):
3852 // T := SourceVectRM
3853 // ElementR := ElementR[0, 0] T[0, 3]
3854 // T := T[0, 1] ElementR[0, 3]
3855 //
3856 // insertelement into index 3 (result is stored in T):
3857 // T := SourceVectRM
3858 // ElementR := ElementR[0, 0] T[0, 2]
3859 // T := T[0, 1] ElementR[3, 0]
3860 const unsigned char Mask1[3] = {0, 192, 128};
3861 const unsigned char Mask2[3] = {227, 196, 52};
3862
3863 Constant *Mask1Constant = Ctx->getConstantInt32(Mask1[Index - 1]);
3864 Constant *Mask2Constant = Ctx->getConstantInt32(Mask2[Index - 1]);
3865
3866 if (Index == 1) {
3867 _shufps(ElementR, SourceVectRM, Mask1Constant);
3868 _shufps(ElementR, SourceVectRM, Mask2Constant);
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08003869 _movp(Instr->getDest(), ElementR);
John Porto7e93c622015-06-23 10:58:57 -07003870 } else {
3871 Variable *T = makeReg(Ty);
3872 _movp(T, SourceVectRM);
3873 _shufps(ElementR, T, Mask1Constant);
3874 _shufps(T, ElementR, Mask2Constant);
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08003875 _movp(Instr->getDest(), T);
John Porto7e93c622015-06-23 10:58:57 -07003876 }
3877 } else {
3878 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
Andrew Scull57e12682015-09-16 11:30:19 -07003879 // Spill the value to a stack slot and perform the insertion in memory.
John Porto7e93c622015-06-23 10:58:57 -07003880 //
Andrew Scull57e12682015-09-16 11:30:19 -07003881 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when support
3882 // for legalizing to mem is implemented.
John Porto5aeed952015-07-21 13:39:09 -07003883 Variable *Slot = Func->makeVariable(Ty);
Andrew Scull11c9a322015-08-28 14:24:14 -07003884 Slot->setMustNotHaveReg();
Andrew Scull97f460d2015-07-21 10:07:42 -07003885 _movp(Slot, legalizeToReg(SourceVectNotLegalized));
John Porto7e93c622015-06-23 10:58:57 -07003886
3887 // Compute the location of the position to insert in memory.
3888 unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
John Porto4a566862016-01-04 09:33:41 -08003889 X86OperandMem *Loc =
John Porto7e93c622015-06-23 10:58:57 -07003890 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
Andrew Scull97f460d2015-07-21 10:07:42 -07003891 _store(legalizeToReg(ElementToInsertNotLegalized), Loc);
John Porto7e93c622015-06-23 10:58:57 -07003892
3893 Variable *T = makeReg(Ty);
3894 _movp(T, Slot);
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08003895 _movp(Instr->getDest(), T);
John Porto7e93c622015-06-23 10:58:57 -07003896 }
3897}
3898
John Porto4a566862016-01-04 09:33:41 -08003899template <typename TraitsType>
3900void TargetX86Base<TraitsType>::lowerIntrinsicCall(
John Porto7e93c622015-06-23 10:58:57 -07003901 const InstIntrinsicCall *Instr) {
3902 switch (Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID) {
3903 case Intrinsics::AtomicCmpxchg: {
3904 if (!Intrinsics::isMemoryOrderValid(
3905 ID, getConstantMemoryOrder(Instr->getArg(3)),
3906 getConstantMemoryOrder(Instr->getArg(4)))) {
3907 Func->setError("Unexpected memory ordering for AtomicCmpxchg");
3908 return;
3909 }
3910 Variable *DestPrev = Instr->getDest();
Jan Voungfbdd2442015-07-15 12:36:20 -07003911 Operand *PtrToMem = legalize(Instr->getArg(0));
3912 Operand *Expected = legalize(Instr->getArg(1));
3913 Operand *Desired = legalize(Instr->getArg(2));
John Porto7e93c622015-06-23 10:58:57 -07003914 if (tryOptimizedCmpxchgCmpBr(DestPrev, PtrToMem, Expected, Desired))
3915 return;
3916 lowerAtomicCmpxchg(DestPrev, PtrToMem, Expected, Desired);
3917 return;
3918 }
3919 case Intrinsics::AtomicFence:
3920 if (!Intrinsics::isMemoryOrderValid(
3921 ID, getConstantMemoryOrder(Instr->getArg(0)))) {
3922 Func->setError("Unexpected memory ordering for AtomicFence");
3923 return;
3924 }
3925 _mfence();
3926 return;
3927 case Intrinsics::AtomicFenceAll:
Andrew Scull57e12682015-09-16 11:30:19 -07003928 // NOTE: FenceAll should prevent and load/store from being moved across the
3929 // fence (both atomic and non-atomic). The InstX8632Mfence instruction is
3930 // currently marked coarsely as "HasSideEffects".
John Porto7e93c622015-06-23 10:58:57 -07003931 _mfence();
3932 return;
3933 case Intrinsics::AtomicIsLockFree: {
3934 // X86 is always lock free for 8/16/32/64 bit accesses.
Andrew Scull57e12682015-09-16 11:30:19 -07003935 // TODO(jvoung): Since the result is constant when given a constant byte
3936 // size, this opens up DCE opportunities.
John Porto7e93c622015-06-23 10:58:57 -07003937 Operand *ByteSize = Instr->getArg(0);
3938 Variable *Dest = Instr->getDest();
Jim Stichnoth54f3d512015-12-11 09:53:00 -08003939 if (auto *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize)) {
John Porto7e93c622015-06-23 10:58:57 -07003940 Constant *Result;
3941 switch (CI->getValue()) {
3942 default:
Andrew Scull57e12682015-09-16 11:30:19 -07003943 // Some x86-64 processors support the cmpxchg16b instruction, which can
3944 // make 16-byte operations lock free (when used with the LOCK prefix).
3945 // However, that's not supported in 32-bit mode, so just return 0 even
3946 // for large sizes.
John Porto7e93c622015-06-23 10:58:57 -07003947 Result = Ctx->getConstantZero(IceType_i32);
3948 break;
3949 case 1:
3950 case 2:
3951 case 4:
3952 case 8:
3953 Result = Ctx->getConstantInt32(1);
3954 break;
3955 }
3956 _mov(Dest, Result);
3957 return;
3958 }
3959 // The PNaCl ABI requires the byte size to be a compile-time constant.
3960 Func->setError("AtomicIsLockFree byte size should be compile-time const");
3961 return;
3962 }
3963 case Intrinsics::AtomicLoad: {
Andrew Scull57e12682015-09-16 11:30:19 -07003964 // We require the memory address to be naturally aligned. Given that is the
3965 // case, then normal loads are atomic.
John Porto7e93c622015-06-23 10:58:57 -07003966 if (!Intrinsics::isMemoryOrderValid(
3967 ID, getConstantMemoryOrder(Instr->getArg(1)))) {
3968 Func->setError("Unexpected memory ordering for AtomicLoad");
3969 return;
3970 }
3971 Variable *Dest = Instr->getDest();
Andrew Scull6d47bcd2015-09-17 17:10:05 -07003972 if (!Traits::Is64Bit) {
3973 if (auto *Dest64On32 = llvm::dyn_cast<Variable64On32>(Dest)) {
3974 // Follow what GCC does and use a movq instead of what lowerLoad()
3975 // normally does (split the load into two). Thus, this skips
3976 // load/arithmetic op folding. Load/arithmetic folding can't happen
3977 // anyway, since this is x86-32 and integer arithmetic only happens on
3978 // 32-bit quantities.
3979 Variable *T = makeReg(IceType_f64);
John Porto4a566862016-01-04 09:33:41 -08003980 X86OperandMem *Addr = formMemoryOperand(Instr->getArg(0), IceType_f64);
Andrew Scull6d47bcd2015-09-17 17:10:05 -07003981 _movq(T, Addr);
3982 // Then cast the bits back out of the XMM register to the i64 Dest.
Jim Stichnoth54f3d512015-12-11 09:53:00 -08003983 auto *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T);
Andrew Scull6d47bcd2015-09-17 17:10:05 -07003984 lowerCast(Cast);
3985 // Make sure that the atomic load isn't elided when unused.
John Porto1d937a82015-12-17 06:19:34 -08003986 Context.insert<InstFakeUse>(Dest64On32->getLo());
3987 Context.insert<InstFakeUse>(Dest64On32->getHi());
Andrew Scull6d47bcd2015-09-17 17:10:05 -07003988 return;
3989 }
John Porto7e93c622015-06-23 10:58:57 -07003990 }
Jim Stichnoth54f3d512015-12-11 09:53:00 -08003991 auto *Load = InstLoad::create(Func, Dest, Instr->getArg(0));
John Porto7e93c622015-06-23 10:58:57 -07003992 lowerLoad(Load);
3993 // Make sure the atomic load isn't elided when unused, by adding a FakeUse.
Andrew Scull57e12682015-09-16 11:30:19 -07003994 // Since lowerLoad may fuse the load w/ an arithmetic instruction, insert
3995 // the FakeUse on the last-inserted instruction's dest.
John Porto1d937a82015-12-17 06:19:34 -08003996 Context.insert<InstFakeUse>(Context.getLastInserted()->getDest());
John Porto7e93c622015-06-23 10:58:57 -07003997 return;
3998 }
3999 case Intrinsics::AtomicRMW:
4000 if (!Intrinsics::isMemoryOrderValid(
4001 ID, getConstantMemoryOrder(Instr->getArg(3)))) {
4002 Func->setError("Unexpected memory ordering for AtomicRMW");
4003 return;
4004 }
Jim Stichnoth20b71f52015-06-24 15:52:24 -07004005 lowerAtomicRMW(
4006 Instr->getDest(),
4007 static_cast<uint32_t>(
4008 llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()),
4009 Instr->getArg(1), Instr->getArg(2));
John Porto7e93c622015-06-23 10:58:57 -07004010 return;
4011 case Intrinsics::AtomicStore: {
4012 if (!Intrinsics::isMemoryOrderValid(
4013 ID, getConstantMemoryOrder(Instr->getArg(2)))) {
4014 Func->setError("Unexpected memory ordering for AtomicStore");
4015 return;
4016 }
Andrew Scull57e12682015-09-16 11:30:19 -07004017 // We require the memory address to be naturally aligned. Given that is the
4018 // case, then normal stores are atomic. Add a fence after the store to make
4019 // it visible.
John Porto7e93c622015-06-23 10:58:57 -07004020 Operand *Value = Instr->getArg(0);
4021 Operand *Ptr = Instr->getArg(1);
John Porto1d235422015-08-12 12:37:53 -07004022 if (!Traits::Is64Bit && Value->getType() == IceType_i64) {
Andrew Scull57e12682015-09-16 11:30:19 -07004023 // Use a movq instead of what lowerStore() normally does (split the store
4024 // into two), following what GCC does. Cast the bits from int -> to an
4025 // xmm register first.
John Porto7e93c622015-06-23 10:58:57 -07004026 Variable *T = makeReg(IceType_f64);
Jim Stichnoth54f3d512015-12-11 09:53:00 -08004027 auto *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value);
John Porto7e93c622015-06-23 10:58:57 -07004028 lowerCast(Cast);
4029 // Then store XMM w/ a movq.
John Porto4a566862016-01-04 09:33:41 -08004030 X86OperandMem *Addr = formMemoryOperand(Ptr, IceType_f64);
John Porto7e93c622015-06-23 10:58:57 -07004031 _storeq(T, Addr);
4032 _mfence();
4033 return;
4034 }
Jim Stichnoth54f3d512015-12-11 09:53:00 -08004035 auto *Store = InstStore::create(Func, Value, Ptr);
John Porto7e93c622015-06-23 10:58:57 -07004036 lowerStore(Store);
4037 _mfence();
4038 return;
4039 }
4040 case Intrinsics::Bswap: {
4041 Variable *Dest = Instr->getDest();
4042 Operand *Val = Instr->getArg(0);
Andrew Scull57e12682015-09-16 11:30:19 -07004043 // In 32-bit mode, bswap only works on 32-bit arguments, and the argument
4044 // must be a register. Use rotate left for 16-bit bswap.
John Porto1d235422015-08-12 12:37:53 -07004045 if (!Traits::Is64Bit && Val->getType() == IceType_i64) {
Jan Voungfbdd2442015-07-15 12:36:20 -07004046 Val = legalizeUndef(Val);
Andrew Scull97f460d2015-07-21 10:07:42 -07004047 Variable *T_Lo = legalizeToReg(loOperand(Val));
4048 Variable *T_Hi = legalizeToReg(hiOperand(Val));
Jim Stichnoth54f3d512015-12-11 09:53:00 -08004049 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
4050 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
John Porto7e93c622015-06-23 10:58:57 -07004051 _bswap(T_Lo);
4052 _bswap(T_Hi);
4053 _mov(DestLo, T_Hi);
4054 _mov(DestHi, T_Lo);
John Porto1d235422015-08-12 12:37:53 -07004055 } else if ((Traits::Is64Bit && Val->getType() == IceType_i64) ||
4056 Val->getType() == IceType_i32) {
Andrew Scull97f460d2015-07-21 10:07:42 -07004057 Variable *T = legalizeToReg(Val);
John Porto7e93c622015-06-23 10:58:57 -07004058 _bswap(T);
4059 _mov(Dest, T);
4060 } else {
4061 assert(Val->getType() == IceType_i16);
John Porto7e93c622015-06-23 10:58:57 -07004062 Constant *Eight = Ctx->getConstantInt16(8);
4063 Variable *T = nullptr;
Jan Voungfbdd2442015-07-15 12:36:20 -07004064 Val = legalize(Val);
John Porto7e93c622015-06-23 10:58:57 -07004065 _mov(T, Val);
4066 _rol(T, Eight);
4067 _mov(Dest, T);
4068 }
4069 return;
4070 }
4071 case Intrinsics::Ctpop: {
4072 Variable *Dest = Instr->getDest();
John Porto1d235422015-08-12 12:37:53 -07004073 Variable *T = nullptr;
John Porto7e93c622015-06-23 10:58:57 -07004074 Operand *Val = Instr->getArg(0);
John Porto1d235422015-08-12 12:37:53 -07004075 Type ValTy = Val->getType();
4076 assert(ValTy == IceType_i32 || ValTy == IceType_i64);
4077
4078 if (!Traits::Is64Bit) {
4079 T = Dest;
4080 } else {
4081 T = makeReg(IceType_i64);
4082 if (ValTy == IceType_i32) {
4083 // in x86-64, __popcountsi2 is not defined, so we cheat a bit by
4084 // converting it to a 64-bit value, and using ctpop_i64. _movzx should
4085 // ensure we will not have any bits set on Val's upper 32 bits.
4086 Variable *V = makeReg(IceType_i64);
4087 _movzx(V, Val);
4088 Val = V;
4089 }
4090 ValTy = IceType_i64;
4091 }
4092
Karl Schimpf20070e82016-03-17 13:30:13 -07004093 InstCall *Call =
4094 makeHelperCall(ValTy == IceType_i32 ? RuntimeHelper::H_call_ctpop_i32
4095 : RuntimeHelper::H_call_ctpop_i64,
4096 T, 1);
John Porto7e93c622015-06-23 10:58:57 -07004097 Call->addArg(Val);
4098 lowerCall(Call);
4099 // The popcount helpers always return 32-bit values, while the intrinsic's
4100 // signature matches the native POPCNT instruction and fills a 64-bit reg
4101 // (in 64-bit mode). Thus, clear the upper bits of the dest just in case
4102 // the user doesn't do that in the IR. If the user does that in the IR,
4103 // then this zero'ing instruction is dead and gets optimized out.
John Porto1d235422015-08-12 12:37:53 -07004104 if (!Traits::Is64Bit) {
4105 assert(T == Dest);
4106 if (Val->getType() == IceType_i64) {
Jim Stichnoth54f3d512015-12-11 09:53:00 -08004107 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
John Porto1d235422015-08-12 12:37:53 -07004108 Constant *Zero = Ctx->getConstantZero(IceType_i32);
4109 _mov(DestHi, Zero);
4110 }
4111 } else {
4112 assert(Val->getType() == IceType_i64);
4113 // T is 64 bit. It needs to be copied to dest. We need to:
4114 //
4115 // T_1.32 = trunc T.64 to i32
4116 // T_2.64 = zext T_1.32 to i64
4117 // Dest.<<right_size>> = T_2.<<right_size>>
4118 //
4119 // which ensures the upper 32 bits will always be cleared. Just doing a
4120 //
4121 // mov Dest.32 = trunc T.32 to i32
4122 //
4123 // is dangerous because there's a chance the compiler will optimize this
4124 // copy out. To use _movzx we need two new registers (one 32-, and
4125 // another 64-bit wide.)
4126 Variable *T_1 = makeReg(IceType_i32);
4127 _mov(T_1, T);
4128 Variable *T_2 = makeReg(IceType_i64);
4129 _movzx(T_2, T_1);
4130 _mov(Dest, T_2);
John Porto7e93c622015-06-23 10:58:57 -07004131 }
4132 return;
4133 }
4134 case Intrinsics::Ctlz: {
Andrew Scull57e12682015-09-16 11:30:19 -07004135 // The "is zero undef" parameter is ignored and we always return a
4136 // well-defined value.
John Porto7e93c622015-06-23 10:58:57 -07004137 Operand *Val = legalize(Instr->getArg(0));
4138 Operand *FirstVal;
4139 Operand *SecondVal = nullptr;
John Porto1d235422015-08-12 12:37:53 -07004140 if (!Traits::Is64Bit && Val->getType() == IceType_i64) {
John Porto7e93c622015-06-23 10:58:57 -07004141 FirstVal = loOperand(Val);
4142 SecondVal = hiOperand(Val);
4143 } else {
4144 FirstVal = Val;
4145 }
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07004146 constexpr bool IsCttz = false;
John Porto7e93c622015-06-23 10:58:57 -07004147 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
4148 SecondVal);
4149 return;
4150 }
4151 case Intrinsics::Cttz: {
Andrew Scull57e12682015-09-16 11:30:19 -07004152 // The "is zero undef" parameter is ignored and we always return a
4153 // well-defined value.
John Porto7e93c622015-06-23 10:58:57 -07004154 Operand *Val = legalize(Instr->getArg(0));
4155 Operand *FirstVal;
4156 Operand *SecondVal = nullptr;
John Porto1d235422015-08-12 12:37:53 -07004157 if (!Traits::Is64Bit && Val->getType() == IceType_i64) {
John Porto7e93c622015-06-23 10:58:57 -07004158 FirstVal = hiOperand(Val);
4159 SecondVal = loOperand(Val);
4160 } else {
4161 FirstVal = Val;
4162 }
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07004163 constexpr bool IsCttz = true;
John Porto7e93c622015-06-23 10:58:57 -07004164 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
4165 SecondVal);
4166 return;
4167 }
4168 case Intrinsics::Fabs: {
4169 Operand *Src = legalize(Instr->getArg(0));
4170 Type Ty = Src->getType();
4171 Variable *Dest = Instr->getDest();
4172 Variable *T = makeVectorOfFabsMask(Ty);
Andrew Scull57e12682015-09-16 11:30:19 -07004173 // The pand instruction operates on an m128 memory operand, so if Src is an
4174 // f32 or f64, we need to make sure it's in a register.
John Porto7e93c622015-06-23 10:58:57 -07004175 if (isVectorType(Ty)) {
John Porto4a566862016-01-04 09:33:41 -08004176 if (llvm::isa<X86OperandMem>(Src))
Andrew Scull97f460d2015-07-21 10:07:42 -07004177 Src = legalizeToReg(Src);
John Porto7e93c622015-06-23 10:58:57 -07004178 } else {
Andrew Scull97f460d2015-07-21 10:07:42 -07004179 Src = legalizeToReg(Src);
John Porto7e93c622015-06-23 10:58:57 -07004180 }
4181 _pand(T, Src);
4182 if (isVectorType(Ty))
4183 _movp(Dest, T);
4184 else
4185 _mov(Dest, T);
4186 return;
4187 }
4188 case Intrinsics::Longjmp: {
Karl Schimpf20070e82016-03-17 13:30:13 -07004189 InstCall *Call = makeHelperCall(RuntimeHelper::H_call_longjmp, nullptr, 2);
John Porto7e93c622015-06-23 10:58:57 -07004190 Call->addArg(Instr->getArg(0));
4191 Call->addArg(Instr->getArg(1));
4192 lowerCall(Call);
4193 return;
4194 }
4195 case Intrinsics::Memcpy: {
Andrew Scull9df4a372015-08-07 09:19:35 -07004196 lowerMemcpy(Instr->getArg(0), Instr->getArg(1), Instr->getArg(2));
John Porto7e93c622015-06-23 10:58:57 -07004197 return;
4198 }
4199 case Intrinsics::Memmove: {
Andrew Scullcfa628b2015-08-20 14:23:05 -07004200 lowerMemmove(Instr->getArg(0), Instr->getArg(1), Instr->getArg(2));
John Porto7e93c622015-06-23 10:58:57 -07004201 return;
4202 }
4203 case Intrinsics::Memset: {
Andrew Scull713dbde2015-08-04 14:25:27 -07004204 lowerMemset(Instr->getArg(0), Instr->getArg(1), Instr->getArg(2));
John Porto7e93c622015-06-23 10:58:57 -07004205 return;
4206 }
4207 case Intrinsics::NaClReadTP: {
John Porto56958cb2016-01-14 09:18:18 -08004208 if (NeedSandboxing) {
John Porto4a566862016-01-04 09:33:41 -08004209 Operand *Src =
4210 dispatchToConcrete(&ConcreteTarget::createNaClReadTPSrcOperand);
John Porto7e93c622015-06-23 10:58:57 -07004211 Variable *Dest = Instr->getDest();
4212 Variable *T = nullptr;
4213 _mov(T, Src);
4214 _mov(Dest, T);
4215 } else {
Karl Schimpf20070e82016-03-17 13:30:13 -07004216 InstCall *Call =
4217 makeHelperCall(RuntimeHelper::H_call_read_tp, Instr->getDest(), 0);
John Porto7e93c622015-06-23 10:58:57 -07004218 lowerCall(Call);
4219 }
4220 return;
4221 }
4222 case Intrinsics::Setjmp: {
Karl Schimpf20070e82016-03-17 13:30:13 -07004223 InstCall *Call =
4224 makeHelperCall(RuntimeHelper::H_call_setjmp, Instr->getDest(), 1);
John Porto7e93c622015-06-23 10:58:57 -07004225 Call->addArg(Instr->getArg(0));
4226 lowerCall(Call);
4227 return;
4228 }
4229 case Intrinsics::Sqrt: {
4230 Operand *Src = legalize(Instr->getArg(0));
4231 Variable *Dest = Instr->getDest();
4232 Variable *T = makeReg(Dest->getType());
4233 _sqrtss(T, Src);
4234 _mov(Dest, T);
4235 return;
4236 }
4237 case Intrinsics::Stacksave: {
John Porto56958cb2016-01-14 09:18:18 -08004238 if (!Traits::Is64Bit || !NeedSandboxing) {
4239 Variable *esp = Func->getTarget()->getPhysicalRegister(getStackReg(),
4240 Traits::WordType);
4241 Variable *Dest = Instr->getDest();
4242 _mov(Dest, esp);
4243 return;
4244 }
4245 Variable *esp = Func->getTarget()->getPhysicalRegister(
4246 Traits::RegisterSet::Reg_esp, IceType_i32);
John Porto7e93c622015-06-23 10:58:57 -07004247 Variable *Dest = Instr->getDest();
4248 _mov(Dest, esp);
John Porto56958cb2016-01-14 09:18:18 -08004249
John Porto7e93c622015-06-23 10:58:57 -07004250 return;
4251 }
4252 case Intrinsics::Stackrestore: {
John Porto008f4ce2015-12-24 13:22:18 -08004253 Operand *Src = Instr->getArg(0);
John Porto56958cb2016-01-14 09:18:18 -08004254 _mov_sp(Src);
John Porto7e93c622015-06-23 10:58:57 -07004255 return;
4256 }
John Porto56958cb2016-01-14 09:18:18 -08004257
John Porto7e93c622015-06-23 10:58:57 -07004258 case Intrinsics::Trap:
4259 _ud2();
4260 return;
4261 case Intrinsics::UnknownIntrinsic:
4262 Func->setError("Should not be lowering UnknownIntrinsic");
4263 return;
4264 }
4265 return;
4266}
4267
/// Lowers an atomic compare-and-swap: DestPrev receives the value previously
/// stored at [Ptr]; the store of Desired happens only if that value equals
/// Expected.
///
/// For i64 on a 32-bit target this emits "lock cmpxchg8b", which has fixed
/// register operands (edx:eax = expected/result, ecx:ebx = desired). All other
/// sizes use "lock cmpxchg", which implicitly compares against (r/e)ax/ax/al.
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerAtomicCmpxchg(Variable *DestPrev,
                                                   Operand *Ptr,
                                                   Operand *Expected,
                                                   Operand *Desired) {
  Type Ty = Expected->getType();
  if (!Traits::Is64Bit && Ty == IceType_i64) {
    // Reserve the pre-colored registers first, before adding any more
    // infinite-weight variables from formMemoryOperand's legalization.
    Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
    Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
    Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
    Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx);
    // cmpxchg8b expects the 64-bit expected value in edx:eax and the desired
    // value in ecx:ebx.
    _mov(T_eax, loOperand(Expected));
    _mov(T_edx, hiOperand(Expected));
    _mov(T_ebx, loOperand(Desired));
    _mov(T_ecx, hiOperand(Desired));
    X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
    constexpr bool Locked = true;
    _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
    // The previous memory contents come back in edx:eax; split them into the
    // two halves of the 64-bit destination.
    auto *DestLo = llvm::cast<Variable>(loOperand(DestPrev));
    auto *DestHi = llvm::cast<Variable>(hiOperand(DestPrev));
    _mov(DestLo, T_eax);
    _mov(DestHi, T_edx);
    return;
  }
  // Pick the width-appropriate accumulator register that cmpxchg implicitly
  // compares against and writes the old value into.
  RegNumT Eax;
  switch (Ty) {
  default:
    llvm::report_fatal_error("Bad type for cmpxchg");
  case IceType_i64:
    Eax = Traits::getRaxOrDie();
    break;
  case IceType_i32:
    Eax = Traits::RegisterSet::Reg_eax;
    break;
  case IceType_i16:
    Eax = Traits::RegisterSet::Reg_ax;
    break;
  case IceType_i8:
    Eax = Traits::RegisterSet::Reg_al;
    break;
  }
  Variable *T_eax = makeReg(Ty, Eax);
  _mov(T_eax, Expected);
  X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
  Variable *DesiredReg = legalizeToReg(Desired);
  constexpr bool Locked = true;
  _cmpxchg(Addr, T_eax, DesiredReg, Locked);
  // cmpxchg leaves the previously stored value in the accumulator.
  _mov(DestPrev, T_eax);
}
4319
/// Peephole: if the cmpxchg result is immediately compared (icmp eq) against
/// Expected and that compare feeds a conditional branch, lower the cmpxchg and
/// branch directly on the flags it sets, eliminating the separate compare.
/// Intervening phi-store assignments are lowered in place so their placement
/// (from placePhiStores()) is preserved. Returns true if the optimized form
/// was emitted; false means the caller must lower the cmpxchg normally.
template <typename TraitsType>
bool TargetX86Base<TraitsType>::tryOptimizedCmpxchgCmpBr(Variable *Dest,
                                                         Operand *PtrToMem,
                                                         Operand *Expected,
                                                         Operand *Desired) {
  // Skip the optimization at -Om1 (minimal optimization level).
  if (Func->getOptLevel() == Opt_m1)
    return false;
  // Peek ahead a few instructions and see how Dest is used.
  // It's very common to have:
  //
  // %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...)
  // [%y_phi = ...] // list of phi stores
  // %p = icmp eq i32 %x, %expected
  // br i1 %p, label %l1, label %l2
  //
  // which we can optimize into:
  //
  // %x = <cmpxchg code>
  // [%y_phi = ...] // list of phi stores
  // br eq, %l1, %l2
  InstList::iterator I = Context.getCur();
  // I is currently the InstIntrinsicCall. Peek past that.
  // This assumes that the atomic cmpxchg has not been lowered yet,
  // so that the instructions seen in the scan from "Cur" is simple.
  assert(llvm::isa<InstIntrinsicCall>(*I));
  // NOTE: getNextInst advances I (taken by reference), so each call below
  // steps one instruction further ahead.
  Inst *NextInst = Context.getNextInst(I);
  if (!NextInst)
    return false;
  // There might be phi assignments right before the compare+branch, since this
  // could be a backward branch for a loop. This placement of assignments is
  // determined by placePhiStores().
  CfgVector<InstAssign *> PhiAssigns;
  while (auto *PhiAssign = llvm::dyn_cast<InstAssign>(NextInst)) {
    // If a phi assignment overwrites Dest, the later compare wouldn't see the
    // cmpxchg result; bail out.
    if (PhiAssign->getDest() == Dest)
      return false;
    PhiAssigns.push_back(PhiAssign);
    NextInst = Context.getNextInst(I);
    if (!NextInst)
      return false;
  }
  if (auto *NextCmp = llvm::dyn_cast<InstIcmp>(NextInst)) {
    // Only "icmp eq" of Dest against Expected (either operand order) matches
    // the flags produced by cmpxchg.
    if (!(NextCmp->getCondition() == InstIcmp::Eq &&
          ((NextCmp->getSrc(0) == Dest && NextCmp->getSrc(1) == Expected) ||
           (NextCmp->getSrc(1) == Dest && NextCmp->getSrc(0) == Expected)))) {
      return false;
    }
    NextInst = Context.getNextInst(I);
    if (!NextInst)
      return false;
    if (auto *NextBr = llvm::dyn_cast<InstBr>(NextInst)) {
      // The branch must consume the compare's result, and that result must be
      // dead afterwards (otherwise we'd still need to materialize it).
      if (!NextBr->isUnconditional() &&
          NextCmp->getDest() == NextBr->getCondition() &&
          NextBr->isLastUse(NextCmp->getDest())) {
        lowerAtomicCmpxchg(Dest, PtrToMem, Expected, Desired);
        for (size_t i = 0; i < PhiAssigns.size(); ++i) {
          // Lower the phi assignments now, before the branch (same placement
          // as before).
          InstAssign *PhiAssign = PhiAssigns[i];
          PhiAssign->setDeleted();
          lowerAssign(PhiAssign);
          Context.advanceNext();
        }
        // Branch directly on the ZF state left by cmpxchg.
        _br(Traits::Cond::Br_e, NextBr->getTargetTrue(),
            NextBr->getTargetFalse());
        // Skip over the old compare and branch, by deleting them.
        NextCmp->setDeleted();
        NextBr->setDeleted();
        Context.advanceNext();
        Context.advanceNext();
        return true;
      }
    }
  }
  return false;
}
4395
/// Lowers an atomic read-modify-write. Add/Sub/Exchange have single-instruction
/// lowerings (lock xadd / xchg) for register-sized types; everything else —
/// and every i64 operation on a 32-bit target — is expanded into a cmpxchg
/// loop via expandAtomicRMWAsCmpxchg, parameterized by the low/high binary-op
/// member-function pointers Op_Lo/Op_Hi.
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerAtomicRMW(Variable *Dest,
                                               uint32_t Operation, Operand *Ptr,
                                               Operand *Val) {
  bool NeedsCmpxchg = false;
  LowerBinOp Op_Lo = nullptr;
  LowerBinOp Op_Hi = nullptr;
  switch (Operation) {
  default:
    Func->setError("Unknown AtomicRMW operation");
    return;
  case Intrinsics::AtomicAdd: {
    if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
      // All the fall-through paths must set this to true, but use this
      // for asserting.
      NeedsCmpxchg = true;
      // 64-bit add on a 32-bit target: add the low words, then
      // add-with-carry the high words inside the cmpxchg loop.
      Op_Lo = &TargetX86Base<TraitsType>::_add;
      Op_Hi = &TargetX86Base<TraitsType>::_adc;
      break;
    }
    // Register-sized add: "lock xadd" returns the old value in T.
    X86OperandMem *Addr = formMemoryOperand(Ptr, Dest->getType());
    constexpr bool Locked = true;
    Variable *T = nullptr;
    _mov(T, Val);
    _xadd(Addr, T, Locked);
    _mov(Dest, T);
    return;
  }
  case Intrinsics::AtomicSub: {
    if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
      NeedsCmpxchg = true;
      // 64-bit sub: subtract low words, subtract-with-borrow high words.
      Op_Lo = &TargetX86Base<TraitsType>::_sub;
      Op_Hi = &TargetX86Base<TraitsType>::_sbb;
      break;
    }
    // Subtraction via xadd of the negated value (there is no "lock xsub").
    X86OperandMem *Addr = formMemoryOperand(Ptr, Dest->getType());
    constexpr bool Locked = true;
    Variable *T = nullptr;
    _mov(T, Val);
    _neg(T);
    _xadd(Addr, T, Locked);
    _mov(Dest, T);
    return;
  }
  case Intrinsics::AtomicOr:
    // TODO(jvoung): If Dest is null or dead, then some of these
    // operations do not need an "exchange", but just a locked op.
    // That appears to be "worth" it for sub, or, and, and xor.
    // xadd is probably fine vs lock add for add, and xchg is fine
    // vs an atomic store.
    NeedsCmpxchg = true;
    Op_Lo = &TargetX86Base<TraitsType>::_or;
    Op_Hi = &TargetX86Base<TraitsType>::_or;
    break;
  case Intrinsics::AtomicAnd:
    NeedsCmpxchg = true;
    Op_Lo = &TargetX86Base<TraitsType>::_and;
    Op_Hi = &TargetX86Base<TraitsType>::_and;
    break;
  case Intrinsics::AtomicXor:
    NeedsCmpxchg = true;
    Op_Lo = &TargetX86Base<TraitsType>::_xor;
    Op_Hi = &TargetX86Base<TraitsType>::_xor;
    break;
  case Intrinsics::AtomicExchange:
    if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
      NeedsCmpxchg = true;
      // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values
      // just need to be moved to the ecx and ebx registers.
      Op_Lo = nullptr;
      Op_Hi = nullptr;
      break;
    }
    // Register-sized exchange: xchg with a memory operand is implicitly
    // locked, so no "lock" prefix is needed.
    X86OperandMem *Addr = formMemoryOperand(Ptr, Dest->getType());
    Variable *T = nullptr;
    _mov(T, Val);
    _xchg(Addr, T);
    _mov(Dest, T);
    return;
  }
  // Otherwise, we need a cmpxchg loop.
  (void)NeedsCmpxchg;
  assert(NeedsCmpxchg);
  expandAtomicRMWAsCmpxchg(Op_Lo, Op_Hi, Dest, Ptr, Val);
}
4481
/// Expands an atomic RMW operation into a compare-and-swap retry loop:
/// load the current value, apply Op_Lo (and Op_Hi for split 64-bit values) to
/// produce the desired value, then "lock cmpxchg" it back; retry on failure.
/// When both ops are nullptr (exchange), the new value is loop-invariant and
/// is loaded into the desired-value registers before the loop label.
/// FakeUse pseudo-instructions extend the live ranges of values re-read on the
/// backedge so the register allocator keeps them alive across the loop.
template <typename TraitsType>
void TargetX86Base<TraitsType>::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo,
                                                         LowerBinOp Op_Hi,
                                                         Variable *Dest,
                                                         Operand *Ptr,
                                                         Operand *Val) {
  // Expand a more complex RMW operation as a cmpxchg loop:
  // For 64-bit:
  //   mov     eax, [ptr]
  //   mov     edx, [ptr + 4]
  // .LABEL:
  //   mov     ebx, eax
  //   <Op_Lo> ebx, <desired_adj_lo>
  //   mov     ecx, edx
  //   <Op_Hi> ecx, <desired_adj_hi>
  //   lock cmpxchg8b [ptr]
  //   jne     .LABEL
  //   mov     <dest_lo>, eax
  //   mov     <dest_lo>, edx
  //
  // For 32-bit:
  //   mov     eax, [ptr]
  // .LABEL:
  //   mov     <reg>, eax
  //   op      <reg>, [desired_adj]
  //   lock cmpxchg [ptr], <reg>
  //   jne     .LABEL
  //   mov     <dest>, eax
  //
  // If Op_{Lo,Hi} are nullptr, then just copy the value.
  Val = legalize(Val);
  Type Ty = Val->getType();
  if (!Traits::Is64Bit && Ty == IceType_i64) {
    // cmpxchg8b fixed operands: edx:eax holds the expected (current) value,
    // ecx:ebx the desired value.
    Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
    Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
    X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
    _mov(T_eax, loOperand(Addr));
    _mov(T_edx, hiOperand(Addr));
    Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
    Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx);
    InstX86Label *Label = InstX86Label::create(Func, this);
    const bool IsXchg8b = Op_Lo == nullptr && Op_Hi == nullptr;
    if (!IsXchg8b) {
      // Recompute the desired value from the freshly observed one on every
      // iteration, so the adjustment goes inside the loop.
      Context.insert(Label);
      _mov(T_ebx, T_eax);
      (this->*Op_Lo)(T_ebx, loOperand(Val));
      _mov(T_ecx, T_edx);
      (this->*Op_Hi)(T_ecx, hiOperand(Val));
    } else {
      // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi.
      // It just needs the Val loaded into ebx and ecx.
      // That can also be done before the loop.
      _mov(T_ebx, loOperand(Val));
      _mov(T_ecx, hiOperand(Val));
      Context.insert(Label);
    }
    constexpr bool Locked = true;
    _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
    _br(Traits::Cond::Br_ne, Label);
    if (!IsXchg8b) {
      // If Val is a variable, model the extended live range of Val through
      // the end of the loop, since it will be re-used by the loop.
      if (auto *ValVar = llvm::dyn_cast<Variable>(Val)) {
        auto *ValLo = llvm::cast<Variable>(loOperand(ValVar));
        auto *ValHi = llvm::cast<Variable>(hiOperand(ValVar));
        Context.insert<InstFakeUse>(ValLo);
        Context.insert<InstFakeUse>(ValHi);
      }
    } else {
      // For xchg, the loop is slightly smaller and ebx/ecx are used.
      Context.insert<InstFakeUse>(T_ebx);
      Context.insert<InstFakeUse>(T_ecx);
    }
    // The address base (if any) is also reused in the loop.
    if (Variable *Base = Addr->getBase())
      Context.insert<InstFakeUse>(Base);
    // On success the previous memory contents are in edx:eax.
    auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
    auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    _mov(DestLo, T_eax);
    _mov(DestHi, T_edx);
    return;
  }
  X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
  // Select the width-appropriate accumulator register cmpxchg implicitly uses.
  RegNumT Eax;
  switch (Ty) {
  default:
    llvm::report_fatal_error("Bad type for atomicRMW");
  case IceType_i64:
    Eax = Traits::getRaxOrDie();
    break;
  case IceType_i32:
    Eax = Traits::RegisterSet::Reg_eax;
    break;
  case IceType_i16:
    Eax = Traits::RegisterSet::Reg_ax;
    break;
  case IceType_i8:
    Eax = Traits::RegisterSet::Reg_al;
    break;
  }
  Variable *T_eax = makeReg(Ty, Eax);
  _mov(T_eax, Addr);
  auto *Label = Context.insert<InstX86Label>(this);
  // We want to pick a different register for T than Eax, so don't use
  // _mov(T == nullptr, T_eax).
  Variable *T = makeReg(Ty);
  _mov(T, T_eax);
  (this->*Op_Lo)(T, Val);
  constexpr bool Locked = true;
  _cmpxchg(Addr, T_eax, T, Locked);
  _br(Traits::Cond::Br_ne, Label);
  // If Val is a variable, model the extended live range of Val through
  // the end of the loop, since it will be re-used by the loop.
  if (auto *ValVar = llvm::dyn_cast<Variable>(Val)) {
    Context.insert<InstFakeUse>(ValVar);
  }
  // The address base (if any) is also reused in the loop.
  if (Variable *Base = Addr->getBase())
    Context.insert<InstFakeUse>(Base);
  _mov(Dest, T_eax);
}
4603
/// Lowers count {trailing, leading} zeros intrinsic.
///
/// We could do constant folding here, but that should have
/// been done by the front-end/middle-end optimizations.
///
/// Uses bsf (Cttz) or bsr (Ctlz) plus a cmov to produce a well-defined result
/// for a zero input; for Ctlz an extra xor converts bsr's bit *position* into
/// a count of leading zeros. On a 32-bit target, an i64 operand arrives split
/// as FirstVal/SecondVal and is handled with a second scan plus cmov.
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerCountZeros(bool Cttz, Type Ty,
                                                Variable *Dest,
                                                Operand *FirstVal,
                                                Operand *SecondVal) {
  // TODO(jvoung): Determine if the user CPU supports LZCNT (BMI).
  // Then the instructions will handle the Val == 0 case much more simply
  // and won't require conversion from bit position to number of zeros.
  //
  // Otherwise:
  //   bsr IF_NOT_ZERO, Val
  //   mov T_DEST, ((Ty == i32) ? 63 : 127)
  //   cmovne T_DEST, IF_NOT_ZERO
  //   xor T_DEST, ((Ty == i32) ? 31 : 63)
  //   mov DEST, T_DEST
  //
  // NOTE: T_DEST must be a register because cmov requires its dest to be a
  // register. Also, bsf and bsr require their dest to be a register.
  //
  // The xor DEST, C(31|63) converts a bit position to # of leading zeroes.
  // E.g., for 000... 00001100, bsr will say that the most significant bit
  // set is at position 3, while the number of leading zeros is 28. Xor is
  // like (M - N) for N <= M, and converts 63 to 32, and 127 to 64 (for the
  // all-zeros case).
  //
  // X8632 only: Similar for 64-bit, but start w/ speculating that the upper 32
  // bits are all zero, and compute the result for that case (checking the
  // lower 32 bits). Then actually compute the result for the upper bits and
  // cmov in the result from the lower computation if the earlier speculation
  // was correct.
  //
  // Cttz, is similar, but uses bsf instead, and doesn't require the xor
  // bit position conversion, and the speculation is reversed.

  // TODO(jpp): refactor this method.
  assert(Ty == IceType_i32 || Ty == IceType_i64);
  const Type DestTy = Traits::Is64Bit ? Dest->getType() : IceType_i32;
  Variable *T = makeReg(DestTy);
  Operand *FirstValRM = legalize(FirstVal, Legal_Mem | Legal_Reg);
  // Scan FirstVal; if it is zero, T is left undefined and the cmov below
  // selects the precomputed all-zeros result instead.
  if (Cttz) {
    _bsf(T, FirstValRM);
  } else {
    _bsr(T, FirstValRM);
  }
  Variable *T_Dest = makeReg(DestTy);
  Constant *_31 = Ctx->getConstantInt32(31);
  Constant *_32 = Ctx->getConstantInt(DestTy, 32);
  Constant *_63 = Ctx->getConstantInt(DestTy, 63);
  Constant *_64 = Ctx->getConstantInt(DestTy, 64);
  // Preload the all-zeros answer: for Cttz, the bit width (32/64); for Ctlz,
  // the value that the later xor will turn into the bit width (63/127).
  if (Cttz) {
    if (DestTy == IceType_i64) {
      _mov(T_Dest, _64);
    } else {
      _mov(T_Dest, _32);
    }
  } else {
    Constant *_127 = Ctx->getConstantInt(DestTy, 127);
    if (DestTy == IceType_i64) {
      _mov(T_Dest, _127);
    } else {
      _mov(T_Dest, _63);
    }
  }
  // bsf/bsr set ZF when the source was zero; keep the preloaded value then.
  _cmov(T_Dest, T, Traits::Cond::Br_ne);
  if (!Cttz) {
    // Convert the bit position from bsr into a leading-zero count.
    if (DestTy == IceType_i64) {
      // Even though there's a _63 available at this point, that constant might
      // not be an i32, which will cause the xor emission to fail.
      Constant *_63 = Ctx->getConstantInt32(63);
      _xor(T_Dest, _63);
    } else {
      _xor(T_Dest, _31);
    }
  }
  if (Traits::Is64Bit || Ty == IceType_i32) {
    _mov(Dest, T_Dest);
    return;
  }
  // X8632 i64 path: T_Dest currently holds the count assuming the scan of
  // SecondVal finds nothing; add 32 to account for the other half.
  _add(T_Dest, _32);
  auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
  auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
  // Will be using "test" on this, so we need a registerized variable.
  Variable *SecondVar = legalizeToReg(SecondVal);
  Variable *T_Dest2 = makeReg(IceType_i32);
  if (Cttz) {
    _bsf(T_Dest2, SecondVar);
  } else {
    _bsr(T_Dest2, SecondVar);
    _xor(T_Dest2, _31);
  }
  // If SecondVal was zero, fall back to the speculative result in T_Dest.
  _test(SecondVar, SecondVar);
  _cmov(T_Dest2, T_Dest, Traits::Cond::Br_e);
  _mov(DestLo, T_Dest2);
  // The count always fits in 32 bits, so the high word is zero.
  _mov(DestHi, Ctx->getConstantZero(IceType_i32));
}
4703
John Porto4a566862016-01-04 09:33:41 -08004704template <typename TraitsType>
4705void TargetX86Base<TraitsType>::typedLoad(Type Ty, Variable *Dest,
4706 Variable *Base, Constant *Offset) {
Jim Stichnoth8ff4b282016-01-04 15:39:06 -08004707 // If Offset is a ConstantRelocatable in Non-SFI mode, we will need to
4708 // legalize Mem properly.
4709 if (Offset)
4710 assert(!llvm::isa<ConstantRelocatable>(Offset));
4711
John Porto4a566862016-01-04 09:33:41 -08004712 auto *Mem = X86OperandMem::create(Func, Ty, Base, Offset);
Andrew Scullcfa628b2015-08-20 14:23:05 -07004713
4714 if (isVectorType(Ty))
4715 _movp(Dest, Mem);
4716 else if (Ty == IceType_f64)
4717 _movq(Dest, Mem);
4718 else
4719 _mov(Dest, Mem);
4720}
4721
John Porto4a566862016-01-04 09:33:41 -08004722template <typename TraitsType>
4723void TargetX86Base<TraitsType>::typedStore(Type Ty, Variable *Value,
4724 Variable *Base, Constant *Offset) {
Jim Stichnoth8ff4b282016-01-04 15:39:06 -08004725 // If Offset is a ConstantRelocatable in Non-SFI mode, we will need to
4726 // legalize Mem properly.
4727 if (Offset)
4728 assert(!llvm::isa<ConstantRelocatable>(Offset));
4729
John Porto4a566862016-01-04 09:33:41 -08004730 auto *Mem = X86OperandMem::create(Func, Ty, Base, Offset);
Andrew Scullcfa628b2015-08-20 14:23:05 -07004731
4732 if (isVectorType(Ty))
4733 _storep(Value, Mem);
4734 else if (Ty == IceType_f64)
4735 _storeq(Value, Mem);
4736 else
4737 _store(Value, Mem);
4738}
4739
John Porto4a566862016-01-04 09:33:41 -08004740template <typename TraitsType>
4741void TargetX86Base<TraitsType>::copyMemory(Type Ty, Variable *Dest,
4742 Variable *Src, int32_t OffsetAmt) {
Andrew Scullcfa628b2015-08-20 14:23:05 -07004743 Constant *Offset = OffsetAmt ? Ctx->getConstantInt32(OffsetAmt) : nullptr;
4744 // TODO(ascull): this or add nullptr test to _movp, _movq
4745 Variable *Data = makeReg(Ty);
4746
4747 typedLoad(Ty, Data, Src, Offset);
4748 typedStore(Ty, Data, Dest, Offset);
4749}
4750
John Porto4a566862016-01-04 09:33:41 -08004751template <typename TraitsType>
4752void TargetX86Base<TraitsType>::lowerMemcpy(Operand *Dest, Operand *Src,
4753 Operand *Count) {
Andrew Scull9df4a372015-08-07 09:19:35 -07004754 // There is a load and store for each chunk in the unroll
Andrew Scull9df4a372015-08-07 09:19:35 -07004755 constexpr uint32_t BytesPerStorep = 16;
Andrew Scull9df4a372015-08-07 09:19:35 -07004756
4757 // Check if the operands are constants
4758 const auto *CountConst = llvm::dyn_cast<const ConstantInteger32>(Count);
4759 const bool IsCountConst = CountConst != nullptr;
4760 const uint32_t CountValue = IsCountConst ? CountConst->getValue() : 0;
4761
Andrew Scullcfa628b2015-08-20 14:23:05 -07004762 if (shouldOptimizeMemIntrins() && IsCountConst &&
4763 CountValue <= BytesPerStorep * Traits::MEMCPY_UNROLL_LIMIT) {
Andrew Scull9df4a372015-08-07 09:19:35 -07004764 // Unlikely, but nothing to do if it does happen
4765 if (CountValue == 0)
4766 return;
4767
4768 Variable *SrcBase = legalizeToReg(Src);
4769 Variable *DestBase = legalizeToReg(Dest);
4770
Andrew Scullcfa628b2015-08-20 14:23:05 -07004771 // Find the largest type that can be used and use it as much as possible in
4772 // reverse order. Then handle any remainder with overlapping copies. Since
4773 // the remainder will be at the end, there will be reduced pressure on the
4774 // memory unit as the accesses to the same memory are far apart.
4775 Type Ty = largestTypeInSize(CountValue);
4776 uint32_t TyWidth = typeWidthInBytes(Ty);
Andrew Scull9df4a372015-08-07 09:19:35 -07004777
Andrew Scullcfa628b2015-08-20 14:23:05 -07004778 uint32_t RemainingBytes = CountValue;
4779 int32_t Offset = (CountValue & ~(TyWidth - 1)) - TyWidth;
4780 while (RemainingBytes >= TyWidth) {
4781 copyMemory(Ty, DestBase, SrcBase, Offset);
4782 RemainingBytes -= TyWidth;
4783 Offset -= TyWidth;
Andrew Scull9df4a372015-08-07 09:19:35 -07004784 }
4785
Andrew Scullcfa628b2015-08-20 14:23:05 -07004786 if (RemainingBytes == 0)
Andrew Scull9df4a372015-08-07 09:19:35 -07004787 return;
Andrew Scull9df4a372015-08-07 09:19:35 -07004788
Andrew Scullcfa628b2015-08-20 14:23:05 -07004789 // Lower the remaining bytes. Adjust to larger types in order to make use
4790 // of overlaps in the copies.
4791 Type LeftOverTy = firstTypeThatFitsSize(RemainingBytes);
4792 Offset = CountValue - typeWidthInBytes(LeftOverTy);
4793 copyMemory(LeftOverTy, DestBase, SrcBase, Offset);
Andrew Scull9df4a372015-08-07 09:19:35 -07004794 return;
4795 }
4796
4797 // Fall back on a function call
Karl Schimpf20070e82016-03-17 13:30:13 -07004798 InstCall *Call = makeHelperCall(RuntimeHelper::H_call_memcpy, nullptr, 3);
Andrew Scull9df4a372015-08-07 09:19:35 -07004799 Call->addArg(Dest);
4800 Call->addArg(Src);
4801 Call->addArg(Count);
4802 lowerCall(Call);
4803}
4804
/// Lowers an llvm.memmove intrinsic. For a small, compile-time-constant
/// count the move is unrolled: every chunk is first loaded into a register
/// (taking a complete snapshot of the source), and only after all loads are
/// emitted are the chunks stored to the destination. This makes the copy
/// correct even when the source and destination ranges overlap. Otherwise a
/// call to the memmove runtime helper is emitted.
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerMemmove(Operand *Dest, Operand *Src,
                                             Operand *Count) {
  // There is a load and store for each chunk in the unroll
  constexpr uint32_t BytesPerStorep = 16;

  // Check if the operands are constants
  const auto *CountConst = llvm::dyn_cast<const ConstantInteger32>(Count);
  const bool IsCountConst = CountConst != nullptr;
  const uint32_t CountValue = IsCountConst ? CountConst->getValue() : 0;

  if (shouldOptimizeMemIntrins() && IsCountConst &&
      CountValue <= BytesPerStorep * Traits::MEMMOVE_UNROLL_LIMIT) {
    // Unlikely, but nothing to do if it does happen
    if (CountValue == 0)
      return;

    Variable *SrcBase = legalizeToReg(Src);
    Variable *DestBase = legalizeToReg(Dest);

    // Record (type, offset, register) for every loaded chunk so the stores
    // can be replayed after all loads. At most MEMMOVE_UNROLL_LIMIT chunks
    // plus possibly one tail entry; see the bound reasoning at the asserts.
    std::tuple<Type, Constant *, Variable *>
        Moves[Traits::MEMMOVE_UNROLL_LIMIT];
    Constant *Offset;
    Variable *Reg;

    // Copy the data into registers as the source and destination could overlap
    // so make sure not to clobber the memory. This also means overlapping
    // moves can be used as we are taking a safe snapshot of the memory.
    Type Ty = largestTypeInSize(CountValue);
    uint32_t TyWidth = typeWidthInBytes(Ty);

    // Walk backwards through the buffer in TyWidth-sized chunks.
    uint32_t RemainingBytes = CountValue;
    int32_t OffsetAmt = (CountValue & ~(TyWidth - 1)) - TyWidth;
    size_t N = 0;
    while (RemainingBytes >= TyWidth) {
      // N stays in bounds: CountValue <= BytesPerStorep * UNROLL_LIMIT and a
      // non-zero tail implies the loop ran fewer than UNROLL_LIMIT times.
      assert(N <= Traits::MEMMOVE_UNROLL_LIMIT);
      Offset = Ctx->getConstantInt32(OffsetAmt);
      Reg = makeReg(Ty);
      typedLoad(Ty, Reg, SrcBase, Offset);
      RemainingBytes -= TyWidth;
      OffsetAmt -= TyWidth;
      Moves[N++] = std::make_tuple(Ty, Offset, Reg);
    }

    if (RemainingBytes != 0) {
      // Lower the remaining bytes. Adjust to larger types in order to make use
      // of overlaps in the copies.
      assert(N <= Traits::MEMMOVE_UNROLL_LIMIT);
      Ty = firstTypeThatFitsSize(RemainingBytes);
      Offset = Ctx->getConstantInt32(CountValue - typeWidthInBytes(Ty));
      Reg = makeReg(Ty);
      typedLoad(Ty, Reg, SrcBase, Offset);
      Moves[N++] = std::make_tuple(Ty, Offset, Reg);
    }

    // Copy the data out into the destination memory
    for (size_t i = 0; i < N; ++i) {
      std::tie(Ty, Offset, Reg) = Moves[i];
      typedStore(Ty, Reg, DestBase, Offset);
    }

    return;
  }

  // Fall back on a function call
  InstCall *Call = makeHelperCall(RuntimeHelper::H_call_memmove, nullptr, 3);
  Call->addArg(Dest);
  Call->addArg(Src);
  Call->addArg(Count);
  lowerCall(Call);
}
4876
/// Lowers an llvm.memset intrinsic. When both the fill value and the count
/// are compile-time constants (and mem-intrinsic optimization is enabled),
/// the set is unrolled into wide stores: SSE stores of a zeroed vector
/// register when the value is 0, or 32-bit immediate stores of the byte
/// value replicated into every byte lane otherwise. All other cases fall
/// back to calling the memset runtime helper.
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerMemset(Operand *Dest, Operand *Val,
                                            Operand *Count) {
  constexpr uint32_t BytesPerStorep = 16;
  constexpr uint32_t BytesPerStoreq = 8;
  constexpr uint32_t BytesPerStorei32 = 4;
  // The intrinsic's value operand is a single byte.
  assert(Val->getType() == IceType_i8);

  // Check if the operands are constants
  const auto *CountConst = llvm::dyn_cast<const ConstantInteger32>(Count);
  const auto *ValConst = llvm::dyn_cast<const ConstantInteger32>(Val);
  const bool IsCountConst = CountConst != nullptr;
  const bool IsValConst = ValConst != nullptr;
  const uint32_t CountValue = IsCountConst ? CountConst->getValue() : 0;
  const uint32_t ValValue = IsValConst ? ValConst->getValue() : 0;

  // Unlikely, but nothing to do if it does happen
  if (IsCountConst && CountValue == 0)
    return;

  // TODO(ascull): if the count is constant but val is not it would be possible
  // to inline by spreading the value across 4 bytes and accessing subregs e.g.
  // eax, ax and al.
  if (shouldOptimizeMemIntrins() && IsCountConst && IsValConst) {
    // Base stays nullptr unless one of the inline strategies below applies;
    // that doubles as the "did we inline?" flag further down.
    Variable *Base = nullptr;
    Variable *VecReg = nullptr;
    // The fill byte replicated into all four bytes of a 32-bit immediate.
    const uint32_t SpreadValue =
        (ValValue << 24) | (ValValue << 16) | (ValValue << 8) | ValValue;

    // Emits one store of type Ty at Base+OffsetAmt: a vector/f64 store from
    // VecReg, or a 32-bit-or-smaller immediate store of SpreadValue.
    auto lowerSet = [this, &Base, SpreadValue, &VecReg](Type Ty,
                                                        uint32_t OffsetAmt) {
      assert(Base != nullptr);
      Constant *Offset = OffsetAmt ? Ctx->getConstantInt32(OffsetAmt) : nullptr;

      // TODO(ascull): is 64-bit better with vector or scalar movq?
      auto *Mem = X86OperandMem::create(Func, Ty, Base, Offset);
      if (isVectorType(Ty)) {
        assert(VecReg != nullptr);
        _storep(VecReg, Mem);
      } else if (Ty == IceType_f64) {
        assert(VecReg != nullptr);
        _storeq(VecReg, Mem);
      } else {
        // i64 immediate stores don't exist; wide non-zero fills are limited
        // to 32-bit stores by the strategy selection below.
        assert(Ty != IceType_i64);
        _store(Ctx->getConstantInt(Ty, SpreadValue), Mem);
      }
    };

    // Find the largest type that can be used and use it as much as possible
    // in reverse order. Then handle any remainder with overlapping copies.
    // Since the remainder will be at the end, there will be reduced pressure
    // on the memory unit as the accesses to the same memory are far apart.
    Type Ty;
    if (ValValue == 0 && CountValue >= BytesPerStoreq &&
        CountValue <= BytesPerStorep * Traits::MEMCPY_UNROLL_LIMIT) {
      // When the value is zero it can be loaded into a vector register cheaply
      // using the xor trick.
      Base = legalizeToReg(Dest);
      VecReg = makeVectorOfZeros(IceType_v16i8);
      Ty = largestTypeInSize(CountValue);
    } else if (CountValue <= BytesPerStorei32 * Traits::MEMCPY_UNROLL_LIMIT) {
      // When the value is non-zero or the count is small we can't use vector
      // instructions so are limited to 32-bit stores.
      // (Note: reuses the memcpy unroll limit for sizing.)
      Base = legalizeToReg(Dest);
      constexpr uint32_t MaxSize = 4;
      Ty = largestTypeInSize(CountValue, MaxSize);
    }

    if (Base) {
      uint32_t TyWidth = typeWidthInBytes(Ty);

      // Emit the bulk stores back-to-front in TyWidth-sized chunks.
      uint32_t RemainingBytes = CountValue;
      uint32_t Offset = (CountValue & ~(TyWidth - 1)) - TyWidth;
      while (RemainingBytes >= TyWidth) {
        lowerSet(Ty, Offset);
        RemainingBytes -= TyWidth;
        Offset -= TyWidth;
      }

      if (RemainingBytes == 0)
        return;

      // Lower the remaining bytes. Adjust to larger types in order to make use
      // of overlaps in the copies.
      Type LeftOverTy = firstTypeThatFitsSize(RemainingBytes);
      Offset = CountValue - typeWidthInBytes(LeftOverTy);
      lowerSet(LeftOverTy, Offset);
      return;
    }
  }

  // Fall back on calling the memset function. The value operand needs to be
  // extended to a stack slot size because the PNaCl ABI requires arguments to
  // be at least 32 bits wide.
  Operand *ValExt;
  if (IsValConst) {
    ValExt = Ctx->getConstantInt(stackSlotType(), ValValue);
  } else {
    Variable *ValExtVar = Func->makeVariable(stackSlotType());
    lowerCast(InstCast::create(Func, InstCast::Zext, ValExtVar, Val));
    ValExt = ValExtVar;
  }
  InstCall *Call = makeHelperCall(RuntimeHelper::H_call_memset, nullptr, 3);
  Call->addArg(Dest);
  Call->addArg(ValExt);
  Call->addArg(Count);
  lowerCall(Call);
}
4985
/// Helper for address-mode optimization: incrementally folds the defining
/// instructions of the base/index variables into an x86 addressing
/// expression of the form Relocatable + Offset + Base + (Index << Shift).
/// Each match* method attempts one folding step, updating its in/out
/// parameters on success and returning the instruction that justified the
/// fold (for tracing), or nullptr when no transformation applies.
class AddressOptimizer {
  AddressOptimizer() = delete;
  AddressOptimizer(const AddressOptimizer &) = delete;
  AddressOptimizer &operator=(const AddressOptimizer &) = delete;

public:
  explicit AddressOptimizer(const Cfg *Func)
      : Func(Func), VMetadata(Func->getVMetadata()) {}

  /// Dumps the current state of the address expression plus the instruction
  /// (Reason) that produced it; active only in verbose addr-opt builds.
  inline void dumpAddressOpt(const ConstantRelocatable *const Relocatable,
                             int32_t Offset, const Variable *Base,
                             const Variable *Index, uint16_t Shift,
                             const Inst *Reason) const;

  /// Follows a single assignment defining *Var: replaces *Var with its
  /// source variable, or folds a constant/relocatable source into *Offset
  /// or *Relocatable respectively.
  inline const Inst *matchAssign(Variable **Var,
                                 ConstantRelocatable **Relocatable,
                                 int32_t *Offset);

  /// Splits Base=Var1+Var2 into Base=Var1, Index=Var2 (Shift reset to 0).
  inline const Inst *matchCombinedBaseIndex(Variable **Base, Variable **Index,
                                            uint16_t *Shift);

  /// Folds Index=Var*{1,2,4,8} or Index=Var<<{0..3} into the Shift amount.
  inline const Inst *matchShiftedIndex(Variable **Index, uint16_t *Shift);

  /// Folds Base=Var+Const, Base=Const+Var, or Base=Var-Const into *Offset
  /// (or into *Relocatable when the addend is a relocatable symbol).
  inline const Inst *matchOffsetBase(Variable **Base,
                                     ConstantRelocatable **Relocatable,
                                     int32_t *Offset);

private:
  const Cfg *const Func;
  const VariablesMetadata *const VMetadata;

  // Null-safe test for an integer add instruction.
  static bool isAdd(const Inst *Instr) {
    if (auto *Arith = llvm::dyn_cast_or_null<const InstArithmetic>(Instr)) {
      return (Arith->getOp() == InstArithmetic::Add);
    }
    return false;
  }
};
5024
5025void AddressOptimizer::dumpAddressOpt(
5026 const ConstantRelocatable *const Relocatable, int32_t Offset,
5027 const Variable *Base, const Variable *Index, uint16_t Shift,
5028 const Inst *Reason) const {
Jim Stichnoth20b71f52015-06-24 15:52:24 -07005029 if (!BuildDefs::dump())
John Porto7e93c622015-06-23 10:58:57 -07005030 return;
5031 if (!Func->isVerbose(IceV_AddrOpt))
5032 return;
5033 OstreamLocker L(Func->getContext());
5034 Ostream &Str = Func->getContext()->getStrDump();
5035 Str << "Instruction: ";
5036 Reason->dumpDecorated(Func);
5037 Str << " results in Base=";
5038 if (Base)
5039 Base->dump(Func);
5040 else
5041 Str << "<null>";
5042 Str << ", Index=";
5043 if (Index)
5044 Index->dump(Func);
5045 else
5046 Str << "<null>";
David Sehraa0b1a12015-10-27 16:55:40 -07005047 Str << ", Shift=" << Shift << ", Offset=" << Offset
5048 << ", Relocatable=" << Relocatable << "\n";
John Porto7e93c622015-06-23 10:58:57 -07005049}
5050
John Portoac2388c2016-01-22 07:10:56 -08005051const Inst *AddressOptimizer::matchAssign(Variable **Var,
5052 ConstantRelocatable **Relocatable,
5053 int32_t *Offset) {
Andrew Scull57e12682015-09-16 11:30:19 -07005054 // Var originates from Var=SrcVar ==> set Var:=SrcVar
John Portoac2388c2016-01-22 07:10:56 -08005055 if (*Var == nullptr)
5056 return nullptr;
5057 if (const Inst *VarAssign = VMetadata->getSingleDefinition(*Var)) {
5058 assert(!VMetadata->isMultiDef(*Var));
John Porto7e93c622015-06-23 10:58:57 -07005059 if (llvm::isa<InstAssign>(VarAssign)) {
5060 Operand *SrcOp = VarAssign->getSrc(0);
5061 assert(SrcOp);
David Sehraa0b1a12015-10-27 16:55:40 -07005062 if (auto *SrcVar = llvm::dyn_cast<Variable>(SrcOp)) {
John Porto7e93c622015-06-23 10:58:57 -07005063 if (!VMetadata->isMultiDef(SrcVar) &&
5064 // TODO: ensure SrcVar stays single-BB
5065 true) {
John Portoac2388c2016-01-22 07:10:56 -08005066 *Var = SrcVar;
5067 return VarAssign;
John Porto7e93c622015-06-23 10:58:57 -07005068 }
David Sehraa0b1a12015-10-27 16:55:40 -07005069 } else if (auto *Const = llvm::dyn_cast<ConstantInteger32>(SrcOp)) {
5070 int32_t MoreOffset = Const->getValue();
John Portoac2388c2016-01-22 07:10:56 -08005071 if (Utils::WouldOverflowAdd(*Offset, MoreOffset))
5072 return nullptr;
5073 *Var = nullptr;
David Sehraa0b1a12015-10-27 16:55:40 -07005074 Offset += MoreOffset;
John Portoac2388c2016-01-22 07:10:56 -08005075 return VarAssign;
David Sehraa0b1a12015-10-27 16:55:40 -07005076 } else if (auto *AddReloc = llvm::dyn_cast<ConstantRelocatable>(SrcOp)) {
John Portoac2388c2016-01-22 07:10:56 -08005077 if (*Relocatable == nullptr) {
5078 // It is always safe to fold a relocatable through assignment -- the
5079 // assignment frees a slot in the address operand that can be used to
5080 // hold the Sandbox Pointer -- if any.
5081 *Var = nullptr;
5082 *Relocatable = AddReloc;
5083 return VarAssign;
David Sehraa0b1a12015-10-27 16:55:40 -07005084 }
John Porto7e93c622015-06-23 10:58:57 -07005085 }
5086 }
5087 }
John Portoac2388c2016-01-22 07:10:56 -08005088 return nullptr;
John Porto7e93c622015-06-23 10:58:57 -07005089}
5090
John Portoac2388c2016-01-22 07:10:56 -08005091const Inst *AddressOptimizer::matchCombinedBaseIndex(Variable **Base,
5092 Variable **Index,
5093 uint16_t *Shift) {
John Porto7e93c622015-06-23 10:58:57 -07005094 // Index==nullptr && Base is Base=Var1+Var2 ==>
5095 // set Base=Var1, Index=Var2, Shift=0
John Portoac2388c2016-01-22 07:10:56 -08005096 if (*Base == nullptr)
5097 return nullptr;
5098 if (*Index != nullptr)
5099 return nullptr;
5100 auto *BaseInst = VMetadata->getSingleDefinition(*Base);
John Porto7e93c622015-06-23 10:58:57 -07005101 if (BaseInst == nullptr)
John Portoac2388c2016-01-22 07:10:56 -08005102 return nullptr;
5103 assert(!VMetadata->isMultiDef(*Base));
John Porto7e93c622015-06-23 10:58:57 -07005104 if (BaseInst->getSrcSize() < 2)
John Portoac2388c2016-01-22 07:10:56 -08005105 return nullptr;
David Sehraa0b1a12015-10-27 16:55:40 -07005106 if (auto *Var1 = llvm::dyn_cast<Variable>(BaseInst->getSrc(0))) {
John Porto7e93c622015-06-23 10:58:57 -07005107 if (VMetadata->isMultiDef(Var1))
John Portoac2388c2016-01-22 07:10:56 -08005108 return nullptr;
David Sehraa0b1a12015-10-27 16:55:40 -07005109 if (auto *Var2 = llvm::dyn_cast<Variable>(BaseInst->getSrc(1))) {
John Porto7e93c622015-06-23 10:58:57 -07005110 if (VMetadata->isMultiDef(Var2))
John Portoac2388c2016-01-22 07:10:56 -08005111 return nullptr;
John Porto7e93c622015-06-23 10:58:57 -07005112 if (isAdd(BaseInst) &&
5113 // TODO: ensure Var1 and Var2 stay single-BB
5114 true) {
John Portoac2388c2016-01-22 07:10:56 -08005115 *Base = Var1;
5116 *Index = Var2;
5117 *Shift = 0; // should already have been 0
5118 return BaseInst;
John Porto7e93c622015-06-23 10:58:57 -07005119 }
5120 }
5121 }
John Portoac2388c2016-01-22 07:10:56 -08005122 return nullptr;
John Porto7e93c622015-06-23 10:58:57 -07005123}
5124
John Portoac2388c2016-01-22 07:10:56 -08005125const Inst *AddressOptimizer::matchShiftedIndex(Variable **Index,
5126 uint16_t *Shift) {
John Porto7e93c622015-06-23 10:58:57 -07005127 // Index is Index=Var*Const && log2(Const)+Shift<=3 ==>
5128 // Index=Var, Shift+=log2(Const)
John Portoac2388c2016-01-22 07:10:56 -08005129 if (*Index == nullptr)
5130 return nullptr;
5131 auto *IndexInst = VMetadata->getSingleDefinition(*Index);
John Porto7e93c622015-06-23 10:58:57 -07005132 if (IndexInst == nullptr)
John Portoac2388c2016-01-22 07:10:56 -08005133 return nullptr;
5134 assert(!VMetadata->isMultiDef(*Index));
John Porto7e93c622015-06-23 10:58:57 -07005135 if (IndexInst->getSrcSize() < 2)
John Portoac2388c2016-01-22 07:10:56 -08005136 return nullptr;
David Sehraa0b1a12015-10-27 16:55:40 -07005137 if (auto *ArithInst = llvm::dyn_cast<InstArithmetic>(IndexInst)) {
5138 if (auto *Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(0))) {
5139 if (auto *Const =
John Porto7e93c622015-06-23 10:58:57 -07005140 llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(1))) {
David Sehraa0b1a12015-10-27 16:55:40 -07005141 if (VMetadata->isMultiDef(Var) || Const->getType() != IceType_i32)
John Portoac2388c2016-01-22 07:10:56 -08005142 return nullptr;
David Sehraa0b1a12015-10-27 16:55:40 -07005143 switch (ArithInst->getOp()) {
5144 default:
John Portoac2388c2016-01-22 07:10:56 -08005145 return nullptr;
David Sehraa0b1a12015-10-27 16:55:40 -07005146 case InstArithmetic::Mul: {
5147 uint32_t Mult = Const->getValue();
John Porto7e93c622015-06-23 10:58:57 -07005148 uint32_t LogMult;
5149 switch (Mult) {
5150 case 1:
5151 LogMult = 0;
5152 break;
5153 case 2:
5154 LogMult = 1;
5155 break;
5156 case 4:
5157 LogMult = 2;
5158 break;
5159 case 8:
5160 LogMult = 3;
5161 break;
5162 default:
John Portoac2388c2016-01-22 07:10:56 -08005163 return nullptr;
John Porto7e93c622015-06-23 10:58:57 -07005164 }
John Portoac2388c2016-01-22 07:10:56 -08005165 if (*Shift + LogMult <= 3) {
5166 *Index = Var;
5167 *Shift += LogMult;
5168 return IndexInst;
John Porto7e93c622015-06-23 10:58:57 -07005169 }
5170 }
David Sehraa0b1a12015-10-27 16:55:40 -07005171 case InstArithmetic::Shl: {
5172 uint32_t ShiftAmount = Const->getValue();
5173 switch (ShiftAmount) {
5174 case 0:
5175 case 1:
5176 case 2:
5177 case 3:
5178 break;
5179 default:
John Portoac2388c2016-01-22 07:10:56 -08005180 return nullptr;
David Sehraa0b1a12015-10-27 16:55:40 -07005181 }
John Portoac2388c2016-01-22 07:10:56 -08005182 if (*Shift + ShiftAmount <= 3) {
5183 *Index = Var;
5184 *Shift += ShiftAmount;
5185 return IndexInst;
David Sehraa0b1a12015-10-27 16:55:40 -07005186 }
5187 }
5188 }
John Porto7e93c622015-06-23 10:58:57 -07005189 }
5190 }
5191 }
John Portoac2388c2016-01-22 07:10:56 -08005192 return nullptr;
John Porto7e93c622015-06-23 10:58:57 -07005193}
5194
John Portoac2388c2016-01-22 07:10:56 -08005195const Inst *AddressOptimizer::matchOffsetBase(Variable **Base,
5196 ConstantRelocatable **Relocatable,
5197 int32_t *Offset) {
John Porto7e93c622015-06-23 10:58:57 -07005198 // Base is Base=Var+Const || Base is Base=Const+Var ==>
5199 // set Base=Var, Offset+=Const
5200 // Base is Base=Var-Const ==>
5201 // set Base=Var, Offset-=Const
John Portoac2388c2016-01-22 07:10:56 -08005202 if (*Base == nullptr) {
5203 return nullptr;
David Sehraa0b1a12015-10-27 16:55:40 -07005204 }
John Portoac2388c2016-01-22 07:10:56 -08005205 const Inst *BaseInst = VMetadata->getSingleDefinition(*Base);
David Sehraa0b1a12015-10-27 16:55:40 -07005206 if (BaseInst == nullptr) {
John Portoac2388c2016-01-22 07:10:56 -08005207 return nullptr;
David Sehraa0b1a12015-10-27 16:55:40 -07005208 }
John Portoac2388c2016-01-22 07:10:56 -08005209 assert(!VMetadata->isMultiDef(*Base));
David Sehraa0b1a12015-10-27 16:55:40 -07005210 if (auto *ArithInst = llvm::dyn_cast<const InstArithmetic>(BaseInst)) {
John Porto7e93c622015-06-23 10:58:57 -07005211 if (ArithInst->getOp() != InstArithmetic::Add &&
5212 ArithInst->getOp() != InstArithmetic::Sub)
John Portoac2388c2016-01-22 07:10:56 -08005213 return nullptr;
John Porto7e93c622015-06-23 10:58:57 -07005214 bool IsAdd = ArithInst->getOp() == InstArithmetic::Add;
David Sehraa0b1a12015-10-27 16:55:40 -07005215 Operand *Src0 = ArithInst->getSrc(0);
5216 Operand *Src1 = ArithInst->getSrc(1);
5217 auto *Var0 = llvm::dyn_cast<Variable>(Src0);
5218 auto *Var1 = llvm::dyn_cast<Variable>(Src1);
5219 auto *Const0 = llvm::dyn_cast<ConstantInteger32>(Src0);
5220 auto *Const1 = llvm::dyn_cast<ConstantInteger32>(Src1);
5221 auto *Reloc0 = llvm::dyn_cast<ConstantRelocatable>(Src0);
5222 auto *Reloc1 = llvm::dyn_cast<ConstantRelocatable>(Src1);
5223 Variable *NewBase = nullptr;
John Portoac2388c2016-01-22 07:10:56 -08005224 int32_t NewOffset = *Offset;
5225 ConstantRelocatable *NewRelocatable = *Relocatable;
David Sehraa0b1a12015-10-27 16:55:40 -07005226 if (Var0 && Var1)
5227 // TODO(sehr): merge base/index splitting into here.
John Portoac2388c2016-01-22 07:10:56 -08005228 return nullptr;
David Sehraa0b1a12015-10-27 16:55:40 -07005229 if (!IsAdd && Var1)
John Portoac2388c2016-01-22 07:10:56 -08005230 return nullptr;
David Sehraa0b1a12015-10-27 16:55:40 -07005231 if (Var0)
5232 NewBase = Var0;
5233 else if (Var1)
5234 NewBase = Var1;
5235 // Don't know how to add/subtract two relocatables.
John Portoac2388c2016-01-22 07:10:56 -08005236 if ((*Relocatable && (Reloc0 || Reloc1)) || (Reloc0 && Reloc1))
5237 return nullptr;
David Sehraa0b1a12015-10-27 16:55:40 -07005238 // Don't know how to subtract a relocatable.
5239 if (!IsAdd && Reloc1)
John Portoac2388c2016-01-22 07:10:56 -08005240 return nullptr;
David Sehraa0b1a12015-10-27 16:55:40 -07005241 // Incorporate ConstantRelocatables.
5242 if (Reloc0)
5243 NewRelocatable = Reloc0;
5244 else if (Reloc1)
5245 NewRelocatable = Reloc1;
5246 // Compute the updated constant offset.
5247 if (Const0) {
John Porto56958cb2016-01-14 09:18:18 -08005248 const int32_t MoreOffset =
5249 IsAdd ? Const0->getValue() : -Const0->getValue();
David Sehraa0b1a12015-10-27 16:55:40 -07005250 if (Utils::WouldOverflowAdd(NewOffset, MoreOffset))
John Portoac2388c2016-01-22 07:10:56 -08005251 return nullptr;
David Sehraa0b1a12015-10-27 16:55:40 -07005252 NewOffset += MoreOffset;
John Porto7e93c622015-06-23 10:58:57 -07005253 }
David Sehraa0b1a12015-10-27 16:55:40 -07005254 if (Const1) {
John Porto56958cb2016-01-14 09:18:18 -08005255 const int32_t MoreOffset =
5256 IsAdd ? Const1->getValue() : -Const1->getValue();
David Sehraa0b1a12015-10-27 16:55:40 -07005257 if (Utils::WouldOverflowAdd(NewOffset, MoreOffset))
John Portoac2388c2016-01-22 07:10:56 -08005258 return nullptr;
David Sehraa0b1a12015-10-27 16:55:40 -07005259 NewOffset += MoreOffset;
5260 }
John Portoac2388c2016-01-22 07:10:56 -08005261 *Base = NewBase;
5262 *Offset = NewOffset;
5263 *Relocatable = NewRelocatable;
5264 return BaseInst;
John Porto7e93c622015-06-23 10:58:57 -07005265 }
John Portoac2388c2016-01-22 07:10:56 -08005266 return nullptr;
John Porto7e93c622015-06-23 10:58:57 -07005267}
5268
John Portoac2388c2016-01-22 07:10:56 -08005269template <typename TypeTraits>
5270typename TargetX86Base<TypeTraits>::X86OperandMem *
5271TargetX86Base<TypeTraits>::computeAddressOpt(const Inst *Instr, Type MemType,
5272 Operand *Addr) {
John Porto7e93c622015-06-23 10:58:57 -07005273 Func->resetCurrentNode();
5274 if (Func->isVerbose(IceV_AddrOpt)) {
5275 OstreamLocker L(Func->getContext());
5276 Ostream &Str = Func->getContext()->getStrDump();
5277 Str << "\nStarting computeAddressOpt for instruction:\n ";
5278 Instr->dumpDecorated(Func);
5279 }
John Portoac2388c2016-01-22 07:10:56 -08005280
5281 OptAddr NewAddr;
5282 NewAddr.Base = llvm::dyn_cast<Variable>(Addr);
5283 if (NewAddr.Base == nullptr)
5284 return nullptr;
5285
Andrew Scull57e12682015-09-16 11:30:19 -07005286 // If the Base has more than one use or is live across multiple blocks, then
5287 // don't go further. Alternatively (?), never consider a transformation that
5288 // would change a variable that is currently *not* live across basic block
5289 // boundaries into one that *is*.
John Portoac2388c2016-01-22 07:10:56 -08005290 if (Func->getVMetadata()->isMultiBlock(
5291 NewAddr.Base) /* || Base->getUseCount() > 1*/)
5292 return nullptr;
John Porto7e93c622015-06-23 10:58:57 -07005293
John Portoac2388c2016-01-22 07:10:56 -08005294 AddressOptimizer AddrOpt(Func);
Karl Schimpfd4699942016-04-02 09:55:31 -07005295 const bool MockBounds = getFlags().getMockBoundsCheck();
David Sehraa0b1a12015-10-27 16:55:40 -07005296 const Inst *Reason = nullptr;
John Portoac2388c2016-01-22 07:10:56 -08005297 bool AddressWasOptimized = false;
5298 // The following unnamed struct identifies the address mode formation steps
5299 // that could potentially create an invalid memory operand (i.e., no free
5300 // slots for RebasePtr.) We add all those variables to this struct so that we
5301 // can use memset() to reset all members to false.
5302 struct {
5303 bool AssignBase = false;
5304 bool AssignIndex = false;
5305 bool OffsetFromBase = false;
5306 bool OffsetFromIndex = false;
5307 bool CombinedBaseIndex = false;
5308 } Skip;
5309 // This points to the boolean in Skip that represents the last folding
5310 // performed. This is used to disable a pattern match that generated an
5311 // invalid address. Without this, the algorithm would never finish.
5312 bool *SkipLastFolding = nullptr;
5313 // NewAddrCheckpoint is used to rollback the address being formed in case an
5314 // invalid address is formed.
5315 OptAddr NewAddrCheckpoint;
5316 Reason = Instr;
David Sehraa0b1a12015-10-27 16:55:40 -07005317 do {
John Portoac2388c2016-01-22 07:10:56 -08005318 if (SandboxingType != ST_None) {
5319 // When sandboxing, we defer the sandboxing of NewAddr to the Concrete
5320 // Target. If our optimization was overly aggressive, then we simply undo
5321 // what the previous iteration did, and set the previous pattern's skip
5322 // bit to true.
5323 if (!legalizeOptAddrForSandbox(&NewAddr)) {
5324 *SkipLastFolding = true;
5325 SkipLastFolding = nullptr;
5326 NewAddr = NewAddrCheckpoint;
5327 Reason = nullptr;
5328 }
5329 }
5330
David Sehraa0b1a12015-10-27 16:55:40 -07005331 if (Reason) {
John Portoac2388c2016-01-22 07:10:56 -08005332 AddrOpt.dumpAddressOpt(NewAddr.Relocatable, NewAddr.Offset, NewAddr.Base,
5333 NewAddr.Index, NewAddr.Shift, Reason);
David Sehraa0b1a12015-10-27 16:55:40 -07005334 AddressWasOptimized = true;
5335 Reason = nullptr;
John Portoac2388c2016-01-22 07:10:56 -08005336 SkipLastFolding = nullptr;
5337 memset(&Skip, 0, sizeof(Skip));
John Porto7e93c622015-06-23 10:58:57 -07005338 }
John Portoac2388c2016-01-22 07:10:56 -08005339
5340 NewAddrCheckpoint = NewAddr;
5341
David Sehraa0b1a12015-10-27 16:55:40 -07005342 // Update Base and Index to follow through assignments to definitions.
John Portoac2388c2016-01-22 07:10:56 -08005343 if (!Skip.AssignBase &&
5344 (Reason = AddrOpt.matchAssign(&NewAddr.Base, &NewAddr.Relocatable,
5345 &NewAddr.Offset))) {
5346 SkipLastFolding = &Skip.AssignBase;
David Sehraa0b1a12015-10-27 16:55:40 -07005347 // Assignments of Base from a Relocatable or ConstantInt32 can result
5348 // in Base becoming nullptr. To avoid code duplication in this loop we
5349 // prefer that Base be non-nullptr if possible.
John Portoac2388c2016-01-22 07:10:56 -08005350 if ((NewAddr.Base == nullptr) && (NewAddr.Index != nullptr) &&
5351 NewAddr.Shift == 0) {
5352 std::swap(NewAddr.Base, NewAddr.Index);
5353 }
David Sehraa0b1a12015-10-27 16:55:40 -07005354 continue;
5355 }
John Portoac2388c2016-01-22 07:10:56 -08005356 if (!Skip.AssignBase &&
5357 (Reason = AddrOpt.matchAssign(&NewAddr.Index, &NewAddr.Relocatable,
5358 &NewAddr.Offset))) {
5359 SkipLastFolding = &Skip.AssignIndex;
David Sehraa0b1a12015-10-27 16:55:40 -07005360 continue;
John Portoac2388c2016-01-22 07:10:56 -08005361 }
John Porto7e93c622015-06-23 10:58:57 -07005362
David Sehraa0b1a12015-10-27 16:55:40 -07005363 if (!MockBounds) {
5364 // Transition from:
5365 // <Relocatable + Offset>(Base) to
5366 // <Relocatable + Offset>(Base, Index)
John Portoac2388c2016-01-22 07:10:56 -08005367 if (!Skip.CombinedBaseIndex &&
5368 (Reason = AddrOpt.matchCombinedBaseIndex(
5369 &NewAddr.Base, &NewAddr.Index, &NewAddr.Shift))) {
5370 SkipLastFolding = &Skip.CombinedBaseIndex;
David Sehraa0b1a12015-10-27 16:55:40 -07005371 continue;
John Portoac2388c2016-01-22 07:10:56 -08005372 }
5373
David Sehraa0b1a12015-10-27 16:55:40 -07005374 // Recognize multiply/shift and update Shift amount.
5375 // Index becomes Index=Var<<Const && Const+Shift<=3 ==>
5376 // Index=Var, Shift+=Const
5377 // Index becomes Index=Const*Var && log2(Const)+Shift<=3 ==>
5378 // Index=Var, Shift+=log2(Const)
John Portoac2388c2016-01-22 07:10:56 -08005379 if ((Reason =
5380 AddrOpt.matchShiftedIndex(&NewAddr.Index, &NewAddr.Shift))) {
David Sehraa0b1a12015-10-27 16:55:40 -07005381 continue;
John Portoac2388c2016-01-22 07:10:56 -08005382 }
5383
David Sehraa0b1a12015-10-27 16:55:40 -07005384 // If Shift is zero, the choice of Base and Index was purely arbitrary.
5385 // Recognize multiply/shift and set Shift amount.
5386 // Shift==0 && Base is Base=Var*Const && log2(Const)+Shift<=3 ==>
5387 // swap(Index,Base)
5388 // Similar for Base=Const*Var and Base=Var<<Const
John Portoac2388c2016-01-22 07:10:56 -08005389 if (NewAddr.Shift == 0 &&
5390 (Reason = AddrOpt.matchShiftedIndex(&NewAddr.Base, &NewAddr.Shift))) {
5391 std::swap(NewAddr.Base, NewAddr.Index);
David Sehraa0b1a12015-10-27 16:55:40 -07005392 continue;
5393 }
5394 }
John Portoac2388c2016-01-22 07:10:56 -08005395
David Sehraa0b1a12015-10-27 16:55:40 -07005396 // Update Offset to reflect additions/subtractions with constants and
5397 // relocatables.
John Porto7e93c622015-06-23 10:58:57 -07005398 // TODO: consider overflow issues with respect to Offset.
John Portoac2388c2016-01-22 07:10:56 -08005399 if (!Skip.OffsetFromBase &&
5400 (Reason = AddrOpt.matchOffsetBase(&NewAddr.Base, &NewAddr.Relocatable,
5401 &NewAddr.Offset))) {
5402 SkipLastFolding = &Skip.OffsetFromBase;
David Sehraa0b1a12015-10-27 16:55:40 -07005403 continue;
John Portoac2388c2016-01-22 07:10:56 -08005404 }
5405 if (NewAddr.Shift == 0 && !Skip.OffsetFromIndex &&
5406 (Reason = AddrOpt.matchOffsetBase(&NewAddr.Index, &NewAddr.Relocatable,
5407 &NewAddr.Offset))) {
5408 SkipLastFolding = &Skip.OffsetFromIndex;
David Sehr69e92902015-11-04 14:46:29 -08005409 continue;
John Portoac2388c2016-01-22 07:10:56 -08005410 }
5411
David Sehraa0b1a12015-10-27 16:55:40 -07005412 // TODO(sehr, stichnot): Handle updates of Index with Shift != 0.
5413 // Index is Index=Var+Const ==>
5414 // set Index=Var, Offset+=(Const<<Shift)
5415 // Index is Index=Const+Var ==>
5416 // set Index=Var, Offset+=(Const<<Shift)
5417 // Index is Index=Var-Const ==>
5418 // set Index=Var, Offset-=(Const<<Shift)
5419 break;
5420 } while (Reason);
John Portoac2388c2016-01-22 07:10:56 -08005421
5422 if (!AddressWasOptimized) {
5423 return nullptr;
5424 }
5425
5426 // Undo any addition of RebasePtr. It will be added back when the mem
5427 // operand is sandboxed.
5428 if (NewAddr.Base == RebasePtr) {
5429 NewAddr.Base = nullptr;
5430 }
5431
5432 if (NewAddr.Index == RebasePtr) {
5433 NewAddr.Index = nullptr;
5434 NewAddr.Shift = 0;
5435 }
5436
5437 Constant *OffsetOp = nullptr;
5438 if (NewAddr.Relocatable == nullptr) {
5439 OffsetOp = Ctx->getConstantInt32(NewAddr.Offset);
5440 } else {
5441 OffsetOp =
5442 Ctx->getConstantSym(NewAddr.Relocatable->getOffset() + NewAddr.Offset,
Jim Stichnoth98ba0062016-03-07 09:26:22 -08005443 NewAddr.Relocatable->getName());
John Portoac2388c2016-01-22 07:10:56 -08005444 }
5445 // Vanilla ICE load instructions should not use the segment registers, and
5446 // computeAddressOpt only works at the level of Variables and Constants, not
5447 // other X86OperandMem, so there should be no mention of segment
5448 // registers there either.
5449 static constexpr auto SegmentReg =
5450 X86OperandMem::SegmentRegisters::DefaultSegment;
5451
5452 return X86OperandMem::create(Func, MemType, NewAddr.Base, OffsetOp,
5453 NewAddr.Index, NewAddr.Shift, SegmentReg);
John Porto7e93c622015-06-23 10:58:57 -07005454}
5455
Jim Stichnothad2989b2015-09-15 10:21:42 -07005456/// Add a mock bounds check on the memory address before using it as a load or
5457/// store operand. The basic idea is that given a memory operand [reg], we
5458/// would first add bounds-check code something like:
5459///
5460/// cmp reg, <lb>
5461/// jl out_of_line_error
5462/// cmp reg, <ub>
5463/// jg out_of_line_error
5464///
5465/// In reality, the specific code will depend on how <lb> and <ub> are
5466/// represented, e.g. an immediate, a global, or a function argument.
5467///
5468/// As such, we need to enforce that the memory operand does not have the form
5469/// [reg1+reg2], because then there is no simple cmp instruction that would
5470/// suffice. However, we consider [reg+offset] to be OK because the offset is
5471/// usually small, and so <ub> could have a safety buffer built in and then we
5472/// could instead branch to a custom out_of_line_error that does the precise
5473/// check and jumps back if it turns out OK.
5474///
5475/// For the purpose of mocking the bounds check, we'll do something like this:
5476///
5477/// cmp reg, 0
5478/// je label
5479/// cmp reg, 1
5480/// je label
5481/// label:
5482///
5483/// Also note that we don't need to add a bounds check to a dereference of a
5484/// simple global variable address.
John Porto4a566862016-01-04 09:33:41 -08005485template <typename TraitsType>
5486void TargetX86Base<TraitsType>::doMockBoundsCheck(Operand *Opnd) {
Karl Schimpfd4699942016-04-02 09:55:31 -07005487 if (!getFlags().getMockBoundsCheck())
Jim Stichnothad2989b2015-09-15 10:21:42 -07005488 return;
John Porto4a566862016-01-04 09:33:41 -08005489 if (auto *Mem = llvm::dyn_cast<X86OperandMem>(Opnd)) {
Jim Stichnothad2989b2015-09-15 10:21:42 -07005490 if (Mem->getIndex()) {
5491 llvm::report_fatal_error("doMockBoundsCheck: Opnd contains index reg");
5492 }
5493 Opnd = Mem->getBase();
5494 }
5495 // At this point Opnd could be nullptr, or Variable, or Constant, or perhaps
5496 // something else. We only care if it is Variable.
5497 auto *Var = llvm::dyn_cast_or_null<Variable>(Opnd);
5498 if (Var == nullptr)
5499 return;
5500 // We use lowerStore() to copy out-args onto the stack. This creates a memory
5501 // operand with the stack pointer as the base register. Don't do bounds
5502 // checks on that.
Jim Stichnoth8aa39662016-02-10 11:20:30 -08005503 if (Var->getRegNum() == getStackReg())
Jim Stichnothad2989b2015-09-15 10:21:42 -07005504 return;
5505
John Porto4a566862016-01-04 09:33:41 -08005506 auto *Label = InstX86Label::create(Func, this);
Jim Stichnothad2989b2015-09-15 10:21:42 -07005507 _cmp(Opnd, Ctx->getConstantZero(IceType_i32));
5508 _br(Traits::Cond::Br_e, Label);
5509 _cmp(Opnd, Ctx->getConstantInt32(1));
5510 _br(Traits::Cond::Br_e, Label);
5511 Context.insert(Label);
5512}
5513
John Porto4a566862016-01-04 09:33:41 -08005514template <typename TraitsType>
5515void TargetX86Base<TraitsType>::lowerLoad(const InstLoad *Load) {
John Porto921856d2015-07-07 11:56:26 -07005516 // A Load instruction can be treated the same as an Assign instruction, after
Jim Stichnoth8ff4b282016-01-04 15:39:06 -08005517 // the source operand is transformed into an X86OperandMem operand. Note that
5518 // the address mode optimization already creates an X86OperandMem operand, so
5519 // it doesn't need another level of transformation.
John Porto7e93c622015-06-23 10:58:57 -07005520 Variable *DestLoad = Load->getDest();
5521 Type Ty = DestLoad->getType();
5522 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty);
Jim Stichnothad2989b2015-09-15 10:21:42 -07005523 doMockBoundsCheck(Src0);
Jim Stichnoth54f3d512015-12-11 09:53:00 -08005524 auto *Assign = InstAssign::create(Func, DestLoad, Src0);
John Porto7e93c622015-06-23 10:58:57 -07005525 lowerAssign(Assign);
5526}
5527
John Porto4a566862016-01-04 09:33:41 -08005528template <typename TraitsType>
5529void TargetX86Base<TraitsType>::doAddressOptLoad() {
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08005530 Inst *Instr = Context.getCur();
5531 Operand *Addr = Instr->getSrc(0);
5532 Variable *Dest = Instr->getDest();
5533 if (auto *OptAddr = computeAddressOpt(Instr, Dest->getType(), Addr)) {
5534 Instr->setDeleted();
John Portoac2388c2016-01-22 07:10:56 -08005535 Context.insert<InstLoad>(Dest, OptAddr);
John Porto7e93c622015-06-23 10:58:57 -07005536 }
5537}
5538
John Porto4a566862016-01-04 09:33:41 -08005539template <typename TraitsType>
5540void TargetX86Base<TraitsType>::randomlyInsertNop(float Probability,
5541 RandomNumberGenerator &RNG) {
Qining Luaee5fa82015-08-20 14:59:03 -07005542 RandomNumberGeneratorWrapper RNGW(RNG);
5543 if (RNGW.getTrueWithProbability(Probability)) {
5544 _nop(RNGW(Traits::X86_NUM_NOP_VARIANTS));
John Porto7e93c622015-06-23 10:58:57 -07005545 }
5546}
5547
John Porto4a566862016-01-04 09:33:41 -08005548template <typename TraitsType>
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08005549void TargetX86Base<TraitsType>::lowerPhi(const InstPhi * /*Instr*/) {
John Porto7e93c622015-06-23 10:58:57 -07005550 Func->setError("Phi found in regular instruction list");
5551}
5552
John Porto4a566862016-01-04 09:33:41 -08005553template <typename TraitsType>
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08005554void TargetX86Base<TraitsType>::lowerRet(const InstRet *Instr) {
David Sehr0c68bef2016-01-20 10:00:23 -08005555 Variable *Reg = nullptr;
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08005556 if (Instr->hasRetValue()) {
5557 Operand *RetValue = legalize(Instr->getRetValue());
David Sehr0c68bef2016-01-20 10:00:23 -08005558 const Type ReturnType = RetValue->getType();
5559 assert(isVectorType(ReturnType) || isScalarFloatingType(ReturnType) ||
5560 (ReturnType == IceType_i32) || (ReturnType == IceType_i64));
5561 Reg = moveReturnValueToRegister(RetValue, ReturnType);
5562 }
5563 // Add a ret instruction even if sandboxing is enabled, because addEpilog
5564 // explicitly looks for a ret instruction as a marker for where to insert the
5565 // frame removal instructions.
5566 _ret(Reg);
5567 // Add a fake use of esp to make sure esp stays alive for the entire
5568 // function. Otherwise post-call esp adjustments get dead-code eliminated.
5569 keepEspLiveAtExit();
5570}
5571
5572template <typename TraitsType>
John Porto4a566862016-01-04 09:33:41 -08005573void TargetX86Base<TraitsType>::lowerSelect(const InstSelect *Select) {
David Sehre3984282015-12-15 17:34:55 -08005574 Variable *Dest = Select->getDest();
John Porto7e93c622015-06-23 10:58:57 -07005575
David Sehre3984282015-12-15 17:34:55 -08005576 if (isVectorType(Dest->getType())) {
5577 lowerSelectVector(Select);
John Porto7e93c622015-06-23 10:58:57 -07005578 return;
5579 }
5580
David Sehre3984282015-12-15 17:34:55 -08005581 Operand *Condition = Select->getCondition();
John Porto7e93c622015-06-23 10:58:57 -07005582 // Handle folding opportunities.
David Sehre3984282015-12-15 17:34:55 -08005583 if (const Inst *Producer = FoldingInfo.getProducerFor(Condition)) {
John Porto7e93c622015-06-23 10:58:57 -07005584 assert(Producer->isDeleted());
Jim Stichnothcaeaa272016-01-10 12:53:44 -08005585 switch (BoolFolding<Traits>::getProducerKind(Producer)) {
John Porto7e93c622015-06-23 10:58:57 -07005586 default:
5587 break;
Jim Stichnothcaeaa272016-01-10 12:53:44 -08005588 case BoolFolding<Traits>::PK_Icmp32:
5589 case BoolFolding<Traits>::PK_Icmp64: {
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08005590 lowerIcmpAndConsumer(llvm::cast<InstIcmp>(Producer), Select);
David Sehre3984282015-12-15 17:34:55 -08005591 return;
5592 }
Jim Stichnothcaeaa272016-01-10 12:53:44 -08005593 case BoolFolding<Traits>::PK_Fcmp: {
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08005594 lowerFcmpAndConsumer(llvm::cast<InstFcmp>(Producer), Select);
David Sehre3984282015-12-15 17:34:55 -08005595 return;
5596 }
John Porto7e93c622015-06-23 10:58:57 -07005597 }
5598 }
John Porto7e93c622015-06-23 10:58:57 -07005599
David Sehre3984282015-12-15 17:34:55 -08005600 Operand *CmpResult = legalize(Condition, Legal_Reg | Legal_Mem);
5601 Operand *Zero = Ctx->getConstantZero(IceType_i32);
5602 _cmp(CmpResult, Zero);
5603 Operand *SrcT = Select->getTrueOperand();
5604 Operand *SrcF = Select->getFalseOperand();
John Porto4a566862016-01-04 09:33:41 -08005605 const BrCond Cond = Traits::Cond::Br_ne;
David Sehre3984282015-12-15 17:34:55 -08005606 lowerSelectMove(Dest, Cond, SrcT, SrcF);
5607}
5608
John Porto4a566862016-01-04 09:33:41 -08005609template <typename TraitsType>
5610void TargetX86Base<TraitsType>::lowerSelectMove(Variable *Dest, BrCond Cond,
5611 Operand *SrcT, Operand *SrcF) {
David Sehre3984282015-12-15 17:34:55 -08005612 Type DestTy = Dest->getType();
John Porto7e93c622015-06-23 10:58:57 -07005613 if (typeWidthInBytes(DestTy) == 1 || isFloatingType(DestTy)) {
Andrew Scull57e12682015-09-16 11:30:19 -07005614 // The cmov instruction doesn't allow 8-bit or FP operands, so we need
5615 // explicit control flow.
John Porto7e93c622015-06-23 10:58:57 -07005616 // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1:
John Porto4a566862016-01-04 09:33:41 -08005617 auto *Label = InstX86Label::create(Func, this);
John Porto7e93c622015-06-23 10:58:57 -07005618 SrcT = legalize(SrcT, Legal_Reg | Legal_Imm);
5619 _mov(Dest, SrcT);
5620 _br(Cond, Label);
5621 SrcF = legalize(SrcF, Legal_Reg | Legal_Imm);
David Sehre3984282015-12-15 17:34:55 -08005622 _redefined(_mov(Dest, SrcF));
John Porto7e93c622015-06-23 10:58:57 -07005623 Context.insert(Label);
5624 return;
5625 }
5626 // mov t, SrcF; cmov_cond t, SrcT; mov dest, t
Andrew Scull57e12682015-09-16 11:30:19 -07005627 // But if SrcT is immediate, we might be able to do better, as the cmov
5628 // instruction doesn't allow an immediate operand:
John Porto7e93c622015-06-23 10:58:57 -07005629 // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t
5630 if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) {
5631 std::swap(SrcT, SrcF);
John Porto4a566862016-01-04 09:33:41 -08005632 Cond = InstImpl<TraitsType>::InstX86Base::getOppositeCondition(Cond);
John Porto7e93c622015-06-23 10:58:57 -07005633 }
John Porto1d235422015-08-12 12:37:53 -07005634 if (!Traits::Is64Bit && DestTy == IceType_i64) {
Jan Voungfbdd2442015-07-15 12:36:20 -07005635 SrcT = legalizeUndef(SrcT);
5636 SrcF = legalizeUndef(SrcF);
John Porto7e93c622015-06-23 10:58:57 -07005637 // Set the low portion.
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08005638 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
David Sehre3984282015-12-15 17:34:55 -08005639 lowerSelectIntMove(DestLo, Cond, loOperand(SrcT), loOperand(SrcF));
John Porto7e93c622015-06-23 10:58:57 -07005640 // Set the high portion.
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08005641 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
David Sehre3984282015-12-15 17:34:55 -08005642 lowerSelectIntMove(DestHi, Cond, hiOperand(SrcT), hiOperand(SrcF));
John Porto7e93c622015-06-23 10:58:57 -07005643 return;
5644 }
5645
John Porto1d235422015-08-12 12:37:53 -07005646 assert(DestTy == IceType_i16 || DestTy == IceType_i32 ||
5647 (Traits::Is64Bit && DestTy == IceType_i64));
David Sehre3984282015-12-15 17:34:55 -08005648 lowerSelectIntMove(Dest, Cond, SrcT, SrcF);
5649}
5650
John Porto4a566862016-01-04 09:33:41 -08005651template <typename TraitsType>
5652void TargetX86Base<TraitsType>::lowerSelectIntMove(Variable *Dest, BrCond Cond,
5653 Operand *SrcT,
5654 Operand *SrcF) {
John Porto7e93c622015-06-23 10:58:57 -07005655 Variable *T = nullptr;
5656 SrcF = legalize(SrcF);
5657 _mov(T, SrcF);
5658 SrcT = legalize(SrcT, Legal_Reg | Legal_Mem);
5659 _cmov(T, SrcT, Cond);
5660 _mov(Dest, T);
5661}
5662
John Porto4a566862016-01-04 09:33:41 -08005663template <typename TraitsType>
5664void TargetX86Base<TraitsType>::lowerMove(Variable *Dest, Operand *Src,
5665 bool IsRedefinition) {
David Sehre3984282015-12-15 17:34:55 -08005666 assert(Dest->getType() == Src->getType());
5667 assert(!Dest->isRematerializable());
5668 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
5669 Src = legalize(Src);
5670 Operand *SrcLo = loOperand(Src);
5671 Operand *SrcHi = hiOperand(Src);
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08005672 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
5673 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
David Sehre3984282015-12-15 17:34:55 -08005674 Variable *T_Lo = nullptr, *T_Hi = nullptr;
5675 _mov(T_Lo, SrcLo);
5676 _redefined(_mov(DestLo, T_Lo), IsRedefinition);
5677 _mov(T_Hi, SrcHi);
5678 _redefined(_mov(DestHi, T_Hi), IsRedefinition);
5679 } else {
5680 Operand *SrcLegal;
5681 if (Dest->hasReg()) {
5682 // If Dest already has a physical register, then only basic legalization
5683 // is needed, as the source operand can be a register, immediate, or
5684 // memory.
5685 SrcLegal = legalize(Src, Legal_Reg, Dest->getRegNum());
5686 } else {
5687 // If Dest could be a stack operand, then RI must be a physical register
5688 // or a scalar integer immediate.
5689 SrcLegal = legalize(Src, Legal_Reg | Legal_Imm);
5690 }
5691 if (isVectorType(Dest->getType())) {
5692 _redefined(_movp(Dest, SrcLegal), IsRedefinition);
5693 } else {
5694 _redefined(_mov(Dest, SrcLegal), IsRedefinition);
5695 }
5696 }
5697}
5698
John Porto4a566862016-01-04 09:33:41 -08005699template <typename TraitsType>
5700bool TargetX86Base<TraitsType>::lowerOptimizeFcmpSelect(
5701 const InstFcmp *Fcmp, const InstSelect *Select) {
David Sehre3984282015-12-15 17:34:55 -08005702 Operand *CmpSrc0 = Fcmp->getSrc(0);
5703 Operand *CmpSrc1 = Fcmp->getSrc(1);
5704 Operand *SelectSrcT = Select->getTrueOperand();
5705 Operand *SelectSrcF = Select->getFalseOperand();
5706
5707 if (CmpSrc0->getType() != SelectSrcT->getType())
5708 return false;
5709
5710 // TODO(sehr, stichnot): fcmp/select patterns (e,g., minsd/maxss) go here.
5711 InstFcmp::FCond Condition = Fcmp->getCondition();
5712 switch (Condition) {
5713 default:
5714 return false;
5715 case InstFcmp::True:
5716 case InstFcmp::False:
5717 case InstFcmp::Ogt:
5718 case InstFcmp::Olt:
5719 (void)CmpSrc0;
5720 (void)CmpSrc1;
5721 (void)SelectSrcT;
5722 (void)SelectSrcF;
5723 break;
5724 }
5725 return false;
5726}
5727
John Porto4a566862016-01-04 09:33:41 -08005728template <typename TraitsType>
5729void TargetX86Base<TraitsType>::lowerIcmp(const InstIcmp *Icmp) {
David Sehre3984282015-12-15 17:34:55 -08005730 Variable *Dest = Icmp->getDest();
5731 if (isVectorType(Dest->getType())) {
5732 lowerIcmpVector(Icmp);
5733 } else {
5734 constexpr Inst *Consumer = nullptr;
5735 lowerIcmpAndConsumer(Icmp, Consumer);
5736 }
5737}
5738
John Porto4a566862016-01-04 09:33:41 -08005739template <typename TraitsType>
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08005740void TargetX86Base<TraitsType>::lowerSelectVector(const InstSelect *Instr) {
5741 Variable *Dest = Instr->getDest();
David Sehre3984282015-12-15 17:34:55 -08005742 Type DestTy = Dest->getType();
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08005743 Operand *SrcT = Instr->getTrueOperand();
5744 Operand *SrcF = Instr->getFalseOperand();
5745 Operand *Condition = Instr->getCondition();
David Sehre3984282015-12-15 17:34:55 -08005746
5747 if (!isVectorType(DestTy))
5748 llvm::report_fatal_error("Expected a vector select");
5749
5750 Type SrcTy = SrcT->getType();
5751 Variable *T = makeReg(SrcTy);
5752 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);
5753 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);
5754 if (InstructionSet >= Traits::SSE4_1) {
5755 // TODO(wala): If the condition operand is a constant, use blendps or
5756 // pblendw.
5757 //
5758 // Use blendvps or pblendvb to implement select.
5759 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||
5760 SrcTy == IceType_v4f32) {
5761 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
5762 Variable *xmm0 = makeReg(IceType_v4i32, Traits::RegisterSet::Reg_xmm0);
5763 _movp(xmm0, ConditionRM);
5764 _psll(xmm0, Ctx->getConstantInt8(31));
5765 _movp(T, SrcFRM);
5766 _blendvps(T, SrcTRM, xmm0);
5767 _movp(Dest, T);
5768 } else {
5769 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16);
5770 Type SignExtTy =
5771 Condition->getType() == IceType_v8i1 ? IceType_v8i16 : IceType_v16i8;
5772 Variable *xmm0 = makeReg(SignExtTy, Traits::RegisterSet::Reg_xmm0);
5773 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition));
5774 _movp(T, SrcFRM);
5775 _pblendvb(T, SrcTRM, xmm0);
5776 _movp(Dest, T);
5777 }
5778 return;
5779 }
5780 // Lower select without Traits::SSE4.1:
5781 // a=d?b:c ==>
5782 // if elementtype(d) != i1:
5783 // d=sext(d);
5784 // a=(b&d)|(c&~d);
5785 Variable *T2 = makeReg(SrcTy);
5786 // Sign extend the condition operand if applicable.
5787 if (SrcTy == IceType_v4f32) {
5788 // The sext operation takes only integer arguments.
5789 Variable *T3 = Func->makeVariable(IceType_v4i32);
5790 lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition));
5791 _movp(T, T3);
5792 } else if (typeElementType(SrcTy) != IceType_i1) {
5793 lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition));
5794 } else {
5795 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
5796 _movp(T, ConditionRM);
5797 }
5798 _movp(T2, T);
5799 _pand(T, SrcTRM);
5800 _pandn(T2, SrcFRM);
5801 _por(T, T2);
5802 _movp(Dest, T);
5803
5804 return;
5805}
5806
John Porto4a566862016-01-04 09:33:41 -08005807template <typename TraitsType>
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08005808void TargetX86Base<TraitsType>::lowerStore(const InstStore *Instr) {
5809 Operand *Value = Instr->getData();
5810 Operand *Addr = Instr->getAddr();
John Porto4a566862016-01-04 09:33:41 -08005811 X86OperandMem *NewAddr = formMemoryOperand(Addr, Value->getType());
Jim Stichnothad2989b2015-09-15 10:21:42 -07005812 doMockBoundsCheck(NewAddr);
John Porto7e93c622015-06-23 10:58:57 -07005813 Type Ty = NewAddr->getType();
5814
John Porto1d235422015-08-12 12:37:53 -07005815 if (!Traits::Is64Bit && Ty == IceType_i64) {
Jan Voungfbdd2442015-07-15 12:36:20 -07005816 Value = legalizeUndef(Value);
John Porto7e93c622015-06-23 10:58:57 -07005817 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm);
John Porto4a566862016-01-04 09:33:41 -08005818 _store(ValueHi, llvm::cast<X86OperandMem>(hiOperand(NewAddr)));
Jim Stichnothb40595a2016-01-29 06:14:31 -08005819 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm);
John Porto4a566862016-01-04 09:33:41 -08005820 _store(ValueLo, llvm::cast<X86OperandMem>(loOperand(NewAddr)));
John Porto7e93c622015-06-23 10:58:57 -07005821 } else if (isVectorType(Ty)) {
Andrew Scull97f460d2015-07-21 10:07:42 -07005822 _storep(legalizeToReg(Value), NewAddr);
John Porto7e93c622015-06-23 10:58:57 -07005823 } else {
5824 Value = legalize(Value, Legal_Reg | Legal_Imm);
5825 _store(Value, NewAddr);
5826 }
5827}
5828
John Porto4a566862016-01-04 09:33:41 -08005829template <typename TraitsType>
5830void TargetX86Base<TraitsType>::doAddressOptStore() {
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08005831 auto *Instr = llvm::cast<InstStore>(Context.getCur());
5832 Operand *Addr = Instr->getAddr();
5833 Operand *Data = Instr->getData();
5834 if (auto *OptAddr = computeAddressOpt(Instr, Data->getType(), Addr)) {
5835 Instr->setDeleted();
John Portoac2388c2016-01-22 07:10:56 -08005836 auto *NewStore = Context.insert<InstStore>(Data, OptAddr);
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08005837 if (Instr->getDest())
5838 NewStore->setRmwBeacon(Instr->getRmwBeacon());
John Porto7e93c622015-06-23 10:58:57 -07005839 }
5840}
5841
John Porto4a566862016-01-04 09:33:41 -08005842template <typename TraitsType>
5843Operand *TargetX86Base<TraitsType>::lowerCmpRange(Operand *Comparison,
5844 uint64_t Min, uint64_t Max) {
Andrew Scull87f80c12015-07-20 10:19:16 -07005845 // TODO(ascull): 64-bit should not reach here but only because it is not
5846 // implemented yet. This should be able to handle the 64-bit case.
John Porto1d235422015-08-12 12:37:53 -07005847 assert(Traits::Is64Bit || Comparison->getType() != IceType_i64);
Andrew Scull87f80c12015-07-20 10:19:16 -07005848 // Subtracting 0 is a nop so don't do it
5849 if (Min != 0) {
5850 // Avoid clobbering the comparison by copying it
5851 Variable *T = nullptr;
5852 _mov(T, Comparison);
5853 _sub(T, Ctx->getConstantInt32(Min));
5854 Comparison = T;
5855 }
5856
5857 _cmp(Comparison, Ctx->getConstantInt32(Max - Min));
5858
5859 return Comparison;
5860}
5861
John Porto4a566862016-01-04 09:33:41 -08005862template <typename TraitsType>
5863void TargetX86Base<TraitsType>::lowerCaseCluster(const CaseCluster &Case,
5864 Operand *Comparison,
5865 bool DoneCmp,
5866 CfgNode *DefaultTarget) {
Andrew Scull87f80c12015-07-20 10:19:16 -07005867 switch (Case.getKind()) {
5868 case CaseCluster::JumpTable: {
John Porto4a566862016-01-04 09:33:41 -08005869 InstX86Label *SkipJumpTable;
Andrew Scull87f80c12015-07-20 10:19:16 -07005870
5871 Operand *RangeIndex =
5872 lowerCmpRange(Comparison, Case.getLow(), Case.getHigh());
Andrew Scull86df4e92015-07-30 13:54:44 -07005873 if (DefaultTarget == nullptr) {
Andrew Scull87f80c12015-07-20 10:19:16 -07005874 // Skip over jump table logic if comparison not in range and no default
John Porto4a566862016-01-04 09:33:41 -08005875 SkipJumpTable = InstX86Label::create(Func, this);
Andrew Scull87f80c12015-07-20 10:19:16 -07005876 _br(Traits::Cond::Br_a, SkipJumpTable);
Andrew Scull86df4e92015-07-30 13:54:44 -07005877 } else {
5878 _br(Traits::Cond::Br_a, DefaultTarget);
John Porto7e93c622015-06-23 10:58:57 -07005879 }
Andrew Scull87f80c12015-07-20 10:19:16 -07005880
5881 InstJumpTable *JumpTable = Case.getJumpTable();
5882 Context.insert(JumpTable);
5883
5884 // Make sure the index is a register of the same width as the base
5885 Variable *Index;
John Porto56958cb2016-01-14 09:18:18 -08005886 const Type PointerType = getPointerType();
5887 if (RangeIndex->getType() != PointerType) {
5888 Index = makeReg(PointerType);
Jim Stichnothe641e922016-02-29 09:54:55 -08005889 if (RangeIndex->getType() == IceType_i64) {
5890 assert(Traits::Is64Bit);
5891 _mov(Index, RangeIndex); // trunc
5892 } else {
5893 _movzx(Index, RangeIndex);
5894 }
Andrew Scull87f80c12015-07-20 10:19:16 -07005895 } else {
Andrew Scull97f460d2015-07-21 10:07:42 -07005896 Index = legalizeToReg(RangeIndex);
Andrew Scull87f80c12015-07-20 10:19:16 -07005897 }
5898
5899 constexpr RelocOffsetT RelocOffset = 0;
John Portoac2388c2016-01-22 07:10:56 -08005900 constexpr Variable *NoBase = nullptr;
John Porto03077212016-04-05 06:30:21 -07005901 auto JTName = GlobalString::createWithString(Ctx, JumpTable->getName());
Jim Stichnoth467ffe52016-03-29 15:01:06 -07005902 Constant *Offset = Ctx->getConstantSym(RelocOffset, JTName);
John Porto56958cb2016-01-14 09:18:18 -08005903 uint16_t Shift = typeWidthInBytesLog2(PointerType);
Jim Stichnoth8ff4b282016-01-04 15:39:06 -08005904 constexpr auto Segment = X86OperandMem::SegmentRegisters::DefaultSegment;
John Porto56958cb2016-01-14 09:18:18 -08005905
Andrew Scull87f80c12015-07-20 10:19:16 -07005906 Variable *Target = nullptr;
John Porto56958cb2016-01-14 09:18:18 -08005907 if (Traits::Is64Bit && NeedSandboxing) {
John Porto56958cb2016-01-14 09:18:18 -08005908 assert(Index != nullptr && Index->getType() == IceType_i32);
5909 }
John Portoac2388c2016-01-22 07:10:56 -08005910 auto *TargetInMemory = X86OperandMem::create(Func, PointerType, NoBase,
5911 Offset, Index, Shift, Segment);
Andrew Scull86df4e92015-07-30 13:54:44 -07005912 _mov(Target, TargetInMemory);
John Porto56958cb2016-01-14 09:18:18 -08005913
Andrew Scull86df4e92015-07-30 13:54:44 -07005914 lowerIndirectJump(Target);
Andrew Scull87f80c12015-07-20 10:19:16 -07005915
Andrew Scull86df4e92015-07-30 13:54:44 -07005916 if (DefaultTarget == nullptr)
Andrew Scull87f80c12015-07-20 10:19:16 -07005917 Context.insert(SkipJumpTable);
5918 return;
5919 }
5920 case CaseCluster::Range: {
Andrew Scull86df4e92015-07-30 13:54:44 -07005921 if (Case.isUnitRange()) {
Andrew Scull87f80c12015-07-20 10:19:16 -07005922 // Single item
Andrew Scull86df4e92015-07-30 13:54:44 -07005923 if (!DoneCmp) {
5924 Constant *Value = Ctx->getConstantInt32(Case.getLow());
Andrew Scull87f80c12015-07-20 10:19:16 -07005925 _cmp(Comparison, Value);
Andrew Scull86df4e92015-07-30 13:54:44 -07005926 }
5927 _br(Traits::Cond::Br_e, Case.getTarget());
5928 } else if (DoneCmp && Case.isPairRange()) {
5929 // Range of two items with first item aleady compared against
5930 _br(Traits::Cond::Br_e, Case.getTarget());
5931 Constant *Value = Ctx->getConstantInt32(Case.getHigh());
5932 _cmp(Comparison, Value);
5933 _br(Traits::Cond::Br_e, Case.getTarget());
Andrew Scull87f80c12015-07-20 10:19:16 -07005934 } else {
5935 // Range
5936 lowerCmpRange(Comparison, Case.getLow(), Case.getHigh());
Andrew Scull86df4e92015-07-30 13:54:44 -07005937 _br(Traits::Cond::Br_be, Case.getTarget());
Andrew Scull87f80c12015-07-20 10:19:16 -07005938 }
Andrew Scull86df4e92015-07-30 13:54:44 -07005939 if (DefaultTarget != nullptr)
5940 _br(DefaultTarget);
Andrew Scull87f80c12015-07-20 10:19:16 -07005941 return;
5942 }
5943 }
5944}
5945
John Porto4a566862016-01-04 09:33:41 -08005946template <typename TraitsType>
/// Lowers a switch instruction. The cases are first grouped into clusters
/// (contiguous ranges / jump tables), then a binary search over the clusters
/// is emitted; each individual cluster is finally lowered by
/// lowerCaseCluster() (equality test, range test, or jump table).
void TargetX86Base<TraitsType>::lowerSwitch(const InstSwitch *Instr) {
  // Group cases together and navigate through them with a binary search
  CaseClusterArray CaseClusters = CaseCluster::clusterizeSwitch(Func, Instr);
  Operand *Src0 = Instr->getComparison();
  CfgNode *DefaultTarget = Instr->getLabelDefault();

  assert(CaseClusters.size() != 0); // Should always be at least one

  if (!Traits::Is64Bit && Src0->getType() == IceType_i64) {
    // On 32-bit targets an i64 comparison value is handled as a lo/hi pair.
    Src0 = legalize(Src0); // get Base/Index into physical registers
    Operand *Src0Lo = loOperand(Src0);
    Operand *Src0Hi = hiOperand(Src0);
    if (CaseClusters.back().getHigh() > UINT32_MAX) {
      // TODO(ascull): handle 64-bit case properly (currently naive version)
      // This might be handled by a higher level lowering of switches.
      SizeT NumCases = Instr->getNumCases();
      // With several cases both halves are compared repeatedly, so pin them
      // in registers; for a single case a memory operand is acceptable.
      if (NumCases >= 2) {
        Src0Lo = legalizeToReg(Src0Lo);
        Src0Hi = legalizeToReg(Src0Hi);
      } else {
        Src0Lo = legalize(Src0Lo, Legal_Reg | Legal_Mem);
        Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem);
      }
      // Emit a linear sequence of 64-bit equality tests: a case matches only
      // when both the low and the high half match.
      for (SizeT I = 0; I < NumCases; ++I) {
        Constant *ValueLo = Ctx->getConstantInt32(Instr->getValue(I));
        Constant *ValueHi = Ctx->getConstantInt32(Instr->getValue(I) >> 32);
        InstX86Label *Label = InstX86Label::create(Func, this);
        _cmp(Src0Lo, ValueLo);
        _br(Traits::Cond::Br_ne, Label);
        _cmp(Src0Hi, ValueHi);
        _br(Traits::Cond::Br_e, Instr->getLabel(I));
        Context.insert(Label);
      }
      _br(Instr->getLabelDefault());
      return;
    } else {
      // All the values are 32-bit so just check the operand is too and then
      // fall through to the 32-bit implementation. This is a common case.
      Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem);
      Constant *Zero = Ctx->getConstantInt32(0);
      _cmp(Src0Hi, Zero);
      _br(Traits::Cond::Br_ne, DefaultTarget);
      Src0 = Src0Lo;
    }
  }

  // 32-bit lowering

  if (CaseClusters.size() == 1) {
    // Jump straight to default if needed. Currently a common case as jump
    // tables occur on their own.
    constexpr bool DoneCmp = false;
    lowerCaseCluster(CaseClusters.front(), Src0, DoneCmp, DefaultTarget);
    return;
  }

  // Going to be using multiple times so get it in a register early
  Variable *Comparison = legalizeToReg(Src0);

  // A span is over the clusters
  struct SearchSpan {
    SearchSpan(SizeT Begin, SizeT Size, InstX86Label *Label)
        : Begin(Begin), Size(Size), Label(Label) {}

    SizeT Begin;         // index of the first cluster in the span
    SizeT Size;          // number of clusters in the span
    InstX86Label *Label; // label emitted at the start of this span, if any
  };
  // The stack will only grow to the height of the tree so 12 should be plenty
  std::stack<SearchSpan, llvm::SmallVector<SearchSpan, 12>> SearchSpanStack;
  SearchSpanStack.emplace(0, CaseClusters.size(), nullptr);
  // DoneCmp means the pivot comparison just emitted already compared
  // Comparison against the low value of the next cluster to be lowered.
  bool DoneCmp = false;

  while (!SearchSpanStack.empty()) {
    SearchSpan Span = SearchSpanStack.top();
    SearchSpanStack.pop();

    if (Span.Label != nullptr)
      Context.insert(Span.Label);

    switch (Span.Size) {
    case 0:
      llvm::report_fatal_error("Invalid SearchSpan size");
      break;

    case 1:
      // If more spans remain to be emitted, this cluster must branch to the
      // default target explicitly on mismatch; otherwise it may fall through
      // to the final branch below.
      lowerCaseCluster(CaseClusters[Span.Begin], Comparison, DoneCmp,
                       SearchSpanStack.empty() ? nullptr : DefaultTarget);
      DoneCmp = false;
      break;

    case 2: {
      const CaseCluster *CaseA = &CaseClusters[Span.Begin];
      const CaseCluster *CaseB = &CaseClusters[Span.Begin + 1];

      // Placing a range last may allow register clobbering during the range
      // test. That means there is no need to clone the register. If it is a
      // unit range the comparison may have already been done in the binary
      // search (DoneCmp) and so it should be placed first. If this is a range
      // of two items and the comparison with the low value has already been
      // done, comparing with the other element is cheaper than a range test.
      // If the low end of the range is zero then there is no subtraction and
      // nothing to be gained.
      if (!CaseA->isUnitRange() &&
          !(CaseA->getLow() == 0 || (DoneCmp && CaseA->isPairRange()))) {
        std::swap(CaseA, CaseB);
        DoneCmp = false;
      }

      lowerCaseCluster(*CaseA, Comparison, DoneCmp);
      DoneCmp = false;
      lowerCaseCluster(*CaseB, Comparison, DoneCmp,
                       SearchSpanStack.empty() ? nullptr : DefaultTarget);
    } break;

    default:
      // Pick the middle item and branch b or ae
      SizeT PivotIndex = Span.Begin + (Span.Size / 2);
      const CaseCluster &Pivot = CaseClusters[PivotIndex];
      Constant *Value = Ctx->getConstantInt32(Pivot.getLow());
      InstX86Label *Label = InstX86Label::create(Func, this);
      _cmp(Comparison, Value);
      // TODO(ascull): does it always have to be far?
      _br(Traits::Cond::Br_b, Label, InstX86Br::Far);
      // Lower the left and (pivot+right) sides, falling through to the right
      SearchSpanStack.emplace(Span.Begin, Span.Size / 2, Label);
      SearchSpanStack.emplace(PivotIndex, Span.Size - (Span.Size / 2), nullptr);
      DoneCmp = true;
      break;
    }
  }

  _br(DefaultTarget);
}
6081
Andrew Scull9612d322015-07-06 14:53:25 -07006082/// The following pattern occurs often in lowered C and C++ code:
6083///
6084/// %cmp = fcmp/icmp pred <n x ty> %src0, %src1
6085/// %cmp.ext = sext <n x i1> %cmp to <n x ty>
6086///
6087/// We can eliminate the sext operation by copying the result of pcmpeqd,
Andrew Scull57e12682015-09-16 11:30:19 -07006088/// pcmpgtd, or cmpps (which produce sign extended results) to the result of the
6089/// sext operation.
John Porto4a566862016-01-04 09:33:41 -08006090template <typename TraitsType>
6091void TargetX86Base<TraitsType>::eliminateNextVectorSextInstruction(
John Porto7e93c622015-06-23 10:58:57 -07006092 Variable *SignExtendedResult) {
Jim Stichnoth54f3d512015-12-11 09:53:00 -08006093 if (auto *NextCast =
John Porto7e93c622015-06-23 10:58:57 -07006094 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) {
6095 if (NextCast->getCastKind() == InstCast::Sext &&
6096 NextCast->getSrc(0) == SignExtendedResult) {
6097 NextCast->setDeleted();
Andrew Scull97f460d2015-07-21 10:07:42 -07006098 _movp(NextCast->getDest(), legalizeToReg(SignExtendedResult));
John Porto7e93c622015-06-23 10:58:57 -07006099 // Skip over the instruction.
6100 Context.advanceNext();
6101 }
6102 }
6103}
6104
/// Lowers an unreachable instruction to a ud2 trap.
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerUnreachable(
    const InstUnreachable * /*Instr*/) {
  // ud2 raises an invalid-opcode fault if control ever reaches this point.
  _ud2();
  // Add a fake use of esp to make sure esp adjustments after the unreachable
  // do not get dead-code eliminated.
  keepEspLiveAtExit();
}
6113
/// Lowers a breakpoint instruction to the one-byte int3 debug trap.
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerBreakpoint(
    const InstBreakpoint * /*Instr*/) {
  _int3();
}
6119
/// Lowers an InstX86FakeRMW pseudo-instruction into a single x86
/// read-modify-write memory-operand instruction (e.g. "add [addr], src"),
/// replacing the load / op / store sequence it stands for.
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerRMW(const InstX86FakeRMW *RMW) {
  // If the beacon variable's live range does not end in this instruction, then
  // it must end in the modified Store instruction that follows. This means
  // that the original Store instruction is still there, either because the
  // value being stored is used beyond the Store instruction, or because dead
  // code elimination did not happen. In either case, we cancel RMW lowering
  // (and the caller deletes the RMW instruction).
  if (!RMW->isLastUse(RMW->getBeacon()))
    return;
  Operand *Src = RMW->getData();
  Type Ty = Src->getType();
  X86OperandMem *Addr = formMemoryOperand(RMW->getAddr(), Ty);
  doMockBoundsCheck(Addr);
  if (!Traits::Is64Bit && Ty == IceType_i64) {
    // On 32-bit targets an i64 RMW is split into lo/hi halves. Add/sub chain
    // the carry/borrow between halves via adc/sbb; the bitwise operators act
    // on each half independently.
    Src = legalizeUndef(Src);
    Operand *SrcLo = legalize(loOperand(Src), Legal_Reg | Legal_Imm);
    Operand *SrcHi = legalize(hiOperand(Src), Legal_Reg | Legal_Imm);
    auto *AddrLo = llvm::cast<X86OperandMem>(loOperand(Addr));
    auto *AddrHi = llvm::cast<X86OperandMem>(hiOperand(Addr));
    switch (RMW->getOp()) {
    default:
      // TODO(stichnot): Implement other arithmetic operators.
      break;
    case InstArithmetic::Add:
      _add_rmw(AddrLo, SrcLo);
      _adc_rmw(AddrHi, SrcHi);
      return;
    case InstArithmetic::Sub:
      _sub_rmw(AddrLo, SrcLo);
      _sbb_rmw(AddrHi, SrcHi);
      return;
    case InstArithmetic::And:
      _and_rmw(AddrLo, SrcLo);
      _and_rmw(AddrHi, SrcHi);
      return;
    case InstArithmetic::Or:
      _or_rmw(AddrLo, SrcLo);
      _or_rmw(AddrHi, SrcHi);
      return;
    case InstArithmetic::Xor:
      _xor_rmw(AddrLo, SrcLo);
      _xor_rmw(AddrHi, SrcHi);
      return;
    }
  } else {
    // x86-32: i8, i16, i32
    // x86-64: i8, i16, i32, i64
    switch (RMW->getOp()) {
    default:
      // TODO(stichnot): Implement other arithmetic operators.
      break;
    case InstArithmetic::Add:
      Src = legalize(Src, Legal_Reg | Legal_Imm);
      _add_rmw(Addr, Src);
      return;
    case InstArithmetic::Sub:
      Src = legalize(Src, Legal_Reg | Legal_Imm);
      _sub_rmw(Addr, Src);
      return;
    case InstArithmetic::And:
      Src = legalize(Src, Legal_Reg | Legal_Imm);
      _and_rmw(Addr, Src);
      return;
    case InstArithmetic::Or:
      Src = legalize(Src, Legal_Reg | Legal_Imm);
      _or_rmw(Addr, Src);
      return;
    case InstArithmetic::Xor:
      Src = legalize(Src, Legal_Reg | Legal_Imm);
      _xor_rmw(Addr, Src);
      return;
    }
  }
  // Reached only when the operator was not one of the RMW-able ops above.
  llvm::report_fatal_error("Couldn't lower RMW instruction");
}
6196
John Porto4a566862016-01-04 09:33:41 -08006197template <typename TraitsType>
6198void TargetX86Base<TraitsType>::lowerOther(const Inst *Instr) {
6199 if (const auto *RMW = llvm::dyn_cast<InstX86FakeRMW>(Instr)) {
John Porto7e93c622015-06-23 10:58:57 -07006200 lowerRMW(RMW);
6201 } else {
6202 TargetLowering::lowerOther(Instr);
6203 }
6204}
6205
Andrew Scull57e12682015-09-16 11:30:19 -07006206/// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to preserve
6207/// integrity of liveness analysis. Undef values are also turned into zeroes,
Jim Stichnoth8ff4b282016-01-04 15:39:06 -08006208/// since loOperand() and hiOperand() don't expect Undef input. Also, in
John Portoac2388c2016-01-22 07:10:56 -08006209/// Non-SFI mode, add a FakeUse(RebasePtr) for every pooled constant operand.
John Porto4a566862016-01-04 09:33:41 -08006210template <typename TraitsType> void TargetX86Base<TraitsType>::prelowerPhis() {
Karl Schimpfd4699942016-04-02 09:55:31 -07006211 if (getFlags().getUseNonsfi()) {
John Portoac2388c2016-01-22 07:10:56 -08006212 assert(RebasePtr);
Jim Stichnoth8ff4b282016-01-04 15:39:06 -08006213 CfgNode *Node = Context.getNode();
John Portoac2388c2016-01-22 07:10:56 -08006214 uint32_t RebasePtrUseCount = 0;
Jim Stichnoth8ff4b282016-01-04 15:39:06 -08006215 for (Inst &I : Node->getPhis()) {
6216 auto *Phi = llvm::dyn_cast<InstPhi>(&I);
6217 if (Phi->isDeleted())
6218 continue;
6219 for (SizeT I = 0; I < Phi->getSrcSize(); ++I) {
6220 Operand *Src = Phi->getSrc(I);
6221 // TODO(stichnot): This over-counts for +0.0, and under-counts for other
6222 // kinds of pooling.
6223 if (llvm::isa<ConstantRelocatable>(Src) ||
6224 llvm::isa<ConstantFloat>(Src) || llvm::isa<ConstantDouble>(Src)) {
John Portoac2388c2016-01-22 07:10:56 -08006225 ++RebasePtrUseCount;
Jim Stichnoth8ff4b282016-01-04 15:39:06 -08006226 }
6227 }
6228 }
John Portoac2388c2016-01-22 07:10:56 -08006229 if (RebasePtrUseCount) {
6230 Node->getInsts().push_front(InstFakeUse::create(Func, RebasePtr));
Jim Stichnoth8ff4b282016-01-04 15:39:06 -08006231 }
6232 }
John Porto1d235422015-08-12 12:37:53 -07006233 if (Traits::Is64Bit) {
6234 // On x86-64 we don't need to prelower phis -- the architecture can handle
6235 // 64-bit integer natively.
6236 return;
6237 }
6238
Andrew Scull57e12682015-09-16 11:30:19 -07006239 // Pause constant blinding or pooling, blinding or pooling will be done later
6240 // during phi lowering assignments
John Porto7e93c622015-06-23 10:58:57 -07006241 BoolFlagSaver B(RandomizationPoolingPaused, true);
John Porto4a566862016-01-04 09:33:41 -08006242 PhiLowering::prelowerPhis32Bit<TargetX86Base<TraitsType>>(
Jan Voung53483692015-07-16 10:47:46 -07006243 this, Context.getNode(), Func);
John Porto7e93c622015-06-23 10:58:57 -07006244}
6245
/// Examines Instr and, for operations this target cannot lower directly
/// (e.g. 32-bit i64 division, scalar frem, some fp<->int casts, certain
/// intrinsics), inserts a runtime-helper call and deletes the original
/// instruction (or scalarizes it for some vector ops). In all cases it also
/// accumulates the out-args stack space any resulting call will need, via
/// updateMaxOutArgsSizeBytes().
template <typename TraitsType>
void TargetX86Base<TraitsType>::genTargetHelperCallFor(Inst *Instr) {
  uint32_t StackArgumentsSize = 0;
  if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) {
    RuntimeHelper HelperID = RuntimeHelper::H_Num;
    Variable *Dest = Arith->getDest();
    Type DestTy = Dest->getType();
    if (!Traits::Is64Bit && DestTy == IceType_i64) {
      // 32-bit targets have no native 64-bit divide/remainder.
      switch (Arith->getOp()) {
      default:
        return;
      case InstArithmetic::Udiv:
        HelperID = RuntimeHelper::H_udiv_i64;
        break;
      case InstArithmetic::Sdiv:
        HelperID = RuntimeHelper::H_sdiv_i64;
        break;
      case InstArithmetic::Urem:
        HelperID = RuntimeHelper::H_urem_i64;
        break;
      case InstArithmetic::Srem:
        HelperID = RuntimeHelper::H_srem_i64;
        break;
      }
    } else if (isVectorType(DestTy)) {
      // Vector ops with no SSE equivalent are scalarized instead of calling a
      // helper.
      Variable *Dest = Arith->getDest();
      Operand *Src0 = Arith->getSrc(0);
      Operand *Src1 = Arith->getSrc(1);
      switch (Arith->getOp()) {
      default:
        return;
      case InstArithmetic::Mul:
        // Only the v16i8 multiply needs scalarization.
        if (DestTy == IceType_v16i8) {
          scalarizeArithmetic(Arith->getOp(), Dest, Src0, Src1);
          Arith->setDeleted();
        }
        return;
      case InstArithmetic::Shl:
      case InstArithmetic::Lshr:
      case InstArithmetic::Ashr:
      case InstArithmetic::Udiv:
      case InstArithmetic::Urem:
      case InstArithmetic::Sdiv:
      case InstArithmetic::Srem:
      case InstArithmetic::Frem:
        scalarizeArithmetic(Arith->getOp(), Dest, Src0, Src1);
        Arith->setDeleted();
        return;
      }
    } else {
      // Scalar float remainder has no x86 instruction; call the helper.
      switch (Arith->getOp()) {
      default:
        return;
      case InstArithmetic::Frem:
        if (isFloat32Asserting32Or64(DestTy))
          HelperID = RuntimeHelper::H_frem_f32;
        else
          HelperID = RuntimeHelper::H_frem_f64;
      }
    }
    constexpr SizeT MaxSrcs = 2;
    InstCall *Call = makeHelperCall(HelperID, Dest, MaxSrcs);
    Call->addArg(Arith->getSrc(0));
    Call->addArg(Arith->getSrc(1));
    StackArgumentsSize = getCallStackArgumentsSizeBytes(Call);
    Context.insert(Call);
    Arith->setDeleted();
  } else if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {
    InstCast::OpKind CastKind = Cast->getCastKind();
    Operand *Src0 = Cast->getSrc(0);
    const Type SrcType = Src0->getType();
    Variable *Dest = Cast->getDest();
    const Type DestTy = Dest->getType();
    RuntimeHelper HelperID = RuntimeHelper::H_Num;
    // CallDest may be widened to i32 below when the helper's return type
    // differs from Dest's (PNaCl ABI disallows i8/i16 returns).
    Variable *CallDest = Dest;
    switch (CastKind) {
    default:
      return;
    case InstCast::Fptosi:
      if (!Traits::Is64Bit && DestTy == IceType_i64) {
        HelperID = isFloat32Asserting32Or64(SrcType)
                       ? RuntimeHelper::H_fptosi_f32_i64
                       : RuntimeHelper::H_fptosi_f64_i64;
      } else {
        return;
      }
      break;
    case InstCast::Fptoui:
      if (isVectorType(DestTy)) {
        assert(DestTy == IceType_v4i32 && SrcType == IceType_v4f32);
        HelperID = RuntimeHelper::H_fptoui_4xi32_f32;
      } else if (DestTy == IceType_i64 ||
                 (!Traits::Is64Bit && DestTy == IceType_i32)) {
        if (Traits::Is64Bit) {
          HelperID = isFloat32Asserting32Or64(SrcType)
                         ? RuntimeHelper::H_fptoui_f32_i64
                         : RuntimeHelper::H_fptoui_f64_i64;
        } else if (isInt32Asserting32Or64(DestTy)) {
          HelperID = isFloat32Asserting32Or64(SrcType)
                         ? RuntimeHelper::H_fptoui_f32_i32
                         : RuntimeHelper::H_fptoui_f64_i32;
        } else {
          HelperID = isFloat32Asserting32Or64(SrcType)
                         ? RuntimeHelper::H_fptoui_f32_i64
                         : RuntimeHelper::H_fptoui_f64_i64;
        }
      } else {
        return;
      }
      break;
    case InstCast::Sitofp:
      if (!Traits::Is64Bit && SrcType == IceType_i64) {
        HelperID = isFloat32Asserting32Or64(DestTy)
                       ? RuntimeHelper::H_sitofp_i64_f32
                       : RuntimeHelper::H_sitofp_i64_f64;
      } else {
        return;
      }
      break;
    case InstCast::Uitofp:
      if (isVectorType(SrcType)) {
        assert(DestTy == IceType_v4f32 && SrcType == IceType_v4i32);
        HelperID = RuntimeHelper::H_uitofp_4xi32_4xf32;
      } else if (SrcType == IceType_i64 ||
                 (!Traits::Is64Bit && SrcType == IceType_i32)) {
        if (isInt32Asserting32Or64(SrcType)) {
          HelperID = isFloat32Asserting32Or64(DestTy)
                         ? RuntimeHelper::H_uitofp_i32_f32
                         : RuntimeHelper::H_uitofp_i32_f64;
        } else {
          HelperID = isFloat32Asserting32Or64(DestTy)
                         ? RuntimeHelper::H_uitofp_i64_f32
                         : RuntimeHelper::H_uitofp_i64_f64;
        }
      } else {
        return;
      }
      break;
    case InstCast::Bitcast: {
      if (DestTy == Src0->getType())
        return;
      // Only bitcasts between scalar ints and i1-vectors need helpers.
      switch (DestTy) {
      default:
        return;
      case IceType_i8:
        assert(Src0->getType() == IceType_v8i1);
        HelperID = RuntimeHelper::H_bitcast_8xi1_i8;
        CallDest = Func->makeVariable(IceType_i32);
        break;
      case IceType_i16:
        assert(Src0->getType() == IceType_v16i1);
        HelperID = RuntimeHelper::H_bitcast_16xi1_i16;
        CallDest = Func->makeVariable(IceType_i32);
        break;
      case IceType_v8i1: {
        assert(Src0->getType() == IceType_i8);
        HelperID = RuntimeHelper::H_bitcast_i8_8xi1;
        Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
        // Arguments to functions are required to be at least 32 bits wide.
        Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0);
        Src0 = Src0AsI32;
      } break;
      case IceType_v16i1: {
        assert(Src0->getType() == IceType_i16);
        HelperID = RuntimeHelper::H_bitcast_i16_16xi1;
        Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
        // Arguments to functions are required to be at least 32 bits wide.
        Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0);
        Src0 = Src0AsI32;
      } break;
      }
    } break;
    }
    constexpr SizeT MaxSrcs = 1;
    InstCall *Call = makeHelperCall(HelperID, CallDest, MaxSrcs);
    Call->addArg(Src0);
    StackArgumentsSize = getCallStackArgumentsSizeBytes(Call);
    Context.insert(Call);
    // The PNaCl ABI disallows i8/i16 return types, so truncate the helper call
    // result to the appropriate type as necessary.
    if (CallDest->getType() != Dest->getType())
      Context.insert<InstCast>(InstCast::Trunc, Dest, CallDest);
    Cast->setDeleted();
  } else if (auto *Intrinsic = llvm::dyn_cast<InstIntrinsicCall>(Instr)) {
    // Intrinsics that expand to calls: no call is inserted here, but the
    // eventual call's out-args stack requirement is recorded below.
    CfgVector<Type> ArgTypes;
    Type ReturnType = IceType_void;
    switch (Intrinsics::IntrinsicID ID = Intrinsic->getIntrinsicInfo().ID) {
    default:
      return;
    case Intrinsics::Ctpop: {
      Operand *Val = Intrinsic->getArg(0);
      Type ValTy = Val->getType();
      if (ValTy == IceType_i64)
        ArgTypes = {IceType_i64};
      else
        ArgTypes = {IceType_i32};
      ReturnType = IceType_i32;
    } break;
    case Intrinsics::Longjmp:
      ArgTypes = {IceType_i32, IceType_i32};
      ReturnType = IceType_void;
      break;
    case Intrinsics::Memcpy:
      ArgTypes = {IceType_i32, IceType_i32, IceType_i32};
      ReturnType = IceType_void;
      break;
    case Intrinsics::Memmove:
      ArgTypes = {IceType_i32, IceType_i32, IceType_i32};
      ReturnType = IceType_void;
      break;
    case Intrinsics::Memset:
      ArgTypes = {IceType_i32, IceType_i32, IceType_i32};
      ReturnType = IceType_void;
      break;
    case Intrinsics::NaClReadTP:
      ReturnType = IceType_i32;
      break;
    case Intrinsics::Setjmp:
      ArgTypes = {IceType_i32};
      ReturnType = IceType_i32;
      break;
    }
    StackArgumentsSize = getCallStackArgumentsSizeBytes(ArgTypes, ReturnType);
  } else if (auto *Call = llvm::dyn_cast<InstCall>(Instr)) {
    StackArgumentsSize = getCallStackArgumentsSizeBytes(Call);
  } else if (auto *Ret = llvm::dyn_cast<InstRet>(Instr)) {
    // Scalar float returns need scratch stack space for the x87 fstp/mov.
    if (!Ret->hasRetValue())
      return;
    Operand *RetValue = Ret->getRetValue();
    Type ReturnType = RetValue->getType();
    if (!isScalarFloatingType(ReturnType))
      return;
    StackArgumentsSize = typeWidthInBytes(ReturnType);
  } else {
    return;
  }
  StackArgumentsSize = Traits::applyStackAlignment(StackArgumentsSize);
  updateMaxOutArgsSizeBytes(StackArgumentsSize);
}
6485
John Porto4a566862016-01-04 09:33:41 -08006486template <typename TraitsType>
6487uint32_t TargetX86Base<TraitsType>::getCallStackArgumentsSizeBytes(
John Portoe82b5602016-02-24 15:58:55 -08006488 const CfgVector<Type> &ArgTypes, Type ReturnType) {
David Sehr4163b9f2015-11-20 21:09:31 -08006489 uint32_t OutArgumentsSizeBytes = 0;
6490 uint32_t XmmArgCount = 0;
6491 uint32_t GprArgCount = 0;
David Sehr26217e32015-11-26 13:03:50 -08006492 for (Type Ty : ArgTypes) {
David Sehr4163b9f2015-11-20 21:09:31 -08006493 // The PNaCl ABI requires the width of arguments to be at least 32 bits.
6494 assert(typeWidthInBytes(Ty) >= 4);
6495 if (isVectorType(Ty) && XmmArgCount < Traits::X86_MAX_XMM_ARGS) {
6496 ++XmmArgCount;
6497 } else if (isScalarIntegerType(Ty) &&
6498 GprArgCount < Traits::X86_MAX_GPR_ARGS) {
6499 // The 64 bit ABI allows some integers to be passed in GPRs.
6500 ++GprArgCount;
6501 } else {
David Sehr26217e32015-11-26 13:03:50 -08006502 if (isVectorType(Ty)) {
David Sehr4163b9f2015-11-20 21:09:31 -08006503 OutArgumentsSizeBytes =
6504 Traits::applyStackAlignment(OutArgumentsSizeBytes);
6505 }
David Sehr26217e32015-11-26 13:03:50 -08006506 OutArgumentsSizeBytes += typeWidthInBytesOnStack(Ty);
David Sehr4163b9f2015-11-20 21:09:31 -08006507 }
6508 }
6509 if (Traits::Is64Bit)
6510 return OutArgumentsSizeBytes;
6511 // The 32 bit ABI requires floating point values to be returned on the x87 FP
6512 // stack. Ensure there is enough space for the fstp/movs for floating returns.
David Sehr26217e32015-11-26 13:03:50 -08006513 if (isScalarFloatingType(ReturnType)) {
David Sehr4163b9f2015-11-20 21:09:31 -08006514 OutArgumentsSizeBytes =
6515 std::max(OutArgumentsSizeBytes,
David Sehr26217e32015-11-26 13:03:50 -08006516 static_cast<uint32_t>(typeWidthInBytesOnStack(ReturnType)));
David Sehr4163b9f2015-11-20 21:09:31 -08006517 }
6518 return OutArgumentsSizeBytes;
6519}
6520
John Porto4a566862016-01-04 09:33:41 -08006521template <typename TraitsType>
6522uint32_t TargetX86Base<TraitsType>::getCallStackArgumentsSizeBytes(
6523 const InstCall *Instr) {
David Sehr26217e32015-11-26 13:03:50 -08006524 // Build a vector of the arguments' types.
John Portoe82b5602016-02-24 15:58:55 -08006525 const SizeT NumArgs = Instr->getNumArgs();
6526 CfgVector<Type> ArgTypes;
6527 ArgTypes.reserve(NumArgs);
6528 for (SizeT i = 0; i < NumArgs; ++i) {
David Sehr26217e32015-11-26 13:03:50 -08006529 Operand *Arg = Instr->getArg(i);
6530 ArgTypes.emplace_back(Arg->getType());
6531 }
6532 // Compute the return type (if any);
6533 Type ReturnType = IceType_void;
6534 Variable *Dest = Instr->getDest();
6535 if (Dest != nullptr)
6536 ReturnType = Dest->getType();
6537 return getCallStackArgumentsSizeBytes(ArgTypes, ReturnType);
6538}
6539
John Porto4a566862016-01-04 09:33:41 -08006540template <typename TraitsType>
6541Variable *TargetX86Base<TraitsType>::makeZeroedRegister(Type Ty,
Jim Stichnoth8aa39662016-02-10 11:20:30 -08006542 RegNumT RegNum) {
Jim Stichnoth99165662015-11-13 14:20:40 -08006543 Variable *Reg = makeReg(Ty, RegNum);
6544 switch (Ty) {
6545 case IceType_i1:
6546 case IceType_i8:
6547 case IceType_i16:
6548 case IceType_i32:
6549 case IceType_i64:
6550 // Conservatively do "mov reg, 0" to avoid modifying FLAGS.
6551 _mov(Reg, Ctx->getConstantZero(Ty));
6552 break;
6553 case IceType_f32:
6554 case IceType_f64:
John Porto1d937a82015-12-17 06:19:34 -08006555 Context.insert<InstFakeDef>(Reg);
David Sehre3984282015-12-15 17:34:55 -08006556 _xorps(Reg, Reg);
Jim Stichnoth99165662015-11-13 14:20:40 -08006557 break;
6558 default:
6559 // All vector types use the same pxor instruction.
6560 assert(isVectorType(Ty));
John Porto1d937a82015-12-17 06:19:34 -08006561 Context.insert<InstFakeDef>(Reg);
Jim Stichnoth99165662015-11-13 14:20:40 -08006562 _pxor(Reg, Reg);
6563 break;
6564 }
6565 return Reg;
6566}
6567
Andrew Scull57e12682015-09-16 11:30:19 -07006568// There is no support for loading or emitting vector constants, so the vector
6569// values returned from makeVectorOfZeros, makeVectorOfOnes, etc. are
6570// initialized with register operations.
John Porto7e93c622015-06-23 10:58:57 -07006571//
Andrew Scull57e12682015-09-16 11:30:19 -07006572// TODO(wala): Add limited support for vector constants so that complex
6573// initialization in registers is unnecessary.
John Porto7e93c622015-06-23 10:58:57 -07006574
/// Returns a register of vector type Ty with all lanes zeroed.
template <typename TraitsType>
Variable *TargetX86Base<TraitsType>::makeVectorOfZeros(Type Ty,
                                                       RegNumT RegNum) {
  // A zeroed vector register is just a zeroed register of vector type.
  return makeZeroedRegister(Ty, RegNum);
}
6580
John Porto4a566862016-01-04 09:33:41 -08006581template <typename TraitsType>
6582Variable *TargetX86Base<TraitsType>::makeVectorOfMinusOnes(Type Ty,
Jim Stichnoth8aa39662016-02-10 11:20:30 -08006583 RegNumT RegNum) {
John Porto7e93c622015-06-23 10:58:57 -07006584 Variable *MinusOnes = makeReg(Ty, RegNum);
6585 // Insert a FakeDef so the live range of MinusOnes is not overestimated.
John Porto1d937a82015-12-17 06:19:34 -08006586 Context.insert<InstFakeDef>(MinusOnes);
David Sehrb19d39c2016-01-13 14:17:37 -08006587 if (Ty == IceType_f64)
6588 // Making a vector of minus ones of type f64 is currently only used for the
6589 // fabs intrinsic. To use the f64 type to create this mask with pcmpeqq
6590 // requires SSE 4.1. Since we're just creating a mask, pcmpeqd does the
6591 // same job and only requires SSE2.
6592 _pcmpeq(MinusOnes, MinusOnes, IceType_f32);
6593 else
6594 _pcmpeq(MinusOnes, MinusOnes);
John Porto7e93c622015-06-23 10:58:57 -07006595 return MinusOnes;
6596}
6597
John Porto4a566862016-01-04 09:33:41 -08006598template <typename TraitsType>
Jim Stichnoth8aa39662016-02-10 11:20:30 -08006599Variable *TargetX86Base<TraitsType>::makeVectorOfOnes(Type Ty, RegNumT RegNum) {
John Porto7e93c622015-06-23 10:58:57 -07006600 Variable *Dest = makeVectorOfZeros(Ty, RegNum);
6601 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
6602 _psub(Dest, MinusOne);
6603 return Dest;
6604}
6605
John Porto4a566862016-01-04 09:33:41 -08006606template <typename TraitsType>
6607Variable *TargetX86Base<TraitsType>::makeVectorOfHighOrderBits(Type Ty,
Jim Stichnoth8aa39662016-02-10 11:20:30 -08006608 RegNumT RegNum) {
John Porto7e93c622015-06-23 10:58:57 -07006609 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 ||
6610 Ty == IceType_v16i8);
6611 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) {
6612 Variable *Reg = makeVectorOfOnes(Ty, RegNum);
6613 SizeT Shift =
6614 typeWidthInBytes(typeElementType(Ty)) * Traits::X86_CHAR_BIT - 1;
6615 _psll(Reg, Ctx->getConstantInt8(Shift));
6616 return Reg;
6617 } else {
6618 // SSE has no left shift operation for vectors of 8 bit integers.
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07006619 constexpr uint32_t HIGH_ORDER_BITS_MASK = 0x80808080;
John Porto7e93c622015-06-23 10:58:57 -07006620 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK);
6621 Variable *Reg = makeReg(Ty, RegNum);
6622 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem));
6623 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8));
6624 return Reg;
6625 }
6626}
6627
Andrew Scull57e12682015-09-16 11:30:19 -07006628/// Construct a mask in a register that can be and'ed with a floating-point
6629/// value to mask off its sign bit. The value will be <4 x 0x7fffffff> for f32
6630/// and v4f32, and <2 x 0x7fffffffffffffff> for f64. Construct it as vector of
6631/// ones logically right shifted one bit.
6632// TODO(stichnot): Fix the wala
6633// TODO: above, to represent vector constants in memory.
John Porto4a566862016-01-04 09:33:41 -08006634template <typename TraitsType>
6635Variable *TargetX86Base<TraitsType>::makeVectorOfFabsMask(Type Ty,
Jim Stichnoth8aa39662016-02-10 11:20:30 -08006636 RegNumT RegNum) {
John Porto7e93c622015-06-23 10:58:57 -07006637 Variable *Reg = makeVectorOfMinusOnes(Ty, RegNum);
6638 _psrl(Reg, Ctx->getConstantInt8(1));
6639 return Reg;
6640}
6641
John Porto4a566862016-01-04 09:33:41 -08006642template <typename TraitsType>
6643typename TargetX86Base<TraitsType>::X86OperandMem *
6644TargetX86Base<TraitsType>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot,
6645 uint32_t Offset) {
John Porto7e93c622015-06-23 10:58:57 -07006646 // Ensure that Loc is a stack slot.
Andrew Scull11c9a322015-08-28 14:24:14 -07006647 assert(Slot->mustNotHaveReg());
Reed Kotler5fa0a5f2016-02-15 20:01:24 -08006648 assert(Slot->getRegNum().hasNoValue());
John Porto7e93c622015-06-23 10:58:57 -07006649 // Compute the location of Loc in memory.
Andrew Scull57e12682015-09-16 11:30:19 -07006650 // TODO(wala,stichnot): lea should not
6651 // be required. The address of the stack slot is known at compile time
6652 // (although not until after addProlog()).
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07006653 constexpr Type PointerType = IceType_i32;
John Porto7e93c622015-06-23 10:58:57 -07006654 Variable *Loc = makeReg(PointerType);
6655 _lea(Loc, Slot);
6656 Constant *ConstantOffset = Ctx->getConstantInt32(Offset);
John Porto4a566862016-01-04 09:33:41 -08006657 return X86OperandMem::create(Func, Ty, Loc, ConstantOffset);
John Porto7e93c622015-06-23 10:58:57 -07006658}
6659
Jim Stichnothc59288b2015-11-09 11:38:40 -08006660/// Lowering helper to copy a scalar integer source operand into some 8-bit GPR.
6661/// Src is assumed to already be legalized. If the source operand is known to
6662/// be a memory or immediate operand, a simple mov will suffice. But if the
6663/// source operand can be a physical register, then it must first be copied into
6664/// a physical register that is truncable to 8-bit, then truncated into a
6665/// physical register that can receive a truncation, and finally copied into the
6666/// result 8-bit register (which in general can be any 8-bit register). For
6667/// example, moving %ebp into %ah may be accomplished as:
6668/// movl %ebp, %edx
6669/// mov_trunc %edx, %dl // this redundant assignment is ultimately elided
6670/// movb %dl, %ah
6671/// On the other hand, moving a memory or immediate operand into ah:
6672/// movb 4(%ebp), %ah
6673/// movb $my_imm, %ah
6674///
6675/// Note #1. On a 64-bit target, the "movb 4(%ebp), %ah" is likely not
6676/// encodable, so RegNum=Reg_ah should NOT be given as an argument. Instead,
Reed Kotler5fa0a5f2016-02-15 20:01:24 -08006677/// use RegNum=RegNumT() and then let the caller do a separate copy into
Jim Stichnothc59288b2015-11-09 11:38:40 -08006678/// Reg_ah.
6679///
6680/// Note #2. ConstantRelocatable operands are also put through this process
6681/// (not truncated directly) because our ELF emitter does R_386_32 relocations
6682/// but not R_386_8 relocations.
6683///
6684/// Note #3. If Src is a Variable, the result will be an infinite-weight i8
6685/// Variable with the RCX86_IsTrunc8Rcvr register class. As such, this helper
6686/// is a convenient way to prevent ah/bh/ch/dh from being an (invalid) argument
6687/// to the pinsrb instruction.
John Porto4a566862016-01-04 09:33:41 -08006688template <typename TraitsType>
Jim Stichnoth8aa39662016-02-10 11:20:30 -08006689Variable *TargetX86Base<TraitsType>::copyToReg8(Operand *Src, RegNumT RegNum) {
Jim Stichnothc59288b2015-11-09 11:38:40 -08006690 Type Ty = Src->getType();
6691 assert(isScalarIntegerType(Ty));
6692 assert(Ty != IceType_i1);
6693 Variable *Reg = makeReg(IceType_i8, RegNum);
6694 Reg->setRegClass(RCX86_IsTrunc8Rcvr);
6695 if (llvm::isa<Variable>(Src) || llvm::isa<ConstantRelocatable>(Src)) {
6696 Variable *SrcTruncable = makeReg(Ty);
6697 switch (Ty) {
6698 case IceType_i64:
6699 SrcTruncable->setRegClass(RCX86_Is64To8);
6700 break;
6701 case IceType_i32:
6702 SrcTruncable->setRegClass(RCX86_Is32To8);
6703 break;
6704 case IceType_i16:
6705 SrcTruncable->setRegClass(RCX86_Is16To8);
6706 break;
6707 default:
6708 // i8 - just use default register class
6709 break;
6710 }
6711 Variable *SrcRcvr = makeReg(IceType_i8);
6712 SrcRcvr->setRegClass(RCX86_IsTrunc8Rcvr);
6713 _mov(SrcTruncable, Src);
6714 _mov(SrcRcvr, SrcTruncable);
6715 Src = SrcRcvr;
6716 }
6717 _mov(Reg, Src);
6718 return Reg;
6719}
6720
Andrew Scull9612d322015-07-06 14:53:25 -07006721/// Helper for legalize() to emit the right code to lower an operand to a
6722/// register of the appropriate type.
John Porto4a566862016-01-04 09:33:41 -08006723template <typename TraitsType>
Jim Stichnoth8aa39662016-02-10 11:20:30 -08006724Variable *TargetX86Base<TraitsType>::copyToReg(Operand *Src, RegNumT RegNum) {
John Porto7e93c622015-06-23 10:58:57 -07006725 Type Ty = Src->getType();
6726 Variable *Reg = makeReg(Ty, RegNum);
6727 if (isVectorType(Ty)) {
6728 _movp(Reg, Src);
6729 } else {
6730 _mov(Reg, Src);
6731 }
6732 return Reg;
6733}
6734
/// Turn From into an operand of one of the kinds permitted by the Allowed
/// bitmask, optionally forcing the result into the physical register RegNum.
/// Memory operands get their Base/Index legalized into registers (and their
/// immediates randomized/pooled); constants may be pooled, blinded, copied to
/// a register, or (for FP) converted to a memory reference; variables are
/// copied or rematerialized via lea as needed. Returns the legalized operand,
/// which may be From itself when it is already acceptable.
template <typename TraitsType>
Operand *TargetX86Base<TraitsType>::legalize(Operand *From, LegalMask Allowed,
                                             RegNumT RegNum) {
  const bool UseNonsfi = getFlags().getUseNonsfi();
  const Type Ty = From->getType();
  // Assert that a physical register is allowed. To date, all calls to
  // legalize() allow a physical register. If a physical register needs to be
  // explicitly disallowed, then new code will need to be written to force a
  // spill.
  assert(Allowed & Legal_Reg);
  // If we're asking for a specific physical register, make sure we're not
  // allowing any other operand kinds. (This could be future work, e.g. allow
  // the shl shift amount to be either an immediate or in ecx.)
  assert(RegNum.hasNoValue() || Allowed == Legal_Reg);

  // Substitute with an available infinite-weight variable if possible. Only do
  // this when we are not asking for a specific register, and when the
  // substitution is not locked to a specific register, and when the types
  // match, in order to capture the vast majority of opportunities and avoid
  // corner cases in the lowering.
  if (RegNum.hasNoValue()) {
    if (Variable *Subst = getContext().availabilityGet(From)) {
      // At this point we know there is a potential substitution available.
      if (Subst->mustHaveReg() && !Subst->hasReg()) {
        // At this point we know the substitution will have a register.
        if (From->getType() == Subst->getType()) {
          // At this point we know the substitution's register is compatible.
          return Subst;
        }
      }
    }
  }

  if (auto *Mem = llvm::dyn_cast<X86OperandMem>(From)) {
    // Before doing anything with a Mem operand, we need to ensure that the
    // Base and Index components are in physical registers.
    Variable *Base = Mem->getBase();
    Variable *Index = Mem->getIndex();
    Constant *Offset = Mem->getOffset();
    Variable *RegBase = nullptr;
    Variable *RegIndex = nullptr;
    uint16_t Shift = Mem->getShift();
    if (Base) {
      // Rematerializable pass-through is allowed so that frame-pointer-based
      // bases need not be copied.
      RegBase = llvm::cast<Variable>(
          legalize(Base, Legal_Reg | Legal_Rematerializable));
    }
    if (Index) {
      // TODO(jpp): perhaps we should only allow Legal_Reg if
      // Base->isRematerializable.
      RegIndex = llvm::cast<Variable>(
          legalize(Index, Legal_Reg | Legal_Rematerializable));
    }

    // Rebuild the memory operand only if legalization changed a component.
    if (Base != RegBase || Index != RegIndex) {
      Mem = X86OperandMem::create(Func, Ty, RegBase, Offset, RegIndex, Shift,
                                  Mem->getSegmentRegister());
    }

    // For all Memory Operands, we do randomization/pooling here.
    From = randomizeOrPoolImmediate(Mem);

    if (!(Allowed & Legal_Mem)) {
      From = copyToReg(From, RegNum);
    }
    return From;
  }

  if (auto *Const = llvm::dyn_cast<Constant>(From)) {
    if (llvm::isa<ConstantUndef>(Const)) {
      // Lower undef to a concrete value first (zero / zero vector).
      From = legalizeUndef(Const, RegNum);
      if (isVectorType(Ty))
        return From;
      Const = llvm::cast<Constant>(From);
    }
    // There should be no constants of vector type (other than undef).
    assert(!isVectorType(Ty));

    // If the operand is a 64 bit constant integer we need to legalize it to a
    // register in x86-64.
    if (Traits::Is64Bit) {
      if (llvm::isa<ConstantInteger64>(Const)) {
        if (RegNum.hasValue()) {
          assert(Traits::getGprForType(IceType_i64, RegNum) == RegNum);
        }
        return copyToReg(Const, RegNum);
      }
    }

    // If the operand is an 32 bit constant integer, we should check whether we
    // need to randomize it or pool it.
    if (auto *C = llvm::dyn_cast<ConstantInteger32>(Const)) {
      Operand *NewConst = randomizeOrPoolImmediate(C, RegNum);
      if (NewConst != Const) {
        return NewConst;
      }
    }

    if (auto *CR = llvm::dyn_cast<ConstantRelocatable>(Const)) {
      // If the operand is a ConstantRelocatable, and Legal_AddrAbs is not
      // specified, and UseNonsfi is indicated, we need to add RebasePtr.
      if (UseNonsfi && !(Allowed & Legal_AddrAbs)) {
        assert(Ty == IceType_i32);
        Variable *NewVar = makeReg(Ty, RegNum);
        auto *Mem = Traits::X86OperandMem::create(Func, Ty, nullptr, CR);
        // LEAs are not automatically sandboxed, thus we explicitly invoke
        // _sandbox_mem_reference.
        _lea(NewVar, _sandbox_mem_reference(Mem));
        From = NewVar;
      }
    } else if (isScalarFloatingType(Ty)) {
      // Convert a scalar floating point constant into an explicit memory
      // operand.
      if (auto *ConstFloat = llvm::dyn_cast<ConstantFloat>(Const)) {
        // +0.0 can be made cheaply with a zeroed register (e.g. xorps).
        if (Utils::isPositiveZero(ConstFloat->getValue()))
          return makeZeroedRegister(Ty, RegNum);
      } else if (auto *ConstDouble = llvm::dyn_cast<ConstantDouble>(Const)) {
        if (Utils::isPositiveZero(ConstDouble->getValue()))
          return makeZeroedRegister(Ty, RegNum);
      }

      // Otherwise reference the constant through its pool label.
      auto *CFrom = llvm::cast<Constant>(From);
      assert(CFrom->getShouldBePooled());
      Constant *Offset = Ctx->getConstantSym(0, CFrom->getLabelName());
      auto *Mem = X86OperandMem::create(Func, Ty, nullptr, Offset);
      From = Mem;
    }

    bool NeedsReg = false;
    if (!(Allowed & Legal_Imm) && !isScalarFloatingType(Ty))
      // Immediate specifically not allowed.
      NeedsReg = true;
    if (!(Allowed & Legal_Mem) && isScalarFloatingType(Ty))
      // On x86, FP constants are lowered to mem operands.
      NeedsReg = true;
    if (NeedsReg) {
      From = copyToReg(From, RegNum);
    }
    return From;
  }

  if (auto *Var = llvm::dyn_cast<Variable>(From)) {
    // Check if the variable is guaranteed a physical register. This can happen
    // either when the variable is pre-colored or when it is assigned infinite
    // weight.
    bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg());
    bool MustRematerialize =
        (Var->isRematerializable() && !(Allowed & Legal_Rematerializable));
    // We need a new physical register for the operand if:
    // - Mem is not allowed and Var isn't guaranteed a physical register, or
    // - RegNum is required and Var->getRegNum() doesn't match, or
    // - Var is a rematerializable variable and rematerializable pass-through is
    //   not allowed (in which case we need an lea instruction).
    if (MustRematerialize) {
      assert(Ty == IceType_i32);
      Variable *NewVar = makeReg(Ty, RegNum);
      // Since Var is rematerializable, the offset will be added when the lea is
      // emitted.
      constexpr Constant *NoOffset = nullptr;
      auto *Mem = X86OperandMem::create(Func, Ty, Var, NoOffset);
      _lea(NewVar, Mem);
      From = NewVar;
    } else if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
               (RegNum.hasValue() && RegNum != Var->getRegNum())) {
      From = copyToReg(From, RegNum);
    }
    return From;
  }

  llvm::report_fatal_error("Unhandled operand kind in legalize()");
  return From;
}
6906
Andrew Scull9612d322015-07-06 14:53:25 -07006907/// Provide a trivial wrapper to legalize() for this common usage.
John Porto4a566862016-01-04 09:33:41 -08006908template <typename TraitsType>
6909Variable *TargetX86Base<TraitsType>::legalizeToReg(Operand *From,
Jim Stichnoth8aa39662016-02-10 11:20:30 -08006910 RegNumT RegNum) {
John Porto7e93c622015-06-23 10:58:57 -07006911 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
6912}
6913
Jan Voungfbdd2442015-07-15 12:36:20 -07006914/// Legalize undef values to concrete values.
John Porto4a566862016-01-04 09:33:41 -08006915template <typename TraitsType>
6916Operand *TargetX86Base<TraitsType>::legalizeUndef(Operand *From,
Jim Stichnoth8aa39662016-02-10 11:20:30 -08006917 RegNumT RegNum) {
Jan Voungfbdd2442015-07-15 12:36:20 -07006918 Type Ty = From->getType();
6919 if (llvm::isa<ConstantUndef>(From)) {
6920 // Lower undefs to zero. Another option is to lower undefs to an
Andrew Scull57e12682015-09-16 11:30:19 -07006921 // uninitialized register; however, using an uninitialized register results
6922 // in less predictable code.
Jan Voungfbdd2442015-07-15 12:36:20 -07006923 //
Andrew Scull57e12682015-09-16 11:30:19 -07006924 // If in the future the implementation is changed to lower undef values to
6925 // uninitialized registers, a FakeDef will be needed:
John Porto1d937a82015-12-17 06:19:34 -08006926 // Context.insert<InstFakeDef>(Reg);
Jan Voungfbdd2442015-07-15 12:36:20 -07006927 // This is in order to ensure that the live range of Reg is not
Andrew Scull57e12682015-09-16 11:30:19 -07006928 // overestimated. If the constant being lowered is a 64 bit value, then
6929 // the result should be split and the lo and hi components will need to go
6930 // in uninitialized registers.
Jan Voungfbdd2442015-07-15 12:36:20 -07006931 if (isVectorType(Ty))
6932 return makeVectorOfZeros(Ty, RegNum);
6933 return Ctx->getConstantZero(Ty);
6934 }
6935 return From;
6936}
6937
Andrew Scull57e12682015-09-16 11:30:19 -07006938/// For the cmp instruction, if Src1 is an immediate, or known to be a physical
6939/// register, we can allow Src0 to be a memory operand. Otherwise, Src0 must be
6940/// copied into a physical register. (Actually, either Src0 or Src1 can be
6941/// chosen for the physical register, but unfortunately we have to commit to one
6942/// or the other before register allocation.)
John Porto4a566862016-01-04 09:33:41 -08006943template <typename TraitsType>
6944Operand *TargetX86Base<TraitsType>::legalizeSrc0ForCmp(Operand *Src0,
6945 Operand *Src1) {
John Porto7e93c622015-06-23 10:58:57 -07006946 bool IsSrc1ImmOrReg = false;
6947 if (llvm::isa<Constant>(Src1)) {
6948 IsSrc1ImmOrReg = true;
Jan Voungfbdd2442015-07-15 12:36:20 -07006949 } else if (auto *Var = llvm::dyn_cast<Variable>(Src1)) {
John Porto7e93c622015-06-23 10:58:57 -07006950 if (Var->hasReg())
6951 IsSrc1ImmOrReg = true;
6952 }
6953 return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg);
6954}
6955
/// Wrap Opnd (either an existing X86OperandMem, a Variable base, or a
/// Constant offset) into an X86OperandMem of type Ty, then either fully
/// legalize it (DoLegalize) or only apply immediate randomization/pooling.
template <typename TraitsType>
typename TargetX86Base<TraitsType>::X86OperandMem *
TargetX86Base<TraitsType>::formMemoryOperand(Operand *Opnd, Type Ty,
                                             bool DoLegalize) {
  auto *Mem = llvm::dyn_cast<X86OperandMem>(Opnd);
  // It may be the case that address mode optimization already creates an
  // X86OperandMem, so in that case it wouldn't need another level of
  // transformation.
  if (!Mem) {
    // Opnd must be either a base variable or a constant offset.
    auto *Base = llvm::dyn_cast<Variable>(Opnd);
    auto *Offset = llvm::dyn_cast<Constant>(Opnd);
    assert(Base || Offset);
    if (Offset) {
      // During memory operand building, we do not blind or pool the constant
      // offset, we will work on the whole memory operand later as one entity
      // later, this save one instruction. By turning blinding and pooling off,
      // we guarantee legalize(Offset) will return a Constant*.
      if (!llvm::isa<ConstantRelocatable>(Offset)) {
        // RAII: pausing lasts only for this nested legalize() call.
        BoolFlagSaver B(RandomizationPoolingPaused, true);

        Offset = llvm::cast<Constant>(legalize(Offset));
      }

      assert(llvm::isa<ConstantInteger32>(Offset) ||
             llvm::isa<ConstantRelocatable>(Offset));
    }
    // Not completely sure whether it's OK to leave IsRebased unset when
    // creating the mem operand. If DoLegalize is true, it will definitely be
    // applied during the legalize() call, but perhaps not during the
    // randomizeOrPoolImmediate() call. In any case, the emit routines will
    // assert that PIC legalization has been applied.
    Mem = X86OperandMem::create(Func, Ty, Base, Offset);
  }
  // Do legalization, which contains randomization/pooling or do
  // randomization/pooling.
  return llvm::cast<X86OperandMem>(DoLegalize ? legalize(Mem)
                                              : randomizeOrPoolImmediate(Mem));
}
6994
John Porto4a566862016-01-04 09:33:41 -08006995template <typename TraitsType>
Jim Stichnoth8aa39662016-02-10 11:20:30 -08006996Variable *TargetX86Base<TraitsType>::makeReg(Type Type, RegNumT RegNum) {
John Porto7e93c622015-06-23 10:58:57 -07006997 // There aren't any 64-bit integer registers for x86-32.
John Porto1d235422015-08-12 12:37:53 -07006998 assert(Traits::Is64Bit || Type != IceType_i64);
John Porto5aeed952015-07-21 13:39:09 -07006999 Variable *Reg = Func->makeVariable(Type);
Reed Kotler5fa0a5f2016-02-15 20:01:24 -08007000 if (RegNum.hasValue())
John Porto7e93c622015-06-23 10:58:57 -07007001 Reg->setRegNum(RegNum);
Reed Kotler5fa0a5f2016-02-15 20:01:24 -08007002 else
7003 Reg->setMustHaveReg();
John Porto7e93c622015-06-23 10:58:57 -07007004 return Reg;
7005}
7006
// Table of types indexed by log2 of their width in bytes (1, 2, 4, 8, 16
// bytes), consumed by largestTypeInSize() and firstTypeThatFitsSize().
template <typename TraitsType>
const Type TargetX86Base<TraitsType>::TypeForSize[] = {
    IceType_i8, IceType_i16, IceType_i32, IceType_f64, IceType_v16i8};
John Porto4a566862016-01-04 09:33:41 -08007010template <typename TraitsType>
7011Type TargetX86Base<TraitsType>::largestTypeInSize(uint32_t Size,
7012 uint32_t MaxSize) {
Andrew Scullcfa628b2015-08-20 14:23:05 -07007013 assert(Size != 0);
7014 uint32_t TyIndex = llvm::findLastSet(Size, llvm::ZB_Undefined);
7015 uint32_t MaxIndex = MaxSize == NoSizeLimit
7016 ? llvm::array_lengthof(TypeForSize) - 1
7017 : llvm::findLastSet(MaxSize, llvm::ZB_Undefined);
7018 return TypeForSize[std::min(TyIndex, MaxIndex)];
7019}
7020
John Porto4a566862016-01-04 09:33:41 -08007021template <typename TraitsType>
7022Type TargetX86Base<TraitsType>::firstTypeThatFitsSize(uint32_t Size,
7023 uint32_t MaxSize) {
Andrew Scullcfa628b2015-08-20 14:23:05 -07007024 assert(Size != 0);
7025 uint32_t TyIndex = llvm::findLastSet(Size, llvm::ZB_Undefined);
7026 if (!llvm::isPowerOf2_32(Size))
7027 ++TyIndex;
7028 uint32_t MaxIndex = MaxSize == NoSizeLimit
7029 ? llvm::array_lengthof(TypeForSize) - 1
7030 : llvm::findLastSet(MaxSize, llvm::ZB_Undefined);
7031 return TypeForSize[std::min(TyIndex, MaxIndex)];
7032}
7033
John Porto4a566862016-01-04 09:33:41 -08007034template <typename TraitsType> void TargetX86Base<TraitsType>::postLower() {
Jim Stichnothdd6dcfa2016-04-18 12:52:09 -07007035 if (Func->getOptLevel() == Opt_m1)
John Porto7e93c622015-06-23 10:58:57 -07007036 return;
Jim Stichnoth230d4102015-09-25 17:40:32 -07007037 markRedefinitions();
Jim Stichnoth318f4cd2015-10-01 21:02:37 -07007038 Context.availabilityUpdate();
John Porto7e93c622015-06-23 10:58:57 -07007039}
7040
/// Fill Permutation with a randomized register numbering (excluding the
/// registers in ExcludeRegisters, seeded with Salt) by delegating to the
/// target traits implementation.
template <typename TraitsType>
void TargetX86Base<TraitsType>::makeRandomRegisterPermutation(
    llvm::SmallVectorImpl<RegNumT> &Permutation,
    const SmallBitVector &ExcludeRegisters, uint64_t Salt) const {
  Traits::makeRandomRegisterPermutation(Func, Permutation, ExcludeRegisters,
                                        Salt);
}
7048
John Porto4a566862016-01-04 09:33:41 -08007049template <typename TraitsType>
7050void TargetX86Base<TraitsType>::emit(const ConstantInteger32 *C) const {
Jim Stichnoth20b71f52015-06-24 15:52:24 -07007051 if (!BuildDefs::dump())
John Porto7e93c622015-06-23 10:58:57 -07007052 return;
7053 Ostream &Str = Ctx->getStrEmit();
Jim Stichnoth8ff4b282016-01-04 15:39:06 -08007054 Str << "$" << C->getValue();
John Porto7e93c622015-06-23 10:58:57 -07007055}
7056
John Porto4a566862016-01-04 09:33:41 -08007057template <typename TraitsType>
7058void TargetX86Base<TraitsType>::emit(const ConstantInteger64 *C) const {
John Porto1d235422015-08-12 12:37:53 -07007059 if (!Traits::Is64Bit) {
7060 llvm::report_fatal_error("Not expecting to emit 64-bit integers");
7061 } else {
7062 if (!BuildDefs::dump())
7063 return;
7064 Ostream &Str = Ctx->getStrEmit();
Jim Stichnoth8ff4b282016-01-04 15:39:06 -08007065 Str << "$" << C->getValue();
John Porto1d235422015-08-12 12:37:53 -07007066 }
John Porto7e93c622015-06-23 10:58:57 -07007067}
7068
John Porto4a566862016-01-04 09:33:41 -08007069template <typename TraitsType>
7070void TargetX86Base<TraitsType>::emit(const ConstantFloat *C) const {
Jim Stichnoth20b71f52015-06-24 15:52:24 -07007071 if (!BuildDefs::dump())
John Porto7e93c622015-06-23 10:58:57 -07007072 return;
7073 Ostream &Str = Ctx->getStrEmit();
Jim Stichnoth467ffe52016-03-29 15:01:06 -07007074 Str << C->getLabelName();
John Porto7e93c622015-06-23 10:58:57 -07007075}
7076
John Porto4a566862016-01-04 09:33:41 -08007077template <typename TraitsType>
7078void TargetX86Base<TraitsType>::emit(const ConstantDouble *C) const {
Jim Stichnoth20b71f52015-06-24 15:52:24 -07007079 if (!BuildDefs::dump())
John Porto7e93c622015-06-23 10:58:57 -07007080 return;
7081 Ostream &Str = Ctx->getStrEmit();
Jim Stichnoth467ffe52016-03-29 15:01:06 -07007082 Str << C->getLabelName();
John Porto7e93c622015-06-23 10:58:57 -07007083}
7084
/// Undef constants are expected to have been replaced by legalizeUndef()
/// before emission; reaching this emitter is a compiler bug.
template <typename TraitsType>
void TargetX86Base<TraitsType>::emit(const ConstantUndef *) const {
  llvm::report_fatal_error("undef value encountered by emitter.");
}
7089
Jim Stichnoth8ff4b282016-01-04 15:39:06 -08007090template <class Machine>
7091void TargetX86Base<Machine>::emit(const ConstantRelocatable *C) const {
7092 if (!BuildDefs::dump())
7093 return;
Karl Schimpfd4699942016-04-02 09:55:31 -07007094 assert(!getFlags().getUseNonsfi() ||
Jim Stichnoth467ffe52016-03-29 15:01:06 -07007095 C->getName().toString() == GlobalOffsetTable);
Jim Stichnoth8ff4b282016-01-04 15:39:06 -08007096 Ostream &Str = Ctx->getStrEmit();
7097 Str << "$";
7098 emitWithoutPrefix(C);
7099}
7100
/// Randomize or pool an Immediate.
///
/// Depending on the -randomize-pool-immediates option, either blinds the
/// constant (mov imm+cookie into a register, then lea -cookie[reg]) or
/// replaces it with a load from its constant-pool label. Returns the
/// replacement operand, or Immediate itself when randomization/pooling is
/// off, paused, unsupported (x86-64 sandbox), or the constant is ineligible.
/// RegNum, when set, is reused for the result so advancedPhiLowering() keeps
/// its pre-assigned register.
template <typename TraitsType>
Operand *
TargetX86Base<TraitsType>::randomizeOrPoolImmediate(Constant *Immediate,
                                                    RegNumT RegNum) {
  assert(llvm::isa<ConstantInteger32>(Immediate) ||
         llvm::isa<ConstantRelocatable>(Immediate));
  if (getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None ||
      RandomizationPoolingPaused == true) {
    // Immediates randomization/pooling off or paused
    return Immediate;
  }

  if (Traits::Is64Bit && NeedSandboxing) {
    // Immediate randomization/pooling is currently disabled for x86-64
    // sandboxing for it could generate invalid memory operands.
    assert(false &&
           "Constant pooling/randomization is disabled for x8664 sandbox.");
    return Immediate;
  }

  if (!Immediate->shouldBeRandomizedOrPooled()) {
    // the constant Immediate is not eligible for blinding/pooling
    return Immediate;
  }
  Ctx->statsUpdateRPImms();
  switch (getFlags().getRandomizeAndPoolImmediatesOption()) {
  default:
    llvm::report_fatal_error("Unsupported -randomize-pool-immediates option");
  case RPI_Randomize: {
    // blind the constant
    // FROM:
    //   imm
    // TO:
    //   insert: mov imm+cookie, Reg
    //   insert: lea -cookie[Reg], Reg
    //   => Reg
    // If we have already assigned a phy register, we must come from
    // advancedPhiLowering()=>lowerAssign(). In this case we should reuse the
    // assigned register as this assignment is that start of its use-def
    // chain. So we add RegNum argument here. Note we use 'lea' instruction
    // instead of 'xor' to avoid affecting the flags.
    Variable *Reg = makeReg(IceType_i32, RegNum);
    auto *Integer = llvm::cast<ConstantInteger32>(Immediate);
    uint32_t Value = Integer->getValue();
    uint32_t Cookie = Func->getConstantBlindingCookie();
    _mov(Reg, Ctx->getConstantInt(IceType_i32, Cookie + Value));
    // Undo the cookie through the lea displacement; Cookie+Value and -Cookie
    // sum back to Value without touching EFLAGS.
    Constant *Offset = Ctx->getConstantInt(IceType_i32, 0 - Cookie);
    _lea(Reg, X86OperandMem::create(Func, IceType_i32, Reg, Offset));
    if (Immediate->getType() == IceType_i32) {
      return Reg;
    }
    // Narrower immediates get truncated into a register of their own type.
    Variable *TruncReg = makeReg(Immediate->getType(), RegNum);
    _mov(TruncReg, Reg);
    return TruncReg;
  }
  case RPI_Pool: {
    // pool the constant
    // FROM:
    //   imm
    // TO:
    //   insert: mov $label, Reg
    //   => Reg
    assert(getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool);
    assert(Immediate->getShouldBePooled());
    // if we have already assigned a phy register, we must come from
    // advancedPhiLowering()=>lowerAssign(). In this case we should reuse the
    // assigned register as this assignment is that start of its use-def
    // chain. So we add RegNum argument here.
    Variable *Reg = makeReg(Immediate->getType(), RegNum);
    constexpr RelocOffsetT Offset = 0;
    // Load the pooled constant through its label symbol.
    Constant *Symbol = Ctx->getConstantSym(Offset, Immediate->getLabelName());
    constexpr Variable *NoBase = nullptr;
    X86OperandMem *MemOperand =
        X86OperandMem::create(Func, Immediate->getType(), NoBase, Symbol);
    _mov(Reg, MemOperand);
    return Reg;
  }
  }
}
7181
/// Blinds (randomizes) or pools the constant offset of \p MemOperand,
/// depending on the -randomize-pool-immediates option, as a defense against
/// attacker-controlled immediates. Returns the original operand when no
/// transformation is needed, or a freshly created replacement operand.
///
/// \param MemOperand the memory operand whose constant offset may be blinded
///        or pooled; must be non-null.
/// \param RegNum when valid, the physical register to reuse for the scratch
///        register (this happens when called from
///        advancedPhiLowering()=>lowerAssign(); see comments below).
template <typename TraitsType>
typename TargetX86Base<TraitsType>::X86OperandMem *
TargetX86Base<TraitsType>::randomizeOrPoolImmediate(X86OperandMem *MemOperand,
                                                    RegNumT RegNum) {
  assert(MemOperand);
  if (getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None ||
      RandomizationPoolingPaused == true) {
    // immediates randomization/pooling is turned off
    return MemOperand;
  }

  if (Traits::Is64Bit && NeedSandboxing) {
    // Immediate randomization/pooling is currently disabled for x86-64
    // sandboxing for it could generate invalid memory operands.
    assert(false &&
           "Constant pooling/randomization is disabled for x8664 sandbox.");
    return MemOperand;
  }

  // If this memory operand is already a randomized one, we do not randomize it
  // again.
  if (MemOperand->getRandomized())
    return MemOperand;

  // Only a constant offset can be blinded or pooled; a variable or absent
  // offset leaves the operand unchanged.
  auto *C = llvm::dyn_cast_or_null<Constant>(MemOperand->getOffset());

  if (C == nullptr) {
    return MemOperand;
  }

  if (!C->shouldBeRandomizedOrPooled()) {
    return MemOperand;
  }

  // The offset of this mem operand should be blinded or pooled
  Ctx->statsUpdateRPImms();
  switch (getFlags().getRandomizeAndPoolImmediatesOption()) {
  default:
    llvm::report_fatal_error("Unsupported -randomize-pool-immediates option");
  case RPI_Randomize: {
    // blind the constant offset
    // FROM:
    //  offset[base, index, shift]
    // TO:
    //  insert: lea offset+cookie[base], RegTemp
    //  => -cookie[RegTemp, index, shift]
    // The cookie cancels out: (offset + cookie) + (-cookie) == offset, so the
    // effective address is unchanged while the raw offset never appears in the
    // instruction stream.
    uint32_t Value =
        llvm::dyn_cast<ConstantInteger32>(MemOperand->getOffset())->getValue();
    uint32_t Cookie = Func->getConstantBlindingCookie();
    Constant *Mask1 =
        Ctx->getConstantInt(MemOperand->getOffset()->getType(), Cookie + Value);
    Constant *Mask2 =
        Ctx->getConstantInt(MemOperand->getOffset()->getType(), 0 - Cookie);

    X86OperandMem *TempMemOperand = X86OperandMem::create(
        Func, MemOperand->getType(), MemOperand->getBase(), Mask1);
    // If we have already assigned a physical register, we must come from
    // advancedPhiLowering()=>lowerAssign(). In this case we should reuse
    // the assigned register as this assignment is that start of its
    // use-def chain. So we add RegNum argument here.
    Variable *RegTemp = makeReg(MemOperand->getOffset()->getType(), RegNum);
    _lea(RegTemp, TempMemOperand);

    X86OperandMem *NewMemOperand = X86OperandMem::create(
        Func, MemOperand->getType(), RegTemp, Mask2, MemOperand->getIndex(),
        MemOperand->getShift(), MemOperand->getSegmentRegister(),
        MemOperand->getIsRebased());

    // Label this memory operand as randomized, so we won't randomize it
    // again in case we call legalize() multiple times on this memory
    // operand.
    NewMemOperand->setRandomized(true);
    return NewMemOperand;
  }
  case RPI_Pool: {
    // pool the constant offset
    // FROM:
    //  offset[base, index, shift]
    // TO:
    //  insert: mov $label, RegTemp
    //  insert: lea [base, RegTemp], RegTemp
    //  =>[RegTemp, index, shift]

    // Memory operand should never exist as source operands in phi lowering
    // assignments, so there is no need to reuse any registers here. For
    // phi lowering, we should not ask for new physical registers in
    // general. However, if we do meet Memory Operand during phi lowering,
    // we should not blind or pool the immediates for now.
    if (RegNum.hasValue())
      return MemOperand;
    Variable *RegTemp = makeReg(IceType_i32);
    assert(MemOperand->getOffset()->getShouldBePooled());
    constexpr RelocOffsetT SymOffset = 0;
    // Reference the pooled constant via the symbol (label) of its pool entry.
    Constant *Symbol =
        Ctx->getConstantSym(SymOffset, MemOperand->getOffset()->getLabelName());
    constexpr Variable *NoBase = nullptr;
    X86OperandMem *SymbolOperand = X86OperandMem::create(
        Func, MemOperand->getOffset()->getType(), NoBase, Symbol);
    _mov(RegTemp, SymbolOperand);
    // If we have a base variable here, we should add the lea instruction
    // to add the value of the base variable to RegTemp. If there is no
    // base variable, we won't need this lea instruction.
    if (MemOperand->getBase()) {
      X86OperandMem *CalculateOperand = X86OperandMem::create(
          Func, MemOperand->getType(), MemOperand->getBase(), nullptr, RegTemp,
          0, MemOperand->getSegmentRegister());
      _lea(RegTemp, CalculateOperand);
    }
    // RegTemp now holds base + &label, so the rewritten operand keeps the
    // original index/shift/segment with RegTemp as the sole base.
    X86OperandMem *NewMemOperand = X86OperandMem::create(
        Func, MemOperand->getType(), RegTemp, nullptr, MemOperand->getIndex(),
        MemOperand->getShift(), MemOperand->getSegmentRegister());
    return NewMemOperand;
  }
  }
}
David Sehr6b80cf12016-01-21 23:16:58 -08007297
7298template <typename TraitsType>
7299void TargetX86Base<TraitsType>::emitJumpTable(
John Porto03077212016-04-05 06:30:21 -07007300 const Cfg *, const InstJumpTable *JumpTable) const {
David Sehr6b80cf12016-01-21 23:16:58 -08007301 if (!BuildDefs::dump())
7302 return;
7303 Ostream &Str = Ctx->getStrEmit();
Karl Schimpfd4699942016-04-02 09:55:31 -07007304 const bool UseNonsfi = getFlags().getUseNonsfi();
Jim Stichnoth467ffe52016-03-29 15:01:06 -07007305 const char *Prefix = UseNonsfi ? ".data.rel.ro." : ".rodata.";
John Porto03077212016-04-05 06:30:21 -07007306 Str << "\t.section\t" << Prefix << JumpTable->getSectionName()
7307 << ",\"a\",@progbits\n"
7308 "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n"
7309 << JumpTable->getName() << ":";
David Sehr6b80cf12016-01-21 23:16:58 -08007310
7311 // On X86 ILP32 pointers are 32-bit hence the use of .long
7312 for (SizeT I = 0; I < JumpTable->getNumTargets(); ++I)
7313 Str << "\n\t.long\t" << JumpTable->getTarget(I)->getAsmName();
7314 Str << "\n";
7315}
7316
/// Emits the constant pool for the primitive type described by the pool-type
/// converter \p T (see PoolTypeConverter at the top of this file): a mergeable
/// .rodata.cstN section followed by one labeled, tagged entry per pooled
/// constant of that type.
///
/// \param Ctx global context providing the emit stream and the constant pool.
template <typename TraitsType>
template <typename T>
void TargetDataX86<TraitsType>::emitConstantPool(GlobalContext *Ctx) {
  // Textual emission only exists in dump-enabled builds.
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Ctx->getStrEmit();
  Type Ty = T::Ty;
  SizeT Align = typeAlignInBytes(Ty);
  ConstantList Pool = Ctx->getConstantPool(Ty);

  // "aM",@progbits with an entry size marks the section as mergeable
  // fixed-size constants.
  Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",@progbits," << Align
      << "\n";
  Str << "\t.align\t" << Align << "\n";

  // If reorder-pooled-constants option is set to true, we need to shuffle the
  // constant pool before emitting it.
  if (getFlags().getReorderPooledConstants() && !Pool.empty()) {
    // Use the constant's kind value as the salt for creating random number
    // generator. Seeding from the global random seed keeps the shuffle
    // deterministic for a given seed.
    Operand::OperandKind K = (*Pool.begin())->getKind();
    RandomNumberGenerator RNG(getFlags().getRandomSeed(),
                              RPE_PooledConstantReordering, K);
    RandomShuffle(Pool.begin(), Pool.end(),
                  [&RNG](uint64_t N) { return (uint32_t)RNG.next(N); });
  }

  for (Constant *C : Pool) {
    // The pool list may contain constants that ended up not needing a pool
    // entry; skip those.
    if (!C->getShouldBePooled())
      continue;
    auto *Const = llvm::cast<typename T::IceType>(C);
    typename T::IceType::PrimType Value = Const->getValue();
    // Use memcpy() to copy bits from Value into RawValue in a way that avoids
    // breaking strict-aliasing rules.
    typename T::PrimitiveIntType RawValue;
    memcpy(&RawValue, &Value, sizeof(Value));
    char buf[30];
    int CharsPrinted =
        snprintf(buf, llvm::array_lengthof(buf), T::PrintfString, RawValue);
    assert(CharsPrinted >= 0);
    assert((size_t)CharsPrinted < llvm::array_lengthof(buf));
    (void)CharsPrinted; // avoid warnings if asserts are disabled
    // Emit "label:\n\t<tag>\t<bits>\t/* <type> <value> */".
    Str << Const->getLabelName();
    Str << ":\n\t" << T::AsmTag << "\t" << buf << "\t/* " << T::TypeName << " "
        << Value << " */\n";
  }
}
7363
7364template <typename TraitsType>
7365void TargetDataX86<TraitsType>::lowerConstants() {
Karl Schimpfd4699942016-04-02 09:55:31 -07007366 if (getFlags().getDisableTranslation())
David Sehr6b80cf12016-01-21 23:16:58 -08007367 return;
Karl Schimpfd4699942016-04-02 09:55:31 -07007368 switch (getFlags().getOutFileType()) {
David Sehr6b80cf12016-01-21 23:16:58 -08007369 case FT_Elf: {
7370 ELFObjectWriter *Writer = Ctx->getObjectWriter();
7371
7372 Writer->writeConstantPool<ConstantInteger32>(IceType_i8);
7373 Writer->writeConstantPool<ConstantInteger32>(IceType_i16);
7374 Writer->writeConstantPool<ConstantInteger32>(IceType_i32);
7375
7376 Writer->writeConstantPool<ConstantFloat>(IceType_f32);
7377 Writer->writeConstantPool<ConstantDouble>(IceType_f64);
7378 } break;
7379 case FT_Asm:
7380 case FT_Iasm: {
7381 OstreamLocker L(Ctx);
7382
7383 emitConstantPool<PoolTypeConverter<uint8_t>>(Ctx);
7384 emitConstantPool<PoolTypeConverter<uint16_t>>(Ctx);
7385 emitConstantPool<PoolTypeConverter<uint32_t>>(Ctx);
7386
7387 emitConstantPool<PoolTypeConverter<float>>(Ctx);
7388 emitConstantPool<PoolTypeConverter<double>>(Ctx);
7389 } break;
7390 }
7391}
7392
/// Lowers all of the module's jump tables, dispatching on the output file
/// type: FT_Elf writes them through the ELF object writer, FT_Asm does nothing
/// here (tables were already emitted alongside each Cfg), and FT_Iasm prints
/// them as assembly text.
template <typename TraitsType>
void TargetDataX86<TraitsType>::lowerJumpTables() {
  // Non-SFI (PIC) output places tables in a relocatable read-only section.
  const bool IsPIC = getFlags().getUseNonsfi();
  switch (getFlags().getOutFileType()) {
  case FT_Elf: {
    ELFObjectWriter *Writer = Ctx->getObjectWriter();
    for (const JumpTableData &JT : Ctx->getJumpTables())
      Writer->writeJumpTable(JT, Traits::FK_Abs, IsPIC);
  } break;
  case FT_Asm:
    // Already emitted from Cfg
    break;
  case FT_Iasm: {
    // Textual emission only exists in dump-enabled builds.
    if (!BuildDefs::dump())
      return;
    Ostream &Str = Ctx->getStrEmit();
    const char *Prefix = IsPIC ? ".data.rel.ro." : ".rodata.";
    for (const JumpTableData &JT : Ctx->getJumpTables()) {
      // Section directive, alignment, then the table's label.
      Str << "\t.section\t" << Prefix << JT.getSectionName()
          << ",\"a\",@progbits\n"
             "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n"
          << JT.getName().toString() << ":";

      // On X8664 ILP32 pointers are 32-bit hence the use of .long
      // Entries are function-relative: "function+offset" per target.
      for (intptr_t TargetOffset : JT.getTargetOffsets())
        Str << "\n\t.long\t" << JT.getFunctionName() << "+" << TargetOffset;
      Str << "\n";
    }
  } break;
  }
}
7424
7425template <typename TraitsType>
7426void TargetDataX86<TraitsType>::lowerGlobals(
Jim Stichnoth467ffe52016-03-29 15:01:06 -07007427 const VariableDeclarationList &Vars, const std::string &SectionSuffix) {
Karl Schimpfd4699942016-04-02 09:55:31 -07007428 const bool IsPIC = getFlags().getUseNonsfi();
7429 switch (getFlags().getOutFileType()) {
David Sehr6b80cf12016-01-21 23:16:58 -08007430 case FT_Elf: {
7431 ELFObjectWriter *Writer = Ctx->getObjectWriter();
7432 Writer->writeDataSection(Vars, Traits::FK_Abs, SectionSuffix, IsPIC);
7433 } break;
7434 case FT_Asm:
7435 case FT_Iasm: {
David Sehr6b80cf12016-01-21 23:16:58 -08007436 OstreamLocker L(Ctx);
7437 for (const VariableDeclaration *Var : Vars) {
Jim Stichnothdd6dcfa2016-04-18 12:52:09 -07007438 if (getFlags().matchTranslateOnly(Var->getName(), 0)) {
David Sehr6b80cf12016-01-21 23:16:58 -08007439 emitGlobal(*Var, SectionSuffix);
7440 }
7441 }
7442 } break;
7443 }
7444}
John Porto4a566862016-01-04 09:33:41 -08007445} // end of namespace X86NAMESPACE
John Porto7e93c622015-06-23 10:58:57 -07007446} // end of namespace Ice
7447
7448#endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H