blob: 8dc8238ef1e95d3a39e171cbd4fbd52ab5f78b52 [file] [log] [blame]
John Porto7e93c622015-06-23 10:58:57 -07001//===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==//
2//
3// The Subzero Code Generator
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
Andrew Scull9612d322015-07-06 14:53:25 -07009///
10/// \file
Jim Stichnoth92a6e5b2015-12-02 16:52:44 -080011/// \brief Implements the TargetLoweringX86Base class, which consists almost
Andrew Scull57e12682015-09-16 11:30:19 -070012/// entirely of the lowering sequence for each high-level instruction.
Andrew Scull9612d322015-07-06 14:53:25 -070013///
John Porto7e93c622015-06-23 10:58:57 -070014//===----------------------------------------------------------------------===//
15
16#ifndef SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
17#define SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
18
John Porto7e93c622015-06-23 10:58:57 -070019#include "IceCfg.h"
20#include "IceCfgNode.h"
21#include "IceClFlags.h"
22#include "IceDefs.h"
23#include "IceELFObjectWriter.h"
24#include "IceGlobalInits.h"
John Portoec3f5652015-08-31 15:07:09 -070025#include "IceInstVarIter.h"
John Porto7e93c622015-06-23 10:58:57 -070026#include "IceLiveness.h"
27#include "IceOperand.h"
Jan Voung53483692015-07-16 10:47:46 -070028#include "IcePhiLoweringImpl.h"
John Porto7e93c622015-06-23 10:58:57 -070029#include "IceUtils.h"
John Porto4a566862016-01-04 09:33:41 -080030#include "IceInstX86Base.h"
John Porto67f8de92015-06-25 10:14:17 -070031#include "llvm/Support/MathExtras.h"
John Porto7e93c622015-06-23 10:58:57 -070032
Andrew Scull87f80c12015-07-20 10:19:16 -070033#include <stack>
34
John Porto7e93c622015-06-23 10:58:57 -070035namespace Ice {
namespace X86 {
/// PoolTypeConverter describes, for a given element type T, how to emit a
/// read-only constant pool of T values: the primitive integer type with the
/// same bit pattern, the Subzero constant operand class, the Ice type tag,
/// and the strings used by the emitter (type name, assembler data directive,
/// and printf format). The primary template is intentionally empty; only the
/// explicit specializations below are usable.
template <typename T> struct PoolTypeConverter {};

template <> struct PoolTypeConverter<float> {
  using PrimitiveIntType = uint32_t; // integer type matching f32's bit width
  using IceType = ConstantFloat;     // constant operand class being pooled
  static const Type Ty = IceType_f32;
  static const char *TypeName;     // human-readable type name for dumps
  static const char *AsmTag;       // assembler data directive for one element
  static const char *PrintfString; // printf format for emitting one element
};

template <> struct PoolTypeConverter<double> {
  using PrimitiveIntType = uint64_t;
  using IceType = ConstantDouble;
  static const Type Ty = IceType_f64;
  static const char *TypeName;
  static const char *AsmTag;
  static const char *PrintfString;
};

// Add converter for int type constant pooling
template <> struct PoolTypeConverter<uint32_t> {
  using PrimitiveIntType = uint32_t;
  using IceType = ConstantInteger32;
  static const Type Ty = IceType_i32;
  static const char *TypeName;
  static const char *AsmTag;
  static const char *PrintfString;
};

// Add converter for int type constant pooling.
// Note: i16 pool elements are still backed by ConstantInteger32/uint32_t.
template <> struct PoolTypeConverter<uint16_t> {
  using PrimitiveIntType = uint32_t;
  using IceType = ConstantInteger32;
  static const Type Ty = IceType_i16;
  static const char *TypeName;
  static const char *AsmTag;
  static const char *PrintfString;
};

// Add converter for int type constant pooling.
// Note: i8 pool elements are still backed by ConstantInteger32/uint32_t.
template <> struct PoolTypeConverter<uint8_t> {
  using PrimitiveIntType = uint32_t;
  using IceType = ConstantInteger32;
  static const Type Ty = IceType_i8;
  static const char *TypeName;
  static const char *AsmTag;
  static const char *PrintfString;
};
} // end of namespace X86
87
John Porto4a566862016-01-04 09:33:41 -080088namespace X86NAMESPACE {
John Porto7e93c622015-06-23 10:58:57 -070089
Eric Holkd6cf6b32016-02-17 11:09:48 -080090using Utils::BoolFlagSaver;
John Porto7e93c622015-06-23 10:58:57 -070091
/// Bookkeeping for one i1-producing instruction within a basic block, used by
/// BoolFolding to decide whether the producer (e.g. an icmp) can be folded
/// directly into its consumer (e.g. a branch) during lowering.
template <typename Traits> class BoolFoldingEntry {
  BoolFoldingEntry(const BoolFoldingEntry &) = delete;

public:
  BoolFoldingEntry() = default;
  explicit BoolFoldingEntry(Inst *I);
  BoolFoldingEntry &operator=(const BoolFoldingEntry &) = default;
  /// Instr is the instruction producing the i1-type variable of interest.
  Inst *Instr = nullptr;
  /// IsComplex is the cached result of BoolFolding::hasComplexLowering(Instr).
  bool IsComplex = false;
  /// IsLiveOut is initialized conservatively to true, and is set to false when
  /// we encounter an instruction that ends Var's live range. We disable the
  /// folding optimization when Var is live beyond this basic block. Note that
  /// if liveness analysis is not performed (e.g. in Om1 mode), IsLiveOut will
  /// always be true and the folding optimization will never be performed.
  bool IsLiveOut = true;
  // NumUses counts the number of times Var is used as a source operand in the
  // basic block. If IsComplex is true and there is more than one use of Var,
  // then the folding optimization is disabled for Var.
  uint32_t NumUses = 0;
};
114
/// Per-basic-block analysis that identifies i1-producing instructions (icmp,
/// fcmp, certain flag-setting arithmetic) whose result can be folded into the
/// single consumer (branch/select) instead of being materialized into a
/// register. init() populates the map for one node; getProducerFor() is then
/// queried during lowering of consumer instructions.
template <typename Traits> class BoolFolding {
public:
  enum BoolFoldingProducerKind {
    PK_None,
    // TODO(jpp): PK_Icmp32 is no longer meaningful. Rename to PK_IcmpNative.
    PK_Icmp32,
    PK_Icmp64,
    PK_Fcmp,
    PK_Trunc,
    PK_Arith // A flag-setting arithmetic instruction.
  };

  /// Currently the actual enum values are not used (other than CK_None), but we
  /// go ahead and produce them anyway for symmetry with the
  /// BoolFoldingProducerKind.
  enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext };

private:
  BoolFolding(const BoolFolding &) = delete;
  BoolFolding &operator=(const BoolFolding &) = delete;

public:
  BoolFolding() = default;
  static BoolFoldingProducerKind getProducerKind(const Inst *Instr);
  static BoolFoldingConsumerKind getConsumerKind(const Inst *Instr);
  static bool hasComplexLowering(const Inst *Instr);
  static bool isValidFolding(BoolFoldingProducerKind ProducerKind,
                             BoolFoldingConsumerKind ConsumerKind);
  /// Rebuilds the Producers map for the given node; must be called once per
  /// node before lowering it.
  void init(CfgNode *Node);
  /// Returns the producer instruction registered for Opnd, or nullptr.
  const Inst *getProducerFor(const Operand *Opnd) const;
  void dump(const Cfg *Func) const;

private:
  /// Returns true if Producers contains a valid entry for the given VarNum.
  bool containsValid(SizeT VarNum) const {
    auto Element = Producers.find(VarNum);
    return Element != Producers.end() && Element->second.Instr != nullptr;
  }
  // An entry is invalidated by nulling its Instr rather than erasing it.
  void setInvalid(SizeT VarNum) { Producers[VarNum].Instr = nullptr; }
  void invalidateProducersOnStore(const Inst *Instr);
  /// Producers maps Variable::Number to a BoolFoldingEntry.
  CfgUnorderedMap<SizeT, BoolFoldingEntry<Traits>> Producers;
};
158
/// Registers I as the producer and caches whether its lowering is "complex"
/// (multi-branch), which restricts folding to a single consumer.
template <typename Traits>
BoolFoldingEntry<Traits>::BoolFoldingEntry(Inst *I)
    : Instr(I), IsComplex(BoolFolding<Traits>::hasComplexLowering(I)) {}
John Porto7e93c622015-06-23 10:58:57 -0700162
/// Classifies Instr as a potential i1 producer. Icmp/fcmp and flag-setting
/// And/Or arithmetic are recognized; on 32-bit targets a 64-bit icmp gets its
/// own kind (PK_Icmp64) because its lowering is multi-branch.
template <typename Traits>
typename BoolFolding<Traits>::BoolFoldingProducerKind
BoolFolding<Traits>::getProducerKind(const Inst *Instr) {
  if (llvm::isa<InstIcmp>(Instr)) {
    // On a 64-bit target every icmp is "native"; on a 32-bit target an i64
    // compare needs the special PK_Icmp64 lowering.
    if (Traits::Is64Bit || Instr->getSrc(0)->getType() != IceType_i64)
      return PK_Icmp32;
    return PK_Icmp64;
  }
  if (llvm::isa<InstFcmp>(Instr))
    return PK_Fcmp;
  if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) {
    if (Traits::Is64Bit || Arith->getSrc(0)->getType() != IceType_i64) {
      switch (Arith->getOp()) {
      default:
        return PK_None;
      case InstArithmetic::And:
      case InstArithmetic::Or:
        return PK_Arith;
      }
    }
  }
  return PK_None; // TODO(stichnot): remove this

  // NOTE: The code below is intentionally unreachable — Trunc producers are
  // disabled pending the TODO above. Do not delete; removing the early return
  // re-enables PK_Trunc classification.
  if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {
    switch (Cast->getCastKind()) {
    default:
      return PK_None;
    case InstCast::Trunc:
      return PK_Trunc;
    }
  }
  return PK_None;
}
196
/// Classifies Instr as a potential consumer of a folded i1 producer. Only
/// branches and selects are currently enabled.
template <typename Traits>
typename BoolFolding<Traits>::BoolFoldingConsumerKind
BoolFolding<Traits>::getConsumerKind(const Inst *Instr) {
  if (llvm::isa<InstBr>(Instr))
    return CK_Br;
  if (llvm::isa<InstSelect>(Instr))
    return CK_Select;
  return CK_None; // TODO(stichnot): remove this

  // NOTE: The code below is intentionally unreachable — Sext/Zext consumers
  // are disabled pending the TODO above. Do not delete; removing the early
  // return re-enables them.
  if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {
    switch (Cast->getCastKind()) {
    default:
      return CK_None;
    case InstCast::Sext:
      return CK_Sext;
    case InstCast::Zext:
      return CK_Zext;
    }
  }
  return CK_None;
}
218
John Porto921856d2015-07-07 11:56:26 -0700219/// Returns true if the producing instruction has a "complex" lowering sequence.
220/// This generally means that its lowering sequence requires more than one
221/// conditional branch, namely 64-bit integer compares and some floating-point
Andrew Scull57e12682015-09-16 11:30:19 -0700222/// compares. When this is true, and there is more than one consumer, we prefer
John Porto921856d2015-07-07 11:56:26 -0700223/// to disable the folding optimization because it minimizes branches.
John Porto4a566862016-01-04 09:33:41 -0800224template <typename Traits>
225bool BoolFolding<Traits>::hasComplexLowering(const Inst *Instr) {
John Porto7e93c622015-06-23 10:58:57 -0700226 switch (getProducerKind(Instr)) {
227 default:
228 return false;
229 case PK_Icmp64:
John Porto4a566862016-01-04 09:33:41 -0800230 return !Traits::Is64Bit;
John Porto7e93c622015-06-23 10:58:57 -0700231 case PK_Fcmp:
John Porto4a566862016-01-04 09:33:41 -0800232 return Traits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()].C2 !=
233 Traits::Cond::Br_None;
John Porto7e93c622015-06-23 10:58:57 -0700234 }
235}
236
John Porto4a566862016-01-04 09:33:41 -0800237template <typename Traits>
238bool BoolFolding<Traits>::isValidFolding(
239 typename BoolFolding<Traits>::BoolFoldingProducerKind ProducerKind,
240 typename BoolFolding<Traits>::BoolFoldingConsumerKind ConsumerKind) {
David Sehre3984282015-12-15 17:34:55 -0800241 switch (ProducerKind) {
242 default:
243 return false;
244 case PK_Icmp32:
245 case PK_Icmp64:
246 case PK_Fcmp:
247 return (ConsumerKind == CK_Br) || (ConsumerKind == CK_Select);
248 case PK_Arith:
249 return ConsumerKind == CK_Br;
250 }
251}
252
/// Scans the node's instructions once, registering every eligible i1 producer
/// and then validating each of its uses as a foldable consumer. Entries that
/// fail any check are invalidated in place. Surviving producers are marked
/// dead (not deleted) so later peepholes can still see them.
template <typename Traits> void BoolFolding<Traits>::init(CfgNode *Node) {
  Producers.clear();
  for (Inst &Instr : Node->getInsts()) {
    if (Instr.isDeleted())
      continue;
    // A memory write may alias a producer's memory operand; this must be
    // checked before considering Instr as a producer or consumer.
    invalidateProducersOnStore(&Instr);
    // Check whether Instr is a valid producer.
    Variable *Var = Instr.getDest();
    if (Var // only consider instructions with an actual dest var
        && Var->getType() == IceType_i1          // only bool-type dest vars
        && getProducerKind(&Instr) != PK_None) { // white-listed instructions
      Producers[Var->getIndex()] = BoolFoldingEntry<Traits>(&Instr);
    }
    // Check each src variable against the map.
    FOREACH_VAR_IN_INST(Var, Instr) {
      SizeT VarNum = Var->getIndex();
      if (!containsValid(VarNum))
        continue;
      // All valid consumers use Var as the first source operand
      if (IndexOfVarOperandInInst(Var) != 0) {
        setInvalid(VarNum);
        continue;
      }
      // Consumer instructions must be white-listed
      typename BoolFolding<Traits>::BoolFoldingConsumerKind ConsumerKind =
          getConsumerKind(&Instr);
      if (ConsumerKind == CK_None) {
        setInvalid(VarNum);
        continue;
      }
      // The (producer, consumer) kind pair must be a legal combination.
      typename BoolFolding<Traits>::BoolFoldingProducerKind ProducerKind =
          getProducerKind(Producers[VarNum].Instr);
      if (!isValidFolding(ProducerKind, ConsumerKind)) {
        setInvalid(VarNum);
        continue;
      }
      // Avoid creating multiple copies of complex producer instructions.
      if (Producers[VarNum].IsComplex && Producers[VarNum].NumUses > 0) {
        setInvalid(VarNum);
        continue;
      }
      ++Producers[VarNum].NumUses;
      if (Instr.isLastUse(Var)) {
        // Var's live range ends here, so it cannot be live-out of the block.
        Producers[VarNum].IsLiveOut = false;
      }
    }
  }
  for (auto &I : Producers) {
    // Ignore entries previously marked invalid.
    if (I.second.Instr == nullptr)
      continue;
    // Disable the producer if its dest may be live beyond this block.
    if (I.second.IsLiveOut) {
      setInvalid(I.first);
      continue;
    }
    // Mark as "dead" rather than outright deleting. This is so that other
    // peephole style optimizations during or before lowering have access to
    // this instruction in undeleted form. See for example
    // tryOptimizedCmpxchgCmpBr().
    I.second.Instr->setDead();
  }
}
316
John Porto4a566862016-01-04 09:33:41 -0800317template <typename Traits>
318const Inst *BoolFolding<Traits>::getProducerFor(const Operand *Opnd) const {
John Porto7e93c622015-06-23 10:58:57 -0700319 auto *Var = llvm::dyn_cast<const Variable>(Opnd);
320 if (Var == nullptr)
321 return nullptr;
322 SizeT VarNum = Var->getIndex();
323 auto Element = Producers.find(VarNum);
324 if (Element == Producers.end())
325 return nullptr;
326 return Element->second.Instr;
327}
328
John Porto4a566862016-01-04 09:33:41 -0800329template <typename Traits>
330void BoolFolding<Traits>::dump(const Cfg *Func) const {
Jim Stichnoth20b71f52015-06-24 15:52:24 -0700331 if (!BuildDefs::dump() || !Func->isVerbose(IceV_Folding))
John Porto7e93c622015-06-23 10:58:57 -0700332 return;
333 OstreamLocker L(Func->getContext());
334 Ostream &Str = Func->getContext()->getStrDump();
335 for (auto &I : Producers) {
336 if (I.second.Instr == nullptr)
337 continue;
338 Str << "Found foldable producer:\n ";
339 I.second.Instr->dump(Func);
340 Str << "\n";
341 }
342}
343
Jim Stichnothf1f773d2016-04-21 16:54:33 -0700344/// If the given instruction has potential memory side effects (e.g. store, rmw,
345/// or a call instruction with potential memory side effects), then we must not
346/// allow a pre-store Producer instruction with memory operands to be folded
347/// into a post-store Consumer instruction. If this is detected, the Producer
348/// is invalidated.
349///
350/// We use the Producer's IsLiveOut field to determine whether any potential
351/// Consumers come after this store instruction. The IsLiveOut field is
352/// initialized to true, and BoolFolding::init() sets IsLiveOut to false when it
353/// sees the variable's definitive last use (indicating the variable is not in
354/// the node's live-out set). Thus if we see here that IsLiveOut is false, we
355/// know that there can be no consumers after the store, and therefore we know
356/// the folding is safe despite the store instruction.
357template <typename Traits>
358void BoolFolding<Traits>::invalidateProducersOnStore(const Inst *Instr) {
359 if (!Instr->isMemoryWrite())
360 return;
361 for (auto &ProducerPair : Producers) {
362 if (!ProducerPair.second.IsLiveOut)
363 continue;
364 Inst *PInst = ProducerPair.second.Instr;
365 if (PInst == nullptr)
366 continue;
367 bool HasMemOperand = false;
368 const SizeT SrcSize = PInst->getSrcSize();
369 for (SizeT I = 0; I < SrcSize; ++I) {
370 if (llvm::isa<typename Traits::X86OperandMem>(PInst->getSrc(I))) {
371 HasMemOperand = true;
372 break;
373 }
374 }
375 if (!HasMemOperand)
376 continue;
377 setInvalid(ProducerPair.first);
378 }
379}
380
/// Per-node lowering setup: rebuilds the bool-folding producer map for Node,
/// then dumps it when -verbose=folding is enabled.
template <typename TraitsType>
void TargetX86Base<TraitsType>::initNodeForLowering(CfgNode *Node) {
  FoldingInfo.init(Node);
  FoldingInfo.dump(Func);
}
386
/// Constructor: records whether NaCl sandboxing is active and translates the
/// target-independent instruction-set flag into this target's own
/// InstructionSet enum.
template <typename TraitsType>
TargetX86Base<TraitsType>::TargetX86Base(Cfg *Func)
    : TargetLowering(Func), NeedSandboxing(SandboxingType == ST_NaCl) {
  // The two enum ranges must stay in lock step for the offset arithmetic
  // below to be valid.
  static_assert(
      (Traits::InstructionSet::End - Traits::InstructionSet::Begin) ==
          (TargetInstructionSet::X86InstructionSet_End -
           TargetInstructionSet::X86InstructionSet_Begin),
      "Traits::InstructionSet range different from TargetInstructionSet");
  if (getFlags().getTargetInstructionSet() !=
      TargetInstructionSet::BaseInstructionSet) {
    // Rebase the flag value from the target-independent range into the
    // Traits-specific range.
    InstructionSet = static_cast<InstructionSetEnum>(
        (getFlags().getTargetInstructionSet() -
         TargetInstructionSet::X86InstructionSet_Begin) +
        Traits::InstructionSet::Begin);
  }
}
403
/// One-time process-wide initialization: sets the register-number limit,
/// builds the type-to-register tables (both unfiltered and flag-filtered
/// copies), and selects the fixup kinds used for relocations.
template <typename TraitsType>
void TargetX86Base<TraitsType>::staticInit(GlobalContext *Ctx) {
  RegNumT::setLimit(Traits::RegisterSet::Reg_NUM);
  Traits::initRegisterSet(getFlags(), &TypeToRegisterSet, &RegisterAliases);
  // Keep an unfiltered snapshot before command-line flags prune registers.
  for (size_t i = 0; i < TypeToRegisterSet.size(); ++i)
    TypeToRegisterSetUnfiltered[i] = TypeToRegisterSet[i];
  filterTypeToRegisterSet(Ctx, Traits::RegisterSet::Reg_NUM,
                          TypeToRegisterSet.data(), TypeToRegisterSet.size(),
                          Traits::getRegName, getRegClassName);
  PcRelFixup = Traits::FK_PcRel;
  // Non-SFI mode uses GOT-relative absolute fixups instead of plain absolute.
  AbsFixup = getFlags().getUseNonsfi() ? Traits::FK_Gotoff : Traits::FK_Abs;
}
416
Jim Stichnoth467ffe52016-03-29 15:01:06 -0700417template <typename TraitsType>
418bool TargetX86Base<TraitsType>::shouldBePooled(const Constant *C) {
419 if (auto *ConstFloat = llvm::dyn_cast<ConstantFloat>(C)) {
420 return !Utils::isPositiveZero(ConstFloat->getValue());
421 }
422 if (auto *ConstDouble = llvm::dyn_cast<ConstantDouble>(C)) {
423 return !Utils::isPositiveZero(ConstDouble->getValue());
424 }
Karl Schimpfd4699942016-04-02 09:55:31 -0700425 if (getFlags().getRandomizeAndPoolImmediatesOption() != RPI_Pool) {
Jim Stichnoth467ffe52016-03-29 15:01:06 -0700426 return false;
427 }
428 return C->shouldBeRandomizedOrPooled();
429}
430
/// Full O2 translation pipeline for one function: helper-call insertion,
/// alloca layout, (optional) Phi lowering, loop analysis, address-mode/RMW
/// optimization, target lowering, two rounds of liveness + register
/// allocation, frame layout, and late branch/nop/sandboxing passes. The pass
/// order is load-bearing; see the inline comments before reordering anything.
template <typename TraitsType> void TargetX86Base<TraitsType>::translateO2() {
  TimerMarker T(TimerStack::TT_O2, Func);

  if (SandboxingType != ST_None) {
    initRebasePtr();
  }

  genTargetHelperCalls();
  Func->dump("After target helper call insertion");

  // Merge Alloca instructions, and lay out the stack.
  static constexpr bool SortAndCombineAllocas = true;
  Func->processAllocas(SortAndCombineAllocas);
  Func->dump("After Alloca processing");

  if (getFlags().getEnableExperimental()) {
    Func->localCSE();
    Func->dump("After Local CSE");
  }

  if (!getFlags().getEnablePhiEdgeSplit()) {
    // Lower Phi instructions.
    Func->placePhiLoads();
    if (Func->hasError())
      return;
    Func->placePhiStores();
    if (Func->hasError())
      return;
    Func->deletePhis();
    if (Func->hasError())
      return;
    Func->dump("After Phi lowering");
  }

  // Run this early so it can be used to focus optimizations on potentially hot
  // code.
  // TODO(stichnot,ascull): currently only used for regalloc not
  // expensive high level optimizations which could be focused on potentially
  // hot code.
  Func->computeLoopNestDepth();
  Func->dump("After loop nest depth analysis");

  // Address mode optimization.
  Func->getVMetadata()->init(VMK_SingleDefs);
  Func->doAddressOpt();
  Func->materializeVectorShuffles();

  // Find read-modify-write opportunities. Do this after address mode
  // optimization so that doAddressOpt() doesn't need to be applied to RMW
  // instructions as well.
  findRMW();
  Func->dump("After RMW transform");

  // Argument lowering
  Func->doArgLowering();

  // Target lowering. This requires liveness analysis for some parts of the
  // lowering decisions, such as compare/branch fusing. If non-lightweight
  // liveness analysis is used, the instructions need to be renumbered first
  // TODO: This renumbering should only be necessary if we're actually
  // calculating live intervals, which we only do for register allocation.
  Func->renumberInstructions();
  if (Func->hasError())
    return;

  // TODO: It should be sufficient to use the fastest liveness calculation,
  // i.e. livenessLightweight(). However, for some reason that slows down the
  // rest of the translation. Investigate.
  Func->liveness(Liveness_Basic);
  if (Func->hasError())
    return;
  Func->dump("After x86 address mode opt");

  // Disable constant blinding or pooling for load optimization.
  {
    BoolFlagSaver B(RandomizationPoolingPaused, true);
    doLoadOpt();
  }
  Func->genCode();
  if (Func->hasError())
    return;
  if (SandboxingType != ST_None) {
    initSandbox();
  }
  Func->dump("After x86 codegen");

  // Register allocation. This requires instruction renumbering and full
  // liveness analysis. Loops must be identified before liveness so variable
  // use weights are correct.
  Func->renumberInstructions();
  if (Func->hasError())
    return;
  Func->liveness(Liveness_Intervals);
  if (Func->hasError())
    return;
  // The post-codegen dump is done here, after liveness analysis and associated
  // cleanup, to make the dump cleaner and more useful.
  Func->dump("After initial x8632 codegen");
  // Validate the live range computations. The expensive validation call is
  // deliberately only made when assertions are enabled.
  assert(Func->validateLiveness());
  Func->getVMetadata()->init(VMK_All);
  regAlloc(RAK_Global);
  if (Func->hasError())
    return;
  Func->dump("After linear scan regalloc");

  if (getFlags().getEnablePhiEdgeSplit()) {
    Func->advancedPhiLowering();
    Func->dump("After advanced Phi lowering");
  }

  // Stack frame mapping.
  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  Func->contractEmptyNodes();
  Func->reorderNodes();

  // Shuffle basic block order if -reorder-basic-blocks is enabled.
  Func->shuffleNodes();

  // Branch optimization. This needs to be done just before code emission. In
  // particular, no transformations that insert or reorder CfgNodes should be
  // done after branch optimization. We go ahead and do it before nop insertion
  // to reduce the amount of work needed for searching for opportunities.
  Func->doBranchOpt();
  Func->dump("After branch optimization");

  // Nop insertion if -nop-insertion is enabled.
  Func->doNopInsertion();

  // Mark nodes that require sandbox alignment
  if (NeedSandboxing) {
    Func->markNodesForSandboxing();
  }
}
570
/// Minimal-optimization (Om1) translation pipeline: like translateO2 but with
/// no CSE, no address-mode/RMW optimization, no liveness-driven folding, no
/// alloca merging, and register allocation restricted to infinite-weight
/// variables. Pass order still matters; see inline comments.
template <typename TraitsType> void TargetX86Base<TraitsType>::translateOm1() {
  TimerMarker T(TimerStack::TT_Om1, Func);

  if (SandboxingType != ST_None) {
    initRebasePtr();
  }

  genTargetHelperCalls();

  // Do not merge Alloca instructions, and lay out the stack.
  static constexpr bool SortAndCombineAllocas = false;
  Func->processAllocas(SortAndCombineAllocas);
  Func->dump("After Alloca processing");

  // Phi lowering (simple variant; no edge splitting in Om1).
  Func->placePhiLoads();
  if (Func->hasError())
    return;
  Func->placePhiStores();
  if (Func->hasError())
    return;
  Func->deletePhis();
  if (Func->hasError())
    return;
  Func->dump("After Phi lowering");

  Func->doArgLowering();
  Func->genCode();
  if (Func->hasError())
    return;
  if (SandboxingType != ST_None) {
    initSandbox();
  }
  Func->dump("After initial x8632 codegen");

  // Only infinite-weight (pre-colored / must-have-register) variables get
  // registers in Om1.
  regAlloc(RAK_InfOnly);
  if (Func->hasError())
    return;
  Func->dump("After regalloc of infinite-weight variables");

  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  // Shuffle basic block order if -reorder-basic-blocks is enabled.
  Func->shuffleNodes();

  // Nop insertion if -nop-insertion is enabled.
  Func->doNopInsertion();

  // Mark nodes that require sandbox alignment
  if (NeedSandboxing)
    Func->markNodesForSandboxing();
}
625
/// Returns whether an arithmetic instruction is a candidate for the x86
/// read-modify-write (op [mem], reg) transformation performed by findRMW().
inline bool canRMW(const InstArithmetic *Arith) {
  Type Ty = Arith->getDest()->getType();
  // X86 vector instructions write to a register and have no RMW option.
  if (isVectorType(Ty))
    return false;
  bool isI64 = Ty == IceType_i64;

  switch (Arith->getOp()) {
  // Not handled for lack of simple lowering:
  //   shift on i64
  //   mul, udiv, urem, sdiv, srem, frem
  // Not handled for lack of RMW instructions:
  //   fadd, fsub, fmul, fdiv (also vector types)
  default:
    return false;
  case InstArithmetic::Add:
  case InstArithmetic::Sub:
  case InstArithmetic::And:
  case InstArithmetic::Or:
  case InstArithmetic::Xor:
    return true;
  case InstArithmetic::Shl:
  case InstArithmetic::Lshr:
  case InstArithmetic::Ashr:
    // Shifts are currently disabled; the unreachable `return !isI64;` below
    // is the intended behavior once the TODO is resolved. Do not delete it.
    return false; // TODO(stichnot): implement
    return !isI64;
  }
}
654
John Porto4a566862016-01-04 09:33:41 -0800655template <typename TraitsType>
John Porto7e93c622015-06-23 10:58:57 -0700656bool isSameMemAddressOperand(const Operand *A, const Operand *B) {
657 if (A == B)
658 return true;
John Porto4a566862016-01-04 09:33:41 -0800659 if (auto *MemA =
660 llvm::dyn_cast<typename TargetX86Base<TraitsType>::X86OperandMem>(
661 A)) {
662 if (auto *MemB =
663 llvm::dyn_cast<typename TargetX86Base<TraitsType>::X86OperandMem>(
664 B)) {
John Porto7e93c622015-06-23 10:58:57 -0700665 return MemA->getBase() == MemB->getBase() &&
666 MemA->getOffset() == MemB->getOffset() &&
667 MemA->getIndex() == MemB->getIndex() &&
668 MemA->getShift() == MemB->getShift() &&
669 MemA->getSegmentRegister() == MemB->getSegmentRegister();
670 }
671 }
672 return false;
673}
674
/// Scans every node for a Load / Arithmetic / Store triple over the same
/// address and, when found, injects an InstX86FakeRMW (plus a FakeDef'd
/// "beacon" variable) so that lowering can emit a single read-modify-write
/// instruction instead of separate load/op/store.
template <typename TraitsType> void TargetX86Base<TraitsType>::findRMW() {
  TimerMarker _(TimerStack::TT_findRMW, Func);
  Func->dump("Before RMW");
  // Lock the dump stream once for the whole pass when RMW verbosity is on.
  if (Func->isVerbose(IceV_RMW))
    Func->getContext()->lockStr();
  for (CfgNode *Node : Func->getNodes()) {
    // Walk through the instructions, considering each sequence of 3
    // instructions, and look for the particular RMW pattern. Note that this
    // search can be "broken" (false negatives) if there are intervening
    // deleted instructions, or intervening instructions that could be safely
    // moved out of the way to reveal an RMW pattern.
    auto E = Node->getInsts().end();
    // I1/I2/I3 form a sliding window of three consecutive live instructions;
    // I1 and I2 start at end() so the first two iterations only prime them.
    auto I1 = E, I2 = E, I3 = Node->getInsts().begin();
    for (; I3 != E; I1 = I2, I2 = I3, ++I3) {
      // Make I3 skip over deleted instructions.
      while (I3 != E && I3->isDeleted())
        ++I3;
      if (I1 == E || I2 == E || I3 == E)
        continue;
      assert(!I1->isDeleted());
      assert(!I2->isDeleted());
      assert(!I3->isDeleted());
      auto *Load = llvm::dyn_cast<InstLoad>(I1);
      auto *Arith = llvm::dyn_cast<InstArithmetic>(I2);
      auto *Store = llvm::dyn_cast<InstStore>(I3);
      if (!Load || !Arith || !Store)
        continue;
      // Look for:
      //   a = Load addr
      //   b = <op> a, other
      //   Store b, addr
      // Change to:
      //   a = Load addr
      //   b = <op> a, other
      //   x = FakeDef
      //   RMW <op>, addr, other, x
      //   b = Store b, addr, x
      // Note that inferTwoAddress() makes sure setDestRedefined() gets called
      // on the updated Store instruction, to avoid liveness problems later.
      //
      // With this transformation, the Store instruction acquires a Dest
      // variable and is now subject to dead code elimination if there are no
      // more uses of "b". Variable "x" is a beacon for determining whether the
      // Store instruction gets dead-code eliminated. If the Store instruction
      // is eliminated, then it must be the case that the RMW instruction ends
      // x's live range, and therefore the RMW instruction will be retained and
      // later lowered. On the other hand, if the RMW instruction does not end
      // x's live range, then the Store instruction must still be present, and
      // therefore the RMW instruction is ignored during lowering because it is
      // redundant with the Store instruction.
      //
      // Note that if "a" has further uses, the RMW transformation may still
      // trigger, resulting in two loads and one store, which is worse than the
      // original one load and one store. However, this is probably rare, and
      // caching probably keeps it just as fast.
      if (!isSameMemAddressOperand<TraitsType>(Load->getSourceAddress(),
                                               Store->getAddr()))
        continue;
      Operand *ArithSrcFromLoad = Arith->getSrc(0);
      Operand *ArithSrcOther = Arith->getSrc(1);
      // The load's dest must feed the arithmetic op; for commutative ops it
      // may appear as either source, so swap to normalize.
      if (ArithSrcFromLoad != Load->getDest()) {
        if (!Arith->isCommutative() || ArithSrcOther != Load->getDest())
          continue;
        std::swap(ArithSrcFromLoad, ArithSrcOther);
      }
      if (Arith->getDest() != Store->getData())
        continue;
      // Only certain ops/types have an x86 RMW encoding; canRMW() decides.
      if (!canRMW(Arith))
        continue;
      if (Func->isVerbose(IceV_RMW)) {
        Ostream &Str = Func->getContext()->getStrDump();
        Str << "Found RMW in " << Func->getFunctionName() << ":\n ";
        Load->dump(Func);
        Str << "\n ";
        Arith->dump(Func);
        Str << "\n ";
        Store->dump(Func);
        Str << "\n";
      }
      // Create the beacon variable and splice the FakeDef + FakeRMW in front
      // of the Store (i.e., before I3).
      Variable *Beacon = Func->makeVariable(IceType_i32);
      Beacon->setMustNotHaveReg();
      Store->setRmwBeacon(Beacon);
      auto *BeaconDef = InstFakeDef::create(Func, Beacon);
      Node->getInsts().insert(I3, BeaconDef);
      auto *RMW = InstX86FakeRMW::create(Func, ArithSrcOther, Store->getAddr(),
                                         Beacon, Arith->getOp());
      Node->getInsts().insert(I3, RMW);
    }
  }
  if (Func->isVerbose(IceV_RMW))
    Func->getContext()->unlockStr();
}
767
768// Converts a ConstantInteger32 operand into its constant value, or
769// MemoryOrderInvalid if the operand is not a ConstantInteger32.
John Porto5aeed952015-07-21 13:39:09 -0700770inline uint64_t getConstantMemoryOrder(Operand *Opnd) {
Jim Stichnoth5bff61c2015-10-28 09:26:00 -0700771 if (auto *Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
John Porto7e93c622015-06-23 10:58:57 -0700772 return Integer->getValue();
773 return Intrinsics::MemoryOrderInvalid;
774}
775
Andrew Scull57e12682015-09-16 11:30:19 -0700776/// Determines whether the dest of a Load instruction can be folded into one of
777/// the src operands of a 2-operand instruction. This is true as long as the
778/// load dest matches exactly one of the binary instruction's src operands.
779/// Replaces Src0 or Src1 with LoadSrc if the answer is true.
John Porto5aeed952015-07-21 13:39:09 -0700780inline bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest,
781 Operand *&Src0, Operand *&Src1) {
John Porto7e93c622015-06-23 10:58:57 -0700782 if (Src0 == LoadDest && Src1 != LoadDest) {
783 Src0 = LoadSrc;
784 return true;
785 }
786 if (Src0 != LoadDest && Src1 == LoadDest) {
787 Src1 = LoadSrc;
788 return true;
789 }
790 return false;
791}
792
/// Peephole pass that folds a load (or equivalent single-instruction
/// AtomicLoad) directly into the immediately-following instruction
/// (arithmetic, icmp, fcmp, select, or cast) when that instruction is the
/// last use of the load's dest, replacing the pair with one new instruction.
template <typename TraitsType> void TargetX86Base<TraitsType>::doLoadOpt() {
  TimerMarker _(TimerStack::TT_loadOpt, Func);
  for (CfgNode *Node : Func->getNodes()) {
    Context.init(Node);
    while (!Context.atEnd()) {
      // LoadDest/LoadSrc are set only when CurInst qualifies as a foldable
      // load; they stay null otherwise.
      Variable *LoadDest = nullptr;
      Operand *LoadSrc = nullptr;
      Inst *CurInst = Context.getCur();
      Inst *Next = Context.getNextInst();
      // Determine whether the current instruction is a Load instruction or
      // equivalent.
      if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) {
        // An InstLoad always qualifies.
        LoadDest = Load->getDest();
        constexpr bool DoLegalize = false;
        LoadSrc = formMemoryOperand(Load->getSourceAddress(),
                                    LoadDest->getType(), DoLegalize);
      } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) {
        // An AtomicLoad intrinsic qualifies as long as it has a valid memory
        // ordering, and can be implemented in a single instruction (i.e., not
        // i64 on x86-32).
        Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID;
        if (ID == Intrinsics::AtomicLoad &&
            (Traits::Is64Bit || Intrin->getDest()->getType() != IceType_i64) &&
            Intrinsics::isMemoryOrderValid(
                ID, getConstantMemoryOrder(Intrin->getArg(1)))) {
          LoadDest = Intrin->getDest();
          constexpr bool DoLegalize = false;
          LoadSrc = formMemoryOperand(Intrin->getArg(0), LoadDest->getType(),
                                      DoLegalize);
        }
      }
      // A Load instruction can be folded into the following instruction only
      // if the following instruction ends the Load's Dest variable's live
      // range.
      if (LoadDest && Next && Next->isLastUse(LoadDest)) {
        assert(LoadSrc);
        Inst *NewInst = nullptr;
        if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Next)) {
          Operand *Src0 = Arith->getSrc(0);
          Operand *Src1 = Arith->getSrc(1);
          if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
            NewInst = InstArithmetic::create(Func, Arith->getOp(),
                                             Arith->getDest(), Src0, Src1);
          }
        } else if (auto *Icmp = llvm::dyn_cast<InstIcmp>(Next)) {
          Operand *Src0 = Icmp->getSrc(0);
          Operand *Src1 = Icmp->getSrc(1);
          if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
            NewInst = InstIcmp::create(Func, Icmp->getCondition(),
                                       Icmp->getDest(), Src0, Src1);
          }
        } else if (auto *Fcmp = llvm::dyn_cast<InstFcmp>(Next)) {
          Operand *Src0 = Fcmp->getSrc(0);
          Operand *Src1 = Fcmp->getSrc(1);
          if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
            NewInst = InstFcmp::create(Func, Fcmp->getCondition(),
                                       Fcmp->getDest(), Src0, Src1);
          }
        } else if (auto *Select = llvm::dyn_cast<InstSelect>(Next)) {
          Operand *Src0 = Select->getTrueOperand();
          Operand *Src1 = Select->getFalseOperand();
          if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
            NewInst = InstSelect::create(Func, Select->getDest(),
                                         Select->getCondition(), Src0, Src1);
          }
        } else if (auto *Cast = llvm::dyn_cast<InstCast>(Next)) {
          // The load dest can always be folded into a Cast instruction.
          auto *Src0 = llvm::dyn_cast<Variable>(Cast->getSrc(0));
          if (Src0 == LoadDest) {
            NewInst = InstCast::create(Func, Cast->getCastKind(),
                                       Cast->getDest(), LoadSrc);
          }
        }
        if (NewInst) {
          // Retire both originals and insert the fused instruction.
          CurInst->setDeleted();
          Next->setDeleted();
          Context.insert(NewInst);
          // Update NewInst->LiveRangesEnded so that target lowering may
          // benefit. Also update NewInst->HasSideEffects.
          NewInst->spliceLivenessInfo(Next, CurInst);
        }
      }
      Context.advanceCur();
      Context.advanceNext();
    }
  }
  Func->dump("After load optimization");
}
882
John Porto4a566862016-01-04 09:33:41 -0800883template <typename TraitsType>
884bool TargetX86Base<TraitsType>::doBranchOpt(Inst *I, const CfgNode *NextNode) {
885 if (auto *Br = llvm::dyn_cast<InstX86Br>(I)) {
John Porto7e93c622015-06-23 10:58:57 -0700886 return Br->optimizeBranch(NextNode);
887 }
888 return false;
889}
890
John Porto4a566862016-01-04 09:33:41 -0800891template <typename TraitsType>
Jim Stichnoth8aa39662016-02-10 11:20:30 -0800892Variable *TargetX86Base<TraitsType>::getPhysicalRegister(RegNumT RegNum,
John Porto4a566862016-01-04 09:33:41 -0800893 Type Ty) {
John Porto7e93c622015-06-23 10:58:57 -0700894 if (Ty == IceType_void)
895 Ty = IceType_i32;
896 if (PhysicalRegisters[Ty].empty())
John Porto5d0acff2015-06-30 15:29:21 -0700897 PhysicalRegisters[Ty].resize(Traits::RegisterSet::Reg_NUM);
Jim Stichnoth8aa39662016-02-10 11:20:30 -0800898 assert(unsigned(RegNum) < PhysicalRegisters[Ty].size());
John Porto7e93c622015-06-23 10:58:57 -0700899 Variable *Reg = PhysicalRegisters[Ty][RegNum];
900 if (Reg == nullptr) {
John Porto5aeed952015-07-21 13:39:09 -0700901 Reg = Func->makeVariable(Ty);
John Porto7e93c622015-06-23 10:58:57 -0700902 Reg->setRegNum(RegNum);
903 PhysicalRegisters[Ty][RegNum] = Reg;
Jim Stichnoth69660552015-09-18 06:41:02 -0700904 // Specially mark a named physical register as an "argument" so that it is
905 // considered live upon function entry. Otherwise it's possible to get
906 // liveness validation errors for saving callee-save registers.
907 Func->addImplicitArg(Reg);
908 // Don't bother tracking the live range of a named physical register.
909 Reg->setIgnoreLiveness();
John Porto7e93c622015-06-23 10:58:57 -0700910 }
Jim Stichnoth8aa39662016-02-10 11:20:30 -0800911 assert(Traits::getGprForType(Ty, RegNum) == RegNum);
John Porto7e93c622015-06-23 10:58:57 -0700912 return Reg;
913}
914
John Porto4a566862016-01-04 09:33:41 -0800915template <typename TraitsType>
Jim Stichnoth467ffe52016-03-29 15:01:06 -0700916const char *TargetX86Base<TraitsType>::getRegName(RegNumT RegNum,
917 Type Ty) const {
John Porto008f4ce2015-12-24 13:22:18 -0800918 return Traits::getRegName(Traits::getGprForType(Ty, RegNum));
John Porto7e93c622015-06-23 10:58:57 -0700919}
920
John Porto4a566862016-01-04 09:33:41 -0800921template <typename TraitsType>
922void TargetX86Base<TraitsType>::emitVariable(const Variable *Var) const {
Jan Voung28068ad2015-07-31 12:58:46 -0700923 if (!BuildDefs::dump())
924 return;
John Porto7e93c622015-06-23 10:58:57 -0700925 Ostream &Str = Ctx->getStrEmit();
926 if (Var->hasReg()) {
John Porto56958cb2016-01-14 09:18:18 -0800927 const bool Is64BitSandboxing = Traits::Is64Bit && NeedSandboxing;
928 const Type VarType = (Var->isRematerializable() && Is64BitSandboxing)
929 ? IceType_i64
930 : Var->getType();
931 Str << "%" << getRegName(Var->getRegNum(), VarType);
John Porto7e93c622015-06-23 10:58:57 -0700932 return;
933 }
Andrew Scull11c9a322015-08-28 14:24:14 -0700934 if (Var->mustHaveReg()) {
Jim Stichnotha91c3412016-04-05 15:31:43 -0700935 llvm::report_fatal_error("Infinite-weight Variable (" + Var->getName() +
Jim Stichnoth45bec542016-02-05 10:26:09 -0800936 ") has no register assigned - function " +
937 Func->getFunctionName());
John Porto7e93c622015-06-23 10:58:57 -0700938 }
Jim Stichnoth238b4c12015-10-01 07:46:38 -0700939 const int32_t Offset = Var->getStackOffset();
Jim Stichnoth8aa39662016-02-10 11:20:30 -0800940 auto BaseRegNum = Var->getBaseRegNum();
Reed Kotler5fa0a5f2016-02-15 20:01:24 -0800941 if (BaseRegNum.hasNoValue())
Jan Voung28068ad2015-07-31 12:58:46 -0700942 BaseRegNum = getFrameOrStackReg();
David Sehr26217e32015-11-26 13:03:50 -0800943 // Print in the form "Offset(%reg)", taking care that:
Jim Stichnoth238b4c12015-10-01 07:46:38 -0700944 // - Offset is never printed when it is 0
Jim Stichnoth238b4c12015-10-01 07:46:38 -0700945
Karl Schimpfd4699942016-04-02 09:55:31 -0700946 const bool DecorateAsm = getFlags().getDecorateAsm();
Jim Stichnoth238b4c12015-10-01 07:46:38 -0700947 // Only print Offset when it is nonzero, regardless of DecorateAsm.
948 if (Offset) {
Jim Stichnoth238b4c12015-10-01 07:46:38 -0700949 if (DecorateAsm) {
Jim Stichnotha91c3412016-04-05 15:31:43 -0700950 Str << Var->getSymbolicStackOffset();
Jim Stichnoth238b4c12015-10-01 07:46:38 -0700951 } else {
952 Str << Offset;
953 }
954 }
John Porto1d235422015-08-12 12:37:53 -0700955 const Type FrameSPTy = Traits::WordType;
Jan Voung28068ad2015-07-31 12:58:46 -0700956 Str << "(%" << getRegName(BaseRegNum, FrameSPTy) << ")";
John Porto7e93c622015-06-23 10:58:57 -0700957}
958
John Porto4a566862016-01-04 09:33:41 -0800959template <typename TraitsType>
960typename TargetX86Base<TraitsType>::X86Address
961TargetX86Base<TraitsType>::stackVarToAsmOperand(const Variable *Var) const {
John Porto7e93c622015-06-23 10:58:57 -0700962 if (Var->hasReg())
Jim Stichnoth8ff4b282016-01-04 15:39:06 -0800963 llvm::report_fatal_error("Stack Variable has a register assigned");
Andrew Scull11c9a322015-08-28 14:24:14 -0700964 if (Var->mustHaveReg()) {
Jim Stichnotha91c3412016-04-05 15:31:43 -0700965 llvm::report_fatal_error("Infinite-weight Variable (" + Var->getName() +
Jim Stichnoth45bec542016-02-05 10:26:09 -0800966 ") has no register assigned - function " +
967 Func->getFunctionName());
John Porto7e93c622015-06-23 10:58:57 -0700968 }
969 int32_t Offset = Var->getStackOffset();
Jim Stichnoth8aa39662016-02-10 11:20:30 -0800970 auto BaseRegNum = Var->getBaseRegNum();
Reed Kotler5fa0a5f2016-02-15 20:01:24 -0800971 if (Var->getBaseRegNum().hasNoValue())
Jan Voung28068ad2015-07-31 12:58:46 -0700972 BaseRegNum = getFrameOrStackReg();
John Porto4a566862016-01-04 09:33:41 -0800973 return X86Address(Traits::getEncodedGPR(BaseRegNum), Offset,
974 AssemblerFixup::NoFixup);
John Porto7e93c622015-06-23 10:58:57 -0700975}
976
/// Builds the function prolog for \p Node: pushes callee-save registers,
/// optionally links the frame pointer, computes/aligns the full spill-area
/// layout, adjusts the stack pointer, assigns stack offsets to arguments and
/// spilled variables, and optionally dumps the resulting frame layout.
template <typename TraitsType>
void TargetX86Base<TraitsType>::addProlog(CfgNode *Node) {
  // Stack frame layout:
  //
  // +------------------------+
  // | 1. return address |
  // +------------------------+
  // | 2. preserved registers |
  // +------------------------+
  // | 3. padding |
  // +------------------------+
  // | 4. global spill area |
  // +------------------------+
  // | 5. padding |
  // +------------------------+
  // | 6. local spill area |
  // +------------------------+
  // | 7. padding |
  // +------------------------+
  // | 8. allocas |
  // +------------------------+
  // | 9. padding |
  // +------------------------+
  // | 10. out args |
  // +------------------------+ <--- StackPointer
  //
  // The following variables record the size in bytes of the given areas:
  //  * X86_RET_IP_SIZE_BYTES: area 1
  //  * PreservedRegsSizeBytes: area 2
  //  * SpillAreaPaddingBytes: area 3
  //  * GlobalsSize: area 4
  //  * GlobalsAndSubsequentPaddingSize: areas 4 - 5
  //  * LocalsSpillAreaSize: area 6
  //  * SpillAreaSizeBytes: areas 3 - 10
  //  * maxOutArgsSizeBytes(): area 10

  // Determine stack frame offsets for each Variable without a register
  // assignment. This can be done as one variable per stack slot. Or, do
  // coalescing by running the register allocator again with an infinite set of
  // registers (as a side effect, this gives variables a second chance at
  // physical register assignment).
  //
  // A middle ground approach is to leverage sparsity and allocate one block of
  // space on the frame for globals (variables with multi-block lifetime), and
  // one block to share for locals (single-block lifetime).

  Context.init(Node);
  Context.setInsertPoint(Context.getCur());

  SmallBitVector CalleeSaves = getRegisterSet(RegSet_CalleeSave, RegSet_None);
  RegsUsed = SmallBitVector(CalleeSaves.size());
  VarList SortedSpilledVariables, VariablesLinkedToSpillSlots;
  size_t GlobalsSize = 0;
  // If there is a separate locals area, this represents that area. Otherwise
  // it counts any variable not counted by GlobalsSize.
  SpillAreaSizeBytes = 0;
  // If there is a separate locals area, this specifies the alignment for it.
  uint32_t LocalsSlotsAlignmentBytes = 0;
  // The entire spill locations area gets aligned to largest natural alignment
  // of the variables that have a spill slot.
  uint32_t SpillAreaAlignmentBytes = 0;
  // A spill slot linked to a variable with a stack slot should reuse that
  // stack slot.
  std::function<bool(Variable *)> TargetVarHook =
      [&VariablesLinkedToSpillSlots](Variable *Var) {
        if (auto *SpillVar =
                llvm::dyn_cast<typename Traits::SpillVariable>(Var)) {
          assert(Var->mustNotHaveReg());
          if (SpillVar->getLinkedTo() && !SpillVar->getLinkedTo()->hasReg()) {
            VariablesLinkedToSpillSlots.push_back(Var);
            return true;
          }
        }
        return false;
      };

  // Compute the list of spilled variables and bounds for GlobalsSize, etc.
  getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
                        &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
                        &LocalsSlotsAlignmentBytes, TargetVarHook);
  uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
  SpillAreaSizeBytes += GlobalsSize;

  // Add push instructions for preserved registers.
  uint32_t NumCallee = 0;
  size_t PreservedRegsSizeBytes = 0;
  SmallBitVector Pushed(CalleeSaves.size());
  // First mark which canonical (base) registers need saving...
  for (RegNumT i : RegNumBVIter(CalleeSaves)) {
    const auto Canonical = Traits::getBaseReg(i);
    assert(Canonical == Traits::getBaseReg(Canonical));
    if (RegsUsed[i]) {
      Pushed[Canonical] = true;
    }
  }
  // ...then emit one push per marked canonical register.
  for (RegNumT RegNum : RegNumBVIter(Pushed)) {
    assert(RegNum == Traits::getBaseReg(RegNum));
    ++NumCallee;
    PreservedRegsSizeBytes += typeWidthInBytes(Traits::WordType);
    _push_reg(getPhysicalRegister(RegNum, Traits::WordType));
  }
  Ctx->statsUpdateRegistersSaved(NumCallee);

  // Generate "push frameptr; mov frameptr, stackptr"
  if (IsEbpBasedFrame) {
    assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))
               .count() == 0);
    PreservedRegsSizeBytes += typeWidthInBytes(Traits::WordType);
    _link_bp();
  }

  // Align the variables area. SpillAreaPaddingBytes is the size of the region
  // after the preserved registers and before the spill areas.
  // LocalsSlotsPaddingBytes is the amount of padding between the globals and
  // locals area if they are separate.
  assert(SpillAreaAlignmentBytes <= Traits::X86_STACK_ALIGNMENT_BYTES);
  assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
  uint32_t SpillAreaPaddingBytes = 0;
  uint32_t LocalsSlotsPaddingBytes = 0;
  alignStackSpillAreas(Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes,
                       SpillAreaAlignmentBytes, GlobalsSize,
                       LocalsSlotsAlignmentBytes, &SpillAreaPaddingBytes,
                       &LocalsSlotsPaddingBytes);
  SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
  uint32_t GlobalsAndSubsequentPaddingSize =
      GlobalsSize + LocalsSlotsPaddingBytes;

  // Functions returning scalar floating point types may need to convert values
  // from an in-register xmm value to the top of the x87 floating point stack.
  // This is done by a movp[sd] and an fld[sd]. Ensure there is enough scratch
  // space on the stack for this.
  const Type ReturnType = Func->getReturnType();
  if (!Traits::X86_PASS_SCALAR_FP_IN_XMM) {
    if (isScalarFloatingType(ReturnType)) {
      // Avoid misaligned double-precicion load/store.
      NeedsStackAlignment = true;
      SpillAreaSizeBytes =
          std::max(typeWidthInBytesOnStack(ReturnType), SpillAreaSizeBytes);
    }
  }

  // Align esp if necessary.
  if (NeedsStackAlignment) {
    uint32_t StackOffset =
        Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
    uint32_t StackSize =
        Traits::applyStackAlignment(StackOffset + SpillAreaSizeBytes);
    StackSize = Traits::applyStackAlignment(StackSize + maxOutArgsSizeBytes());
    SpillAreaSizeBytes = StackSize - StackOffset;
  } else {
    SpillAreaSizeBytes += maxOutArgsSizeBytes();
  }

  // Combine fixed allocations into SpillAreaSizeBytes if we are emitting the
  // fixed allocations in the prolog.
  if (PrologEmitsFixedAllocas)
    SpillAreaSizeBytes += FixedAllocaSizeBytes;
  if (SpillAreaSizeBytes) {
    // Generate "sub stackptr, SpillAreaSizeBytes"
    _sub_sp(Ctx->getConstantInt32(SpillAreaSizeBytes));
    // If the fixed allocas are aligned more than the stack frame, align the
    // stack pointer accordingly.
    if (PrologEmitsFixedAllocas &&
        FixedAllocaAlignBytes > Traits::X86_STACK_ALIGNMENT_BYTES) {
      assert(IsEbpBasedFrame);
      _and(getPhysicalRegister(getStackReg(), Traits::WordType),
           Ctx->getConstantInt32(-FixedAllocaAlignBytes));
    }
  }

  // Account for known-frame-offset alloca instructions that were not already
  // combined into the prolog.
  if (!PrologEmitsFixedAllocas)
    SpillAreaSizeBytes += FixedAllocaSizeBytes;

  Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);

  // Fill in stack offsets for stack args, and copy args into registers for
  // those that were register-allocated. Args are pushed right to left, so
  // Arg[0] is closest to the stack/frame pointer.
  Variable *FramePtr =
      getPhysicalRegister(getFrameOrStackReg(), Traits::WordType);
  size_t BasicFrameOffset =
      PreservedRegsSizeBytes + Traits::X86_RET_IP_SIZE_BYTES;
  if (!IsEbpBasedFrame)
    BasicFrameOffset += SpillAreaSizeBytes;

  emitGetIP(Node);

  const VarList &Args = Func->getArgs();
  size_t InArgsSizeBytes = 0;
  unsigned NumXmmArgs = 0;
  unsigned NumGPRArgs = 0;
  for (Variable *Arg : Args) {
    // Skip arguments passed in registers.
    if (isVectorType(Arg->getType())) {
      if (Traits::getRegisterForXmmArgNum(NumXmmArgs).hasValue()) {
        ++NumXmmArgs;
        continue;
      }
    } else if (isScalarFloatingType(Arg->getType())) {
      if (Traits::X86_PASS_SCALAR_FP_IN_XMM &&
          Traits::getRegisterForXmmArgNum(NumXmmArgs).hasValue()) {
        ++NumXmmArgs;
        continue;
      }
    } else {
      assert(isScalarIntegerType(Arg->getType()));
      if (Traits::getRegisterForGprArgNum(Traits::WordType, NumGPRArgs)
              .hasValue()) {
        ++NumGPRArgs;
        continue;
      }
    }
    // For esp-based frames where the allocas are done outside the prolog, the
    // esp value may not stabilize to its home value until after all the
    // fixed-size alloca instructions have executed. In this case, a stack
    // adjustment is needed when accessing in-args in order to copy them into
    // registers.
    size_t StackAdjBytes = 0;
    if (!IsEbpBasedFrame && !PrologEmitsFixedAllocas)
      StackAdjBytes -= FixedAllocaSizeBytes;
    finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, StackAdjBytes,
                           InArgsSizeBytes);
  }

  // Fill in stack offsets for locals.
  assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
                      SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
                      IsEbpBasedFrame);
  // Assign stack offsets to variables that have been linked to spilled
  // variables.
  for (Variable *Var : VariablesLinkedToSpillSlots) {
    Variable *Linked =
        (llvm::cast<typename Traits::SpillVariable>(Var))->getLinkedTo();
    Var->setStackOffset(Linked->getStackOffset());
  }
  this->HasComputedFrame = true;

  // Optional verbose dump of the computed frame layout.
  if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
    OstreamLocker L(Func->getContext());
    Ostream &Str = Func->getContext()->getStrDump();

    Str << "Stack layout:\n";
    uint32_t EspAdjustmentPaddingSize =
        SpillAreaSizeBytes - LocalsSpillAreaSize -
        GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes -
        maxOutArgsSizeBytes();
    Str << " in-args = " << InArgsSizeBytes << " bytes\n"
        << " return address = " << Traits::X86_RET_IP_SIZE_BYTES << " bytes\n"
        << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
        << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
        << " globals spill area = " << GlobalsSize << " bytes\n"
        << " globals-locals spill areas intermediate padding = "
        << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
        << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
        << " esp alignment padding = " << EspAdjustmentPaddingSize
        << " bytes\n";

    Str << "Stack details:\n"
        << " esp adjustment = " << SpillAreaSizeBytes << " bytes\n"
        << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
        << " outgoing args size = " << maxOutArgsSizeBytes() << " bytes\n"
        << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
        << " bytes\n"
        << " is ebp based = " << IsEbpBasedFrame << "\n";
  }
}
1244
Andrew Scull9612d322015-07-06 14:53:25 -07001245/// Helper function for addProlog().
1246///
Andrew Scull57e12682015-09-16 11:30:19 -07001247/// This assumes Arg is an argument passed on the stack. This sets the frame
1248/// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an
1249/// I64 arg that has been split into Lo and Hi components, it calls itself
1250/// recursively on the components, taking care to handle Lo first because of the
1251/// little-endian architecture. Lastly, this function generates an instruction
1252/// to copy Arg into its assigned register if applicable.
template <typename TraitsType>
void TargetX86Base<TraitsType>::finishArgumentLowering(
    Variable *Arg, Variable *FramePtr, size_t BasicFrameOffset,
    size_t StackAdjBytes, size_t &InArgsSizeBytes) {
  // On 32-bit targets, a 64-bit argument is modeled as a Variable64On32 and
  // each 32-bit half is lowered separately, Lo first (little-endian layout).
  if (!Traits::Is64Bit) {
    if (auto *Arg64On32 = llvm::dyn_cast<Variable64On32>(Arg)) {
      Variable *Lo = Arg64On32->getLo();
      Variable *Hi = Arg64On32->getHi();
      finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, StackAdjBytes,
                             InArgsSizeBytes);
      finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, StackAdjBytes,
                             InArgsSizeBytes);
      return;
    }
  }
  Type Ty = Arg->getType();
  // Vector arguments are aligned within the in-args area.
  if (isVectorType(Ty)) {
    InArgsSizeBytes = Traits::applyStackAlignment(InArgsSizeBytes);
  }
  // Record the argument's frame offset and advance the running in-args size.
  Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
  InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
  // If the argument was register-allocated, emit the copy from its stack home
  // into the assigned register.
  if (Arg->hasReg()) {
    assert(Ty != IceType_i64 || Traits::Is64Bit);
    auto *Mem = X86OperandMem::create(
        Func, Ty, FramePtr,
        Ctx->getConstantInt32(Arg->getStackOffset() + StackAdjBytes));
    if (isVectorType(Arg->getType())) {
      _movp(Arg, Mem);
    } else {
      _mov(Arg, Mem);
    }
    // This argument-copying instruction uses an explicit X86OperandMem
    // operand instead of a Variable, so its fill-from-stack operation has to
    // be tracked separately for statistics.
    Ctx->statsUpdateFills();
  }
}
1290
/// Builds the function epilog for \p Node: locates the last ret instruction,
/// then inserts frame teardown (unlink bp or re-add the stack adjustment) and
/// pops of preserved registers in front of it. Under sandboxing, the original
/// ret is replaced by a sandboxed return sequence.
template <typename TraitsType>
void TargetX86Base<TraitsType>::addEpilog(CfgNode *Node) {
  InstList &Insts = Node->getInsts();
  InstList::reverse_iterator RI, E;
  // Search backwards for the ret instruction; bail if the node has none.
  for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
    if (llvm::isa<typename Traits::Insts::Ret>(*RI))
      break;
  }
  if (RI == E)
    return;

  // Convert the reverse_iterator position into its corresponding (forward)
  // iterator position.
  InstList::iterator InsertPoint = RI.base();
  --InsertPoint;
  Context.init(Node);
  Context.setInsertPoint(InsertPoint);

  if (IsEbpBasedFrame) {
    _unlink_bp();
  } else {
    // add stackptr, SpillAreaSizeBytes
    if (SpillAreaSizeBytes != 0) {
      _add_sp(Ctx->getConstantInt32(SpillAreaSizeBytes));
    }
  }

  // Add pop instructions for preserved registers.
  SmallBitVector CalleeSaves = getRegisterSet(RegSet_CalleeSave, RegSet_None);
  SmallBitVector Popped(CalleeSaves.size());
  // Mark the canonical (base) form of each register that was pushed in the
  // prolog; the frame pointer is restored by _unlink_bp(), not popped here.
  for (int32_t i = CalleeSaves.size() - 1; i >= 0; --i) {
    const auto RegNum = RegNumT::fromInt(i);
    if (RegNum == getFrameReg() && IsEbpBasedFrame)
      continue;
    const RegNumT Canonical = Traits::getBaseReg(RegNum);
    if (CalleeSaves[i] && RegsUsed[i]) {
      Popped[Canonical] = true;
    }
  }
  // Pop in reverse order of the prolog's pushes.
  for (int32_t i = Popped.size() - 1; i >= 0; --i) {
    if (!Popped[i])
      continue;
    const auto RegNum = RegNumT::fromInt(i);
    assert(RegNum == Traits::getBaseReg(RegNum));
    _pop(getPhysicalRegister(RegNum, Traits::WordType));
  }

  if (!NeedSandboxing) {
    return;
  }
  // Replace the plain ret with a sandboxed return, keeping any returned value
  // alive with a FakeUse before deleting the original instruction.
  emitSandboxedReturn();
  if (RI->getSrcSize()) {
    auto *RetValue = llvm::cast<Variable>(RI->getSrc(0));
    Context.insert<InstFakeUse>(RetValue);
  }
  RI->setDeleted();
}
1348
John Porto4a566862016-01-04 09:33:41 -08001349template <typename TraitsType> Type TargetX86Base<TraitsType>::stackSlotType() {
John Porto1d235422015-08-12 12:37:53 -07001350 return Traits::WordType;
John Porto7e93c622015-06-23 10:58:57 -07001351}
1352
John Porto4a566862016-01-04 09:33:41 -08001353template <typename TraitsType>
John Porto1d235422015-08-12 12:37:53 -07001354template <typename T>
John Porto1d235422015-08-12 12:37:53 -07001355typename std::enable_if<!T::Is64Bit, Operand>::type *
John Porto4a566862016-01-04 09:33:41 -08001356TargetX86Base<TraitsType>::loOperand(Operand *Operand) {
John Porto7e93c622015-06-23 10:58:57 -07001357 assert(Operand->getType() == IceType_i64 ||
1358 Operand->getType() == IceType_f64);
1359 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
1360 return Operand;
Andrew Scull6d47bcd2015-09-17 17:10:05 -07001361 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand))
1362 return Var64On32->getLo();
Jan Voungfbdd2442015-07-15 12:36:20 -07001363 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
1364 auto *ConstInt = llvm::dyn_cast<ConstantInteger32>(
John Porto7e93c622015-06-23 10:58:57 -07001365 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue())));
Jan Voungfbdd2442015-07-15 12:36:20 -07001366 // Check if we need to blind/pool the constant.
John Porto7e93c622015-06-23 10:58:57 -07001367 return legalize(ConstInt);
1368 }
John Porto4a566862016-01-04 09:33:41 -08001369 if (auto *Mem = llvm::dyn_cast<X86OperandMem>(Operand)) {
1370 auto *MemOperand = X86OperandMem::create(
John Porto7e93c622015-06-23 10:58:57 -07001371 Func, IceType_i32, Mem->getBase(), Mem->getOffset(), Mem->getIndex(),
John Porto56958cb2016-01-14 09:18:18 -08001372 Mem->getShift(), Mem->getSegmentRegister(), Mem->getIsRebased());
John Porto7e93c622015-06-23 10:58:57 -07001373 // Test if we should randomize or pool the offset, if so randomize it or
1374 // pool it then create mem operand with the blinded/pooled constant.
1375 // Otherwise, return the mem operand as ordinary mem operand.
1376 return legalize(MemOperand);
1377 }
1378 llvm_unreachable("Unsupported operand type");
1379 return nullptr;
1380}
1381
John Porto4a566862016-01-04 09:33:41 -08001382template <typename TraitsType>
John Porto1d235422015-08-12 12:37:53 -07001383template <typename T>
1384typename std::enable_if<!T::Is64Bit, Operand>::type *
John Porto4a566862016-01-04 09:33:41 -08001385TargetX86Base<TraitsType>::hiOperand(Operand *Operand) {
John Porto7e93c622015-06-23 10:58:57 -07001386 assert(Operand->getType() == IceType_i64 ||
1387 Operand->getType() == IceType_f64);
1388 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
1389 return Operand;
Andrew Scull6d47bcd2015-09-17 17:10:05 -07001390 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand))
1391 return Var64On32->getHi();
Jan Voungfbdd2442015-07-15 12:36:20 -07001392 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
1393 auto *ConstInt = llvm::dyn_cast<ConstantInteger32>(
John Porto7e93c622015-06-23 10:58:57 -07001394 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue() >> 32)));
Jan Voungfbdd2442015-07-15 12:36:20 -07001395 // Check if we need to blind/pool the constant.
John Porto7e93c622015-06-23 10:58:57 -07001396 return legalize(ConstInt);
1397 }
John Porto4a566862016-01-04 09:33:41 -08001398 if (auto *Mem = llvm::dyn_cast<X86OperandMem>(Operand)) {
John Porto7e93c622015-06-23 10:58:57 -07001399 Constant *Offset = Mem->getOffset();
1400 if (Offset == nullptr) {
1401 Offset = Ctx->getConstantInt32(4);
Jan Voungfbdd2442015-07-15 12:36:20 -07001402 } else if (auto *IntOffset = llvm::dyn_cast<ConstantInteger32>(Offset)) {
John Porto7e93c622015-06-23 10:58:57 -07001403 Offset = Ctx->getConstantInt32(4 + IntOffset->getValue());
Jan Voungfbdd2442015-07-15 12:36:20 -07001404 } else if (auto *SymOffset = llvm::dyn_cast<ConstantRelocatable>(Offset)) {
John Porto7e93c622015-06-23 10:58:57 -07001405 assert(!Utils::WouldOverflowAdd(SymOffset->getOffset(), 4));
1406 Offset =
Jim Stichnoth98ba0062016-03-07 09:26:22 -08001407 Ctx->getConstantSym(4 + SymOffset->getOffset(), SymOffset->getName());
John Porto7e93c622015-06-23 10:58:57 -07001408 }
John Porto4a566862016-01-04 09:33:41 -08001409 auto *MemOperand = X86OperandMem::create(
John Porto7e93c622015-06-23 10:58:57 -07001410 Func, IceType_i32, Mem->getBase(), Offset, Mem->getIndex(),
John Porto56958cb2016-01-14 09:18:18 -08001411 Mem->getShift(), Mem->getSegmentRegister(), Mem->getIsRebased());
John Porto7e93c622015-06-23 10:58:57 -07001412 // Test if the Offset is an eligible i32 constants for randomization and
1413 // pooling. Blind/pool it if it is. Otherwise return as oridinary mem
1414 // operand.
1415 return legalize(MemOperand);
1416 }
1417 llvm_unreachable("Unsupported operand type");
1418 return nullptr;
1419}
1420
John Porto4a566862016-01-04 09:33:41 -08001421template <typename TraitsType>
John Portoe82b5602016-02-24 15:58:55 -08001422SmallBitVector
John Porto4a566862016-01-04 09:33:41 -08001423TargetX86Base<TraitsType>::getRegisterSet(RegSetMask Include,
1424 RegSetMask Exclude) const {
Karl Schimpfd4699942016-04-02 09:55:31 -07001425 return Traits::getRegisterSet(getFlags(), Include, Exclude);
John Porto7e93c622015-06-23 10:58:57 -07001426}
1427
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerAlloca(const InstAlloca *Instr) {
  // Lowers an alloca: reserves Instr->getSizeInBytes() bytes of stack at the
  // requested alignment and assigns the resulting address (offset past the
  // out-args area) to the instruction's dest.
  // Conservatively require the stack to be aligned. Some stack adjustment
  // operations implemented below assume that the stack is aligned before the
  // alloca. All the alloca code ensures that the stack alignment is preserved
  // after the alloca. The stack alignment restriction can be relaxed in some
  // cases.
  NeedsStackAlignment = true;

  // For default align=0, set it to the real value 1, to avoid any
  // bit-manipulation problems below.
  const uint32_t AlignmentParam = std::max(1u, Instr->getAlignInBytes());

  // LLVM enforces power of 2 alignment.
  assert(llvm::isPowerOf2_32(AlignmentParam));
  assert(llvm::isPowerOf2_32(Traits::X86_STACK_ALIGNMENT_BYTES));

  const uint32_t Alignment =
      std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES);
  const bool OverAligned = Alignment > Traits::X86_STACK_ALIGNMENT_BYTES;
  const bool OptM1 = Func->getOptLevel() == Opt_m1;
  const bool AllocaWithKnownOffset = Instr->getKnownFrameOffset();
  // A frame pointer is required unless this alloca has a known frame offset
  // and needs no extra alignment (and we are optimizing).
  const bool UseFramePointer =
      hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1;

  if (UseFramePointer)
    setHasFramePointer();

  Variable *esp = getPhysicalRegister(getStackReg(), Traits::WordType);
  if (OverAligned) {
    // Round the stack pointer down to the requested alignment.
    _and(esp, Ctx->getConstantInt32(-Alignment));
  }

  Variable *Dest = Instr->getDest();
  Operand *TotalSize = legalize(Instr->getSizeInBytes());

  if (const auto *ConstantTotalSize =
          llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
    // Constant size: round it up to the alignment at compile time.
    const uint32_t Value =
        Utils::applyAlignment(ConstantTotalSize->getValue(), Alignment);
    if (UseFramePointer) {
      _sub_sp(Ctx->getConstantInt32(Value));
    } else {
      // If we don't need a Frame Pointer, this alloca has a known offset to the
      // stack pointer. We don't need to adjust the stack pointer, nor assign
      // any value to Dest, as Dest is rematerializable.
      assert(Dest->isRematerializable());
      FixedAllocaSizeBytes += Value;
      Context.insert<InstFakeDef>(Dest);
    }
  } else {
    // Non-constant sizes need to be adjusted to the next highest multiple of
    // the required alignment at runtime.
    Variable *T = nullptr;
    if (Traits::Is64Bit && TotalSize->getType() != IceType_i64 &&
        !NeedSandboxing) {
      // On x86-64 (unsandboxed), widen a narrower size to i64 for the
      // pointer-width arithmetic below.
      T = makeReg(IceType_i64);
      _movzx(T, TotalSize);
    } else {
      T = makeReg(IceType_i32);
      _mov(T, TotalSize);
    }
    // T = (T + Alignment - 1) & -Alignment, then carve T bytes off the stack.
    _add(T, Ctx->getConstantInt32(Alignment - 1));
    _and(T, Ctx->getConstantInt32(-Alignment));
    _sub_sp(T);
  }
  // Add enough to the returned address to account for the out args area.
  uint32_t OutArgsSize = maxOutArgsSizeBytes();
  if (OutArgsSize > 0) {
    // Dest = esp + OutArgsSize, computed with lea.
    Variable *T = makeReg(IceType_i32);
    auto *CalculateOperand = X86OperandMem::create(
        Func, IceType_void, esp, Ctx->getConstantInt(IceType_i32, OutArgsSize));
    _lea(T, CalculateOperand);
    _mov(Dest, T);
  } else {
    _mov(Dest, esp);
  }
}
1506
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerArguments() {
  // For each incoming argument passed in a register, creates a pre-colored
  // "home register" variable, substitutes it into the argument list, and
  // emits copies at the top of the entry node from the home register to the
  // original argument variable.
  const bool OptM1 = Func->getOptLevel() == Opt_m1;
  VarList &Args = Func->getArgs();
  unsigned NumXmmArgs = 0;
  bool XmmSlotsRemain = true;
  unsigned NumGprArgs = 0;
  bool GprSlotsRemain = true;

  Context.init(Func->getEntryNode());
  Context.setInsertPoint(Context.getCur());

  // Stop early once both the XMM and GPR argument registers are exhausted;
  // remaining arguments stay in their stack locations.
  for (SizeT i = 0, End = Args.size();
       i < End && (XmmSlotsRemain || GprSlotsRemain); ++i) {
    Variable *Arg = Args[i];
    Type Ty = Arg->getType();
    Variable *RegisterArg = nullptr;
    RegNumT RegNum;
    if (isVectorType(Ty)) {
      // Vector arguments go in XMM registers.
      RegNum = Traits::getRegisterForXmmArgNum(NumXmmArgs);
      if (RegNum.hasNoValue()) {
        XmmSlotsRemain = false;
        continue;
      }
      ++NumXmmArgs;
      RegisterArg = Func->makeVariable(Ty);
    } else if (isScalarFloatingType(Ty)) {
      // Scalar FP arguments use XMM registers only when the calling
      // convention says so (e.g. not on x86-32's stack-based convention).
      if (!Traits::X86_PASS_SCALAR_FP_IN_XMM) {
        continue;
      }
      RegNum = Traits::getRegisterForXmmArgNum(NumXmmArgs);
      if (RegNum.hasNoValue()) {
        XmmSlotsRemain = false;
        continue;
      }
      ++NumXmmArgs;
      RegisterArg = Func->makeVariable(Ty);
    } else if (isScalarIntegerType(Ty)) {
      // Scalar integer arguments go in GPRs, chosen based on the type.
      RegNum = Traits::getRegisterForGprArgNum(Ty, NumGprArgs);
      if (RegNum.hasNoValue()) {
        GprSlotsRemain = false;
        continue;
      }
      ++NumGprArgs;
      RegisterArg = Func->makeVariable(Ty);
    }
    assert(RegNum.hasValue());
    assert(RegisterArg != nullptr);
    // Replace Arg in the argument list with the home register. Then generate
    // an instruction in the prolog to copy the home register to the assigned
    // location of Arg.
    if (BuildDefs::dump())
      RegisterArg->setName(Func, "home_reg:" + Arg->getName());
    RegisterArg->setRegNum(RegNum);
    RegisterArg->setIsArg();
    Arg->setIsArg(false);

    Args[i] = RegisterArg;
    // When not Om1, do the assignment through a temporary, instead of directly
    // from the pre-colored variable, so that a subsequent availabilityGet()
    // call has a chance to work. (In Om1, don't bother creating extra
    // instructions with extra variables to register-allocate.)
    if (OptM1) {
      Context.insert<InstAssign>(Arg, RegisterArg);
    } else {
      Variable *Tmp = makeReg(RegisterArg->getType());
      Context.insert<InstAssign>(Tmp, RegisterArg);
      Context.insert<InstAssign>(Arg, Tmp);
    }
  }
  if (!OptM1)
    Context.availabilityUpdate();
}
1580
/// Strength-reduce scalar integer multiplication by a constant (for i32 or
/// narrower) for certain constants. The lea instruction can be used to multiply
/// by 3, 5, or 9, and the shl instruction can be used to multiply by powers of
/// 2. These can be combined such that e.g. multiplying by 100 can be done as 2
/// lea-based multiplies by 5, combined with left-shifting by 2.
/// Returns true if the multiply was lowered here, false if the caller must
/// fall back to a generic multiply.
template <typename TraitsType>
bool TargetX86Base<TraitsType>::optimizeScalarMul(Variable *Dest, Operand *Src0,
                                                  int32_t Src1) {
  // Disable this optimization for Om1 and O0, just to keep things simple
  // there.
  if (Func->getOptLevel() < Opt_1)
    return false;
  Type Ty = Dest->getType();
  // Trivial multipliers: -1 is a negation, 0 is a zero, 1 is a copy.
  if (Src1 == -1) {
    Variable *T = nullptr;
    _mov(T, Src0);
    _neg(T);
    _mov(Dest, T);
    return true;
  }
  if (Src1 == 0) {
    _mov(Dest, Ctx->getConstantZero(Ty));
    return true;
  }
  if (Src1 == 1) {
    Variable *T = nullptr;
    _mov(T, Src0);
    _mov(Dest, T);
    return true;
  }
  // Don't bother with the edge case where Src1 == MININT.
  if (Src1 == -Src1)
    return false;
  const bool Src1IsNegative = Src1 < 0;
  if (Src1IsNegative)
    Src1 = -Src1;
  // Factor |Src1| into 9s, 5s, 3s, and 2s, counting the number of emitted
  // operations (all the 2s together cost one shl). Any other prime factor
  // defeats the optimization.
  uint32_t Count9 = 0;
  uint32_t Count5 = 0;
  uint32_t Count3 = 0;
  uint32_t Count2 = 0;
  uint32_t CountOps = 0;
  while (Src1 > 1) {
    if (Src1 % 9 == 0) {
      ++CountOps;
      ++Count9;
      Src1 /= 9;
    } else if (Src1 % 5 == 0) {
      ++CountOps;
      ++Count5;
      Src1 /= 5;
    } else if (Src1 % 3 == 0) {
      ++CountOps;
      ++Count3;
      Src1 /= 3;
    } else if (Src1 % 2 == 0) {
      if (Count2 == 0)
        ++CountOps;
      ++Count2;
      Src1 /= 2;
    } else {
      return false;
    }
  }
  // The lea-based sequences below are only emitted for i32 (and i64 on
  // x86-64); bail out for narrower types whenever a lea factor is needed.
  // (NOTE(review): an earlier comment here claimed i16 was supported, but the
  // condition below rejects it.)
  if (Ty != IceType_i32 && !(Traits::Is64Bit && Ty == IceType_i64) &&
      (Count3 || Count5 || Count9))
    return false;
  // Limit the number of lea/shl operations for a single multiply, to a
  // somewhat arbitrary choice of 3.
  constexpr uint32_t MaxOpsForOptimizedMul = 3;
  if (CountOps > MaxOpsForOptimizedMul)
    return false;
  // Widen Src0 into a word-sized temporary if needed.
  Variable *T = makeReg(Traits::WordType);
  if (typeWidthInBytes(Src0->getType()) < typeWidthInBytes(T->getType())) {
    _movzx(T, Src0);
  } else {
    _mov(T, Src0);
  }
  Constant *Zero = Ctx->getConstantZero(IceType_i32);
  // Each lea computes T = T + T<<Shift, i.e. T *= (1<<Shift)+1.
  for (uint32_t i = 0; i < Count9; ++i) {
    constexpr uint16_t Shift = 3; // log2(9-1)
    _lea(T, X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
  }
  for (uint32_t i = 0; i < Count5; ++i) {
    constexpr uint16_t Shift = 2; // log2(5-1)
    _lea(T, X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
  }
  for (uint32_t i = 0; i < Count3; ++i) {
    constexpr uint16_t Shift = 1; // log2(3-1)
    _lea(T, X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
  }
  if (Count2) {
    _shl(T, Ctx->getConstantInt(Ty, Count2));
  }
  if (Src1IsNegative)
    _neg(T);
  _mov(Dest, T);
  return true;
}
1680
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerShift64(InstArithmetic::OpKind Op,
                                             Operand *Src0Lo, Operand *Src0Hi,
                                             Operand *Src1Lo, Variable *DestLo,
                                             Variable *DestHi) {
  // Lowers a 64-bit shift (Shl, Lshr, or Ashr) on a target where the value is
  // split into lo/hi 32-bit halves. Constant shift amounts are specialized
  // into the >32, ==32, and <32 cases; a non-constant amount uses a
  // shld/shrd sequence plus a test-and-branch on bit 5 of the count.
  // TODO: Refactor the similarities between Shl, Lshr, and Ashr.
  Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
  Constant *Zero = Ctx->getConstantZero(IceType_i32);
  Constant *SignExtend = Ctx->getConstantInt32(0x1f);
  if (auto *ConstantShiftAmount = llvm::dyn_cast<ConstantInteger32>(Src1Lo)) {
    uint32_t ShiftAmount = ConstantShiftAmount->getValue();
    if (ShiftAmount > 32) {
      Constant *ReducedShift = Ctx->getConstantInt32(ShiftAmount - 32);
      switch (Op) {
      default:
        assert(0 && "non-shift op");
        break;
      case InstArithmetic::Shl: {
        // a=b<<c ==>
        //   t2 = b.lo
        //   t2 = shl t2, ShiftAmount-32
        //   t3 = t2
        //   t2 = 0
        _mov(T_2, Src0Lo);
        _shl(T_2, ReducedShift);
        _mov(DestHi, T_2);
        _mov(DestLo, Zero);
      } break;
      case InstArithmetic::Lshr: {
        // a=b>>c (unsigned) ==>
        //   t2 = b.hi
        //   t2 = shr t2, ShiftAmount-32
        //   a.lo = t2
        //   a.hi = 0
        _mov(T_2, Src0Hi);
        _shr(T_2, ReducedShift);
        _mov(DestLo, T_2);
        _mov(DestHi, Zero);
      } break;
      case InstArithmetic::Ashr: {
        // a=b>>c (signed) ==>
        //   t3 = b.hi
        //   t3 = sar t3, 0x1f
        //   t2 = b.hi
        //   t2 = shrd t2, t3, ShiftAmount-32
        //   a.lo = t2
        //   a.hi = t3
        _mov(T_3, Src0Hi);
        _sar(T_3, SignExtend);
        _mov(T_2, Src0Hi);
        _shrd(T_2, T_3, ReducedShift);
        _mov(DestLo, T_2);
        _mov(DestHi, T_3);
      } break;
      }
    } else if (ShiftAmount == 32) {
      switch (Op) {
      default:
        assert(0 && "non-shift op");
        break;
      case InstArithmetic::Shl: {
        // a=b<<c ==>
        //   t2 = b.lo
        //   a.hi = t2
        //   a.lo = 0
        _mov(T_2, Src0Lo);
        _mov(DestHi, T_2);
        _mov(DestLo, Zero);
      } break;
      case InstArithmetic::Lshr: {
        // a=b>>c (unsigned) ==>
        //   t2 = b.hi
        //   a.lo = t2
        //   a.hi = 0
        _mov(T_2, Src0Hi);
        _mov(DestLo, T_2);
        _mov(DestHi, Zero);
      } break;
      case InstArithmetic::Ashr: {
        // a=b>>c (signed) ==>
        //   t2 = b.hi
        //   a.lo = t2
        //   t3 = b.hi
        //   t3 = sar t3, 0x1f
        //   a.hi = t3
        _mov(T_2, Src0Hi);
        _mov(DestLo, T_2);
        _mov(T_3, Src0Hi);
        _sar(T_3, SignExtend);
        _mov(DestHi, T_3);
      } break;
      }
    } else {
      // ShiftAmount < 32: both halves participate via shld/shrd.
      // COMMON PREFIX OF: a=b SHIFT_OP c ==>
      //   t2 = b.lo
      //   t3 = b.hi
      _mov(T_2, Src0Lo);
      _mov(T_3, Src0Hi);
      switch (Op) {
      default:
        assert(0 && "non-shift op");
        break;
      case InstArithmetic::Shl: {
        // a=b<<c ==>
        //   t3 = shld t3, t2, ShiftAmount
        //   t2 = shl t2, ShiftAmount
        _shld(T_3, T_2, ConstantShiftAmount);
        _shl(T_2, ConstantShiftAmount);
      } break;
      case InstArithmetic::Lshr: {
        // a=b>>c (unsigned) ==>
        //   t2 = shrd t2, t3, ShiftAmount
        //   t3 = shr t3, ShiftAmount
        _shrd(T_2, T_3, ConstantShiftAmount);
        _shr(T_3, ConstantShiftAmount);
      } break;
      case InstArithmetic::Ashr: {
        // a=b>>c (signed) ==>
        //   t2 = shrd t2, t3, ShiftAmount
        //   t3 = sar t3, ShiftAmount
        _shrd(T_2, T_3, ConstantShiftAmount);
        _sar(T_3, ConstantShiftAmount);
      } break;
      }
      // COMMON SUFFIX OF: a=b SHIFT_OP c ==>
      //   a.lo = t2
      //   a.hi = t3
      _mov(DestLo, T_2);
      _mov(DestHi, T_3);
    }
  } else {
    // NON-CONSTANT CASES.
    Constant *BitTest = Ctx->getConstantInt32(0x20);
    InstX86Label *Label = InstX86Label::create(Func, this);
    // COMMON PREFIX OF: a=b SHIFT_OP c ==>
    //   t1:ecx = c.lo & 0xff
    //   t2 = b.lo
    //   t3 = b.hi
    T_1 = copyToReg8(Src1Lo, Traits::RegisterSet::Reg_cl);
    _mov(T_2, Src0Lo);
    _mov(T_3, Src0Hi);
    switch (Op) {
    default:
      assert(0 && "non-shift op");
      break;
    case InstArithmetic::Shl: {
      // a=b<<c ==>
      //   t3 = shld t3, t2, t1
      //   t2 = shl t2, t1
      //   test t1, 0x20
      //   je L1
      //   use(t3)
      //   t3 = t2
      //   t2 = 0
      _shld(T_3, T_2, T_1);
      _shl(T_2, T_1);
      _test(T_1, BitTest);
      _br(Traits::Cond::Br_e, Label);
      // T_2 and T_3 are being assigned again because of the intra-block control
      // flow, so we need to use _redefined to avoid liveness problems.
      _redefined(_mov(T_3, T_2));
      _redefined(_mov(T_2, Zero));
    } break;
    case InstArithmetic::Lshr: {
      // a=b>>c (unsigned) ==>
      //   t2 = shrd t2, t3, t1
      //   t3 = shr t3, t1
      //   test t1, 0x20
      //   je L1
      //   use(t2)
      //   t2 = t3
      //   t3 = 0
      _shrd(T_2, T_3, T_1);
      _shr(T_3, T_1);
      _test(T_1, BitTest);
      _br(Traits::Cond::Br_e, Label);
      // T_2 and T_3 are being assigned again because of the intra-block control
      // flow, so we need to use _redefined to avoid liveness problems.
      _redefined(_mov(T_2, T_3));
      _redefined(_mov(T_3, Zero));
    } break;
    case InstArithmetic::Ashr: {
      // a=b>>c (signed) ==>
      //   t2 = shrd t2, t3, t1
      //   t3 = sar t3, t1
      //   test t1, 0x20
      //   je L1
      //   use(t2)
      //   t2 = t3
      //   t3 = sar t3, 0x1f
      Constant *SignExtend = Ctx->getConstantInt32(0x1f);
      _shrd(T_2, T_3, T_1);
      _sar(T_3, T_1);
      _test(T_1, BitTest);
      _br(Traits::Cond::Br_e, Label);
      // T_2 and T_3 are being assigned again because of the intra-block control
      // flow, so T_2 needs to use _redefined to avoid liveness problems. T_3
      // doesn't need special treatment because it is reassigned via _sar
      // instead of _mov.
      _redefined(_mov(T_2, T_3));
      _sar(T_3, SignExtend);
    } break;
    }
    // COMMON SUFFIX OF: a=b SHIFT_OP c ==>
    // L1:
    //   a.lo = t2
    //   a.hi = t3
    Context.insert(Label);
    _mov(DestLo, T_2);
    _mov(DestHi, T_3);
  }
}
1893
John Porto4a566862016-01-04 09:33:41 -08001894template <typename TraitsType>
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08001895void TargetX86Base<TraitsType>::lowerArithmetic(const InstArithmetic *Instr) {
1896 Variable *Dest = Instr->getDest();
Jim Stichnoth3607b6c2015-11-13 14:28:23 -08001897 if (Dest->isRematerializable()) {
John Porto1d937a82015-12-17 06:19:34 -08001898 Context.insert<InstFakeDef>(Dest);
Jim Stichnoth3607b6c2015-11-13 14:28:23 -08001899 return;
1900 }
Jim Stichnothc59288b2015-11-09 11:38:40 -08001901 Type Ty = Dest->getType();
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08001902 Operand *Src0 = legalize(Instr->getSrc(0));
1903 Operand *Src1 = legalize(Instr->getSrc(1));
1904 if (Instr->isCommutative()) {
David Sehr487bad02015-10-06 17:41:26 -07001905 uint32_t SwapCount = 0;
1906 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1)) {
John Porto7e93c622015-06-23 10:58:57 -07001907 std::swap(Src0, Src1);
David Sehr487bad02015-10-06 17:41:26 -07001908 ++SwapCount;
1909 }
1910 if (llvm::isa<Constant>(Src0) && !llvm::isa<Constant>(Src1)) {
John Porto7e93c622015-06-23 10:58:57 -07001911 std::swap(Src0, Src1);
David Sehr487bad02015-10-06 17:41:26 -07001912 ++SwapCount;
1913 }
1914 // Improve two-address code patterns by avoiding a copy to the dest
1915 // register when one of the source operands ends its lifetime here.
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08001916 if (!Instr->isLastUse(Src0) && Instr->isLastUse(Src1)) {
David Sehr487bad02015-10-06 17:41:26 -07001917 std::swap(Src0, Src1);
1918 ++SwapCount;
1919 }
1920 assert(SwapCount <= 1);
Karl Schimpfa313a122015-10-08 10:40:57 -07001921 (void)SwapCount;
John Porto7e93c622015-06-23 10:58:57 -07001922 }
Jim Stichnothc59288b2015-11-09 11:38:40 -08001923 if (!Traits::Is64Bit && Ty == IceType_i64) {
John Porto1d235422015-08-12 12:37:53 -07001924 // These x86-32 helper-call-involved instructions are lowered in this
Andrew Scull57e12682015-09-16 11:30:19 -07001925 // separate switch. This is because loOperand() and hiOperand() may insert
1926 // redundant instructions for constant blinding and pooling. Such redundant
1927 // instructions will fail liveness analysis under -Om1 setting. And,
1928 // actually these arguments do not need to be processed with loOperand()
1929 // and hiOperand() to be used.
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08001930 switch (Instr->getOp()) {
David Sehr26217e32015-11-26 13:03:50 -08001931 case InstArithmetic::Udiv:
1932 case InstArithmetic::Sdiv:
1933 case InstArithmetic::Urem:
1934 case InstArithmetic::Srem:
1935 llvm::report_fatal_error("Helper call was expected");
John Porto7e93c622015-06-23 10:58:57 -07001936 return;
John Porto7e93c622015-06-23 10:58:57 -07001937 default:
1938 break;
1939 }
1940
Jim Stichnoth54f3d512015-12-11 09:53:00 -08001941 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
1942 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
John Porto7e93c622015-06-23 10:58:57 -07001943 Operand *Src0Lo = loOperand(Src0);
1944 Operand *Src0Hi = hiOperand(Src0);
1945 Operand *Src1Lo = loOperand(Src1);
1946 Operand *Src1Hi = hiOperand(Src1);
1947 Variable *T_Lo = nullptr, *T_Hi = nullptr;
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08001948 switch (Instr->getOp()) {
John Porto7e93c622015-06-23 10:58:57 -07001949 case InstArithmetic::_num:
1950 llvm_unreachable("Unknown arithmetic operator");
1951 break;
1952 case InstArithmetic::Add:
1953 _mov(T_Lo, Src0Lo);
1954 _add(T_Lo, Src1Lo);
1955 _mov(DestLo, T_Lo);
1956 _mov(T_Hi, Src0Hi);
1957 _adc(T_Hi, Src1Hi);
1958 _mov(DestHi, T_Hi);
1959 break;
1960 case InstArithmetic::And:
1961 _mov(T_Lo, Src0Lo);
1962 _and(T_Lo, Src1Lo);
1963 _mov(DestLo, T_Lo);
1964 _mov(T_Hi, Src0Hi);
1965 _and(T_Hi, Src1Hi);
1966 _mov(DestHi, T_Hi);
1967 break;
1968 case InstArithmetic::Or:
1969 _mov(T_Lo, Src0Lo);
1970 _or(T_Lo, Src1Lo);
1971 _mov(DestLo, T_Lo);
1972 _mov(T_Hi, Src0Hi);
1973 _or(T_Hi, Src1Hi);
1974 _mov(DestHi, T_Hi);
1975 break;
1976 case InstArithmetic::Xor:
1977 _mov(T_Lo, Src0Lo);
1978 _xor(T_Lo, Src1Lo);
1979 _mov(DestLo, T_Lo);
1980 _mov(T_Hi, Src0Hi);
1981 _xor(T_Hi, Src1Hi);
1982 _mov(DestHi, T_Hi);
1983 break;
1984 case InstArithmetic::Sub:
1985 _mov(T_Lo, Src0Lo);
1986 _sub(T_Lo, Src1Lo);
1987 _mov(DestLo, T_Lo);
1988 _mov(T_Hi, Src0Hi);
1989 _sbb(T_Hi, Src1Hi);
1990 _mov(DestHi, T_Hi);
1991 break;
1992 case InstArithmetic::Mul: {
1993 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
John Porto5d0acff2015-06-30 15:29:21 -07001994 Variable *T_4Lo = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
1995 Variable *T_4Hi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
John Porto7e93c622015-06-23 10:58:57 -07001996 // gcc does the following:
1997 // a=b*c ==>
1998 // t1 = b.hi; t1 *=(imul) c.lo
1999 // t2 = c.hi; t2 *=(imul) b.lo
2000 // t3:eax = b.lo
2001 // t4.hi:edx,t4.lo:eax = t3:eax *(mul) c.lo
2002 // a.lo = t4.lo
2003 // t4.hi += t1
2004 // t4.hi += t2
2005 // a.hi = t4.hi
2006 // The mul instruction cannot take an immediate operand.
2007 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Mem);
2008 _mov(T_1, Src0Hi);
2009 _imul(T_1, Src1Lo);
John Porto5d0acff2015-06-30 15:29:21 -07002010 _mov(T_3, Src0Lo, Traits::RegisterSet::Reg_eax);
John Porto7e93c622015-06-23 10:58:57 -07002011 _mul(T_4Lo, T_3, Src1Lo);
Andrew Scull57e12682015-09-16 11:30:19 -07002012 // The mul instruction produces two dest variables, edx:eax. We create a
2013 // fake definition of edx to account for this.
John Porto1d937a82015-12-17 06:19:34 -08002014 Context.insert<InstFakeDef>(T_4Hi, T_4Lo);
Jim Stichnoth28df6ba2016-02-05 15:43:24 -08002015 Context.insert<InstFakeUse>(T_4Hi);
John Porto7e93c622015-06-23 10:58:57 -07002016 _mov(DestLo, T_4Lo);
2017 _add(T_4Hi, T_1);
Jim Stichnothb40595a2016-01-29 06:14:31 -08002018 _mov(T_2, Src1Hi);
2019 _imul(T_2, Src0Lo);
John Porto7e93c622015-06-23 10:58:57 -07002020 _add(T_4Hi, T_2);
2021 _mov(DestHi, T_4Hi);
2022 } break;
David Sehr188eae52015-09-24 11:42:55 -07002023 case InstArithmetic::Shl:
2024 case InstArithmetic::Lshr:
2025 case InstArithmetic::Ashr:
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08002026 lowerShift64(Instr->getOp(), Src0Lo, Src0Hi, Src1Lo, DestLo, DestHi);
David Sehr188eae52015-09-24 11:42:55 -07002027 break;
John Porto7e93c622015-06-23 10:58:57 -07002028 case InstArithmetic::Fadd:
2029 case InstArithmetic::Fsub:
2030 case InstArithmetic::Fmul:
2031 case InstArithmetic::Fdiv:
2032 case InstArithmetic::Frem:
2033 llvm_unreachable("FP instruction with i64 type");
2034 break;
2035 case InstArithmetic::Udiv:
2036 case InstArithmetic::Sdiv:
2037 case InstArithmetic::Urem:
2038 case InstArithmetic::Srem:
2039 llvm_unreachable("Call-helper-involved instruction for i64 type \
2040 should have already been handled before");
2041 break;
2042 }
2043 return;
2044 }
Jim Stichnothc59288b2015-11-09 11:38:40 -08002045 if (isVectorType(Ty)) {
Andrew Scull57e12682015-09-16 11:30:19 -07002046 // TODO: Trap on integer divide and integer modulo by zero. See:
2047 // https://code.google.com/p/nativeclient/issues/detail?id=3899
John Porto4a566862016-01-04 09:33:41 -08002048 if (llvm::isa<X86OperandMem>(Src1))
Andrew Scull97f460d2015-07-21 10:07:42 -07002049 Src1 = legalizeToReg(Src1);
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08002050 switch (Instr->getOp()) {
John Porto7e93c622015-06-23 10:58:57 -07002051 case InstArithmetic::_num:
2052 llvm_unreachable("Unknown arithmetic operator");
2053 break;
2054 case InstArithmetic::Add: {
Jim Stichnothc59288b2015-11-09 11:38:40 -08002055 Variable *T = makeReg(Ty);
John Porto7e93c622015-06-23 10:58:57 -07002056 _movp(T, Src0);
2057 _padd(T, Src1);
2058 _movp(Dest, T);
2059 } break;
2060 case InstArithmetic::And: {
Jim Stichnothc59288b2015-11-09 11:38:40 -08002061 Variable *T = makeReg(Ty);
John Porto7e93c622015-06-23 10:58:57 -07002062 _movp(T, Src0);
2063 _pand(T, Src1);
2064 _movp(Dest, T);
2065 } break;
2066 case InstArithmetic::Or: {
Jim Stichnothc59288b2015-11-09 11:38:40 -08002067 Variable *T = makeReg(Ty);
John Porto7e93c622015-06-23 10:58:57 -07002068 _movp(T, Src0);
2069 _por(T, Src1);
2070 _movp(Dest, T);
2071 } break;
2072 case InstArithmetic::Xor: {
Jim Stichnothc59288b2015-11-09 11:38:40 -08002073 Variable *T = makeReg(Ty);
John Porto7e93c622015-06-23 10:58:57 -07002074 _movp(T, Src0);
2075 _pxor(T, Src1);
2076 _movp(Dest, T);
2077 } break;
2078 case InstArithmetic::Sub: {
Jim Stichnothc59288b2015-11-09 11:38:40 -08002079 Variable *T = makeReg(Ty);
John Porto7e93c622015-06-23 10:58:57 -07002080 _movp(T, Src0);
2081 _psub(T, Src1);
2082 _movp(Dest, T);
2083 } break;
2084 case InstArithmetic::Mul: {
Jim Stichnothc59288b2015-11-09 11:38:40 -08002085 bool TypesAreValidForPmull = Ty == IceType_v4i32 || Ty == IceType_v8i16;
John Porto7e93c622015-06-23 10:58:57 -07002086 bool InstructionSetIsValidForPmull =
Jim Stichnothc59288b2015-11-09 11:38:40 -08002087 Ty == IceType_v8i16 || InstructionSet >= Traits::SSE4_1;
John Porto7e93c622015-06-23 10:58:57 -07002088 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) {
Jim Stichnothc59288b2015-11-09 11:38:40 -08002089 Variable *T = makeReg(Ty);
John Porto7e93c622015-06-23 10:58:57 -07002090 _movp(T, Src0);
Jim Stichnothebbb5912015-10-05 15:12:09 -07002091 _pmull(T, Src0 == Src1 ? T : Src1);
John Porto7e93c622015-06-23 10:58:57 -07002092 _movp(Dest, T);
Jim Stichnothc59288b2015-11-09 11:38:40 -08002093 } else if (Ty == IceType_v4i32) {
John Porto7e93c622015-06-23 10:58:57 -07002094 // Lowering sequence:
2095 // Note: The mask arguments have index 0 on the left.
2096 //
2097 // movups T1, Src0
2098 // pshufd T2, Src0, {1,0,3,0}
2099 // pshufd T3, Src1, {1,0,3,0}
2100 // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]}
2101 // pmuludq T1, Src1
2102 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]}
2103 // pmuludq T2, T3
2104 // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])}
2105 // shufps T1, T2, {0,2,0,2}
2106 // pshufd T4, T1, {0,2,1,3}
2107 // movups Dest, T4
2108
2109 // Mask that directs pshufd to create a vector with entries
2110 // Src[1, 0, 3, 0]
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07002111 constexpr unsigned Constant1030 = 0x31;
John Porto7e93c622015-06-23 10:58:57 -07002112 Constant *Mask1030 = Ctx->getConstantInt32(Constant1030);
2113 // Mask that directs shufps to create a vector with entries
2114 // Dest[0, 2], Src[0, 2]
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07002115 constexpr unsigned Mask0202 = 0x88;
John Porto7e93c622015-06-23 10:58:57 -07002116 // Mask that directs pshufd to create a vector with entries
2117 // Src[0, 2, 1, 3]
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07002118 constexpr unsigned Mask0213 = 0xd8;
John Porto7e93c622015-06-23 10:58:57 -07002119 Variable *T1 = makeReg(IceType_v4i32);
2120 Variable *T2 = makeReg(IceType_v4i32);
2121 Variable *T3 = makeReg(IceType_v4i32);
2122 Variable *T4 = makeReg(IceType_v4i32);
2123 _movp(T1, Src0);
2124 _pshufd(T2, Src0, Mask1030);
2125 _pshufd(T3, Src1, Mask1030);
2126 _pmuludq(T1, Src1);
2127 _pmuludq(T2, T3);
2128 _shufps(T1, T2, Ctx->getConstantInt32(Mask0202));
2129 _pshufd(T4, T1, Ctx->getConstantInt32(Mask0213));
2130 _movp(Dest, T4);
Jim Stichnothc59288b2015-11-09 11:38:40 -08002131 } else if (Ty == IceType_v16i8) {
David Sehr26217e32015-11-26 13:03:50 -08002132 llvm::report_fatal_error("Scalarized operation was expected");
Jim Stichnothebbb5912015-10-05 15:12:09 -07002133 } else {
2134 llvm::report_fatal_error("Invalid vector multiply type");
John Porto7e93c622015-06-23 10:58:57 -07002135 }
2136 } break;
2137 case InstArithmetic::Shl:
2138 case InstArithmetic::Lshr:
2139 case InstArithmetic::Ashr:
2140 case InstArithmetic::Udiv:
2141 case InstArithmetic::Urem:
2142 case InstArithmetic::Sdiv:
2143 case InstArithmetic::Srem:
David Sehr26217e32015-11-26 13:03:50 -08002144 llvm::report_fatal_error("Scalarized operation was expected");
John Porto7e93c622015-06-23 10:58:57 -07002145 break;
2146 case InstArithmetic::Fadd: {
Jim Stichnothc59288b2015-11-09 11:38:40 -08002147 Variable *T = makeReg(Ty);
John Porto7e93c622015-06-23 10:58:57 -07002148 _movp(T, Src0);
2149 _addps(T, Src1);
2150 _movp(Dest, T);
2151 } break;
2152 case InstArithmetic::Fsub: {
Jim Stichnothc59288b2015-11-09 11:38:40 -08002153 Variable *T = makeReg(Ty);
John Porto7e93c622015-06-23 10:58:57 -07002154 _movp(T, Src0);
2155 _subps(T, Src1);
2156 _movp(Dest, T);
2157 } break;
2158 case InstArithmetic::Fmul: {
Jim Stichnothc59288b2015-11-09 11:38:40 -08002159 Variable *T = makeReg(Ty);
John Porto7e93c622015-06-23 10:58:57 -07002160 _movp(T, Src0);
Jim Stichnothebbb5912015-10-05 15:12:09 -07002161 _mulps(T, Src0 == Src1 ? T : Src1);
John Porto7e93c622015-06-23 10:58:57 -07002162 _movp(Dest, T);
2163 } break;
2164 case InstArithmetic::Fdiv: {
Jim Stichnothc59288b2015-11-09 11:38:40 -08002165 Variable *T = makeReg(Ty);
John Porto7e93c622015-06-23 10:58:57 -07002166 _movp(T, Src0);
2167 _divps(T, Src1);
2168 _movp(Dest, T);
2169 } break;
2170 case InstArithmetic::Frem:
David Sehr26217e32015-11-26 13:03:50 -08002171 llvm::report_fatal_error("Scalarized operation was expected");
John Porto7e93c622015-06-23 10:58:57 -07002172 break;
2173 }
2174 return;
2175 }
2176 Variable *T_edx = nullptr;
2177 Variable *T = nullptr;
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08002178 switch (Instr->getOp()) {
John Porto7e93c622015-06-23 10:58:57 -07002179 case InstArithmetic::_num:
2180 llvm_unreachable("Unknown arithmetic operator");
2181 break;
2182 case InstArithmetic::Add:
2183 _mov(T, Src0);
2184 _add(T, Src1);
2185 _mov(Dest, T);
2186 break;
2187 case InstArithmetic::And:
2188 _mov(T, Src0);
2189 _and(T, Src1);
2190 _mov(Dest, T);
2191 break;
2192 case InstArithmetic::Or:
2193 _mov(T, Src0);
2194 _or(T, Src1);
2195 _mov(Dest, T);
2196 break;
2197 case InstArithmetic::Xor:
2198 _mov(T, Src0);
2199 _xor(T, Src1);
2200 _mov(Dest, T);
2201 break;
2202 case InstArithmetic::Sub:
2203 _mov(T, Src0);
2204 _sub(T, Src1);
2205 _mov(Dest, T);
2206 break;
2207 case InstArithmetic::Mul:
2208 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
2209 if (optimizeScalarMul(Dest, Src0, C->getValue()))
2210 return;
2211 }
Andrew Scull57e12682015-09-16 11:30:19 -07002212 // The 8-bit version of imul only allows the form "imul r/m8" where T must
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07002213 // be in al.
Jim Stichnothc59288b2015-11-09 11:38:40 -08002214 if (isByteSizedArithType(Ty)) {
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07002215 _mov(T, Src0, Traits::RegisterSet::Reg_al);
John Porto7e93c622015-06-23 10:58:57 -07002216 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
David Sehre11f8782015-10-06 10:26:57 -07002217 _imul(T, Src0 == Src1 ? T : Src1);
2218 _mov(Dest, T);
2219 } else if (auto *ImmConst = llvm::dyn_cast<ConstantInteger32>(Src1)) {
Jim Stichnothc59288b2015-11-09 11:38:40 -08002220 T = makeReg(Ty);
David Sehre11f8782015-10-06 10:26:57 -07002221 _imul_imm(T, Src0, ImmConst);
2222 _mov(Dest, T);
John Porto7e93c622015-06-23 10:58:57 -07002223 } else {
2224 _mov(T, Src0);
David Sehre11f8782015-10-06 10:26:57 -07002225 _imul(T, Src0 == Src1 ? T : Src1);
2226 _mov(Dest, T);
John Porto7e93c622015-06-23 10:58:57 -07002227 }
John Porto7e93c622015-06-23 10:58:57 -07002228 break;
2229 case InstArithmetic::Shl:
2230 _mov(T, Src0);
Jim Stichnoth9c2c0932016-06-14 07:27:22 -07002231 if (!llvm::isa<ConstantInteger32>(Src1) &&
2232 !llvm::isa<ConstantInteger64>(Src1))
Jim Stichnothc59288b2015-11-09 11:38:40 -08002233 Src1 = copyToReg8(Src1, Traits::RegisterSet::Reg_cl);
John Porto7e93c622015-06-23 10:58:57 -07002234 _shl(T, Src1);
2235 _mov(Dest, T);
2236 break;
2237 case InstArithmetic::Lshr:
2238 _mov(T, Src0);
Jim Stichnoth9c2c0932016-06-14 07:27:22 -07002239 if (!llvm::isa<ConstantInteger32>(Src1) &&
2240 !llvm::isa<ConstantInteger64>(Src1))
Jim Stichnothc59288b2015-11-09 11:38:40 -08002241 Src1 = copyToReg8(Src1, Traits::RegisterSet::Reg_cl);
John Porto7e93c622015-06-23 10:58:57 -07002242 _shr(T, Src1);
2243 _mov(Dest, T);
2244 break;
2245 case InstArithmetic::Ashr:
2246 _mov(T, Src0);
Jim Stichnoth9c2c0932016-06-14 07:27:22 -07002247 if (!llvm::isa<ConstantInteger32>(Src1) &&
2248 !llvm::isa<ConstantInteger64>(Src1))
Jim Stichnothc59288b2015-11-09 11:38:40 -08002249 Src1 = copyToReg8(Src1, Traits::RegisterSet::Reg_cl);
John Porto7e93c622015-06-23 10:58:57 -07002250 _sar(T, Src1);
2251 _mov(Dest, T);
2252 break;
Jim Stichnothc59288b2015-11-09 11:38:40 -08002253 case InstArithmetic::Udiv: {
John Porto7e93c622015-06-23 10:58:57 -07002254 // div and idiv are the few arithmetic operators that do not allow
2255 // immediates as the operand.
2256 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
Jim Stichnoth8aa39662016-02-10 11:20:30 -08002257 RegNumT Eax;
2258 RegNumT Edx;
Jim Stichnothc59288b2015-11-09 11:38:40 -08002259 switch (Ty) {
2260 default:
John Porto3c275ce2015-12-22 08:14:00 -08002261 llvm::report_fatal_error("Bad type for udiv");
2262 case IceType_i64:
2263 Eax = Traits::getRaxOrDie();
2264 Edx = Traits::getRdxOrDie();
John Porto008f4ce2015-12-24 13:22:18 -08002265 break;
Jim Stichnothc59288b2015-11-09 11:38:40 -08002266 case IceType_i32:
John Porto3c275ce2015-12-22 08:14:00 -08002267 Eax = Traits::RegisterSet::Reg_eax;
2268 Edx = Traits::RegisterSet::Reg_edx;
Jim Stichnothc59288b2015-11-09 11:38:40 -08002269 break;
2270 case IceType_i16:
2271 Eax = Traits::RegisterSet::Reg_ax;
2272 Edx = Traits::RegisterSet::Reg_dx;
2273 break;
2274 case IceType_i8:
2275 Eax = Traits::RegisterSet::Reg_al;
2276 Edx = Traits::RegisterSet::Reg_ah;
2277 break;
John Porto7e93c622015-06-23 10:58:57 -07002278 }
John Porto008f4ce2015-12-24 13:22:18 -08002279 T_edx = makeReg(Ty, Edx);
Jim Stichnothc59288b2015-11-09 11:38:40 -08002280 _mov(T, Src0, Eax);
John Porto008f4ce2015-12-24 13:22:18 -08002281 _mov(T_edx, Ctx->getConstantZero(Ty));
Jim Stichnothc59288b2015-11-09 11:38:40 -08002282 _div(T, Src1, T_edx);
2283 _mov(Dest, T);
2284 } break;
John Porto7e93c622015-06-23 10:58:57 -07002285 case InstArithmetic::Sdiv:
Andrew Scull57e12682015-09-16 11:30:19 -07002286 // TODO(stichnot): Enable this after doing better performance and cross
2287 // testing.
Jim Stichnothdd6dcfa2016-04-18 12:52:09 -07002288 if (false && Func->getOptLevel() >= Opt_1) {
Andrew Scull57e12682015-09-16 11:30:19 -07002289 // Optimize division by constant power of 2, but not for Om1 or O0, just
2290 // to keep things simple there.
John Porto7e93c622015-06-23 10:58:57 -07002291 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08002292 const int32_t Divisor = C->getValue();
2293 const uint32_t UDivisor = Divisor;
John Porto7e93c622015-06-23 10:58:57 -07002294 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {
2295 uint32_t LogDiv = llvm::Log2_32(UDivisor);
John Porto7e93c622015-06-23 10:58:57 -07002296 // LLVM does the following for dest=src/(1<<log):
2297 // t=src
2298 // sar t,typewidth-1 // -1 if src is negative, 0 if not
2299 // shr t,typewidth-log
2300 // add t,src
2301 // sar t,log
2302 // dest=t
2303 uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty);
2304 _mov(T, Src0);
Andrew Scull57e12682015-09-16 11:30:19 -07002305 // If for some reason we are dividing by 1, just treat it like an
2306 // assignment.
John Porto7e93c622015-06-23 10:58:57 -07002307 if (LogDiv > 0) {
2308 // The initial sar is unnecessary when dividing by 2.
2309 if (LogDiv > 1)
2310 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1));
2311 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv));
2312 _add(T, Src0);
2313 _sar(T, Ctx->getConstantInt(Ty, LogDiv));
2314 }
2315 _mov(Dest, T);
2316 return;
2317 }
2318 }
2319 }
2320 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
Jim Stichnothc59288b2015-11-09 11:38:40 -08002321 switch (Ty) {
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07002322 default:
John Porto3c275ce2015-12-22 08:14:00 -08002323 llvm::report_fatal_error("Bad type for sdiv");
2324 case IceType_i64:
2325 T_edx = makeReg(Ty, Traits::getRdxOrDie());
2326 _mov(T, Src0, Traits::getRaxOrDie());
2327 break;
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07002328 case IceType_i32:
2329 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_edx);
John Porto5d0acff2015-06-30 15:29:21 -07002330 _mov(T, Src0, Traits::RegisterSet::Reg_eax);
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07002331 break;
2332 case IceType_i16:
2333 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_dx);
2334 _mov(T, Src0, Traits::RegisterSet::Reg_ax);
2335 break;
2336 case IceType_i8:
2337 T_edx = makeReg(IceType_i16, Traits::RegisterSet::Reg_ax);
2338 _mov(T, Src0, Traits::RegisterSet::Reg_al);
2339 break;
John Porto7e93c622015-06-23 10:58:57 -07002340 }
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07002341 _cbwdq(T_edx, T);
2342 _idiv(T, Src1, T_edx);
2343 _mov(Dest, T);
John Porto7e93c622015-06-23 10:58:57 -07002344 break;
Jim Stichnothc59288b2015-11-09 11:38:40 -08002345 case InstArithmetic::Urem: {
John Porto7e93c622015-06-23 10:58:57 -07002346 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
Jim Stichnoth8aa39662016-02-10 11:20:30 -08002347 RegNumT Eax;
2348 RegNumT Edx;
Jim Stichnothc59288b2015-11-09 11:38:40 -08002349 switch (Ty) {
2350 default:
John Porto3c275ce2015-12-22 08:14:00 -08002351 llvm::report_fatal_error("Bad type for urem");
2352 case IceType_i64:
2353 Eax = Traits::getRaxOrDie();
2354 Edx = Traits::getRdxOrDie();
2355 break;
Jim Stichnothc59288b2015-11-09 11:38:40 -08002356 case IceType_i32:
John Porto3c275ce2015-12-22 08:14:00 -08002357 Eax = Traits::RegisterSet::Reg_eax;
2358 Edx = Traits::RegisterSet::Reg_edx;
Jim Stichnothc59288b2015-11-09 11:38:40 -08002359 break;
2360 case IceType_i16:
2361 Eax = Traits::RegisterSet::Reg_ax;
2362 Edx = Traits::RegisterSet::Reg_dx;
2363 break;
2364 case IceType_i8:
2365 Eax = Traits::RegisterSet::Reg_al;
2366 Edx = Traits::RegisterSet::Reg_ah;
2367 break;
John Porto7e93c622015-06-23 10:58:57 -07002368 }
Jim Stichnothc59288b2015-11-09 11:38:40 -08002369 T_edx = makeReg(Ty, Edx);
2370 _mov(T_edx, Ctx->getConstantZero(Ty));
2371 _mov(T, Src0, Eax);
2372 _div(T_edx, Src1, T);
Jim Stichnoth2655d962016-04-21 05:38:15 -07002373 if (Ty == IceType_i8) {
2374 // Register ah must be moved into one of {al,bl,cl,dl} before it can be
2375 // moved into a general 8-bit register.
2376 auto *T_AhRcvr = makeReg(Ty);
2377 T_AhRcvr->setRegClass(RCX86_IsAhRcvr);
2378 _mov(T_AhRcvr, T_edx);
2379 T_edx = T_AhRcvr;
2380 }
Jim Stichnothc59288b2015-11-09 11:38:40 -08002381 _mov(Dest, T_edx);
2382 } break;
2383 case InstArithmetic::Srem: {
Andrew Scull57e12682015-09-16 11:30:19 -07002384 // TODO(stichnot): Enable this after doing better performance and cross
2385 // testing.
Jim Stichnothdd6dcfa2016-04-18 12:52:09 -07002386 if (false && Func->getOptLevel() >= Opt_1) {
Andrew Scull57e12682015-09-16 11:30:19 -07002387 // Optimize mod by constant power of 2, but not for Om1 or O0, just to
2388 // keep things simple there.
John Porto7e93c622015-06-23 10:58:57 -07002389 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08002390 const int32_t Divisor = C->getValue();
2391 const uint32_t UDivisor = Divisor;
John Porto7e93c622015-06-23 10:58:57 -07002392 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {
2393 uint32_t LogDiv = llvm::Log2_32(UDivisor);
John Porto7e93c622015-06-23 10:58:57 -07002394 // LLVM does the following for dest=src%(1<<log):
2395 // t=src
2396 // sar t,typewidth-1 // -1 if src is negative, 0 if not
2397 // shr t,typewidth-log
2398 // add t,src
2399 // and t, -(1<<log)
2400 // sub t,src
2401 // neg t
2402 // dest=t
2403 uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty);
2404 // If for some reason we are dividing by 1, just assign 0.
2405 if (LogDiv == 0) {
2406 _mov(Dest, Ctx->getConstantZero(Ty));
2407 return;
2408 }
2409 _mov(T, Src0);
2410 // The initial sar is unnecessary when dividing by 2.
2411 if (LogDiv > 1)
2412 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1));
2413 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv));
2414 _add(T, Src0);
2415 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv)));
2416 _sub(T, Src0);
2417 _neg(T);
2418 _mov(Dest, T);
2419 return;
2420 }
2421 }
2422 }
2423 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
Jim Stichnoth8aa39662016-02-10 11:20:30 -08002424 RegNumT Eax;
2425 RegNumT Edx;
Jim Stichnothc59288b2015-11-09 11:38:40 -08002426 switch (Ty) {
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07002427 default:
John Porto3c275ce2015-12-22 08:14:00 -08002428 llvm::report_fatal_error("Bad type for srem");
2429 case IceType_i64:
2430 Eax = Traits::getRaxOrDie();
2431 Edx = Traits::getRdxOrDie();
2432 break;
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07002433 case IceType_i32:
John Porto3c275ce2015-12-22 08:14:00 -08002434 Eax = Traits::RegisterSet::Reg_eax;
2435 Edx = Traits::RegisterSet::Reg_edx;
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07002436 break;
2437 case IceType_i16:
Jim Stichnothc59288b2015-11-09 11:38:40 -08002438 Eax = Traits::RegisterSet::Reg_ax;
2439 Edx = Traits::RegisterSet::Reg_dx;
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07002440 break;
2441 case IceType_i8:
Jim Stichnothc59288b2015-11-09 11:38:40 -08002442 Eax = Traits::RegisterSet::Reg_al;
2443 Edx = Traits::RegisterSet::Reg_ah;
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07002444 break;
John Porto7e93c622015-06-23 10:58:57 -07002445 }
Jim Stichnothc59288b2015-11-09 11:38:40 -08002446 T_edx = makeReg(Ty, Edx);
2447 _mov(T, Src0, Eax);
2448 _cbwdq(T_edx, T);
2449 _idiv(T_edx, Src1, T);
Jim Stichnoth2655d962016-04-21 05:38:15 -07002450 if (Ty == IceType_i8) {
2451 // Register ah must be moved into one of {al,bl,cl,dl} before it can be
2452 // moved into a general 8-bit register.
2453 auto *T_AhRcvr = makeReg(Ty);
2454 T_AhRcvr->setRegClass(RCX86_IsAhRcvr);
2455 _mov(T_AhRcvr, T_edx);
2456 T_edx = T_AhRcvr;
2457 }
Jim Stichnothc59288b2015-11-09 11:38:40 -08002458 _mov(Dest, T_edx);
2459 } break;
John Porto7e93c622015-06-23 10:58:57 -07002460 case InstArithmetic::Fadd:
2461 _mov(T, Src0);
2462 _addss(T, Src1);
2463 _mov(Dest, T);
2464 break;
2465 case InstArithmetic::Fsub:
2466 _mov(T, Src0);
2467 _subss(T, Src1);
2468 _mov(Dest, T);
2469 break;
2470 case InstArithmetic::Fmul:
2471 _mov(T, Src0);
Jim Stichnothebbb5912015-10-05 15:12:09 -07002472 _mulss(T, Src0 == Src1 ? T : Src1);
John Porto7e93c622015-06-23 10:58:57 -07002473 _mov(Dest, T);
2474 break;
2475 case InstArithmetic::Fdiv:
2476 _mov(T, Src0);
2477 _divss(T, Src1);
2478 _mov(Dest, T);
2479 break;
David Sehr26217e32015-11-26 13:03:50 -08002480 case InstArithmetic::Frem:
2481 llvm::report_fatal_error("Helper call was expected");
2482 break;
John Porto7e93c622015-06-23 10:58:57 -07002483 }
2484}
2485
John Porto4a566862016-01-04 09:33:41 -08002486template <typename TraitsType>
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08002487void TargetX86Base<TraitsType>::lowerAssign(const InstAssign *Instr) {
2488 Variable *Dest = Instr->getDest();
Jim Stichnoth3607b6c2015-11-13 14:28:23 -08002489 if (Dest->isRematerializable()) {
John Porto1d937a82015-12-17 06:19:34 -08002490 Context.insert<InstFakeDef>(Dest);
Jim Stichnoth3607b6c2015-11-13 14:28:23 -08002491 return;
2492 }
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08002493 Operand *Src = Instr->getSrc(0);
David Sehre3984282015-12-15 17:34:55 -08002494 assert(Dest->getType() == Src->getType());
2495 lowerMove(Dest, Src, false);
John Porto7e93c622015-06-23 10:58:57 -07002496}
2497
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerBr(const InstBr *Br) {
  // Lower a branch instruction. Unconditional branches map directly to a
  // single jmp; conditional branches first try to fold the flag-producing
  // instruction (icmp/fcmp/arith) into the branch itself.
  if (Br->isUnconditional()) {
    _br(Br->getTargetUnconditional());
    return;
  }
  Operand *Cond = Br->getCondition();

  // Handle folding opportunities. If the condition was produced by a foldable
  // instruction, that producer was deleted during folding analysis and is
  // re-lowered here together with the branch, avoiding a materialized bool.
  if (const Inst *Producer = FoldingInfo.getProducerFor(Cond)) {
    assert(Producer->isDeleted());
    switch (BoolFolding<Traits>::getProducerKind(Producer)) {
    default:
      // Not a foldable producer kind; fall through to the generic cmp/br
      // sequence below.
      break;
    case BoolFolding<Traits>::PK_Icmp32:
    case BoolFolding<Traits>::PK_Icmp64: {
      lowerIcmpAndConsumer(llvm::cast<InstIcmp>(Producer), Br);
      return;
    }
    case BoolFolding<Traits>::PK_Fcmp: {
      lowerFcmpAndConsumer(llvm::cast<InstFcmp>(Producer), Br);
      return;
    }
    case BoolFolding<Traits>::PK_Arith: {
      lowerArithAndConsumer(llvm::cast<InstArithmetic>(Producer), Br);
      return;
    }
    }
  }
  // Generic case: compare the (legalized) boolean condition against zero and
  // branch on not-equal.
  Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem);
  Constant *Zero = Ctx->getConstantZero(IceType_i32);
  _cmp(Src0, Zero);
  _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse());
}
2532
David Sehr0c68bef2016-01-20 10:00:23 -08002533// constexprMax returns a (constexpr) max(S0, S1), and it is used for defining
2534// OperandList in lowerCall. std::max() is supposed to work, but it doesn't.
2535inline constexpr SizeT constexprMax(SizeT S0, SizeT S1) {
2536 return S0 < S1 ? S1 : S0;
2537}
2538
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerCall(const InstCall *Instr) {
  // Common x86 calling convention lowering:
  //
  // * At the point before the call, the stack must be aligned to 16 bytes.
  //
  // * Non-register arguments are pushed onto the stack in right-to-left order,
  // such that the left-most argument ends up on the top of the stack at the
  // lowest memory address.
  //
  // * Stack arguments of vector type are aligned to start at the next highest
  // multiple of 16 bytes. Other stack arguments are aligned to the next word
  // size boundary (4 or 8 bytes, respectively).
  NeedsStackAlignment = true;

  // OperandList is sized for the larger of the XMM/GPR register-argument
  // limits so one vector type serves both argument classes.
  using OperandList =
      llvm::SmallVector<Operand *, constexprMax(Traits::X86_MAX_XMM_ARGS,
                                                Traits::X86_MAX_GPR_ARGS)>;
  OperandList XmmArgs;
  CfgVector<std::pair<const Type, Operand *>> GprArgs;
  OperandList StackArgs, StackArgLocations;
  uint32_t ParameterAreaSizeBytes = 0;

  // Classify each argument operand according to the location where the argument
  // is passed: XMM register, GPR, or the outgoing-argument stack area.
  for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
    Operand *Arg = Instr->getArg(i);
    const Type Ty = Arg->getType();
    // The PNaCl ABI requires the width of arguments to be at least 32 bits.
    assert(typeWidthInBytes(Ty) >= 4);
    if (isVectorType(Ty) &&
        Traits::getRegisterForXmmArgNum(XmmArgs.size()).hasValue()) {
      XmmArgs.push_back(Arg);
    } else if (isScalarFloatingType(Ty) && Traits::X86_PASS_SCALAR_FP_IN_XMM &&
               Traits::getRegisterForXmmArgNum(XmmArgs.size()).hasValue()) {
      XmmArgs.push_back(Arg);
    } else if (isScalarIntegerType(Ty) &&
               Traits::getRegisterForGprArgNum(Ty, GprArgs.size()).hasValue()) {
      GprArgs.emplace_back(Ty, Arg);
    } else {
      // Place on stack. Record the esp-relative slot now so the offsets track
      // the running ParameterAreaSizeBytes.
      StackArgs.push_back(Arg);
      if (isVectorType(Arg->getType())) {
        // Vector stack arguments start at a 16-byte-aligned offset.
        ParameterAreaSizeBytes =
            Traits::applyStackAlignment(ParameterAreaSizeBytes);
      }
      Variable *esp = getPhysicalRegister(getStackReg(), Traits::WordType);
      Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
      StackArgLocations.push_back(
          Traits::X86OperandMem::create(Func, Ty, esp, Loc));
      ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
    }
  }
  // Ensure there is enough space for the fstp/movs for floating returns.
  Variable *Dest = Instr->getDest();
  const Type DestTy = Dest ? Dest->getType() : IceType_void;
  if (!Traits::X86_PASS_SCALAR_FP_IN_XMM) {
    if (isScalarFloatingType(DestTy)) {
      // The st(0) result is routed through this reserved slot (see the _fstp
      // handling below), so the area must be at least that wide.
      ParameterAreaSizeBytes =
          std::max(static_cast<size_t>(ParameterAreaSizeBytes),
                   typeWidthInBytesOnStack(DestTy));
    }
  }
  // Adjust the parameter area so that the stack is aligned. It is assumed that
  // the stack is already aligned at the start of the calling sequence.
  ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);
  assert(ParameterAreaSizeBytes <= maxOutArgsSizeBytes());
  // Copy arguments that are passed on the stack to the appropriate stack
  // locations. We make sure legalize() is called on each argument at this
  // point, to allow availabilityGet() to work.
  for (SizeT i = 0, NumStackArgs = StackArgs.size(); i < NumStackArgs; ++i) {
    lowerStore(
        InstStore::create(Func, legalize(StackArgs[i]), StackArgLocations[i]));
  }
  // Copy arguments to be passed in registers to the appropriate registers.
  for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
    XmmArgs[i] =
        legalizeToReg(legalize(XmmArgs[i]), Traits::getRegisterForXmmArgNum(i));
  }
  // Materialize moves for arguments passed in GPRs.
  for (SizeT i = 0, NumGprArgs = GprArgs.size(); i < NumGprArgs; ++i) {
    const Type SignatureTy = GprArgs[i].first;
    Operand *Arg =
        legalize(GprArgs[i].second, Legal_Default | Legal_Rematerializable);
    GprArgs[i].second =
        legalizeToReg(Arg, Traits::getRegisterForGprArgNum(Arg->getType(), i));
    assert(SignatureTy == IceType_i64 || SignatureTy == IceType_i32);
    assert(SignatureTy == Arg->getType());
    (void)SignatureTy;
  }
  // Generate a FakeUse of register arguments so that they do not get dead code
  // eliminated as a result of the FakeKill of scratch registers after the call.
  // These need to be right before the call instruction.
  for (auto *Arg : XmmArgs) {
    Context.insert<InstFakeUse>(llvm::cast<Variable>(Arg));
  }
  for (auto &ArgPair : GprArgs) {
    Context.insert<InstFakeUse>(llvm::cast<Variable>(ArgPair.second));
  }
  // Generate the call instruction. Assign its result to a temporary with high
  // register allocation weight.
  // ReturnReg doubles as ReturnRegLo as necessary.
  Variable *ReturnReg = nullptr;
  Variable *ReturnRegHi = nullptr;
  if (Dest) {
    switch (DestTy) {
    case IceType_NUM:
    case IceType_void:
    case IceType_i1:
    case IceType_i8:
    case IceType_i16:
      // Narrow integer returns are widened to i32 by the ABI, so these types
      // should never reach lowering.
      llvm::report_fatal_error("Invalid Call dest type");
      break;
    case IceType_i32:
      ReturnReg = makeReg(DestTy, Traits::RegisterSet::Reg_eax);
      break;
    case IceType_i64:
      if (Traits::Is64Bit) {
        ReturnReg = makeReg(IceType_i64, Traits::getRaxOrDie());
      } else {
        // On 32-bit targets an i64 comes back in the edx:eax pair.
        ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
        ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
      }
      break;
    case IceType_f32:
    case IceType_f64:
      if (!Traits::X86_PASS_SCALAR_FP_IN_XMM) {
        // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with
        // the fstp instruction.
        break;
      }
      // Fallthrough intended.
    case IceType_v4i1:
    case IceType_v8i1:
    case IceType_v16i1:
    case IceType_v16i8:
    case IceType_v8i16:
    case IceType_v4i32:
    case IceType_v4f32:
      ReturnReg = makeReg(DestTy, Traits::RegisterSet::Reg_xmm0);
      break;
    }
  }
  // Emit the call to the function.
  Operand *CallTarget =
      legalize(Instr->getCallTarget(), Legal_Reg | Legal_Imm | Legal_AddrAbs);
  Inst *NewCall = emitCallToTarget(CallTarget, ReturnReg);
  // Keep the upper return register live on 32-bit platform.
  if (ReturnRegHi)
    Context.insert<InstFakeDef>(ReturnRegHi);
  // Mark the call as killing all the caller-save registers.
  Context.insert<InstFakeKill>(NewCall);
  // Handle x86-32 floating point returns.
  if (Dest != nullptr && isScalarFloatingType(DestTy) &&
      !Traits::X86_PASS_SCALAR_FP_IN_XMM) {
    // Special treatment for an FP function which returns its result in st(0).
    // If Dest ends up being a physical xmm register, the fstp emit code will
    // route st(0) through the space reserved in the function argument area
    // we allocated.
    _fstp(Dest);
    // Create a fake use of Dest in case it actually isn't used, because st(0)
    // still needs to be popped.
    Context.insert<InstFakeUse>(Dest);
  }
  // Generate a FakeUse to keep the call live if necessary.
  if (Instr->hasSideEffects() && ReturnReg) {
    Context.insert<InstFakeUse>(ReturnReg);
  }
  // Process the return value, if any.
  if (Dest == nullptr)
    return;
  // Assign the result of the call to Dest. Route it through a temporary so
  // that the local register availability peephole can be subsequently used.
  Variable *Tmp = nullptr;
  if (isVectorType(DestTy)) {
    assert(ReturnReg && "Vector type requires a return register");
    Tmp = makeReg(DestTy);
    _movp(Tmp, ReturnReg);
    _movp(Dest, Tmp);
  } else if (isScalarFloatingType(DestTy)) {
    // With X86_PASS_SCALAR_FP_IN_XMM unset, the result was already handled by
    // the _fstp sequence above, so there is nothing to copy here.
    if (Traits::X86_PASS_SCALAR_FP_IN_XMM) {
      assert(ReturnReg && "FP type requires a return register");
      _mov(Tmp, ReturnReg);
      _mov(Dest, Tmp);
    }
  } else {
    assert(isScalarIntegerType(DestTy));
    assert(ReturnReg && "Integer type requires a return register");
    if (DestTy == IceType_i64 && !Traits::Is64Bit) {
      assert(ReturnRegHi && "64-bit type requires two return registers");
      // Split the edx:eax pair into the two 32-bit halves of the i64 dest.
      auto *Dest64On32 = llvm::cast<Variable64On32>(Dest);
      Variable *DestLo = Dest64On32->getLo();
      Variable *DestHi = Dest64On32->getHi();
      _mov(Tmp, ReturnReg);
      _mov(DestLo, Tmp);
      Variable *TmpHi = nullptr;
      _mov(TmpHi, ReturnRegHi);
      _mov(DestHi, TmpHi);
    } else {
      _mov(Tmp, ReturnReg);
      _mov(Dest, Tmp);
    }
  }
}
2743
/// \brief Lowers a single high-level cast instruction into x86 instructions.
///
/// Handles Sext, Zext, Trunc, Fptrunc/Fpext, Fptosi/Fptoui, Sitofp/Uitofp and
/// Bitcast. The general shape is: a = cast(b) ==> t = cast(b); a = t, routing
/// the result through a temporary so register allocation of Dest is decoupled
/// from the cast itself. Cases this target cannot lower inline (e.g. 64-bit
/// fptoui, i8/i16 bitcasts) are expected to have been rewritten into helper
/// calls by an earlier pass; reaching one of the report_fatal_error branches
/// below means that rewrite did not happen.
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerCast(const InstCast *Instr) {
  // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap)
  InstCast::OpKind CastKind = Instr->getCastKind();
  Variable *Dest = Instr->getDest();
  Type DestTy = Dest->getType();
  switch (CastKind) {
  default:
    Func->setError("Cast type not supported");
    return;
  case InstCast::Sext: {
    // Src0RM is the source operand legalized to physical register or memory,
    // but not immediate, since the relevant x86 native instructions don't
    // allow an immediate operand. If the operand is an immediate, we could
    // consider computing the strength-reduced result at translation time, but
    // we're unlikely to see something like that in the bitcode that the
    // optimizer wouldn't have already taken care of.
    Operand *Src0RM = legalize(Instr->getSrc(0), Legal_Reg | Legal_Mem);
    if (isVectorType(DestTy)) {
      if (DestTy == IceType_v16i8) {
        // onemask = materialize(1,1,...); dst = (src & onemask) > 0
        // There is no 8-bit vector shift, so sign-extend v16i8 booleans by
        // comparing the masked source against zero instead.
        Variable *OneMask = makeVectorOfOnes(DestTy);
        Variable *T = makeReg(DestTy);
        _movp(T, Src0RM);
        _pand(T, OneMask);
        Variable *Zeros = makeVectorOfZeros(DestTy);
        _pcmpgt(T, Zeros);
        _movp(Dest, T);
      } else {
        /// width = width(elty) - 1; dest = (src << width) >> width
        // Shift each lane's low bit up to the sign position, then arithmetic
        // shift back down to replicate it across the lane.
        SizeT ShiftAmount =
            Traits::X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) -
            1;
        Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount);
        Variable *T = makeReg(DestTy);
        _movp(T, Src0RM);
        _psll(T, ShiftConstant);
        _psra(T, ShiftConstant);
        _movp(Dest, T);
      }
    } else if (!Traits::Is64Bit && DestTy == IceType_i64) {
      // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2
      // On 32-bit x86, an i64 Dest is a lo/hi register pair; the high half is
      // the low half's sign bit smeared across 32 bits.
      Constant *Shift = Ctx->getConstantInt32(31);
      auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
      auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *T_Lo = makeReg(DestLo->getType());
      if (Src0RM->getType() == IceType_i32) {
        _mov(T_Lo, Src0RM);
      } else if (Src0RM->getType() == IceType_i1) {
        // i1 has no movsx form: zero-extend, then shl/sar to replicate bit 0.
        _movzx(T_Lo, Src0RM);
        _shl(T_Lo, Shift);
        _sar(T_Lo, Shift);
      } else {
        _movsx(T_Lo, Src0RM);
      }
      _mov(DestLo, T_Lo);
      Variable *T_Hi = nullptr;
      _mov(T_Hi, T_Lo);
      if (Src0RM->getType() != IceType_i1)
        // For i1, the sar instruction is already done above.
        _sar(T_Hi, Shift);
      _mov(DestHi, T_Hi);
    } else if (Src0RM->getType() == IceType_i1) {
      // t1 = src
      // shl t1, dst_bitwidth - 1
      // sar t1, dst_bitwidth - 1
      // dst = t1
      size_t DestBits = Traits::X86_CHAR_BIT * typeWidthInBytes(DestTy);
      Constant *ShiftAmount = Ctx->getConstantInt32(DestBits - 1);
      Variable *T = makeReg(DestTy);
      if (typeWidthInBytes(DestTy) <= typeWidthInBytes(Src0RM->getType())) {
        _mov(T, Src0RM);
      } else {
        // Widen the source using movsx or movzx. (It doesn't matter which one,
        // since the following shl/sar overwrite the bits.)
        _movzx(T, Src0RM);
      }
      _shl(T, ShiftAmount);
      _sar(T, ShiftAmount);
      _mov(Dest, T);
    } else {
      // t1 = movsx src; dst = t1
      Variable *T = makeReg(DestTy);
      _movsx(T, Src0RM);
      _mov(Dest, T);
    }
    break;
  }
  case InstCast::Zext: {
    Operand *Src0RM = legalize(Instr->getSrc(0), Legal_Reg | Legal_Mem);
    if (isVectorType(DestTy)) {
      // onemask = materialize(1,1,...); dest = onemask & src
      // Vector booleans zero-extend by simply masking each lane to its low
      // bit.
      Variable *OneMask = makeVectorOfOnes(DestTy);
      Variable *T = makeReg(DestTy);
      _movp(T, Src0RM);
      _pand(T, OneMask);
      _movp(Dest, T);
    } else if (!Traits::Is64Bit && DestTy == IceType_i64) {
      // t1=movzx src; dst.lo=t1; dst.hi=0
      Constant *Zero = Ctx->getConstantZero(IceType_i32);
      auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
      auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *Tmp = makeReg(DestLo->getType());
      if (Src0RM->getType() == IceType_i32) {
        _mov(Tmp, Src0RM);
      } else {
        _movzx(Tmp, Src0RM);
      }
      _mov(DestLo, Tmp);
      _mov(DestHi, Zero);
    } else if (Src0RM->getType() == IceType_i1) {
      // t = Src0RM; Dest = t
      Variable *T = nullptr;
      if (DestTy == IceType_i8) {
        // An i1 already lives in a byte register; a plain mov suffices.
        _mov(T, Src0RM);
      } else {
        assert(DestTy != IceType_i1);
        assert(Traits::Is64Bit || DestTy != IceType_i64);
        // Use 32-bit for both 16-bit and 32-bit, since 32-bit ops are shorter.
        // In x86-64 we need to widen T to 64-bits to ensure that T -- if
        // written to the stack (i.e., in -Om1) will be fully zero-extended.
        T = makeReg(DestTy == IceType_i64 ? IceType_i64 : IceType_i32);
        _movzx(T, Src0RM);
      }
      _mov(Dest, T);
    } else {
      // t1 = movzx src; dst = t1
      Variable *T = makeReg(DestTy);
      _movzx(T, Src0RM);
      _mov(Dest, T);
    }
    break;
  }
  case InstCast::Trunc: {
    if (isVectorType(DestTy)) {
      // onemask = materialize(1,1,...); dst = src & onemask
      // Vector truncation here only narrows to booleans: mask each lane down
      // to its low bit.
      Operand *Src0RM = legalize(Instr->getSrc(0), Legal_Reg | Legal_Mem);
      Type Src0Ty = Src0RM->getType();
      Variable *OneMask = makeVectorOfOnes(Src0Ty);
      Variable *T = makeReg(DestTy);
      _movp(T, Src0RM);
      _pand(T, OneMask);
      _movp(Dest, T);
    } else if (DestTy == IceType_i1 || DestTy == IceType_i8) {
      // Make sure we truncate from and into valid registers.
      Operand *Src0 = legalizeUndef(Instr->getSrc(0));
      if (!Traits::Is64Bit && Src0->getType() == IceType_i64)
        Src0 = loOperand(Src0);
      Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
      // copyToReg8 guarantees a byte-addressable register (relevant on
      // 32-bit x86 where only a/b/c/d have 8-bit subregisters).
      Variable *T = copyToReg8(Src0RM);
      if (DestTy == IceType_i1)
        _and(T, Ctx->getConstantInt1(1));
      _mov(Dest, T);
    } else {
      Operand *Src0 = legalizeUndef(Instr->getSrc(0));
      if (!Traits::Is64Bit && Src0->getType() == IceType_i64)
        Src0 = loOperand(Src0);
      Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
      // t1 = trunc Src0RM; Dest = t1
      Variable *T = makeReg(DestTy);
      _mov(T, Src0RM);
      _mov(Dest, T);
    }
    break;
  }
  case InstCast::Fptrunc:
  case InstCast::Fpext: {
    Operand *Src0RM = legalize(Instr->getSrc(0), Legal_Reg | Legal_Mem);
    // t1 = cvt Src0RM; Dest = t1
    // cvtss2sd/cvtsd2ss are both emitted through the Float2float variant.
    Variable *T = makeReg(DestTy);
    _cvt(T, Src0RM, Traits::Insts::Cvt::Float2float);
    _mov(Dest, T);
    break;
  }
  case InstCast::Fptosi:
    if (isVectorType(DestTy)) {
      assert(DestTy == IceType_v4i32 &&
             Instr->getSrc(0)->getType() == IceType_v4f32);
      Operand *Src0RM = legalize(Instr->getSrc(0), Legal_Reg | Legal_Mem);
      if (llvm::isa<X86OperandMem>(Src0RM))
        Src0RM = legalizeToReg(Src0RM);
      Variable *T = makeReg(DestTy);
      _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq);
      _movp(Dest, T);
    } else if (!Traits::Is64Bit && DestTy == IceType_i64) {
      llvm::report_fatal_error("Helper call was expected");
    } else {
      Operand *Src0RM = legalize(Instr->getSrc(0), Legal_Reg | Legal_Mem);
      // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
      Variable *T_1 = nullptr;
      if (Traits::Is64Bit && DestTy == IceType_i64) {
        T_1 = makeReg(IceType_i64);
      } else {
        assert(DestTy != IceType_i64);
        T_1 = makeReg(IceType_i32);
      }
      // cvt() requires its integer argument to be a GPR.
      Variable *T_2 = makeReg(DestTy);
      if (isByteSizedType(DestTy)) {
        // Pin register classes so the i32 convert result can legally be moved
        // into a byte-sized receiver.
        assert(T_1->getType() == IceType_i32);
        T_1->setRegClass(RCX86_Is32To8);
        T_2->setRegClass(RCX86_IsTrunc8Rcvr);
      }
      _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si);
      _mov(T_2, T_1); // T_1 and T_2 may have different integer types
      if (DestTy == IceType_i1)
        _and(T_2, Ctx->getConstantInt1(1));
      _mov(Dest, T_2);
    }
    break;
  case InstCast::Fptoui:
    if (isVectorType(DestTy)) {
      llvm::report_fatal_error("Helper call was expected");
    } else if (DestTy == IceType_i64 ||
               (!Traits::Is64Bit && DestTy == IceType_i32)) {
      // Unsigned results that don't fit the signed convert's range are
      // handled out of line.
      llvm::report_fatal_error("Helper call was expected");
    } else {
      Operand *Src0RM = legalize(Instr->getSrc(0), Legal_Reg | Legal_Mem);
      // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
      // The signed cvttss2si is safe here because DestTy is narrower than
      // T_1, so the unsigned result always fits in T_1's signed range.
      assert(DestTy != IceType_i64);
      Variable *T_1 = nullptr;
      if (Traits::Is64Bit && DestTy == IceType_i32) {
        T_1 = makeReg(IceType_i64);
      } else {
        assert(DestTy != IceType_i32);
        T_1 = makeReg(IceType_i32);
      }
      Variable *T_2 = makeReg(DestTy);
      if (isByteSizedType(DestTy)) {
        // Same byte-register pinning as the Fptosi scalar path.
        assert(T_1->getType() == IceType_i32);
        T_1->setRegClass(RCX86_Is32To8);
        T_2->setRegClass(RCX86_IsTrunc8Rcvr);
      }
      _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si);
      _mov(T_2, T_1); // T_1 and T_2 may have different integer types
      if (DestTy == IceType_i1)
        _and(T_2, Ctx->getConstantInt1(1));
      _mov(Dest, T_2);
    }
    break;
  case InstCast::Sitofp:
    if (isVectorType(DestTy)) {
      assert(DestTy == IceType_v4f32 &&
             Instr->getSrc(0)->getType() == IceType_v4i32);
      Operand *Src0RM = legalize(Instr->getSrc(0), Legal_Reg | Legal_Mem);
      if (llvm::isa<X86OperandMem>(Src0RM))
        Src0RM = legalizeToReg(Src0RM);
      Variable *T = makeReg(DestTy);
      _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps);
      _movp(Dest, T);
    } else if (!Traits::Is64Bit && Instr->getSrc(0)->getType() == IceType_i64) {
      llvm::report_fatal_error("Helper call was expected");
    } else {
      Operand *Src0RM = legalize(Instr->getSrc(0), Legal_Reg | Legal_Mem);
      // Sign-extend the operand.
      // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2
      Variable *T_1 = nullptr;
      if (Traits::Is64Bit && Src0RM->getType() == IceType_i64) {
        T_1 = makeReg(IceType_i64);
      } else {
        assert(Src0RM->getType() != IceType_i64);
        T_1 = makeReg(IceType_i32);
      }
      Variable *T_2 = makeReg(DestTy);
      if (Src0RM->getType() == T_1->getType())
        _mov(T_1, Src0RM);
      else
        _movsx(T_1, Src0RM);
      _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss);
      _mov(Dest, T_2);
    }
    break;
  case InstCast::Uitofp: {
    Operand *Src0 = Instr->getSrc(0);
    if (isVectorType(Src0->getType())) {
      llvm::report_fatal_error("Helper call was expected");
    } else if (Src0->getType() == IceType_i64 ||
               (!Traits::Is64Bit && Src0->getType() == IceType_i32)) {
      // Sources whose value range exceeds the signed convert are handled out
      // of line.
      llvm::report_fatal_error("Helper call was expected");
    } else {
      Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
      // Zero-extend the operand.
      // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2
      // Widening makes the unsigned value non-negative in T_1, so the signed
      // cvtsi2ss gives the correct result.
      Variable *T_1 = nullptr;
      if (Traits::Is64Bit && Src0RM->getType() == IceType_i32) {
        T_1 = makeReg(IceType_i64);
      } else {
        assert(Src0RM->getType() != IceType_i64);
        assert(Traits::Is64Bit || Src0RM->getType() != IceType_i32);
        T_1 = makeReg(IceType_i32);
      }
      Variable *T_2 = makeReg(DestTy);
      if (Src0RM->getType() == T_1->getType())
        _mov(T_1, Src0RM);
      else
        _movzx(T_1, Src0RM);
      _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss);
      _mov(Dest, T_2);
    }
    break;
  }
  case InstCast::Bitcast: {
    Operand *Src0 = Instr->getSrc(0);
    if (DestTy == Src0->getType()) {
      // Same-type bitcast degenerates to a plain assignment.
      auto *Assign = InstAssign::create(Func, Dest, Src0);
      lowerAssign(Assign);
      return;
    }
    switch (DestTy) {
    default:
      llvm_unreachable("Unexpected Bitcast dest type");
    case IceType_i8: {
      llvm::report_fatal_error("Helper call was expected");
    } break;
    case IceType_i16: {
      llvm::report_fatal_error("Helper call was expected");
    } break;
    case IceType_i32:
    case IceType_f32: {
      // 32-bit GPR<->XMM transfer via movd.
      Variable *Src0R = legalizeToReg(Src0);
      Variable *T = makeReg(DestTy);
      _movd(T, Src0R);
      _mov(Dest, T);
    } break;
    case IceType_i64: {
      assert(Src0->getType() == IceType_f64);
      if (Traits::Is64Bit) {
        Variable *Src0R = legalizeToReg(Src0);
        Variable *T = makeReg(IceType_i64);
        _movd(T, Src0R);
        _mov(Dest, T);
      } else {
        Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
        // a.i64 = bitcast b.f64 ==>
        //   s.f64 = spill b.f64
        //   t_lo.i32 = lo(s.f64)
        //   a_lo.i32 = t_lo.i32
        //   t_hi.i32 = hi(s.f64)
        //   a_hi.i32 = t_hi.i32
        // On 32-bit x86 there is no direct f64<->i64 register move, so the
        // f64 is routed through a stack slot and read back as two i32 halves.
        Operand *SpillLo, *SpillHi;
        if (auto *Src0Var = llvm::dyn_cast<Variable>(Src0RM)) {
          SpillVariable *SpillVar =
              Func->makeVariable<SpillVariable>(IceType_f64);
          SpillVar->setLinkedTo(Src0Var);
          Variable *Spill = SpillVar;
          Spill->setMustNotHaveReg();
          _movq(Spill, Src0RM);
          SpillLo = Traits::VariableSplit::create(Func, Spill,
                                                  Traits::VariableSplit::Low);
          SpillHi = Traits::VariableSplit::create(Func, Spill,
                                                  Traits::VariableSplit::High);
        } else {
          // Source is already in memory; address its halves directly.
          SpillLo = loOperand(Src0RM);
          SpillHi = hiOperand(Src0RM);
        }

        auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
        auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
        Variable *T_Lo = makeReg(IceType_i32);
        Variable *T_Hi = makeReg(IceType_i32);

        _mov(T_Lo, SpillLo);
        _mov(DestLo, T_Lo);
        _mov(T_Hi, SpillHi);
        _mov(DestHi, T_Hi);
      }
    } break;
    case IceType_f64: {
      assert(Src0->getType() == IceType_i64);
      if (Traits::Is64Bit) {
        Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
        Variable *T = makeReg(IceType_f64);
        _movd(T, Src0RM);
        _mov(Dest, T);
      } else {
        Src0 = legalize(Src0);
        if (llvm::isa<X86OperandMem>(Src0)) {
          // The i64 is already in memory: load it as a 64-bit quantity
          // directly.
          Variable *T = Func->makeVariable(DestTy);
          _movq(T, Src0);
          _movq(Dest, T);
          break;
        }
        // a.f64 = bitcast b.i64 ==>
        //   t_lo.i32 = b_lo.i32
        //   FakeDef(s.f64)
        //   lo(s.f64) = t_lo.i32
        //   t_hi.i32 = b_hi.i32
        //   hi(s.f64) = t_hi.i32
        //   a.f64 = s.f64
        // Inverse of the i64 case: write the two i32 halves into a forced
        // stack slot and read it back as f64.
        SpillVariable *SpillVar =
            Func->makeVariable<SpillVariable>(IceType_f64);
        SpillVar->setLinkedTo(Dest);
        Variable *Spill = SpillVar;
        Spill->setMustNotHaveReg();

        Variable *T_Lo = nullptr, *T_Hi = nullptr;
        auto *SpillLo = Traits::VariableSplit::create(
            Func, Spill, Traits::VariableSplit::Low);
        auto *SpillHi = Traits::VariableSplit::create(
            Func, Spill, Traits::VariableSplit::High);
        _mov(T_Lo, loOperand(Src0));
        // Technically, the Spill is defined after the _store happens, but
        // SpillLo is considered a "use" of Spill so define Spill before it is
        // used.
        Context.insert<InstFakeDef>(Spill);
        _store(T_Lo, SpillLo);
        _mov(T_Hi, hiOperand(Src0));
        _store(T_Hi, SpillHi);
        _movq(Dest, Spill);
      }
    } break;
    case IceType_v8i1: {
      llvm::report_fatal_error("Helper call was expected");
    } break;
    case IceType_v16i1: {
      llvm::report_fatal_error("Helper call was expected");
    } break;
    case IceType_v8i16:
    case IceType_v16i8:
    case IceType_v4i32:
    case IceType_v4f32: {
      // All 128-bit vector types share an XMM representation, so a register
      // copy is the whole bitcast.
      _movp(Dest, legalizeToReg(Src0));
    } break;
    }
    break;
  }
  }
}
3172
/// \brief Lowers extractelement: reads one lane of a vector into a scalar
/// Dest.
///
/// Three strategies, chosen by vector type and instruction set:
///   1. pextr{b,w,d} when available for the type;
///   2. pshufd + movd/movss for the 4-lane types;
///   3. spill the vector to a stack slot and load the lane from memory
///      (v16i8/v16i1 without a usable pextrb).
/// The lane index must be a compile-time constant (a PNaCl IR guarantee).
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerExtractElement(
    const InstExtractElement *Instr) {
  Operand *SourceVectNotLegalized = Instr->getSrc(0);
  auto *ElementIndex = llvm::dyn_cast<ConstantInteger32>(Instr->getSrc(1));
  // Only constant indices are allowed in PNaCl IR.
  assert(ElementIndex);

  unsigned Index = ElementIndex->getValue();
  Type Ty = SourceVectNotLegalized->getType();
  Type ElementTy = typeElementType(Ty);
  Type InVectorElementTy = Traits::getInVectorElementType(Ty);

  // TODO(wala): Determine the best lowering sequences for each type.
  // pextrw covers v8i16/v8i1 on all supported targets; pextrb/pextrd need
  // SSE4.1, and v4f32 is excluded because its lanes aren't integer.
  bool CanUsePextr = Ty == IceType_v8i16 || Ty == IceType_v8i1 ||
                     (InstructionSet >= Traits::SSE4_1 && Ty != IceType_v4f32);
  Variable *ExtractedElementR =
      makeReg(CanUsePextr ? IceType_i32 : InVectorElementTy);
  if (CanUsePextr) {
    // Use pextrb, pextrw, or pextrd. The "b" and "w" versions clear the upper
    // bits of the destination register, so we represent this by always
    // extracting into an i32 register. The _mov into Dest below will do
    // truncation as necessary.
    Constant *Mask = Ctx->getConstantInt32(Index);
    Variable *SourceVectR = legalizeToReg(SourceVectNotLegalized);
    _pextr(ExtractedElementR, SourceVectR, Mask);
  } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
    // Use pshufd and movd/movss.
    Variable *T = nullptr;
    if (Index) {
      // The shuffle only needs to occur if the element to be extracted is not
      // at the lowest index.
      Constant *Mask = Ctx->getConstantInt32(Index);
      T = makeReg(Ty);
      _pshufd(T, legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem), Mask);
    } else {
      T = legalizeToReg(SourceVectNotLegalized);
    }

    if (InVectorElementTy == IceType_i32) {
      _movd(ExtractedElementR, T);
    } else { // Ty == IceType_f32
      // TODO(wala): _movss is only used here because _mov does not allow a
      // vector source and a scalar destination. _mov should be able to be
      // used here.
      // _movss is a binary instruction, so the FakeDef is needed to keep the
      // live range analysis consistent.
      Context.insert<InstFakeDef>(ExtractedElementR);
      _movss(ExtractedElementR, T);
    }
  } else {
    assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
    // Spill the value to a stack slot and do the extraction in memory.
    //
    // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when support
    // for legalizing to mem is implemented.
    Variable *Slot = Func->makeVariable(Ty);
    Slot->setMustNotHaveReg();
    _movp(Slot, legalizeToReg(SourceVectNotLegalized));

    // Compute the location of the element in memory.
    unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
    X86OperandMem *Loc =
        getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
    _mov(ExtractedElementR, Loc);
  }

  if (ElementTy == IceType_i1) {
    // Truncate extracted integers to i1s if necessary.
    Variable *T = makeReg(IceType_i1);
    InstCast *Cast =
        InstCast::create(Func, InstCast::Trunc, T, ExtractedElementR);
    lowerCast(Cast);
    ExtractedElementR = T;
  }

  // Copy the element to the destination.
  Variable *Dest = Instr->getDest();
  _mov(Dest, ExtractedElementR);
}
3253
John Porto4a566862016-01-04 09:33:41 -08003254template <typename TraitsType>
3255void TargetX86Base<TraitsType>::lowerFcmp(const InstFcmp *Fcmp) {
David Sehre3984282015-12-15 17:34:55 -08003256 Variable *Dest = Fcmp->getDest();
3257
3258 if (isVectorType(Dest->getType())) {
3259 lowerFcmpVector(Fcmp);
3260 } else {
3261 constexpr Inst *Consumer = nullptr;
3262 lowerFcmpAndConsumer(Fcmp, Consumer);
3263 }
David Sehrdaf096c2015-11-11 10:56:58 -08003264}
3265
/// \brief Lowers a scalar fcmp, optionally folding its result directly into a
/// consumer instruction.
///
/// \param Fcmp     The scalar floating-point compare to lower.
/// \param Consumer nullptr for a standalone compare producing a boolean in
///                 Fcmp's Dest; otherwise an InstBr (branch directly on the
///                 flags) or InstSelect (conditional move of the select
///                 operands) whose lowering is fused with the compare. Any
///                 other consumer kind is a fatal error.
///
/// The condition is table-driven: Traits::TableFcmp maps each FCond to up to
/// two branch conditions (C1/C2, needed because x86 unordered-compare
/// semantics can require two jumps), an operand-swap flag, and the default
/// boolean result.
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerFcmpAndConsumer(const InstFcmp *Fcmp,
                                                     const Inst *Consumer) {
  Operand *Src0 = Fcmp->getSrc(0);
  Operand *Src1 = Fcmp->getSrc(1);
  Variable *Dest = Fcmp->getDest();

  if (isVectorType(Dest->getType()))
    llvm::report_fatal_error("Vector compare/branch cannot be folded");

  if (Consumer != nullptr) {
    if (auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) {
      // Try the specialized fcmp+select pattern first; if it applies, the
      // whole pair has been lowered.
      if (lowerOptimizeFcmpSelect(Fcmp, Select))
        return;
    }
  }

  // Lowering a = fcmp cond, b, c
  //   ucomiss b, c       /* only if C1 != Br_None */
  //                      /* but swap b,c order if SwapOperands==true */
  //   mov a, <default>
  //   j<C1> label        /* only if C1 != Br_None */
  //   j<C2> label        /* only if C2 != Br_None */
  //   FakeUse(a)         /* only if C1 != Br_None */
  //   mov a, !<default>  /* only if C1 != Br_None */
  //   label:             /* only if C1 != Br_None */
  //
  // setcc lowering when C1 != Br_None && C2 == Br_None:
  //   ucomiss b, c       /* but swap b,c order if SwapOperands==true */
  //   setcc a, C1
  InstFcmp::FCond Condition = Fcmp->getCondition();
  assert(Condition < Traits::TableFcmpSize);
  if (Traits::TableFcmp[Condition].SwapScalarOperands)
    std::swap(Src0, Src1);
  // C1 == Br_None means the result is a compile-time constant (True/False
  // conditions); no ucomiss is emitted at all.
  const bool HasC1 = (Traits::TableFcmp[Condition].C1 != Traits::Cond::Br_None);
  const bool HasC2 = (Traits::TableFcmp[Condition].C2 != Traits::Cond::Br_None);
  if (HasC1) {
    Src0 = legalize(Src0);
    Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
    Variable *T = nullptr;
    _mov(T, Src0);
    _ucomiss(T, Src1RM);
    if (!HasC2) {
      // Single-condition case: a setcc (or the consumer's fused form)
      // finishes the lowering.
      assert(Traits::TableFcmp[Condition].Default);
      setccOrConsumer(Traits::TableFcmp[Condition].C1, Dest, Consumer);
      return;
    }
  }
  int32_t IntDefault = Traits::TableFcmp[Condition].Default;
  if (Consumer == nullptr) {
    // Standalone boolean result: mov default, branch(es) over, mov !default.
    Constant *Default = Ctx->getConstantInt(Dest->getType(), IntDefault);
    _mov(Dest, Default);
    if (HasC1) {
      InstX86Label *Label = InstX86Label::create(Func, this);
      _br(Traits::TableFcmp[Condition].C1, Label);
      if (HasC2) {
        _br(Traits::TableFcmp[Condition].C2, Label);
      }
      Constant *NonDefault = Ctx->getConstantInt(Dest->getType(), !IntDefault);
      // _redefined: Dest is intentionally assigned twice along one path.
      _redefined(_mov(Dest, NonDefault));
      Context.insert(Label);
    }
    return;
  }
  if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) {
    // Fused compare+branch: jump straight to the successors, no boolean
    // materialized.
    CfgNode *TrueSucc = Br->getTargetTrue();
    CfgNode *FalseSucc = Br->getTargetFalse();
    if (IntDefault != 0)
      std::swap(TrueSucc, FalseSucc);
    if (HasC1) {
      _br(Traits::TableFcmp[Condition].C1, FalseSucc);
      if (HasC2) {
        _br(Traits::TableFcmp[Condition].C2, FalseSucc);
      }
      _br(TrueSucc);
      return;
    }
    // Constant condition: unconditional jump.
    _br(FalseSucc);
    return;
  }
  if (auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) {
    // Fused compare+select: move the "false" operand, branch over a
    // redefining move of the "true" operand.
    Operand *SrcT = Select->getTrueOperand();
    Operand *SrcF = Select->getFalseOperand();
    Variable *SelectDest = Select->getDest();
    if (IntDefault != 0)
      std::swap(SrcT, SrcF);
    lowerMove(SelectDest, SrcF, false);
    if (HasC1) {
      InstX86Label *Label = InstX86Label::create(Func, this);
      _br(Traits::TableFcmp[Condition].C1, Label);
      if (HasC2) {
        _br(Traits::TableFcmp[Condition].C2, Label);
      }
      static constexpr bool IsRedefinition = true;
      lowerMove(SelectDest, SrcT, IsRedefinition);
      Context.insert(Label);
    }
    return;
  }
  llvm::report_fatal_error("Unexpected consumer type");
}
3367
John Porto4a566862016-01-04 09:33:41 -08003368template <typename TraitsType>
3369void TargetX86Base<TraitsType>::lowerFcmpVector(const InstFcmp *Fcmp) {
David Sehre3984282015-12-15 17:34:55 -08003370 Operand *Src0 = Fcmp->getSrc(0);
3371 Operand *Src1 = Fcmp->getSrc(1);
3372 Variable *Dest = Fcmp->getDest();
3373
3374 if (!isVectorType(Dest->getType()))
3375 llvm::report_fatal_error("Expected vector compare");
3376
3377 InstFcmp::FCond Condition = Fcmp->getCondition();
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08003378 assert(Condition < Traits::TableFcmpSize);
David Sehre3984282015-12-15 17:34:55 -08003379
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08003380 if (Traits::TableFcmp[Condition].SwapVectorOperands)
David Sehre3984282015-12-15 17:34:55 -08003381 std::swap(Src0, Src1);
3382
3383 Variable *T = nullptr;
3384
3385 if (Condition == InstFcmp::True) {
3386 // makeVectorOfOnes() requires an integer vector type.
3387 T = makeVectorOfMinusOnes(IceType_v4i32);
3388 } else if (Condition == InstFcmp::False) {
3389 T = makeVectorOfZeros(Dest->getType());
3390 } else {
3391 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
3392 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
John Porto4a566862016-01-04 09:33:41 -08003393 if (llvm::isa<X86OperandMem>(Src1RM))
David Sehre3984282015-12-15 17:34:55 -08003394 Src1RM = legalizeToReg(Src1RM);
3395
3396 switch (Condition) {
3397 default: {
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08003398 const CmppsCond Predicate = Traits::TableFcmp[Condition].Predicate;
David Sehre3984282015-12-15 17:34:55 -08003399 assert(Predicate != Traits::Cond::Cmpps_Invalid);
3400 T = makeReg(Src0RM->getType());
3401 _movp(T, Src0RM);
3402 _cmpps(T, Src1RM, Predicate);
3403 } break;
3404 case InstFcmp::One: {
3405 // Check both unequal and ordered.
3406 T = makeReg(Src0RM->getType());
3407 Variable *T2 = makeReg(Src0RM->getType());
3408 _movp(T, Src0RM);
3409 _cmpps(T, Src1RM, Traits::Cond::Cmpps_neq);
3410 _movp(T2, Src0RM);
3411 _cmpps(T2, Src1RM, Traits::Cond::Cmpps_ord);
3412 _pand(T, T2);
3413 } break;
3414 case InstFcmp::Ueq: {
3415 // Check both equal or unordered.
3416 T = makeReg(Src0RM->getType());
3417 Variable *T2 = makeReg(Src0RM->getType());
3418 _movp(T, Src0RM);
3419 _cmpps(T, Src1RM, Traits::Cond::Cmpps_eq);
3420 _movp(T2, Src0RM);
3421 _cmpps(T2, Src1RM, Traits::Cond::Cmpps_unord);
3422 _por(T, T2);
3423 } break;
3424 }
3425 }
3426
3427 assert(T != nullptr);
3428 _movp(Dest, T);
3429 eliminateNextVectorSextInstruction(Dest);
John Porto7e93c622015-06-23 10:58:57 -07003430}
3431
David Sehr5c875422015-10-15 10:38:53 -07003432inline bool isZero(const Operand *Opnd) {
3433 if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Opnd))
3434 return C64->getValue() == 0;
3435 if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(Opnd))
3436 return C32->getValue() == 0;
3437 return false;
3438}
3439
John Porto4a566862016-01-04 09:33:41 -08003440template <typename TraitsType>
3441void TargetX86Base<TraitsType>::lowerIcmpAndConsumer(const InstIcmp *Icmp,
3442 const Inst *Consumer) {
David Sehrd9810252015-10-16 13:23:17 -07003443 Operand *Src0 = legalize(Icmp->getSrc(0));
3444 Operand *Src1 = legalize(Icmp->getSrc(1));
3445 Variable *Dest = Icmp->getDest();
John Porto7e93c622015-06-23 10:58:57 -07003446
David Sehre3984282015-12-15 17:34:55 -08003447 if (isVectorType(Dest->getType()))
3448 llvm::report_fatal_error("Vector compare/branch cannot be folded");
John Porto7e93c622015-06-23 10:58:57 -07003449
John Porto1d235422015-08-12 12:37:53 -07003450 if (!Traits::Is64Bit && Src0->getType() == IceType_i64) {
David Sehre3984282015-12-15 17:34:55 -08003451 lowerIcmp64(Icmp, Consumer);
John Porto7e93c622015-06-23 10:58:57 -07003452 return;
3453 }
3454
3455 // cmp b, c
David Sehr5c875422015-10-15 10:38:53 -07003456 if (isZero(Src1)) {
David Sehrd9810252015-10-16 13:23:17 -07003457 switch (Icmp->getCondition()) {
David Sehr5c875422015-10-15 10:38:53 -07003458 default:
3459 break;
3460 case InstIcmp::Uge:
David Sehre3984282015-12-15 17:34:55 -08003461 movOrConsumer(true, Dest, Consumer);
David Sehr5c875422015-10-15 10:38:53 -07003462 return;
3463 case InstIcmp::Ult:
David Sehre3984282015-12-15 17:34:55 -08003464 movOrConsumer(false, Dest, Consumer);
David Sehr5c875422015-10-15 10:38:53 -07003465 return;
3466 }
3467 }
John Porto7e93c622015-06-23 10:58:57 -07003468 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1);
3469 _cmp(Src0RM, Src1);
David Sehre3984282015-12-15 17:34:55 -08003470 setccOrConsumer(Traits::getIcmp32Mapping(Icmp->getCondition()), Dest,
3471 Consumer);
3472}
3473
John Porto4a566862016-01-04 09:33:41 -08003474template <typename TraitsType>
3475void TargetX86Base<TraitsType>::lowerIcmpVector(const InstIcmp *Icmp) {
David Sehre3984282015-12-15 17:34:55 -08003476 Operand *Src0 = legalize(Icmp->getSrc(0));
3477 Operand *Src1 = legalize(Icmp->getSrc(1));
3478 Variable *Dest = Icmp->getDest();
3479
3480 if (!isVectorType(Dest->getType()))
3481 llvm::report_fatal_error("Expected a vector compare");
3482
3483 Type Ty = Src0->getType();
3484 // Promote i1 vectors to 128 bit integer vector types.
3485 if (typeElementType(Ty) == IceType_i1) {
3486 Type NewTy = IceType_NUM;
3487 switch (Ty) {
3488 default:
3489 llvm::report_fatal_error("unexpected type");
3490 break;
3491 case IceType_v4i1:
3492 NewTy = IceType_v4i32;
3493 break;
3494 case IceType_v8i1:
3495 NewTy = IceType_v8i16;
3496 break;
3497 case IceType_v16i1:
3498 NewTy = IceType_v16i8;
3499 break;
3500 }
3501 Variable *NewSrc0 = Func->makeVariable(NewTy);
3502 Variable *NewSrc1 = Func->makeVariable(NewTy);
3503 lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc0, Src0));
3504 lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc1, Src1));
3505 Src0 = NewSrc0;
3506 Src1 = NewSrc1;
3507 Ty = NewTy;
3508 }
3509
3510 InstIcmp::ICond Condition = Icmp->getCondition();
3511
3512 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
3513 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
3514
3515 // SSE2 only has signed comparison operations. Transform unsigned inputs in
3516 // a manner that allows for the use of signed comparison operations by
3517 // flipping the high order bits.
3518 if (Condition == InstIcmp::Ugt || Condition == InstIcmp::Uge ||
3519 Condition == InstIcmp::Ult || Condition == InstIcmp::Ule) {
3520 Variable *T0 = makeReg(Ty);
3521 Variable *T1 = makeReg(Ty);
3522 Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty);
3523 _movp(T0, Src0RM);
3524 _pxor(T0, HighOrderBits);
3525 _movp(T1, Src1RM);
3526 _pxor(T1, HighOrderBits);
3527 Src0RM = T0;
3528 Src1RM = T1;
3529 }
3530
3531 Variable *T = makeReg(Ty);
3532 switch (Condition) {
3533 default:
3534 llvm_unreachable("unexpected condition");
3535 break;
3536 case InstIcmp::Eq: {
John Porto4a566862016-01-04 09:33:41 -08003537 if (llvm::isa<X86OperandMem>(Src1RM))
David Sehre3984282015-12-15 17:34:55 -08003538 Src1RM = legalizeToReg(Src1RM);
3539 _movp(T, Src0RM);
3540 _pcmpeq(T, Src1RM);
3541 } break;
3542 case InstIcmp::Ne: {
John Porto4a566862016-01-04 09:33:41 -08003543 if (llvm::isa<X86OperandMem>(Src1RM))
David Sehre3984282015-12-15 17:34:55 -08003544 Src1RM = legalizeToReg(Src1RM);
3545 _movp(T, Src0RM);
3546 _pcmpeq(T, Src1RM);
3547 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
3548 _pxor(T, MinusOne);
3549 } break;
3550 case InstIcmp::Ugt:
3551 case InstIcmp::Sgt: {
John Porto4a566862016-01-04 09:33:41 -08003552 if (llvm::isa<X86OperandMem>(Src1RM))
David Sehre3984282015-12-15 17:34:55 -08003553 Src1RM = legalizeToReg(Src1RM);
3554 _movp(T, Src0RM);
3555 _pcmpgt(T, Src1RM);
3556 } break;
3557 case InstIcmp::Uge:
3558 case InstIcmp::Sge: {
3559 // !(Src1RM > Src0RM)
John Porto4a566862016-01-04 09:33:41 -08003560 if (llvm::isa<X86OperandMem>(Src0RM))
David Sehre3984282015-12-15 17:34:55 -08003561 Src0RM = legalizeToReg(Src0RM);
3562 _movp(T, Src1RM);
3563 _pcmpgt(T, Src0RM);
3564 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
3565 _pxor(T, MinusOne);
3566 } break;
3567 case InstIcmp::Ult:
3568 case InstIcmp::Slt: {
John Porto4a566862016-01-04 09:33:41 -08003569 if (llvm::isa<X86OperandMem>(Src0RM))
David Sehre3984282015-12-15 17:34:55 -08003570 Src0RM = legalizeToReg(Src0RM);
3571 _movp(T, Src1RM);
3572 _pcmpgt(T, Src0RM);
3573 } break;
3574 case InstIcmp::Ule:
3575 case InstIcmp::Sle: {
3576 // !(Src0RM > Src1RM)
John Porto4a566862016-01-04 09:33:41 -08003577 if (llvm::isa<X86OperandMem>(Src1RM))
David Sehre3984282015-12-15 17:34:55 -08003578 Src1RM = legalizeToReg(Src1RM);
3579 _movp(T, Src0RM);
3580 _pcmpgt(T, Src1RM);
3581 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
3582 _pxor(T, MinusOne);
3583 } break;
3584 }
3585
3586 _movp(Dest, T);
3587 eliminateNextVectorSextInstruction(Dest);
John Porto7e93c622015-06-23 10:58:57 -07003588}
3589
John Porto4a566862016-01-04 09:33:41 -08003590template <typename TraitsType>
John Porto1d235422015-08-12 12:37:53 -07003591template <typename T>
3592typename std::enable_if<!T::Is64Bit, void>::type
John Porto4a566862016-01-04 09:33:41 -08003593TargetX86Base<TraitsType>::lowerIcmp64(const InstIcmp *Icmp,
3594 const Inst *Consumer) {
John Porto1d235422015-08-12 12:37:53 -07003595 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
David Sehrd9810252015-10-16 13:23:17 -07003596 Operand *Src0 = legalize(Icmp->getSrc(0));
3597 Operand *Src1 = legalize(Icmp->getSrc(1));
3598 Variable *Dest = Icmp->getDest();
3599 InstIcmp::ICond Condition = Icmp->getCondition();
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08003600 assert(Condition < Traits::TableIcmp64Size);
David Sehr5c875422015-10-15 10:38:53 -07003601 Operand *Src0LoRM = nullptr;
3602 Operand *Src0HiRM = nullptr;
3603 // Legalize the portions of Src0 that are going to be needed.
3604 if (isZero(Src1)) {
3605 switch (Condition) {
3606 default:
3607 llvm_unreachable("unexpected condition");
3608 break;
3609 // These two are not optimized, so we fall through to the general case,
3610 // which needs the upper and lower halves legalized.
3611 case InstIcmp::Sgt:
3612 case InstIcmp::Sle:
Jim Stichnoth1fb030c2015-10-15 11:10:38 -07003613 // These four compare after performing an "or" of the high and low half, so
3614 // they need the upper and lower halves legalized.
David Sehr5c875422015-10-15 10:38:53 -07003615 case InstIcmp::Eq:
3616 case InstIcmp::Ule:
3617 case InstIcmp::Ne:
3618 case InstIcmp::Ugt:
3619 Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem);
3620 // These two test only the high half's sign bit, so they need only
3621 // the upper half legalized.
3622 case InstIcmp::Sge:
3623 case InstIcmp::Slt:
3624 Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem);
3625 break;
3626
3627 // These two move constants and hence need no legalization.
3628 case InstIcmp::Uge:
3629 case InstIcmp::Ult:
3630 break;
3631 }
3632 } else {
3633 Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem);
3634 Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem);
3635 }
3636 // Optimize comparisons with zero.
3637 if (isZero(Src1)) {
3638 Constant *SignMask = Ctx->getConstantInt32(0x80000000);
3639 Variable *Temp = nullptr;
3640 switch (Condition) {
3641 default:
3642 llvm_unreachable("unexpected condition");
3643 break;
3644 case InstIcmp::Eq:
3645 case InstIcmp::Ule:
David Sehraa0b1a12015-10-27 16:55:40 -07003646 // Mov Src0HiRM first, because it was legalized most recently, and will
3647 // sometimes avoid a move before the OR.
3648 _mov(Temp, Src0HiRM);
3649 _or(Temp, Src0LoRM);
John Porto1d937a82015-12-17 06:19:34 -08003650 Context.insert<InstFakeUse>(Temp);
David Sehre3984282015-12-15 17:34:55 -08003651 setccOrConsumer(Traits::Cond::Br_e, Dest, Consumer);
David Sehr5c875422015-10-15 10:38:53 -07003652 return;
3653 case InstIcmp::Ne:
3654 case InstIcmp::Ugt:
David Sehraa0b1a12015-10-27 16:55:40 -07003655 // Mov Src0HiRM first, because it was legalized most recently, and will
3656 // sometimes avoid a move before the OR.
3657 _mov(Temp, Src0HiRM);
3658 _or(Temp, Src0LoRM);
John Porto1d937a82015-12-17 06:19:34 -08003659 Context.insert<InstFakeUse>(Temp);
David Sehre3984282015-12-15 17:34:55 -08003660 setccOrConsumer(Traits::Cond::Br_ne, Dest, Consumer);
David Sehr5c875422015-10-15 10:38:53 -07003661 return;
3662 case InstIcmp::Uge:
David Sehre3984282015-12-15 17:34:55 -08003663 movOrConsumer(true, Dest, Consumer);
David Sehr5c875422015-10-15 10:38:53 -07003664 return;
3665 case InstIcmp::Ult:
David Sehre3984282015-12-15 17:34:55 -08003666 movOrConsumer(false, Dest, Consumer);
David Sehr5c875422015-10-15 10:38:53 -07003667 return;
3668 case InstIcmp::Sgt:
3669 break;
3670 case InstIcmp::Sge:
3671 _test(Src0HiRM, SignMask);
David Sehre3984282015-12-15 17:34:55 -08003672 setccOrConsumer(Traits::Cond::Br_e, Dest, Consumer);
David Sehr5c875422015-10-15 10:38:53 -07003673 return;
3674 case InstIcmp::Slt:
3675 _test(Src0HiRM, SignMask);
David Sehre3984282015-12-15 17:34:55 -08003676 setccOrConsumer(Traits::Cond::Br_ne, Dest, Consumer);
David Sehr5c875422015-10-15 10:38:53 -07003677 return;
3678 case InstIcmp::Sle:
3679 break;
3680 }
3681 }
3682 // Handle general compares.
3683 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
3684 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
David Sehre3984282015-12-15 17:34:55 -08003685 if (Consumer == nullptr) {
David Sehrd9810252015-10-16 13:23:17 -07003686 Constant *Zero = Ctx->getConstantInt(Dest->getType(), 0);
3687 Constant *One = Ctx->getConstantInt(Dest->getType(), 1);
John Porto4a566862016-01-04 09:33:41 -08003688 InstX86Label *LabelFalse = InstX86Label::create(Func, this);
3689 InstX86Label *LabelTrue = InstX86Label::create(Func, this);
David Sehrd9810252015-10-16 13:23:17 -07003690 _mov(Dest, One);
3691 _cmp(Src0HiRM, Src1HiRI);
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08003692 if (Traits::TableIcmp64[Condition].C1 != Traits::Cond::Br_None)
3693 _br(Traits::TableIcmp64[Condition].C1, LabelTrue);
3694 if (Traits::TableIcmp64[Condition].C2 != Traits::Cond::Br_None)
3695 _br(Traits::TableIcmp64[Condition].C2, LabelFalse);
David Sehrd9810252015-10-16 13:23:17 -07003696 _cmp(Src0LoRM, Src1LoRI);
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08003697 _br(Traits::TableIcmp64[Condition].C3, LabelTrue);
David Sehrd9810252015-10-16 13:23:17 -07003698 Context.insert(LabelFalse);
David Sehre3984282015-12-15 17:34:55 -08003699 _redefined(_mov(Dest, Zero));
David Sehrd9810252015-10-16 13:23:17 -07003700 Context.insert(LabelTrue);
David Sehre3984282015-12-15 17:34:55 -08003701 return;
3702 }
3703 if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) {
David Sehrd9810252015-10-16 13:23:17 -07003704 _cmp(Src0HiRM, Src1HiRI);
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08003705 if (Traits::TableIcmp64[Condition].C1 != Traits::Cond::Br_None)
3706 _br(Traits::TableIcmp64[Condition].C1, Br->getTargetTrue());
3707 if (Traits::TableIcmp64[Condition].C2 != Traits::Cond::Br_None)
3708 _br(Traits::TableIcmp64[Condition].C2, Br->getTargetFalse());
David Sehrd9810252015-10-16 13:23:17 -07003709 _cmp(Src0LoRM, Src1LoRI);
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08003710 _br(Traits::TableIcmp64[Condition].C3, Br->getTargetTrue(),
David Sehrd9810252015-10-16 13:23:17 -07003711 Br->getTargetFalse());
David Sehre3984282015-12-15 17:34:55 -08003712 return;
David Sehrd9810252015-10-16 13:23:17 -07003713 }
David Sehre3984282015-12-15 17:34:55 -08003714 if (auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) {
3715 Operand *SrcT = Select->getTrueOperand();
3716 Operand *SrcF = Select->getFalseOperand();
3717 Variable *SelectDest = Select->getDest();
John Porto4a566862016-01-04 09:33:41 -08003718 InstX86Label *LabelFalse = InstX86Label::create(Func, this);
3719 InstX86Label *LabelTrue = InstX86Label::create(Func, this);
David Sehre3984282015-12-15 17:34:55 -08003720 lowerMove(SelectDest, SrcT, false);
3721 _cmp(Src0HiRM, Src1HiRI);
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08003722 if (Traits::TableIcmp64[Condition].C1 != Traits::Cond::Br_None)
3723 _br(Traits::TableIcmp64[Condition].C1, LabelTrue);
3724 if (Traits::TableIcmp64[Condition].C2 != Traits::Cond::Br_None)
3725 _br(Traits::TableIcmp64[Condition].C2, LabelFalse);
David Sehre3984282015-12-15 17:34:55 -08003726 _cmp(Src0LoRM, Src1LoRI);
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08003727 _br(Traits::TableIcmp64[Condition].C3, LabelTrue);
David Sehre3984282015-12-15 17:34:55 -08003728 Context.insert(LabelFalse);
3729 static constexpr bool IsRedefinition = true;
3730 lowerMove(SelectDest, SrcF, IsRedefinition);
3731 Context.insert(LabelTrue);
3732 return;
3733 }
3734 llvm::report_fatal_error("Unexpected consumer type");
David Sehrd9810252015-10-16 13:23:17 -07003735}
3736
John Porto4a566862016-01-04 09:33:41 -08003737template <typename TraitsType>
3738void TargetX86Base<TraitsType>::setccOrConsumer(BrCond Condition,
3739 Variable *Dest,
3740 const Inst *Consumer) {
David Sehre3984282015-12-15 17:34:55 -08003741 if (Consumer == nullptr) {
David Sehrd9810252015-10-16 13:23:17 -07003742 _setcc(Dest, Condition);
David Sehre3984282015-12-15 17:34:55 -08003743 return;
David Sehrd9810252015-10-16 13:23:17 -07003744 }
David Sehre3984282015-12-15 17:34:55 -08003745 if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) {
3746 _br(Condition, Br->getTargetTrue(), Br->getTargetFalse());
3747 return;
3748 }
3749 if (const auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) {
3750 Operand *SrcT = Select->getTrueOperand();
3751 Operand *SrcF = Select->getFalseOperand();
3752 Variable *SelectDest = Select->getDest();
3753 lowerSelectMove(SelectDest, Condition, SrcT, SrcF);
3754 return;
3755 }
3756 llvm::report_fatal_error("Unexpected consumer type");
David Sehrd9810252015-10-16 13:23:17 -07003757}
3758
John Porto4a566862016-01-04 09:33:41 -08003759template <typename TraitsType>
3760void TargetX86Base<TraitsType>::movOrConsumer(bool IcmpResult, Variable *Dest,
3761 const Inst *Consumer) {
David Sehre3984282015-12-15 17:34:55 -08003762 if (Consumer == nullptr) {
David Sehrd9810252015-10-16 13:23:17 -07003763 _mov(Dest, Ctx->getConstantInt(Dest->getType(), (IcmpResult ? 1 : 0)));
David Sehre3984282015-12-15 17:34:55 -08003764 return;
3765 }
3766 if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) {
David Sehrd9810252015-10-16 13:23:17 -07003767 // TODO(sehr,stichnot): This could be done with a single unconditional
3768 // branch instruction, but subzero doesn't know how to handle the resulting
3769 // control flow graph changes now. Make it do so to eliminate mov and cmp.
3770 _mov(Dest, Ctx->getConstantInt(Dest->getType(), (IcmpResult ? 1 : 0)));
3771 _cmp(Dest, Ctx->getConstantInt(Dest->getType(), 0));
3772 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse());
David Sehre3984282015-12-15 17:34:55 -08003773 return;
David Sehrd9810252015-10-16 13:23:17 -07003774 }
David Sehre3984282015-12-15 17:34:55 -08003775 if (const auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) {
3776 Operand *Src = nullptr;
3777 if (IcmpResult) {
3778 Src = legalize(Select->getTrueOperand(), Legal_Reg | Legal_Imm);
3779 } else {
3780 Src = legalize(Select->getFalseOperand(), Legal_Reg | Legal_Imm);
3781 }
3782 Variable *SelectDest = Select->getDest();
3783 lowerMove(SelectDest, Src, false);
3784 return;
3785 }
3786 llvm::report_fatal_error("Unexpected consumer type");
John Porto1d235422015-08-12 12:37:53 -07003787}
3788
John Porto4a566862016-01-04 09:33:41 -08003789template <typename TraitsType>
3790void TargetX86Base<TraitsType>::lowerArithAndConsumer(
3791 const InstArithmetic *Arith, const Inst *Consumer) {
David Sehrdaf096c2015-11-11 10:56:58 -08003792 Variable *T = nullptr;
3793 Operand *Src0 = legalize(Arith->getSrc(0));
3794 Operand *Src1 = legalize(Arith->getSrc(1));
3795 Variable *Dest = Arith->getDest();
3796 switch (Arith->getOp()) {
3797 default:
3798 llvm_unreachable("arithmetic operator not AND or OR");
3799 break;
3800 case InstArithmetic::And:
3801 _mov(T, Src0);
3802 // Test cannot have an address in the second position. Since T is
3803 // guaranteed to be a register and Src1 could be a memory load, ensure
3804 // that the second argument is a register.
3805 if (llvm::isa<Constant>(Src1))
3806 _test(T, Src1);
3807 else
3808 _test(Src1, T);
3809 break;
3810 case InstArithmetic::Or:
3811 _mov(T, Src0);
3812 _or(T, Src1);
3813 break;
3814 }
David Sehre3984282015-12-15 17:34:55 -08003815
3816 if (Consumer == nullptr) {
3817 llvm::report_fatal_error("Expected a consumer instruction");
3818 }
3819 if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) {
John Porto1d937a82015-12-17 06:19:34 -08003820 Context.insert<InstFakeUse>(T);
3821 Context.insert<InstFakeDef>(Dest);
David Sehre3984282015-12-15 17:34:55 -08003822 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse());
3823 return;
3824 }
3825 llvm::report_fatal_error("Unexpected consumer type");
David Sehrdaf096c2015-11-11 10:56:58 -08003826}
3827
John Porto4a566862016-01-04 09:33:41 -08003828template <typename TraitsType>
3829void TargetX86Base<TraitsType>::lowerInsertElement(
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08003830 const InstInsertElement *Instr) {
3831 Operand *SourceVectNotLegalized = Instr->getSrc(0);
3832 Operand *ElementToInsertNotLegalized = Instr->getSrc(1);
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08003833 auto *ElementIndex = llvm::dyn_cast<ConstantInteger32>(Instr->getSrc(2));
John Porto7e93c622015-06-23 10:58:57 -07003834 // Only constant indices are allowed in PNaCl IR.
3835 assert(ElementIndex);
3836 unsigned Index = ElementIndex->getValue();
3837 assert(Index < typeNumElements(SourceVectNotLegalized->getType()));
3838
3839 Type Ty = SourceVectNotLegalized->getType();
3840 Type ElementTy = typeElementType(Ty);
3841 Type InVectorElementTy = Traits::getInVectorElementType(Ty);
3842
3843 if (ElementTy == IceType_i1) {
Andrew Scull57e12682015-09-16 11:30:19 -07003844 // Expand the element to the appropriate size for it to be inserted in the
3845 // vector.
John Porto5aeed952015-07-21 13:39:09 -07003846 Variable *Expanded = Func->makeVariable(InVectorElementTy);
Jim Stichnoth54f3d512015-12-11 09:53:00 -08003847 auto *Cast = InstCast::create(Func, InstCast::Zext, Expanded,
3848 ElementToInsertNotLegalized);
John Porto7e93c622015-06-23 10:58:57 -07003849 lowerCast(Cast);
3850 ElementToInsertNotLegalized = Expanded;
3851 }
3852
3853 if (Ty == IceType_v8i16 || Ty == IceType_v8i1 ||
John Porto5d0acff2015-06-30 15:29:21 -07003854 InstructionSet >= Traits::SSE4_1) {
John Porto7e93c622015-06-23 10:58:57 -07003855 // Use insertps, pinsrb, pinsrw, or pinsrd.
3856 Operand *ElementRM =
3857 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem);
3858 Operand *SourceVectRM =
3859 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
3860 Variable *T = makeReg(Ty);
3861 _movp(T, SourceVectRM);
Jim Stichnothc59288b2015-11-09 11:38:40 -08003862 if (Ty == IceType_v4f32) {
John Porto7e93c622015-06-23 10:58:57 -07003863 _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4));
Jim Stichnothc59288b2015-11-09 11:38:40 -08003864 } else {
3865 // For the pinsrb and pinsrw instructions, when the source operand is a
3866 // register, it must be a full r32 register like eax, and not ax/al/ah.
John Porto4a566862016-01-04 09:33:41 -08003867 // For filetype=asm, InstX86Pinsr<TraitsType>::emit() compensates for
3868 // the use
Jim Stichnothc59288b2015-11-09 11:38:40 -08003869 // of r16 and r8 by converting them through getBaseReg(), while emitIAS()
3870 // validates that the original and base register encodings are the same.
3871 if (ElementRM->getType() == IceType_i8 &&
3872 llvm::isa<Variable>(ElementRM)) {
3873 // Don't use ah/bh/ch/dh for pinsrb.
3874 ElementRM = copyToReg8(ElementRM);
3875 }
John Porto7e93c622015-06-23 10:58:57 -07003876 _pinsr(T, ElementRM, Ctx->getConstantInt32(Index));
Jim Stichnothc59288b2015-11-09 11:38:40 -08003877 }
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08003878 _movp(Instr->getDest(), T);
John Porto7e93c622015-06-23 10:58:57 -07003879 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
3880 // Use shufps or movss.
3881 Variable *ElementR = nullptr;
3882 Operand *SourceVectRM =
3883 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
3884
3885 if (InVectorElementTy == IceType_f32) {
3886 // ElementR will be in an XMM register since it is floating point.
Andrew Scull97f460d2015-07-21 10:07:42 -07003887 ElementR = legalizeToReg(ElementToInsertNotLegalized);
John Porto7e93c622015-06-23 10:58:57 -07003888 } else {
3889 // Copy an integer to an XMM register.
3890 Operand *T = legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem);
3891 ElementR = makeReg(Ty);
3892 _movd(ElementR, T);
3893 }
3894
3895 if (Index == 0) {
3896 Variable *T = makeReg(Ty);
3897 _movp(T, SourceVectRM);
3898 _movss(T, ElementR);
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08003899 _movp(Instr->getDest(), T);
John Porto7e93c622015-06-23 10:58:57 -07003900 return;
3901 }
3902
Andrew Scull57e12682015-09-16 11:30:19 -07003903 // shufps treats the source and destination operands as vectors of four
3904 // doublewords. The destination's two high doublewords are selected from
3905 // the source operand and the two low doublewords are selected from the
3906 // (original value of) the destination operand. An insertelement operation
3907 // can be effected with a sequence of two shufps operations with
3908 // appropriate masks. In all cases below, Element[0] is being inserted into
3909 // SourceVectOperand. Indices are ordered from left to right.
John Porto7e93c622015-06-23 10:58:57 -07003910 //
3911 // insertelement into index 1 (result is stored in ElementR):
3912 // ElementR := ElementR[0, 0] SourceVectRM[0, 0]
3913 // ElementR := ElementR[3, 0] SourceVectRM[2, 3]
3914 //
3915 // insertelement into index 2 (result is stored in T):
3916 // T := SourceVectRM
3917 // ElementR := ElementR[0, 0] T[0, 3]
3918 // T := T[0, 1] ElementR[0, 3]
3919 //
3920 // insertelement into index 3 (result is stored in T):
3921 // T := SourceVectRM
3922 // ElementR := ElementR[0, 0] T[0, 2]
3923 // T := T[0, 1] ElementR[3, 0]
3924 const unsigned char Mask1[3] = {0, 192, 128};
3925 const unsigned char Mask2[3] = {227, 196, 52};
3926
3927 Constant *Mask1Constant = Ctx->getConstantInt32(Mask1[Index - 1]);
3928 Constant *Mask2Constant = Ctx->getConstantInt32(Mask2[Index - 1]);
3929
3930 if (Index == 1) {
3931 _shufps(ElementR, SourceVectRM, Mask1Constant);
3932 _shufps(ElementR, SourceVectRM, Mask2Constant);
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08003933 _movp(Instr->getDest(), ElementR);
John Porto7e93c622015-06-23 10:58:57 -07003934 } else {
3935 Variable *T = makeReg(Ty);
3936 _movp(T, SourceVectRM);
3937 _shufps(ElementR, T, Mask1Constant);
3938 _shufps(T, ElementR, Mask2Constant);
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08003939 _movp(Instr->getDest(), T);
John Porto7e93c622015-06-23 10:58:57 -07003940 }
3941 } else {
3942 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
Andrew Scull57e12682015-09-16 11:30:19 -07003943 // Spill the value to a stack slot and perform the insertion in memory.
John Porto7e93c622015-06-23 10:58:57 -07003944 //
Andrew Scull57e12682015-09-16 11:30:19 -07003945 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when support
3946 // for legalizing to mem is implemented.
John Porto5aeed952015-07-21 13:39:09 -07003947 Variable *Slot = Func->makeVariable(Ty);
Andrew Scull11c9a322015-08-28 14:24:14 -07003948 Slot->setMustNotHaveReg();
Andrew Scull97f460d2015-07-21 10:07:42 -07003949 _movp(Slot, legalizeToReg(SourceVectNotLegalized));
John Porto7e93c622015-06-23 10:58:57 -07003950
3951 // Compute the location of the position to insert in memory.
3952 unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
John Porto4a566862016-01-04 09:33:41 -08003953 X86OperandMem *Loc =
John Porto7e93c622015-06-23 10:58:57 -07003954 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
Andrew Scull97f460d2015-07-21 10:07:42 -07003955 _store(legalizeToReg(ElementToInsertNotLegalized), Loc);
John Porto7e93c622015-06-23 10:58:57 -07003956
3957 Variable *T = makeReg(Ty);
3958 _movp(T, Slot);
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08003959 _movp(Instr->getDest(), T);
John Porto7e93c622015-06-23 10:58:57 -07003960 }
3961}
3962
/// Lowers a single InstIntrinsicCall by dispatching on the intrinsic ID.
/// Each case either emits target pseudo-instructions directly, delegates to a
/// dedicated lower* helper, or emits a runtime helper call. Invalid memory
/// orderings and operands are reported through Func->setError().
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerIntrinsicCall(
    const InstIntrinsicCall *Instr) {
  switch (Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID) {
  case Intrinsics::AtomicCmpxchg: {
    // Args: 0 = ptr, 1 = expected, 2 = desired, 3 = success order,
    // 4 = failure order.  Both orderings must be valid together.
    if (!Intrinsics::isMemoryOrderValid(
            ID, getConstantMemoryOrder(Instr->getArg(3)),
            getConstantMemoryOrder(Instr->getArg(4)))) {
      Func->setError("Unexpected memory ordering for AtomicCmpxchg");
      return;
    }
    Variable *DestPrev = Instr->getDest();
    Operand *PtrToMem = legalize(Instr->getArg(0));
    Operand *Expected = legalize(Instr->getArg(1));
    Operand *Desired = legalize(Instr->getArg(2));
    // Prefer fusing the cmpxchg with an immediately-following compare+branch
    // on the previous value; fall back to the plain lowering otherwise.
    if (tryOptimizedCmpxchgCmpBr(DestPrev, PtrToMem, Expected, Desired))
      return;
    lowerAtomicCmpxchg(DestPrev, PtrToMem, Expected, Desired);
    return;
  }
  case Intrinsics::AtomicFence:
    if (!Intrinsics::isMemoryOrderValid(
            ID, getConstantMemoryOrder(Instr->getArg(0)))) {
      Func->setError("Unexpected memory ordering for AtomicFence");
      return;
    }
    _mfence();
    return;
  case Intrinsics::AtomicFenceAll:
    // NOTE: FenceAll should prevent any load/store from being moved across the
    // fence (both atomic and non-atomic). The InstX8632Mfence instruction is
    // currently marked coarsely as "HasSideEffects".
    _mfence();
    return;
  case Intrinsics::AtomicIsLockFree: {
    // X86 is always lock free for 8/16/32/64 bit accesses.
    // TODO(jvoung): Since the result is constant when given a constant byte
    // size, this opens up DCE opportunities.
    Operand *ByteSize = Instr->getArg(0);
    Variable *Dest = Instr->getDest();
    if (auto *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize)) {
      Constant *Result;
      switch (CI->getValue()) {
      default:
        // Some x86-64 processors support the cmpxchg16b instruction, which can
        // make 16-byte operations lock free (when used with the LOCK prefix).
        // However, that's not supported in 32-bit mode, so just return 0 even
        // for large sizes.
        Result = Ctx->getConstantZero(IceType_i32);
        break;
      case 1:
      case 2:
      case 4:
      case 8:
        Result = Ctx->getConstantInt32(1);
        break;
      }
      _mov(Dest, Result);
      return;
    }
    // The PNaCl ABI requires the byte size to be a compile-time constant.
    Func->setError("AtomicIsLockFree byte size should be compile-time const");
    return;
  }
  case Intrinsics::AtomicLoad: {
    // We require the memory address to be naturally aligned. Given that is the
    // case, then normal loads are atomic.
    if (!Intrinsics::isMemoryOrderValid(
            ID, getConstantMemoryOrder(Instr->getArg(1)))) {
      Func->setError("Unexpected memory ordering for AtomicLoad");
      return;
    }
    Variable *Dest = Instr->getDest();
    if (!Traits::Is64Bit) {
      if (auto *Dest64On32 = llvm::dyn_cast<Variable64On32>(Dest)) {
        // Follow what GCC does and use a movq instead of what lowerLoad()
        // normally does (split the load into two). Thus, this skips
        // load/arithmetic op folding. Load/arithmetic folding can't happen
        // anyway, since this is x86-32 and integer arithmetic only happens on
        // 32-bit quantities.
        Variable *T = makeReg(IceType_f64);
        X86OperandMem *Addr = formMemoryOperand(Instr->getArg(0), IceType_f64);
        _movq(T, Addr);
        // Then cast the bits back out of the XMM register to the i64 Dest.
        auto *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T);
        lowerCast(Cast);
        // Make sure that the atomic load isn't elided when unused.
        Context.insert<InstFakeUse>(Dest64On32->getLo());
        Context.insert<InstFakeUse>(Dest64On32->getHi());
        return;
      }
    }
    auto *Load = InstLoad::create(Func, Dest, Instr->getArg(0));
    lowerLoad(Load);
    // Make sure the atomic load isn't elided when unused, by adding a FakeUse.
    // Since lowerLoad may fuse the load w/ an arithmetic instruction, insert
    // the FakeUse on the last-inserted instruction's dest.
    Context.insert<InstFakeUse>(Context.getLastInserted()->getDest());
    return;
  }
  case Intrinsics::AtomicRMW:
    if (!Intrinsics::isMemoryOrderValid(
            ID, getConstantMemoryOrder(Instr->getArg(3)))) {
      Func->setError("Unexpected memory ordering for AtomicRMW");
      return;
    }
    // Arg 0 is the RMW sub-operation code (a compile-time constant).
    lowerAtomicRMW(
        Instr->getDest(),
        static_cast<uint32_t>(
            llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()),
        Instr->getArg(1), Instr->getArg(2));
    return;
  case Intrinsics::AtomicStore: {
    if (!Intrinsics::isMemoryOrderValid(
            ID, getConstantMemoryOrder(Instr->getArg(2)))) {
      Func->setError("Unexpected memory ordering for AtomicStore");
      return;
    }
    // We require the memory address to be naturally aligned. Given that is the
    // case, then normal stores are atomic. Add a fence after the store to make
    // it visible.
    Operand *Value = Instr->getArg(0);
    Operand *Ptr = Instr->getArg(1);
    if (!Traits::Is64Bit && Value->getType() == IceType_i64) {
      // Use a movq instead of what lowerStore() normally does (split the store
      // into two), following what GCC does. Cast the bits from int -> to an
      // xmm register first.
      Variable *T = makeReg(IceType_f64);
      auto *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value);
      lowerCast(Cast);
      // Then store XMM w/ a movq.
      X86OperandMem *Addr = formMemoryOperand(Ptr, IceType_f64);
      _storeq(T, Addr);
      _mfence();
      return;
    }
    auto *Store = InstStore::create(Func, Value, Ptr);
    lowerStore(Store);
    _mfence();
    return;
  }
  case Intrinsics::Bswap: {
    Variable *Dest = Instr->getDest();
    Operand *Val = Instr->getArg(0);
    // In 32-bit mode, bswap only works on 32-bit arguments, and the argument
    // must be a register. Use rotate left for 16-bit bswap.
    if (!Traits::Is64Bit && Val->getType() == IceType_i64) {
      // i64 on x86-32: bswap each half, then swap the halves into the dest.
      Val = legalizeUndef(Val);
      Variable *T_Lo = legalizeToReg(loOperand(Val));
      Variable *T_Hi = legalizeToReg(hiOperand(Val));
      auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
      auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      _bswap(T_Lo);
      _bswap(T_Hi);
      _mov(DestLo, T_Hi);
      _mov(DestHi, T_Lo);
    } else if ((Traits::Is64Bit && Val->getType() == IceType_i64) ||
               Val->getType() == IceType_i32) {
      Variable *T = legalizeToReg(Val);
      _bswap(T);
      _mov(Dest, T);
    } else {
      assert(Val->getType() == IceType_i16);
      // 16-bit bswap == rotate left by 8.
      Constant *Eight = Ctx->getConstantInt16(8);
      Variable *T = nullptr;
      Val = legalize(Val);
      _mov(T, Val);
      _rol(T, Eight);
      _mov(Dest, T);
    }
    return;
  }
  case Intrinsics::Ctpop: {
    // Lowered as a call to the __popcount{si,di}2 runtime helpers.
    Variable *Dest = Instr->getDest();
    Variable *T = nullptr;
    Operand *Val = Instr->getArg(0);
    Type ValTy = Val->getType();
    assert(ValTy == IceType_i32 || ValTy == IceType_i64);

    if (!Traits::Is64Bit) {
      T = Dest;
    } else {
      T = makeReg(IceType_i64);
      if (ValTy == IceType_i32) {
        // in x86-64, __popcountsi2 is not defined, so we cheat a bit by
        // converting it to a 64-bit value, and using ctpop_i64. _movzx should
        // ensure we will not have any bits set on Val's upper 32 bits.
        Variable *V = makeReg(IceType_i64);
        _movzx(V, Val);
        Val = V;
      }
      ValTy = IceType_i64;
    }

    InstCall *Call =
        makeHelperCall(ValTy == IceType_i32 ? RuntimeHelper::H_call_ctpop_i32
                                            : RuntimeHelper::H_call_ctpop_i64,
                       T, 1);
    Call->addArg(Val);
    lowerCall(Call);
    // The popcount helpers always return 32-bit values, while the intrinsic's
    // signature matches the native POPCNT instruction and fills a 64-bit reg
    // (in 64-bit mode). Thus, clear the upper bits of the dest just in case
    // the user doesn't do that in the IR. If the user does that in the IR,
    // then this zero'ing instruction is dead and gets optimized out.
    if (!Traits::Is64Bit) {
      assert(T == Dest);
      if (Val->getType() == IceType_i64) {
        auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
        Constant *Zero = Ctx->getConstantZero(IceType_i32);
        _mov(DestHi, Zero);
      }
    } else {
      assert(Val->getType() == IceType_i64);
      // T is 64 bit. It needs to be copied to dest. We need to:
      //
      // T_1.32 = trunc T.64 to i32
      // T_2.64 = zext T_1.32 to i64
      // Dest.<<right_size>> = T_2.<<right_size>>
      //
      // which ensures the upper 32 bits will always be cleared. Just doing a
      //
      // mov Dest.32 = trunc T.32 to i32
      //
      // is dangerous because there's a chance the compiler will optimize this
      // copy out. To use _movzx we need two new registers (one 32-, and
      // another 64-bit wide.)
      Variable *T_1 = makeReg(IceType_i32);
      _mov(T_1, T);
      Variable *T_2 = makeReg(IceType_i64);
      _movzx(T_2, T_1);
      _mov(Dest, T_2);
    }
    return;
  }
  case Intrinsics::Ctlz: {
    // The "is zero undef" parameter is ignored and we always return a
    // well-defined value.
    Operand *Val = legalize(Instr->getArg(0));
    Operand *FirstVal;
    Operand *SecondVal = nullptr;
    if (!Traits::Is64Bit && Val->getType() == IceType_i64) {
      // For ctlz the high half is scanned first, so it is FirstVal.
      FirstVal = loOperand(Val);
      SecondVal = hiOperand(Val);
    } else {
      FirstVal = Val;
    }
    constexpr bool IsCttz = false;
    lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
                    SecondVal);
    return;
  }
  case Intrinsics::Cttz: {
    // The "is zero undef" parameter is ignored and we always return a
    // well-defined value.
    Operand *Val = legalize(Instr->getArg(0));
    Operand *FirstVal;
    Operand *SecondVal = nullptr;
    if (!Traits::Is64Bit && Val->getType() == IceType_i64) {
      // For cttz the halves are passed in the opposite order vs. ctlz.
      FirstVal = hiOperand(Val);
      SecondVal = loOperand(Val);
    } else {
      FirstVal = Val;
    }
    constexpr bool IsCttz = true;
    lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
                    SecondVal);
    return;
  }
  case Intrinsics::Fabs: {
    // fabs == clear the sign bit: pand with a mask of all-ones-but-sign.
    Operand *Src = legalize(Instr->getArg(0));
    Type Ty = Src->getType();
    Variable *Dest = Instr->getDest();
    Variable *T = makeVectorOfFabsMask(Ty);
    // The pand instruction operates on an m128 memory operand, so if Src is an
    // f32 or f64, we need to make sure it's in a register.
    if (isVectorType(Ty)) {
      if (llvm::isa<X86OperandMem>(Src))
        Src = legalizeToReg(Src);
    } else {
      Src = legalizeToReg(Src);
    }
    _pand(T, Src);
    if (isVectorType(Ty))
      _movp(Dest, T);
    else
      _mov(Dest, T);
    return;
  }
  case Intrinsics::Longjmp: {
    InstCall *Call = makeHelperCall(RuntimeHelper::H_call_longjmp, nullptr, 2);
    Call->addArg(Instr->getArg(0));
    Call->addArg(Instr->getArg(1));
    lowerCall(Call);
    return;
  }
  case Intrinsics::Memcpy: {
    lowerMemcpy(Instr->getArg(0), Instr->getArg(1), Instr->getArg(2));
    return;
  }
  case Intrinsics::Memmove: {
    lowerMemmove(Instr->getArg(0), Instr->getArg(1), Instr->getArg(2));
    return;
  }
  case Intrinsics::Memset: {
    lowerMemset(Instr->getArg(0), Instr->getArg(1), Instr->getArg(2));
    return;
  }
  case Intrinsics::NaClReadTP: {
    // Sandboxed: read the thread pointer from a target-specific operand;
    // otherwise call the __nacl_read_tp runtime helper.
    if (NeedSandboxing) {
      Operand *Src =
          dispatchToConcrete(&ConcreteTarget::createNaClReadTPSrcOperand);
      Variable *Dest = Instr->getDest();
      Variable *T = nullptr;
      _mov(T, Src);
      _mov(Dest, T);
    } else {
      InstCall *Call =
          makeHelperCall(RuntimeHelper::H_call_read_tp, Instr->getDest(), 0);
      lowerCall(Call);
    }
    return;
  }
  case Intrinsics::Setjmp: {
    InstCall *Call =
        makeHelperCall(RuntimeHelper::H_call_setjmp, Instr->getDest(), 1);
    Call->addArg(Instr->getArg(0));
    lowerCall(Call);
    return;
  }
  case Intrinsics::Sqrt: {
    Operand *Src = legalize(Instr->getArg(0));
    Variable *Dest = Instr->getDest();
    Variable *T = makeReg(Dest->getType());
    _sqrtss(T, Src);
    _mov(Dest, T);
    return;
  }
  case Intrinsics::Stacksave: {
    // Stacksave simply copies the stack pointer into the dest.
    if (!Traits::Is64Bit || !NeedSandboxing) {
      Variable *esp = Func->getTarget()->getPhysicalRegister(getStackReg(),
                                                             Traits::WordType);
      Variable *Dest = Instr->getDest();
      _mov(Dest, esp);
      return;
    }
    // Sandboxed x86-64: the stack pointer is handled as a 32-bit esp.
    Variable *esp = Func->getTarget()->getPhysicalRegister(
        Traits::RegisterSet::Reg_esp, IceType_i32);
    Variable *Dest = Instr->getDest();
    _mov(Dest, esp);

    return;
  }
  case Intrinsics::Stackrestore: {
    Operand *Src = Instr->getArg(0);
    _mov_sp(Src);
    return;
  }

  case Intrinsics::Trap:
    _ud2();
    return;
  case Intrinsics::UnknownIntrinsic:
    Func->setError("Should not be lowering UnknownIntrinsic");
    return;
  }
  return;
}
4331
/// Lowers an atomic compare-and-swap to a locked cmpxchg (or cmpxchg8b for
/// i64 on x86-32). DestPrev receives the value that was previously in memory,
/// which the hardware leaves in eax (edx:eax for the 8-byte form).
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerAtomicCmpxchg(Variable *DestPrev,
                                                   Operand *Ptr,
                                                   Operand *Expected,
                                                   Operand *Desired) {
  Type Ty = Expected->getType();
  if (!Traits::Is64Bit && Ty == IceType_i64) {
    // Reserve the pre-colored registers first, before adding any more
    // infinite-weight variables from formMemoryOperand's legalization.
    // cmpxchg8b implicitly uses edx:eax as the expected value and ecx:ebx as
    // the replacement value.
    Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
    Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
    Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
    Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx);
    _mov(T_eax, loOperand(Expected));
    _mov(T_edx, hiOperand(Expected));
    _mov(T_ebx, loOperand(Desired));
    _mov(T_ecx, hiOperand(Desired));
    X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
    constexpr bool Locked = true;
    _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
    // The previous memory value is left in edx:eax.
    auto *DestLo = llvm::cast<Variable>(loOperand(DestPrev));
    auto *DestHi = llvm::cast<Variable>(hiOperand(DestPrev));
    _mov(DestLo, T_eax);
    _mov(DestHi, T_edx);
    return;
  }
  // Pick the width-appropriate alias of the accumulator register, which
  // cmpxchg implicitly uses for the expected/previous value.
  RegNumT Eax;
  switch (Ty) {
  default:
    llvm::report_fatal_error("Bad type for cmpxchg");
  case IceType_i64:
    Eax = Traits::getRaxOrDie();
    break;
  case IceType_i32:
    Eax = Traits::RegisterSet::Reg_eax;
    break;
  case IceType_i16:
    Eax = Traits::RegisterSet::Reg_ax;
    break;
  case IceType_i8:
    Eax = Traits::RegisterSet::Reg_al;
    break;
  }
  Variable *T_eax = makeReg(Ty, Eax);
  _mov(T_eax, Expected);
  X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
  Variable *DesiredReg = legalizeToReg(Desired);
  constexpr bool Locked = true;
  _cmpxchg(Addr, T_eax, DesiredReg, Locked);
  _mov(DestPrev, T_eax);
}
4383
John Porto4a566862016-01-04 09:33:41 -08004384template <typename TraitsType>
4385bool TargetX86Base<TraitsType>::tryOptimizedCmpxchgCmpBr(Variable *Dest,
4386 Operand *PtrToMem,
4387 Operand *Expected,
4388 Operand *Desired) {
Jim Stichnothdd6dcfa2016-04-18 12:52:09 -07004389 if (Func->getOptLevel() == Opt_m1)
John Porto7e93c622015-06-23 10:58:57 -07004390 return false;
4391 // Peek ahead a few instructions and see how Dest is used.
4392 // It's very common to have:
4393 //
4394 // %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...)
4395 // [%y_phi = ...] // list of phi stores
4396 // %p = icmp eq i32 %x, %expected
4397 // br i1 %p, label %l1, label %l2
4398 //
4399 // which we can optimize into:
4400 //
4401 // %x = <cmpxchg code>
4402 // [%y_phi = ...] // list of phi stores
4403 // br eq, %l1, %l2
4404 InstList::iterator I = Context.getCur();
4405 // I is currently the InstIntrinsicCall. Peek past that.
4406 // This assumes that the atomic cmpxchg has not been lowered yet,
4407 // so that the instructions seen in the scan from "Cur" is simple.
4408 assert(llvm::isa<InstIntrinsicCall>(*I));
4409 Inst *NextInst = Context.getNextInst(I);
4410 if (!NextInst)
4411 return false;
4412 // There might be phi assignments right before the compare+branch, since this
4413 // could be a backward branch for a loop. This placement of assignments is
4414 // determined by placePhiStores().
John Portoe82b5602016-02-24 15:58:55 -08004415 CfgVector<InstAssign *> PhiAssigns;
Jim Stichnoth54f3d512015-12-11 09:53:00 -08004416 while (auto *PhiAssign = llvm::dyn_cast<InstAssign>(NextInst)) {
John Porto7e93c622015-06-23 10:58:57 -07004417 if (PhiAssign->getDest() == Dest)
4418 return false;
4419 PhiAssigns.push_back(PhiAssign);
4420 NextInst = Context.getNextInst(I);
4421 if (!NextInst)
4422 return false;
4423 }
Jim Stichnoth54f3d512015-12-11 09:53:00 -08004424 if (auto *NextCmp = llvm::dyn_cast<InstIcmp>(NextInst)) {
John Porto7e93c622015-06-23 10:58:57 -07004425 if (!(NextCmp->getCondition() == InstIcmp::Eq &&
4426 ((NextCmp->getSrc(0) == Dest && NextCmp->getSrc(1) == Expected) ||
4427 (NextCmp->getSrc(1) == Dest && NextCmp->getSrc(0) == Expected)))) {
4428 return false;
4429 }
4430 NextInst = Context.getNextInst(I);
4431 if (!NextInst)
4432 return false;
Jim Stichnoth54f3d512015-12-11 09:53:00 -08004433 if (auto *NextBr = llvm::dyn_cast<InstBr>(NextInst)) {
John Porto7e93c622015-06-23 10:58:57 -07004434 if (!NextBr->isUnconditional() &&
4435 NextCmp->getDest() == NextBr->getCondition() &&
4436 NextBr->isLastUse(NextCmp->getDest())) {
4437 lowerAtomicCmpxchg(Dest, PtrToMem, Expected, Desired);
4438 for (size_t i = 0; i < PhiAssigns.size(); ++i) {
4439 // Lower the phi assignments now, before the branch (same placement
4440 // as before).
4441 InstAssign *PhiAssign = PhiAssigns[i];
4442 PhiAssign->setDeleted();
4443 lowerAssign(PhiAssign);
4444 Context.advanceNext();
4445 }
John Porto5d0acff2015-06-30 15:29:21 -07004446 _br(Traits::Cond::Br_e, NextBr->getTargetTrue(),
4447 NextBr->getTargetFalse());
John Porto7e93c622015-06-23 10:58:57 -07004448 // Skip over the old compare and branch, by deleting them.
4449 NextCmp->setDeleted();
4450 NextBr->setDeleted();
4451 Context.advanceNext();
4452 Context.advanceNext();
4453 return true;
4454 }
4455 }
4456 }
4457 return false;
4458}
4459
/// Lowers an AtomicRMW intrinsic. Add/Sub/Exchange on natively-sized operands
/// are emitted as single locked instructions (xadd / xchg); Or/And/Xor, and
/// all i64 operations on x86-32, fall through into a cmpxchg loop via
/// expandAtomicRMWAsCmpxchg(), parameterized by the lo/hi binary-op emitters.
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerAtomicRMW(Variable *Dest,
                                               uint32_t Operation, Operand *Ptr,
                                               Operand *Val) {
  bool NeedsCmpxchg = false;
  LowerBinOp Op_Lo = nullptr;
  LowerBinOp Op_Hi = nullptr;
  switch (Operation) {
  default:
    Func->setError("Unknown AtomicRMW operation");
    return;
  case Intrinsics::AtomicAdd: {
    if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
      // All the fall-through paths must set this to true, but use this
      // for asserting.
      // i64 add on x86-32: add-with-carry pair inside a cmpxchg8b loop.
      NeedsCmpxchg = true;
      Op_Lo = &TargetX86Base<TraitsType>::_add;
      Op_Hi = &TargetX86Base<TraitsType>::_adc;
      break;
    }
    // Natively sized: lock xadd returns the previous value directly.
    X86OperandMem *Addr = formMemoryOperand(Ptr, Dest->getType());
    constexpr bool Locked = true;
    Variable *T = nullptr;
    _mov(T, Val);
    _xadd(Addr, T, Locked);
    _mov(Dest, T);
    return;
  }
  case Intrinsics::AtomicSub: {
    if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
      NeedsCmpxchg = true;
      Op_Lo = &TargetX86Base<TraitsType>::_sub;
      Op_Hi = &TargetX86Base<TraitsType>::_sbb;
      break;
    }
    // Subtraction via xadd of the negated value.
    X86OperandMem *Addr = formMemoryOperand(Ptr, Dest->getType());
    constexpr bool Locked = true;
    Variable *T = nullptr;
    _mov(T, Val);
    _neg(T);
    _xadd(Addr, T, Locked);
    _mov(Dest, T);
    return;
  }
  case Intrinsics::AtomicOr:
    // TODO(jvoung): If Dest is null or dead, then some of these
    // operations do not need an "exchange", but just a locked op.
    // That appears to be "worth" it for sub, or, and, and xor.
    // xadd is probably fine vs lock add for add, and xchg is fine
    // vs an atomic store.
    NeedsCmpxchg = true;
    Op_Lo = &TargetX86Base<TraitsType>::_or;
    Op_Hi = &TargetX86Base<TraitsType>::_or;
    break;
  case Intrinsics::AtomicAnd:
    NeedsCmpxchg = true;
    Op_Lo = &TargetX86Base<TraitsType>::_and;
    Op_Hi = &TargetX86Base<TraitsType>::_and;
    break;
  case Intrinsics::AtomicXor:
    NeedsCmpxchg = true;
    Op_Lo = &TargetX86Base<TraitsType>::_xor;
    Op_Hi = &TargetX86Base<TraitsType>::_xor;
    break;
  case Intrinsics::AtomicExchange:
    if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
      NeedsCmpxchg = true;
      // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values
      // just need to be moved to the ecx and ebx registers.
      Op_Lo = nullptr;
      Op_Hi = nullptr;
      break;
    }
    // xchg with a memory operand is implicitly locked.
    X86OperandMem *Addr = formMemoryOperand(Ptr, Dest->getType());
    Variable *T = nullptr;
    _mov(T, Val);
    _xchg(Addr, T);
    _mov(Dest, T);
    return;
  }
  // Otherwise, we need a cmpxchg loop.
  (void)NeedsCmpxchg;
  assert(NeedsCmpxchg);
  expandAtomicRMWAsCmpxchg(Op_Lo, Op_Hi, Dest, Ptr, Val);
}
4545
/// Emits a load + locked-cmpxchg retry loop that atomically applies
/// Op_Lo/Op_Hi to the memory value; FakeUse pseudo-instructions model the
/// liveness of operands that are re-read on each iteration of the backward
/// branch (the register allocator would otherwise consider them dead).
template <typename TraitsType>
void TargetX86Base<TraitsType>::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo,
                                                         LowerBinOp Op_Hi,
                                                         Variable *Dest,
                                                         Operand *Ptr,
                                                         Operand *Val) {
  // Expand a more complex RMW operation as a cmpxchg loop:
  // For 64-bit:
  // mov eax, [ptr]
  // mov edx, [ptr + 4]
  // .LABEL:
  // mov ebx, eax
  // <Op_Lo> ebx, <desired_adj_lo>
  // mov ecx, edx
  // <Op_Hi> ecx, <desired_adj_hi>
  // lock cmpxchg8b [ptr]
  // jne .LABEL
  // mov <dest_lo>, eax
  // mov <dest_hi>, edx
  //
  // For 32-bit:
  // mov eax, [ptr]
  // .LABEL:
  // mov <reg>, eax
  // op <reg>, [desired_adj]
  // lock cmpxchg [ptr], <reg>
  // jne .LABEL
  // mov <dest>, eax
  //
  // If Op_{Lo,Hi} are nullptr, then just copy the value.
  Val = legalize(Val);
  Type Ty = Val->getType();
  if (!Traits::Is64Bit && Ty == IceType_i64) {
    Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
    Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
    X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
    _mov(T_eax, loOperand(Addr));
    _mov(T_edx, hiOperand(Addr));
    Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
    Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx);
    InstX86Label *Label = InstX86Label::create(Func, this);
    const bool IsXchg8b = Op_Lo == nullptr && Op_Hi == nullptr;
    if (!IsXchg8b) {
      // Recompute the desired value from edx:eax inside the loop, since eax
      // and edx are reloaded with the observed value on every failed attempt.
      Context.insert(Label);
      _mov(T_ebx, T_eax);
      (this->*Op_Lo)(T_ebx, loOperand(Val));
      _mov(T_ecx, T_edx);
      (this->*Op_Hi)(T_ecx, hiOperand(Val));
    } else {
      // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi.
      // It just needs the Val loaded into ebx and ecx.
      // That can also be done before the loop.
      _mov(T_ebx, loOperand(Val));
      _mov(T_ecx, hiOperand(Val));
      Context.insert(Label);
    }
    constexpr bool Locked = true;
    _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
    _br(Traits::Cond::Br_ne, Label);
    if (!IsXchg8b) {
      // If Val is a variable, model the extended live range of Val through
      // the end of the loop, since it will be re-used by the loop.
      if (auto *ValVar = llvm::dyn_cast<Variable>(Val)) {
        auto *ValLo = llvm::cast<Variable>(loOperand(ValVar));
        auto *ValHi = llvm::cast<Variable>(hiOperand(ValVar));
        Context.insert<InstFakeUse>(ValLo);
        Context.insert<InstFakeUse>(ValHi);
      }
    } else {
      // For xchg, the loop is slightly smaller and ebx/ecx are used.
      Context.insert<InstFakeUse>(T_ebx);
      Context.insert<InstFakeUse>(T_ecx);
    }
    // The address base (if any) is also reused in the loop.
    if (Variable *Base = Addr->getBase())
      Context.insert<InstFakeUse>(Base);
    // The loop exits with the previous memory value in edx:eax.
    auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
    auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    _mov(DestLo, T_eax);
    _mov(DestHi, T_edx);
    return;
  }
  X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
  // Pick the width-appropriate accumulator alias, which cmpxchg implicitly
  // uses for the compare value.
  RegNumT Eax;
  switch (Ty) {
  default:
    llvm::report_fatal_error("Bad type for atomicRMW");
  case IceType_i64:
    Eax = Traits::getRaxOrDie();
    break;
  case IceType_i32:
    Eax = Traits::RegisterSet::Reg_eax;
    break;
  case IceType_i16:
    Eax = Traits::RegisterSet::Reg_ax;
    break;
  case IceType_i8:
    Eax = Traits::RegisterSet::Reg_al;
    break;
  }
  Variable *T_eax = makeReg(Ty, Eax);
  _mov(T_eax, Addr);
  auto *Label = Context.insert<InstX86Label>(this);
  // We want to pick a different register for T than Eax, so don't use
  // _mov(T == nullptr, T_eax).
  Variable *T = makeReg(Ty);
  _mov(T, T_eax);
  (this->*Op_Lo)(T, Val);
  constexpr bool Locked = true;
  _cmpxchg(Addr, T_eax, T, Locked);
  _br(Traits::Cond::Br_ne, Label);
  // If Val is a variable, model the extended live range of Val through
  // the end of the loop, since it will be re-used by the loop.
  if (auto *ValVar = llvm::dyn_cast<Variable>(Val)) {
    Context.insert<InstFakeUse>(ValVar);
  }
  // The address base (if any) is also reused in the loop.
  if (Variable *Base = Addr->getBase())
    Context.insert<InstFakeUse>(Base);
  _mov(Dest, T_eax);
}
4667
Andrew Scull9612d322015-07-06 14:53:25 -07004668/// Lowers count {trailing, leading} zeros intrinsic.
4669///
4670/// We could do constant folding here, but that should have
4671/// been done by the front-end/middle-end optimizations.
John Porto4a566862016-01-04 09:33:41 -08004672template <typename TraitsType>
4673void TargetX86Base<TraitsType>::lowerCountZeros(bool Cttz, Type Ty,
4674 Variable *Dest,
4675 Operand *FirstVal,
4676 Operand *SecondVal) {
John Porto7e93c622015-06-23 10:58:57 -07004677 // TODO(jvoung): Determine if the user CPU supports LZCNT (BMI).
4678 // Then the instructions will handle the Val == 0 case much more simply
4679 // and won't require conversion from bit position to number of zeros.
4680 //
4681 // Otherwise:
4682 // bsr IF_NOT_ZERO, Val
John Porto34d276a2016-01-27 06:31:53 -08004683 // mov T_DEST, ((Ty == i32) ? 63 : 127)
John Porto7e93c622015-06-23 10:58:57 -07004684 // cmovne T_DEST, IF_NOT_ZERO
John Porto34d276a2016-01-27 06:31:53 -08004685 // xor T_DEST, ((Ty == i32) ? 31 : 63)
John Porto7e93c622015-06-23 10:58:57 -07004686 // mov DEST, T_DEST
4687 //
4688 // NOTE: T_DEST must be a register because cmov requires its dest to be a
4689 // register. Also, bsf and bsr require their dest to be a register.
4690 //
John Porto34d276a2016-01-27 06:31:53 -08004691 // The xor DEST, C(31|63) converts a bit position to # of leading zeroes.
John Porto7e93c622015-06-23 10:58:57 -07004692 // E.g., for 000... 00001100, bsr will say that the most significant bit
4693 // set is at position 3, while the number of leading zeros is 28. Xor is
John Porto34d276a2016-01-27 06:31:53 -08004694 // like (M - N) for N <= M, and converts 63 to 32, and 127 to 64 (for the
4695 // all-zeros case).
John Porto7e93c622015-06-23 10:58:57 -07004696 //
John Porto34d276a2016-01-27 06:31:53 -08004697 // X8632 only: Similar for 64-bit, but start w/ speculating that the upper 32
4698 // bits are all zero, and compute the result for that case (checking the
4699 // lower 32 bits). Then actually compute the result for the upper bits and
John Porto7e93c622015-06-23 10:58:57 -07004700 // cmov in the result from the lower computation if the earlier speculation
4701 // was correct.
4702 //
4703 // Cttz, is similar, but uses bsf instead, and doesn't require the xor
4704 // bit position conversion, and the speculation is reversed.
John Porto34d276a2016-01-27 06:31:53 -08004705
4706 // TODO(jpp): refactor this method.
John Porto7e93c622015-06-23 10:58:57 -07004707 assert(Ty == IceType_i32 || Ty == IceType_i64);
John Porto3c275ce2015-12-22 08:14:00 -08004708 const Type DestTy = Traits::Is64Bit ? Dest->getType() : IceType_i32;
4709 Variable *T = makeReg(DestTy);
John Porto7e93c622015-06-23 10:58:57 -07004710 Operand *FirstValRM = legalize(FirstVal, Legal_Mem | Legal_Reg);
4711 if (Cttz) {
4712 _bsf(T, FirstValRM);
4713 } else {
4714 _bsr(T, FirstValRM);
4715 }
John Porto3c275ce2015-12-22 08:14:00 -08004716 Variable *T_Dest = makeReg(DestTy);
4717 Constant *_31 = Ctx->getConstantInt32(31);
4718 Constant *_32 = Ctx->getConstantInt(DestTy, 32);
John Porto34d276a2016-01-27 06:31:53 -08004719 Constant *_63 = Ctx->getConstantInt(DestTy, 63);
4720 Constant *_64 = Ctx->getConstantInt(DestTy, 64);
John Porto7e93c622015-06-23 10:58:57 -07004721 if (Cttz) {
John Porto34d276a2016-01-27 06:31:53 -08004722 if (DestTy == IceType_i64) {
4723 _mov(T_Dest, _64);
4724 } else {
4725 _mov(T_Dest, _32);
4726 }
John Porto7e93c622015-06-23 10:58:57 -07004727 } else {
John Porto34d276a2016-01-27 06:31:53 -08004728 Constant *_127 = Ctx->getConstantInt(DestTy, 127);
4729 if (DestTy == IceType_i64) {
4730 _mov(T_Dest, _127);
4731 } else {
4732 _mov(T_Dest, _63);
4733 }
John Porto7e93c622015-06-23 10:58:57 -07004734 }
John Porto5d0acff2015-06-30 15:29:21 -07004735 _cmov(T_Dest, T, Traits::Cond::Br_ne);
John Porto7e93c622015-06-23 10:58:57 -07004736 if (!Cttz) {
John Porto34d276a2016-01-27 06:31:53 -08004737 if (DestTy == IceType_i64) {
4738 // Even though there's a _63 available at this point, that constant might
4739 // not be an i32, which will cause the xor emission to fail.
4740 Constant *_63 = Ctx->getConstantInt32(63);
4741 _xor(T_Dest, _63);
4742 } else {
4743 _xor(T_Dest, _31);
4744 }
John Porto7e93c622015-06-23 10:58:57 -07004745 }
John Porto1d235422015-08-12 12:37:53 -07004746 if (Traits::Is64Bit || Ty == IceType_i32) {
John Porto7e93c622015-06-23 10:58:57 -07004747 _mov(Dest, T_Dest);
4748 return;
4749 }
John Porto3c275ce2015-12-22 08:14:00 -08004750 _add(T_Dest, _32);
Jim Stichnoth54f3d512015-12-11 09:53:00 -08004751 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
4752 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
John Porto7e93c622015-06-23 10:58:57 -07004753 // Will be using "test" on this, so we need a registerized variable.
Andrew Scull97f460d2015-07-21 10:07:42 -07004754 Variable *SecondVar = legalizeToReg(SecondVal);
John Porto7e93c622015-06-23 10:58:57 -07004755 Variable *T_Dest2 = makeReg(IceType_i32);
4756 if (Cttz) {
4757 _bsf(T_Dest2, SecondVar);
4758 } else {
4759 _bsr(T_Dest2, SecondVar);
John Porto3c275ce2015-12-22 08:14:00 -08004760 _xor(T_Dest2, _31);
John Porto7e93c622015-06-23 10:58:57 -07004761 }
4762 _test(SecondVar, SecondVar);
John Porto5d0acff2015-06-30 15:29:21 -07004763 _cmov(T_Dest2, T_Dest, Traits::Cond::Br_e);
John Porto7e93c622015-06-23 10:58:57 -07004764 _mov(DestLo, T_Dest2);
4765 _mov(DestHi, Ctx->getConstantZero(IceType_i32));
4766}
4767
John Porto4a566862016-01-04 09:33:41 -08004768template <typename TraitsType>
4769void TargetX86Base<TraitsType>::typedLoad(Type Ty, Variable *Dest,
4770 Variable *Base, Constant *Offset) {
Jim Stichnoth8ff4b282016-01-04 15:39:06 -08004771 // If Offset is a ConstantRelocatable in Non-SFI mode, we will need to
4772 // legalize Mem properly.
4773 if (Offset)
4774 assert(!llvm::isa<ConstantRelocatable>(Offset));
4775
John Porto4a566862016-01-04 09:33:41 -08004776 auto *Mem = X86OperandMem::create(Func, Ty, Base, Offset);
Andrew Scullcfa628b2015-08-20 14:23:05 -07004777
4778 if (isVectorType(Ty))
4779 _movp(Dest, Mem);
4780 else if (Ty == IceType_f64)
4781 _movq(Dest, Mem);
4782 else
4783 _mov(Dest, Mem);
4784}
4785
John Porto4a566862016-01-04 09:33:41 -08004786template <typename TraitsType>
4787void TargetX86Base<TraitsType>::typedStore(Type Ty, Variable *Value,
4788 Variable *Base, Constant *Offset) {
Jim Stichnoth8ff4b282016-01-04 15:39:06 -08004789 // If Offset is a ConstantRelocatable in Non-SFI mode, we will need to
4790 // legalize Mem properly.
4791 if (Offset)
4792 assert(!llvm::isa<ConstantRelocatable>(Offset));
4793
John Porto4a566862016-01-04 09:33:41 -08004794 auto *Mem = X86OperandMem::create(Func, Ty, Base, Offset);
Andrew Scullcfa628b2015-08-20 14:23:05 -07004795
4796 if (isVectorType(Ty))
4797 _storep(Value, Mem);
4798 else if (Ty == IceType_f64)
4799 _storeq(Value, Mem);
4800 else
4801 _store(Value, Mem);
4802}
4803
John Porto4a566862016-01-04 09:33:41 -08004804template <typename TraitsType>
4805void TargetX86Base<TraitsType>::copyMemory(Type Ty, Variable *Dest,
4806 Variable *Src, int32_t OffsetAmt) {
Andrew Scullcfa628b2015-08-20 14:23:05 -07004807 Constant *Offset = OffsetAmt ? Ctx->getConstantInt32(OffsetAmt) : nullptr;
4808 // TODO(ascull): this or add nullptr test to _movp, _movq
4809 Variable *Data = makeReg(Ty);
4810
4811 typedLoad(Ty, Data, Src, Offset);
4812 typedStore(Ty, Data, Dest, Offset);
4813}
4814
John Porto4a566862016-01-04 09:33:41 -08004815template <typename TraitsType>
4816void TargetX86Base<TraitsType>::lowerMemcpy(Operand *Dest, Operand *Src,
4817 Operand *Count) {
Andrew Scull9df4a372015-08-07 09:19:35 -07004818 // There is a load and store for each chunk in the unroll
Andrew Scull9df4a372015-08-07 09:19:35 -07004819 constexpr uint32_t BytesPerStorep = 16;
Andrew Scull9df4a372015-08-07 09:19:35 -07004820
4821 // Check if the operands are constants
4822 const auto *CountConst = llvm::dyn_cast<const ConstantInteger32>(Count);
4823 const bool IsCountConst = CountConst != nullptr;
4824 const uint32_t CountValue = IsCountConst ? CountConst->getValue() : 0;
4825
Andrew Scullcfa628b2015-08-20 14:23:05 -07004826 if (shouldOptimizeMemIntrins() && IsCountConst &&
4827 CountValue <= BytesPerStorep * Traits::MEMCPY_UNROLL_LIMIT) {
Andrew Scull9df4a372015-08-07 09:19:35 -07004828 // Unlikely, but nothing to do if it does happen
4829 if (CountValue == 0)
4830 return;
4831
4832 Variable *SrcBase = legalizeToReg(Src);
4833 Variable *DestBase = legalizeToReg(Dest);
4834
Andrew Scullcfa628b2015-08-20 14:23:05 -07004835 // Find the largest type that can be used and use it as much as possible in
4836 // reverse order. Then handle any remainder with overlapping copies. Since
4837 // the remainder will be at the end, there will be reduced pressure on the
4838 // memory unit as the accesses to the same memory are far apart.
4839 Type Ty = largestTypeInSize(CountValue);
4840 uint32_t TyWidth = typeWidthInBytes(Ty);
Andrew Scull9df4a372015-08-07 09:19:35 -07004841
Andrew Scullcfa628b2015-08-20 14:23:05 -07004842 uint32_t RemainingBytes = CountValue;
4843 int32_t Offset = (CountValue & ~(TyWidth - 1)) - TyWidth;
4844 while (RemainingBytes >= TyWidth) {
4845 copyMemory(Ty, DestBase, SrcBase, Offset);
4846 RemainingBytes -= TyWidth;
4847 Offset -= TyWidth;
Andrew Scull9df4a372015-08-07 09:19:35 -07004848 }
4849
Andrew Scullcfa628b2015-08-20 14:23:05 -07004850 if (RemainingBytes == 0)
Andrew Scull9df4a372015-08-07 09:19:35 -07004851 return;
Andrew Scull9df4a372015-08-07 09:19:35 -07004852
Andrew Scullcfa628b2015-08-20 14:23:05 -07004853 // Lower the remaining bytes. Adjust to larger types in order to make use
4854 // of overlaps in the copies.
4855 Type LeftOverTy = firstTypeThatFitsSize(RemainingBytes);
4856 Offset = CountValue - typeWidthInBytes(LeftOverTy);
4857 copyMemory(LeftOverTy, DestBase, SrcBase, Offset);
Andrew Scull9df4a372015-08-07 09:19:35 -07004858 return;
4859 }
4860
4861 // Fall back on a function call
Karl Schimpf20070e82016-03-17 13:30:13 -07004862 InstCall *Call = makeHelperCall(RuntimeHelper::H_call_memcpy, nullptr, 3);
Andrew Scull9df4a372015-08-07 09:19:35 -07004863 Call->addArg(Dest);
4864 Call->addArg(Src);
4865 Call->addArg(Count);
4866 lowerCall(Call);
4867}
4868
John Porto4a566862016-01-04 09:33:41 -08004869template <typename TraitsType>
4870void TargetX86Base<TraitsType>::lowerMemmove(Operand *Dest, Operand *Src,
4871 Operand *Count) {
Andrew Scullcfa628b2015-08-20 14:23:05 -07004872 // There is a load and store for each chunk in the unroll
4873 constexpr uint32_t BytesPerStorep = 16;
4874
4875 // Check if the operands are constants
4876 const auto *CountConst = llvm::dyn_cast<const ConstantInteger32>(Count);
4877 const bool IsCountConst = CountConst != nullptr;
4878 const uint32_t CountValue = IsCountConst ? CountConst->getValue() : 0;
4879
4880 if (shouldOptimizeMemIntrins() && IsCountConst &&
4881 CountValue <= BytesPerStorep * Traits::MEMMOVE_UNROLL_LIMIT) {
4882 // Unlikely, but nothing to do if it does happen
4883 if (CountValue == 0)
4884 return;
4885
4886 Variable *SrcBase = legalizeToReg(Src);
4887 Variable *DestBase = legalizeToReg(Dest);
4888
4889 std::tuple<Type, Constant *, Variable *>
4890 Moves[Traits::MEMMOVE_UNROLL_LIMIT];
4891 Constant *Offset;
4892 Variable *Reg;
4893
4894 // Copy the data into registers as the source and destination could overlap
Andrew Scull57e12682015-09-16 11:30:19 -07004895 // so make sure not to clobber the memory. This also means overlapping
4896 // moves can be used as we are taking a safe snapshot of the memory.
Andrew Scullcfa628b2015-08-20 14:23:05 -07004897 Type Ty = largestTypeInSize(CountValue);
4898 uint32_t TyWidth = typeWidthInBytes(Ty);
4899
4900 uint32_t RemainingBytes = CountValue;
4901 int32_t OffsetAmt = (CountValue & ~(TyWidth - 1)) - TyWidth;
4902 size_t N = 0;
4903 while (RemainingBytes >= TyWidth) {
4904 assert(N <= Traits::MEMMOVE_UNROLL_LIMIT);
4905 Offset = Ctx->getConstantInt32(OffsetAmt);
4906 Reg = makeReg(Ty);
4907 typedLoad(Ty, Reg, SrcBase, Offset);
4908 RemainingBytes -= TyWidth;
4909 OffsetAmt -= TyWidth;
4910 Moves[N++] = std::make_tuple(Ty, Offset, Reg);
4911 }
4912
4913 if (RemainingBytes != 0) {
4914 // Lower the remaining bytes. Adjust to larger types in order to make use
4915 // of overlaps in the copies.
4916 assert(N <= Traits::MEMMOVE_UNROLL_LIMIT);
4917 Ty = firstTypeThatFitsSize(RemainingBytes);
4918 Offset = Ctx->getConstantInt32(CountValue - typeWidthInBytes(Ty));
4919 Reg = makeReg(Ty);
4920 typedLoad(Ty, Reg, SrcBase, Offset);
4921 Moves[N++] = std::make_tuple(Ty, Offset, Reg);
4922 }
4923
4924 // Copy the data out into the destination memory
4925 for (size_t i = 0; i < N; ++i) {
4926 std::tie(Ty, Offset, Reg) = Moves[i];
4927 typedStore(Ty, Reg, DestBase, Offset);
4928 }
4929
4930 return;
4931 }
4932
4933 // Fall back on a function call
Karl Schimpf20070e82016-03-17 13:30:13 -07004934 InstCall *Call = makeHelperCall(RuntimeHelper::H_call_memmove, nullptr, 3);
Andrew Scullcfa628b2015-08-20 14:23:05 -07004935 Call->addArg(Dest);
4936 Call->addArg(Src);
4937 Call->addArg(Count);
4938 lowerCall(Call);
4939}
4940
John Porto4a566862016-01-04 09:33:41 -08004941template <typename TraitsType>
4942void TargetX86Base<TraitsType>::lowerMemset(Operand *Dest, Operand *Val,
4943 Operand *Count) {
Andrew Scull9df4a372015-08-07 09:19:35 -07004944 constexpr uint32_t BytesPerStorep = 16;
4945 constexpr uint32_t BytesPerStoreq = 8;
4946 constexpr uint32_t BytesPerStorei32 = 4;
Andrew Scull713dbde2015-08-04 14:25:27 -07004947 assert(Val->getType() == IceType_i8);
4948
4949 // Check if the operands are constants
4950 const auto *CountConst = llvm::dyn_cast<const ConstantInteger32>(Count);
4951 const auto *ValConst = llvm::dyn_cast<const ConstantInteger32>(Val);
4952 const bool IsCountConst = CountConst != nullptr;
4953 const bool IsValConst = ValConst != nullptr;
4954 const uint32_t CountValue = IsCountConst ? CountConst->getValue() : 0;
4955 const uint32_t ValValue = IsValConst ? ValConst->getValue() : 0;
4956
4957 // Unlikely, but nothing to do if it does happen
4958 if (IsCountConst && CountValue == 0)
4959 return;
4960
4961 // TODO(ascull): if the count is constant but val is not it would be possible
4962 // to inline by spreading the value across 4 bytes and accessing subregs e.g.
4963 // eax, ax and al.
Andrew Scullcfa628b2015-08-20 14:23:05 -07004964 if (shouldOptimizeMemIntrins() && IsCountConst && IsValConst) {
Andrew Scull9df4a372015-08-07 09:19:35 -07004965 Variable *Base = nullptr;
Andrew Scullcfa628b2015-08-20 14:23:05 -07004966 Variable *VecReg = nullptr;
Andrew Scull9df4a372015-08-07 09:19:35 -07004967 const uint32_t SpreadValue =
4968 (ValValue << 24) | (ValValue << 16) | (ValValue << 8) | ValValue;
Andrew Scull713dbde2015-08-04 14:25:27 -07004969
Andrew Scull9df4a372015-08-07 09:19:35 -07004970 auto lowerSet = [this, &Base, SpreadValue, &VecReg](Type Ty,
Jim Stichnoth992f91d2015-08-10 11:18:38 -07004971 uint32_t OffsetAmt) {
Andrew Scull9df4a372015-08-07 09:19:35 -07004972 assert(Base != nullptr);
4973 Constant *Offset = OffsetAmt ? Ctx->getConstantInt32(OffsetAmt) : nullptr;
Andrew Scull713dbde2015-08-04 14:25:27 -07004974
Andrew Scull9df4a372015-08-07 09:19:35 -07004975 // TODO(ascull): is 64-bit better with vector or scalar movq?
John Porto4a566862016-01-04 09:33:41 -08004976 auto *Mem = X86OperandMem::create(Func, Ty, Base, Offset);
Andrew Scull9df4a372015-08-07 09:19:35 -07004977 if (isVectorType(Ty)) {
Andrew Scull713dbde2015-08-04 14:25:27 -07004978 assert(VecReg != nullptr);
Andrew Scull713dbde2015-08-04 14:25:27 -07004979 _storep(VecReg, Mem);
Andrew Scullcfa628b2015-08-20 14:23:05 -07004980 } else if (Ty == IceType_f64) {
Andrew Scull713dbde2015-08-04 14:25:27 -07004981 assert(VecReg != nullptr);
Andrew Scull713dbde2015-08-04 14:25:27 -07004982 _storeq(VecReg, Mem);
Andrew Scull9df4a372015-08-07 09:19:35 -07004983 } else {
John Porto3c275ce2015-12-22 08:14:00 -08004984 assert(Ty != IceType_i64);
Andrew Scull9df4a372015-08-07 09:19:35 -07004985 _store(Ctx->getConstantInt(Ty, SpreadValue), Mem);
Andrew Scull713dbde2015-08-04 14:25:27 -07004986 }
4987 };
4988
Andrew Scullcfa628b2015-08-20 14:23:05 -07004989 // Find the largest type that can be used and use it as much as possible in
4990 // reverse order. Then handle any remainder with overlapping copies. Since
4991 // the remainder will be at the end, there will be reduces pressure on the
4992 // memory unit as the access to the same memory are far apart.
4993 Type Ty;
Andrew Scull9df4a372015-08-07 09:19:35 -07004994 if (ValValue == 0 && CountValue >= BytesPerStoreq &&
Andrew Scullcfa628b2015-08-20 14:23:05 -07004995 CountValue <= BytesPerStorep * Traits::MEMCPY_UNROLL_LIMIT) {
4996 // When the value is zero it can be loaded into a vector register cheaply
4997 // using the xor trick.
Andrew Scull9df4a372015-08-07 09:19:35 -07004998 Base = legalizeToReg(Dest);
4999 VecReg = makeVectorOfZeros(IceType_v16i8);
Andrew Scullcfa628b2015-08-20 14:23:05 -07005000 Ty = largestTypeInSize(CountValue);
5001 } else if (CountValue <= BytesPerStorei32 * Traits::MEMCPY_UNROLL_LIMIT) {
5002 // When the value is non-zero or the count is small we can't use vector
5003 // instructions so are limited to 32-bit stores.
5004 Base = legalizeToReg(Dest);
5005 constexpr uint32_t MaxSize = 4;
5006 Ty = largestTypeInSize(CountValue, MaxSize);
Andrew Scull713dbde2015-08-04 14:25:27 -07005007 }
5008
Andrew Scullcfa628b2015-08-20 14:23:05 -07005009 if (Base) {
5010 uint32_t TyWidth = typeWidthInBytes(Ty);
5011
5012 uint32_t RemainingBytes = CountValue;
5013 uint32_t Offset = (CountValue & ~(TyWidth - 1)) - TyWidth;
5014 while (RemainingBytes >= TyWidth) {
5015 lowerSet(Ty, Offset);
5016 RemainingBytes -= TyWidth;
5017 Offset -= TyWidth;
Andrew Scull713dbde2015-08-04 14:25:27 -07005018 }
Andrew Scull9df4a372015-08-07 09:19:35 -07005019
Andrew Scullcfa628b2015-08-20 14:23:05 -07005020 if (RemainingBytes == 0)
5021 return;
5022
5023 // Lower the remaining bytes. Adjust to larger types in order to make use
5024 // of overlaps in the copies.
5025 Type LeftOverTy = firstTypeThatFitsSize(RemainingBytes);
5026 Offset = CountValue - typeWidthInBytes(LeftOverTy);
5027 lowerSet(LeftOverTy, Offset);
Andrew Scull713dbde2015-08-04 14:25:27 -07005028 return;
5029 }
5030 }
5031
5032 // Fall back on calling the memset function. The value operand needs to be
5033 // extended to a stack slot size because the PNaCl ABI requires arguments to
5034 // be at least 32 bits wide.
5035 Operand *ValExt;
5036 if (IsValConst) {
5037 ValExt = Ctx->getConstantInt(stackSlotType(), ValValue);
5038 } else {
5039 Variable *ValExtVar = Func->makeVariable(stackSlotType());
5040 lowerCast(InstCast::create(Func, InstCast::Zext, ValExtVar, Val));
5041 ValExt = ValExtVar;
5042 }
Karl Schimpf20070e82016-03-17 13:30:13 -07005043 InstCall *Call = makeHelperCall(RuntimeHelper::H_call_memset, nullptr, 3);
Andrew Scull713dbde2015-08-04 14:25:27 -07005044 Call->addArg(Dest);
5045 Call->addArg(ValExt);
5046 Call->addArg(Count);
5047 lowerCall(Call);
5048}
5049
John Portoac2388c2016-01-22 07:10:56 -08005050class AddressOptimizer {
5051 AddressOptimizer() = delete;
5052 AddressOptimizer(const AddressOptimizer &) = delete;
5053 AddressOptimizer &operator=(const AddressOptimizer &) = delete;
John Porto7e93c622015-06-23 10:58:57 -07005054
John Portoac2388c2016-01-22 07:10:56 -08005055public:
5056 explicit AddressOptimizer(const Cfg *Func)
5057 : Func(Func), VMetadata(Func->getVMetadata()) {}
5058
5059 inline void dumpAddressOpt(const ConstantRelocatable *const Relocatable,
5060 int32_t Offset, const Variable *Base,
5061 const Variable *Index, uint16_t Shift,
5062 const Inst *Reason) const;
5063
5064 inline const Inst *matchAssign(Variable **Var,
5065 ConstantRelocatable **Relocatable,
5066 int32_t *Offset);
5067
5068 inline const Inst *matchCombinedBaseIndex(Variable **Base, Variable **Index,
5069 uint16_t *Shift);
5070
5071 inline const Inst *matchShiftedIndex(Variable **Index, uint16_t *Shift);
5072
5073 inline const Inst *matchOffsetBase(Variable **Base,
5074 ConstantRelocatable **Relocatable,
5075 int32_t *Offset);
5076
5077private:
5078 const Cfg *const Func;
5079 const VariablesMetadata *const VMetadata;
5080
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08005081 static bool isAdd(const Inst *Instr) {
5082 if (auto *Arith = llvm::dyn_cast_or_null<const InstArithmetic>(Instr)) {
John Portoac2388c2016-01-22 07:10:56 -08005083 return (Arith->getOp() == InstArithmetic::Add);
5084 }
5085 return false;
5086 }
5087};
5088
5089void AddressOptimizer::dumpAddressOpt(
5090 const ConstantRelocatable *const Relocatable, int32_t Offset,
5091 const Variable *Base, const Variable *Index, uint16_t Shift,
5092 const Inst *Reason) const {
Jim Stichnoth20b71f52015-06-24 15:52:24 -07005093 if (!BuildDefs::dump())
John Porto7e93c622015-06-23 10:58:57 -07005094 return;
5095 if (!Func->isVerbose(IceV_AddrOpt))
5096 return;
5097 OstreamLocker L(Func->getContext());
5098 Ostream &Str = Func->getContext()->getStrDump();
5099 Str << "Instruction: ";
5100 Reason->dumpDecorated(Func);
5101 Str << " results in Base=";
5102 if (Base)
5103 Base->dump(Func);
5104 else
5105 Str << "<null>";
5106 Str << ", Index=";
5107 if (Index)
5108 Index->dump(Func);
5109 else
5110 Str << "<null>";
David Sehraa0b1a12015-10-27 16:55:40 -07005111 Str << ", Shift=" << Shift << ", Offset=" << Offset
5112 << ", Relocatable=" << Relocatable << "\n";
John Porto7e93c622015-06-23 10:58:57 -07005113}
5114
John Portoac2388c2016-01-22 07:10:56 -08005115const Inst *AddressOptimizer::matchAssign(Variable **Var,
5116 ConstantRelocatable **Relocatable,
5117 int32_t *Offset) {
Andrew Scull57e12682015-09-16 11:30:19 -07005118 // Var originates from Var=SrcVar ==> set Var:=SrcVar
John Portoac2388c2016-01-22 07:10:56 -08005119 if (*Var == nullptr)
5120 return nullptr;
5121 if (const Inst *VarAssign = VMetadata->getSingleDefinition(*Var)) {
5122 assert(!VMetadata->isMultiDef(*Var));
John Porto7e93c622015-06-23 10:58:57 -07005123 if (llvm::isa<InstAssign>(VarAssign)) {
5124 Operand *SrcOp = VarAssign->getSrc(0);
5125 assert(SrcOp);
David Sehraa0b1a12015-10-27 16:55:40 -07005126 if (auto *SrcVar = llvm::dyn_cast<Variable>(SrcOp)) {
John Porto7e93c622015-06-23 10:58:57 -07005127 if (!VMetadata->isMultiDef(SrcVar) &&
5128 // TODO: ensure SrcVar stays single-BB
5129 true) {
John Portoac2388c2016-01-22 07:10:56 -08005130 *Var = SrcVar;
5131 return VarAssign;
John Porto7e93c622015-06-23 10:58:57 -07005132 }
David Sehraa0b1a12015-10-27 16:55:40 -07005133 } else if (auto *Const = llvm::dyn_cast<ConstantInteger32>(SrcOp)) {
5134 int32_t MoreOffset = Const->getValue();
John Portoac2388c2016-01-22 07:10:56 -08005135 if (Utils::WouldOverflowAdd(*Offset, MoreOffset))
5136 return nullptr;
5137 *Var = nullptr;
David Sehraa0b1a12015-10-27 16:55:40 -07005138 Offset += MoreOffset;
John Portoac2388c2016-01-22 07:10:56 -08005139 return VarAssign;
David Sehraa0b1a12015-10-27 16:55:40 -07005140 } else if (auto *AddReloc = llvm::dyn_cast<ConstantRelocatable>(SrcOp)) {
John Portoac2388c2016-01-22 07:10:56 -08005141 if (*Relocatable == nullptr) {
5142 // It is always safe to fold a relocatable through assignment -- the
5143 // assignment frees a slot in the address operand that can be used to
5144 // hold the Sandbox Pointer -- if any.
5145 *Var = nullptr;
5146 *Relocatable = AddReloc;
5147 return VarAssign;
David Sehraa0b1a12015-10-27 16:55:40 -07005148 }
John Porto7e93c622015-06-23 10:58:57 -07005149 }
5150 }
5151 }
John Portoac2388c2016-01-22 07:10:56 -08005152 return nullptr;
John Porto7e93c622015-06-23 10:58:57 -07005153}
5154
John Portoac2388c2016-01-22 07:10:56 -08005155const Inst *AddressOptimizer::matchCombinedBaseIndex(Variable **Base,
5156 Variable **Index,
5157 uint16_t *Shift) {
John Porto7e93c622015-06-23 10:58:57 -07005158 // Index==nullptr && Base is Base=Var1+Var2 ==>
5159 // set Base=Var1, Index=Var2, Shift=0
John Portoac2388c2016-01-22 07:10:56 -08005160 if (*Base == nullptr)
5161 return nullptr;
5162 if (*Index != nullptr)
5163 return nullptr;
5164 auto *BaseInst = VMetadata->getSingleDefinition(*Base);
John Porto7e93c622015-06-23 10:58:57 -07005165 if (BaseInst == nullptr)
John Portoac2388c2016-01-22 07:10:56 -08005166 return nullptr;
5167 assert(!VMetadata->isMultiDef(*Base));
John Porto7e93c622015-06-23 10:58:57 -07005168 if (BaseInst->getSrcSize() < 2)
John Portoac2388c2016-01-22 07:10:56 -08005169 return nullptr;
David Sehraa0b1a12015-10-27 16:55:40 -07005170 if (auto *Var1 = llvm::dyn_cast<Variable>(BaseInst->getSrc(0))) {
John Porto7e93c622015-06-23 10:58:57 -07005171 if (VMetadata->isMultiDef(Var1))
John Portoac2388c2016-01-22 07:10:56 -08005172 return nullptr;
David Sehraa0b1a12015-10-27 16:55:40 -07005173 if (auto *Var2 = llvm::dyn_cast<Variable>(BaseInst->getSrc(1))) {
John Porto7e93c622015-06-23 10:58:57 -07005174 if (VMetadata->isMultiDef(Var2))
John Portoac2388c2016-01-22 07:10:56 -08005175 return nullptr;
John Porto7e93c622015-06-23 10:58:57 -07005176 if (isAdd(BaseInst) &&
5177 // TODO: ensure Var1 and Var2 stay single-BB
5178 true) {
John Portoac2388c2016-01-22 07:10:56 -08005179 *Base = Var1;
5180 *Index = Var2;
5181 *Shift = 0; // should already have been 0
5182 return BaseInst;
John Porto7e93c622015-06-23 10:58:57 -07005183 }
5184 }
5185 }
John Portoac2388c2016-01-22 07:10:56 -08005186 return nullptr;
John Porto7e93c622015-06-23 10:58:57 -07005187}
5188
John Portoac2388c2016-01-22 07:10:56 -08005189const Inst *AddressOptimizer::matchShiftedIndex(Variable **Index,
5190 uint16_t *Shift) {
John Porto7e93c622015-06-23 10:58:57 -07005191 // Index is Index=Var*Const && log2(Const)+Shift<=3 ==>
5192 // Index=Var, Shift+=log2(Const)
John Portoac2388c2016-01-22 07:10:56 -08005193 if (*Index == nullptr)
5194 return nullptr;
5195 auto *IndexInst = VMetadata->getSingleDefinition(*Index);
John Porto7e93c622015-06-23 10:58:57 -07005196 if (IndexInst == nullptr)
John Portoac2388c2016-01-22 07:10:56 -08005197 return nullptr;
5198 assert(!VMetadata->isMultiDef(*Index));
John Porto7e93c622015-06-23 10:58:57 -07005199 if (IndexInst->getSrcSize() < 2)
John Portoac2388c2016-01-22 07:10:56 -08005200 return nullptr;
David Sehraa0b1a12015-10-27 16:55:40 -07005201 if (auto *ArithInst = llvm::dyn_cast<InstArithmetic>(IndexInst)) {
5202 if (auto *Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(0))) {
5203 if (auto *Const =
John Porto7e93c622015-06-23 10:58:57 -07005204 llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(1))) {
David Sehraa0b1a12015-10-27 16:55:40 -07005205 if (VMetadata->isMultiDef(Var) || Const->getType() != IceType_i32)
John Portoac2388c2016-01-22 07:10:56 -08005206 return nullptr;
David Sehraa0b1a12015-10-27 16:55:40 -07005207 switch (ArithInst->getOp()) {
5208 default:
John Portoac2388c2016-01-22 07:10:56 -08005209 return nullptr;
David Sehraa0b1a12015-10-27 16:55:40 -07005210 case InstArithmetic::Mul: {
5211 uint32_t Mult = Const->getValue();
John Porto7e93c622015-06-23 10:58:57 -07005212 uint32_t LogMult;
5213 switch (Mult) {
5214 case 1:
5215 LogMult = 0;
5216 break;
5217 case 2:
5218 LogMult = 1;
5219 break;
5220 case 4:
5221 LogMult = 2;
5222 break;
5223 case 8:
5224 LogMult = 3;
5225 break;
5226 default:
John Portoac2388c2016-01-22 07:10:56 -08005227 return nullptr;
John Porto7e93c622015-06-23 10:58:57 -07005228 }
John Portoac2388c2016-01-22 07:10:56 -08005229 if (*Shift + LogMult <= 3) {
5230 *Index = Var;
5231 *Shift += LogMult;
5232 return IndexInst;
John Porto7e93c622015-06-23 10:58:57 -07005233 }
5234 }
David Sehraa0b1a12015-10-27 16:55:40 -07005235 case InstArithmetic::Shl: {
5236 uint32_t ShiftAmount = Const->getValue();
5237 switch (ShiftAmount) {
5238 case 0:
5239 case 1:
5240 case 2:
5241 case 3:
5242 break;
5243 default:
John Portoac2388c2016-01-22 07:10:56 -08005244 return nullptr;
David Sehraa0b1a12015-10-27 16:55:40 -07005245 }
John Portoac2388c2016-01-22 07:10:56 -08005246 if (*Shift + ShiftAmount <= 3) {
5247 *Index = Var;
5248 *Shift += ShiftAmount;
5249 return IndexInst;
David Sehraa0b1a12015-10-27 16:55:40 -07005250 }
5251 }
5252 }
John Porto7e93c622015-06-23 10:58:57 -07005253 }
5254 }
5255 }
John Portoac2388c2016-01-22 07:10:56 -08005256 return nullptr;
John Porto7e93c622015-06-23 10:58:57 -07005257}
5258
John Portoac2388c2016-01-22 07:10:56 -08005259const Inst *AddressOptimizer::matchOffsetBase(Variable **Base,
5260 ConstantRelocatable **Relocatable,
5261 int32_t *Offset) {
John Porto7e93c622015-06-23 10:58:57 -07005262 // Base is Base=Var+Const || Base is Base=Const+Var ==>
5263 // set Base=Var, Offset+=Const
5264 // Base is Base=Var-Const ==>
5265 // set Base=Var, Offset-=Const
John Portoac2388c2016-01-22 07:10:56 -08005266 if (*Base == nullptr) {
5267 return nullptr;
David Sehraa0b1a12015-10-27 16:55:40 -07005268 }
John Portoac2388c2016-01-22 07:10:56 -08005269 const Inst *BaseInst = VMetadata->getSingleDefinition(*Base);
David Sehraa0b1a12015-10-27 16:55:40 -07005270 if (BaseInst == nullptr) {
John Portoac2388c2016-01-22 07:10:56 -08005271 return nullptr;
David Sehraa0b1a12015-10-27 16:55:40 -07005272 }
John Portoac2388c2016-01-22 07:10:56 -08005273 assert(!VMetadata->isMultiDef(*Base));
David Sehraa0b1a12015-10-27 16:55:40 -07005274 if (auto *ArithInst = llvm::dyn_cast<const InstArithmetic>(BaseInst)) {
John Porto7e93c622015-06-23 10:58:57 -07005275 if (ArithInst->getOp() != InstArithmetic::Add &&
5276 ArithInst->getOp() != InstArithmetic::Sub)
John Portoac2388c2016-01-22 07:10:56 -08005277 return nullptr;
John Porto7e93c622015-06-23 10:58:57 -07005278 bool IsAdd = ArithInst->getOp() == InstArithmetic::Add;
David Sehraa0b1a12015-10-27 16:55:40 -07005279 Operand *Src0 = ArithInst->getSrc(0);
5280 Operand *Src1 = ArithInst->getSrc(1);
5281 auto *Var0 = llvm::dyn_cast<Variable>(Src0);
5282 auto *Var1 = llvm::dyn_cast<Variable>(Src1);
5283 auto *Const0 = llvm::dyn_cast<ConstantInteger32>(Src0);
5284 auto *Const1 = llvm::dyn_cast<ConstantInteger32>(Src1);
5285 auto *Reloc0 = llvm::dyn_cast<ConstantRelocatable>(Src0);
5286 auto *Reloc1 = llvm::dyn_cast<ConstantRelocatable>(Src1);
5287 Variable *NewBase = nullptr;
John Portoac2388c2016-01-22 07:10:56 -08005288 int32_t NewOffset = *Offset;
5289 ConstantRelocatable *NewRelocatable = *Relocatable;
David Sehraa0b1a12015-10-27 16:55:40 -07005290 if (Var0 && Var1)
5291 // TODO(sehr): merge base/index splitting into here.
John Portoac2388c2016-01-22 07:10:56 -08005292 return nullptr;
David Sehraa0b1a12015-10-27 16:55:40 -07005293 if (!IsAdd && Var1)
John Portoac2388c2016-01-22 07:10:56 -08005294 return nullptr;
David Sehraa0b1a12015-10-27 16:55:40 -07005295 if (Var0)
5296 NewBase = Var0;
5297 else if (Var1)
5298 NewBase = Var1;
5299 // Don't know how to add/subtract two relocatables.
John Portoac2388c2016-01-22 07:10:56 -08005300 if ((*Relocatable && (Reloc0 || Reloc1)) || (Reloc0 && Reloc1))
5301 return nullptr;
David Sehraa0b1a12015-10-27 16:55:40 -07005302 // Don't know how to subtract a relocatable.
5303 if (!IsAdd && Reloc1)
John Portoac2388c2016-01-22 07:10:56 -08005304 return nullptr;
David Sehraa0b1a12015-10-27 16:55:40 -07005305 // Incorporate ConstantRelocatables.
5306 if (Reloc0)
5307 NewRelocatable = Reloc0;
5308 else if (Reloc1)
5309 NewRelocatable = Reloc1;
5310 // Compute the updated constant offset.
5311 if (Const0) {
John Porto56958cb2016-01-14 09:18:18 -08005312 const int32_t MoreOffset =
5313 IsAdd ? Const0->getValue() : -Const0->getValue();
David Sehraa0b1a12015-10-27 16:55:40 -07005314 if (Utils::WouldOverflowAdd(NewOffset, MoreOffset))
John Portoac2388c2016-01-22 07:10:56 -08005315 return nullptr;
David Sehraa0b1a12015-10-27 16:55:40 -07005316 NewOffset += MoreOffset;
John Porto7e93c622015-06-23 10:58:57 -07005317 }
David Sehraa0b1a12015-10-27 16:55:40 -07005318 if (Const1) {
John Porto56958cb2016-01-14 09:18:18 -08005319 const int32_t MoreOffset =
5320 IsAdd ? Const1->getValue() : -Const1->getValue();
David Sehraa0b1a12015-10-27 16:55:40 -07005321 if (Utils::WouldOverflowAdd(NewOffset, MoreOffset))
John Portoac2388c2016-01-22 07:10:56 -08005322 return nullptr;
David Sehraa0b1a12015-10-27 16:55:40 -07005323 NewOffset += MoreOffset;
5324 }
John Portoac2388c2016-01-22 07:10:56 -08005325 *Base = NewBase;
5326 *Offset = NewOffset;
5327 *Relocatable = NewRelocatable;
5328 return BaseInst;
John Porto7e93c622015-06-23 10:58:57 -07005329 }
John Portoac2388c2016-01-22 07:10:56 -08005330 return nullptr;
John Porto7e93c622015-06-23 10:58:57 -07005331}
5332
John Portoac2388c2016-01-22 07:10:56 -08005333template <typename TypeTraits>
5334typename TargetX86Base<TypeTraits>::X86OperandMem *
5335TargetX86Base<TypeTraits>::computeAddressOpt(const Inst *Instr, Type MemType,
5336 Operand *Addr) {
John Porto7e93c622015-06-23 10:58:57 -07005337 Func->resetCurrentNode();
5338 if (Func->isVerbose(IceV_AddrOpt)) {
5339 OstreamLocker L(Func->getContext());
5340 Ostream &Str = Func->getContext()->getStrDump();
5341 Str << "\nStarting computeAddressOpt for instruction:\n ";
5342 Instr->dumpDecorated(Func);
5343 }
John Portoac2388c2016-01-22 07:10:56 -08005344
5345 OptAddr NewAddr;
5346 NewAddr.Base = llvm::dyn_cast<Variable>(Addr);
5347 if (NewAddr.Base == nullptr)
5348 return nullptr;
5349
Andrew Scull57e12682015-09-16 11:30:19 -07005350 // If the Base has more than one use or is live across multiple blocks, then
5351 // don't go further. Alternatively (?), never consider a transformation that
5352 // would change a variable that is currently *not* live across basic block
5353 // boundaries into one that *is*.
John Portoac2388c2016-01-22 07:10:56 -08005354 if (Func->getVMetadata()->isMultiBlock(
5355 NewAddr.Base) /* || Base->getUseCount() > 1*/)
5356 return nullptr;
John Porto7e93c622015-06-23 10:58:57 -07005357
John Portoac2388c2016-01-22 07:10:56 -08005358 AddressOptimizer AddrOpt(Func);
Karl Schimpfd4699942016-04-02 09:55:31 -07005359 const bool MockBounds = getFlags().getMockBoundsCheck();
David Sehraa0b1a12015-10-27 16:55:40 -07005360 const Inst *Reason = nullptr;
John Portoac2388c2016-01-22 07:10:56 -08005361 bool AddressWasOptimized = false;
5362 // The following unnamed struct identifies the address mode formation steps
5363 // that could potentially create an invalid memory operand (i.e., no free
5364 // slots for RebasePtr.) We add all those variables to this struct so that we
5365 // can use memset() to reset all members to false.
5366 struct {
5367 bool AssignBase = false;
5368 bool AssignIndex = false;
5369 bool OffsetFromBase = false;
5370 bool OffsetFromIndex = false;
5371 bool CombinedBaseIndex = false;
5372 } Skip;
5373 // This points to the boolean in Skip that represents the last folding
5374 // performed. This is used to disable a pattern match that generated an
5375 // invalid address. Without this, the algorithm would never finish.
5376 bool *SkipLastFolding = nullptr;
5377 // NewAddrCheckpoint is used to rollback the address being formed in case an
5378 // invalid address is formed.
5379 OptAddr NewAddrCheckpoint;
5380 Reason = Instr;
David Sehraa0b1a12015-10-27 16:55:40 -07005381 do {
John Portoac2388c2016-01-22 07:10:56 -08005382 if (SandboxingType != ST_None) {
5383 // When sandboxing, we defer the sandboxing of NewAddr to the Concrete
5384 // Target. If our optimization was overly aggressive, then we simply undo
5385 // what the previous iteration did, and set the previous pattern's skip
5386 // bit to true.
5387 if (!legalizeOptAddrForSandbox(&NewAddr)) {
5388 *SkipLastFolding = true;
5389 SkipLastFolding = nullptr;
5390 NewAddr = NewAddrCheckpoint;
5391 Reason = nullptr;
5392 }
5393 }
5394
David Sehraa0b1a12015-10-27 16:55:40 -07005395 if (Reason) {
John Portoac2388c2016-01-22 07:10:56 -08005396 AddrOpt.dumpAddressOpt(NewAddr.Relocatable, NewAddr.Offset, NewAddr.Base,
5397 NewAddr.Index, NewAddr.Shift, Reason);
David Sehraa0b1a12015-10-27 16:55:40 -07005398 AddressWasOptimized = true;
5399 Reason = nullptr;
John Portoac2388c2016-01-22 07:10:56 -08005400 SkipLastFolding = nullptr;
5401 memset(&Skip, 0, sizeof(Skip));
John Porto7e93c622015-06-23 10:58:57 -07005402 }
John Portoac2388c2016-01-22 07:10:56 -08005403
5404 NewAddrCheckpoint = NewAddr;
5405
David Sehraa0b1a12015-10-27 16:55:40 -07005406 // Update Base and Index to follow through assignments to definitions.
John Portoac2388c2016-01-22 07:10:56 -08005407 if (!Skip.AssignBase &&
5408 (Reason = AddrOpt.matchAssign(&NewAddr.Base, &NewAddr.Relocatable,
5409 &NewAddr.Offset))) {
5410 SkipLastFolding = &Skip.AssignBase;
David Sehraa0b1a12015-10-27 16:55:40 -07005411 // Assignments of Base from a Relocatable or ConstantInt32 can result
5412 // in Base becoming nullptr. To avoid code duplication in this loop we
5413 // prefer that Base be non-nullptr if possible.
John Portoac2388c2016-01-22 07:10:56 -08005414 if ((NewAddr.Base == nullptr) && (NewAddr.Index != nullptr) &&
5415 NewAddr.Shift == 0) {
5416 std::swap(NewAddr.Base, NewAddr.Index);
5417 }
David Sehraa0b1a12015-10-27 16:55:40 -07005418 continue;
5419 }
John Portoac2388c2016-01-22 07:10:56 -08005420 if (!Skip.AssignBase &&
5421 (Reason = AddrOpt.matchAssign(&NewAddr.Index, &NewAddr.Relocatable,
5422 &NewAddr.Offset))) {
5423 SkipLastFolding = &Skip.AssignIndex;
David Sehraa0b1a12015-10-27 16:55:40 -07005424 continue;
John Portoac2388c2016-01-22 07:10:56 -08005425 }
John Porto7e93c622015-06-23 10:58:57 -07005426
David Sehraa0b1a12015-10-27 16:55:40 -07005427 if (!MockBounds) {
5428 // Transition from:
5429 // <Relocatable + Offset>(Base) to
5430 // <Relocatable + Offset>(Base, Index)
John Portoac2388c2016-01-22 07:10:56 -08005431 if (!Skip.CombinedBaseIndex &&
5432 (Reason = AddrOpt.matchCombinedBaseIndex(
5433 &NewAddr.Base, &NewAddr.Index, &NewAddr.Shift))) {
5434 SkipLastFolding = &Skip.CombinedBaseIndex;
David Sehraa0b1a12015-10-27 16:55:40 -07005435 continue;
John Portoac2388c2016-01-22 07:10:56 -08005436 }
5437
David Sehraa0b1a12015-10-27 16:55:40 -07005438 // Recognize multiply/shift and update Shift amount.
5439 // Index becomes Index=Var<<Const && Const+Shift<=3 ==>
5440 // Index=Var, Shift+=Const
5441 // Index becomes Index=Const*Var && log2(Const)+Shift<=3 ==>
5442 // Index=Var, Shift+=log2(Const)
John Portoac2388c2016-01-22 07:10:56 -08005443 if ((Reason =
5444 AddrOpt.matchShiftedIndex(&NewAddr.Index, &NewAddr.Shift))) {
David Sehraa0b1a12015-10-27 16:55:40 -07005445 continue;
John Portoac2388c2016-01-22 07:10:56 -08005446 }
5447
David Sehraa0b1a12015-10-27 16:55:40 -07005448 // If Shift is zero, the choice of Base and Index was purely arbitrary.
5449 // Recognize multiply/shift and set Shift amount.
5450 // Shift==0 && Base is Base=Var*Const && log2(Const)+Shift<=3 ==>
5451 // swap(Index,Base)
5452 // Similar for Base=Const*Var and Base=Var<<Const
John Portoac2388c2016-01-22 07:10:56 -08005453 if (NewAddr.Shift == 0 &&
5454 (Reason = AddrOpt.matchShiftedIndex(&NewAddr.Base, &NewAddr.Shift))) {
5455 std::swap(NewAddr.Base, NewAddr.Index);
David Sehraa0b1a12015-10-27 16:55:40 -07005456 continue;
5457 }
5458 }
John Portoac2388c2016-01-22 07:10:56 -08005459
David Sehraa0b1a12015-10-27 16:55:40 -07005460 // Update Offset to reflect additions/subtractions with constants and
5461 // relocatables.
John Porto7e93c622015-06-23 10:58:57 -07005462 // TODO: consider overflow issues with respect to Offset.
John Portoac2388c2016-01-22 07:10:56 -08005463 if (!Skip.OffsetFromBase &&
5464 (Reason = AddrOpt.matchOffsetBase(&NewAddr.Base, &NewAddr.Relocatable,
5465 &NewAddr.Offset))) {
5466 SkipLastFolding = &Skip.OffsetFromBase;
David Sehraa0b1a12015-10-27 16:55:40 -07005467 continue;
John Portoac2388c2016-01-22 07:10:56 -08005468 }
5469 if (NewAddr.Shift == 0 && !Skip.OffsetFromIndex &&
5470 (Reason = AddrOpt.matchOffsetBase(&NewAddr.Index, &NewAddr.Relocatable,
5471 &NewAddr.Offset))) {
5472 SkipLastFolding = &Skip.OffsetFromIndex;
David Sehr69e92902015-11-04 14:46:29 -08005473 continue;
John Portoac2388c2016-01-22 07:10:56 -08005474 }
5475
David Sehraa0b1a12015-10-27 16:55:40 -07005476 // TODO(sehr, stichnot): Handle updates of Index with Shift != 0.
5477 // Index is Index=Var+Const ==>
5478 // set Index=Var, Offset+=(Const<<Shift)
5479 // Index is Index=Const+Var ==>
5480 // set Index=Var, Offset+=(Const<<Shift)
5481 // Index is Index=Var-Const ==>
5482 // set Index=Var, Offset-=(Const<<Shift)
5483 break;
5484 } while (Reason);
John Portoac2388c2016-01-22 07:10:56 -08005485
5486 if (!AddressWasOptimized) {
5487 return nullptr;
5488 }
5489
5490 // Undo any addition of RebasePtr. It will be added back when the mem
5491 // operand is sandboxed.
5492 if (NewAddr.Base == RebasePtr) {
5493 NewAddr.Base = nullptr;
5494 }
5495
5496 if (NewAddr.Index == RebasePtr) {
5497 NewAddr.Index = nullptr;
5498 NewAddr.Shift = 0;
5499 }
5500
5501 Constant *OffsetOp = nullptr;
5502 if (NewAddr.Relocatable == nullptr) {
5503 OffsetOp = Ctx->getConstantInt32(NewAddr.Offset);
5504 } else {
5505 OffsetOp =
5506 Ctx->getConstantSym(NewAddr.Relocatable->getOffset() + NewAddr.Offset,
Jim Stichnoth98ba0062016-03-07 09:26:22 -08005507 NewAddr.Relocatable->getName());
John Portoac2388c2016-01-22 07:10:56 -08005508 }
5509 // Vanilla ICE load instructions should not use the segment registers, and
5510 // computeAddressOpt only works at the level of Variables and Constants, not
5511 // other X86OperandMem, so there should be no mention of segment
5512 // registers there either.
5513 static constexpr auto SegmentReg =
5514 X86OperandMem::SegmentRegisters::DefaultSegment;
5515
5516 return X86OperandMem::create(Func, MemType, NewAddr.Base, OffsetOp,
5517 NewAddr.Index, NewAddr.Shift, SegmentReg);
John Porto7e93c622015-06-23 10:58:57 -07005518}
5519
Jim Stichnothad2989b2015-09-15 10:21:42 -07005520/// Add a mock bounds check on the memory address before using it as a load or
5521/// store operand. The basic idea is that given a memory operand [reg], we
5522/// would first add bounds-check code something like:
5523///
5524/// cmp reg, <lb>
5525/// jl out_of_line_error
5526/// cmp reg, <ub>
5527/// jg out_of_line_error
5528///
5529/// In reality, the specific code will depend on how <lb> and <ub> are
5530/// represented, e.g. an immediate, a global, or a function argument.
5531///
5532/// As such, we need to enforce that the memory operand does not have the form
5533/// [reg1+reg2], because then there is no simple cmp instruction that would
5534/// suffice. However, we consider [reg+offset] to be OK because the offset is
5535/// usually small, and so <ub> could have a safety buffer built in and then we
5536/// could instead branch to a custom out_of_line_error that does the precise
5537/// check and jumps back if it turns out OK.
5538///
5539/// For the purpose of mocking the bounds check, we'll do something like this:
5540///
5541/// cmp reg, 0
5542/// je label
5543/// cmp reg, 1
5544/// je label
5545/// label:
5546///
5547/// Also note that we don't need to add a bounds check to a dereference of a
5548/// simple global variable address.
John Porto4a566862016-01-04 09:33:41 -08005549template <typename TraitsType>
5550void TargetX86Base<TraitsType>::doMockBoundsCheck(Operand *Opnd) {
Karl Schimpfd4699942016-04-02 09:55:31 -07005551 if (!getFlags().getMockBoundsCheck())
Jim Stichnothad2989b2015-09-15 10:21:42 -07005552 return;
John Porto4a566862016-01-04 09:33:41 -08005553 if (auto *Mem = llvm::dyn_cast<X86OperandMem>(Opnd)) {
Jim Stichnothad2989b2015-09-15 10:21:42 -07005554 if (Mem->getIndex()) {
5555 llvm::report_fatal_error("doMockBoundsCheck: Opnd contains index reg");
5556 }
5557 Opnd = Mem->getBase();
5558 }
5559 // At this point Opnd could be nullptr, or Variable, or Constant, or perhaps
5560 // something else. We only care if it is Variable.
5561 auto *Var = llvm::dyn_cast_or_null<Variable>(Opnd);
5562 if (Var == nullptr)
5563 return;
5564 // We use lowerStore() to copy out-args onto the stack. This creates a memory
5565 // operand with the stack pointer as the base register. Don't do bounds
5566 // checks on that.
Jim Stichnoth8aa39662016-02-10 11:20:30 -08005567 if (Var->getRegNum() == getStackReg())
Jim Stichnothad2989b2015-09-15 10:21:42 -07005568 return;
5569
John Porto4a566862016-01-04 09:33:41 -08005570 auto *Label = InstX86Label::create(Func, this);
Jim Stichnothad2989b2015-09-15 10:21:42 -07005571 _cmp(Opnd, Ctx->getConstantZero(IceType_i32));
5572 _br(Traits::Cond::Br_e, Label);
5573 _cmp(Opnd, Ctx->getConstantInt32(1));
5574 _br(Traits::Cond::Br_e, Label);
5575 Context.insert(Label);
5576}
5577
John Porto4a566862016-01-04 09:33:41 -08005578template <typename TraitsType>
5579void TargetX86Base<TraitsType>::lowerLoad(const InstLoad *Load) {
John Porto921856d2015-07-07 11:56:26 -07005580 // A Load instruction can be treated the same as an Assign instruction, after
Jim Stichnoth8ff4b282016-01-04 15:39:06 -08005581 // the source operand is transformed into an X86OperandMem operand. Note that
5582 // the address mode optimization already creates an X86OperandMem operand, so
5583 // it doesn't need another level of transformation.
John Porto7e93c622015-06-23 10:58:57 -07005584 Variable *DestLoad = Load->getDest();
5585 Type Ty = DestLoad->getType();
5586 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty);
Jim Stichnothad2989b2015-09-15 10:21:42 -07005587 doMockBoundsCheck(Src0);
Jim Stichnoth54f3d512015-12-11 09:53:00 -08005588 auto *Assign = InstAssign::create(Func, DestLoad, Src0);
John Porto7e93c622015-06-23 10:58:57 -07005589 lowerAssign(Assign);
5590}
5591
John Porto4a566862016-01-04 09:33:41 -08005592template <typename TraitsType>
5593void TargetX86Base<TraitsType>::doAddressOptLoad() {
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08005594 Inst *Instr = Context.getCur();
5595 Operand *Addr = Instr->getSrc(0);
5596 Variable *Dest = Instr->getDest();
5597 if (auto *OptAddr = computeAddressOpt(Instr, Dest->getType(), Addr)) {
5598 Instr->setDeleted();
John Portoac2388c2016-01-22 07:10:56 -08005599 Context.insert<InstLoad>(Dest, OptAddr);
John Porto7e93c622015-06-23 10:58:57 -07005600 }
5601}
5602
John Porto4a566862016-01-04 09:33:41 -08005603template <typename TraitsType>
5604void TargetX86Base<TraitsType>::randomlyInsertNop(float Probability,
5605 RandomNumberGenerator &RNG) {
Qining Luaee5fa82015-08-20 14:59:03 -07005606 RandomNumberGeneratorWrapper RNGW(RNG);
5607 if (RNGW.getTrueWithProbability(Probability)) {
5608 _nop(RNGW(Traits::X86_NUM_NOP_VARIANTS));
John Porto7e93c622015-06-23 10:58:57 -07005609 }
5610}
5611
John Porto4a566862016-01-04 09:33:41 -08005612template <typename TraitsType>
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08005613void TargetX86Base<TraitsType>::lowerPhi(const InstPhi * /*Instr*/) {
John Porto7e93c622015-06-23 10:58:57 -07005614 Func->setError("Phi found in regular instruction list");
5615}
5616
John Porto4a566862016-01-04 09:33:41 -08005617template <typename TraitsType>
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08005618void TargetX86Base<TraitsType>::lowerRet(const InstRet *Instr) {
David Sehr0c68bef2016-01-20 10:00:23 -08005619 Variable *Reg = nullptr;
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08005620 if (Instr->hasRetValue()) {
5621 Operand *RetValue = legalize(Instr->getRetValue());
David Sehr0c68bef2016-01-20 10:00:23 -08005622 const Type ReturnType = RetValue->getType();
5623 assert(isVectorType(ReturnType) || isScalarFloatingType(ReturnType) ||
5624 (ReturnType == IceType_i32) || (ReturnType == IceType_i64));
5625 Reg = moveReturnValueToRegister(RetValue, ReturnType);
5626 }
5627 // Add a ret instruction even if sandboxing is enabled, because addEpilog
5628 // explicitly looks for a ret instruction as a marker for where to insert the
5629 // frame removal instructions.
5630 _ret(Reg);
5631 // Add a fake use of esp to make sure esp stays alive for the entire
5632 // function. Otherwise post-call esp adjustments get dead-code eliminated.
5633 keepEspLiveAtExit();
5634}
5635
John Portoae15f0f2016-04-26 04:26:33 -07005636inline uint32_t makePshufdMask(SizeT Index0, SizeT Index1, SizeT Index2,
5637 SizeT Index3) {
5638 const SizeT Mask = (Index0 & 0x3) | ((Index1 & 0x3) << 2) |
5639 ((Index2 & 0x3) << 4) | ((Index3 & 0x3) << 6);
5640 assert(Mask < 256);
5641 return Mask;
5642}
5643
5644template <typename TraitsType>
5645Variable *TargetX86Base<TraitsType>::lowerShuffleVector_AllFromSameSrc(
5646 Variable *Src, SizeT Index0, SizeT Index1, SizeT Index2, SizeT Index3) {
5647 constexpr SizeT SrcBit = 1 << 2;
5648 assert((Index0 & SrcBit) == (Index1 & SrcBit));
5649 assert((Index0 & SrcBit) == (Index2 & SrcBit));
5650 assert((Index0 & SrcBit) == (Index3 & SrcBit));
5651 (void)SrcBit;
5652
5653 const Type SrcTy = Src->getType();
5654 auto *T = makeReg(SrcTy);
5655 auto *SrcRM = legalize(Src, Legal_Reg | Legal_Mem);
5656 auto *Mask =
5657 Ctx->getConstantInt32(makePshufdMask(Index0, Index1, Index2, Index3));
5658 _pshufd(T, SrcRM, Mask);
5659 return T;
5660}
5661
5662template <typename TraitsType>
5663Variable *TargetX86Base<TraitsType>::lowerShuffleVector_TwoFromSameSrc(
5664 Variable *Src0, SizeT Index0, SizeT Index1, Variable *Src1, SizeT Index2,
5665 SizeT Index3) {
5666 constexpr SizeT SrcBit = 1 << 2;
5667 assert((Index0 & SrcBit) == (Index1 & SrcBit) || (Index1 == IGNORE_INDEX));
5668 assert((Index2 & SrcBit) == (Index3 & SrcBit) || (Index3 == IGNORE_INDEX));
5669 (void)SrcBit;
5670
5671 const Type SrcTy = Src0->getType();
5672 assert(Src1->getType() == SrcTy);
5673 auto *T = makeReg(SrcTy);
5674 auto *Src0R = legalizeToReg(Src0);
5675 auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
5676 auto *Mask =
5677 Ctx->getConstantInt32(makePshufdMask(Index0, Index1, Index2, Index3));
5678 _movp(T, Src0R);
5679 _shufps(T, Src1RM, Mask);
5680 return T;
5681}
5682
// Gathers one element from each of two different source vectors into lanes 0
// and 1 of a fresh register (lanes 2 and 3 are don't-care), by delegating to
// the two-from-same-src helper with the upper pair ignored.
template <typename TraitsType>
Variable *TargetX86Base<TraitsType>::lowerShuffleVector_UnifyFromDifferentSrcs(
    Variable *Src0, SizeT Index0, Variable *Src1, SizeT Index1) {
  return lowerShuffleVector_TwoFromSameSrc(Src0, Index0, IGNORE_INDEX, Src1,
                                           Index1, IGNORE_INDEX);
}
5689
5690inline SizeT makeSrcSwitchMask(SizeT Index0, SizeT Index1, SizeT Index2,
5691 SizeT Index3) {
5692 constexpr SizeT SrcBit = 1 << 2;
5693 const SizeT Index0Bits = ((Index0 & SrcBit) == 0) ? 0 : (1 << 0);
5694 const SizeT Index1Bits = ((Index1 & SrcBit) == 0) ? 0 : (1 << 1);
5695 const SizeT Index2Bits = ((Index2 & SrcBit) == 0) ? 0 : (1 << 2);
5696 const SizeT Index3Bits = ((Index3 & SrcBit) == 0) ? 0 : (1 << 3);
5697 return Index0Bits | Index1Bits | Index2Bits | Index3Bits;
5698}
5699
David Sehr0c68bef2016-01-20 10:00:23 -08005700template <typename TraitsType>
John Portode29f122016-04-26 19:16:07 -07005701GlobalString TargetX86Base<TraitsType>::lowerShuffleVector_NewMaskName() {
5702 GlobalString FuncName = Func->getFunctionName();
5703 const SizeT Id = PshufbMaskCount++;
5704 if (!BuildDefs::dump() || !FuncName.hasStdString()) {
5705 return GlobalString::createWithString(
5706 Ctx,
5707 "$PS" + std::to_string(FuncName.getID()) + "_" + std::to_string(Id));
5708 }
5709 return GlobalString::createWithString(
5710 Ctx, "Pshufb$" + Func->getFunctionName() + "$" + std::to_string(Id));
5711}
5712
/// Materializes a 16-byte pshufb control mask as a constant, 16-byte-aligned
/// global variable, and returns a ConstantRelocatable referring to it. Each
/// IdxN becomes one byte of the mask, in order.
template <typename TraitsType>
ConstantRelocatable *
TargetX86Base<TraitsType>::lowerShuffleVector_CreatePshufbMask(
    int8_t Idx0, int8_t Idx1, int8_t Idx2, int8_t Idx3, int8_t Idx4,
    int8_t Idx5, int8_t Idx6, int8_t Idx7, int8_t Idx8, int8_t Idx9,
    int8_t Idx10, int8_t Idx11, int8_t Idx12, int8_t Idx13, int8_t Idx14,
    int8_t Idx15) {
  static constexpr uint8_t NumElements = 16;
  const char Initializer[NumElements] = {
      Idx0, Idx1, Idx2, Idx3, Idx4, Idx5, Idx6, Idx7,
      Idx8, Idx9, Idx10, Idx11, Idx12, Idx13, Idx14, Idx15,
  };

  // Alignment is taken from the v4i32 stack width (16 bytes for XMM use).
  static constexpr Type V4VectorType = IceType_v4i32;
  const uint32_t MaskAlignment = typeWidthInBytesOnStack(V4VectorType);
  auto *Mask = VariableDeclaration::create(Func->getGlobalPool());
  GlobalString MaskName = lowerShuffleVector_NewMaskName();
  Mask->setIsConstant(true);
  Mask->addInitializer(VariableDeclaration::DataInitializer::create(
      Func->getGlobalPool(), Initializer, NumElements));
  Mask->setName(MaskName);
  // Mask needs to be 16-byte aligned, or pshufb will seg fault.
  Mask->setAlignment(MaskAlignment);
  Func->addGlobal(Mask);

  // Refer to the new global by symbol at offset 0.
  constexpr RelocOffsetT Offset = 0;
  return llvm::cast<ConstantRelocatable>(Ctx->getConstantSym(Offset, MaskName));
}
5741
/// Lowers a 16-lane byte shuffle of Src0/Src1 into Dest using two pshufb
/// operations. Each IdxN in [0,15] selects a byte of Src0; an IdxN with bit 4
/// set (i.e. [16,31]) selects a byte of Src1. One control mask is built per
/// source: lanes belonging to the other source get CLEAR_ALL_BITS (high bit
/// set, which per the pshufb spec zeroes the lane), so por'ing the two
/// partial results yields the full shuffle.
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerShuffleVector_UsingPshufb(
    Variable *Dest, Operand *Src0, Operand *Src1, int8_t Idx0, int8_t Idx1,
    int8_t Idx2, int8_t Idx3, int8_t Idx4, int8_t Idx5, int8_t Idx6,
    int8_t Idx7, int8_t Idx8, int8_t Idx9, int8_t Idx10, int8_t Idx11,
    int8_t Idx12, int8_t Idx13, int8_t Idx14, int8_t Idx15) {
  const Type DestTy = Dest->getType();
  static constexpr bool NotRebased = false;
  static constexpr Variable *NoBase = nullptr;
  // We use void for the memory operand instead of DestTy because using the
  // latter causes a validation failure: the X86 Inst layer complains that
  // vector mem operands could be under aligned. Thus, using void we avoid the
  // validation error. Note that the mask global declaration is aligned, so it
  // can be used as an XMM mem operand.
  static constexpr Type MaskType = IceType_void;
// IDX_IN_SRC(N, S) yields N's lane number (low 4 bits) if index N refers to
// source S (bit 4 of N equals S), else CLEAR_ALL_BITS.
#define IDX_IN_SRC(N, S)                                                       \
  ((((N) & (1 << 4)) == (S << 4)) ? ((N)&0xf) : CLEAR_ALL_BITS)
  auto *Mask0M = X86OperandMem::create(
      Func, MaskType, NoBase,
      lowerShuffleVector_CreatePshufbMask(
          IDX_IN_SRC(Idx0, 0), IDX_IN_SRC(Idx1, 0), IDX_IN_SRC(Idx2, 0),
          IDX_IN_SRC(Idx3, 0), IDX_IN_SRC(Idx4, 0), IDX_IN_SRC(Idx5, 0),
          IDX_IN_SRC(Idx6, 0), IDX_IN_SRC(Idx7, 0), IDX_IN_SRC(Idx8, 0),
          IDX_IN_SRC(Idx9, 0), IDX_IN_SRC(Idx10, 0), IDX_IN_SRC(Idx11, 0),
          IDX_IN_SRC(Idx12, 0), IDX_IN_SRC(Idx13, 0), IDX_IN_SRC(Idx14, 0),
          IDX_IN_SRC(Idx15, 0)),
      NotRebased);
  auto *Mask1M = X86OperandMem::create(
      Func, MaskType, NoBase,
      lowerShuffleVector_CreatePshufbMask(
          IDX_IN_SRC(Idx0, 1), IDX_IN_SRC(Idx1, 1), IDX_IN_SRC(Idx2, 1),
          IDX_IN_SRC(Idx3, 1), IDX_IN_SRC(Idx4, 1), IDX_IN_SRC(Idx5, 1),
          IDX_IN_SRC(Idx6, 1), IDX_IN_SRC(Idx7, 1), IDX_IN_SRC(Idx8, 1),
          IDX_IN_SRC(Idx9, 1), IDX_IN_SRC(Idx10, 1), IDX_IN_SRC(Idx11, 1),
          IDX_IN_SRC(Idx12, 1), IDX_IN_SRC(Idx13, 1), IDX_IN_SRC(Idx14, 1),
          IDX_IN_SRC(Idx15, 1)),
      NotRebased);
#undef IDX_IN_SRC
  // Copy each source into its own temporary, permute each with its mask, then
  // combine the two halves.
  auto *T0 = makeReg(DestTy);
  auto *T1 = makeReg(DestTy);
  auto *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
  _movp(T0, Src0RM);
  auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
  _movp(T1, Src1RM);

  _pshufb(T1, Mask1M);
  _pshufb(T0, Mask0M);
  _por(T1, T0);
  _movp(Dest, T1);
}
5792
5793template <typename TraitsType>
John Portoa47c11c2016-04-21 05:53:42 -07005794void TargetX86Base<TraitsType>::lowerShuffleVector(
5795 const InstShuffleVector *Instr) {
5796 auto *Dest = Instr->getDest();
5797 const Type DestTy = Dest->getType();
John Portoae15f0f2016-04-26 04:26:33 -07005798 auto *Src0 = llvm::cast<Variable>(Instr->getSrc(0));
5799 auto *Src1 = llvm::cast<Variable>(Instr->getSrc(1));
5800 const SizeT NumElements = typeNumElements(DestTy);
John Portoa47c11c2016-04-21 05:53:42 -07005801
5802 auto *T = makeReg(DestTy);
5803
5804 switch (DestTy) {
5805 default:
John Portode29f122016-04-26 19:16:07 -07005806 llvm::report_fatal_error("Unexpected vector type.");
5807 case IceType_v16i1:
5808 case IceType_v16i8: {
5809 if (InstructionSet < Traits::SSE4_1) {
5810 // TODO(jpp): figure out how to lower with sse2.
5811 break;
5812 }
5813 static constexpr SizeT ExpectedNumElements = 16;
5814 assert(ExpectedNumElements == Instr->getNumIndexes());
5815 (void)ExpectedNumElements;
5816 const SizeT Index0 = Instr->getIndex(0)->getValue();
5817 const SizeT Index1 = Instr->getIndex(1)->getValue();
5818 const SizeT Index2 = Instr->getIndex(2)->getValue();
5819 const SizeT Index3 = Instr->getIndex(3)->getValue();
5820 const SizeT Index4 = Instr->getIndex(4)->getValue();
5821 const SizeT Index5 = Instr->getIndex(5)->getValue();
5822 const SizeT Index6 = Instr->getIndex(6)->getValue();
5823 const SizeT Index7 = Instr->getIndex(7)->getValue();
5824 const SizeT Index8 = Instr->getIndex(8)->getValue();
5825 const SizeT Index9 = Instr->getIndex(9)->getValue();
5826 const SizeT Index10 = Instr->getIndex(10)->getValue();
5827 const SizeT Index11 = Instr->getIndex(11)->getValue();
5828 const SizeT Index12 = Instr->getIndex(12)->getValue();
5829 const SizeT Index13 = Instr->getIndex(13)->getValue();
5830 const SizeT Index14 = Instr->getIndex(14)->getValue();
5831 const SizeT Index15 = Instr->getIndex(15)->getValue();
5832 lowerShuffleVector_UsingPshufb(Dest, Src0, Src1, Index0, Index1, Index2,
5833 Index3, Index4, Index5, Index6, Index7,
5834 Index8, Index9, Index10, Index11, Index12,
5835 Index13, Index14, Index15);
5836 return;
5837 }
5838 case IceType_v8i1:
5839 case IceType_v8i16: {
5840 if (InstructionSet < Traits::SSE4_1) {
5841 // TODO(jpp): figure out how to lower with sse2.
5842 break;
5843 }
5844 static constexpr SizeT ExpectedNumElements = 8;
5845 assert(ExpectedNumElements == Instr->getNumIndexes());
5846 (void)ExpectedNumElements;
5847 const SizeT Index0 = Instr->getIndex(0)->getValue();
5848 const SizeT Index1 = Instr->getIndex(1)->getValue();
5849 const SizeT Index2 = Instr->getIndex(2)->getValue();
5850 const SizeT Index3 = Instr->getIndex(3)->getValue();
5851 const SizeT Index4 = Instr->getIndex(4)->getValue();
5852 const SizeT Index5 = Instr->getIndex(5)->getValue();
5853 const SizeT Index6 = Instr->getIndex(6)->getValue();
5854 const SizeT Index7 = Instr->getIndex(7)->getValue();
5855#define TO_BYTE_INDEX(I) ((I) << 1)
5856 lowerShuffleVector_UsingPshufb(
5857 Dest, Src0, Src1, TO_BYTE_INDEX(Index0), TO_BYTE_INDEX(Index0) + 1,
5858 TO_BYTE_INDEX(Index1), TO_BYTE_INDEX(Index1) + 1, TO_BYTE_INDEX(Index2),
5859 TO_BYTE_INDEX(Index2) + 1, TO_BYTE_INDEX(Index3),
5860 TO_BYTE_INDEX(Index3) + 1, TO_BYTE_INDEX(Index4),
5861 TO_BYTE_INDEX(Index4) + 1, TO_BYTE_INDEX(Index5),
5862 TO_BYTE_INDEX(Index5) + 1, TO_BYTE_INDEX(Index6),
5863 TO_BYTE_INDEX(Index6) + 1, TO_BYTE_INDEX(Index7),
5864 TO_BYTE_INDEX(Index7) + 1);
5865#undef TO_BYTE_INDEX
5866 return;
5867 }
John Portoae15f0f2016-04-26 04:26:33 -07005868 case IceType_v4i1:
5869 case IceType_v4i32:
5870 case IceType_v4f32: {
5871 static constexpr SizeT ExpectedNumElements = 4;
5872 assert(ExpectedNumElements == Instr->getNumIndexes());
5873 const SizeT Index0 = Instr->getIndex(0)->getValue();
5874 const SizeT Index1 = Instr->getIndex(1)->getValue();
5875 const SizeT Index2 = Instr->getIndex(2)->getValue();
5876 const SizeT Index3 = Instr->getIndex(3)->getValue();
5877 Variable *T = nullptr;
5878 switch (makeSrcSwitchMask(Index0, Index1, Index2, Index3)) {
5879#define CASE_SRCS_IN(S0, S1, S2, S3) \
5880 case (((S0) << 0) | ((S1) << 1) | ((S2) << 2) | ((S3) << 3))
5881 CASE_SRCS_IN(0, 0, 0, 0) : {
5882 T = lowerShuffleVector_AllFromSameSrc(Src0, Index0, Index1, Index2,
5883 Index3);
5884 }
5885 break;
5886 CASE_SRCS_IN(0, 0, 0, 1) : {
5887 assert(false && "Following code is untested but likely correct; test "
5888 "and remove assert.");
5889 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index2,
5890 Src1, Index3);
5891 T = lowerShuffleVector_TwoFromSameSrc(Src0, Index0, Index1, Unified,
5892 UNIFIED_INDEX_0, UNIFIED_INDEX_1);
5893 }
5894 break;
5895 CASE_SRCS_IN(0, 0, 1, 0) : {
5896 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index2,
5897 Src0, Index3);
5898 T = lowerShuffleVector_TwoFromSameSrc(Src0, Index0, Index1, Unified,
5899 UNIFIED_INDEX_0, UNIFIED_INDEX_1);
5900 }
5901 break;
5902 CASE_SRCS_IN(0, 0, 1, 1) : {
5903 assert(false && "Following code is untested but likely correct; test "
5904 "and remove assert.");
5905 T = lowerShuffleVector_TwoFromSameSrc(Src0, Index0, Index1, Src1,
5906 Index2, Index3);
5907 }
5908 break;
5909 CASE_SRCS_IN(0, 1, 0, 0) : {
5910 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index0,
5911 Src1, Index1);
5912 T = lowerShuffleVector_TwoFromSameSrc(
5913 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src0, Index2, Index3);
5914 }
5915 break;
5916 CASE_SRCS_IN(0, 1, 0, 1) : {
5917 if (Index0 == 0 && (Index1 - ExpectedNumElements) == 0 && Index2 == 1 &&
5918 (Index3 - ExpectedNumElements) == 1) {
5919 assert(false && "Following code is untested but likely correct; test "
5920 "and remove assert.");
5921 auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
5922 auto *Src0R = legalizeToReg(Src0);
5923 T = makeReg(DestTy);
5924 _movp(T, Src0R);
5925 _punpckl(T, Src1RM);
5926 } else if (Index0 == Index2 && Index1 == Index3) {
5927 assert(false && "Following code is untested but likely correct; test "
5928 "and remove assert.");
5929 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(
5930 Src0, Index0, Src1, Index1);
5931 T = lowerShuffleVector_AllFromSameSrc(
5932 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_0,
5933 UNIFIED_INDEX_1);
5934 } else {
5935 assert(false && "Following code is untested but likely correct; test "
5936 "and remove assert.");
5937 auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs(
5938 Src0, Index0, Src1, Index1);
5939 auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs(
5940 Src0, Index2, Src1, Index3);
5941 T = lowerShuffleVector_TwoFromSameSrc(
5942 Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1,
5943 UNIFIED_INDEX_0, UNIFIED_INDEX_1);
5944 }
5945 }
5946 break;
5947 CASE_SRCS_IN(0, 1, 1, 0) : {
5948 if (Index0 == Index3 && Index1 == Index2) {
5949 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(
5950 Src0, Index0, Src1, Index1);
5951 T = lowerShuffleVector_AllFromSameSrc(
5952 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_1,
5953 UNIFIED_INDEX_0);
5954 } else {
5955 auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs(
5956 Src0, Index0, Src1, Index1);
5957 auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs(
5958 Src1, Index2, Src0, Index3);
5959 T = lowerShuffleVector_TwoFromSameSrc(
5960 Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1,
5961 UNIFIED_INDEX_0, UNIFIED_INDEX_1);
5962 }
5963 }
5964 break;
5965 CASE_SRCS_IN(0, 1, 1, 1) : {
5966 assert(false && "Following code is untested but likely correct; test "
5967 "and remove assert.");
5968 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index0,
5969 Src1, Index1);
5970 T = lowerShuffleVector_TwoFromSameSrc(
5971 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src1, Index2, Index3);
5972 }
5973 break;
5974 CASE_SRCS_IN(1, 0, 0, 0) : {
5975 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index0,
5976 Src0, Index1);
5977 T = lowerShuffleVector_TwoFromSameSrc(
5978 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src0, Index2, Index3);
5979 }
5980 break;
5981 CASE_SRCS_IN(1, 0, 0, 1) : {
5982 if (Index0 == Index3 && Index1 == Index2) {
5983 assert(false && "Following code is untested but likely correct; test "
5984 "and remove assert.");
5985 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(
5986 Src1, Index0, Src0, Index1);
5987 T = lowerShuffleVector_AllFromSameSrc(
5988 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_1,
5989 UNIFIED_INDEX_0);
5990 } else {
5991 assert(false && "Following code is untested but likely correct; test "
5992 "and remove assert.");
5993 auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs(
5994 Src1, Index0, Src0, Index1);
5995 auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs(
5996 Src0, Index2, Src1, Index3);
5997 T = lowerShuffleVector_TwoFromSameSrc(
5998 Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1,
5999 UNIFIED_INDEX_0, UNIFIED_INDEX_1);
6000 }
6001 }
6002 break;
6003 CASE_SRCS_IN(1, 0, 1, 0) : {
6004 if ((Index0 - ExpectedNumElements) == 0 && Index1 == 0 &&
6005 (Index2 - ExpectedNumElements) == 1 && Index3 == 1) {
6006 auto *Src1RM = legalize(Src0, Legal_Reg | Legal_Mem);
6007 auto *Src0R = legalizeToReg(Src1);
6008 T = makeReg(DestTy);
6009 _movp(T, Src0R);
6010 _punpckl(T, Src1RM);
6011 } else if (Index0 == Index2 && Index1 == Index3) {
6012 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(
6013 Src1, Index0, Src0, Index1);
6014 T = lowerShuffleVector_AllFromSameSrc(
6015 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_0,
6016 UNIFIED_INDEX_1);
6017 } else {
6018 auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs(
6019 Src1, Index0, Src0, Index1);
6020 auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs(
6021 Src1, Index2, Src0, Index3);
6022 T = lowerShuffleVector_TwoFromSameSrc(
6023 Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1,
6024 UNIFIED_INDEX_0, UNIFIED_INDEX_1);
6025 }
6026 }
6027 break;
6028 CASE_SRCS_IN(1, 0, 1, 1) : {
6029 assert(false && "Following code is untested but likely correct; test "
6030 "and remove assert.");
6031 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index0,
6032 Src0, Index1);
6033 T = lowerShuffleVector_TwoFromSameSrc(
6034 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src1, Index2, Index3);
6035 }
6036 break;
6037 CASE_SRCS_IN(1, 1, 0, 0) : {
6038 T = lowerShuffleVector_TwoFromSameSrc(Src1, Index0, Index1, Src0,
6039 Index2, Index3);
6040 }
6041 break;
6042 CASE_SRCS_IN(1, 1, 0, 1) : {
6043 assert(false && "Following code is untested but likely correct; test "
6044 "and remove assert.");
6045 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index2,
6046 Src1, Index3);
6047 T = lowerShuffleVector_TwoFromSameSrc(Src1, Index0, Index1, Unified,
6048 UNIFIED_INDEX_0, UNIFIED_INDEX_1);
6049 }
6050 break;
6051 CASE_SRCS_IN(1, 1, 1, 0) : {
6052 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index2,
6053 Src0, Index3);
6054 T = lowerShuffleVector_TwoFromSameSrc(Src1, Index0, Index1, Unified,
6055 UNIFIED_INDEX_0, UNIFIED_INDEX_1);
6056 }
6057 break;
6058 CASE_SRCS_IN(1, 1, 1, 1) : {
6059 assert(false && "Following code is untested but likely correct; test "
6060 "and remove assert.");
6061 T = lowerShuffleVector_AllFromSameSrc(Src1, Index0, Index1, Index2,
6062 Index3);
6063 }
6064 break;
6065#undef CASE_SRCS_IN
6066 }
6067
6068 assert(T != nullptr);
6069 assert(T->getType() == DestTy);
6070 _movp(Dest, T);
6071 return;
6072 } break;
John Portoa47c11c2016-04-21 05:53:42 -07006073 }
6074
6075 // Unoptimized shuffle. Perform a series of inserts and extracts.
6076 Context.insert<InstFakeDef>(T);
John Portoa47c11c2016-04-21 05:53:42 -07006077 const Type ElementType = typeElementType(DestTy);
6078 for (SizeT I = 0; I < Instr->getNumIndexes(); ++I) {
6079 auto *Index = Instr->getIndex(I);
6080 const SizeT Elem = Index->getValue();
6081 auto *ExtElmt = makeReg(ElementType);
6082 if (Elem < NumElements) {
6083 lowerExtractElement(
6084 InstExtractElement::create(Func, ExtElmt, Src0, Index));
6085 } else {
6086 lowerExtractElement(InstExtractElement::create(
6087 Func, ExtElmt, Src1,
6088 Ctx->getConstantInt32(Index->getValue() - NumElements)));
6089 }
6090 auto *NewT = makeReg(DestTy);
6091 lowerInsertElement(InstInsertElement::create(Func, NewT, T, ExtElmt,
6092 Ctx->getConstantInt32(I)));
6093 T = NewT;
6094 }
6095 _movp(Dest, T);
6096}
6097
// Lowers a scalar select instruction (vector selects are delegated to
// lowerSelectVector). If the condition was produced by a foldable icmp/fcmp,
// the compare and the select are lowered together to avoid materializing the
// i1 result; otherwise the condition is compared against zero and a
// conditional move sequence is emitted.
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerSelect(const InstSelect *Select) {
  Variable *Dest = Select->getDest();

  // Vector selects use blend/mask sequences and have their own lowering.
  if (isVectorType(Dest->getType())) {
    lowerSelectVector(Select);
    return;
  }

  Operand *Condition = Select->getCondition();
  // Handle folding opportunities: if the condition's producer (an icmp/fcmp
  // marked deleted by the folding pass) is known, lower producer + consumer
  // as one unit so the boolean never needs to live in a register.
  if (const Inst *Producer = FoldingInfo.getProducerFor(Condition)) {
    assert(Producer->isDeleted());
    switch (BoolFolding<Traits>::getProducerKind(Producer)) {
    default:
      break;
    case BoolFolding<Traits>::PK_Icmp32:
    case BoolFolding<Traits>::PK_Icmp64: {
      lowerIcmpAndConsumer(llvm::cast<InstIcmp>(Producer), Select);
      return;
    }
    case BoolFolding<Traits>::PK_Fcmp: {
      lowerFcmpAndConsumer(llvm::cast<InstFcmp>(Producer), Select);
      return;
    }
    }
  }

  // No folding: test the materialized condition against zero and select on
  // the "not equal" condition code.
  Operand *CmpResult = legalize(Condition, Legal_Reg | Legal_Mem);
  Operand *Zero = Ctx->getConstantZero(IceType_i32);
  _cmp(CmpResult, Zero);
  Operand *SrcT = Select->getTrueOperand();
  Operand *SrcF = Select->getFalseOperand();
  const BrCond Cond = Traits::Cond::Br_ne;
  lowerSelectMove(Dest, Cond, SrcT, SrcF);
}
6134
// Emits the move/cmov sequence for a scalar select whose condition codes have
// already been set (e.g. by a preceding _cmp). Cond selects SrcT; otherwise
// Dest receives SrcF. Handles the cmov operand-size restrictions and splits
// i64 selects into lo/hi halves on 32-bit targets.
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerSelectMove(Variable *Dest, BrCond Cond,
                                                Operand *SrcT, Operand *SrcF) {
  Type DestTy = Dest->getType();
  if (typeWidthInBytes(DestTy) == 1 || isFloatingType(DestTy)) {
    // The cmov instruction doesn't allow 8-bit or FP operands, so we need
    // explicit control flow.
    // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1:
    auto *Label = InstX86Label::create(Func, this);
    SrcT = legalize(SrcT, Legal_Reg | Legal_Imm);
    _mov(Dest, SrcT);
    _br(Cond, Label);
    SrcF = legalize(SrcF, Legal_Reg | Legal_Imm);
    // The second mov redefines Dest along the fall-through path; mark it so
    // liveness analysis does not treat it as a conflicting definition.
    _redefined(_mov(Dest, SrcF));
    Context.insert(Label);
    return;
  }
  // mov t, SrcF; cmov_cond t, SrcT; mov dest, t
  // But if SrcT is immediate, we might be able to do better, as the cmov
  // instruction doesn't allow an immediate operand:
  // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t
  if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) {
    std::swap(SrcT, SrcF);
    Cond = InstImpl<TraitsType>::InstX86Base::getOppositeCondition(Cond);
  }
  if (!Traits::Is64Bit && DestTy == IceType_i64) {
    // On 32-bit targets an i64 select is two i32 selects sharing the same
    // condition codes. Undef operands must be legalized before splitting.
    SrcT = legalizeUndef(SrcT);
    SrcF = legalizeUndef(SrcF);
    // Set the low portion.
    auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
    lowerSelectIntMove(DestLo, Cond, loOperand(SrcT), loOperand(SrcF));
    // Set the high portion.
    auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    lowerSelectIntMove(DestHi, Cond, hiOperand(SrcT), hiOperand(SrcF));
    return;
  }

  assert(DestTy == IceType_i16 || DestTy == IceType_i32 ||
         (Traits::Is64Bit && DestTy == IceType_i64));
  lowerSelectIntMove(Dest, Cond, SrcT, SrcF);
}
6176
// Emits the canonical cmov-based integer select: the false value is moved
// into a temporary, conditionally overwritten with the true value, then
// copied to Dest. Assumes the condition codes for Cond are already set.
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerSelectIntMove(Variable *Dest, BrCond Cond,
                                                   Operand *SrcT,
                                                   Operand *SrcF) {
  Variable *T = nullptr;
  SrcF = legalize(SrcF);
  _mov(T, SrcF);
  // cmov requires a register or memory source, never an immediate.
  SrcT = legalize(SrcT, Legal_Reg | Legal_Mem);
  _cmov(T, SrcT, Cond);
  _mov(Dest, T);
}
6188
// Emits a type-preserving move from Src to Dest, splitting i64 moves into
// lo/hi pairs on 32-bit targets and using movp for vectors. IsRedefinition
// marks the final mov(s) as redefinitions of Dest for liveness analysis.
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerMove(Variable *Dest, Operand *Src,
                                          bool IsRedefinition) {
  assert(Dest->getType() == Src->getType());
  assert(!Dest->isRematerializable());
  if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
    // 32-bit target, 64-bit move: copy each half through a fresh temporary.
    Src = legalize(Src);
    Operand *SrcLo = loOperand(Src);
    Operand *SrcHi = hiOperand(Src);
    auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
    auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    Variable *T_Lo = nullptr, *T_Hi = nullptr;
    _mov(T_Lo, SrcLo);
    _redefined(_mov(DestLo, T_Lo), IsRedefinition);
    _mov(T_Hi, SrcHi);
    _redefined(_mov(DestHi, T_Hi), IsRedefinition);
  } else {
    Operand *SrcLegal;
    if (Dest->hasReg()) {
      // If Dest already has a physical register, then only basic legalization
      // is needed, as the source operand can be a register, immediate, or
      // memory.
      SrcLegal = legalize(Src, Legal_Reg, Dest->getRegNum());
    } else {
      // If Dest could be a stack operand, then RI must be a physical register
      // or a scalar integer immediate.
      SrcLegal = legalize(Src, Legal_Reg | Legal_Imm);
    }
    if (isVectorType(Dest->getType())) {
      _redefined(_movp(Dest, SrcLegal), IsRedefinition);
    } else {
      _redefined(_mov(Dest, SrcLegal), IsRedefinition);
    }
  }
}
6224
John Porto4a566862016-01-04 09:33:41 -08006225template <typename TraitsType>
6226bool TargetX86Base<TraitsType>::lowerOptimizeFcmpSelect(
6227 const InstFcmp *Fcmp, const InstSelect *Select) {
David Sehre3984282015-12-15 17:34:55 -08006228 Operand *CmpSrc0 = Fcmp->getSrc(0);
6229 Operand *CmpSrc1 = Fcmp->getSrc(1);
6230 Operand *SelectSrcT = Select->getTrueOperand();
6231 Operand *SelectSrcF = Select->getFalseOperand();
6232
6233 if (CmpSrc0->getType() != SelectSrcT->getType())
6234 return false;
6235
6236 // TODO(sehr, stichnot): fcmp/select patterns (e,g., minsd/maxss) go here.
6237 InstFcmp::FCond Condition = Fcmp->getCondition();
6238 switch (Condition) {
6239 default:
6240 return false;
6241 case InstFcmp::True:
6242 case InstFcmp::False:
6243 case InstFcmp::Ogt:
6244 case InstFcmp::Olt:
6245 (void)CmpSrc0;
6246 (void)CmpSrc1;
6247 (void)SelectSrcT;
6248 (void)SelectSrcF;
6249 break;
6250 }
6251 return false;
6252}
6253
John Porto4a566862016-01-04 09:33:41 -08006254template <typename TraitsType>
6255void TargetX86Base<TraitsType>::lowerIcmp(const InstIcmp *Icmp) {
David Sehre3984282015-12-15 17:34:55 -08006256 Variable *Dest = Icmp->getDest();
6257 if (isVectorType(Dest->getType())) {
6258 lowerIcmpVector(Icmp);
6259 } else {
6260 constexpr Inst *Consumer = nullptr;
6261 lowerIcmpAndConsumer(Icmp, Consumer);
6262 }
6263}
6264
// Lowers a vector select. With SSE4.1 the condition is turned into a sign
// mask in xmm0 and blendvps/pblendvb picks lanes; without SSE4.1 the select
// is computed as (SrcT & mask) | (SrcF & ~mask) after sign-extending the
// condition to a full-width lane mask.
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerSelectVector(const InstSelect *Instr) {
  Variable *Dest = Instr->getDest();
  Type DestTy = Dest->getType();
  Operand *SrcT = Instr->getTrueOperand();
  Operand *SrcF = Instr->getFalseOperand();
  Operand *Condition = Instr->getCondition();

  if (!isVectorType(DestTy))
    llvm::report_fatal_error("Expected a vector select");

  Type SrcTy = SrcT->getType();
  Variable *T = makeReg(SrcTy);
  Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);
  Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);
  if (InstructionSet >= Traits::SSE4_1) {
    // TODO(wala): If the condition operand is a constant, use blendps or
    // pblendw.
    //
    // Use blendvps or pblendvb to implement select. Both read their mask
    // implicitly from xmm0, so the condition mask is pinned there.
    if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||
        SrcTy == IceType_v4f32) {
      Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
      Variable *xmm0 = makeReg(IceType_v4i32, Traits::RegisterSet::Reg_xmm0);
      _movp(xmm0, ConditionRM);
      // blendvps keys off the sign bit of each lane; shift the i1 into it.
      _psll(xmm0, Ctx->getConstantInt8(31));
      _movp(T, SrcFRM);
      _blendvps(T, SrcTRM, xmm0);
      _movp(Dest, T);
    } else {
      assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16);
      Type SignExtTy =
          Condition->getType() == IceType_v8i1 ? IceType_v8i16 : IceType_v16i8;
      Variable *xmm0 = makeReg(SignExtTy, Traits::RegisterSet::Reg_xmm0);
      // Sign-extend the i1 lanes so each lane is an all-ones/all-zeros mask.
      lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition));
      _movp(T, SrcFRM);
      _pblendvb(T, SrcTRM, xmm0);
      _movp(Dest, T);
    }
    return;
  }
  // Lower select without Traits::SSE4.1:
  // a=d?b:c ==>
  //   if elementtype(d) != i1:
  //     d=sext(d);
  //   a=(b&d)|(c&~d);
  Variable *T2 = makeReg(SrcTy);
  // Sign extend the condition operand if applicable.
  if (SrcTy == IceType_v4f32) {
    // The sext operation takes only integer arguments.
    Variable *T3 = Func->makeVariable(IceType_v4i32);
    lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition));
    _movp(T, T3);
  } else if (typeElementType(SrcTy) != IceType_i1) {
    lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition));
  } else {
    Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
    _movp(T, ConditionRM);
  }
  // T = mask & SrcT; T2 = ~mask & SrcF; Dest = T | T2.
  _movp(T2, T);
  _pand(T, SrcTRM);
  _pandn(T2, SrcFRM);
  _por(T, T2);
  _movp(Dest, T);

  return;
}
6332
// Lowers a store instruction: i64 on 32-bit targets becomes two 32-bit
// stores (high half first), vectors use storep from a register, and scalars
// store a register or immediate directly.
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerStore(const InstStore *Instr) {
  Operand *Value = Instr->getData();
  Operand *Addr = Instr->getAddr();
  X86OperandMem *NewAddr = formMemoryOperand(Addr, Value->getType());
  // Emits the (mock) sandbox bounds check on the address when enabled.
  doMockBoundsCheck(NewAddr);
  Type Ty = NewAddr->getType();

  if (!Traits::Is64Bit && Ty == IceType_i64) {
    Value = legalizeUndef(Value);
    Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm);
    _store(ValueHi, llvm::cast<X86OperandMem>(hiOperand(NewAddr)));
    Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm);
    _store(ValueLo, llvm::cast<X86OperandMem>(loOperand(NewAddr)));
  } else if (isVectorType(Ty)) {
    // Vector stores require the value in a register.
    _storep(legalizeToReg(Value), NewAddr);
  } else {
    Value = legalize(Value, Legal_Reg | Legal_Imm);
    _store(Value, NewAddr);
  }
}
6354
// Address-mode optimization for the current store instruction: if a better
// addressing expression can be computed, the original store is deleted and a
// replacement store using the optimized address is inserted in its place.
template <typename TraitsType>
void TargetX86Base<TraitsType>::doAddressOptStore() {
  auto *Instr = llvm::cast<InstStore>(Context.getCur());
  Operand *Addr = Instr->getAddr();
  Operand *Data = Instr->getData();
  if (auto *OptAddr = computeAddressOpt(Instr, Data->getType(), Addr)) {
    Instr->setDeleted();
    auto *NewStore = Context.insert<InstStore>(Data, OptAddr);
    // A store with a dest carries an RMW beacon (see lowerRMW); propagate it
    // so the RMW pattern is still recognizable on the replacement store.
    if (Instr->getDest())
      NewStore->setRmwBeacon(Instr->getRmwBeacon());
  }
}
6367
// Emits a range test for Comparison against [Min, Max]: subtracts Min (when
// nonzero, into a copy so the original operand survives) and compares against
// Max - Min, so a single unsigned branch (e.g. Br_a / Br_be) decides
// membership. Returns the operand holding the (possibly biased) comparison.
template <typename TraitsType>
Operand *TargetX86Base<TraitsType>::lowerCmpRange(Operand *Comparison,
                                                  uint64_t Min, uint64_t Max) {
  // TODO(ascull): 64-bit should not reach here but only because it is not
  // implemented yet. This should be able to handle the 64-bit case.
  assert(Traits::Is64Bit || Comparison->getType() != IceType_i64);
  // Subtracting 0 is a nop so don't do it
  if (Min != 0) {
    // Avoid clobbering the comparison by copying it
    Variable *T = nullptr;
    _mov(T, Comparison);
    _sub(T, Ctx->getConstantInt32(Min));
    Comparison = T;
  }

  _cmp(Comparison, Ctx->getConstantInt32(Max - Min));

  return Comparison;
}
6387
John Porto4a566862016-01-04 09:33:41 -08006388template <typename TraitsType>
6389void TargetX86Base<TraitsType>::lowerCaseCluster(const CaseCluster &Case,
6390 Operand *Comparison,
6391 bool DoneCmp,
6392 CfgNode *DefaultTarget) {
Andrew Scull87f80c12015-07-20 10:19:16 -07006393 switch (Case.getKind()) {
6394 case CaseCluster::JumpTable: {
John Porto4a566862016-01-04 09:33:41 -08006395 InstX86Label *SkipJumpTable;
Andrew Scull87f80c12015-07-20 10:19:16 -07006396
6397 Operand *RangeIndex =
6398 lowerCmpRange(Comparison, Case.getLow(), Case.getHigh());
Andrew Scull86df4e92015-07-30 13:54:44 -07006399 if (DefaultTarget == nullptr) {
Andrew Scull87f80c12015-07-20 10:19:16 -07006400 // Skip over jump table logic if comparison not in range and no default
John Porto4a566862016-01-04 09:33:41 -08006401 SkipJumpTable = InstX86Label::create(Func, this);
Andrew Scull87f80c12015-07-20 10:19:16 -07006402 _br(Traits::Cond::Br_a, SkipJumpTable);
Andrew Scull86df4e92015-07-30 13:54:44 -07006403 } else {
6404 _br(Traits::Cond::Br_a, DefaultTarget);
John Porto7e93c622015-06-23 10:58:57 -07006405 }
Andrew Scull87f80c12015-07-20 10:19:16 -07006406
6407 InstJumpTable *JumpTable = Case.getJumpTable();
6408 Context.insert(JumpTable);
6409
6410 // Make sure the index is a register of the same width as the base
6411 Variable *Index;
John Porto56958cb2016-01-14 09:18:18 -08006412 const Type PointerType = getPointerType();
6413 if (RangeIndex->getType() != PointerType) {
6414 Index = makeReg(PointerType);
Jim Stichnothe641e922016-02-29 09:54:55 -08006415 if (RangeIndex->getType() == IceType_i64) {
6416 assert(Traits::Is64Bit);
6417 _mov(Index, RangeIndex); // trunc
6418 } else {
6419 _movzx(Index, RangeIndex);
6420 }
Andrew Scull87f80c12015-07-20 10:19:16 -07006421 } else {
Andrew Scull97f460d2015-07-21 10:07:42 -07006422 Index = legalizeToReg(RangeIndex);
Andrew Scull87f80c12015-07-20 10:19:16 -07006423 }
6424
6425 constexpr RelocOffsetT RelocOffset = 0;
John Portoac2388c2016-01-22 07:10:56 -08006426 constexpr Variable *NoBase = nullptr;
John Porto03077212016-04-05 06:30:21 -07006427 auto JTName = GlobalString::createWithString(Ctx, JumpTable->getName());
Jim Stichnoth467ffe52016-03-29 15:01:06 -07006428 Constant *Offset = Ctx->getConstantSym(RelocOffset, JTName);
John Porto56958cb2016-01-14 09:18:18 -08006429 uint16_t Shift = typeWidthInBytesLog2(PointerType);
Jim Stichnoth8ff4b282016-01-04 15:39:06 -08006430 constexpr auto Segment = X86OperandMem::SegmentRegisters::DefaultSegment;
John Porto56958cb2016-01-14 09:18:18 -08006431
Andrew Scull87f80c12015-07-20 10:19:16 -07006432 Variable *Target = nullptr;
John Porto56958cb2016-01-14 09:18:18 -08006433 if (Traits::Is64Bit && NeedSandboxing) {
John Porto56958cb2016-01-14 09:18:18 -08006434 assert(Index != nullptr && Index->getType() == IceType_i32);
6435 }
John Portoac2388c2016-01-22 07:10:56 -08006436 auto *TargetInMemory = X86OperandMem::create(Func, PointerType, NoBase,
6437 Offset, Index, Shift, Segment);
Andrew Scull86df4e92015-07-30 13:54:44 -07006438 _mov(Target, TargetInMemory);
John Porto56958cb2016-01-14 09:18:18 -08006439
Andrew Scull86df4e92015-07-30 13:54:44 -07006440 lowerIndirectJump(Target);
Andrew Scull87f80c12015-07-20 10:19:16 -07006441
Andrew Scull86df4e92015-07-30 13:54:44 -07006442 if (DefaultTarget == nullptr)
Andrew Scull87f80c12015-07-20 10:19:16 -07006443 Context.insert(SkipJumpTable);
6444 return;
6445 }
6446 case CaseCluster::Range: {
Andrew Scull86df4e92015-07-30 13:54:44 -07006447 if (Case.isUnitRange()) {
Andrew Scull87f80c12015-07-20 10:19:16 -07006448 // Single item
Andrew Scull86df4e92015-07-30 13:54:44 -07006449 if (!DoneCmp) {
6450 Constant *Value = Ctx->getConstantInt32(Case.getLow());
Andrew Scull87f80c12015-07-20 10:19:16 -07006451 _cmp(Comparison, Value);
Andrew Scull86df4e92015-07-30 13:54:44 -07006452 }
6453 _br(Traits::Cond::Br_e, Case.getTarget());
6454 } else if (DoneCmp && Case.isPairRange()) {
6455 // Range of two items with first item aleady compared against
6456 _br(Traits::Cond::Br_e, Case.getTarget());
6457 Constant *Value = Ctx->getConstantInt32(Case.getHigh());
6458 _cmp(Comparison, Value);
6459 _br(Traits::Cond::Br_e, Case.getTarget());
Andrew Scull87f80c12015-07-20 10:19:16 -07006460 } else {
6461 // Range
6462 lowerCmpRange(Comparison, Case.getLow(), Case.getHigh());
Andrew Scull86df4e92015-07-30 13:54:44 -07006463 _br(Traits::Cond::Br_be, Case.getTarget());
Andrew Scull87f80c12015-07-20 10:19:16 -07006464 }
Andrew Scull86df4e92015-07-30 13:54:44 -07006465 if (DefaultTarget != nullptr)
6466 _br(DefaultTarget);
Andrew Scull87f80c12015-07-20 10:19:16 -07006467 return;
6468 }
6469 }
6470}
6471
// Lowers a switch instruction. Cases are first grouped into clusters
// (contiguous ranges and jump tables); the clusters are then dispatched via
// an explicit-stack binary search, emitting compare/branch code and
// delegating per-cluster emission to lowerCaseCluster().
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerSwitch(const InstSwitch *Instr) {
  // Group cases together and navigate through them with a binary search
  CaseClusterArray CaseClusters = CaseCluster::clusterizeSwitch(Func, Instr);
  Operand *Src0 = Instr->getComparison();
  CfgNode *DefaultTarget = Instr->getLabelDefault();

  assert(CaseClusters.size() != 0); // Should always be at least one

  if (!Traits::Is64Bit && Src0->getType() == IceType_i64) {
    Src0 = legalize(Src0); // get Base/Index into physical registers
    Operand *Src0Lo = loOperand(Src0);
    Operand *Src0Hi = hiOperand(Src0);
    if (CaseClusters.back().getHigh() > UINT32_MAX) {
      // TODO(ascull): handle 64-bit case properly (currently naive version)
      // This might be handled by a higher level lowering of switches.
      SizeT NumCases = Instr->getNumCases();
      if (NumCases >= 2) {
        // Multiple compares reuse the halves, so keep them in registers.
        Src0Lo = legalizeToReg(Src0Lo);
        Src0Hi = legalizeToReg(Src0Hi);
      } else {
        Src0Lo = legalize(Src0Lo, Legal_Reg | Legal_Mem);
        Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem);
      }
      // Naive 64-bit lowering: compare lo and hi halves per case.
      for (SizeT I = 0; I < NumCases; ++I) {
        Constant *ValueLo = Ctx->getConstantInt32(Instr->getValue(I));
        Constant *ValueHi = Ctx->getConstantInt32(Instr->getValue(I) >> 32);
        InstX86Label *Label = InstX86Label::create(Func, this);
        _cmp(Src0Lo, ValueLo);
        _br(Traits::Cond::Br_ne, Label);
        _cmp(Src0Hi, ValueHi);
        _br(Traits::Cond::Br_e, Instr->getLabel(I));
        Context.insert(Label);
      }
      _br(Instr->getLabelDefault());
      return;
    } else {
      // All the values are 32-bit so just check the operand is too and then
      // fall through to the 32-bit implementation. This is a common case.
      Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem);
      Constant *Zero = Ctx->getConstantInt32(0);
      _cmp(Src0Hi, Zero);
      _br(Traits::Cond::Br_ne, DefaultTarget);
      Src0 = Src0Lo;
    }
  }

  // 32-bit lowering

  if (CaseClusters.size() == 1) {
    // Jump straight to default if needed. Currently a common case as jump
    // tables occur on their own.
    constexpr bool DoneCmp = false;
    lowerCaseCluster(CaseClusters.front(), Src0, DoneCmp, DefaultTarget);
    return;
  }

  // Going to be using multiple times so get it in a register early
  Variable *Comparison = legalizeToReg(Src0);

  // A span is over the clusters
  struct SearchSpan {
    SearchSpan(SizeT Begin, SizeT Size, InstX86Label *Label)
        : Begin(Begin), Size(Size), Label(Label) {}

    SizeT Begin; // index of the first cluster in the span
    SizeT Size;  // number of clusters in the span
    InstX86Label *Label; // label to bind at the start of this span's code
  };
  // The stack will only grow to the height of the tree so 12 should be plenty
  std::stack<SearchSpan, llvm::SmallVector<SearchSpan, 12>> SearchSpanStack;
  SearchSpanStack.emplace(0, CaseClusters.size(), nullptr);
  bool DoneCmp = false;

  while (!SearchSpanStack.empty()) {
    SearchSpan Span = SearchSpanStack.top();
    SearchSpanStack.pop();

    if (Span.Label != nullptr)
      Context.insert(Span.Label);

    switch (Span.Size) {
    case 0:
      llvm::report_fatal_error("Invalid SearchSpan size");
      break;

    case 1:
      // Only the last emitted cluster branches to the default target; other
      // spans fall through into further search code.
      lowerCaseCluster(CaseClusters[Span.Begin], Comparison, DoneCmp,
                       SearchSpanStack.empty() ? nullptr : DefaultTarget);
      DoneCmp = false;
      break;

    case 2: {
      const CaseCluster *CaseA = &CaseClusters[Span.Begin];
      const CaseCluster *CaseB = &CaseClusters[Span.Begin + 1];

      // Placing a range last may allow register clobbering during the range
      // test. That means there is no need to clone the register. If it is a
      // unit range the comparison may have already been done in the binary
      // search (DoneCmp) and so it should be placed first. If this is a range
      // of two items and the comparison with the low value has already been
      // done, comparing with the other element is cheaper than a range test.
      // If the low end of the range is zero then there is no subtraction and
      // nothing to be gained.
      if (!CaseA->isUnitRange() &&
          !(CaseA->getLow() == 0 || (DoneCmp && CaseA->isPairRange()))) {
        std::swap(CaseA, CaseB);
        DoneCmp = false;
      }

      lowerCaseCluster(*CaseA, Comparison, DoneCmp);
      DoneCmp = false;
      lowerCaseCluster(*CaseB, Comparison, DoneCmp,
                       SearchSpanStack.empty() ? nullptr : DefaultTarget);
    } break;

    default:
      // Pick the middle item and branch b or ae
      SizeT PivotIndex = Span.Begin + (Span.Size / 2);
      const CaseCluster &Pivot = CaseClusters[PivotIndex];
      Constant *Value = Ctx->getConstantInt32(Pivot.getLow());
      InstX86Label *Label = InstX86Label::create(Func, this);
      _cmp(Comparison, Value);
      // TODO(ascull): does it always have to be far?
      _br(Traits::Cond::Br_b, Label, InstX86Br::Far);
      // Lower the left and (pivot+right) sides, falling through to the right
      SearchSpanStack.emplace(Span.Begin, Span.Size / 2, Label);
      SearchSpanStack.emplace(PivotIndex, Span.Size - (Span.Size / 2), nullptr);
      DoneCmp = true;
      break;
    }
  }

  _br(DefaultTarget);
}
6607
Andrew Scull9612d322015-07-06 14:53:25 -07006608/// The following pattern occurs often in lowered C and C++ code:
6609///
6610/// %cmp = fcmp/icmp pred <n x ty> %src0, %src1
6611/// %cmp.ext = sext <n x i1> %cmp to <n x ty>
6612///
6613/// We can eliminate the sext operation by copying the result of pcmpeqd,
Andrew Scull57e12682015-09-16 11:30:19 -07006614/// pcmpgtd, or cmpps (which produce sign extended results) to the result of the
6615/// sext operation.
John Porto4a566862016-01-04 09:33:41 -08006616template <typename TraitsType>
6617void TargetX86Base<TraitsType>::eliminateNextVectorSextInstruction(
John Porto7e93c622015-06-23 10:58:57 -07006618 Variable *SignExtendedResult) {
Jim Stichnoth54f3d512015-12-11 09:53:00 -08006619 if (auto *NextCast =
John Porto7e93c622015-06-23 10:58:57 -07006620 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) {
6621 if (NextCast->getCastKind() == InstCast::Sext &&
6622 NextCast->getSrc(0) == SignExtendedResult) {
6623 NextCast->setDeleted();
Andrew Scull97f460d2015-07-21 10:07:42 -07006624 _movp(NextCast->getDest(), legalizeToReg(SignExtendedResult));
John Porto7e93c622015-06-23 10:58:57 -07006625 // Skip over the instruction.
6626 Context.advanceNext();
6627 }
6628 }
6629}
6630
// Lowers an unreachable instruction to the ud2 trap.
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerUnreachable(
    const InstUnreachable * /*Instr*/) {
  _ud2();
  // Add a fake use of esp to make sure esp adjustments after the unreachable
  // do not get dead-code eliminated.
  keepEspLiveAtExit();
}
6639
// Lowers a breakpoint instruction to the int3 software-breakpoint trap.
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerBreakpoint(
    const InstBreakpoint * /*Instr*/) {
  _int3();
}
6645
// Lowers a fake read-modify-write instruction into a memory-destination
// arithmetic op (e.g. add [mem], reg), provided the RMW pattern is still
// valid. On 32-bit targets i64 operations are split into lo/hi halves using
// carry/borrow-propagating pairs (add/adc, sub/sbb).
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerRMW(const InstX86FakeRMW *RMW) {
  // If the beacon variable's live range does not end in this instruction, then
  // it must end in the modified Store instruction that follows. This means
  // that the original Store instruction is still there, either because the
  // value being stored is used beyond the Store instruction, or because dead
  // code elimination did not happen. In either case, we cancel RMW lowering
  // (and the caller deletes the RMW instruction).
  if (!RMW->isLastUse(RMW->getBeacon()))
    return;
  Operand *Src = RMW->getData();
  Type Ty = Src->getType();
  X86OperandMem *Addr = formMemoryOperand(RMW->getAddr(), Ty);
  doMockBoundsCheck(Addr);
  if (!Traits::Is64Bit && Ty == IceType_i64) {
    Src = legalizeUndef(Src);
    Operand *SrcLo = legalize(loOperand(Src), Legal_Reg | Legal_Imm);
    Operand *SrcHi = legalize(hiOperand(Src), Legal_Reg | Legal_Imm);
    auto *AddrLo = llvm::cast<X86OperandMem>(loOperand(Addr));
    auto *AddrHi = llvm::cast<X86OperandMem>(hiOperand(Addr));
    switch (RMW->getOp()) {
    default:
      // TODO(stichnot): Implement other arithmetic operators.
      break;
    case InstArithmetic::Add:
      _add_rmw(AddrLo, SrcLo);
      _adc_rmw(AddrHi, SrcHi); // carry into the high half
      return;
    case InstArithmetic::Sub:
      _sub_rmw(AddrLo, SrcLo);
      _sbb_rmw(AddrHi, SrcHi); // borrow from the high half
      return;
    case InstArithmetic::And:
      _and_rmw(AddrLo, SrcLo);
      _and_rmw(AddrHi, SrcHi);
      return;
    case InstArithmetic::Or:
      _or_rmw(AddrLo, SrcLo);
      _or_rmw(AddrHi, SrcHi);
      return;
    case InstArithmetic::Xor:
      _xor_rmw(AddrLo, SrcLo);
      _xor_rmw(AddrHi, SrcHi);
      return;
    }
  } else {
    // x86-32: i8, i16, i32
    // x86-64: i8, i16, i32, i64
    switch (RMW->getOp()) {
    default:
      // TODO(stichnot): Implement other arithmetic operators.
      break;
    case InstArithmetic::Add:
      Src = legalize(Src, Legal_Reg | Legal_Imm);
      _add_rmw(Addr, Src);
      return;
    case InstArithmetic::Sub:
      Src = legalize(Src, Legal_Reg | Legal_Imm);
      _sub_rmw(Addr, Src);
      return;
    case InstArithmetic::And:
      Src = legalize(Src, Legal_Reg | Legal_Imm);
      _and_rmw(Addr, Src);
      return;
    case InstArithmetic::Or:
      Src = legalize(Src, Legal_Reg | Legal_Imm);
      _or_rmw(Addr, Src);
      return;
    case InstArithmetic::Xor:
      Src = legalize(Src, Legal_Reg | Legal_Imm);
      _xor_rmw(Addr, Src);
      return;
    }
  }
  // Reaching here means the op fell through both switches unimplemented.
  llvm::report_fatal_error("Couldn't lower RMW instruction");
}
6722
John Porto4a566862016-01-04 09:33:41 -08006723template <typename TraitsType>
6724void TargetX86Base<TraitsType>::lowerOther(const Inst *Instr) {
6725 if (const auto *RMW = llvm::dyn_cast<InstX86FakeRMW>(Instr)) {
John Porto7e93c622015-06-23 10:58:57 -07006726 lowerRMW(RMW);
6727 } else {
6728 TargetLowering::lowerOther(Instr);
6729 }
6730}
6731
Andrew Scull57e12682015-09-16 11:30:19 -07006732/// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to preserve
6733/// integrity of liveness analysis. Undef values are also turned into zeroes,
Jim Stichnoth8ff4b282016-01-04 15:39:06 -08006734/// since loOperand() and hiOperand() don't expect Undef input. Also, in
John Portoac2388c2016-01-22 07:10:56 -08006735/// Non-SFI mode, add a FakeUse(RebasePtr) for every pooled constant operand.
John Porto4a566862016-01-04 09:33:41 -08006736template <typename TraitsType> void TargetX86Base<TraitsType>::prelowerPhis() {
Karl Schimpfd4699942016-04-02 09:55:31 -07006737 if (getFlags().getUseNonsfi()) {
John Portoac2388c2016-01-22 07:10:56 -08006738 assert(RebasePtr);
Jim Stichnoth8ff4b282016-01-04 15:39:06 -08006739 CfgNode *Node = Context.getNode();
John Portoac2388c2016-01-22 07:10:56 -08006740 uint32_t RebasePtrUseCount = 0;
Jim Stichnoth8ff4b282016-01-04 15:39:06 -08006741 for (Inst &I : Node->getPhis()) {
6742 auto *Phi = llvm::dyn_cast<InstPhi>(&I);
6743 if (Phi->isDeleted())
6744 continue;
6745 for (SizeT I = 0; I < Phi->getSrcSize(); ++I) {
6746 Operand *Src = Phi->getSrc(I);
6747 // TODO(stichnot): This over-counts for +0.0, and under-counts for other
6748 // kinds of pooling.
6749 if (llvm::isa<ConstantRelocatable>(Src) ||
6750 llvm::isa<ConstantFloat>(Src) || llvm::isa<ConstantDouble>(Src)) {
John Portoac2388c2016-01-22 07:10:56 -08006751 ++RebasePtrUseCount;
Jim Stichnoth8ff4b282016-01-04 15:39:06 -08006752 }
6753 }
6754 }
John Portoac2388c2016-01-22 07:10:56 -08006755 if (RebasePtrUseCount) {
6756 Node->getInsts().push_front(InstFakeUse::create(Func, RebasePtr));
Jim Stichnoth8ff4b282016-01-04 15:39:06 -08006757 }
6758 }
John Porto1d235422015-08-12 12:37:53 -07006759 if (Traits::Is64Bit) {
6760 // On x86-64 we don't need to prelower phis -- the architecture can handle
6761 // 64-bit integer natively.
6762 return;
6763 }
6764
Andrew Scull57e12682015-09-16 11:30:19 -07006765 // Pause constant blinding or pooling, blinding or pooling will be done later
6766 // during phi lowering assignments
John Porto7e93c622015-06-23 10:58:57 -07006767 BoolFlagSaver B(RandomizationPoolingPaused, true);
John Porto4a566862016-01-04 09:33:41 -08006768 PhiLowering::prelowerPhis32Bit<TargetX86Base<TraitsType>>(
Jan Voung53483692015-07-16 10:47:46 -07006769 this, Context.getNode(), Func);
John Porto7e93c622015-06-23 10:58:57 -07006770}
6771
John Porto4a566862016-01-04 09:33:41 -08006772template <typename TraitsType>
6773void TargetX86Base<TraitsType>::genTargetHelperCallFor(Inst *Instr) {
David Sehr26217e32015-11-26 13:03:50 -08006774 uint32_t StackArgumentsSize = 0;
6775 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) {
Karl Schimpf20070e82016-03-17 13:30:13 -07006776 RuntimeHelper HelperID = RuntimeHelper::H_Num;
David Sehr26217e32015-11-26 13:03:50 -08006777 Variable *Dest = Arith->getDest();
6778 Type DestTy = Dest->getType();
6779 if (!Traits::Is64Bit && DestTy == IceType_i64) {
6780 switch (Arith->getOp()) {
6781 default:
6782 return;
6783 case InstArithmetic::Udiv:
Karl Schimpf20070e82016-03-17 13:30:13 -07006784 HelperID = RuntimeHelper::H_udiv_i64;
David Sehr26217e32015-11-26 13:03:50 -08006785 break;
6786 case InstArithmetic::Sdiv:
Karl Schimpf20070e82016-03-17 13:30:13 -07006787 HelperID = RuntimeHelper::H_sdiv_i64;
David Sehr26217e32015-11-26 13:03:50 -08006788 break;
6789 case InstArithmetic::Urem:
Karl Schimpf20070e82016-03-17 13:30:13 -07006790 HelperID = RuntimeHelper::H_urem_i64;
David Sehr26217e32015-11-26 13:03:50 -08006791 break;
6792 case InstArithmetic::Srem:
Karl Schimpf20070e82016-03-17 13:30:13 -07006793 HelperID = RuntimeHelper::H_srem_i64;
David Sehr26217e32015-11-26 13:03:50 -08006794 break;
6795 }
6796 } else if (isVectorType(DestTy)) {
6797 Variable *Dest = Arith->getDest();
6798 Operand *Src0 = Arith->getSrc(0);
6799 Operand *Src1 = Arith->getSrc(1);
6800 switch (Arith->getOp()) {
6801 default:
6802 return;
6803 case InstArithmetic::Mul:
6804 if (DestTy == IceType_v16i8) {
6805 scalarizeArithmetic(Arith->getOp(), Dest, Src0, Src1);
6806 Arith->setDeleted();
6807 }
6808 return;
6809 case InstArithmetic::Shl:
6810 case InstArithmetic::Lshr:
6811 case InstArithmetic::Ashr:
6812 case InstArithmetic::Udiv:
6813 case InstArithmetic::Urem:
6814 case InstArithmetic::Sdiv:
6815 case InstArithmetic::Srem:
6816 case InstArithmetic::Frem:
6817 scalarizeArithmetic(Arith->getOp(), Dest, Src0, Src1);
6818 Arith->setDeleted();
6819 return;
6820 }
6821 } else {
6822 switch (Arith->getOp()) {
6823 default:
6824 return;
6825 case InstArithmetic::Frem:
6826 if (isFloat32Asserting32Or64(DestTy))
Karl Schimpf20070e82016-03-17 13:30:13 -07006827 HelperID = RuntimeHelper::H_frem_f32;
David Sehr26217e32015-11-26 13:03:50 -08006828 else
Karl Schimpf20070e82016-03-17 13:30:13 -07006829 HelperID = RuntimeHelper::H_frem_f64;
David Sehr26217e32015-11-26 13:03:50 -08006830 }
6831 }
6832 constexpr SizeT MaxSrcs = 2;
Karl Schimpf20070e82016-03-17 13:30:13 -07006833 InstCall *Call = makeHelperCall(HelperID, Dest, MaxSrcs);
David Sehr26217e32015-11-26 13:03:50 -08006834 Call->addArg(Arith->getSrc(0));
6835 Call->addArg(Arith->getSrc(1));
6836 StackArgumentsSize = getCallStackArgumentsSizeBytes(Call);
6837 Context.insert(Call);
6838 Arith->setDeleted();
6839 } else if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {
6840 InstCast::OpKind CastKind = Cast->getCastKind();
6841 Operand *Src0 = Cast->getSrc(0);
6842 const Type SrcType = Src0->getType();
6843 Variable *Dest = Cast->getDest();
6844 const Type DestTy = Dest->getType();
Karl Schimpf20070e82016-03-17 13:30:13 -07006845 RuntimeHelper HelperID = RuntimeHelper::H_Num;
David Sehrb19d39c2016-01-13 14:17:37 -08006846 Variable *CallDest = Dest;
David Sehr26217e32015-11-26 13:03:50 -08006847 switch (CastKind) {
6848 default:
6849 return;
6850 case InstCast::Fptosi:
6851 if (!Traits::Is64Bit && DestTy == IceType_i64) {
Karl Schimpf20070e82016-03-17 13:30:13 -07006852 HelperID = isFloat32Asserting32Or64(SrcType)
6853 ? RuntimeHelper::H_fptosi_f32_i64
6854 : RuntimeHelper::H_fptosi_f64_i64;
David Sehr26217e32015-11-26 13:03:50 -08006855 } else {
6856 return;
6857 }
6858 break;
6859 case InstCast::Fptoui:
6860 if (isVectorType(DestTy)) {
6861 assert(DestTy == IceType_v4i32 && SrcType == IceType_v4f32);
Karl Schimpf20070e82016-03-17 13:30:13 -07006862 HelperID = RuntimeHelper::H_fptoui_4xi32_f32;
David Sehr26217e32015-11-26 13:03:50 -08006863 } else if (DestTy == IceType_i64 ||
6864 (!Traits::Is64Bit && DestTy == IceType_i32)) {
6865 if (Traits::Is64Bit) {
Karl Schimpf20070e82016-03-17 13:30:13 -07006866 HelperID = isFloat32Asserting32Or64(SrcType)
6867 ? RuntimeHelper::H_fptoui_f32_i64
6868 : RuntimeHelper::H_fptoui_f64_i64;
David Sehr26217e32015-11-26 13:03:50 -08006869 } else if (isInt32Asserting32Or64(DestTy)) {
Karl Schimpf20070e82016-03-17 13:30:13 -07006870 HelperID = isFloat32Asserting32Or64(SrcType)
6871 ? RuntimeHelper::H_fptoui_f32_i32
6872 : RuntimeHelper::H_fptoui_f64_i32;
David Sehr26217e32015-11-26 13:03:50 -08006873 } else {
Karl Schimpf20070e82016-03-17 13:30:13 -07006874 HelperID = isFloat32Asserting32Or64(SrcType)
6875 ? RuntimeHelper::H_fptoui_f32_i64
6876 : RuntimeHelper::H_fptoui_f64_i64;
David Sehr26217e32015-11-26 13:03:50 -08006877 }
6878 } else {
6879 return;
6880 }
6881 break;
6882 case InstCast::Sitofp:
6883 if (!Traits::Is64Bit && SrcType == IceType_i64) {
Karl Schimpf20070e82016-03-17 13:30:13 -07006884 HelperID = isFloat32Asserting32Or64(DestTy)
6885 ? RuntimeHelper::H_sitofp_i64_f32
6886 : RuntimeHelper::H_sitofp_i64_f64;
David Sehr26217e32015-11-26 13:03:50 -08006887 } else {
6888 return;
6889 }
6890 break;
6891 case InstCast::Uitofp:
6892 if (isVectorType(SrcType)) {
6893 assert(DestTy == IceType_v4f32 && SrcType == IceType_v4i32);
Karl Schimpf20070e82016-03-17 13:30:13 -07006894 HelperID = RuntimeHelper::H_uitofp_4xi32_4xf32;
David Sehr26217e32015-11-26 13:03:50 -08006895 } else if (SrcType == IceType_i64 ||
6896 (!Traits::Is64Bit && SrcType == IceType_i32)) {
6897 if (isInt32Asserting32Or64(SrcType)) {
Karl Schimpf20070e82016-03-17 13:30:13 -07006898 HelperID = isFloat32Asserting32Or64(DestTy)
6899 ? RuntimeHelper::H_uitofp_i32_f32
6900 : RuntimeHelper::H_uitofp_i32_f64;
David Sehr26217e32015-11-26 13:03:50 -08006901 } else {
Karl Schimpf20070e82016-03-17 13:30:13 -07006902 HelperID = isFloat32Asserting32Or64(DestTy)
6903 ? RuntimeHelper::H_uitofp_i64_f32
6904 : RuntimeHelper::H_uitofp_i64_f64;
David Sehr26217e32015-11-26 13:03:50 -08006905 }
6906 } else {
6907 return;
6908 }
6909 break;
6910 case InstCast::Bitcast: {
6911 if (DestTy == Src0->getType())
6912 return;
6913 switch (DestTy) {
6914 default:
6915 return;
6916 case IceType_i8:
6917 assert(Src0->getType() == IceType_v8i1);
Karl Schimpf20070e82016-03-17 13:30:13 -07006918 HelperID = RuntimeHelper::H_bitcast_8xi1_i8;
David Sehrb19d39c2016-01-13 14:17:37 -08006919 CallDest = Func->makeVariable(IceType_i32);
David Sehr26217e32015-11-26 13:03:50 -08006920 break;
6921 case IceType_i16:
6922 assert(Src0->getType() == IceType_v16i1);
Karl Schimpf20070e82016-03-17 13:30:13 -07006923 HelperID = RuntimeHelper::H_bitcast_16xi1_i16;
David Sehrb19d39c2016-01-13 14:17:37 -08006924 CallDest = Func->makeVariable(IceType_i32);
David Sehr26217e32015-11-26 13:03:50 -08006925 break;
6926 case IceType_v8i1: {
6927 assert(Src0->getType() == IceType_i8);
Karl Schimpf20070e82016-03-17 13:30:13 -07006928 HelperID = RuntimeHelper::H_bitcast_i8_8xi1;
David Sehr26217e32015-11-26 13:03:50 -08006929 Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
6930 // Arguments to functions are required to be at least 32 bits wide.
John Porto1d937a82015-12-17 06:19:34 -08006931 Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0);
David Sehr26217e32015-11-26 13:03:50 -08006932 Src0 = Src0AsI32;
6933 } break;
6934 case IceType_v16i1: {
6935 assert(Src0->getType() == IceType_i16);
Karl Schimpf20070e82016-03-17 13:30:13 -07006936 HelperID = RuntimeHelper::H_bitcast_i16_16xi1;
David Sehr26217e32015-11-26 13:03:50 -08006937 Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
6938 // Arguments to functions are required to be at least 32 bits wide.
John Porto1d937a82015-12-17 06:19:34 -08006939 Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0);
David Sehr26217e32015-11-26 13:03:50 -08006940 Src0 = Src0AsI32;
6941 } break;
6942 }
6943 } break;
6944 }
6945 constexpr SizeT MaxSrcs = 1;
Karl Schimpf20070e82016-03-17 13:30:13 -07006946 InstCall *Call = makeHelperCall(HelperID, CallDest, MaxSrcs);
David Sehr26217e32015-11-26 13:03:50 -08006947 Call->addArg(Src0);
6948 StackArgumentsSize = getCallStackArgumentsSizeBytes(Call);
6949 Context.insert(Call);
David Sehrb19d39c2016-01-13 14:17:37 -08006950 // The PNaCl ABI disallows i8/i16 return types, so truncate the helper call
6951 // result to the appropriate type as necessary.
6952 if (CallDest->getType() != Dest->getType())
6953 Context.insert<InstCast>(InstCast::Trunc, Dest, CallDest);
David Sehr26217e32015-11-26 13:03:50 -08006954 Cast->setDeleted();
6955 } else if (auto *Intrinsic = llvm::dyn_cast<InstIntrinsicCall>(Instr)) {
John Portoe82b5602016-02-24 15:58:55 -08006956 CfgVector<Type> ArgTypes;
David Sehr26217e32015-11-26 13:03:50 -08006957 Type ReturnType = IceType_void;
6958 switch (Intrinsics::IntrinsicID ID = Intrinsic->getIntrinsicInfo().ID) {
6959 default:
6960 return;
6961 case Intrinsics::Ctpop: {
6962 Operand *Val = Intrinsic->getArg(0);
6963 Type ValTy = Val->getType();
6964 if (ValTy == IceType_i64)
6965 ArgTypes = {IceType_i64};
6966 else
6967 ArgTypes = {IceType_i32};
6968 ReturnType = IceType_i32;
6969 } break;
6970 case Intrinsics::Longjmp:
6971 ArgTypes = {IceType_i32, IceType_i32};
6972 ReturnType = IceType_void;
6973 break;
6974 case Intrinsics::Memcpy:
6975 ArgTypes = {IceType_i32, IceType_i32, IceType_i32};
6976 ReturnType = IceType_void;
6977 break;
6978 case Intrinsics::Memmove:
6979 ArgTypes = {IceType_i32, IceType_i32, IceType_i32};
6980 ReturnType = IceType_void;
6981 break;
6982 case Intrinsics::Memset:
6983 ArgTypes = {IceType_i32, IceType_i32, IceType_i32};
6984 ReturnType = IceType_void;
6985 break;
6986 case Intrinsics::NaClReadTP:
6987 ReturnType = IceType_i32;
6988 break;
6989 case Intrinsics::Setjmp:
6990 ArgTypes = {IceType_i32};
6991 ReturnType = IceType_i32;
6992 break;
6993 }
6994 StackArgumentsSize = getCallStackArgumentsSizeBytes(ArgTypes, ReturnType);
6995 } else if (auto *Call = llvm::dyn_cast<InstCall>(Instr)) {
6996 StackArgumentsSize = getCallStackArgumentsSizeBytes(Call);
6997 } else if (auto *Ret = llvm::dyn_cast<InstRet>(Instr)) {
6998 if (!Ret->hasRetValue())
6999 return;
7000 Operand *RetValue = Ret->getRetValue();
7001 Type ReturnType = RetValue->getType();
7002 if (!isScalarFloatingType(ReturnType))
7003 return;
7004 StackArgumentsSize = typeWidthInBytes(ReturnType);
7005 } else {
7006 return;
7007 }
7008 StackArgumentsSize = Traits::applyStackAlignment(StackArgumentsSize);
7009 updateMaxOutArgsSizeBytes(StackArgumentsSize);
7010}
7011
John Porto4a566862016-01-04 09:33:41 -08007012template <typename TraitsType>
7013uint32_t TargetX86Base<TraitsType>::getCallStackArgumentsSizeBytes(
John Portoe82b5602016-02-24 15:58:55 -08007014 const CfgVector<Type> &ArgTypes, Type ReturnType) {
David Sehr4163b9f2015-11-20 21:09:31 -08007015 uint32_t OutArgumentsSizeBytes = 0;
7016 uint32_t XmmArgCount = 0;
7017 uint32_t GprArgCount = 0;
David Sehr26217e32015-11-26 13:03:50 -08007018 for (Type Ty : ArgTypes) {
David Sehr4163b9f2015-11-20 21:09:31 -08007019 // The PNaCl ABI requires the width of arguments to be at least 32 bits.
7020 assert(typeWidthInBytes(Ty) >= 4);
7021 if (isVectorType(Ty) && XmmArgCount < Traits::X86_MAX_XMM_ARGS) {
7022 ++XmmArgCount;
Jim Stichnothc5777272016-06-20 06:46:07 -07007023 } else if (isScalarFloatingType(Ty) && Traits::X86_PASS_SCALAR_FP_IN_XMM &&
7024 XmmArgCount < Traits::X86_MAX_XMM_ARGS) {
7025 ++XmmArgCount;
David Sehr4163b9f2015-11-20 21:09:31 -08007026 } else if (isScalarIntegerType(Ty) &&
7027 GprArgCount < Traits::X86_MAX_GPR_ARGS) {
7028 // The 64 bit ABI allows some integers to be passed in GPRs.
7029 ++GprArgCount;
7030 } else {
David Sehr26217e32015-11-26 13:03:50 -08007031 if (isVectorType(Ty)) {
David Sehr4163b9f2015-11-20 21:09:31 -08007032 OutArgumentsSizeBytes =
7033 Traits::applyStackAlignment(OutArgumentsSizeBytes);
7034 }
David Sehr26217e32015-11-26 13:03:50 -08007035 OutArgumentsSizeBytes += typeWidthInBytesOnStack(Ty);
David Sehr4163b9f2015-11-20 21:09:31 -08007036 }
7037 }
7038 if (Traits::Is64Bit)
7039 return OutArgumentsSizeBytes;
7040 // The 32 bit ABI requires floating point values to be returned on the x87 FP
7041 // stack. Ensure there is enough space for the fstp/movs for floating returns.
David Sehr26217e32015-11-26 13:03:50 -08007042 if (isScalarFloatingType(ReturnType)) {
David Sehr4163b9f2015-11-20 21:09:31 -08007043 OutArgumentsSizeBytes =
7044 std::max(OutArgumentsSizeBytes,
David Sehr26217e32015-11-26 13:03:50 -08007045 static_cast<uint32_t>(typeWidthInBytesOnStack(ReturnType)));
David Sehr4163b9f2015-11-20 21:09:31 -08007046 }
7047 return OutArgumentsSizeBytes;
7048}
7049
John Porto4a566862016-01-04 09:33:41 -08007050template <typename TraitsType>
7051uint32_t TargetX86Base<TraitsType>::getCallStackArgumentsSizeBytes(
7052 const InstCall *Instr) {
David Sehr26217e32015-11-26 13:03:50 -08007053 // Build a vector of the arguments' types.
John Portoe82b5602016-02-24 15:58:55 -08007054 const SizeT NumArgs = Instr->getNumArgs();
7055 CfgVector<Type> ArgTypes;
7056 ArgTypes.reserve(NumArgs);
7057 for (SizeT i = 0; i < NumArgs; ++i) {
David Sehr26217e32015-11-26 13:03:50 -08007058 Operand *Arg = Instr->getArg(i);
7059 ArgTypes.emplace_back(Arg->getType());
7060 }
7061 // Compute the return type (if any);
7062 Type ReturnType = IceType_void;
7063 Variable *Dest = Instr->getDest();
7064 if (Dest != nullptr)
7065 ReturnType = Dest->getType();
7066 return getCallStackArgumentsSizeBytes(ArgTypes, ReturnType);
7067}
7068
John Porto4a566862016-01-04 09:33:41 -08007069template <typename TraitsType>
7070Variable *TargetX86Base<TraitsType>::makeZeroedRegister(Type Ty,
Jim Stichnoth8aa39662016-02-10 11:20:30 -08007071 RegNumT RegNum) {
Jim Stichnoth99165662015-11-13 14:20:40 -08007072 Variable *Reg = makeReg(Ty, RegNum);
7073 switch (Ty) {
7074 case IceType_i1:
7075 case IceType_i8:
7076 case IceType_i16:
7077 case IceType_i32:
7078 case IceType_i64:
7079 // Conservatively do "mov reg, 0" to avoid modifying FLAGS.
7080 _mov(Reg, Ctx->getConstantZero(Ty));
7081 break;
7082 case IceType_f32:
7083 case IceType_f64:
John Porto1d937a82015-12-17 06:19:34 -08007084 Context.insert<InstFakeDef>(Reg);
David Sehre3984282015-12-15 17:34:55 -08007085 _xorps(Reg, Reg);
Jim Stichnoth99165662015-11-13 14:20:40 -08007086 break;
7087 default:
7088 // All vector types use the same pxor instruction.
7089 assert(isVectorType(Ty));
John Porto1d937a82015-12-17 06:19:34 -08007090 Context.insert<InstFakeDef>(Reg);
Jim Stichnoth99165662015-11-13 14:20:40 -08007091 _pxor(Reg, Reg);
7092 break;
7093 }
7094 return Reg;
7095}
7096
Andrew Scull57e12682015-09-16 11:30:19 -07007097// There is no support for loading or emitting vector constants, so the vector
7098// values returned from makeVectorOfZeros, makeVectorOfOnes, etc. are
7099// initialized with register operations.
John Porto7e93c622015-06-23 10:58:57 -07007100//
Andrew Scull57e12682015-09-16 11:30:19 -07007101// TODO(wala): Add limited support for vector constants so that complex
7102// initialization in registers is unnecessary.
John Porto7e93c622015-06-23 10:58:57 -07007103
template <typename TraitsType>
Variable *TargetX86Base<TraitsType>::makeVectorOfZeros(Type Ty,
                                                       RegNumT RegNum) {
  // A zeroed register is a vector of zeros; defer to the common helper.
  return makeZeroedRegister(Ty, RegNum);
}
7109
John Porto4a566862016-01-04 09:33:41 -08007110template <typename TraitsType>
7111Variable *TargetX86Base<TraitsType>::makeVectorOfMinusOnes(Type Ty,
Jim Stichnoth8aa39662016-02-10 11:20:30 -08007112 RegNumT RegNum) {
John Porto7e93c622015-06-23 10:58:57 -07007113 Variable *MinusOnes = makeReg(Ty, RegNum);
7114 // Insert a FakeDef so the live range of MinusOnes is not overestimated.
John Porto1d937a82015-12-17 06:19:34 -08007115 Context.insert<InstFakeDef>(MinusOnes);
David Sehrb19d39c2016-01-13 14:17:37 -08007116 if (Ty == IceType_f64)
7117 // Making a vector of minus ones of type f64 is currently only used for the
7118 // fabs intrinsic. To use the f64 type to create this mask with pcmpeqq
7119 // requires SSE 4.1. Since we're just creating a mask, pcmpeqd does the
7120 // same job and only requires SSE2.
7121 _pcmpeq(MinusOnes, MinusOnes, IceType_f32);
7122 else
7123 _pcmpeq(MinusOnes, MinusOnes);
John Porto7e93c622015-06-23 10:58:57 -07007124 return MinusOnes;
7125}
7126
John Porto4a566862016-01-04 09:33:41 -08007127template <typename TraitsType>
Jim Stichnoth8aa39662016-02-10 11:20:30 -08007128Variable *TargetX86Base<TraitsType>::makeVectorOfOnes(Type Ty, RegNumT RegNum) {
John Porto7e93c622015-06-23 10:58:57 -07007129 Variable *Dest = makeVectorOfZeros(Ty, RegNum);
7130 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
7131 _psub(Dest, MinusOne);
7132 return Dest;
7133}
7134
John Porto4a566862016-01-04 09:33:41 -08007135template <typename TraitsType>
7136Variable *TargetX86Base<TraitsType>::makeVectorOfHighOrderBits(Type Ty,
Jim Stichnoth8aa39662016-02-10 11:20:30 -08007137 RegNumT RegNum) {
John Porto7e93c622015-06-23 10:58:57 -07007138 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 ||
7139 Ty == IceType_v16i8);
7140 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) {
7141 Variable *Reg = makeVectorOfOnes(Ty, RegNum);
7142 SizeT Shift =
7143 typeWidthInBytes(typeElementType(Ty)) * Traits::X86_CHAR_BIT - 1;
7144 _psll(Reg, Ctx->getConstantInt8(Shift));
7145 return Reg;
7146 } else {
7147 // SSE has no left shift operation for vectors of 8 bit integers.
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07007148 constexpr uint32_t HIGH_ORDER_BITS_MASK = 0x80808080;
John Porto7e93c622015-06-23 10:58:57 -07007149 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK);
7150 Variable *Reg = makeReg(Ty, RegNum);
7151 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem));
7152 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8));
7153 return Reg;
7154 }
7155}
7156
Andrew Scull57e12682015-09-16 11:30:19 -07007157/// Construct a mask in a register that can be and'ed with a floating-point
7158/// value to mask off its sign bit. The value will be <4 x 0x7fffffff> for f32
7159/// and v4f32, and <2 x 0x7fffffffffffffff> for f64. Construct it as vector of
7160/// ones logically right shifted one bit.
7161// TODO(stichnot): Fix the wala
7162// TODO: above, to represent vector constants in memory.
John Porto4a566862016-01-04 09:33:41 -08007163template <typename TraitsType>
7164Variable *TargetX86Base<TraitsType>::makeVectorOfFabsMask(Type Ty,
Jim Stichnoth8aa39662016-02-10 11:20:30 -08007165 RegNumT RegNum) {
John Porto7e93c622015-06-23 10:58:57 -07007166 Variable *Reg = makeVectorOfMinusOnes(Ty, RegNum);
7167 _psrl(Reg, Ctx->getConstantInt8(1));
7168 return Reg;
7169}
7170
John Porto4a566862016-01-04 09:33:41 -08007171template <typename TraitsType>
7172typename TargetX86Base<TraitsType>::X86OperandMem *
7173TargetX86Base<TraitsType>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot,
7174 uint32_t Offset) {
John Porto7e93c622015-06-23 10:58:57 -07007175 // Ensure that Loc is a stack slot.
Andrew Scull11c9a322015-08-28 14:24:14 -07007176 assert(Slot->mustNotHaveReg());
Reed Kotler5fa0a5f2016-02-15 20:01:24 -08007177 assert(Slot->getRegNum().hasNoValue());
John Porto7e93c622015-06-23 10:58:57 -07007178 // Compute the location of Loc in memory.
Andrew Scull57e12682015-09-16 11:30:19 -07007179 // TODO(wala,stichnot): lea should not
7180 // be required. The address of the stack slot is known at compile time
7181 // (although not until after addProlog()).
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07007182 constexpr Type PointerType = IceType_i32;
John Porto7e93c622015-06-23 10:58:57 -07007183 Variable *Loc = makeReg(PointerType);
7184 _lea(Loc, Slot);
7185 Constant *ConstantOffset = Ctx->getConstantInt32(Offset);
John Porto4a566862016-01-04 09:33:41 -08007186 return X86OperandMem::create(Func, Ty, Loc, ConstantOffset);
John Porto7e93c622015-06-23 10:58:57 -07007187}
7188
Jim Stichnothc59288b2015-11-09 11:38:40 -08007189/// Lowering helper to copy a scalar integer source operand into some 8-bit GPR.
7190/// Src is assumed to already be legalized. If the source operand is known to
7191/// be a memory or immediate operand, a simple mov will suffice. But if the
7192/// source operand can be a physical register, then it must first be copied into
7193/// a physical register that is truncable to 8-bit, then truncated into a
7194/// physical register that can receive a truncation, and finally copied into the
7195/// result 8-bit register (which in general can be any 8-bit register). For
7196/// example, moving %ebp into %ah may be accomplished as:
7197/// movl %ebp, %edx
7198/// mov_trunc %edx, %dl // this redundant assignment is ultimately elided
7199/// movb %dl, %ah
7200/// On the other hand, moving a memory or immediate operand into ah:
7201/// movb 4(%ebp), %ah
7202/// movb $my_imm, %ah
7203///
7204/// Note #1. On a 64-bit target, the "movb 4(%ebp), %ah" is likely not
7205/// encodable, so RegNum=Reg_ah should NOT be given as an argument. Instead,
Reed Kotler5fa0a5f2016-02-15 20:01:24 -08007206/// use RegNum=RegNumT() and then let the caller do a separate copy into
Jim Stichnothc59288b2015-11-09 11:38:40 -08007207/// Reg_ah.
7208///
7209/// Note #2. ConstantRelocatable operands are also put through this process
7210/// (not truncated directly) because our ELF emitter does R_386_32 relocations
7211/// but not R_386_8 relocations.
7212///
7213/// Note #3. If Src is a Variable, the result will be an infinite-weight i8
7214/// Variable with the RCX86_IsTrunc8Rcvr register class. As such, this helper
7215/// is a convenient way to prevent ah/bh/ch/dh from being an (invalid) argument
7216/// to the pinsrb instruction.
John Porto4a566862016-01-04 09:33:41 -08007217template <typename TraitsType>
Jim Stichnoth8aa39662016-02-10 11:20:30 -08007218Variable *TargetX86Base<TraitsType>::copyToReg8(Operand *Src, RegNumT RegNum) {
Jim Stichnothc59288b2015-11-09 11:38:40 -08007219 Type Ty = Src->getType();
7220 assert(isScalarIntegerType(Ty));
7221 assert(Ty != IceType_i1);
7222 Variable *Reg = makeReg(IceType_i8, RegNum);
7223 Reg->setRegClass(RCX86_IsTrunc8Rcvr);
7224 if (llvm::isa<Variable>(Src) || llvm::isa<ConstantRelocatable>(Src)) {
7225 Variable *SrcTruncable = makeReg(Ty);
7226 switch (Ty) {
7227 case IceType_i64:
7228 SrcTruncable->setRegClass(RCX86_Is64To8);
7229 break;
7230 case IceType_i32:
7231 SrcTruncable->setRegClass(RCX86_Is32To8);
7232 break;
7233 case IceType_i16:
7234 SrcTruncable->setRegClass(RCX86_Is16To8);
7235 break;
7236 default:
7237 // i8 - just use default register class
7238 break;
7239 }
7240 Variable *SrcRcvr = makeReg(IceType_i8);
7241 SrcRcvr->setRegClass(RCX86_IsTrunc8Rcvr);
7242 _mov(SrcTruncable, Src);
7243 _mov(SrcRcvr, SrcTruncable);
7244 Src = SrcRcvr;
7245 }
7246 _mov(Reg, Src);
7247 return Reg;
7248}
7249
Andrew Scull9612d322015-07-06 14:53:25 -07007250/// Helper for legalize() to emit the right code to lower an operand to a
7251/// register of the appropriate type.
John Porto4a566862016-01-04 09:33:41 -08007252template <typename TraitsType>
Jim Stichnoth8aa39662016-02-10 11:20:30 -08007253Variable *TargetX86Base<TraitsType>::copyToReg(Operand *Src, RegNumT RegNum) {
John Porto7e93c622015-06-23 10:58:57 -07007254 Type Ty = Src->getType();
7255 Variable *Reg = makeReg(Ty, RegNum);
7256 if (isVectorType(Ty)) {
7257 _movp(Reg, Src);
7258 } else {
7259 _mov(Reg, Src);
7260 }
7261 return Reg;
7262}
7263
John Porto4a566862016-01-04 09:33:41 -08007264template <typename TraitsType>
7265Operand *TargetX86Base<TraitsType>::legalize(Operand *From, LegalMask Allowed,
Jim Stichnoth8aa39662016-02-10 11:20:30 -08007266 RegNumT RegNum) {
Karl Schimpfd4699942016-04-02 09:55:31 -07007267 const bool UseNonsfi = getFlags().getUseNonsfi();
Jim Stichnoth8ff4b282016-01-04 15:39:06 -08007268 const Type Ty = From->getType();
Andrew Scull57e12682015-09-16 11:30:19 -07007269 // Assert that a physical register is allowed. To date, all calls to
7270 // legalize() allow a physical register. If a physical register needs to be
7271 // explicitly disallowed, then new code will need to be written to force a
7272 // spill.
John Porto7e93c622015-06-23 10:58:57 -07007273 assert(Allowed & Legal_Reg);
Andrew Scull57e12682015-09-16 11:30:19 -07007274 // If we're asking for a specific physical register, make sure we're not
7275 // allowing any other operand kinds. (This could be future work, e.g. allow
7276 // the shl shift amount to be either an immediate or in ecx.)
Reed Kotler5fa0a5f2016-02-15 20:01:24 -08007277 assert(RegNum.hasNoValue() || Allowed == Legal_Reg);
John Porto7e93c622015-06-23 10:58:57 -07007278
Jim Stichnoth318f4cd2015-10-01 21:02:37 -07007279 // Substitute with an available infinite-weight variable if possible. Only do
7280 // this when we are not asking for a specific register, and when the
7281 // substitution is not locked to a specific register, and when the types
7282 // match, in order to capture the vast majority of opportunities and avoid
7283 // corner cases in the lowering.
Reed Kotler5fa0a5f2016-02-15 20:01:24 -08007284 if (RegNum.hasNoValue()) {
Jim Stichnoth318f4cd2015-10-01 21:02:37 -07007285 if (Variable *Subst = getContext().availabilityGet(From)) {
7286 // At this point we know there is a potential substitution available.
7287 if (Subst->mustHaveReg() && !Subst->hasReg()) {
7288 // At this point we know the substitution will have a register.
7289 if (From->getType() == Subst->getType()) {
7290 // At this point we know the substitution's register is compatible.
7291 return Subst;
7292 }
7293 }
7294 }
7295 }
7296
John Porto4a566862016-01-04 09:33:41 -08007297 if (auto *Mem = llvm::dyn_cast<X86OperandMem>(From)) {
Andrew Scull57e12682015-09-16 11:30:19 -07007298 // Before doing anything with a Mem operand, we need to ensure that the
7299 // Base and Index components are in physical registers.
John Porto7e93c622015-06-23 10:58:57 -07007300 Variable *Base = Mem->getBase();
7301 Variable *Index = Mem->getIndex();
Jim Stichnoth8ff4b282016-01-04 15:39:06 -08007302 Constant *Offset = Mem->getOffset();
John Porto7e93c622015-06-23 10:58:57 -07007303 Variable *RegBase = nullptr;
7304 Variable *RegIndex = nullptr;
John Porto56958cb2016-01-14 09:18:18 -08007305 uint16_t Shift = Mem->getShift();
John Porto7e93c622015-06-23 10:58:57 -07007306 if (Base) {
David Sehr4318a412015-11-11 15:01:55 -08007307 RegBase = llvm::cast<Variable>(
7308 legalize(Base, Legal_Reg | Legal_Rematerializable));
John Porto7e93c622015-06-23 10:58:57 -07007309 }
7310 if (Index) {
John Porto56958cb2016-01-14 09:18:18 -08007311 // TODO(jpp): perhaps we should only allow Legal_Reg if
7312 // Base->isRematerializable.
David Sehr4318a412015-11-11 15:01:55 -08007313 RegIndex = llvm::cast<Variable>(
7314 legalize(Index, Legal_Reg | Legal_Rematerializable));
John Porto7e93c622015-06-23 10:58:57 -07007315 }
John Portoac2388c2016-01-22 07:10:56 -08007316
John Porto7e93c622015-06-23 10:58:57 -07007317 if (Base != RegBase || Index != RegIndex) {
John Porto56958cb2016-01-14 09:18:18 -08007318 Mem = X86OperandMem::create(Func, Ty, RegBase, Offset, RegIndex, Shift,
John Portoac2388c2016-01-22 07:10:56 -08007319 Mem->getSegmentRegister());
John Porto7e93c622015-06-23 10:58:57 -07007320 }
7321
John Portoac2388c2016-01-22 07:10:56 -08007322 // For all Memory Operands, we do randomization/pooling here.
John Porto7e93c622015-06-23 10:58:57 -07007323 From = randomizeOrPoolImmediate(Mem);
7324
7325 if (!(Allowed & Legal_Mem)) {
7326 From = copyToReg(From, RegNum);
7327 }
7328 return From;
7329 }
Jim Stichnoth8ff4b282016-01-04 15:39:06 -08007330
John Porto7e93c622015-06-23 10:58:57 -07007331 if (auto *Const = llvm::dyn_cast<Constant>(From)) {
7332 if (llvm::isa<ConstantUndef>(Const)) {
Jan Voungfbdd2442015-07-15 12:36:20 -07007333 From = legalizeUndef(Const, RegNum);
John Porto7e93c622015-06-23 10:58:57 -07007334 if (isVectorType(Ty))
Jan Voungfbdd2442015-07-15 12:36:20 -07007335 return From;
7336 Const = llvm::cast<Constant>(From);
John Porto7e93c622015-06-23 10:58:57 -07007337 }
7338 // There should be no constants of vector type (other than undef).
7339 assert(!isVectorType(Ty));
7340
John Porto1d235422015-08-12 12:37:53 -07007341 // If the operand is a 64 bit constant integer we need to legalize it to a
7342 // register in x86-64.
7343 if (Traits::Is64Bit) {
Jim Stichnoth9c2c0932016-06-14 07:27:22 -07007344 if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Const)) {
7345 if (!Utils::IsInt(32, C64->getValue())) {
7346 if (RegNum.hasValue()) {
7347 assert(Traits::getGprForType(IceType_i64, RegNum) == RegNum);
7348 }
7349 return copyToReg(Const, RegNum);
John Porto008f4ce2015-12-24 13:22:18 -08007350 }
John Porto1d235422015-08-12 12:37:53 -07007351 }
7352 }
7353
Andrew Scull57e12682015-09-16 11:30:19 -07007354 // If the operand is an 32 bit constant integer, we should check whether we
7355 // need to randomize it or pool it.
Jim Stichnoth54f3d512015-12-11 09:53:00 -08007356 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Const)) {
John Porto7e93c622015-06-23 10:58:57 -07007357 Operand *NewConst = randomizeOrPoolImmediate(C, RegNum);
7358 if (NewConst != Const) {
7359 return NewConst;
7360 }
7361 }
7362
Jim Stichnoth8ff4b282016-01-04 15:39:06 -08007363 if (auto *CR = llvm::dyn_cast<ConstantRelocatable>(Const)) {
John Portoac2388c2016-01-22 07:10:56 -08007364 // If the operand is a ConstantRelocatable, and Legal_AddrAbs is not
7365 // specified, and UseNonsfi is indicated, we need to add RebasePtr.
Jim Stichnoth8ff4b282016-01-04 15:39:06 -08007366 if (UseNonsfi && !(Allowed & Legal_AddrAbs)) {
7367 assert(Ty == IceType_i32);
Jim Stichnoth8ff4b282016-01-04 15:39:06 -08007368 Variable *NewVar = makeReg(Ty, RegNum);
John Portoac2388c2016-01-22 07:10:56 -08007369 auto *Mem = Traits::X86OperandMem::create(Func, Ty, nullptr, CR);
7370 // LEAs are not automatically sandboxed, thus we explicitly invoke
7371 // _sandbox_mem_reference.
7372 _lea(NewVar, _sandbox_mem_reference(Mem));
Jim Stichnoth8ff4b282016-01-04 15:39:06 -08007373 From = NewVar;
7374 }
John Portoac2388c2016-01-22 07:10:56 -08007375 } else if (isScalarFloatingType(Ty)) {
7376 // Convert a scalar floating point constant into an explicit memory
7377 // operand.
Jim Stichnoth99165662015-11-13 14:20:40 -08007378 if (auto *ConstFloat = llvm::dyn_cast<ConstantFloat>(Const)) {
John Portoccea7932015-11-17 04:58:36 -08007379 if (Utils::isPositiveZero(ConstFloat->getValue()))
Jim Stichnoth99165662015-11-13 14:20:40 -08007380 return makeZeroedRegister(Ty, RegNum);
7381 } else if (auto *ConstDouble = llvm::dyn_cast<ConstantDouble>(Const)) {
John Portoccea7932015-11-17 04:58:36 -08007382 if (Utils::isPositiveZero(ConstDouble->getValue()))
Jim Stichnoth99165662015-11-13 14:20:40 -08007383 return makeZeroedRegister(Ty, RegNum);
7384 }
John Portoac2388c2016-01-22 07:10:56 -08007385
Jim Stichnoth467ffe52016-03-29 15:01:06 -07007386 auto *CFrom = llvm::cast<Constant>(From);
7387 assert(CFrom->getShouldBePooled());
7388 Constant *Offset = Ctx->getConstantSym(0, CFrom->getLabelName());
John Portoac2388c2016-01-22 07:10:56 -08007389 auto *Mem = X86OperandMem::create(Func, Ty, nullptr, Offset);
Jim Stichnoth8ff4b282016-01-04 15:39:06 -08007390 From = Mem;
John Porto7e93c622015-06-23 10:58:57 -07007391 }
John Portoac2388c2016-01-22 07:10:56 -08007392
John Porto7e93c622015-06-23 10:58:57 -07007393 bool NeedsReg = false;
7394 if (!(Allowed & Legal_Imm) && !isScalarFloatingType(Ty))
John Portoac2388c2016-01-22 07:10:56 -08007395 // Immediate specifically not allowed.
John Porto7e93c622015-06-23 10:58:57 -07007396 NeedsReg = true;
7397 if (!(Allowed & Legal_Mem) && isScalarFloatingType(Ty))
7398 // On x86, FP constants are lowered to mem operands.
7399 NeedsReg = true;
7400 if (NeedsReg) {
7401 From = copyToReg(From, RegNum);
7402 }
7403 return From;
7404 }
Jim Stichnoth8ff4b282016-01-04 15:39:06 -08007405
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07007406 if (auto *Var = llvm::dyn_cast<Variable>(From)) {
Andrew Scull57e12682015-09-16 11:30:19 -07007407 // Check if the variable is guaranteed a physical register. This can happen
7408 // either when the variable is pre-colored or when it is assigned infinite
7409 // weight.
Andrew Scull11c9a322015-08-28 14:24:14 -07007410 bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg());
David Sehr4318a412015-11-11 15:01:55 -08007411 bool MustRematerialize =
7412 (Var->isRematerializable() && !(Allowed & Legal_Rematerializable));
John Porto7e93c622015-06-23 10:58:57 -07007413 // We need a new physical register for the operand if:
David Sehr4318a412015-11-11 15:01:55 -08007414 // - Mem is not allowed and Var isn't guaranteed a physical register, or
7415 // - RegNum is required and Var->getRegNum() doesn't match, or
7416 // - Var is a rematerializable variable and rematerializable pass-through is
7417 // not allowed (in which case we need an lea instruction).
7418 if (MustRematerialize) {
7419 assert(Ty == IceType_i32);
7420 Variable *NewVar = makeReg(Ty, RegNum);
7421 // Since Var is rematerializable, the offset will be added when the lea is
7422 // emitted.
7423 constexpr Constant *NoOffset = nullptr;
John Porto4a566862016-01-04 09:33:41 -08007424 auto *Mem = X86OperandMem::create(Func, Ty, Var, NoOffset);
David Sehr4318a412015-11-11 15:01:55 -08007425 _lea(NewVar, Mem);
7426 From = NewVar;
7427 } else if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
Reed Kotler5fa0a5f2016-02-15 20:01:24 -08007428 (RegNum.hasValue() && RegNum != Var->getRegNum())) {
John Porto7e93c622015-06-23 10:58:57 -07007429 From = copyToReg(From, RegNum);
7430 }
7431 return From;
7432 }
Jim Stichnoth8ff4b282016-01-04 15:39:06 -08007433
7434 llvm::report_fatal_error("Unhandled operand kind in legalize()");
John Porto7e93c622015-06-23 10:58:57 -07007435 return From;
7436}
7437
Andrew Scull9612d322015-07-06 14:53:25 -07007438/// Provide a trivial wrapper to legalize() for this common usage.
John Porto4a566862016-01-04 09:33:41 -08007439template <typename TraitsType>
7440Variable *TargetX86Base<TraitsType>::legalizeToReg(Operand *From,
Jim Stichnoth8aa39662016-02-10 11:20:30 -08007441 RegNumT RegNum) {
John Porto7e93c622015-06-23 10:58:57 -07007442 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
7443}
7444
Jan Voungfbdd2442015-07-15 12:36:20 -07007445/// Legalize undef values to concrete values.
John Porto4a566862016-01-04 09:33:41 -08007446template <typename TraitsType>
7447Operand *TargetX86Base<TraitsType>::legalizeUndef(Operand *From,
Jim Stichnoth8aa39662016-02-10 11:20:30 -08007448 RegNumT RegNum) {
Jan Voungfbdd2442015-07-15 12:36:20 -07007449 Type Ty = From->getType();
7450 if (llvm::isa<ConstantUndef>(From)) {
7451 // Lower undefs to zero. Another option is to lower undefs to an
Andrew Scull57e12682015-09-16 11:30:19 -07007452 // uninitialized register; however, using an uninitialized register results
7453 // in less predictable code.
Jan Voungfbdd2442015-07-15 12:36:20 -07007454 //
Andrew Scull57e12682015-09-16 11:30:19 -07007455 // If in the future the implementation is changed to lower undef values to
7456 // uninitialized registers, a FakeDef will be needed:
John Porto1d937a82015-12-17 06:19:34 -08007457 // Context.insert<InstFakeDef>(Reg);
Jan Voungfbdd2442015-07-15 12:36:20 -07007458 // This is in order to ensure that the live range of Reg is not
Andrew Scull57e12682015-09-16 11:30:19 -07007459 // overestimated. If the constant being lowered is a 64 bit value, then
7460 // the result should be split and the lo and hi components will need to go
7461 // in uninitialized registers.
Jan Voungfbdd2442015-07-15 12:36:20 -07007462 if (isVectorType(Ty))
7463 return makeVectorOfZeros(Ty, RegNum);
7464 return Ctx->getConstantZero(Ty);
7465 }
7466 return From;
7467}
7468
Andrew Scull57e12682015-09-16 11:30:19 -07007469/// For the cmp instruction, if Src1 is an immediate, or known to be a physical
7470/// register, we can allow Src0 to be a memory operand. Otherwise, Src0 must be
7471/// copied into a physical register. (Actually, either Src0 or Src1 can be
7472/// chosen for the physical register, but unfortunately we have to commit to one
7473/// or the other before register allocation.)
John Porto4a566862016-01-04 09:33:41 -08007474template <typename TraitsType>
7475Operand *TargetX86Base<TraitsType>::legalizeSrc0ForCmp(Operand *Src0,
7476 Operand *Src1) {
John Porto7e93c622015-06-23 10:58:57 -07007477 bool IsSrc1ImmOrReg = false;
7478 if (llvm::isa<Constant>(Src1)) {
7479 IsSrc1ImmOrReg = true;
Jan Voungfbdd2442015-07-15 12:36:20 -07007480 } else if (auto *Var = llvm::dyn_cast<Variable>(Src1)) {
John Porto7e93c622015-06-23 10:58:57 -07007481 if (Var->hasReg())
7482 IsSrc1ImmOrReg = true;
7483 }
7484 return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg);
7485}
7486
/// Wrap an arbitrary operand (an existing X86OperandMem, a Variable, or a
/// Constant) into an X86OperandMem of type Ty, then run it through either
/// full legalization or just immediate randomization/pooling depending on
/// DoLegalize.
template <typename TraitsType>
typename TargetX86Base<TraitsType>::X86OperandMem *
TargetX86Base<TraitsType>::formMemoryOperand(Operand *Opnd, Type Ty,
                                             bool DoLegalize) {
  auto *Mem = llvm::dyn_cast<X86OperandMem>(Opnd);
  // It may be the case that address mode optimization already creates an
  // X86OperandMem, so in that case it wouldn't need another level of
  // transformation.
  if (!Mem) {
    // Opnd must be either a Variable (used as the base) or a Constant (used
    // as the offset) -- exactly one of these dyn_casts succeeds.
    auto *Base = llvm::dyn_cast<Variable>(Opnd);
    auto *Offset = llvm::dyn_cast<Constant>(Opnd);
    assert(Base || Offset);
    if (Offset) {
      // During memory operand building, we do not blind or pool the constant
      // offset, we will work on the whole memory operand later as one entity
      // later, this save one instruction. By turning blinding and pooling off,
      // we guarantee legalize(Offset) will return a Constant*.
      if (!llvm::isa<ConstantRelocatable>(Offset)) {
        // RAII flag: pooling/randomization stays paused only for the duration
        // of this legalize(Offset) call.
        BoolFlagSaver B(RandomizationPoolingPaused, true);

        Offset = llvm::cast<Constant>(legalize(Offset));
      }

      assert(llvm::isa<ConstantInteger32>(Offset) ||
             llvm::isa<ConstantRelocatable>(Offset));
    }
    // Not completely sure whether it's OK to leave IsRebased unset when
    // creating the mem operand. If DoLegalize is true, it will definitely be
    // applied during the legalize() call, but perhaps not during the
    // randomizeOrPoolImmediate() call. In any case, the emit routines will
    // assert that PIC legalization has been applied.
    Mem = X86OperandMem::create(Func, Ty, Base, Offset);
  }
  // Do legalization, which contains randomization/pooling or do
  // randomization/pooling.
  return llvm::cast<X86OperandMem>(DoLegalize ? legalize(Mem)
                                              : randomizeOrPoolImmediate(Mem));
}
7525
John Porto4a566862016-01-04 09:33:41 -08007526template <typename TraitsType>
Jim Stichnoth8aa39662016-02-10 11:20:30 -08007527Variable *TargetX86Base<TraitsType>::makeReg(Type Type, RegNumT RegNum) {
John Porto7e93c622015-06-23 10:58:57 -07007528 // There aren't any 64-bit integer registers for x86-32.
John Porto1d235422015-08-12 12:37:53 -07007529 assert(Traits::Is64Bit || Type != IceType_i64);
John Porto5aeed952015-07-21 13:39:09 -07007530 Variable *Reg = Func->makeVariable(Type);
Reed Kotler5fa0a5f2016-02-15 20:01:24 -08007531 if (RegNum.hasValue())
John Porto7e93c622015-06-23 10:58:57 -07007532 Reg->setRegNum(RegNum);
Reed Kotler5fa0a5f2016-02-15 20:01:24 -08007533 else
7534 Reg->setMustHaveReg();
John Porto7e93c622015-06-23 10:58:57 -07007535 return Reg;
7536}
7537
// Lookup table used by largestTypeInSize()/firstTypeThatFitsSize(): entry i
// is the copy type for a chunk of 2^i bytes (1, 2, 4, 8, 16 bytes
// respectively). The 8- and 16-byte entries use f64/v16i8 register classes.
template <typename TraitsType>
const Type TargetX86Base<TraitsType>::TypeForSize[] = {
    IceType_i8, IceType_i16, IceType_i32, IceType_f64, IceType_v16i8};
John Porto4a566862016-01-04 09:33:41 -08007541template <typename TraitsType>
7542Type TargetX86Base<TraitsType>::largestTypeInSize(uint32_t Size,
7543 uint32_t MaxSize) {
Andrew Scullcfa628b2015-08-20 14:23:05 -07007544 assert(Size != 0);
7545 uint32_t TyIndex = llvm::findLastSet(Size, llvm::ZB_Undefined);
7546 uint32_t MaxIndex = MaxSize == NoSizeLimit
7547 ? llvm::array_lengthof(TypeForSize) - 1
7548 : llvm::findLastSet(MaxSize, llvm::ZB_Undefined);
7549 return TypeForSize[std::min(TyIndex, MaxIndex)];
7550}
7551
John Porto4a566862016-01-04 09:33:41 -08007552template <typename TraitsType>
7553Type TargetX86Base<TraitsType>::firstTypeThatFitsSize(uint32_t Size,
7554 uint32_t MaxSize) {
Andrew Scullcfa628b2015-08-20 14:23:05 -07007555 assert(Size != 0);
7556 uint32_t TyIndex = llvm::findLastSet(Size, llvm::ZB_Undefined);
7557 if (!llvm::isPowerOf2_32(Size))
7558 ++TyIndex;
7559 uint32_t MaxIndex = MaxSize == NoSizeLimit
7560 ? llvm::array_lengthof(TypeForSize) - 1
7561 : llvm::findLastSet(MaxSize, llvm::ZB_Undefined);
7562 return TypeForSize[std::min(TyIndex, MaxIndex)];
7563}
7564
John Porto4a566862016-01-04 09:33:41 -08007565template <typename TraitsType> void TargetX86Base<TraitsType>::postLower() {
Jim Stichnothdd6dcfa2016-04-18 12:52:09 -07007566 if (Func->getOptLevel() == Opt_m1)
John Porto7e93c622015-06-23 10:58:57 -07007567 return;
Jim Stichnoth230d4102015-09-25 17:40:32 -07007568 markRedefinitions();
Jim Stichnoth318f4cd2015-10-01 21:02:37 -07007569 Context.availabilityUpdate();
John Porto7e93c622015-06-23 10:58:57 -07007570}
7571
/// Produce a random permutation of the register numbers, excluding the
/// registers in ExcludeRegisters and salting the randomness with Salt.
/// Delegates entirely to the target-specific Traits implementation.
template <typename TraitsType>
void TargetX86Base<TraitsType>::makeRandomRegisterPermutation(
    llvm::SmallVectorImpl<RegNumT> &Permutation,
    const SmallBitVector &ExcludeRegisters, uint64_t Salt) const {
  Traits::makeRandomRegisterPermutation(Func, Permutation, ExcludeRegisters,
                                        Salt);
}
7579
John Porto4a566862016-01-04 09:33:41 -08007580template <typename TraitsType>
7581void TargetX86Base<TraitsType>::emit(const ConstantInteger32 *C) const {
Jim Stichnoth20b71f52015-06-24 15:52:24 -07007582 if (!BuildDefs::dump())
John Porto7e93c622015-06-23 10:58:57 -07007583 return;
7584 Ostream &Str = Ctx->getStrEmit();
Jim Stichnoth8ff4b282016-01-04 15:39:06 -08007585 Str << "$" << C->getValue();
John Porto7e93c622015-06-23 10:58:57 -07007586}
7587
John Porto4a566862016-01-04 09:33:41 -08007588template <typename TraitsType>
7589void TargetX86Base<TraitsType>::emit(const ConstantInteger64 *C) const {
John Porto1d235422015-08-12 12:37:53 -07007590 if (!Traits::Is64Bit) {
7591 llvm::report_fatal_error("Not expecting to emit 64-bit integers");
7592 } else {
7593 if (!BuildDefs::dump())
7594 return;
7595 Ostream &Str = Ctx->getStrEmit();
Jim Stichnoth8ff4b282016-01-04 15:39:06 -08007596 Str << "$" << C->getValue();
John Porto1d235422015-08-12 12:37:53 -07007597 }
John Porto7e93c622015-06-23 10:58:57 -07007598}
7599
John Porto4a566862016-01-04 09:33:41 -08007600template <typename TraitsType>
7601void TargetX86Base<TraitsType>::emit(const ConstantFloat *C) const {
Jim Stichnoth20b71f52015-06-24 15:52:24 -07007602 if (!BuildDefs::dump())
John Porto7e93c622015-06-23 10:58:57 -07007603 return;
7604 Ostream &Str = Ctx->getStrEmit();
Jim Stichnoth467ffe52016-03-29 15:01:06 -07007605 Str << C->getLabelName();
John Porto7e93c622015-06-23 10:58:57 -07007606}
7607
John Porto4a566862016-01-04 09:33:41 -08007608template <typename TraitsType>
7609void TargetX86Base<TraitsType>::emit(const ConstantDouble *C) const {
Jim Stichnoth20b71f52015-06-24 15:52:24 -07007610 if (!BuildDefs::dump())
John Porto7e93c622015-06-23 10:58:57 -07007611 return;
7612 Ostream &Str = Ctx->getStrEmit();
Jim Stichnoth467ffe52016-03-29 15:01:06 -07007613 Str << C->getLabelName();
John Porto7e93c622015-06-23 10:58:57 -07007614}
7615
/// Undef constants are expected to have been replaced (see legalizeUndef())
/// before emission; reaching this emitter is a compiler bug.
template <typename TraitsType>
void TargetX86Base<TraitsType>::emit(const ConstantUndef *) const {
  llvm::report_fatal_error("undef value encountered by emitter.");
}
7620
Jim Stichnoth8ff4b282016-01-04 15:39:06 -08007621template <class Machine>
7622void TargetX86Base<Machine>::emit(const ConstantRelocatable *C) const {
7623 if (!BuildDefs::dump())
7624 return;
Karl Schimpfd4699942016-04-02 09:55:31 -07007625 assert(!getFlags().getUseNonsfi() ||
Jim Stichnoth467ffe52016-03-29 15:01:06 -07007626 C->getName().toString() == GlobalOffsetTable);
Jim Stichnoth8ff4b282016-01-04 15:39:06 -08007627 Ostream &Str = Ctx->getStrEmit();
7628 Str << "$";
7629 emitWithoutPrefix(C);
7630}
7631
/// Randomize or pool an Immediate.
///
/// Depending on the -randomize-pool-immediates option, either:
///   - RPI_Randomize: "blind" the constant by materializing imm+cookie into a
///     register and then subtracting the cookie with an lea, or
///   - RPI_Pool: load the constant from a constant-pool label.
/// Returns the original Immediate unchanged when randomization/pooling is
/// disabled, paused, or the constant is not eligible.
template <typename TraitsType>
Operand *
TargetX86Base<TraitsType>::randomizeOrPoolImmediate(Constant *Immediate,
                                                    RegNumT RegNum) {
  // Only 32-bit integers and relocatables are candidates.
  assert(llvm::isa<ConstantInteger32>(Immediate) ||
         llvm::isa<ConstantRelocatable>(Immediate));
  if (getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None ||
      RandomizationPoolingPaused == true) {
    // Immediates randomization/pooling off or paused.
    return Immediate;
  }

  if (Traits::Is64Bit && NeedSandboxing) {
    // Immediate randomization/pooling is currently disabled for x86-64
    // sandboxing for it could generate invalid memory operands.
    assert(false &&
           "Constant pooling/randomization is disabled for x8664 sandbox.");
    return Immediate;
  }

  if (!Immediate->shouldBeRandomizedOrPooled()) {
    // The constant Immediate is not eligible for blinding/pooling.
    return Immediate;
  }
  Ctx->statsUpdateRPImms();
  switch (getFlags().getRandomizeAndPoolImmediatesOption()) {
  default:
    llvm::report_fatal_error("Unsupported -randomize-pool-immediates option");
  case RPI_Randomize: {
    // Blind the constant.
    // FROM:
    //   imm
    // TO:
    //   insert: mov imm+cookie, Reg
    //   insert: lea -cookie[Reg], Reg
    //   => Reg
    // If we have already assigned a phy register, we must come from
    // advancedPhiLowering()=>lowerAssign(). In this case we should reuse the
    // assigned register as this assignment is that start of its use-def
    // chain. So we add RegNum argument here. Note we use 'lea' instruction
    // instead of 'xor' to avoid affecting the flags.
    Variable *Reg = makeReg(IceType_i32, RegNum);
    auto *Integer = llvm::cast<ConstantInteger32>(Immediate);
    uint32_t Value = Integer->getValue();
    uint32_t Cookie = Func->getConstantBlindingCookie();
    _mov(Reg, Ctx->getConstantInt(IceType_i32, Cookie + Value));
    Constant *Offset = Ctx->getConstantInt(IceType_i32, 0 - Cookie);
    _lea(Reg, X86OperandMem::create(Func, IceType_i32, Reg, Offset));
    if (Immediate->getType() == IceType_i32) {
      return Reg;
    }
    // Narrower immediates (e.g. i8/i16) are truncated back down from the
    // 32-bit blinded register.
    Variable *TruncReg = makeReg(Immediate->getType(), RegNum);
    _mov(TruncReg, Reg);
    return TruncReg;
  }
  case RPI_Pool: {
    // Pool the constant.
    // FROM:
    //   imm
    // TO:
    //   insert: mov $label, Reg
    //   => Reg
    assert(getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool);
    assert(Immediate->getShouldBePooled());
    // If we have already assigned a phy register, we must come from
    // advancedPhiLowering()=>lowerAssign(). In this case we should reuse the
    // assigned register as this assignment is that start of its use-def
    // chain. So we add RegNum argument here.
    Variable *Reg = makeReg(Immediate->getType(), RegNum);
    constexpr RelocOffsetT Offset = 0;
    Constant *Symbol = Ctx->getConstantSym(Offset, Immediate->getLabelName());
    constexpr Variable *NoBase = nullptr;
    X86OperandMem *MemOperand =
        X86OperandMem::create(Func, Immediate->getType(), NoBase, Symbol);
    _mov(Reg, MemOperand);
    return Reg;
  }
  }
}
7712
/// Randomize or pool the constant offset of a memory operand.
///
/// Mirrors randomizeOrPoolImmediate(Constant*), but operates on the Offset
/// component of MemOperand, rebuilding the memory operand around a temporary
/// register. Returns MemOperand unchanged when randomization/pooling is
/// disabled, paused, already applied, or the offset is not eligible.
template <typename TraitsType>
typename TargetX86Base<TraitsType>::X86OperandMem *
TargetX86Base<TraitsType>::randomizeOrPoolImmediate(X86OperandMem *MemOperand,
                                                    RegNumT RegNum) {
  assert(MemOperand);
  if (getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None ||
      RandomizationPoolingPaused == true) {
    // Immediates randomization/pooling is turned off.
    return MemOperand;
  }

  if (Traits::Is64Bit && NeedSandboxing) {
    // Immediate randomization/pooling is currently disabled for x86-64
    // sandboxing for it could generate invalid memory operands.
    assert(false &&
           "Constant pooling/randomization is disabled for x8664 sandbox.");
    return MemOperand;
  }

  // If this memory operand is already a randomized one, we do not randomize it
  // again.
  if (MemOperand->getRandomized())
    return MemOperand;

  auto *C = llvm::dyn_cast_or_null<Constant>(MemOperand->getOffset());

  // No constant offset: nothing to blind or pool.
  if (C == nullptr) {
    return MemOperand;
  }

  if (!C->shouldBeRandomizedOrPooled()) {
    return MemOperand;
  }

  // The offset of this mem operand should be blinded or pooled.
  Ctx->statsUpdateRPImms();
  switch (getFlags().getRandomizeAndPoolImmediatesOption()) {
  default:
    llvm::report_fatal_error("Unsupported -randomize-pool-immediates option");
  case RPI_Randomize: {
    // Blind the constant offset.
    // FROM:
    //   offset[base, index, shift]
    // TO:
    //   insert: lea offset+cookie[base], RegTemp
    //   => -cookie[RegTemp, index, shift]
    uint32_t Value =
        llvm::dyn_cast<ConstantInteger32>(MemOperand->getOffset())->getValue();
    uint32_t Cookie = Func->getConstantBlindingCookie();
    Constant *Mask1 =
        Ctx->getConstantInt(MemOperand->getOffset()->getType(), Cookie + Value);
    Constant *Mask2 =
        Ctx->getConstantInt(MemOperand->getOffset()->getType(), 0 - Cookie);

    X86OperandMem *TempMemOperand = X86OperandMem::create(
        Func, MemOperand->getType(), MemOperand->getBase(), Mask1);
    // If we have already assigned a physical register, we must come from
    // advancedPhiLowering()=>lowerAssign(). In this case we should reuse
    // the assigned register as this assignment is that start of its
    // use-def chain. So we add RegNum argument here.
    Variable *RegTemp = makeReg(MemOperand->getOffset()->getType(), RegNum);
    _lea(RegTemp, TempMemOperand);

    X86OperandMem *NewMemOperand = X86OperandMem::create(
        Func, MemOperand->getType(), RegTemp, Mask2, MemOperand->getIndex(),
        MemOperand->getShift(), MemOperand->getSegmentRegister(),
        MemOperand->getIsRebased());

    // Label this memory operand as randomized, so we won't randomize it
    // again in case we call legalize() multiple times on this memory
    // operand.
    NewMemOperand->setRandomized(true);
    return NewMemOperand;
  }
  case RPI_Pool: {
    // Pool the constant offset.
    // FROM:
    //   offset[base, index, shift]
    // TO:
    //   insert: mov $label, RegTemp
    //   insert: lea [base, RegTemp], RegTemp
    //   => [RegTemp, index, shift]

    // Memory operand should never exist as source operands in phi lowering
    // assignments, so there is no need to reuse any registers here. For
    // phi lowering, we should not ask for new physical registers in
    // general. However, if we do meet Memory Operand during phi lowering,
    // we should not blind or pool the immediates for now.
    if (RegNum.hasValue())
      return MemOperand;
    Variable *RegTemp = makeReg(IceType_i32);
    assert(MemOperand->getOffset()->getShouldBePooled());
    constexpr RelocOffsetT SymOffset = 0;
    Constant *Symbol =
        Ctx->getConstantSym(SymOffset, MemOperand->getOffset()->getLabelName());
    constexpr Variable *NoBase = nullptr;
    X86OperandMem *SymbolOperand = X86OperandMem::create(
        Func, MemOperand->getOffset()->getType(), NoBase, Symbol);
    _mov(RegTemp, SymbolOperand);
    // If we have a base variable here, we should add the lea instruction
    // to add the value of the base variable to RegTemp. If there is no
    // base variable, we won't need this lea instruction.
    if (MemOperand->getBase()) {
      X86OperandMem *CalculateOperand = X86OperandMem::create(
          Func, MemOperand->getType(), MemOperand->getBase(), nullptr, RegTemp,
          0, MemOperand->getSegmentRegister());
      _lea(RegTemp, CalculateOperand);
    }
    X86OperandMem *NewMemOperand = X86OperandMem::create(
        Func, MemOperand->getType(), RegTemp, nullptr, MemOperand->getIndex(),
        MemOperand->getShift(), MemOperand->getSegmentRegister());
    return NewMemOperand;
  }
  }
}
David Sehr6b80cf12016-01-21 23:16:58 -08007828
7829template <typename TraitsType>
7830void TargetX86Base<TraitsType>::emitJumpTable(
John Porto03077212016-04-05 06:30:21 -07007831 const Cfg *, const InstJumpTable *JumpTable) const {
David Sehr6b80cf12016-01-21 23:16:58 -08007832 if (!BuildDefs::dump())
7833 return;
7834 Ostream &Str = Ctx->getStrEmit();
Karl Schimpfd4699942016-04-02 09:55:31 -07007835 const bool UseNonsfi = getFlags().getUseNonsfi();
Jim Stichnoth467ffe52016-03-29 15:01:06 -07007836 const char *Prefix = UseNonsfi ? ".data.rel.ro." : ".rodata.";
John Porto03077212016-04-05 06:30:21 -07007837 Str << "\t.section\t" << Prefix << JumpTable->getSectionName()
7838 << ",\"a\",@progbits\n"
7839 "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n"
7840 << JumpTable->getName() << ":";
David Sehr6b80cf12016-01-21 23:16:58 -08007841
7842 // On X86 ILP32 pointers are 32-bit hence the use of .long
7843 for (SizeT I = 0; I < JumpTable->getNumTargets(); ++I)
7844 Str << "\n\t.long\t" << JumpTable->getTarget(I)->getAsmName();
7845 Str << "\n";
7846}
7847
/// Emit the constant pool for type T (a PoolTypeConverter specialization) as
/// assembly: a mergeable .rodata.cstN section followed by one labeled entry
/// per pooled constant, formatted with T::PrintfString and annotated with a
/// human-readable comment.
template <typename TraitsType>
template <typename T>
void TargetDataX86<TraitsType>::emitConstantPool(GlobalContext *Ctx) {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Ctx->getStrEmit();
  Type Ty = T::Ty;
  SizeT Align = typeAlignInBytes(Ty);
  ConstantList Pool = Ctx->getConstantPool(Ty);

  // "aM" + entry size marks the section as mergeable by the linker.
  Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",@progbits," << Align
      << "\n";
  Str << "\t.align\t" << Align << "\n";

  // If reorder-pooled-constants option is set to true, we need to shuffle the
  // constant pool before emitting it.
  if (getFlags().getReorderPooledConstants() && !Pool.empty()) {
    // Use the constant's kind value as the salt for creating random number
    // generator.
    Operand::OperandKind K = (*Pool.begin())->getKind();
    RandomNumberGenerator RNG(getFlags().getRandomSeed(),
                              RPE_PooledConstantReordering, K);
    RandomShuffle(Pool.begin(), Pool.end(),
                  [&RNG](uint64_t N) { return (uint32_t)RNG.next(N); });
  }

  for (Constant *C : Pool) {
    if (!C->getShouldBePooled())
      continue;
    auto *Const = llvm::cast<typename T::IceType>(C);
    typename T::IceType::PrimType Value = Const->getValue();
    // Use memcpy() to copy bits from Value into RawValue in a way that avoids
    // breaking strict-aliasing rules.
    typename T::PrimitiveIntType RawValue;
    memcpy(&RawValue, &Value, sizeof(Value));
    char buf[30];
    int CharsPrinted =
        snprintf(buf, llvm::array_lengthof(buf), T::PrintfString, RawValue);
    assert(CharsPrinted >= 0);
    assert((size_t)CharsPrinted < llvm::array_lengthof(buf));
    (void)CharsPrinted; // avoid warnings if asserts are disabled
    Str << Const->getLabelName();
    Str << ":\n\t" << T::AsmTag << "\t" << buf << "\t/* " << T::TypeName << " "
        << Value << " */\n";
  }
}
7894
/// Lower all constant pools for the module, dispatching on the output file
/// type: ELF pools are written through the object writer, while assembly and
/// integrated-assembly output go through the textual emitter.
template <typename TraitsType>
void TargetDataX86<TraitsType>::lowerConstants() {
  if (getFlags().getDisableTranslation())
    return;
  switch (getFlags().getOutFileType()) {
  case FT_Elf: {
    ELFObjectWriter *Writer = Ctx->getObjectWriter();

    // Integer pools of each width, then the floating-point pools.
    Writer->writeConstantPool<ConstantInteger32>(IceType_i8);
    Writer->writeConstantPool<ConstantInteger32>(IceType_i16);
    Writer->writeConstantPool<ConstantInteger32>(IceType_i32);

    Writer->writeConstantPool<ConstantFloat>(IceType_f32);
    Writer->writeConstantPool<ConstantDouble>(IceType_f64);
  } break;
  case FT_Asm:
  case FT_Iasm: {
    // Textual emission: hold the output stream lock for the whole batch.
    OstreamLocker L(Ctx);

    emitConstantPool<PoolTypeConverter<uint8_t>>(Ctx);
    emitConstantPool<PoolTypeConverter<uint16_t>>(Ctx);
    emitConstantPool<PoolTypeConverter<uint32_t>>(Ctx);

    emitConstantPool<PoolTypeConverter<float>>(Ctx);
    emitConstantPool<PoolTypeConverter<double>>(Ctx);
  } break;
  }
}
7923
7924template <typename TraitsType>
7925void TargetDataX86<TraitsType>::lowerJumpTables() {
Karl Schimpfd4699942016-04-02 09:55:31 -07007926 const bool IsPIC = getFlags().getUseNonsfi();
7927 switch (getFlags().getOutFileType()) {
David Sehr6b80cf12016-01-21 23:16:58 -08007928 case FT_Elf: {
7929 ELFObjectWriter *Writer = Ctx->getObjectWriter();
7930 for (const JumpTableData &JT : Ctx->getJumpTables())
7931 Writer->writeJumpTable(JT, Traits::FK_Abs, IsPIC);
7932 } break;
7933 case FT_Asm:
7934 // Already emitted from Cfg
7935 break;
7936 case FT_Iasm: {
7937 if (!BuildDefs::dump())
7938 return;
7939 Ostream &Str = Ctx->getStrEmit();
Jim Stichnoth467ffe52016-03-29 15:01:06 -07007940 const char *Prefix = IsPIC ? ".data.rel.ro." : ".rodata.";
David Sehr6b80cf12016-01-21 23:16:58 -08007941 for (const JumpTableData &JT : Ctx->getJumpTables()) {
John Porto03077212016-04-05 06:30:21 -07007942 Str << "\t.section\t" << Prefix << JT.getSectionName()
7943 << ",\"a\",@progbits\n"
7944 "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n"
7945 << JT.getName().toString() << ":";
David Sehr6b80cf12016-01-21 23:16:58 -08007946
7947 // On X8664 ILP32 pointers are 32-bit hence the use of .long
7948 for (intptr_t TargetOffset : JT.getTargetOffsets())
7949 Str << "\n\t.long\t" << JT.getFunctionName() << "+" << TargetOffset;
7950 Str << "\n";
7951 }
7952 } break;
7953 }
7954}
7955
7956template <typename TraitsType>
7957void TargetDataX86<TraitsType>::lowerGlobals(
Jim Stichnoth467ffe52016-03-29 15:01:06 -07007958 const VariableDeclarationList &Vars, const std::string &SectionSuffix) {
Karl Schimpfd4699942016-04-02 09:55:31 -07007959 const bool IsPIC = getFlags().getUseNonsfi();
7960 switch (getFlags().getOutFileType()) {
David Sehr6b80cf12016-01-21 23:16:58 -08007961 case FT_Elf: {
7962 ELFObjectWriter *Writer = Ctx->getObjectWriter();
7963 Writer->writeDataSection(Vars, Traits::FK_Abs, SectionSuffix, IsPIC);
7964 } break;
7965 case FT_Asm:
7966 case FT_Iasm: {
David Sehr6b80cf12016-01-21 23:16:58 -08007967 OstreamLocker L(Ctx);
7968 for (const VariableDeclaration *Var : Vars) {
Jim Stichnothdd6dcfa2016-04-18 12:52:09 -07007969 if (getFlags().matchTranslateOnly(Var->getName(), 0)) {
David Sehr6b80cf12016-01-21 23:16:58 -08007970 emitGlobal(*Var, SectionSuffix);
7971 }
7972 }
7973 } break;
7974 }
7975}
John Porto4a566862016-01-04 09:33:41 -08007976} // end of namespace X86NAMESPACE
John Porto7e93c622015-06-23 10:58:57 -07007977} // end of namespace Ice
7978
7979#endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H