blob: ecb4ae6b483adaa6ad1c512493c25004a9f0cafc [file] [log] [blame]
John Porto7e93c622015-06-23 10:58:57 -07001//===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==//
2//
3// The Subzero Code Generator
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
Andrew Scull9612d322015-07-06 14:53:25 -07009///
10/// \file
Jim Stichnoth92a6e5b2015-12-02 16:52:44 -080011/// \brief Implements the TargetLoweringX86Base class, which consists almost
Andrew Scull57e12682015-09-16 11:30:19 -070012/// entirely of the lowering sequence for each high-level instruction.
Andrew Scull9612d322015-07-06 14:53:25 -070013///
John Porto7e93c622015-06-23 10:58:57 -070014//===----------------------------------------------------------------------===//
15
16#ifndef SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
17#define SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
18
John Porto7e93c622015-06-23 10:58:57 -070019#include "IceCfg.h"
20#include "IceCfgNode.h"
21#include "IceClFlags.h"
22#include "IceDefs.h"
23#include "IceELFObjectWriter.h"
24#include "IceGlobalInits.h"
John Portoec3f5652015-08-31 15:07:09 -070025#include "IceInstVarIter.h"
Jim Stichnothb9a84722016-08-01 13:18:36 -070026#include "IceInstX86Base.h"
John Porto7e93c622015-06-23 10:58:57 -070027#include "IceLiveness.h"
28#include "IceOperand.h"
Jan Voung53483692015-07-16 10:47:46 -070029#include "IcePhiLoweringImpl.h"
John Porto7e93c622015-06-23 10:58:57 -070030#include "IceUtils.h"
Jim Stichnothb9a84722016-08-01 13:18:36 -070031#include "IceVariableSplitting.h"
32
John Porto67f8de92015-06-25 10:14:17 -070033#include "llvm/Support/MathExtras.h"
John Porto7e93c622015-06-23 10:58:57 -070034
Andrew Scull87f80c12015-07-20 10:19:16 -070035#include <stack>
36
John Porto7e93c622015-06-23 10:58:57 -070037namespace Ice {
namespace X86 {
/// PoolTypeConverter describes, for a given primitive value type, how
/// constants of that type are emitted into a read-only constant pool:
/// the integer type used for the raw bit pattern, the Subzero constant
/// class, the IceType tag, and the emission strings (type name, assembler
/// directive tag, and printf format). The string members are declared here
/// and defined out-of-line per specialization.
template <typename T> struct PoolTypeConverter {};

/// Converter for f32 (single-precision float) constant pooling.
template <> struct PoolTypeConverter<float> {
  using PrimitiveIntType = uint32_t;
  using IceType = ConstantFloat;
  static const Type Ty = IceType_f32;
  static const char *TypeName;
  static const char *AsmTag;
  static const char *PrintfString;
};

/// Converter for f64 (double-precision float) constant pooling.
template <> struct PoolTypeConverter<double> {
  using PrimitiveIntType = uint64_t;
  using IceType = ConstantDouble;
  static const Type Ty = IceType_f64;
  static const char *TypeName;
  static const char *AsmTag;
  static const char *PrintfString;
};

// Add converter for int type constant pooling
template <> struct PoolTypeConverter<uint32_t> {
  using PrimitiveIntType = uint32_t;
  using IceType = ConstantInteger32;
  static const Type Ty = IceType_i32;
  static const char *TypeName;
  static const char *AsmTag;
  static const char *PrintfString;
};

// Add converter for int type constant pooling
template <> struct PoolTypeConverter<uint16_t> {
  using PrimitiveIntType = uint32_t;
  using IceType = ConstantInteger32;
  static const Type Ty = IceType_i16;
  static const char *TypeName;
  static const char *AsmTag;
  static const char *PrintfString;
};

// Add converter for int type constant pooling
template <> struct PoolTypeConverter<uint8_t> {
  using PrimitiveIntType = uint32_t;
  using IceType = ConstantInteger32;
  static const Type Ty = IceType_i8;
  static const char *TypeName;
  static const char *AsmTag;
  static const char *PrintfString;
};
} // end of namespace X86
89
John Porto4a566862016-01-04 09:33:41 -080090namespace X86NAMESPACE {
John Porto7e93c622015-06-23 10:58:57 -070091
Eric Holkd6cf6b32016-02-17 11:09:48 -080092using Utils::BoolFlagSaver;
John Porto7e93c622015-06-23 10:58:57 -070093
/// BoolFoldingEntry is the per-variable bookkeeping record used by
/// BoolFolding: it tracks the instruction that produces an i1-typed
/// variable, plus the liveness/usage facts that decide whether the
/// producer may be folded into its consumer.
template <typename Traits> class BoolFoldingEntry {
  BoolFoldingEntry(const BoolFoldingEntry &) = delete;

public:
  BoolFoldingEntry() = default;
  explicit BoolFoldingEntry(Inst *I);
  BoolFoldingEntry &operator=(const BoolFoldingEntry &) = default;
  /// Instr is the instruction producing the i1-type variable of interest.
  Inst *Instr = nullptr;
  /// IsComplex is the cached result of BoolFolding::hasComplexLowering(Instr).
  bool IsComplex = false;
  /// IsLiveOut is initialized conservatively to true, and is set to false when
  /// we encounter an instruction that ends Var's live range. We disable the
  /// folding optimization when Var is live beyond this basic block. Note that
  /// if liveness analysis is not performed (e.g. in Om1 mode), IsLiveOut will
  /// always be true and the folding optimization will never be performed.
  bool IsLiveOut = true;
  // NumUses counts the number of times Var is used as a source operand in the
  // basic block. If IsComplex is true and there is more than one use of Var,
  // then the folding optimization is disabled for Var.
  uint32_t NumUses = 0;
};
116
/// BoolFolding implements the per-basic-block analysis that identifies
/// i1-producing instructions (compares, flag-setting arithmetic) that can
/// be folded directly into their consumer (branch, select), avoiding the
/// materialization of a bool in a register.
template <typename Traits> class BoolFolding {
public:
  /// Kinds of instruction that can produce a foldable i1 value.
  enum BoolFoldingProducerKind {
    PK_None,
    // TODO(jpp): PK_Icmp32 is no longer meaningful. Rename to PK_IcmpNative.
    PK_Icmp32,
    PK_Icmp64,
    PK_Fcmp,
    PK_Trunc,
    PK_Arith // A flag-setting arithmetic instruction.
  };

  /// Currently the actual enum values are not used (other than CK_None), but we
  /// go ahead and produce them anyway for symmetry with the
  /// BoolFoldingProducerKind.
  enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext };

private:
  BoolFolding(const BoolFolding &) = delete;
  BoolFolding &operator=(const BoolFolding &) = delete;

public:
  BoolFolding() = default;
  static BoolFoldingProducerKind getProducerKind(const Inst *Instr);
  static BoolFoldingConsumerKind getConsumerKind(const Inst *Instr);
  static bool hasComplexLowering(const Inst *Instr);
  static bool isValidFolding(BoolFoldingProducerKind ProducerKind,
                             BoolFoldingConsumerKind ConsumerKind);
  /// Recomputes the Producers map for the given basic block.
  void init(CfgNode *Node);
  /// Returns the still-valid producer instruction for Opnd, or nullptr.
  const Inst *getProducerFor(const Operand *Opnd) const;
  void dump(const Cfg *Func) const;

private:
  /// Returns true if Producers contains a valid entry for the given VarNum.
  bool containsValid(SizeT VarNum) const {
    auto Element = Producers.find(VarNum);
    return Element != Producers.end() && Element->second.Instr != nullptr;
  }
  /// Marks VarNum's entry invalid by nulling its producer instruction.
  void setInvalid(SizeT VarNum) { Producers[VarNum].Instr = nullptr; }
  void invalidateProducersOnStore(const Inst *Instr);
  /// Producers maps Variable::Number to a BoolFoldingEntry.
  CfgUnorderedMap<SizeT, BoolFoldingEntry<Traits>> Producers;
};
160
/// Constructs an entry for producer instruction I, caching whether its
/// lowering is "complex" (requires more than one conditional branch).
template <typename Traits>
BoolFoldingEntry<Traits>::BoolFoldingEntry(Inst *I)
    : Instr(I), IsComplex(BoolFolding<Traits>::hasComplexLowering(I)) {}
John Porto7e93c622015-06-23 10:58:57 -0700164
/// Classifies Instr as a potential producer of a foldable i1 value.
/// Integer compares are native-width (PK_Icmp32) unless the operands are
/// i64 on a 32-bit target (PK_Icmp64); fcmp and flag-setting And/Or are
/// also recognized.
template <typename Traits>
typename BoolFolding<Traits>::BoolFoldingProducerKind
BoolFolding<Traits>::getProducerKind(const Inst *Instr) {
  if (llvm::isa<InstIcmp>(Instr)) {
    if (Traits::Is64Bit || Instr->getSrc(0)->getType() != IceType_i64)
      return PK_Icmp32;
    return PK_Icmp64;
  }
  if (llvm::isa<InstFcmp>(Instr))
    return PK_Fcmp;
  if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) {
    // Only native-width And/Or set flags in a way we can fold.
    if (Traits::Is64Bit || Arith->getSrc(0)->getType() != IceType_i64) {
      switch (Arith->getOp()) {
      default:
        return PK_None;
      case InstArithmetic::And:
      case InstArithmetic::Or:
        return PK_Arith;
      }
    }
  }
  return PK_None; // TODO(stichnot): remove this

  // NOTE: The Trunc recognition below is intentionally unreachable; the
  // early return above disables it until PK_Trunc folding is enabled.
  if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {
    switch (Cast->getCastKind()) {
    default:
      return PK_None;
    case InstCast::Trunc:
      return PK_Trunc;
    }
  }
  return PK_None;
}
198
/// Classifies Instr as a potential consumer of a foldable i1 value.
/// Only branch and select are currently enabled.
template <typename Traits>
typename BoolFolding<Traits>::BoolFoldingConsumerKind
BoolFolding<Traits>::getConsumerKind(const Inst *Instr) {
  if (llvm::isa<InstBr>(Instr))
    return CK_Br;
  if (llvm::isa<InstSelect>(Instr))
    return CK_Select;
  return CK_None; // TODO(stichnot): remove this

  // NOTE: The Sext/Zext recognition below is intentionally unreachable;
  // the early return above disables those consumer kinds for now.
  if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {
    switch (Cast->getCastKind()) {
    default:
      return CK_None;
    case InstCast::Sext:
      return CK_Sext;
    case InstCast::Zext:
      return CK_Zext;
    }
  }
  return CK_None;
}
220
John Porto921856d2015-07-07 11:56:26 -0700221/// Returns true if the producing instruction has a "complex" lowering sequence.
222/// This generally means that its lowering sequence requires more than one
223/// conditional branch, namely 64-bit integer compares and some floating-point
Andrew Scull57e12682015-09-16 11:30:19 -0700224/// compares. When this is true, and there is more than one consumer, we prefer
John Porto921856d2015-07-07 11:56:26 -0700225/// to disable the folding optimization because it minimizes branches.
John Porto4a566862016-01-04 09:33:41 -0800226template <typename Traits>
227bool BoolFolding<Traits>::hasComplexLowering(const Inst *Instr) {
John Porto7e93c622015-06-23 10:58:57 -0700228 switch (getProducerKind(Instr)) {
229 default:
230 return false;
231 case PK_Icmp64:
John Porto4a566862016-01-04 09:33:41 -0800232 return !Traits::Is64Bit;
John Porto7e93c622015-06-23 10:58:57 -0700233 case PK_Fcmp:
John Porto4a566862016-01-04 09:33:41 -0800234 return Traits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()].C2 !=
235 Traits::Cond::Br_None;
John Porto7e93c622015-06-23 10:58:57 -0700236 }
237}
238
John Porto4a566862016-01-04 09:33:41 -0800239template <typename Traits>
240bool BoolFolding<Traits>::isValidFolding(
241 typename BoolFolding<Traits>::BoolFoldingProducerKind ProducerKind,
242 typename BoolFolding<Traits>::BoolFoldingConsumerKind ConsumerKind) {
David Sehre3984282015-12-15 17:34:55 -0800243 switch (ProducerKind) {
244 default:
245 return false;
246 case PK_Icmp32:
247 case PK_Icmp64:
248 case PK_Fcmp:
249 return (ConsumerKind == CK_Br) || (ConsumerKind == CK_Select);
250 case PK_Arith:
251 return ConsumerKind == CK_Br;
252 }
253}
254
/// Scans all instructions in Node, recording every valid i1 producer and
/// then invalidating entries whose folding would be unsafe: consumers that
/// are not white-listed, uses in a non-first operand position, multiple
/// uses of a complex producer, producers live-out of the block, and
/// producers with memory operands that would be moved across a store.
/// Surviving producers are marked "dead" so lowering can regenerate them
/// at the consumer.
template <typename Traits> void BoolFolding<Traits>::init(CfgNode *Node) {
  Producers.clear();
  for (Inst &Instr : Node->getInsts()) {
    if (Instr.isDeleted())
      continue;
    // A memory write invalidates earlier producers with memory operands.
    invalidateProducersOnStore(&Instr);
    // Check whether Instr is a valid producer.
    Variable *Var = Instr.getDest();
    if (Var // only consider instructions with an actual dest var
        && Var->getType() == IceType_i1          // only bool-type dest vars
        && getProducerKind(&Instr) != PK_None) { // white-listed instructions
      Producers[Var->getIndex()] = BoolFoldingEntry<Traits>(&Instr);
    }
    // Check each src variable against the map.
    FOREACH_VAR_IN_INST(Var, Instr) {
      SizeT VarNum = Var->getIndex();
      if (!containsValid(VarNum))
        continue;
      // All valid consumers use Var as the first source operand
      if (IndexOfVarOperandInInst(Var) != 0) {
        setInvalid(VarNum);
        continue;
      }
      // Consumer instructions must be white-listed
      typename BoolFolding<Traits>::BoolFoldingConsumerKind ConsumerKind =
          getConsumerKind(&Instr);
      if (ConsumerKind == CK_None) {
        setInvalid(VarNum);
        continue;
      }
      typename BoolFolding<Traits>::BoolFoldingProducerKind ProducerKind =
          getProducerKind(Producers[VarNum].Instr);
      if (!isValidFolding(ProducerKind, ConsumerKind)) {
        setInvalid(VarNum);
        continue;
      }
      // Avoid creating multiple copies of complex producer instructions.
      if (Producers[VarNum].IsComplex && Producers[VarNum].NumUses > 0) {
        setInvalid(VarNum);
        continue;
      }
      ++Producers[VarNum].NumUses;
      if (Instr.isLastUse(Var)) {
        Producers[VarNum].IsLiveOut = false;
      }
    }
  }
  for (auto &I : Producers) {
    // Ignore entries previously marked invalid.
    if (I.second.Instr == nullptr)
      continue;
    // Disable the producer if its dest may be live beyond this block.
    if (I.second.IsLiveOut) {
      setInvalid(I.first);
      continue;
    }
    // Mark as "dead" rather than outright deleting. This is so that other
    // peephole style optimizations during or before lowering have access to
    // this instruction in undeleted form. See for example
    // tryOptimizedCmpxchgCmpBr().
    I.second.Instr->setDead();
  }
}
318
John Porto4a566862016-01-04 09:33:41 -0800319template <typename Traits>
320const Inst *BoolFolding<Traits>::getProducerFor(const Operand *Opnd) const {
John Porto7e93c622015-06-23 10:58:57 -0700321 auto *Var = llvm::dyn_cast<const Variable>(Opnd);
322 if (Var == nullptr)
323 return nullptr;
324 SizeT VarNum = Var->getIndex();
325 auto Element = Producers.find(VarNum);
326 if (Element == Producers.end())
327 return nullptr;
328 return Element->second.Instr;
329}
330
/// Dumps all still-valid producer entries, gated on build-time dump
/// support and the -verbose=folding flag.
template <typename Traits>
void BoolFolding<Traits>::dump(const Cfg *Func) const {
  if (!BuildDefs::dump() || !Func->isVerbose(IceV_Folding))
    return;
  // Lock the stream for the duration of the dump.
  OstreamLocker L(Func->getContext());
  Ostream &Str = Func->getContext()->getStrDump();
  for (auto &I : Producers) {
    // Skip entries that were invalidated during init().
    if (I.second.Instr == nullptr)
      continue;
    Str << "Found foldable producer:\n ";
    I.second.Instr->dump(Func);
    Str << "\n";
  }
}
345
/// If the given instruction has potential memory side effects (e.g. store, rmw,
/// or a call instruction with potential memory side effects), then we must not
/// allow a pre-store Producer instruction with memory operands to be folded
/// into a post-store Consumer instruction. If this is detected, the Producer
/// is invalidated.
///
/// We use the Producer's IsLiveOut field to determine whether any potential
/// Consumers come after this store instruction. The IsLiveOut field is
/// initialized to true, and BoolFolding::init() sets IsLiveOut to false when it
/// sees the variable's definitive last use (indicating the variable is not in
/// the node's live-out set). Thus if we see here that IsLiveOut is false, we
/// know that there can be no consumers after the store, and therefore we know
/// the folding is safe despite the store instruction.
template <typename Traits>
void BoolFolding<Traits>::invalidateProducersOnStore(const Inst *Instr) {
  if (!Instr->isMemoryWrite())
    return;
  for (auto &ProducerPair : Producers) {
    // IsLiveOut == false means all consumers precede this write; safe.
    if (!ProducerPair.second.IsLiveOut)
      continue;
    Inst *PInst = ProducerPair.second.Instr;
    if (PInst == nullptr)
      continue;
    // Scan the producer's sources for a memory operand; only those can
    // observe the effect of the intervening write.
    bool HasMemOperand = false;
    const SizeT SrcSize = PInst->getSrcSize();
    for (SizeT I = 0; I < SrcSize; ++I) {
      if (llvm::isa<typename Traits::X86OperandMem>(PInst->getSrc(I))) {
        HasMemOperand = true;
        break;
      }
    }
    if (!HasMemOperand)
      continue;
    setInvalid(ProducerPair.first);
  }
}
382
/// Per-node lowering setup: recompute the bool-folding producer map for
/// Node and optionally dump it (under -verbose=folding).
template <typename TraitsType>
void TargetX86Base<TraitsType>::initNodeForLowering(CfgNode *Node) {
  FoldingInfo.init(Node);
  FoldingInfo.dump(Func);
}
388
/// Constructs the target lowering object. Sandboxing is needed only for
/// NaCl. If a specific x86 instruction set was requested on the command
/// line, translate it from the target-independent enum range into the
/// Traits-specific range.
template <typename TraitsType>
TargetX86Base<TraitsType>::TargetX86Base(Cfg *Func)
    : TargetLowering(Func), NeedSandboxing(SandboxingType == ST_NaCl) {
  // The Traits enum range must mirror the target-independent one so the
  // offset arithmetic below is valid.
  static_assert(
      (Traits::InstructionSet::End - Traits::InstructionSet::Begin) ==
          (TargetInstructionSet::X86InstructionSet_End -
           TargetInstructionSet::X86InstructionSet_Begin),
      "Traits::InstructionSet range different from TargetInstructionSet");
  if (getFlags().getTargetInstructionSet() !=
      TargetInstructionSet::BaseInstructionSet) {
    InstructionSet = static_cast<InstructionSetEnum>(
        (getFlags().getTargetInstructionSet() -
         TargetInstructionSet::X86InstructionSet_Begin) +
        Traits::InstructionSet::Begin);
  }
}
405
/// One-time global initialization for this target: register-set tables,
/// register aliases, and the relocation fixup kinds. Must run before any
/// per-function lowering.
template <typename TraitsType>
void TargetX86Base<TraitsType>::staticInit(GlobalContext *Ctx) {
  RegNumT::setLimit(Traits::RegisterSet::Reg_NUM);
  Traits::initRegisterSet(getFlags(), &TypeToRegisterSet, &RegisterAliases);
  // Keep an unfiltered copy before command-line register exclusion applies.
  for (size_t i = 0; i < TypeToRegisterSet.size(); ++i)
    TypeToRegisterSetUnfiltered[i] = TypeToRegisterSet[i];
  filterTypeToRegisterSet(Ctx, Traits::RegisterSet::Reg_NUM,
                          TypeToRegisterSet.data(), TypeToRegisterSet.size(),
                          Traits::getRegName, getRegClassName);
  PcRelFixup = Traits::FK_PcRel;
  // Non-SFI mode uses GOT-relative absolute fixups.
  AbsFixup = getFlags().getUseNonsfi() ? Traits::FK_Gotoff : Traits::FK_Abs;
}
418
Jim Stichnoth467ffe52016-03-29 15:01:06 -0700419template <typename TraitsType>
420bool TargetX86Base<TraitsType>::shouldBePooled(const Constant *C) {
421 if (auto *ConstFloat = llvm::dyn_cast<ConstantFloat>(C)) {
422 return !Utils::isPositiveZero(ConstFloat->getValue());
423 }
424 if (auto *ConstDouble = llvm::dyn_cast<ConstantDouble>(C)) {
425 return !Utils::isPositiveZero(ConstDouble->getValue());
426 }
Karl Schimpfd4699942016-04-02 09:55:31 -0700427 if (getFlags().getRandomizeAndPoolImmediatesOption() != RPI_Pool) {
Jim Stichnoth467ffe52016-03-29 15:01:06 -0700428 return false;
429 }
430 return C->shouldBeRandomizedOrPooled();
431}
432
/// Runs the full -O2 translation pipeline for the function: sandboxing
/// setup, helper-call insertion, alloca merging, loop analysis and
/// loop-based optimizations, phi lowering, address-mode and RMW
/// optimization, target lowering, two register-allocation passes, stack
/// frame mapping, node reordering, branch optimization, and nop
/// insertion. The pass order is significant; see the inline comments.
template <typename TraitsType> void TargetX86Base<TraitsType>::translateO2() {
  TimerMarker T(TimerStack::TT_O2, Func);

  if (SandboxingType != ST_None) {
    initRebasePtr();
  }

  genTargetHelperCalls();
  Func->dump("After target helper call insertion");

  // Merge Alloca instructions, and lay out the stack.
  static constexpr bool SortAndCombineAllocas = true;
  Func->processAllocas(SortAndCombineAllocas);
  Func->dump("After Alloca processing");

  // Run this early so it can be used to focus optimizations on potentially hot
  // code.
  // TODO(stichnot,ascull): currently only used for regalloc not
  // expensive high level optimizations which could be focused on potentially
  // hot code.
  Func->generateLoopInfo();
  Func->dump("After loop analysis");
  if (getFlags().getLoopInvariantCodeMotion()) {
    Func->loopInvariantCodeMotion();
    Func->dump("After LICM");
  }

  if (getFlags().getEnableExperimental()) {
    Func->localCSE();
    Func->dump("After Local CSE");
  }
  if (getFlags().getEnableShortCircuit()) {
    Func->shortCircuitJumps();
    Func->dump("After Short Circuiting");
  }

  if (!getFlags().getEnablePhiEdgeSplit()) {
    // Lower Phi instructions.
    Func->placePhiLoads();
    if (Func->hasError())
      return;
    Func->placePhiStores();
    if (Func->hasError())
      return;
    Func->deletePhis();
    if (Func->hasError())
      return;
    Func->dump("After Phi lowering");
  }

  // Address mode optimization.
  Func->getVMetadata()->init(VMK_SingleDefs);
  Func->doAddressOpt();
  Func->materializeVectorShuffles();

  // Find read-modify-write opportunities. Do this after address mode
  // optimization so that doAddressOpt() doesn't need to be applied to RMW
  // instructions as well.
  findRMW();
  Func->dump("After RMW transform");

  // Argument lowering
  Func->doArgLowering();

  // Target lowering. This requires liveness analysis for some parts of the
  // lowering decisions, such as compare/branch fusing. If non-lightweight
  // liveness analysis is used, the instructions need to be renumbered first
  // TODO: This renumbering should only be necessary if we're actually
  // calculating live intervals, which we only do for register allocation.
  Func->renumberInstructions();
  if (Func->hasError())
    return;

  // TODO: It should be sufficient to use the fastest liveness calculation,
  // i.e. livenessLightweight(). However, for some reason that slows down the
  // rest of the translation. Investigate.
  Func->liveness(Liveness_Basic);
  if (Func->hasError())
    return;
  Func->dump("After x86 address mode opt");

  // Disable constant blinding or pooling for load optimization.
  {
    BoolFlagSaver B(RandomizationPoolingPaused, true);
    doLoadOpt();
  }
  Func->genCode();
  if (Func->hasError())
    return;
  if (SandboxingType != ST_None) {
    initSandbox();
  }
  Func->dump("After x86 codegen");
  splitBlockLocalVariables(Func);

  // Register allocation. This requires instruction renumbering and full
  // liveness analysis. Loops must be identified before liveness so variable
  // use weights are correct.
  Func->renumberInstructions();
  if (Func->hasError())
    return;
  Func->liveness(Liveness_Intervals);
  if (Func->hasError())
    return;
  // The post-codegen dump is done here, after liveness analysis and associated
  // cleanup, to make the dump cleaner and more useful.
  Func->dump("After initial x8632 codegen");
  // Validate the live range computations. The expensive validation call is
  // deliberately only made when assertions are enabled.
  assert(Func->validateLiveness());
  Func->getVMetadata()->init(VMK_All);
  regAlloc(RAK_Global);
  if (Func->hasError())
    return;
  Func->dump("After linear scan regalloc");

  if (getFlags().getEnablePhiEdgeSplit()) {
    Func->advancedPhiLowering();
    Func->dump("After advanced Phi lowering");
  }

  // Stack frame mapping.
  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  Func->contractEmptyNodes();
  Func->reorderNodes();

  // Shuffle basic block order if -reorder-basic-blocks is enabled.
  Func->shuffleNodes();

  // Branch optimization. This needs to be done just before code emission. In
  // particular, no transformations that insert or reorder CfgNodes should be
  // done after branch optimization. We go ahead and do it before nop insertion
  // to reduce the amount of work needed for searching for opportunities.
  Func->doBranchOpt();
  Func->dump("After branch optimization");

  // Nop insertion if -nop-insertion is enabled.
  Func->doNopInsertion();

  // Mark nodes that require sandbox alignment
  if (NeedSandboxing) {
    Func->markNodesForSandboxing();
  }
}
581
/// Runs the fast -Om1 translation pipeline: no optimization passes, only
/// the mandatory lowering steps (phi lowering, argument lowering, code
/// generation, infinite-weight register allocation, and stack frame
/// mapping).
template <typename TraitsType> void TargetX86Base<TraitsType>::translateOm1() {
  TimerMarker T(TimerStack::TT_Om1, Func);

  if (SandboxingType != ST_None) {
    initRebasePtr();
  }

  genTargetHelperCalls();

  // Do not merge Alloca instructions, and lay out the stack.
  static constexpr bool SortAndCombineAllocas = false;
  Func->processAllocas(SortAndCombineAllocas);
  Func->dump("After Alloca processing");

  Func->placePhiLoads();
  if (Func->hasError())
    return;
  Func->placePhiStores();
  if (Func->hasError())
    return;
  Func->deletePhis();
  if (Func->hasError())
    return;
  Func->dump("After Phi lowering");

  Func->doArgLowering();
  Func->genCode();
  if (Func->hasError())
    return;
  if (SandboxingType != ST_None) {
    initSandbox();
  }
  Func->dump("After initial x8632 codegen");

  // Only infinite-weight (pre-colored / must-have-register) variables are
  // allocated in Om1 mode.
  regAlloc(RAK_InfOnly);
  if (Func->hasError())
    return;
  Func->dump("After regalloc of infinite-weight variables");

  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  // Shuffle basic block order if -reorder-basic-blocks is enabled.
  Func->shuffleNodes();

  // Nop insertion if -nop-insertion is enabled.
  Func->doNopInsertion();

  // Mark nodes that require sandbox alignment
  if (NeedSandboxing)
    Func->markNodesForSandboxing();
}
636
/// Returns true if the given arithmetic instruction is a candidate for the
/// x86 read-modify-write transformation (collapsing load/op/store into a
/// single memory-operand instruction).
inline bool canRMW(const InstArithmetic *Arith) {
  Type Ty = Arith->getDest()->getType();
  // X86 vector instructions write to a register and have no RMW option.
  if (isVectorType(Ty))
    return false;
  bool isI64 = Ty == IceType_i64;

  switch (Arith->getOp()) {
  // Not handled for lack of simple lowering:
  //   shift on i64
  //   mul, udiv, urem, sdiv, srem, frem
  // Not handled for lack of RMW instructions:
  //   fadd, fsub, fmul, fdiv (also vector types)
  default:
    return false;
  case InstArithmetic::Add:
  case InstArithmetic::Sub:
  case InstArithmetic::And:
  case InstArithmetic::Or:
  case InstArithmetic::Xor:
    return true;
  case InstArithmetic::Shl:
  case InstArithmetic::Lshr:
  case InstArithmetic::Ashr:
    return false; // TODO(stichnot): implement
    // NOTE: the following return is intentionally unreachable; it (and
    // isI64 above) become live once the shift-RMW lowering is implemented.
    return !isI64;
  }
}
665
John Porto4a566862016-01-04 09:33:41 -0800666template <typename TraitsType>
John Porto7e93c622015-06-23 10:58:57 -0700667bool isSameMemAddressOperand(const Operand *A, const Operand *B) {
668 if (A == B)
669 return true;
John Porto4a566862016-01-04 09:33:41 -0800670 if (auto *MemA =
671 llvm::dyn_cast<typename TargetX86Base<TraitsType>::X86OperandMem>(
672 A)) {
673 if (auto *MemB =
674 llvm::dyn_cast<typename TargetX86Base<TraitsType>::X86OperandMem>(
675 B)) {
John Porto7e93c622015-06-23 10:58:57 -0700676 return MemA->getBase() == MemB->getBase() &&
677 MemA->getOffset() == MemB->getOffset() &&
678 MemA->getIndex() == MemB->getIndex() &&
679 MemA->getShift() == MemB->getShift() &&
680 MemA->getSegmentRegister() == MemB->getSegmentRegister();
681 }
682 }
683 return false;
684}
685
/// Scans every basic block for a Load / Arithmetic / Store triple over the
/// same memory address and, when found, tags the Store with an RMW "beacon"
/// variable and inserts an InstX86FakeRMW, so that lowering can later emit a
/// single read-modify-write instruction instead of the three-instruction
/// sequence.
template <typename TraitsType> void TargetX86Base<TraitsType>::findRMW() {
  TimerMarker _(TimerStack::TT_findRMW, Func);
  Func->dump("Before RMW");
  if (Func->isVerbose(IceV_RMW))
    Func->getContext()->lockStr();
  for (CfgNode *Node : Func->getNodes()) {
    // Walk through the instructions, considering each sequence of 3
    // instructions, and look for the particular RMW pattern. Note that this
    // search can be "broken" (false negatives) if there are intervening
    // deleted instructions, or intervening instructions that could be safely
    // moved out of the way to reveal an RMW pattern.
    auto E = Node->getInsts().end();
    // I1/I2/I3 form a sliding window of three consecutive live instructions;
    // I1 and I2 start at end() and only become valid after the window fills.
    auto I1 = E, I2 = E, I3 = Node->getInsts().begin();
    for (; I3 != E; I1 = I2, I2 = I3, ++I3) {
      // Make I3 skip over deleted instructions.
      while (I3 != E && I3->isDeleted())
        ++I3;
      // The pattern needs three live instructions; skip until the window is
      // fully populated.
      if (I1 == E || I2 == E || I3 == E)
        continue;
      assert(!I1->isDeleted());
      assert(!I2->isDeleted());
      assert(!I3->isDeleted());
      auto *Load = llvm::dyn_cast<InstLoad>(I1);
      auto *Arith = llvm::dyn_cast<InstArithmetic>(I2);
      auto *Store = llvm::dyn_cast<InstStore>(I3);
      if (!Load || !Arith || !Store)
        continue;
      // Look for:
      //   a = Load addr
      //   b = <op> a, other
      //   Store b, addr
      // Change to:
      //   a = Load addr
      //   b = <op> a, other
      //   x = FakeDef
      //   RMW <op>, addr, other, x
      //   b = Store b, addr, x
      // Note that inferTwoAddress() makes sure setDestRedefined() gets called
      // on the updated Store instruction, to avoid liveness problems later.
      //
      // With this transformation, the Store instruction acquires a Dest
      // variable and is now subject to dead code elimination if there are no
      // more uses of "b". Variable "x" is a beacon for determining whether the
      // Store instruction gets dead-code eliminated. If the Store instruction
      // is eliminated, then it must be the case that the RMW instruction ends
      // x's live range, and therefore the RMW instruction will be retained and
      // later lowered. On the other hand, if the RMW instruction does not end
      // x's live range, then the Store instruction must still be present, and
      // therefore the RMW instruction is ignored during lowering because it is
      // redundant with the Store instruction.
      //
      // Note that if "a" has further uses, the RMW transformation may still
      // trigger, resulting in two loads and one store, which is worse than the
      // original one load and one store. However, this is probably rare, and
      // caching probably keeps it just as fast.
      if (!isSameMemAddressOperand<TraitsType>(Load->getSourceAddress(),
                                               Store->getAddr()))
        continue;
      Operand *ArithSrcFromLoad = Arith->getSrc(0);
      Operand *ArithSrcOther = Arith->getSrc(1);
      if (ArithSrcFromLoad != Load->getDest()) {
        // If the load's dest isn't src0, try the commuted operand order
        // before giving up (only legal for commutative ops).
        if (!Arith->isCommutative() || ArithSrcOther != Load->getDest())
          continue;
        std::swap(ArithSrcFromLoad, ArithSrcOther);
      }
      if (Arith->getDest() != Store->getData())
        continue;
      if (!canRMW(Arith))
        continue;
      if (Func->isVerbose(IceV_RMW)) {
        Ostream &Str = Func->getContext()->getStrDump();
        Str << "Found RMW in " << Func->getFunctionName() << ":\n ";
        Load->dump(Func);
        Str << "\n ";
        Arith->dump(Func);
        Str << "\n ";
        Store->dump(Func);
        Str << "\n";
      }
      // Create the beacon variable and insert the FakeDef/FakeRMW pair just
      // before the Store (at iterator I3).
      Variable *Beacon = Func->makeVariable(IceType_i32);
      Beacon->setMustNotHaveReg();
      Store->setRmwBeacon(Beacon);
      auto *BeaconDef = InstFakeDef::create(Func, Beacon);
      Node->getInsts().insert(I3, BeaconDef);
      auto *RMW = InstX86FakeRMW::create(Func, ArithSrcOther, Store->getAddr(),
                                         Beacon, Arith->getOp());
      Node->getInsts().insert(I3, RMW);
    }
  }
  if (Func->isVerbose(IceV_RMW))
    Func->getContext()->unlockStr();
}
778
779// Converts a ConstantInteger32 operand into its constant value, or
780// MemoryOrderInvalid if the operand is not a ConstantInteger32.
John Porto5aeed952015-07-21 13:39:09 -0700781inline uint64_t getConstantMemoryOrder(Operand *Opnd) {
Jim Stichnoth5bff61c2015-10-28 09:26:00 -0700782 if (auto *Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
John Porto7e93c622015-06-23 10:58:57 -0700783 return Integer->getValue();
784 return Intrinsics::MemoryOrderInvalid;
785}
786
Andrew Scull57e12682015-09-16 11:30:19 -0700787/// Determines whether the dest of a Load instruction can be folded into one of
788/// the src operands of a 2-operand instruction. This is true as long as the
789/// load dest matches exactly one of the binary instruction's src operands.
790/// Replaces Src0 or Src1 with LoadSrc if the answer is true.
John Porto5aeed952015-07-21 13:39:09 -0700791inline bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest,
792 Operand *&Src0, Operand *&Src1) {
John Porto7e93c622015-06-23 10:58:57 -0700793 if (Src0 == LoadDest && Src1 != LoadDest) {
794 Src0 = LoadSrc;
795 return true;
796 }
797 if (Src0 != LoadDest && Src1 == LoadDest) {
798 Src1 = LoadSrc;
799 return true;
800 }
801 return false;
802}
803
/// Peephole pass that folds a Load (or a single-instruction AtomicLoad
/// intrinsic) into the immediately following instruction when that
/// instruction ends the load dest's live range, replacing the pair with one
/// instruction that takes the memory operand directly.
template <typename TraitsType> void TargetX86Base<TraitsType>::doLoadOpt() {
  TimerMarker _(TimerStack::TT_loadOpt, Func);
  for (CfgNode *Node : Func->getNodes()) {
    Context.init(Node);
    while (!Context.atEnd()) {
      // LoadDest/LoadSrc stay null unless CurInst qualifies as a foldable
      // load.
      Variable *LoadDest = nullptr;
      Operand *LoadSrc = nullptr;
      Inst *CurInst = Context.getCur();
      Inst *Next = Context.getNextInst();
      // Determine whether the current instruction is a Load instruction or
      // equivalent.
      if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) {
        // An InstLoad always qualifies.
        LoadDest = Load->getDest();
        constexpr bool DoLegalize = false;
        LoadSrc = formMemoryOperand(Load->getSourceAddress(),
                                    LoadDest->getType(), DoLegalize);
      } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) {
        // An AtomicLoad intrinsic qualifies as long as it has a valid memory
        // ordering, and can be implemented in a single instruction (i.e., not
        // i64 on x86-32).
        Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID;
        if (ID == Intrinsics::AtomicLoad &&
            (Traits::Is64Bit || Intrin->getDest()->getType() != IceType_i64) &&
            Intrinsics::isMemoryOrderValid(
                ID, getConstantMemoryOrder(Intrin->getArg(1)))) {
          LoadDest = Intrin->getDest();
          constexpr bool DoLegalize = false;
          LoadSrc = formMemoryOperand(Intrin->getArg(0), LoadDest->getType(),
                                      DoLegalize);
        }
      }
      // A Load instruction can be folded into the following instruction only
      // if the following instruction ends the Load's Dest variable's live
      // range.
      if (LoadDest && Next && Next->isLastUse(LoadDest)) {
        assert(LoadSrc);
        Inst *NewInst = nullptr;
        // Each supported consumer kind gets rebuilt with the memory operand
        // substituted for the load dest.
        if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Next)) {
          Operand *Src0 = Arith->getSrc(0);
          Operand *Src1 = Arith->getSrc(1);
          if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
            NewInst = InstArithmetic::create(Func, Arith->getOp(),
                                             Arith->getDest(), Src0, Src1);
          }
        } else if (auto *Icmp = llvm::dyn_cast<InstIcmp>(Next)) {
          Operand *Src0 = Icmp->getSrc(0);
          Operand *Src1 = Icmp->getSrc(1);
          if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
            NewInst = InstIcmp::create(Func, Icmp->getCondition(),
                                       Icmp->getDest(), Src0, Src1);
          }
        } else if (auto *Fcmp = llvm::dyn_cast<InstFcmp>(Next)) {
          Operand *Src0 = Fcmp->getSrc(0);
          Operand *Src1 = Fcmp->getSrc(1);
          if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
            NewInst = InstFcmp::create(Func, Fcmp->getCondition(),
                                       Fcmp->getDest(), Src0, Src1);
          }
        } else if (auto *Select = llvm::dyn_cast<InstSelect>(Next)) {
          Operand *Src0 = Select->getTrueOperand();
          Operand *Src1 = Select->getFalseOperand();
          if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
            NewInst = InstSelect::create(Func, Select->getDest(),
                                         Select->getCondition(), Src0, Src1);
          }
        } else if (auto *Cast = llvm::dyn_cast<InstCast>(Next)) {
          // The load dest can always be folded into a Cast instruction.
          auto *Src0 = llvm::dyn_cast<Variable>(Cast->getSrc(0));
          if (Src0 == LoadDest) {
            NewInst = InstCast::create(Func, Cast->getCastKind(),
                                       Cast->getDest(), LoadSrc);
          }
        }
        if (NewInst) {
          CurInst->setDeleted();
          Next->setDeleted();
          Context.insert(NewInst);
          // Update NewInst->LiveRangesEnded so that target lowering may
          // benefit. Also update NewInst->HasSideEffects.
          NewInst->spliceLivenessInfo(Next, CurInst);
        }
      }
      Context.advanceCur();
      Context.advanceNext();
    }
  }
  Func->dump("After load optimization");
}
893
John Porto4a566862016-01-04 09:33:41 -0800894template <typename TraitsType>
895bool TargetX86Base<TraitsType>::doBranchOpt(Inst *I, const CfgNode *NextNode) {
896 if (auto *Br = llvm::dyn_cast<InstX86Br>(I)) {
John Porto7e93c622015-06-23 10:58:57 -0700897 return Br->optimizeBranch(NextNode);
898 }
899 return false;
900}
901
John Porto4a566862016-01-04 09:33:41 -0800902template <typename TraitsType>
Jim Stichnoth8aa39662016-02-10 11:20:30 -0800903Variable *TargetX86Base<TraitsType>::getPhysicalRegister(RegNumT RegNum,
John Porto4a566862016-01-04 09:33:41 -0800904 Type Ty) {
John Porto7e93c622015-06-23 10:58:57 -0700905 if (Ty == IceType_void)
906 Ty = IceType_i32;
907 if (PhysicalRegisters[Ty].empty())
John Porto5d0acff2015-06-30 15:29:21 -0700908 PhysicalRegisters[Ty].resize(Traits::RegisterSet::Reg_NUM);
Jim Stichnoth8aa39662016-02-10 11:20:30 -0800909 assert(unsigned(RegNum) < PhysicalRegisters[Ty].size());
John Porto7e93c622015-06-23 10:58:57 -0700910 Variable *Reg = PhysicalRegisters[Ty][RegNum];
911 if (Reg == nullptr) {
John Porto5aeed952015-07-21 13:39:09 -0700912 Reg = Func->makeVariable(Ty);
John Porto7e93c622015-06-23 10:58:57 -0700913 Reg->setRegNum(RegNum);
914 PhysicalRegisters[Ty][RegNum] = Reg;
Jim Stichnoth69660552015-09-18 06:41:02 -0700915 // Specially mark a named physical register as an "argument" so that it is
916 // considered live upon function entry. Otherwise it's possible to get
917 // liveness validation errors for saving callee-save registers.
918 Func->addImplicitArg(Reg);
919 // Don't bother tracking the live range of a named physical register.
920 Reg->setIgnoreLiveness();
John Porto7e93c622015-06-23 10:58:57 -0700921 }
Jim Stichnoth8aa39662016-02-10 11:20:30 -0800922 assert(Traits::getGprForType(Ty, RegNum) == RegNum);
John Porto7e93c622015-06-23 10:58:57 -0700923 return Reg;
924}
925
John Porto4a566862016-01-04 09:33:41 -0800926template <typename TraitsType>
Jim Stichnoth467ffe52016-03-29 15:01:06 -0700927const char *TargetX86Base<TraitsType>::getRegName(RegNumT RegNum,
928 Type Ty) const {
John Porto008f4ce2015-12-24 13:22:18 -0800929 return Traits::getRegName(Traits::getGprForType(Ty, RegNum));
John Porto7e93c622015-06-23 10:58:57 -0700930}
931
/// Emits the assembly text for Var: "%reg" when it has a register, otherwise
/// a stack-frame memory operand of the form "Offset(%basereg)".
template <typename TraitsType>
void TargetX86Base<TraitsType>::emitVariable(const Variable *Var) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Ctx->getStrEmit();
  if (Var->hasReg()) {
    // Under x86-64 sandboxing, a rematerializable variable is emitted with
    // the full 64-bit register name regardless of its nominal type.
    const bool Is64BitSandboxing = Traits::Is64Bit && NeedSandboxing;
    const Type VarType = (Var->isRematerializable() && Is64BitSandboxing)
                             ? IceType_i64
                             : Var->getType();
    Str << "%" << getRegName(Var->getRegNum(), VarType);
    return;
  }
  // A variable that was required to be in a register but isn't indicates a
  // register-allocation failure; abort loudly.
  if (Var->mustHaveReg()) {
    llvm::report_fatal_error("Infinite-weight Variable (" + Var->getName() +
                             ") has no register assigned - function " +
                             Func->getFunctionName());
  }
  const int32_t Offset = Var->getStackOffset();
  auto BaseRegNum = Var->getBaseRegNum();
  // Fall back to the frame/stack pointer when no base register was recorded.
  if (BaseRegNum.hasNoValue())
    BaseRegNum = getFrameOrStackReg();

  // Print in the form "Offset(%reg)", omitting Offset when it is 0.
  if (getFlags().getDecorateAsm()) {
    Str << Var->getSymbolicStackOffset();
  } else if (Offset != 0) {
    Str << Offset;
  }
  const Type FrameSPTy = Traits::WordType;
  Str << "(%" << getRegName(BaseRegNum, FrameSPTy) << ")";
}
964
John Porto4a566862016-01-04 09:33:41 -0800965template <typename TraitsType>
966typename TargetX86Base<TraitsType>::X86Address
967TargetX86Base<TraitsType>::stackVarToAsmOperand(const Variable *Var) const {
John Porto7e93c622015-06-23 10:58:57 -0700968 if (Var->hasReg())
Jim Stichnoth8ff4b282016-01-04 15:39:06 -0800969 llvm::report_fatal_error("Stack Variable has a register assigned");
Andrew Scull11c9a322015-08-28 14:24:14 -0700970 if (Var->mustHaveReg()) {
Jim Stichnotha91c3412016-04-05 15:31:43 -0700971 llvm::report_fatal_error("Infinite-weight Variable (" + Var->getName() +
Jim Stichnoth45bec542016-02-05 10:26:09 -0800972 ") has no register assigned - function " +
973 Func->getFunctionName());
John Porto7e93c622015-06-23 10:58:57 -0700974 }
975 int32_t Offset = Var->getStackOffset();
Jim Stichnoth8aa39662016-02-10 11:20:30 -0800976 auto BaseRegNum = Var->getBaseRegNum();
Reed Kotler5fa0a5f2016-02-15 20:01:24 -0800977 if (Var->getBaseRegNum().hasNoValue())
Jan Voung28068ad2015-07-31 12:58:46 -0700978 BaseRegNum = getFrameOrStackReg();
John Porto4a566862016-01-04 09:33:41 -0800979 return X86Address(Traits::getEncodedGPR(BaseRegNum), Offset,
980 AssemblerFixup::NoFixup);
John Porto7e93c622015-06-23 10:58:57 -0700981}
982
/// Emits the function prolog at the top of Node: pushes callee-save
/// registers, optionally links ebp, aligns and reserves the stack frame,
/// copies register-allocated in-args out of their home stack slots, and
/// assigns stack offsets to all spilled variables.
template <typename TraitsType>
void TargetX86Base<TraitsType>::addProlog(CfgNode *Node) {
  // Stack frame layout:
  //
  // +------------------------+
  // | 1. return address      |
  // +------------------------+
  // | 2. preserved registers |
  // +------------------------+
  // | 3. padding             |
  // +------------------------+
  // | 4. global spill area   |
  // +------------------------+
  // | 5. padding             |
  // +------------------------+
  // | 6. local spill area    |
  // +------------------------+
  // | 7. padding             |
  // +------------------------+
  // | 8. allocas             |
  // +------------------------+
  // | 9. padding             |
  // +------------------------+
  // | 10. out args           |
  // +------------------------+ <--- StackPointer
  //
  // The following variables record the size in bytes of the given areas:
  //  * X86_RET_IP_SIZE_BYTES: area 1
  //  * PreservedRegsSizeBytes: area 2
  //  * SpillAreaPaddingBytes: area 3
  //  * GlobalsSize: area 4
  //  * GlobalsAndSubsequentPaddingSize: areas 4 - 5
  //  * LocalsSpillAreaSize: area 6
  //  * SpillAreaSizeBytes: areas 3 - 10
  //  * maxOutArgsSizeBytes(): area 10

  // Determine stack frame offsets for each Variable without a register
  // assignment. This can be done as one variable per stack slot. Or, do
  // coalescing by running the register allocator again with an infinite set of
  // registers (as a side effect, this gives variables a second chance at
  // physical register assignment).
  //
  // A middle ground approach is to leverage sparsity and allocate one block of
  // space on the frame for globals (variables with multi-block lifetime), and
  // one block to share for locals (single-block lifetime).

  Context.init(Node);
  Context.setInsertPoint(Context.getCur());

  SmallBitVector CalleeSaves = getRegisterSet(RegSet_CalleeSave, RegSet_None);
  RegsUsed = SmallBitVector(CalleeSaves.size());
  VarList SortedSpilledVariables, VariablesLinkedToSpillSlots;
  size_t GlobalsSize = 0;
  // If there is a separate locals area, this represents that area. Otherwise
  // it counts any variable not counted by GlobalsSize.
  SpillAreaSizeBytes = 0;
  // If there is a separate locals area, this specifies the alignment for it.
  uint32_t LocalsSlotsAlignmentBytes = 0;
  // The entire spill locations area gets aligned to largest natural alignment
  // of the variables that have a spill slot.
  uint32_t SpillAreaAlignmentBytes = 0;
  // A spill slot linked to a variable with a stack slot should reuse that
  // stack slot.
  std::function<bool(Variable *)> TargetVarHook =
      [&VariablesLinkedToSpillSlots](Variable *Var) {
        // TODO(stichnot): Refactor this into the base class.
        Variable *Root = Var->getLinkedToStackRoot();
        if (Root != nullptr) {
          assert(!Root->hasReg());
          if (!Root->hasReg()) {
            VariablesLinkedToSpillSlots.push_back(Var);
            return true;
          }
        }
        return false;
      };

  // Compute the list of spilled variables and bounds for GlobalsSize, etc.
  getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
                        &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
                        &LocalsSlotsAlignmentBytes, TargetVarHook);
  uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
  SpillAreaSizeBytes += GlobalsSize;

  // Add push instructions for preserved registers.
  uint32_t NumCallee = 0;
  size_t PreservedRegsSizeBytes = 0;
  SmallBitVector Pushed(CalleeSaves.size());
  // First collect the set of used callee-save registers, canonicalized to
  // their base (widest) register so aliases are pushed only once.
  for (RegNumT i : RegNumBVIter(CalleeSaves)) {
    const auto Canonical = Traits::getBaseReg(i);
    assert(Canonical == Traits::getBaseReg(Canonical));
    if (RegsUsed[i]) {
      Pushed[Canonical] = true;
    }
  }
  for (RegNumT RegNum : RegNumBVIter(Pushed)) {
    assert(RegNum == Traits::getBaseReg(RegNum));
    ++NumCallee;
    PreservedRegsSizeBytes += typeWidthInBytes(Traits::WordType);
    _push_reg(getPhysicalRegister(RegNum, Traits::WordType));
  }
  Ctx->statsUpdateRegistersSaved(NumCallee);

  // Generate "push frameptr; mov frameptr, stackptr"
  if (IsEbpBasedFrame) {
    assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))
               .count() == 0);
    PreservedRegsSizeBytes += typeWidthInBytes(Traits::WordType);
    _link_bp();
  }

  // Align the variables area. SpillAreaPaddingBytes is the size of the region
  // after the preserved registers and before the spill areas.
  // LocalsSlotsPaddingBytes is the amount of padding between the globals and
  // locals area if they are separate.
  assert(SpillAreaAlignmentBytes <= Traits::X86_STACK_ALIGNMENT_BYTES);
  assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
  uint32_t SpillAreaPaddingBytes = 0;
  uint32_t LocalsSlotsPaddingBytes = 0;
  alignStackSpillAreas(Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes,
                       SpillAreaAlignmentBytes, GlobalsSize,
                       LocalsSlotsAlignmentBytes, &SpillAreaPaddingBytes,
                       &LocalsSlotsPaddingBytes);
  SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
  uint32_t GlobalsAndSubsequentPaddingSize =
      GlobalsSize + LocalsSlotsPaddingBytes;

  // Functions returning scalar floating point types may need to convert values
  // from an in-register xmm value to the top of the x87 floating point stack.
  // This is done by a movp[sd] and an fld[sd]. Ensure there is enough scratch
  // space on the stack for this.
  const Type ReturnType = Func->getReturnType();
  if (!Traits::X86_PASS_SCALAR_FP_IN_XMM) {
    if (isScalarFloatingType(ReturnType)) {
      // Avoid misaligned double-precicion load/store.
      NeedsStackAlignment = true;
      SpillAreaSizeBytes =
          std::max(typeWidthInBytesOnStack(ReturnType), SpillAreaSizeBytes);
    }
  }

  // Align esp if necessary.
  if (NeedsStackAlignment) {
    uint32_t StackOffset =
        Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
    uint32_t StackSize =
        Traits::applyStackAlignment(StackOffset + SpillAreaSizeBytes);
    StackSize = Traits::applyStackAlignment(StackSize + maxOutArgsSizeBytes());
    SpillAreaSizeBytes = StackSize - StackOffset;
  } else {
    SpillAreaSizeBytes += maxOutArgsSizeBytes();
  }

  // Combine fixed allocations into SpillAreaSizeBytes if we are emitting the
  // fixed allocations in the prolog.
  if (PrologEmitsFixedAllocas)
    SpillAreaSizeBytes += FixedAllocaSizeBytes;
  if (SpillAreaSizeBytes) {
    // Generate "sub stackptr, SpillAreaSizeBytes"
    _sub_sp(Ctx->getConstantInt32(SpillAreaSizeBytes));
    // If the fixed allocas are aligned more than the stack frame, align the
    // stack pointer accordingly.
    if (PrologEmitsFixedAllocas &&
        FixedAllocaAlignBytes > Traits::X86_STACK_ALIGNMENT_BYTES) {
      assert(IsEbpBasedFrame);
      _and(getPhysicalRegister(getStackReg(), Traits::WordType),
           Ctx->getConstantInt32(-FixedAllocaAlignBytes));
    }
  }

  // Account for known-frame-offset alloca instructions that were not already
  // combined into the prolog.
  if (!PrologEmitsFixedAllocas)
    SpillAreaSizeBytes += FixedAllocaSizeBytes;

  Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);

  // Fill in stack offsets for stack args, and copy args into registers for
  // those that were register-allocated. Args are pushed right to left, so
  // Arg[0] is closest to the stack/frame pointer.
  Variable *FramePtr =
      getPhysicalRegister(getFrameOrStackReg(), Traits::WordType);
  size_t BasicFrameOffset =
      PreservedRegsSizeBytes + Traits::X86_RET_IP_SIZE_BYTES;
  if (!IsEbpBasedFrame)
    BasicFrameOffset += SpillAreaSizeBytes;

  emitGetIP(Node);

  const VarList &Args = Func->getArgs();
  size_t InArgsSizeBytes = 0;
  unsigned NumXmmArgs = 0;
  unsigned NumGPRArgs = 0;
  for (Variable *Arg : Args) {
    // Skip arguments passed in registers.
    if (isVectorType(Arg->getType())) {
      if (Traits::getRegisterForXmmArgNum(NumXmmArgs).hasValue()) {
        ++NumXmmArgs;
        continue;
      }
    } else if (isScalarFloatingType(Arg->getType())) {
      if (Traits::X86_PASS_SCALAR_FP_IN_XMM &&
          Traits::getRegisterForXmmArgNum(NumXmmArgs).hasValue()) {
        ++NumXmmArgs;
        continue;
      }
    } else {
      assert(isScalarIntegerType(Arg->getType()));
      if (Traits::getRegisterForGprArgNum(Traits::WordType, NumGPRArgs)
              .hasValue()) {
        ++NumGPRArgs;
        continue;
      }
    }
    // For esp-based frames where the allocas are done outside the prolog, the
    // esp value may not stabilize to its home value until after all the
    // fixed-size alloca instructions have executed. In this case, a stack
    // adjustment is needed when accessing in-args in order to copy them into
    // registers.
    size_t StackAdjBytes = 0;
    if (!IsEbpBasedFrame && !PrologEmitsFixedAllocas)
      StackAdjBytes -= FixedAllocaSizeBytes;
    finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, StackAdjBytes,
                           InArgsSizeBytes);
  }

  // Fill in stack offsets for locals.
  assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
                      SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
                      IsEbpBasedFrame);
  // Assign stack offsets to variables that have been linked to spilled
  // variables.
  for (Variable *Var : VariablesLinkedToSpillSlots) {
    const Variable *Root = Var->getLinkedToStackRoot();
    assert(Root != nullptr);
    Var->setStackOffset(Root->getStackOffset());
  }
  this->HasComputedFrame = true;

  // Optionally dump a human-readable summary of the computed frame layout.
  if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
    OstreamLocker L(Func->getContext());
    Ostream &Str = Func->getContext()->getStrDump();

    Str << "Stack layout:\n";
    uint32_t EspAdjustmentPaddingSize =
        SpillAreaSizeBytes - LocalsSpillAreaSize -
        GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes -
        maxOutArgsSizeBytes();
    Str << " in-args = " << InArgsSizeBytes << " bytes\n"
        << " return address = " << Traits::X86_RET_IP_SIZE_BYTES << " bytes\n"
        << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
        << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
        << " globals spill area = " << GlobalsSize << " bytes\n"
        << " globals-locals spill areas intermediate padding = "
        << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
        << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
        << " esp alignment padding = " << EspAdjustmentPaddingSize
        << " bytes\n";

    Str << "Stack details:\n"
        << " esp adjustment = " << SpillAreaSizeBytes << " bytes\n"
        << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
        << " outgoing args size = " << maxOutArgsSizeBytes() << " bytes\n"
        << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
        << " bytes\n"
        << " is ebp based = " << IsEbpBasedFrame << "\n";
  }
}
1251
/// Helper function for addProlog().
///
/// This assumes Arg is an argument passed on the stack. This sets the frame
/// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an
/// I64 arg that has been split into Lo and Hi components, it calls itself
/// recursively on the components, taking care to handle Lo first because of the
/// little-endian architecture. Lastly, this function generates an instruction
/// to copy Arg into its assigned register if applicable.
template <typename TraitsType>
void TargetX86Base<TraitsType>::finishArgumentLowering(
    Variable *Arg, Variable *FramePtr, size_t BasicFrameOffset,
    size_t StackAdjBytes, size_t &InArgsSizeBytes) {
  if (!Traits::Is64Bit) {
    if (auto *Arg64On32 = llvm::dyn_cast<Variable64On32>(Arg)) {
      Variable *Lo = Arg64On32->getLo();
      Variable *Hi = Arg64On32->getHi();
      // Recurse on Lo before Hi so the components receive ascending stack
      // offsets via the shared InArgsSizeBytes accumulator (little-endian).
      finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, StackAdjBytes,
                             InArgsSizeBytes);
      finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, StackAdjBytes,
                             InArgsSizeBytes);
      return;
    }
  }
  Type Ty = Arg->getType();
  if (isVectorType(Ty)) {
    // Vector in-args are aligned to the stack alignment boundary.
    InArgsSizeBytes = Traits::applyStackAlignment(InArgsSizeBytes);
  }
  Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
  InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
  if (Arg->hasReg()) {
    assert(Ty != IceType_i64 || Traits::Is64Bit);
    // Copy the register-allocated arg from its home stack slot into its
    // assigned register.
    auto *Mem = X86OperandMem::create(
        Func, Ty, FramePtr,
        Ctx->getConstantInt32(Arg->getStackOffset() + StackAdjBytes));
    if (isVectorType(Arg->getType())) {
      _movp(Arg, Mem);
    } else {
      _mov(Arg, Mem);
    }
    // This argument-copying instruction uses an explicit X86OperandMem
    // operand instead of a Variable, so its fill-from-stack operation has to
    // be tracked separately for statistics.
    Ctx->statsUpdateFills();
  }
}
1297
/// Emits the function epilog just before the final Ret in Node: restores the
/// stack pointer (or unlinks ebp), pops preserved registers in reverse push
/// order, and under sandboxing replaces the plain return with a sandboxed
/// return sequence.
template <typename TraitsType>
void TargetX86Base<TraitsType>::addEpilog(CfgNode *Node) {
  InstList &Insts = Node->getInsts();
  InstList::reverse_iterator RI, E;
  // Search backwards for the Ret instruction; bail out if the node has none.
  for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
    if (llvm::isa<typename Traits::Insts::Ret>(*RI))
      break;
  }
  if (RI == E)
    return;

  // Convert the reverse_iterator position into its corresponding (forward)
  // iterator position.
  InstList::iterator InsertPoint = RI.base();
  --InsertPoint;
  Context.init(Node);
  Context.setInsertPoint(InsertPoint);

  if (IsEbpBasedFrame) {
    _unlink_bp();
  } else {
    // add stackptr, SpillAreaSizeBytes
    if (SpillAreaSizeBytes != 0) {
      _add_sp(Ctx->getConstantInt32(SpillAreaSizeBytes));
    }
  }

  // Add pop instructions for preserved registers.
  SmallBitVector CalleeSaves = getRegisterSet(RegSet_CalleeSave, RegSet_None);
  SmallBitVector Popped(CalleeSaves.size());
  // Collect the registers to pop, canonicalized to their base register; the
  // frame pointer is restored by _unlink_bp() rather than popped here.
  for (int32_t i = CalleeSaves.size() - 1; i >= 0; --i) {
    const auto RegNum = RegNumT::fromInt(i);
    if (RegNum == getFrameReg() && IsEbpBasedFrame)
      continue;
    const RegNumT Canonical = Traits::getBaseReg(RegNum);
    if (CalleeSaves[i] && RegsUsed[i]) {
      Popped[Canonical] = true;
    }
  }
  // Pop in reverse order of the pushes emitted by addProlog().
  for (int32_t i = Popped.size() - 1; i >= 0; --i) {
    if (!Popped[i])
      continue;
    const auto RegNum = RegNumT::fromInt(i);
    assert(RegNum == Traits::getBaseReg(RegNum));
    _pop(getPhysicalRegister(RegNum, Traits::WordType));
  }

  if (!NeedSandboxing) {
    return;
  }
  emitSandboxedReturn();
  // Keep the return value (if any) live across the sandboxed return
  // sequence, then delete the original Ret.
  if (RI->getSrcSize()) {
    auto *RetValue = llvm::cast<Variable>(RI->getSrc(0));
    Context.insert<InstFakeUse>(RetValue);
  }
  RI->setDeleted();
}
1355
Jim Stichnothb9a84722016-08-01 13:18:36 -07001356template <typename TraitsType>
1357Inst *TargetX86Base<TraitsType>::createLoweredMove(Variable *Dest,
1358 Variable *SrcVar) {
1359 if (isVectorType(Dest->getType())) {
1360 return Traits::Insts::Movp::create(Func, Dest, SrcVar);
1361 }
1362 return Traits::Insts::Mov::create(Func, Dest, SrcVar);
1363}
1364
John Porto4a566862016-01-04 09:33:41 -08001365template <typename TraitsType> Type TargetX86Base<TraitsType>::stackSlotType() {
John Porto1d235422015-08-12 12:37:53 -07001366 return Traits::WordType;
John Porto7e93c622015-06-23 10:58:57 -07001367}
1368
John Porto4a566862016-01-04 09:33:41 -08001369template <typename TraitsType>
John Porto1d235422015-08-12 12:37:53 -07001370template <typename T>
John Porto1d235422015-08-12 12:37:53 -07001371typename std::enable_if<!T::Is64Bit, Operand>::type *
John Porto4a566862016-01-04 09:33:41 -08001372TargetX86Base<TraitsType>::loOperand(Operand *Operand) {
John Porto7e93c622015-06-23 10:58:57 -07001373 assert(Operand->getType() == IceType_i64 ||
1374 Operand->getType() == IceType_f64);
1375 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
1376 return Operand;
Andrew Scull6d47bcd2015-09-17 17:10:05 -07001377 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand))
1378 return Var64On32->getLo();
Jan Voungfbdd2442015-07-15 12:36:20 -07001379 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
1380 auto *ConstInt = llvm::dyn_cast<ConstantInteger32>(
John Porto7e93c622015-06-23 10:58:57 -07001381 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue())));
Jan Voungfbdd2442015-07-15 12:36:20 -07001382 // Check if we need to blind/pool the constant.
John Porto7e93c622015-06-23 10:58:57 -07001383 return legalize(ConstInt);
1384 }
John Porto4a566862016-01-04 09:33:41 -08001385 if (auto *Mem = llvm::dyn_cast<X86OperandMem>(Operand)) {
1386 auto *MemOperand = X86OperandMem::create(
John Porto7e93c622015-06-23 10:58:57 -07001387 Func, IceType_i32, Mem->getBase(), Mem->getOffset(), Mem->getIndex(),
John Porto56958cb2016-01-14 09:18:18 -08001388 Mem->getShift(), Mem->getSegmentRegister(), Mem->getIsRebased());
John Porto7e93c622015-06-23 10:58:57 -07001389 // Test if we should randomize or pool the offset, if so randomize it or
1390 // pool it then create mem operand with the blinded/pooled constant.
1391 // Otherwise, return the mem operand as ordinary mem operand.
1392 return legalize(MemOperand);
1393 }
1394 llvm_unreachable("Unsupported operand type");
1395 return nullptr;
1396}
1397
John Porto4a566862016-01-04 09:33:41 -08001398template <typename TraitsType>
John Porto1d235422015-08-12 12:37:53 -07001399template <typename T>
1400typename std::enable_if<!T::Is64Bit, Operand>::type *
John Porto4a566862016-01-04 09:33:41 -08001401TargetX86Base<TraitsType>::hiOperand(Operand *Operand) {
John Porto7e93c622015-06-23 10:58:57 -07001402 assert(Operand->getType() == IceType_i64 ||
1403 Operand->getType() == IceType_f64);
1404 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
1405 return Operand;
Andrew Scull6d47bcd2015-09-17 17:10:05 -07001406 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand))
1407 return Var64On32->getHi();
Jan Voungfbdd2442015-07-15 12:36:20 -07001408 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
1409 auto *ConstInt = llvm::dyn_cast<ConstantInteger32>(
John Porto7e93c622015-06-23 10:58:57 -07001410 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue() >> 32)));
Jan Voungfbdd2442015-07-15 12:36:20 -07001411 // Check if we need to blind/pool the constant.
John Porto7e93c622015-06-23 10:58:57 -07001412 return legalize(ConstInt);
1413 }
John Porto4a566862016-01-04 09:33:41 -08001414 if (auto *Mem = llvm::dyn_cast<X86OperandMem>(Operand)) {
John Porto7e93c622015-06-23 10:58:57 -07001415 Constant *Offset = Mem->getOffset();
1416 if (Offset == nullptr) {
1417 Offset = Ctx->getConstantInt32(4);
Jan Voungfbdd2442015-07-15 12:36:20 -07001418 } else if (auto *IntOffset = llvm::dyn_cast<ConstantInteger32>(Offset)) {
John Porto7e93c622015-06-23 10:58:57 -07001419 Offset = Ctx->getConstantInt32(4 + IntOffset->getValue());
Jan Voungfbdd2442015-07-15 12:36:20 -07001420 } else if (auto *SymOffset = llvm::dyn_cast<ConstantRelocatable>(Offset)) {
John Porto7e93c622015-06-23 10:58:57 -07001421 assert(!Utils::WouldOverflowAdd(SymOffset->getOffset(), 4));
1422 Offset =
Jim Stichnoth98ba0062016-03-07 09:26:22 -08001423 Ctx->getConstantSym(4 + SymOffset->getOffset(), SymOffset->getName());
John Porto7e93c622015-06-23 10:58:57 -07001424 }
John Porto4a566862016-01-04 09:33:41 -08001425 auto *MemOperand = X86OperandMem::create(
John Porto7e93c622015-06-23 10:58:57 -07001426 Func, IceType_i32, Mem->getBase(), Offset, Mem->getIndex(),
John Porto56958cb2016-01-14 09:18:18 -08001427 Mem->getShift(), Mem->getSegmentRegister(), Mem->getIsRebased());
John Porto7e93c622015-06-23 10:58:57 -07001428 // Test if the Offset is an eligible i32 constants for randomization and
1429 // pooling. Blind/pool it if it is. Otherwise return as oridinary mem
1430 // operand.
1431 return legalize(MemOperand);
1432 }
1433 llvm_unreachable("Unsupported operand type");
1434 return nullptr;
1435}
1436
John Porto4a566862016-01-04 09:33:41 -08001437template <typename TraitsType>
John Portoe82b5602016-02-24 15:58:55 -08001438SmallBitVector
John Porto4a566862016-01-04 09:33:41 -08001439TargetX86Base<TraitsType>::getRegisterSet(RegSetMask Include,
1440 RegSetMask Exclude) const {
Karl Schimpfd4699942016-04-02 09:55:31 -07001441 return Traits::getRegisterSet(getFlags(), Include, Exclude);
John Porto7e93c622015-06-23 10:58:57 -07001442}
1443
John Porto4a566862016-01-04 09:33:41 -08001444template <typename TraitsType>
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08001445void TargetX86Base<TraitsType>::lowerAlloca(const InstAlloca *Instr) {
Andrew Scull57e12682015-09-16 11:30:19 -07001446 // Conservatively require the stack to be aligned. Some stack adjustment
1447 // operations implemented below assume that the stack is aligned before the
1448 // alloca. All the alloca code ensures that the stack alignment is preserved
1449 // after the alloca. The stack alignment restriction can be relaxed in some
1450 // cases.
John Porto7e93c622015-06-23 10:58:57 -07001451 NeedsStackAlignment = true;
1452
John Porto7e93c622015-06-23 10:58:57 -07001453 // For default align=0, set it to the real value 1, to avoid any
1454 // bit-manipulation problems below.
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08001455 const uint32_t AlignmentParam = std::max(1u, Instr->getAlignInBytes());
John Porto7e93c622015-06-23 10:58:57 -07001456
1457 // LLVM enforces power of 2 alignment.
1458 assert(llvm::isPowerOf2_32(AlignmentParam));
1459 assert(llvm::isPowerOf2_32(Traits::X86_STACK_ALIGNMENT_BYTES));
1460
David Sehr26217e32015-11-26 13:03:50 -08001461 const uint32_t Alignment =
John Porto7e93c622015-06-23 10:58:57 -07001462 std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES);
David Sehr26217e32015-11-26 13:03:50 -08001463 const bool OverAligned = Alignment > Traits::X86_STACK_ALIGNMENT_BYTES;
Jim Stichnothdd6dcfa2016-04-18 12:52:09 -07001464 const bool OptM1 = Func->getOptLevel() == Opt_m1;
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08001465 const bool AllocaWithKnownOffset = Instr->getKnownFrameOffset();
David Sehr26217e32015-11-26 13:03:50 -08001466 const bool UseFramePointer =
1467 hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1;
1468
1469 if (UseFramePointer)
David Sehr4318a412015-11-11 15:01:55 -08001470 setHasFramePointer();
David Sehr26217e32015-11-26 13:03:50 -08001471
John Porto008f4ce2015-12-24 13:22:18 -08001472 Variable *esp = getPhysicalRegister(getStackReg(), Traits::WordType);
David Sehr26217e32015-11-26 13:03:50 -08001473 if (OverAligned) {
John Porto7e93c622015-06-23 10:58:57 -07001474 _and(esp, Ctx->getConstantInt32(-Alignment));
1475 }
David Sehr26217e32015-11-26 13:03:50 -08001476
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08001477 Variable *Dest = Instr->getDest();
1478 Operand *TotalSize = legalize(Instr->getSizeInBytes());
David Sehr26217e32015-11-26 13:03:50 -08001479
John Porto7e93c622015-06-23 10:58:57 -07001480 if (const auto *ConstantTotalSize =
1481 llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
David Sehr26217e32015-11-26 13:03:50 -08001482 const uint32_t Value =
1483 Utils::applyAlignment(ConstantTotalSize->getValue(), Alignment);
John Porto56958cb2016-01-14 09:18:18 -08001484 if (UseFramePointer) {
1485 _sub_sp(Ctx->getConstantInt32(Value));
1486 } else {
David Sehr26217e32015-11-26 13:03:50 -08001487 // If we don't need a Frame Pointer, this alloca has a known offset to the
1488 // stack pointer. We don't need adjust the stack pointer, nor assign any
1489 // value to Dest, as Dest is rematerializable.
1490 assert(Dest->isRematerializable());
Jim Stichnoth55f931f2015-09-23 16:33:08 -07001491 FixedAllocaSizeBytes += Value;
John Porto1d937a82015-12-17 06:19:34 -08001492 Context.insert<InstFakeDef>(Dest);
Jim Stichnoth55f931f2015-09-23 16:33:08 -07001493 }
John Porto7e93c622015-06-23 10:58:57 -07001494 } else {
Andrew Scull57e12682015-09-16 11:30:19 -07001495 // Non-constant sizes need to be adjusted to the next highest multiple of
1496 // the required alignment at runtime.
John Porto56958cb2016-01-14 09:18:18 -08001497 Variable *T = nullptr;
1498 if (Traits::Is64Bit && TotalSize->getType() != IceType_i64 &&
1499 !NeedSandboxing) {
1500 T = makeReg(IceType_i64);
John Porto008f4ce2015-12-24 13:22:18 -08001501 _movzx(T, TotalSize);
1502 } else {
John Porto56958cb2016-01-14 09:18:18 -08001503 T = makeReg(IceType_i32);
John Porto008f4ce2015-12-24 13:22:18 -08001504 _mov(T, TotalSize);
1505 }
John Porto7e93c622015-06-23 10:58:57 -07001506 _add(T, Ctx->getConstantInt32(Alignment - 1));
1507 _and(T, Ctx->getConstantInt32(-Alignment));
John Porto56958cb2016-01-14 09:18:18 -08001508 _sub_sp(T);
John Porto7e93c622015-06-23 10:58:57 -07001509 }
David Sehr26217e32015-11-26 13:03:50 -08001510 // Add enough to the returned address to account for the out args area.
1511 uint32_t OutArgsSize = maxOutArgsSizeBytes();
1512 if (OutArgsSize > 0) {
1513 Variable *T = makeReg(IceType_i32);
John Porto4a566862016-01-04 09:33:41 -08001514 auto *CalculateOperand = X86OperandMem::create(
John Porto56958cb2016-01-14 09:18:18 -08001515 Func, IceType_void, esp, Ctx->getConstantInt(IceType_i32, OutArgsSize));
David Sehr26217e32015-11-26 13:03:50 -08001516 _lea(T, CalculateOperand);
1517 _mov(Dest, T);
1518 } else {
1519 _mov(Dest, esp);
1520 }
John Porto7e93c622015-06-23 10:58:57 -07001521}
1522
David Sehr0c68bef2016-01-20 10:00:23 -08001523template <typename TraitsType>
1524void TargetX86Base<TraitsType>::lowerArguments() {
Jim Stichnothf5319312016-06-10 12:21:17 -07001525 const bool OptM1 = Func->getOptLevel() == Opt_m1;
David Sehr0c68bef2016-01-20 10:00:23 -08001526 VarList &Args = Func->getArgs();
1527 unsigned NumXmmArgs = 0;
1528 bool XmmSlotsRemain = true;
1529 unsigned NumGprArgs = 0;
1530 bool GprSlotsRemain = true;
1531
1532 Context.init(Func->getEntryNode());
1533 Context.setInsertPoint(Context.getCur());
1534
1535 for (SizeT i = 0, End = Args.size();
1536 i < End && (XmmSlotsRemain || GprSlotsRemain); ++i) {
1537 Variable *Arg = Args[i];
1538 Type Ty = Arg->getType();
1539 Variable *RegisterArg = nullptr;
Reed Kotler5fa0a5f2016-02-15 20:01:24 -08001540 RegNumT RegNum;
David Sehr0c68bef2016-01-20 10:00:23 -08001541 if (isVectorType(Ty)) {
1542 RegNum = Traits::getRegisterForXmmArgNum(NumXmmArgs);
Reed Kotler5fa0a5f2016-02-15 20:01:24 -08001543 if (RegNum.hasNoValue()) {
David Sehr0c68bef2016-01-20 10:00:23 -08001544 XmmSlotsRemain = false;
1545 continue;
1546 }
1547 ++NumXmmArgs;
1548 RegisterArg = Func->makeVariable(Ty);
1549 } else if (isScalarFloatingType(Ty)) {
1550 if (!Traits::X86_PASS_SCALAR_FP_IN_XMM) {
1551 continue;
1552 }
1553 RegNum = Traits::getRegisterForXmmArgNum(NumXmmArgs);
Reed Kotler5fa0a5f2016-02-15 20:01:24 -08001554 if (RegNum.hasNoValue()) {
David Sehr0c68bef2016-01-20 10:00:23 -08001555 XmmSlotsRemain = false;
1556 continue;
1557 }
1558 ++NumXmmArgs;
1559 RegisterArg = Func->makeVariable(Ty);
1560 } else if (isScalarIntegerType(Ty)) {
1561 RegNum = Traits::getRegisterForGprArgNum(Ty, NumGprArgs);
Reed Kotler5fa0a5f2016-02-15 20:01:24 -08001562 if (RegNum.hasNoValue()) {
David Sehr0c68bef2016-01-20 10:00:23 -08001563 GprSlotsRemain = false;
1564 continue;
1565 }
1566 ++NumGprArgs;
1567 RegisterArg = Func->makeVariable(Ty);
1568 }
Reed Kotler5fa0a5f2016-02-15 20:01:24 -08001569 assert(RegNum.hasValue());
David Sehr0c68bef2016-01-20 10:00:23 -08001570 assert(RegisterArg != nullptr);
1571 // Replace Arg in the argument list with the home register. Then generate
1572 // an instruction in the prolog to copy the home register to the assigned
1573 // location of Arg.
1574 if (BuildDefs::dump())
Jim Stichnotha91c3412016-04-05 15:31:43 -07001575 RegisterArg->setName(Func, "home_reg:" + Arg->getName());
David Sehr0c68bef2016-01-20 10:00:23 -08001576 RegisterArg->setRegNum(RegNum);
1577 RegisterArg->setIsArg();
1578 Arg->setIsArg(false);
1579
1580 Args[i] = RegisterArg;
Jim Stichnothf5319312016-06-10 12:21:17 -07001581 // When not Om1, do the assignment through a temporary, instead of directly
1582 // from the pre-colored variable, so that a subsequent availabilityGet()
1583 // call has a chance to work. (In Om1, don't bother creating extra
1584 // instructions with extra variables to register-allocate.)
1585 if (OptM1) {
1586 Context.insert<InstAssign>(Arg, RegisterArg);
1587 } else {
1588 Variable *Tmp = makeReg(RegisterArg->getType());
1589 Context.insert<InstAssign>(Tmp, RegisterArg);
1590 Context.insert<InstAssign>(Arg, Tmp);
1591 }
David Sehr0c68bef2016-01-20 10:00:23 -08001592 }
Jim Stichnothf5319312016-06-10 12:21:17 -07001593 if (!OptM1)
1594 Context.availabilityUpdate();
David Sehr0c68bef2016-01-20 10:00:23 -08001595}
1596
Andrew Scull57e12682015-09-16 11:30:19 -07001597/// Strength-reduce scalar integer multiplication by a constant (for i32 or
1598/// narrower) for certain constants. The lea instruction can be used to multiply
1599/// by 3, 5, or 9, and the lsh instruction can be used to multiply by powers of
1600/// 2. These can be combined such that e.g. multiplying by 100 can be done as 2
1601/// lea-based multiplies by 5, combined with left-shifting by 2.
John Porto4a566862016-01-04 09:33:41 -08001602template <typename TraitsType>
1603bool TargetX86Base<TraitsType>::optimizeScalarMul(Variable *Dest, Operand *Src0,
1604 int32_t Src1) {
Andrew Scull57e12682015-09-16 11:30:19 -07001605 // Disable this optimization for Om1 and O0, just to keep things simple
1606 // there.
Jim Stichnothdd6dcfa2016-04-18 12:52:09 -07001607 if (Func->getOptLevel() < Opt_1)
John Porto7e93c622015-06-23 10:58:57 -07001608 return false;
1609 Type Ty = Dest->getType();
John Porto7e93c622015-06-23 10:58:57 -07001610 if (Src1 == -1) {
John Porto56958cb2016-01-14 09:18:18 -08001611 Variable *T = nullptr;
John Porto7e93c622015-06-23 10:58:57 -07001612 _mov(T, Src0);
1613 _neg(T);
1614 _mov(Dest, T);
1615 return true;
1616 }
1617 if (Src1 == 0) {
1618 _mov(Dest, Ctx->getConstantZero(Ty));
1619 return true;
1620 }
1621 if (Src1 == 1) {
John Porto56958cb2016-01-14 09:18:18 -08001622 Variable *T = nullptr;
John Porto7e93c622015-06-23 10:58:57 -07001623 _mov(T, Src0);
1624 _mov(Dest, T);
1625 return true;
1626 }
1627 // Don't bother with the edge case where Src1 == MININT.
1628 if (Src1 == -Src1)
1629 return false;
1630 const bool Src1IsNegative = Src1 < 0;
1631 if (Src1IsNegative)
1632 Src1 = -Src1;
1633 uint32_t Count9 = 0;
1634 uint32_t Count5 = 0;
1635 uint32_t Count3 = 0;
1636 uint32_t Count2 = 0;
1637 uint32_t CountOps = 0;
1638 while (Src1 > 1) {
1639 if (Src1 % 9 == 0) {
1640 ++CountOps;
1641 ++Count9;
1642 Src1 /= 9;
1643 } else if (Src1 % 5 == 0) {
1644 ++CountOps;
1645 ++Count5;
1646 Src1 /= 5;
1647 } else if (Src1 % 3 == 0) {
1648 ++CountOps;
1649 ++Count3;
1650 Src1 /= 3;
1651 } else if (Src1 % 2 == 0) {
1652 if (Count2 == 0)
1653 ++CountOps;
1654 ++Count2;
1655 Src1 /= 2;
1656 } else {
1657 return false;
1658 }
1659 }
John Porto56958cb2016-01-14 09:18:18 -08001660 // Lea optimization only works for i16 and i32 types, not i8.
1661 if (Ty != IceType_i32 && !(Traits::Is64Bit && Ty == IceType_i64) &&
1662 (Count3 || Count5 || Count9))
John Porto7e93c622015-06-23 10:58:57 -07001663 return false;
Andrew Scull57e12682015-09-16 11:30:19 -07001664 // Limit the number of lea/shl operations for a single multiply, to a
1665 // somewhat arbitrary choice of 3.
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07001666 constexpr uint32_t MaxOpsForOptimizedMul = 3;
John Porto7e93c622015-06-23 10:58:57 -07001667 if (CountOps > MaxOpsForOptimizedMul)
1668 return false;
John Porto56958cb2016-01-14 09:18:18 -08001669 Variable *T = makeReg(Traits::WordType);
1670 if (typeWidthInBytes(Src0->getType()) < typeWidthInBytes(T->getType())) {
1671 _movzx(T, Src0);
1672 } else {
1673 _mov(T, Src0);
1674 }
John Porto7e93c622015-06-23 10:58:57 -07001675 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1676 for (uint32_t i = 0; i < Count9; ++i) {
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07001677 constexpr uint16_t Shift = 3; // log2(9-1)
John Porto4a566862016-01-04 09:33:41 -08001678 _lea(T, X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
John Porto7e93c622015-06-23 10:58:57 -07001679 }
1680 for (uint32_t i = 0; i < Count5; ++i) {
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07001681 constexpr uint16_t Shift = 2; // log2(5-1)
John Porto4a566862016-01-04 09:33:41 -08001682 _lea(T, X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
John Porto7e93c622015-06-23 10:58:57 -07001683 }
1684 for (uint32_t i = 0; i < Count3; ++i) {
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07001685 constexpr uint16_t Shift = 1; // log2(3-1)
John Porto4a566862016-01-04 09:33:41 -08001686 _lea(T, X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
John Porto7e93c622015-06-23 10:58:57 -07001687 }
1688 if (Count2) {
1689 _shl(T, Ctx->getConstantInt(Ty, Count2));
1690 }
1691 if (Src1IsNegative)
1692 _neg(T);
1693 _mov(Dest, T);
1694 return true;
1695}
1696
John Porto4a566862016-01-04 09:33:41 -08001697template <typename TraitsType>
1698void TargetX86Base<TraitsType>::lowerShift64(InstArithmetic::OpKind Op,
1699 Operand *Src0Lo, Operand *Src0Hi,
1700 Operand *Src1Lo, Variable *DestLo,
1701 Variable *DestHi) {
David Sehr188eae52015-09-24 11:42:55 -07001702 // TODO: Refactor the similarities between Shl, Lshr, and Ashr.
1703 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
1704 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1705 Constant *SignExtend = Ctx->getConstantInt32(0x1f);
1706 if (auto *ConstantShiftAmount = llvm::dyn_cast<ConstantInteger32>(Src1Lo)) {
1707 uint32_t ShiftAmount = ConstantShiftAmount->getValue();
1708 if (ShiftAmount > 32) {
1709 Constant *ReducedShift = Ctx->getConstantInt32(ShiftAmount - 32);
1710 switch (Op) {
1711 default:
1712 assert(0 && "non-shift op");
1713 break;
1714 case InstArithmetic::Shl: {
1715 // a=b<<c ==>
1716 // t2 = b.lo
1717 // t2 = shl t2, ShiftAmount-32
1718 // t3 = t2
1719 // t2 = 0
1720 _mov(T_2, Src0Lo);
1721 _shl(T_2, ReducedShift);
1722 _mov(DestHi, T_2);
1723 _mov(DestLo, Zero);
1724 } break;
1725 case InstArithmetic::Lshr: {
1726 // a=b>>c (unsigned) ==>
1727 // t2 = b.hi
1728 // t2 = shr t2, ShiftAmount-32
1729 // a.lo = t2
1730 // a.hi = 0
1731 _mov(T_2, Src0Hi);
1732 _shr(T_2, ReducedShift);
1733 _mov(DestLo, T_2);
1734 _mov(DestHi, Zero);
1735 } break;
1736 case InstArithmetic::Ashr: {
1737 // a=b>>c (signed) ==>
1738 // t3 = b.hi
1739 // t3 = sar t3, 0x1f
1740 // t2 = b.hi
1741 // t2 = shrd t2, t3, ShiftAmount-32
1742 // a.lo = t2
1743 // a.hi = t3
1744 _mov(T_3, Src0Hi);
1745 _sar(T_3, SignExtend);
1746 _mov(T_2, Src0Hi);
1747 _shrd(T_2, T_3, ReducedShift);
1748 _mov(DestLo, T_2);
1749 _mov(DestHi, T_3);
1750 } break;
1751 }
1752 } else if (ShiftAmount == 32) {
1753 switch (Op) {
1754 default:
1755 assert(0 && "non-shift op");
1756 break;
1757 case InstArithmetic::Shl: {
1758 // a=b<<c ==>
1759 // t2 = b.lo
1760 // a.hi = t2
1761 // a.lo = 0
1762 _mov(T_2, Src0Lo);
1763 _mov(DestHi, T_2);
1764 _mov(DestLo, Zero);
1765 } break;
1766 case InstArithmetic::Lshr: {
1767 // a=b>>c (unsigned) ==>
1768 // t2 = b.hi
1769 // a.lo = t2
1770 // a.hi = 0
1771 _mov(T_2, Src0Hi);
1772 _mov(DestLo, T_2);
1773 _mov(DestHi, Zero);
1774 } break;
1775 case InstArithmetic::Ashr: {
1776 // a=b>>c (signed) ==>
1777 // t2 = b.hi
1778 // a.lo = t2
1779 // t3 = b.hi
1780 // t3 = sar t3, 0x1f
1781 // a.hi = t3
1782 _mov(T_2, Src0Hi);
1783 _mov(DestLo, T_2);
1784 _mov(T_3, Src0Hi);
1785 _sar(T_3, SignExtend);
1786 _mov(DestHi, T_3);
1787 } break;
1788 }
1789 } else {
1790 // COMMON PREFIX OF: a=b SHIFT_OP c ==>
1791 // t2 = b.lo
1792 // t3 = b.hi
1793 _mov(T_2, Src0Lo);
1794 _mov(T_3, Src0Hi);
1795 switch (Op) {
1796 default:
1797 assert(0 && "non-shift op");
1798 break;
1799 case InstArithmetic::Shl: {
1800 // a=b<<c ==>
1801 // t3 = shld t3, t2, ShiftAmount
1802 // t2 = shl t2, ShiftAmount
1803 _shld(T_3, T_2, ConstantShiftAmount);
1804 _shl(T_2, ConstantShiftAmount);
1805 } break;
1806 case InstArithmetic::Lshr: {
1807 // a=b>>c (unsigned) ==>
1808 // t2 = shrd t2, t3, ShiftAmount
1809 // t3 = shr t3, ShiftAmount
1810 _shrd(T_2, T_3, ConstantShiftAmount);
1811 _shr(T_3, ConstantShiftAmount);
1812 } break;
1813 case InstArithmetic::Ashr: {
1814 // a=b>>c (signed) ==>
1815 // t2 = shrd t2, t3, ShiftAmount
1816 // t3 = sar t3, ShiftAmount
1817 _shrd(T_2, T_3, ConstantShiftAmount);
1818 _sar(T_3, ConstantShiftAmount);
1819 } break;
1820 }
1821 // COMMON SUFFIX OF: a=b SHIFT_OP c ==>
1822 // a.lo = t2
1823 // a.hi = t3
1824 _mov(DestLo, T_2);
1825 _mov(DestHi, T_3);
1826 }
1827 } else {
1828 // NON-CONSTANT CASES.
1829 Constant *BitTest = Ctx->getConstantInt32(0x20);
John Porto4a566862016-01-04 09:33:41 -08001830 InstX86Label *Label = InstX86Label::create(Func, this);
David Sehr188eae52015-09-24 11:42:55 -07001831 // COMMON PREFIX OF: a=b SHIFT_OP c ==>
1832 // t1:ecx = c.lo & 0xff
1833 // t2 = b.lo
1834 // t3 = b.hi
Jim Stichnothc59288b2015-11-09 11:38:40 -08001835 T_1 = copyToReg8(Src1Lo, Traits::RegisterSet::Reg_cl);
David Sehr188eae52015-09-24 11:42:55 -07001836 _mov(T_2, Src0Lo);
1837 _mov(T_3, Src0Hi);
1838 switch (Op) {
1839 default:
1840 assert(0 && "non-shift op");
1841 break;
1842 case InstArithmetic::Shl: {
1843 // a=b<<c ==>
1844 // t3 = shld t3, t2, t1
1845 // t2 = shl t2, t1
1846 // test t1, 0x20
1847 // je L1
1848 // use(t3)
1849 // t3 = t2
1850 // t2 = 0
1851 _shld(T_3, T_2, T_1);
1852 _shl(T_2, T_1);
1853 _test(T_1, BitTest);
1854 _br(Traits::Cond::Br_e, Label);
Jim Stichnoth230d4102015-09-25 17:40:32 -07001855 // T_2 and T_3 are being assigned again because of the intra-block control
David Sehre3984282015-12-15 17:34:55 -08001856 // flow, so we need to use _redefined to avoid liveness problems.
1857 _redefined(_mov(T_3, T_2));
1858 _redefined(_mov(T_2, Zero));
David Sehr188eae52015-09-24 11:42:55 -07001859 } break;
1860 case InstArithmetic::Lshr: {
1861 // a=b>>c (unsigned) ==>
1862 // t2 = shrd t2, t3, t1
1863 // t3 = shr t3, t1
1864 // test t1, 0x20
1865 // je L1
1866 // use(t2)
1867 // t2 = t3
1868 // t3 = 0
1869 _shrd(T_2, T_3, T_1);
1870 _shr(T_3, T_1);
1871 _test(T_1, BitTest);
1872 _br(Traits::Cond::Br_e, Label);
Jim Stichnoth230d4102015-09-25 17:40:32 -07001873 // T_2 and T_3 are being assigned again because of the intra-block control
David Sehre3984282015-12-15 17:34:55 -08001874 // flow, so we need to use _redefined to avoid liveness problems.
1875 _redefined(_mov(T_2, T_3));
1876 _redefined(_mov(T_3, Zero));
David Sehr188eae52015-09-24 11:42:55 -07001877 } break;
1878 case InstArithmetic::Ashr: {
1879 // a=b>>c (signed) ==>
1880 // t2 = shrd t2, t3, t1
1881 // t3 = sar t3, t1
1882 // test t1, 0x20
1883 // je L1
1884 // use(t2)
1885 // t2 = t3
1886 // t3 = sar t3, 0x1f
1887 Constant *SignExtend = Ctx->getConstantInt32(0x1f);
1888 _shrd(T_2, T_3, T_1);
1889 _sar(T_3, T_1);
1890 _test(T_1, BitTest);
1891 _br(Traits::Cond::Br_e, Label);
Jim Stichnoth230d4102015-09-25 17:40:32 -07001892 // T_2 and T_3 are being assigned again because of the intra-block control
David Sehre3984282015-12-15 17:34:55 -08001893 // flow, so T_2 needs to use _redefined to avoid liveness problems. T_3
1894 // doesn't need special treatment because it is reassigned via _sar
1895 // instead of _mov.
1896 _redefined(_mov(T_2, T_3));
David Sehr188eae52015-09-24 11:42:55 -07001897 _sar(T_3, SignExtend);
1898 } break;
1899 }
1900 // COMMON SUFFIX OF: a=b SHIFT_OP c ==>
1901 // L1:
1902 // a.lo = t2
1903 // a.hi = t3
1904 Context.insert(Label);
1905 _mov(DestLo, T_2);
1906 _mov(DestHi, T_3);
1907 }
1908}
1909
John Porto4a566862016-01-04 09:33:41 -08001910template <typename TraitsType>
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08001911void TargetX86Base<TraitsType>::lowerArithmetic(const InstArithmetic *Instr) {
1912 Variable *Dest = Instr->getDest();
Jim Stichnoth3607b6c2015-11-13 14:28:23 -08001913 if (Dest->isRematerializable()) {
John Porto1d937a82015-12-17 06:19:34 -08001914 Context.insert<InstFakeDef>(Dest);
Jim Stichnoth3607b6c2015-11-13 14:28:23 -08001915 return;
1916 }
Jim Stichnothc59288b2015-11-09 11:38:40 -08001917 Type Ty = Dest->getType();
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08001918 Operand *Src0 = legalize(Instr->getSrc(0));
1919 Operand *Src1 = legalize(Instr->getSrc(1));
1920 if (Instr->isCommutative()) {
David Sehr487bad02015-10-06 17:41:26 -07001921 uint32_t SwapCount = 0;
1922 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1)) {
John Porto7e93c622015-06-23 10:58:57 -07001923 std::swap(Src0, Src1);
David Sehr487bad02015-10-06 17:41:26 -07001924 ++SwapCount;
1925 }
1926 if (llvm::isa<Constant>(Src0) && !llvm::isa<Constant>(Src1)) {
John Porto7e93c622015-06-23 10:58:57 -07001927 std::swap(Src0, Src1);
David Sehr487bad02015-10-06 17:41:26 -07001928 ++SwapCount;
1929 }
1930 // Improve two-address code patterns by avoiding a copy to the dest
1931 // register when one of the source operands ends its lifetime here.
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08001932 if (!Instr->isLastUse(Src0) && Instr->isLastUse(Src1)) {
David Sehr487bad02015-10-06 17:41:26 -07001933 std::swap(Src0, Src1);
1934 ++SwapCount;
1935 }
1936 assert(SwapCount <= 1);
Karl Schimpfa313a122015-10-08 10:40:57 -07001937 (void)SwapCount;
John Porto7e93c622015-06-23 10:58:57 -07001938 }
Jim Stichnothc59288b2015-11-09 11:38:40 -08001939 if (!Traits::Is64Bit && Ty == IceType_i64) {
John Porto1d235422015-08-12 12:37:53 -07001940 // These x86-32 helper-call-involved instructions are lowered in this
Andrew Scull57e12682015-09-16 11:30:19 -07001941 // separate switch. This is because loOperand() and hiOperand() may insert
1942 // redundant instructions for constant blinding and pooling. Such redundant
1943 // instructions will fail liveness analysis under -Om1 setting. And,
1944 // actually these arguments do not need to be processed with loOperand()
1945 // and hiOperand() to be used.
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08001946 switch (Instr->getOp()) {
David Sehr26217e32015-11-26 13:03:50 -08001947 case InstArithmetic::Udiv:
1948 case InstArithmetic::Sdiv:
1949 case InstArithmetic::Urem:
1950 case InstArithmetic::Srem:
1951 llvm::report_fatal_error("Helper call was expected");
John Porto7e93c622015-06-23 10:58:57 -07001952 return;
John Porto7e93c622015-06-23 10:58:57 -07001953 default:
1954 break;
1955 }
1956
Jim Stichnoth54f3d512015-12-11 09:53:00 -08001957 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
1958 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
John Porto7e93c622015-06-23 10:58:57 -07001959 Operand *Src0Lo = loOperand(Src0);
1960 Operand *Src0Hi = hiOperand(Src0);
1961 Operand *Src1Lo = loOperand(Src1);
1962 Operand *Src1Hi = hiOperand(Src1);
1963 Variable *T_Lo = nullptr, *T_Hi = nullptr;
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08001964 switch (Instr->getOp()) {
John Porto7e93c622015-06-23 10:58:57 -07001965 case InstArithmetic::_num:
1966 llvm_unreachable("Unknown arithmetic operator");
1967 break;
1968 case InstArithmetic::Add:
1969 _mov(T_Lo, Src0Lo);
1970 _add(T_Lo, Src1Lo);
1971 _mov(DestLo, T_Lo);
1972 _mov(T_Hi, Src0Hi);
1973 _adc(T_Hi, Src1Hi);
1974 _mov(DestHi, T_Hi);
1975 break;
1976 case InstArithmetic::And:
1977 _mov(T_Lo, Src0Lo);
1978 _and(T_Lo, Src1Lo);
1979 _mov(DestLo, T_Lo);
1980 _mov(T_Hi, Src0Hi);
1981 _and(T_Hi, Src1Hi);
1982 _mov(DestHi, T_Hi);
1983 break;
1984 case InstArithmetic::Or:
1985 _mov(T_Lo, Src0Lo);
1986 _or(T_Lo, Src1Lo);
1987 _mov(DestLo, T_Lo);
1988 _mov(T_Hi, Src0Hi);
1989 _or(T_Hi, Src1Hi);
1990 _mov(DestHi, T_Hi);
1991 break;
1992 case InstArithmetic::Xor:
1993 _mov(T_Lo, Src0Lo);
1994 _xor(T_Lo, Src1Lo);
1995 _mov(DestLo, T_Lo);
1996 _mov(T_Hi, Src0Hi);
1997 _xor(T_Hi, Src1Hi);
1998 _mov(DestHi, T_Hi);
1999 break;
2000 case InstArithmetic::Sub:
2001 _mov(T_Lo, Src0Lo);
2002 _sub(T_Lo, Src1Lo);
2003 _mov(DestLo, T_Lo);
2004 _mov(T_Hi, Src0Hi);
2005 _sbb(T_Hi, Src1Hi);
2006 _mov(DestHi, T_Hi);
2007 break;
2008 case InstArithmetic::Mul: {
2009 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
John Porto5d0acff2015-06-30 15:29:21 -07002010 Variable *T_4Lo = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
2011 Variable *T_4Hi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
John Porto7e93c622015-06-23 10:58:57 -07002012 // gcc does the following:
2013 // a=b*c ==>
2014 // t1 = b.hi; t1 *=(imul) c.lo
2015 // t2 = c.hi; t2 *=(imul) b.lo
2016 // t3:eax = b.lo
2017 // t4.hi:edx,t4.lo:eax = t3:eax *(mul) c.lo
2018 // a.lo = t4.lo
2019 // t4.hi += t1
2020 // t4.hi += t2
2021 // a.hi = t4.hi
2022 // The mul instruction cannot take an immediate operand.
2023 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Mem);
2024 _mov(T_1, Src0Hi);
2025 _imul(T_1, Src1Lo);
John Porto5d0acff2015-06-30 15:29:21 -07002026 _mov(T_3, Src0Lo, Traits::RegisterSet::Reg_eax);
John Porto7e93c622015-06-23 10:58:57 -07002027 _mul(T_4Lo, T_3, Src1Lo);
Andrew Scull57e12682015-09-16 11:30:19 -07002028 // The mul instruction produces two dest variables, edx:eax. We create a
2029 // fake definition of edx to account for this.
John Porto1d937a82015-12-17 06:19:34 -08002030 Context.insert<InstFakeDef>(T_4Hi, T_4Lo);
Jim Stichnoth28df6ba2016-02-05 15:43:24 -08002031 Context.insert<InstFakeUse>(T_4Hi);
John Porto7e93c622015-06-23 10:58:57 -07002032 _mov(DestLo, T_4Lo);
2033 _add(T_4Hi, T_1);
Jim Stichnothb40595a2016-01-29 06:14:31 -08002034 _mov(T_2, Src1Hi);
2035 _imul(T_2, Src0Lo);
John Porto7e93c622015-06-23 10:58:57 -07002036 _add(T_4Hi, T_2);
2037 _mov(DestHi, T_4Hi);
2038 } break;
David Sehr188eae52015-09-24 11:42:55 -07002039 case InstArithmetic::Shl:
2040 case InstArithmetic::Lshr:
2041 case InstArithmetic::Ashr:
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08002042 lowerShift64(Instr->getOp(), Src0Lo, Src0Hi, Src1Lo, DestLo, DestHi);
David Sehr188eae52015-09-24 11:42:55 -07002043 break;
John Porto7e93c622015-06-23 10:58:57 -07002044 case InstArithmetic::Fadd:
2045 case InstArithmetic::Fsub:
2046 case InstArithmetic::Fmul:
2047 case InstArithmetic::Fdiv:
2048 case InstArithmetic::Frem:
2049 llvm_unreachable("FP instruction with i64 type");
2050 break;
2051 case InstArithmetic::Udiv:
2052 case InstArithmetic::Sdiv:
2053 case InstArithmetic::Urem:
2054 case InstArithmetic::Srem:
2055 llvm_unreachable("Call-helper-involved instruction for i64 type \
2056 should have already been handled before");
2057 break;
2058 }
2059 return;
2060 }
Jim Stichnothc59288b2015-11-09 11:38:40 -08002061 if (isVectorType(Ty)) {
Andrew Scull57e12682015-09-16 11:30:19 -07002062 // TODO: Trap on integer divide and integer modulo by zero. See:
2063 // https://code.google.com/p/nativeclient/issues/detail?id=3899
John Porto4a566862016-01-04 09:33:41 -08002064 if (llvm::isa<X86OperandMem>(Src1))
Andrew Scull97f460d2015-07-21 10:07:42 -07002065 Src1 = legalizeToReg(Src1);
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08002066 switch (Instr->getOp()) {
John Porto7e93c622015-06-23 10:58:57 -07002067 case InstArithmetic::_num:
2068 llvm_unreachable("Unknown arithmetic operator");
2069 break;
2070 case InstArithmetic::Add: {
Jim Stichnothc59288b2015-11-09 11:38:40 -08002071 Variable *T = makeReg(Ty);
John Porto7e93c622015-06-23 10:58:57 -07002072 _movp(T, Src0);
2073 _padd(T, Src1);
2074 _movp(Dest, T);
2075 } break;
2076 case InstArithmetic::And: {
Jim Stichnothc59288b2015-11-09 11:38:40 -08002077 Variable *T = makeReg(Ty);
John Porto7e93c622015-06-23 10:58:57 -07002078 _movp(T, Src0);
2079 _pand(T, Src1);
2080 _movp(Dest, T);
2081 } break;
2082 case InstArithmetic::Or: {
Jim Stichnothc59288b2015-11-09 11:38:40 -08002083 Variable *T = makeReg(Ty);
John Porto7e93c622015-06-23 10:58:57 -07002084 _movp(T, Src0);
2085 _por(T, Src1);
2086 _movp(Dest, T);
2087 } break;
2088 case InstArithmetic::Xor: {
Jim Stichnothc59288b2015-11-09 11:38:40 -08002089 Variable *T = makeReg(Ty);
John Porto7e93c622015-06-23 10:58:57 -07002090 _movp(T, Src0);
2091 _pxor(T, Src1);
2092 _movp(Dest, T);
2093 } break;
2094 case InstArithmetic::Sub: {
Jim Stichnothc59288b2015-11-09 11:38:40 -08002095 Variable *T = makeReg(Ty);
John Porto7e93c622015-06-23 10:58:57 -07002096 _movp(T, Src0);
2097 _psub(T, Src1);
2098 _movp(Dest, T);
2099 } break;
2100 case InstArithmetic::Mul: {
Jim Stichnothc59288b2015-11-09 11:38:40 -08002101 bool TypesAreValidForPmull = Ty == IceType_v4i32 || Ty == IceType_v8i16;
John Porto7e93c622015-06-23 10:58:57 -07002102 bool InstructionSetIsValidForPmull =
Jim Stichnothc59288b2015-11-09 11:38:40 -08002103 Ty == IceType_v8i16 || InstructionSet >= Traits::SSE4_1;
John Porto7e93c622015-06-23 10:58:57 -07002104 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) {
Jim Stichnothc59288b2015-11-09 11:38:40 -08002105 Variable *T = makeReg(Ty);
John Porto7e93c622015-06-23 10:58:57 -07002106 _movp(T, Src0);
Jim Stichnothebbb5912015-10-05 15:12:09 -07002107 _pmull(T, Src0 == Src1 ? T : Src1);
John Porto7e93c622015-06-23 10:58:57 -07002108 _movp(Dest, T);
Jim Stichnothc59288b2015-11-09 11:38:40 -08002109 } else if (Ty == IceType_v4i32) {
John Porto7e93c622015-06-23 10:58:57 -07002110 // Lowering sequence:
2111 // Note: The mask arguments have index 0 on the left.
2112 //
2113 // movups T1, Src0
2114 // pshufd T2, Src0, {1,0,3,0}
2115 // pshufd T3, Src1, {1,0,3,0}
2116 // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]}
2117 // pmuludq T1, Src1
2118 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]}
2119 // pmuludq T2, T3
2120 // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])}
2121 // shufps T1, T2, {0,2,0,2}
2122 // pshufd T4, T1, {0,2,1,3}
2123 // movups Dest, T4
2124
2125 // Mask that directs pshufd to create a vector with entries
2126 // Src[1, 0, 3, 0]
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07002127 constexpr unsigned Constant1030 = 0x31;
John Porto7e93c622015-06-23 10:58:57 -07002128 Constant *Mask1030 = Ctx->getConstantInt32(Constant1030);
2129 // Mask that directs shufps to create a vector with entries
2130 // Dest[0, 2], Src[0, 2]
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07002131 constexpr unsigned Mask0202 = 0x88;
John Porto7e93c622015-06-23 10:58:57 -07002132 // Mask that directs pshufd to create a vector with entries
2133 // Src[0, 2, 1, 3]
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07002134 constexpr unsigned Mask0213 = 0xd8;
John Porto7e93c622015-06-23 10:58:57 -07002135 Variable *T1 = makeReg(IceType_v4i32);
2136 Variable *T2 = makeReg(IceType_v4i32);
2137 Variable *T3 = makeReg(IceType_v4i32);
2138 Variable *T4 = makeReg(IceType_v4i32);
2139 _movp(T1, Src0);
2140 _pshufd(T2, Src0, Mask1030);
2141 _pshufd(T3, Src1, Mask1030);
2142 _pmuludq(T1, Src1);
2143 _pmuludq(T2, T3);
2144 _shufps(T1, T2, Ctx->getConstantInt32(Mask0202));
2145 _pshufd(T4, T1, Ctx->getConstantInt32(Mask0213));
2146 _movp(Dest, T4);
Jim Stichnothc59288b2015-11-09 11:38:40 -08002147 } else if (Ty == IceType_v16i8) {
David Sehr26217e32015-11-26 13:03:50 -08002148 llvm::report_fatal_error("Scalarized operation was expected");
Jim Stichnothebbb5912015-10-05 15:12:09 -07002149 } else {
2150 llvm::report_fatal_error("Invalid vector multiply type");
John Porto7e93c622015-06-23 10:58:57 -07002151 }
2152 } break;
2153 case InstArithmetic::Shl:
2154 case InstArithmetic::Lshr:
2155 case InstArithmetic::Ashr:
2156 case InstArithmetic::Udiv:
2157 case InstArithmetic::Urem:
2158 case InstArithmetic::Sdiv:
2159 case InstArithmetic::Srem:
David Sehr26217e32015-11-26 13:03:50 -08002160 llvm::report_fatal_error("Scalarized operation was expected");
John Porto7e93c622015-06-23 10:58:57 -07002161 break;
2162 case InstArithmetic::Fadd: {
Jim Stichnothc59288b2015-11-09 11:38:40 -08002163 Variable *T = makeReg(Ty);
John Porto7e93c622015-06-23 10:58:57 -07002164 _movp(T, Src0);
2165 _addps(T, Src1);
2166 _movp(Dest, T);
2167 } break;
2168 case InstArithmetic::Fsub: {
Jim Stichnothc59288b2015-11-09 11:38:40 -08002169 Variable *T = makeReg(Ty);
John Porto7e93c622015-06-23 10:58:57 -07002170 _movp(T, Src0);
2171 _subps(T, Src1);
2172 _movp(Dest, T);
2173 } break;
2174 case InstArithmetic::Fmul: {
Jim Stichnothc59288b2015-11-09 11:38:40 -08002175 Variable *T = makeReg(Ty);
John Porto7e93c622015-06-23 10:58:57 -07002176 _movp(T, Src0);
Jim Stichnothebbb5912015-10-05 15:12:09 -07002177 _mulps(T, Src0 == Src1 ? T : Src1);
John Porto7e93c622015-06-23 10:58:57 -07002178 _movp(Dest, T);
2179 } break;
2180 case InstArithmetic::Fdiv: {
Jim Stichnothc59288b2015-11-09 11:38:40 -08002181 Variable *T = makeReg(Ty);
John Porto7e93c622015-06-23 10:58:57 -07002182 _movp(T, Src0);
2183 _divps(T, Src1);
2184 _movp(Dest, T);
2185 } break;
2186 case InstArithmetic::Frem:
David Sehr26217e32015-11-26 13:03:50 -08002187 llvm::report_fatal_error("Scalarized operation was expected");
John Porto7e93c622015-06-23 10:58:57 -07002188 break;
2189 }
2190 return;
2191 }
2192 Variable *T_edx = nullptr;
2193 Variable *T = nullptr;
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08002194 switch (Instr->getOp()) {
John Porto7e93c622015-06-23 10:58:57 -07002195 case InstArithmetic::_num:
2196 llvm_unreachable("Unknown arithmetic operator");
2197 break;
2198 case InstArithmetic::Add:
2199 _mov(T, Src0);
2200 _add(T, Src1);
2201 _mov(Dest, T);
2202 break;
2203 case InstArithmetic::And:
2204 _mov(T, Src0);
2205 _and(T, Src1);
2206 _mov(Dest, T);
2207 break;
2208 case InstArithmetic::Or:
2209 _mov(T, Src0);
2210 _or(T, Src1);
2211 _mov(Dest, T);
2212 break;
2213 case InstArithmetic::Xor:
2214 _mov(T, Src0);
2215 _xor(T, Src1);
2216 _mov(Dest, T);
2217 break;
2218 case InstArithmetic::Sub:
2219 _mov(T, Src0);
2220 _sub(T, Src1);
2221 _mov(Dest, T);
2222 break;
2223 case InstArithmetic::Mul:
2224 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
2225 if (optimizeScalarMul(Dest, Src0, C->getValue()))
2226 return;
2227 }
Andrew Scull57e12682015-09-16 11:30:19 -07002228 // The 8-bit version of imul only allows the form "imul r/m8" where T must
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07002229 // be in al.
Jim Stichnothc59288b2015-11-09 11:38:40 -08002230 if (isByteSizedArithType(Ty)) {
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07002231 _mov(T, Src0, Traits::RegisterSet::Reg_al);
John Porto7e93c622015-06-23 10:58:57 -07002232 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
David Sehre11f8782015-10-06 10:26:57 -07002233 _imul(T, Src0 == Src1 ? T : Src1);
2234 _mov(Dest, T);
2235 } else if (auto *ImmConst = llvm::dyn_cast<ConstantInteger32>(Src1)) {
Jim Stichnothc59288b2015-11-09 11:38:40 -08002236 T = makeReg(Ty);
David Sehre11f8782015-10-06 10:26:57 -07002237 _imul_imm(T, Src0, ImmConst);
2238 _mov(Dest, T);
John Porto7e93c622015-06-23 10:58:57 -07002239 } else {
2240 _mov(T, Src0);
David Sehre11f8782015-10-06 10:26:57 -07002241 _imul(T, Src0 == Src1 ? T : Src1);
2242 _mov(Dest, T);
John Porto7e93c622015-06-23 10:58:57 -07002243 }
John Porto7e93c622015-06-23 10:58:57 -07002244 break;
2245 case InstArithmetic::Shl:
2246 _mov(T, Src0);
Jim Stichnoth9c2c0932016-06-14 07:27:22 -07002247 if (!llvm::isa<ConstantInteger32>(Src1) &&
2248 !llvm::isa<ConstantInteger64>(Src1))
Jim Stichnothc59288b2015-11-09 11:38:40 -08002249 Src1 = copyToReg8(Src1, Traits::RegisterSet::Reg_cl);
John Porto7e93c622015-06-23 10:58:57 -07002250 _shl(T, Src1);
2251 _mov(Dest, T);
2252 break;
2253 case InstArithmetic::Lshr:
2254 _mov(T, Src0);
Jim Stichnoth9c2c0932016-06-14 07:27:22 -07002255 if (!llvm::isa<ConstantInteger32>(Src1) &&
2256 !llvm::isa<ConstantInteger64>(Src1))
Jim Stichnothc59288b2015-11-09 11:38:40 -08002257 Src1 = copyToReg8(Src1, Traits::RegisterSet::Reg_cl);
John Porto7e93c622015-06-23 10:58:57 -07002258 _shr(T, Src1);
2259 _mov(Dest, T);
2260 break;
2261 case InstArithmetic::Ashr:
2262 _mov(T, Src0);
Jim Stichnoth9c2c0932016-06-14 07:27:22 -07002263 if (!llvm::isa<ConstantInteger32>(Src1) &&
2264 !llvm::isa<ConstantInteger64>(Src1))
Jim Stichnothc59288b2015-11-09 11:38:40 -08002265 Src1 = copyToReg8(Src1, Traits::RegisterSet::Reg_cl);
John Porto7e93c622015-06-23 10:58:57 -07002266 _sar(T, Src1);
2267 _mov(Dest, T);
2268 break;
Jim Stichnothc59288b2015-11-09 11:38:40 -08002269 case InstArithmetic::Udiv: {
John Porto7e93c622015-06-23 10:58:57 -07002270 // div and idiv are the few arithmetic operators that do not allow
2271 // immediates as the operand.
2272 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
Jim Stichnoth8aa39662016-02-10 11:20:30 -08002273 RegNumT Eax;
2274 RegNumT Edx;
Jim Stichnothc59288b2015-11-09 11:38:40 -08002275 switch (Ty) {
2276 default:
John Porto3c275ce2015-12-22 08:14:00 -08002277 llvm::report_fatal_error("Bad type for udiv");
2278 case IceType_i64:
2279 Eax = Traits::getRaxOrDie();
2280 Edx = Traits::getRdxOrDie();
John Porto008f4ce2015-12-24 13:22:18 -08002281 break;
Jim Stichnothc59288b2015-11-09 11:38:40 -08002282 case IceType_i32:
John Porto3c275ce2015-12-22 08:14:00 -08002283 Eax = Traits::RegisterSet::Reg_eax;
2284 Edx = Traits::RegisterSet::Reg_edx;
Jim Stichnothc59288b2015-11-09 11:38:40 -08002285 break;
2286 case IceType_i16:
2287 Eax = Traits::RegisterSet::Reg_ax;
2288 Edx = Traits::RegisterSet::Reg_dx;
2289 break;
2290 case IceType_i8:
2291 Eax = Traits::RegisterSet::Reg_al;
2292 Edx = Traits::RegisterSet::Reg_ah;
2293 break;
John Porto7e93c622015-06-23 10:58:57 -07002294 }
John Porto008f4ce2015-12-24 13:22:18 -08002295 T_edx = makeReg(Ty, Edx);
Jim Stichnothc59288b2015-11-09 11:38:40 -08002296 _mov(T, Src0, Eax);
John Porto008f4ce2015-12-24 13:22:18 -08002297 _mov(T_edx, Ctx->getConstantZero(Ty));
Jim Stichnoth017a5532016-07-19 06:35:52 -07002298 _div(T_edx, Src1, T);
2299 _redefined(Context.insert<InstFakeDef>(T, T_edx));
Jim Stichnothc59288b2015-11-09 11:38:40 -08002300 _mov(Dest, T);
2301 } break;
John Porto7e93c622015-06-23 10:58:57 -07002302 case InstArithmetic::Sdiv:
Andrew Scull57e12682015-09-16 11:30:19 -07002303 // TODO(stichnot): Enable this after doing better performance and cross
2304 // testing.
Jim Stichnothdd6dcfa2016-04-18 12:52:09 -07002305 if (false && Func->getOptLevel() >= Opt_1) {
Andrew Scull57e12682015-09-16 11:30:19 -07002306 // Optimize division by constant power of 2, but not for Om1 or O0, just
2307 // to keep things simple there.
John Porto7e93c622015-06-23 10:58:57 -07002308 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08002309 const int32_t Divisor = C->getValue();
2310 const uint32_t UDivisor = Divisor;
John Porto7e93c622015-06-23 10:58:57 -07002311 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {
2312 uint32_t LogDiv = llvm::Log2_32(UDivisor);
John Porto7e93c622015-06-23 10:58:57 -07002313 // LLVM does the following for dest=src/(1<<log):
2314 // t=src
2315 // sar t,typewidth-1 // -1 if src is negative, 0 if not
2316 // shr t,typewidth-log
2317 // add t,src
2318 // sar t,log
2319 // dest=t
2320 uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty);
2321 _mov(T, Src0);
Andrew Scull57e12682015-09-16 11:30:19 -07002322 // If for some reason we are dividing by 1, just treat it like an
2323 // assignment.
John Porto7e93c622015-06-23 10:58:57 -07002324 if (LogDiv > 0) {
2325 // The initial sar is unnecessary when dividing by 2.
2326 if (LogDiv > 1)
2327 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1));
2328 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv));
2329 _add(T, Src0);
2330 _sar(T, Ctx->getConstantInt(Ty, LogDiv));
2331 }
2332 _mov(Dest, T);
2333 return;
2334 }
2335 }
2336 }
2337 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
Jim Stichnothc59288b2015-11-09 11:38:40 -08002338 switch (Ty) {
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07002339 default:
John Porto3c275ce2015-12-22 08:14:00 -08002340 llvm::report_fatal_error("Bad type for sdiv");
2341 case IceType_i64:
2342 T_edx = makeReg(Ty, Traits::getRdxOrDie());
2343 _mov(T, Src0, Traits::getRaxOrDie());
2344 break;
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07002345 case IceType_i32:
2346 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_edx);
John Porto5d0acff2015-06-30 15:29:21 -07002347 _mov(T, Src0, Traits::RegisterSet::Reg_eax);
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07002348 break;
2349 case IceType_i16:
2350 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_dx);
2351 _mov(T, Src0, Traits::RegisterSet::Reg_ax);
2352 break;
2353 case IceType_i8:
2354 T_edx = makeReg(IceType_i16, Traits::RegisterSet::Reg_ax);
2355 _mov(T, Src0, Traits::RegisterSet::Reg_al);
2356 break;
John Porto7e93c622015-06-23 10:58:57 -07002357 }
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07002358 _cbwdq(T_edx, T);
Jim Stichnoth017a5532016-07-19 06:35:52 -07002359 _idiv(T_edx, Src1, T);
2360 _redefined(Context.insert<InstFakeDef>(T, T_edx));
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07002361 _mov(Dest, T);
John Porto7e93c622015-06-23 10:58:57 -07002362 break;
Jim Stichnothc59288b2015-11-09 11:38:40 -08002363 case InstArithmetic::Urem: {
John Porto7e93c622015-06-23 10:58:57 -07002364 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
Jim Stichnoth8aa39662016-02-10 11:20:30 -08002365 RegNumT Eax;
2366 RegNumT Edx;
Jim Stichnothc59288b2015-11-09 11:38:40 -08002367 switch (Ty) {
2368 default:
John Porto3c275ce2015-12-22 08:14:00 -08002369 llvm::report_fatal_error("Bad type for urem");
2370 case IceType_i64:
2371 Eax = Traits::getRaxOrDie();
2372 Edx = Traits::getRdxOrDie();
2373 break;
Jim Stichnothc59288b2015-11-09 11:38:40 -08002374 case IceType_i32:
John Porto3c275ce2015-12-22 08:14:00 -08002375 Eax = Traits::RegisterSet::Reg_eax;
2376 Edx = Traits::RegisterSet::Reg_edx;
Jim Stichnothc59288b2015-11-09 11:38:40 -08002377 break;
2378 case IceType_i16:
2379 Eax = Traits::RegisterSet::Reg_ax;
2380 Edx = Traits::RegisterSet::Reg_dx;
2381 break;
2382 case IceType_i8:
2383 Eax = Traits::RegisterSet::Reg_al;
2384 Edx = Traits::RegisterSet::Reg_ah;
2385 break;
John Porto7e93c622015-06-23 10:58:57 -07002386 }
Jim Stichnothc59288b2015-11-09 11:38:40 -08002387 T_edx = makeReg(Ty, Edx);
2388 _mov(T_edx, Ctx->getConstantZero(Ty));
2389 _mov(T, Src0, Eax);
Jim Stichnoth017a5532016-07-19 06:35:52 -07002390 _div(T, Src1, T_edx);
2391 _redefined(Context.insert<InstFakeDef>(T_edx, T));
Jim Stichnoth2655d962016-04-21 05:38:15 -07002392 if (Ty == IceType_i8) {
2393 // Register ah must be moved into one of {al,bl,cl,dl} before it can be
2394 // moved into a general 8-bit register.
2395 auto *T_AhRcvr = makeReg(Ty);
2396 T_AhRcvr->setRegClass(RCX86_IsAhRcvr);
2397 _mov(T_AhRcvr, T_edx);
2398 T_edx = T_AhRcvr;
2399 }
Jim Stichnothc59288b2015-11-09 11:38:40 -08002400 _mov(Dest, T_edx);
2401 } break;
2402 case InstArithmetic::Srem: {
Andrew Scull57e12682015-09-16 11:30:19 -07002403 // TODO(stichnot): Enable this after doing better performance and cross
2404 // testing.
Jim Stichnothdd6dcfa2016-04-18 12:52:09 -07002405 if (false && Func->getOptLevel() >= Opt_1) {
Andrew Scull57e12682015-09-16 11:30:19 -07002406 // Optimize mod by constant power of 2, but not for Om1 or O0, just to
2407 // keep things simple there.
John Porto7e93c622015-06-23 10:58:57 -07002408 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08002409 const int32_t Divisor = C->getValue();
2410 const uint32_t UDivisor = Divisor;
John Porto7e93c622015-06-23 10:58:57 -07002411 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {
2412 uint32_t LogDiv = llvm::Log2_32(UDivisor);
John Porto7e93c622015-06-23 10:58:57 -07002413 // LLVM does the following for dest=src%(1<<log):
2414 // t=src
2415 // sar t,typewidth-1 // -1 if src is negative, 0 if not
2416 // shr t,typewidth-log
2417 // add t,src
2418 // and t, -(1<<log)
2419 // sub t,src
2420 // neg t
2421 // dest=t
2422 uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty);
2423 // If for some reason we are dividing by 1, just assign 0.
2424 if (LogDiv == 0) {
2425 _mov(Dest, Ctx->getConstantZero(Ty));
2426 return;
2427 }
2428 _mov(T, Src0);
2429 // The initial sar is unnecessary when dividing by 2.
2430 if (LogDiv > 1)
2431 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1));
2432 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv));
2433 _add(T, Src0);
2434 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv)));
2435 _sub(T, Src0);
2436 _neg(T);
2437 _mov(Dest, T);
2438 return;
2439 }
2440 }
2441 }
2442 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
Jim Stichnoth8aa39662016-02-10 11:20:30 -08002443 RegNumT Eax;
2444 RegNumT Edx;
Jim Stichnothc59288b2015-11-09 11:38:40 -08002445 switch (Ty) {
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07002446 default:
John Porto3c275ce2015-12-22 08:14:00 -08002447 llvm::report_fatal_error("Bad type for srem");
2448 case IceType_i64:
2449 Eax = Traits::getRaxOrDie();
2450 Edx = Traits::getRdxOrDie();
2451 break;
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07002452 case IceType_i32:
John Porto3c275ce2015-12-22 08:14:00 -08002453 Eax = Traits::RegisterSet::Reg_eax;
2454 Edx = Traits::RegisterSet::Reg_edx;
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07002455 break;
2456 case IceType_i16:
Jim Stichnothc59288b2015-11-09 11:38:40 -08002457 Eax = Traits::RegisterSet::Reg_ax;
2458 Edx = Traits::RegisterSet::Reg_dx;
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07002459 break;
2460 case IceType_i8:
Jim Stichnothc59288b2015-11-09 11:38:40 -08002461 Eax = Traits::RegisterSet::Reg_al;
2462 Edx = Traits::RegisterSet::Reg_ah;
Jim Stichnoth5bff61c2015-10-28 09:26:00 -07002463 break;
John Porto7e93c622015-06-23 10:58:57 -07002464 }
Jim Stichnothc59288b2015-11-09 11:38:40 -08002465 T_edx = makeReg(Ty, Edx);
2466 _mov(T, Src0, Eax);
2467 _cbwdq(T_edx, T);
Jim Stichnoth017a5532016-07-19 06:35:52 -07002468 _idiv(T, Src1, T_edx);
2469 _redefined(Context.insert<InstFakeDef>(T_edx, T));
Jim Stichnoth2655d962016-04-21 05:38:15 -07002470 if (Ty == IceType_i8) {
2471 // Register ah must be moved into one of {al,bl,cl,dl} before it can be
2472 // moved into a general 8-bit register.
2473 auto *T_AhRcvr = makeReg(Ty);
2474 T_AhRcvr->setRegClass(RCX86_IsAhRcvr);
2475 _mov(T_AhRcvr, T_edx);
2476 T_edx = T_AhRcvr;
2477 }
Jim Stichnothc59288b2015-11-09 11:38:40 -08002478 _mov(Dest, T_edx);
2479 } break;
John Porto7e93c622015-06-23 10:58:57 -07002480 case InstArithmetic::Fadd:
2481 _mov(T, Src0);
2482 _addss(T, Src1);
2483 _mov(Dest, T);
2484 break;
2485 case InstArithmetic::Fsub:
2486 _mov(T, Src0);
2487 _subss(T, Src1);
2488 _mov(Dest, T);
2489 break;
2490 case InstArithmetic::Fmul:
2491 _mov(T, Src0);
Jim Stichnothebbb5912015-10-05 15:12:09 -07002492 _mulss(T, Src0 == Src1 ? T : Src1);
John Porto7e93c622015-06-23 10:58:57 -07002493 _mov(Dest, T);
2494 break;
2495 case InstArithmetic::Fdiv:
2496 _mov(T, Src0);
2497 _divss(T, Src1);
2498 _mov(Dest, T);
2499 break;
David Sehr26217e32015-11-26 13:03:50 -08002500 case InstArithmetic::Frem:
2501 llvm::report_fatal_error("Helper call was expected");
2502 break;
John Porto7e93c622015-06-23 10:58:57 -07002503 }
2504}
2505
John Porto4a566862016-01-04 09:33:41 -08002506template <typename TraitsType>
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08002507void TargetX86Base<TraitsType>::lowerAssign(const InstAssign *Instr) {
2508 Variable *Dest = Instr->getDest();
Jim Stichnoth3607b6c2015-11-13 14:28:23 -08002509 if (Dest->isRematerializable()) {
John Porto1d937a82015-12-17 06:19:34 -08002510 Context.insert<InstFakeDef>(Dest);
Jim Stichnoth3607b6c2015-11-13 14:28:23 -08002511 return;
2512 }
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08002513 Operand *Src = Instr->getSrc(0);
David Sehre3984282015-12-15 17:34:55 -08002514 assert(Dest->getType() == Src->getType());
2515 lowerMove(Dest, Src, false);
John Porto7e93c622015-06-23 10:58:57 -07002516}
2517
John Porto4a566862016-01-04 09:33:41 -08002518template <typename TraitsType>
2519void TargetX86Base<TraitsType>::lowerBr(const InstBr *Br) {
David Sehre3984282015-12-15 17:34:55 -08002520 if (Br->isUnconditional()) {
2521 _br(Br->getTargetUnconditional());
John Porto7e93c622015-06-23 10:58:57 -07002522 return;
2523 }
David Sehre3984282015-12-15 17:34:55 -08002524 Operand *Cond = Br->getCondition();
John Porto7e93c622015-06-23 10:58:57 -07002525
2526 // Handle folding opportunities.
David Sehre3984282015-12-15 17:34:55 -08002527 if (const Inst *Producer = FoldingInfo.getProducerFor(Cond)) {
John Porto7e93c622015-06-23 10:58:57 -07002528 assert(Producer->isDeleted());
Jim Stichnothcaeaa272016-01-10 12:53:44 -08002529 switch (BoolFolding<Traits>::getProducerKind(Producer)) {
John Porto7e93c622015-06-23 10:58:57 -07002530 default:
2531 break;
Jim Stichnothcaeaa272016-01-10 12:53:44 -08002532 case BoolFolding<Traits>::PK_Icmp32:
2533 case BoolFolding<Traits>::PK_Icmp64: {
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08002534 lowerIcmpAndConsumer(llvm::cast<InstIcmp>(Producer), Br);
John Porto7e93c622015-06-23 10:58:57 -07002535 return;
2536 }
Jim Stichnothcaeaa272016-01-10 12:53:44 -08002537 case BoolFolding<Traits>::PK_Fcmp: {
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08002538 lowerFcmpAndConsumer(llvm::cast<InstFcmp>(Producer), Br);
David Sehrdaf096c2015-11-11 10:56:58 -08002539 return;
2540 }
Jim Stichnothcaeaa272016-01-10 12:53:44 -08002541 case BoolFolding<Traits>::PK_Arith: {
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08002542 lowerArithAndConsumer(llvm::cast<InstArithmetic>(Producer), Br);
David Sehrdaf096c2015-11-11 10:56:58 -08002543 return;
2544 }
John Porto7e93c622015-06-23 10:58:57 -07002545 }
2546 }
John Porto7e93c622015-06-23 10:58:57 -07002547 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem);
2548 Constant *Zero = Ctx->getConstantZero(IceType_i32);
2549 _cmp(Src0, Zero);
David Sehre3984282015-12-15 17:34:55 -08002550 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse());
John Porto7e93c622015-06-23 10:58:57 -07002551}
2552
David Sehr0c68bef2016-01-20 10:00:23 -08002553// constexprMax returns a (constexpr) max(S0, S1), and it is used for defining
2554// OperandList in lowerCall. std::max() is supposed to work, but it doesn't.
2555inline constexpr SizeT constexprMax(SizeT S0, SizeT S1) {
2556 return S0 < S1 ? S1 : S0;
2557}
2558
template <typename TraitsType>
/// \brief Lowers a call instruction.
///
/// Classifies each argument as XMM-register, GPR, or stack-passed; stores the
/// stack arguments into the pre-reserved outgoing-argument area; materializes
/// register argument moves; emits the call (with FakeUse/FakeDef/FakeKill
/// bookkeeping for liveness); and finally copies the return value, if any,
/// into the destination variable. The statement order here is significant:
/// FakeUses of register arguments must sit immediately before the call so the
/// FakeKill of scratch registers does not dead-code-eliminate them.
void TargetX86Base<TraitsType>::lowerCall(const InstCall *Instr) {
  // Common x86 calling convention lowering:
  //
  // * At the point before the call, the stack must be aligned to 16 bytes.
  //
  // * Non-register arguments are pushed onto the stack in right-to-left order,
  // such that the left-most argument ends up on the top of the stack at the
  // lowest memory address.
  //
  // * Stack arguments of vector type are aligned to start at the next highest
  // multiple of 16 bytes. Other stack arguments are aligned to the next word
  // size boundary (4 or 8 bytes, respectively).
  NeedsStackAlignment = true;

  // Sized to hold either register-argument class without heap allocation.
  using OperandList =
      llvm::SmallVector<Operand *, constexprMax(Traits::X86_MAX_XMM_ARGS,
                                                Traits::X86_MAX_GPR_ARGS)>;
  OperandList XmmArgs;
  // GprArgs pairs the signature type with the operand so the type can be
  // asserted against the legalized operand below.
  CfgVector<std::pair<const Type, Operand *>> GprArgs;
  OperandList StackArgs, StackArgLocations;
  uint32_t ParameterAreaSizeBytes = 0;

  // Classify each argument operand according to the location where the
  // argument is passed.
  for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
    Operand *Arg = Instr->getArg(i);
    const Type Ty = Arg->getType();
    // The PNaCl ABI requires the width of arguments to be at least 32 bits.
    assert(typeWidthInBytes(Ty) >= 4);
    if (isVectorType(Ty) &&
        Traits::getRegisterForXmmArgNum(XmmArgs.size()).hasValue()) {
      XmmArgs.push_back(Arg);
    } else if (isScalarFloatingType(Ty) && Traits::X86_PASS_SCALAR_FP_IN_XMM &&
               Traits::getRegisterForXmmArgNum(XmmArgs.size()).hasValue()) {
      XmmArgs.push_back(Arg);
    } else if (isScalarIntegerType(Ty) &&
               Traits::getRegisterForGprArgNum(Ty, GprArgs.size()).hasValue()) {
      GprArgs.emplace_back(Ty, Arg);
    } else {
      // Place on stack. Vector arguments are 16-byte aligned; the address is
      // computed as an offset from the stack pointer into the outgoing
      // argument area.
      StackArgs.push_back(Arg);
      if (isVectorType(Arg->getType())) {
        ParameterAreaSizeBytes =
            Traits::applyStackAlignment(ParameterAreaSizeBytes);
      }
      Variable *esp = getPhysicalRegister(getStackReg(), Traits::WordType);
      Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
      StackArgLocations.push_back(
          Traits::X86OperandMem::create(Func, Ty, esp, Loc));
      ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
    }
  }
  // Ensure there is enough space for the fstp/movs for floating returns.
  Variable *Dest = Instr->getDest();
  const Type DestTy = Dest ? Dest->getType() : IceType_void;
  if (!Traits::X86_PASS_SCALAR_FP_IN_XMM) {
    if (isScalarFloatingType(DestTy)) {
      // x87 returns route st(0) through this reserved slot; see the _fstp
      // handling after the call below.
      ParameterAreaSizeBytes =
          std::max(static_cast<size_t>(ParameterAreaSizeBytes),
                   typeWidthInBytesOnStack(DestTy));
    }
  }
  // Adjust the parameter area so that the stack is aligned. It is assumed
  // that the stack is already aligned at the start of the calling sequence.
  ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);
  assert(ParameterAreaSizeBytes <= maxOutArgsSizeBytes());
  // Copy arguments that are passed on the stack to the appropriate stack
  // locations. We make sure legalize() is called on each argument at this
  // point, to allow availabilityGet() to work.
  for (SizeT i = 0, NumStackArgs = StackArgs.size(); i < NumStackArgs; ++i) {
    lowerStore(
        InstStore::create(Func, legalize(StackArgs[i]), StackArgLocations[i]));
  }
  // Copy arguments to be passed in registers to the appropriate registers.
  for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
    XmmArgs[i] =
        legalizeToReg(legalize(XmmArgs[i]), Traits::getRegisterForXmmArgNum(i));
  }
  // Materialize moves for arguments passed in GPRs.
  for (SizeT i = 0, NumGprArgs = GprArgs.size(); i < NumGprArgs; ++i) {
    const Type SignatureTy = GprArgs[i].first;
    Operand *Arg =
        legalize(GprArgs[i].second, Legal_Default | Legal_Rematerializable);
    GprArgs[i].second =
        legalizeToReg(Arg, Traits::getRegisterForGprArgNum(Arg->getType(), i));
    assert(SignatureTy == IceType_i64 || SignatureTy == IceType_i32);
    assert(SignatureTy == Arg->getType());
    (void)SignatureTy;
  }
  // Generate a FakeUse of register arguments so that they do not get dead code
  // eliminated as a result of the FakeKill of scratch registers after the call.
  // These need to be right before the call instruction.
  for (auto *Arg : XmmArgs) {
    Context.insert<InstFakeUse>(llvm::cast<Variable>(Arg));
  }
  for (auto &ArgPair : GprArgs) {
    Context.insert<InstFakeUse>(llvm::cast<Variable>(ArgPair.second));
  }
  // Generate the call instruction. Assign its result to a temporary with high
  // register allocation weight.
  // ReturnReg doubles as ReturnRegLo as necessary.
  Variable *ReturnReg = nullptr;
  Variable *ReturnRegHi = nullptr;
  if (Dest) {
    switch (DestTy) {
    case IceType_NUM:
    case IceType_void:
    case IceType_i1:
    case IceType_i8:
    case IceType_i16:
      // Narrow integer returns are widened before this point per the PNaCl
      // ABI, so they are invalid here.
      llvm::report_fatal_error("Invalid Call dest type");
      break;
    case IceType_i32:
      ReturnReg = makeReg(DestTy, Traits::RegisterSet::Reg_eax);
      break;
    case IceType_i64:
      if (Traits::Is64Bit) {
        ReturnReg = makeReg(IceType_i64, Traits::getRaxOrDie());
      } else {
        // 32-bit targets return i64 in the edx:eax pair.
        ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
        ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
      }
      break;
    case IceType_f32:
    case IceType_f64:
      if (!Traits::X86_PASS_SCALAR_FP_IN_XMM) {
        // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with
        // the fstp instruction.
        break;
      }
      // Fallthrough intended.
    case IceType_v4i1:
    case IceType_v8i1:
    case IceType_v16i1:
    case IceType_v16i8:
    case IceType_v8i16:
    case IceType_v4i32:
    case IceType_v4f32:
      ReturnReg = makeReg(DestTy, Traits::RegisterSet::Reg_xmm0);
      break;
    }
  }
  // Emit the call to the function.
  Operand *CallTarget =
      legalize(Instr->getCallTarget(), Legal_Reg | Legal_Imm | Legal_AddrAbs);
  Inst *NewCall = emitCallToTarget(CallTarget, ReturnReg);
  // Keep the upper return register live on 32-bit platform.
  if (ReturnRegHi)
    Context.insert<InstFakeDef>(ReturnRegHi);
  // Mark the call as killing all the caller-save registers.
  Context.insert<InstFakeKill>(NewCall);
  // Handle x86-32 floating point returns.
  if (Dest != nullptr && isScalarFloatingType(DestTy) &&
      !Traits::X86_PASS_SCALAR_FP_IN_XMM) {
    // Special treatment for an FP function which returns its result in st(0).
    // If Dest ends up being a physical xmm register, the fstp emit code will
    // route st(0) through the space reserved in the function argument area
    // we allocated.
    _fstp(Dest);
    // Create a fake use of Dest in case it actually isn't used, because st(0)
    // still needs to be popped.
    Context.insert<InstFakeUse>(Dest);
  }
  // Generate a FakeUse to keep the call live if necessary.
  if (Instr->hasSideEffects() && ReturnReg) {
    Context.insert<InstFakeUse>(ReturnReg);
  }
  // Process the return value, if any.
  if (Dest == nullptr)
    return;
  // Assign the result of the call to Dest. Route it through a temporary so
  // that the local register availability peephole can be subsequently used.
  Variable *Tmp = nullptr;
  if (isVectorType(DestTy)) {
    assert(ReturnReg && "Vector type requires a return register");
    Tmp = makeReg(DestTy);
    _movp(Tmp, ReturnReg);
    _movp(Dest, Tmp);
  } else if (isScalarFloatingType(DestTy)) {
    // When not passing scalar FP in XMM, the result was already captured via
    // _fstp above, so there is nothing more to do here.
    if (Traits::X86_PASS_SCALAR_FP_IN_XMM) {
      assert(ReturnReg && "FP type requires a return register");
      _mov(Tmp, ReturnReg);
      _mov(Dest, Tmp);
    }
  } else {
    assert(isScalarIntegerType(DestTy));
    assert(ReturnReg && "Integer type requires a return register");
    if (DestTy == IceType_i64 && !Traits::Is64Bit) {
      assert(ReturnRegHi && "64-bit type requires two return registers");
      // Split the edx:eax pair into the destination's lo/hi halves.
      auto *Dest64On32 = llvm::cast<Variable64On32>(Dest);
      Variable *DestLo = Dest64On32->getLo();
      Variable *DestHi = Dest64On32->getHi();
      _mov(Tmp, ReturnReg);
      _mov(DestLo, Tmp);
      Variable *TmpHi = nullptr;
      _mov(TmpHi, ReturnRegHi);
      _mov(DestHi, TmpHi);
    } else {
      _mov(Tmp, ReturnReg);
      _mov(Dest, Tmp);
    }
  }
}
2763
/// \brief Lowers a high-level cast instruction (sext/zext/trunc, fp<->fp,
/// fp<->int, bitcast) to x86 instruction sequences.
///
/// Cases that require a runtime helper (e.g. 64-bit fp<->int on x86-32,
/// narrow bitcasts) are expected to have been rewritten into helper calls by
/// an earlier pass; reaching one of those arms here is a fatal error
/// ("Helper call was expected").
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerCast(const InstCast *Instr) {
  // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap)
  InstCast::OpKind CastKind = Instr->getCastKind();
  Variable *Dest = Instr->getDest();
  Type DestTy = Dest->getType();
  switch (CastKind) {
  default:
    Func->setError("Cast type not supported");
    return;
  case InstCast::Sext: {
    // Src0RM is the source operand legalized to physical register or memory,
    // but not immediate, since the relevant x86 native instructions don't
    // allow an immediate operand. If the operand is an immediate, we could
    // consider computing the strength-reduced result at translation time, but
    // we're unlikely to see something like that in the bitcode that the
    // optimizer wouldn't have already taken care of.
    Operand *Src0RM = legalize(Instr->getSrc(0), Legal_Reg | Legal_Mem);
    if (isVectorType(DestTy)) {
      if (DestTy == IceType_v16i8) {
        // v16i8 has no per-element shift instruction, so instead:
        // onemask = materialize(1,1,...); dst = (src & onemask) > 0
        Variable *OneMask = makeVectorOfOnes(DestTy);
        Variable *T = makeReg(DestTy);
        _movp(T, Src0RM);
        _pand(T, OneMask);
        Variable *Zeros = makeVectorOfZeros(DestTy);
        _pcmpgt(T, Zeros);
        _movp(Dest, T);
      } else {
        // width = width(elty) - 1; dest = (src << width) >> width
        // i.e. replicate the low bit of each lane across the whole lane.
        SizeT ShiftAmount =
            Traits::X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) -
            1;
        Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount);
        Variable *T = makeReg(DestTy);
        _movp(T, Src0RM);
        _psll(T, ShiftConstant);
        _psra(T, ShiftConstant);
        _movp(Dest, T);
      }
    } else if (!Traits::Is64Bit && DestTy == IceType_i64) {
      // 64-bit dest on a 32-bit target: compute the low half, then derive the
      // high half by arithmetic-shifting the sign bit across a copy.
      // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2
      Constant *Shift = Ctx->getConstantInt32(31);
      auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
      auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *T_Lo = makeReg(DestLo->getType());
      if (Src0RM->getType() == IceType_i32) {
        _mov(T_Lo, Src0RM);
      } else if (Src0RM->getType() == IceType_i1) {
        // i1 source: zero-extend, then shl/sar to smear the single bit.
        _movzx(T_Lo, Src0RM);
        _shl(T_Lo, Shift);
        _sar(T_Lo, Shift);
      } else {
        _movsx(T_Lo, Src0RM);
      }
      _mov(DestLo, T_Lo);
      Variable *T_Hi = nullptr;
      _mov(T_Hi, T_Lo);
      if (Src0RM->getType() != IceType_i1)
        // For i1, the sar instruction is already done above.
        _sar(T_Hi, Shift);
      _mov(DestHi, T_Hi);
    } else if (Src0RM->getType() == IceType_i1) {
      // Scalar i1 source: smear the single bit across the destination width.
      // t1 = src
      // shl t1, dst_bitwidth - 1
      // sar t1, dst_bitwidth - 1
      // dst = t1
      size_t DestBits = Traits::X86_CHAR_BIT * typeWidthInBytes(DestTy);
      Constant *ShiftAmount = Ctx->getConstantInt32(DestBits - 1);
      Variable *T = makeReg(DestTy);
      if (typeWidthInBytes(DestTy) <= typeWidthInBytes(Src0RM->getType())) {
        _mov(T, Src0RM);
      } else {
        // Widen the source using movsx or movzx. (It doesn't matter which one,
        // since the following shl/sar overwrite the bits.)
        _movzx(T, Src0RM);
      }
      _shl(T, ShiftAmount);
      _sar(T, ShiftAmount);
      _mov(Dest, T);
    } else {
      // General scalar case: t1 = movsx src; dst = t1
      Variable *T = makeReg(DestTy);
      _movsx(T, Src0RM);
      _mov(Dest, T);
    }
    break;
  }
  case InstCast::Zext: {
    Operand *Src0RM = legalize(Instr->getSrc(0), Legal_Reg | Legal_Mem);
    if (isVectorType(DestTy)) {
      // Keep only the low bit of each lane:
      // onemask = materialize(1,1,...); dest = onemask & src
      Variable *OneMask = makeVectorOfOnes(DestTy);
      Variable *T = makeReg(DestTy);
      _movp(T, Src0RM);
      _pand(T, OneMask);
      _movp(Dest, T);
    } else if (!Traits::Is64Bit && DestTy == IceType_i64) {
      // 64-bit dest on a 32-bit target: t1=movzx src; dst.lo=t1; dst.hi=0
      Constant *Zero = Ctx->getConstantZero(IceType_i32);
      auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
      auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *Tmp = makeReg(DestLo->getType());
      if (Src0RM->getType() == IceType_i32) {
        _mov(Tmp, Src0RM);
      } else {
        _movzx(Tmp, Src0RM);
      }
      _mov(DestLo, Tmp);
      _mov(DestHi, Zero);
    } else if (Src0RM->getType() == IceType_i1) {
      // t = Src0RM; Dest = t
      Variable *T = nullptr;
      if (DestTy == IceType_i8) {
        // i1 is stored in a byte, so a plain byte move suffices.
        _mov(T, Src0RM);
      } else {
        assert(DestTy != IceType_i1);
        assert(Traits::Is64Bit || DestTy != IceType_i64);
        // Use 32-bit for both 16-bit and 32-bit, since 32-bit ops are shorter.
        // In x86-64 we need to widen T to 64-bits to ensure that T -- if
        // written to the stack (i.e., in -Om1) will be fully zero-extended.
        T = makeReg(DestTy == IceType_i64 ? IceType_i64 : IceType_i32);
        _movzx(T, Src0RM);
      }
      _mov(Dest, T);
    } else {
      // General scalar case: t1 = movzx src; dst = t1
      Variable *T = makeReg(DestTy);
      _movzx(T, Src0RM);
      _mov(Dest, T);
    }
    break;
  }
  case InstCast::Trunc: {
    if (isVectorType(DestTy)) {
      // Truncating to a vector of i1: keep only the low bit of each lane.
      // onemask = materialize(1,1,...); dst = src & onemask
      Operand *Src0RM = legalize(Instr->getSrc(0), Legal_Reg | Legal_Mem);
      Type Src0Ty = Src0RM->getType();
      Variable *OneMask = makeVectorOfOnes(Src0Ty);
      Variable *T = makeReg(DestTy);
      _movp(T, Src0RM);
      _pand(T, OneMask);
      _movp(Dest, T);
    } else if (DestTy == IceType_i1 || DestTy == IceType_i8) {
      // Make sure we truncate from and into valid registers.
      // (copyToReg8 constrains the copy to a byte-addressable register.)
      Operand *Src0 = legalizeUndef(Instr->getSrc(0));
      if (!Traits::Is64Bit && Src0->getType() == IceType_i64)
        Src0 = loOperand(Src0);
      Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
      Variable *T = copyToReg8(Src0RM);
      if (DestTy == IceType_i1)
        // Normalize an i1 result to 0 or 1.
        _and(T, Ctx->getConstantInt1(1));
      _mov(Dest, T);
    } else {
      Operand *Src0 = legalizeUndef(Instr->getSrc(0));
      if (!Traits::Is64Bit && Src0->getType() == IceType_i64)
        // On x86-32 a truncation from i64 only ever reads the low half.
        Src0 = loOperand(Src0);
      Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
      // t1 = trunc Src0RM; Dest = t1
      Variable *T = makeReg(DestTy);
      _mov(T, Src0RM);
      _mov(Dest, T);
    }
    break;
  }
  case InstCast::Fptrunc:
  case InstCast::Fpext: {
    // Both directions use the same float<->double conversion instruction.
    Operand *Src0RM = legalize(Instr->getSrc(0), Legal_Reg | Legal_Mem);
    // t1 = cvt Src0RM; Dest = t1
    Variable *T = makeReg(DestTy);
    _cvt(T, Src0RM, Traits::Insts::Cvt::Float2float);
    _mov(Dest, T);
    break;
  }
  case InstCast::Fptosi:
    if (isVectorType(DestTy)) {
      assert(DestTy == IceType_v4i32 &&
             Instr->getSrc(0)->getType() == IceType_v4f32);
      Operand *Src0RM = legalize(Instr->getSrc(0), Legal_Reg | Legal_Mem);
      if (llvm::isa<X86OperandMem>(Src0RM))
        Src0RM = legalizeToReg(Src0RM);
      Variable *T = makeReg(DestTy);
      _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq);
      _movp(Dest, T);
    } else if (!Traits::Is64Bit && DestTy == IceType_i64) {
      // fp -> i64 on x86-32 must go through a runtime helper.
      llvm::report_fatal_error("Helper call was expected");
    } else {
      Operand *Src0RM = legalize(Instr->getSrc(0), Legal_Reg | Legal_Mem);
      // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
      Variable *T_1 = nullptr;
      if (Traits::Is64Bit && DestTy == IceType_i64) {
        T_1 = makeReg(IceType_i64);
      } else {
        assert(DestTy != IceType_i64);
        T_1 = makeReg(IceType_i32);
      }
      // cvt() requires its integer argument to be a GPR.
      Variable *T_2 = makeReg(DestTy);
      if (isByteSizedType(DestTy)) {
        // Constrain register classes so the i32->i8 narrowing move is
        // encodable (byte registers are restricted on x86-32).
        assert(T_1->getType() == IceType_i32);
        T_1->setRegClass(RCX86_Is32To8);
        T_2->setRegClass(RCX86_IsTrunc8Rcvr);
      }
      _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si);
      _mov(T_2, T_1); // T_1 and T_2 may have different integer types
      if (DestTy == IceType_i1)
        _and(T_2, Ctx->getConstantInt1(1));
      _mov(Dest, T_2);
    }
    break;
  case InstCast::Fptoui:
    if (isVectorType(DestTy)) {
      llvm::report_fatal_error("Helper call was expected");
    } else if (DestTy == IceType_i64 ||
               (!Traits::Is64Bit && DestTy == IceType_i32)) {
      // Unsigned results that don't fit in a signed conversion of the same
      // width are handled by a runtime helper.
      llvm::report_fatal_error("Helper call was expected");
    } else {
      Operand *Src0RM = legalize(Instr->getSrc(0), Legal_Reg | Legal_Mem);
      // Convert into a wider signed register (the unsigned range of DestTy
      // fits in the signed range of T_1), then narrow:
      // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
      assert(DestTy != IceType_i64);
      Variable *T_1 = nullptr;
      if (Traits::Is64Bit && DestTy == IceType_i32) {
        T_1 = makeReg(IceType_i64);
      } else {
        assert(DestTy != IceType_i32);
        T_1 = makeReg(IceType_i32);
      }
      Variable *T_2 = makeReg(DestTy);
      if (isByteSizedType(DestTy)) {
        // Same byte-register constraint as the Fptosi case above.
        assert(T_1->getType() == IceType_i32);
        T_1->setRegClass(RCX86_Is32To8);
        T_2->setRegClass(RCX86_IsTrunc8Rcvr);
      }
      _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si);
      _mov(T_2, T_1); // T_1 and T_2 may have different integer types
      if (DestTy == IceType_i1)
        _and(T_2, Ctx->getConstantInt1(1));
      _mov(Dest, T_2);
    }
    break;
  case InstCast::Sitofp:
    if (isVectorType(DestTy)) {
      assert(DestTy == IceType_v4f32 &&
             Instr->getSrc(0)->getType() == IceType_v4i32);
      Operand *Src0RM = legalize(Instr->getSrc(0), Legal_Reg | Legal_Mem);
      if (llvm::isa<X86OperandMem>(Src0RM))
        Src0RM = legalizeToReg(Src0RM);
      Variable *T = makeReg(DestTy);
      _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps);
      _movp(Dest, T);
    } else if (!Traits::Is64Bit && Instr->getSrc(0)->getType() == IceType_i64) {
      // i64 -> fp on x86-32 must go through a runtime helper.
      llvm::report_fatal_error("Helper call was expected");
    } else {
      Operand *Src0RM = legalize(Instr->getSrc(0), Legal_Reg | Legal_Mem);
      // Sign-extend the operand.
      // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2
      Variable *T_1 = nullptr;
      if (Traits::Is64Bit && Src0RM->getType() == IceType_i64) {
        T_1 = makeReg(IceType_i64);
      } else {
        assert(Src0RM->getType() != IceType_i64);
        T_1 = makeReg(IceType_i32);
      }
      Variable *T_2 = makeReg(DestTy);
      if (Src0RM->getType() == T_1->getType())
        _mov(T_1, Src0RM);
      else
        _movsx(T_1, Src0RM);
      _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss);
      _mov(Dest, T_2);
    }
    break;
  case InstCast::Uitofp: {
    Operand *Src0 = Instr->getSrc(0);
    if (isVectorType(Src0->getType())) {
      llvm::report_fatal_error("Helper call was expected");
    } else if (Src0->getType() == IceType_i64 ||
               (!Traits::Is64Bit && Src0->getType() == IceType_i32)) {
      // Unsigned sources too wide for a signed conversion of the available
      // register width are handled by a runtime helper.
      llvm::report_fatal_error("Helper call was expected");
    } else {
      Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
      // Zero-extend the operand into a wider register, then do a signed
      // conversion (safe because the zero-extended value is non-negative).
      // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2
      Variable *T_1 = nullptr;
      if (Traits::Is64Bit && Src0RM->getType() == IceType_i32) {
        T_1 = makeReg(IceType_i64);
      } else {
        assert(Src0RM->getType() != IceType_i64);
        assert(Traits::Is64Bit || Src0RM->getType() != IceType_i32);
        T_1 = makeReg(IceType_i32);
      }
      Variable *T_2 = makeReg(DestTy);
      if (Src0RM->getType() == T_1->getType())
        _mov(T_1, Src0RM);
      else
        _movzx(T_1, Src0RM);
      _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss);
      _mov(Dest, T_2);
    }
    break;
  }
  case InstCast::Bitcast: {
    Operand *Src0 = Instr->getSrc(0);
    if (DestTy == Src0->getType()) {
      // Same-type bitcast degenerates to a plain assignment.
      auto *Assign = InstAssign::create(Func, Dest, Src0);
      lowerAssign(Assign);
      return;
    }
    switch (DestTy) {
    default:
      llvm_unreachable("Unexpected Bitcast dest type");
    case IceType_i8: {
      llvm::report_fatal_error("Helper call was expected");
    } break;
    case IceType_i16: {
      llvm::report_fatal_error("Helper call was expected");
    } break;
    case IceType_i32:
    case IceType_f32: {
      // 32-bit GPR <-> XMM transfer via movd.
      Variable *Src0R = legalizeToReg(Src0);
      Variable *T = makeReg(DestTy);
      _movd(T, Src0R);
      _mov(Dest, T);
    } break;
    case IceType_i64: {
      assert(Src0->getType() == IceType_f64);
      if (Traits::Is64Bit) {
        // 64-bit GPR <-> XMM transfer via movd (movq encoding).
        Variable *Src0R = legalizeToReg(Src0);
        Variable *T = makeReg(IceType_i64);
        _movd(T, Src0R);
        _mov(Dest, T);
      } else {
        Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
        // On x86-32, route the f64 through a stack slot and read it back as
        // two i32 halves:
        // a.i64 = bitcast b.f64 ==>
        //   s.f64 = spill b.f64
        //   t_lo.i32 = lo(s.f64)
        //   a_lo.i32 = t_lo.i32
        //   t_hi.i32 = hi(s.f64)
        //   a_hi.i32 = t_hi.i32
        Operand *SpillLo, *SpillHi;
        if (auto *Src0Var = llvm::dyn_cast<Variable>(Src0RM)) {
          Variable *Spill = Func->makeVariable(IceType_f64);
          Spill->setLinkedTo(Src0Var);
          Spill->setMustNotHaveReg();
          _movq(Spill, Src0RM);
          SpillLo = Traits::VariableSplit::create(Func, Spill,
                                                  Traits::VariableSplit::Low);
          SpillHi = Traits::VariableSplit::create(Func, Spill,
                                                  Traits::VariableSplit::High);
        } else {
          // Source is already in memory; address its halves directly.
          SpillLo = loOperand(Src0RM);
          SpillHi = hiOperand(Src0RM);
        }

        auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
        auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
        Variable *T_Lo = makeReg(IceType_i32);
        Variable *T_Hi = makeReg(IceType_i32);

        _mov(T_Lo, SpillLo);
        _mov(DestLo, T_Lo);
        _mov(T_Hi, SpillHi);
        _mov(DestHi, T_Hi);
      }
    } break;
    case IceType_f64: {
      assert(Src0->getType() == IceType_i64);
      if (Traits::Is64Bit) {
        Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
        Variable *T = makeReg(IceType_f64);
        _movd(T, Src0RM);
        _mov(Dest, T);
      } else {
        Src0 = legalize(Src0);
        if (llvm::isa<X86OperandMem>(Src0)) {
          // Source already lives in memory; load it directly as f64.
          Variable *T = makeReg(DestTy);
          _movq(T, Src0);
          _movq(Dest, T);
          break;
        }
        // On x86-32, write the two i32 halves into a stack slot and reload
        // them as one f64:
        // a.f64 = bitcast b.i64 ==>
        //   t_lo.i32 = b_lo.i32
        //   FakeDef(s.f64)
        //   lo(s.f64) = t_lo.i32
        //   t_hi.i32 = b_hi.i32
        //   hi(s.f64) = t_hi.i32
        //   a.f64 = s.f64
        Variable *Spill = Func->makeVariable(IceType_f64);
        Spill->setLinkedTo(Dest);
        Spill->setMustNotHaveReg();

        Variable *T_Lo = nullptr, *T_Hi = nullptr;
        auto *SpillLo = Traits::VariableSplit::create(
            Func, Spill, Traits::VariableSplit::Low);
        auto *SpillHi = Traits::VariableSplit::create(
            Func, Spill, Traits::VariableSplit::High);
        _mov(T_Lo, loOperand(Src0));
        // Technically, the Spill is defined after the _store happens, but
        // SpillLo is considered a "use" of Spill so define Spill before it is
        // used.
        Context.insert<InstFakeDef>(Spill);
        _store(T_Lo, SpillLo);
        _mov(T_Hi, hiOperand(Src0));
        _store(T_Hi, SpillHi);
        _movq(Dest, Spill);
      }
    } break;
    case IceType_v8i1: {
      llvm::report_fatal_error("Helper call was expected");
    } break;
    case IceType_v16i1: {
      llvm::report_fatal_error("Helper call was expected");
    } break;
    case IceType_v8i16:
    case IceType_v16i8:
    case IceType_v4i32:
    case IceType_v4f32: {
      // Vector-to-vector bitcasts of the same width are register moves.
      _movp(Dest, legalizeToReg(Src0));
    } break;
    }
    break;
  }
  }
}
3188
/// \brief Lowers an extractelement instruction.
///
/// Strategy depends on the vector type and the available instruction set:
/// pextr{b,w,d} when legal, pshufd+movd/movss for 4-element vectors
/// otherwise, and a spill-to-stack-slot load for 16-element vectors.
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerExtractElement(
    const InstExtractElement *Instr) {
  Operand *SourceVectNotLegalized = Instr->getSrc(0);
  auto *ElementIndex = llvm::dyn_cast<ConstantInteger32>(Instr->getSrc(1));
  // Only constant indices are allowed in PNaCl IR.
  assert(ElementIndex);

  unsigned Index = ElementIndex->getValue();
  Type Ty = SourceVectNotLegalized->getType();
  Type ElementTy = typeElementType(Ty);
  Type InVectorElementTy = Traits::getInVectorElementType(Ty);

  // TODO(wala): Determine the best lowering sequences for each type.
  // pextrw exists in SSE2 for v8i16/v8i1; the other pextr variants require
  // SSE4.1 (and v4f32 is excluded because it is handled via pshufd/movss).
  bool CanUsePextr = Ty == IceType_v8i16 || Ty == IceType_v8i1 ||
                     (InstructionSet >= Traits::SSE4_1 && Ty != IceType_v4f32);
  Variable *ExtractedElementR =
      makeReg(CanUsePextr ? IceType_i32 : InVectorElementTy);
  if (CanUsePextr) {
    // Use pextrb, pextrw, or pextrd. The "b" and "w" versions clear the upper
    // bits of the destination register, so we represent this by always
    // extracting into an i32 register. The _mov into Dest below will do
    // truncation as necessary.
    Constant *Mask = Ctx->getConstantInt32(Index);
    Variable *SourceVectR = legalizeToReg(SourceVectNotLegalized);
    _pextr(ExtractedElementR, SourceVectR, Mask);
  } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
    // Use pshufd and movd/movss.
    Variable *T = nullptr;
    if (Index) {
      // The shuffle only needs to occur if the element to be extracted is not
      // at the lowest index.
      Constant *Mask = Ctx->getConstantInt32(Index);
      T = makeReg(Ty);
      _pshufd(T, legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem), Mask);
    } else {
      T = legalizeToReg(SourceVectNotLegalized);
    }

    if (InVectorElementTy == IceType_i32) {
      _movd(ExtractedElementR, T);
    } else { // Ty == IceType_f32
      // TODO(wala): _movss is only used here because _mov does not allow a
      // vector source and a scalar destination.  _mov should be able to be
      // used here.
      // _movss is a binary instruction, so the FakeDef is needed to keep the
      // live range analysis consistent.
      Context.insert<InstFakeDef>(ExtractedElementR);
      _movss(ExtractedElementR, T);
    }
  } else {
    assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
    // Spill the value to a stack slot and do the extraction in memory.
    //
    // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when support
    // for legalizing to mem is implemented.
    Variable *Slot = Func->makeVariable(Ty);
    Slot->setMustNotHaveReg();
    _movp(Slot, legalizeToReg(SourceVectNotLegalized));

    // Compute the location of the element in memory.
    unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
    X86OperandMem *Loc =
        getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
    _mov(ExtractedElementR, Loc);
  }

  if (ElementTy == IceType_i1) {
    // Truncate extracted integers to i1s if necessary.
    Variable *T = makeReg(IceType_i1);
    InstCast *Cast =
        InstCast::create(Func, InstCast::Trunc, T, ExtractedElementR);
    lowerCast(Cast);
    ExtractedElementR = T;
  }

  // Copy the element to the destination.
  Variable *Dest = Instr->getDest();
  _mov(Dest, ExtractedElementR);
}
3269
John Porto4a566862016-01-04 09:33:41 -08003270template <typename TraitsType>
3271void TargetX86Base<TraitsType>::lowerFcmp(const InstFcmp *Fcmp) {
David Sehre3984282015-12-15 17:34:55 -08003272 Variable *Dest = Fcmp->getDest();
3273
3274 if (isVectorType(Dest->getType())) {
3275 lowerFcmpVector(Fcmp);
3276 } else {
3277 constexpr Inst *Consumer = nullptr;
3278 lowerFcmpAndConsumer(Fcmp, Consumer);
3279 }
David Sehrdaf096c2015-11-11 10:56:58 -08003280}
3281
John Porto4a566862016-01-04 09:33:41 -08003282template <typename TraitsType>
3283void TargetX86Base<TraitsType>::lowerFcmpAndConsumer(const InstFcmp *Fcmp,
3284 const Inst *Consumer) {
David Sehre3984282015-12-15 17:34:55 -08003285 Operand *Src0 = Fcmp->getSrc(0);
3286 Operand *Src1 = Fcmp->getSrc(1);
3287 Variable *Dest = Fcmp->getDest();
John Porto7e93c622015-06-23 10:58:57 -07003288
David Sehre3984282015-12-15 17:34:55 -08003289 if (isVectorType(Dest->getType()))
3290 llvm::report_fatal_error("Vector compare/branch cannot be folded");
John Porto7e93c622015-06-23 10:58:57 -07003291
David Sehre3984282015-12-15 17:34:55 -08003292 if (Consumer != nullptr) {
3293 if (auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) {
3294 if (lowerOptimizeFcmpSelect(Fcmp, Select))
3295 return;
John Porto7e93c622015-06-23 10:58:57 -07003296 }
John Porto7e93c622015-06-23 10:58:57 -07003297 }
3298
3299 // Lowering a = fcmp cond, b, c
3300 // ucomiss b, c /* only if C1 != Br_None */
3301 // /* but swap b,c order if SwapOperands==true */
3302 // mov a, <default>
3303 // j<C1> label /* only if C1 != Br_None */
3304 // j<C2> label /* only if C2 != Br_None */
3305 // FakeUse(a) /* only if C1 != Br_None */
3306 // mov a, !<default> /* only if C1 != Br_None */
3307 // label: /* only if C1 != Br_None */
3308 //
3309 // setcc lowering when C1 != Br_None && C2 == Br_None:
3310 // ucomiss b, c /* but swap b,c order if SwapOperands==true */
3311 // setcc a, C1
David Sehre3984282015-12-15 17:34:55 -08003312 InstFcmp::FCond Condition = Fcmp->getCondition();
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08003313 assert(Condition < Traits::TableFcmpSize);
3314 if (Traits::TableFcmp[Condition].SwapScalarOperands)
John Porto7e93c622015-06-23 10:58:57 -07003315 std::swap(Src0, Src1);
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08003316 const bool HasC1 = (Traits::TableFcmp[Condition].C1 != Traits::Cond::Br_None);
3317 const bool HasC2 = (Traits::TableFcmp[Condition].C2 != Traits::Cond::Br_None);
John Porto7e93c622015-06-23 10:58:57 -07003318 if (HasC1) {
3319 Src0 = legalize(Src0);
3320 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
3321 Variable *T = nullptr;
3322 _mov(T, Src0);
3323 _ucomiss(T, Src1RM);
3324 if (!HasC2) {
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08003325 assert(Traits::TableFcmp[Condition].Default);
3326 setccOrConsumer(Traits::TableFcmp[Condition].C1, Dest, Consumer);
John Porto7e93c622015-06-23 10:58:57 -07003327 return;
3328 }
3329 }
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08003330 int32_t IntDefault = Traits::TableFcmp[Condition].Default;
David Sehre3984282015-12-15 17:34:55 -08003331 if (Consumer == nullptr) {
David Sehrdaf096c2015-11-11 10:56:58 -08003332 Constant *Default = Ctx->getConstantInt(Dest->getType(), IntDefault);
3333 _mov(Dest, Default);
3334 if (HasC1) {
John Porto4a566862016-01-04 09:33:41 -08003335 InstX86Label *Label = InstX86Label::create(Func, this);
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08003336 _br(Traits::TableFcmp[Condition].C1, Label);
David Sehrdaf096c2015-11-11 10:56:58 -08003337 if (HasC2) {
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08003338 _br(Traits::TableFcmp[Condition].C2, Label);
David Sehrdaf096c2015-11-11 10:56:58 -08003339 }
3340 Constant *NonDefault = Ctx->getConstantInt(Dest->getType(), !IntDefault);
David Sehre3984282015-12-15 17:34:55 -08003341 _redefined(_mov(Dest, NonDefault));
David Sehrdaf096c2015-11-11 10:56:58 -08003342 Context.insert(Label);
John Porto7e93c622015-06-23 10:58:57 -07003343 }
David Sehre3984282015-12-15 17:34:55 -08003344 return;
3345 }
3346 if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) {
David Sehrdaf096c2015-11-11 10:56:58 -08003347 CfgNode *TrueSucc = Br->getTargetTrue();
3348 CfgNode *FalseSucc = Br->getTargetFalse();
3349 if (IntDefault != 0)
3350 std::swap(TrueSucc, FalseSucc);
3351 if (HasC1) {
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08003352 _br(Traits::TableFcmp[Condition].C1, FalseSucc);
David Sehrdaf096c2015-11-11 10:56:58 -08003353 if (HasC2) {
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08003354 _br(Traits::TableFcmp[Condition].C2, FalseSucc);
David Sehrdaf096c2015-11-11 10:56:58 -08003355 }
3356 _br(TrueSucc);
3357 return;
3358 }
3359 _br(FalseSucc);
David Sehre3984282015-12-15 17:34:55 -08003360 return;
John Porto7e93c622015-06-23 10:58:57 -07003361 }
David Sehre3984282015-12-15 17:34:55 -08003362 if (auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) {
3363 Operand *SrcT = Select->getTrueOperand();
3364 Operand *SrcF = Select->getFalseOperand();
3365 Variable *SelectDest = Select->getDest();
3366 if (IntDefault != 0)
3367 std::swap(SrcT, SrcF);
3368 lowerMove(SelectDest, SrcF, false);
3369 if (HasC1) {
John Porto4a566862016-01-04 09:33:41 -08003370 InstX86Label *Label = InstX86Label::create(Func, this);
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08003371 _br(Traits::TableFcmp[Condition].C1, Label);
David Sehre3984282015-12-15 17:34:55 -08003372 if (HasC2) {
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08003373 _br(Traits::TableFcmp[Condition].C2, Label);
David Sehre3984282015-12-15 17:34:55 -08003374 }
3375 static constexpr bool IsRedefinition = true;
3376 lowerMove(SelectDest, SrcT, IsRedefinition);
3377 Context.insert(Label);
3378 }
3379 return;
3380 }
3381 llvm::report_fatal_error("Unexpected consumer type");
3382}
3383
/// Lowers a vector floating-point compare. The result is a mask vector in
/// which each lane is all-ones (true) or all-zeros (false), built from
/// cmpps-style SSE predicates looked up in Traits::TableFcmp.
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerFcmpVector(const InstFcmp *Fcmp) {
  Operand *Src0 = Fcmp->getSrc(0);
  Operand *Src1 = Fcmp->getSrc(1);
  Variable *Dest = Fcmp->getDest();

  // This routine only handles vector destinations; scalar fcmp goes through
  // lowerFcmpAndConsumer.
  if (!isVectorType(Dest->getType()))
    llvm::report_fatal_error("Expected vector compare");

  InstFcmp::FCond Condition = Fcmp->getCondition();
  assert(Condition < Traits::TableFcmpSize);

  // Some predicates are implemented with the operands reversed (per the
  // SwapVectorOperands entry in the table).
  if (Traits::TableFcmp[Condition].SwapVectorOperands)
    std::swap(Src0, Src1);

  Variable *T = nullptr;

  if (Condition == InstFcmp::True) {
    // "True" compares to a constant all-ones mask, with no cmpps needed.
    // makeVectorOfOnes() requires an integer vector type.
    T = makeVectorOfMinusOnes(IceType_v4i32);
  } else if (Condition == InstFcmp::False) {
    // "False" compares to a constant all-zeros mask.
    T = makeVectorOfZeros(Dest->getType());
  } else {
    Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
    Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
    // cmpps is issued twice below for One/Ueq with the same second operand;
    // force Src1 into a register so it is only loaded once.
    if (llvm::isa<X86OperandMem>(Src1RM))
      Src1RM = legalizeToReg(Src1RM);

    switch (Condition) {
    default: {
      // Single-instruction case: the table supplies the cmpps immediate.
      const CmppsCond Predicate = Traits::TableFcmp[Condition].Predicate;
      assert(Predicate != Traits::Cond::Cmpps_Invalid);
      T = makeReg(Src0RM->getType());
      _movp(T, Src0RM);
      _cmpps(T, Src1RM, Predicate);
    } break;
    case InstFcmp::One: {
      // Check both unequal and ordered: (a != b) & !isnan(a,b).
      T = makeReg(Src0RM->getType());
      Variable *T2 = makeReg(Src0RM->getType());
      _movp(T, Src0RM);
      _cmpps(T, Src1RM, Traits::Cond::Cmpps_neq);
      _movp(T2, Src0RM);
      _cmpps(T2, Src1RM, Traits::Cond::Cmpps_ord);
      _pand(T, T2);
    } break;
    case InstFcmp::Ueq: {
      // Check both equal or unordered: (a == b) | isnan(a,b).
      T = makeReg(Src0RM->getType());
      Variable *T2 = makeReg(Src0RM->getType());
      _movp(T, Src0RM);
      _cmpps(T, Src1RM, Traits::Cond::Cmpps_eq);
      _movp(T2, Src0RM);
      _cmpps(T2, Src1RM, Traits::Cond::Cmpps_unord);
      _por(T, T2);
    } break;
    }
  }

  assert(T != nullptr);
  _movp(Dest, T);
  // Lanes are already all-ones/all-zeros, so a following sext of the i1
  // vector result is redundant and can be dropped.
  eliminateNextVectorSextInstruction(Dest);
}
3447
David Sehr5c875422015-10-15 10:38:53 -07003448inline bool isZero(const Operand *Opnd) {
3449 if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Opnd))
3450 return C64->getValue() == 0;
3451 if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(Opnd))
3452 return C32->getValue() == 0;
3453 return false;
3454}
3455
John Porto4a566862016-01-04 09:33:41 -08003456template <typename TraitsType>
3457void TargetX86Base<TraitsType>::lowerIcmpAndConsumer(const InstIcmp *Icmp,
3458 const Inst *Consumer) {
David Sehrd9810252015-10-16 13:23:17 -07003459 Operand *Src0 = legalize(Icmp->getSrc(0));
3460 Operand *Src1 = legalize(Icmp->getSrc(1));
3461 Variable *Dest = Icmp->getDest();
John Porto7e93c622015-06-23 10:58:57 -07003462
David Sehre3984282015-12-15 17:34:55 -08003463 if (isVectorType(Dest->getType()))
3464 llvm::report_fatal_error("Vector compare/branch cannot be folded");
John Porto7e93c622015-06-23 10:58:57 -07003465
John Porto1d235422015-08-12 12:37:53 -07003466 if (!Traits::Is64Bit && Src0->getType() == IceType_i64) {
David Sehre3984282015-12-15 17:34:55 -08003467 lowerIcmp64(Icmp, Consumer);
John Porto7e93c622015-06-23 10:58:57 -07003468 return;
3469 }
3470
3471 // cmp b, c
David Sehr5c875422015-10-15 10:38:53 -07003472 if (isZero(Src1)) {
David Sehrd9810252015-10-16 13:23:17 -07003473 switch (Icmp->getCondition()) {
David Sehr5c875422015-10-15 10:38:53 -07003474 default:
3475 break;
3476 case InstIcmp::Uge:
David Sehre3984282015-12-15 17:34:55 -08003477 movOrConsumer(true, Dest, Consumer);
David Sehr5c875422015-10-15 10:38:53 -07003478 return;
3479 case InstIcmp::Ult:
David Sehre3984282015-12-15 17:34:55 -08003480 movOrConsumer(false, Dest, Consumer);
David Sehr5c875422015-10-15 10:38:53 -07003481 return;
3482 }
3483 }
John Porto7e93c622015-06-23 10:58:57 -07003484 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1);
3485 _cmp(Src0RM, Src1);
David Sehre3984282015-12-15 17:34:55 -08003486 setccOrConsumer(Traits::getIcmp32Mapping(Icmp->getCondition()), Dest,
3487 Consumer);
3488}
3489
/// Lowers a vector integer compare to SSE2 sequences. SSE2 provides only
/// pcmpeq and (signed) pcmpgt, so the remaining predicates are synthesized
/// by operand swapping, result inversion (pxor with all-ones), and — for
/// unsigned predicates — biasing both operands by the sign bit first.
/// The result is a lane mask of all-ones/all-zeros.
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerIcmpVector(const InstIcmp *Icmp) {
  Operand *Src0 = legalize(Icmp->getSrc(0));
  Operand *Src1 = legalize(Icmp->getSrc(1));
  Variable *Dest = Icmp->getDest();

  // Scalar compares go through lowerIcmpAndConsumer instead.
  if (!isVectorType(Dest->getType()))
    llvm::report_fatal_error("Expected a vector compare");

  Type Ty = Src0->getType();
  // Promote i1 vectors to 128 bit integer vector types.
  if (typeElementType(Ty) == IceType_i1) {
    Type NewTy = IceType_NUM;
    switch (Ty) {
    default:
      llvm::report_fatal_error("unexpected type");
      break;
    case IceType_v4i1:
      NewTy = IceType_v4i32;
      break;
    case IceType_v8i1:
      NewTy = IceType_v8i16;
      break;
    case IceType_v16i1:
      NewTy = IceType_v16i8;
      break;
    }
    // Sign-extend each i1 lane so the promoted lanes compare consistently.
    Variable *NewSrc0 = Func->makeVariable(NewTy);
    Variable *NewSrc1 = Func->makeVariable(NewTy);
    lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc0, Src0));
    lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc1, Src1));
    Src0 = NewSrc0;
    Src1 = NewSrc1;
    Ty = NewTy;
  }

  InstIcmp::ICond Condition = Icmp->getCondition();

  Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
  Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);

  // SSE2 only has signed comparison operations. Transform unsigned inputs in
  // a manner that allows for the use of signed comparison operations by
  // flipping the high order bits.
  if (Condition == InstIcmp::Ugt || Condition == InstIcmp::Uge ||
      Condition == InstIcmp::Ult || Condition == InstIcmp::Ule) {
    Variable *T0 = makeReg(Ty);
    Variable *T1 = makeReg(Ty);
    Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty);
    _movp(T0, Src0RM);
    _pxor(T0, HighOrderBits);
    _movp(T1, Src1RM);
    _pxor(T1, HighOrderBits);
    Src0RM = T0;
    Src1RM = T1;
  }

  // In each case below, the second pcmp operand is forced into a register
  // when it is a memory operand, since pcmpeq/pcmpgt write their first
  // operand and the sequences may reuse the other operand.
  Variable *T = makeReg(Ty);
  switch (Condition) {
  default:
    llvm_unreachable("unexpected condition");
    break;
  case InstIcmp::Eq: {
    if (llvm::isa<X86OperandMem>(Src1RM))
      Src1RM = legalizeToReg(Src1RM);
    _movp(T, Src0RM);
    _pcmpeq(T, Src1RM);
  } break;
  case InstIcmp::Ne: {
    // ne == !(eq): compare equal, then invert the mask.
    if (llvm::isa<X86OperandMem>(Src1RM))
      Src1RM = legalizeToReg(Src1RM);
    _movp(T, Src0RM);
    _pcmpeq(T, Src1RM);
    Variable *MinusOne = makeVectorOfMinusOnes(Ty);
    _pxor(T, MinusOne);
  } break;
  case InstIcmp::Ugt:
  case InstIcmp::Sgt: {
    // Ugt was reduced to Sgt by the sign-bit flip above.
    if (llvm::isa<X86OperandMem>(Src1RM))
      Src1RM = legalizeToReg(Src1RM);
    _movp(T, Src0RM);
    _pcmpgt(T, Src1RM);
  } break;
  case InstIcmp::Uge:
  case InstIcmp::Sge: {
    // !(Src1RM > Src0RM)
    if (llvm::isa<X86OperandMem>(Src0RM))
      Src0RM = legalizeToReg(Src0RM);
    _movp(T, Src1RM);
    _pcmpgt(T, Src0RM);
    Variable *MinusOne = makeVectorOfMinusOnes(Ty);
    _pxor(T, MinusOne);
  } break;
  case InstIcmp::Ult:
  case InstIcmp::Slt: {
    // lt == swapped gt: Src1RM > Src0RM.
    if (llvm::isa<X86OperandMem>(Src0RM))
      Src0RM = legalizeToReg(Src0RM);
    _movp(T, Src1RM);
    _pcmpgt(T, Src0RM);
  } break;
  case InstIcmp::Ule:
  case InstIcmp::Sle: {
    // !(Src0RM > Src1RM)
    if (llvm::isa<X86OperandMem>(Src1RM))
      Src1RM = legalizeToReg(Src1RM);
    _movp(T, Src0RM);
    _pcmpgt(T, Src1RM);
    Variable *MinusOne = makeVectorOfMinusOnes(Ty);
    _pxor(T, MinusOne);
  } break;
  }

  _movp(Dest, T);
  // Lanes are already all-ones/all-zeros, so a following sext of the i1
  // vector result is redundant and can be dropped.
  eliminateNextVectorSextInstruction(Dest);
}
3605
/// Lowers a 64-bit integer compare on a 32-bit target (enabled only when
/// !T::Is64Bit) by comparing the high and low 32-bit halves separately,
/// branching via the condition-code triple (C1/C2/C3) in
/// Traits::TableIcmp64. Compares against a zero constant take shortcut
/// sequences (or of halves, sign-bit test, or a constant result). The
/// optional Consumer (br or select) is folded into the branch structure.
template <typename TraitsType>
template <typename T>
typename std::enable_if<!T::Is64Bit, void>::type
TargetX86Base<TraitsType>::lowerIcmp64(const InstIcmp *Icmp,
                                       const Inst *Consumer) {
  // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
  Operand *Src0 = legalize(Icmp->getSrc(0));
  Operand *Src1 = legalize(Icmp->getSrc(1));
  Variable *Dest = Icmp->getDest();
  InstIcmp::ICond Condition = Icmp->getCondition();
  assert(Condition < Traits::TableIcmp64Size);
  Operand *Src0LoRM = nullptr;
  Operand *Src0HiRM = nullptr;
  // Legalize the portions of Src0 that are going to be needed.
  // NOTE: the case fallthroughs in this switch are intentional — each group
  // of conditions accumulates the legalizations required by the groups below
  // it.
  if (isZero(Src1)) {
    switch (Condition) {
    default:
      llvm_unreachable("unexpected condition");
      break;
    // These two are not optimized, so we fall through to the general case,
    // which needs the upper and lower halves legalized.
    case InstIcmp::Sgt:
    case InstIcmp::Sle:
    // These four compare after performing an "or" of the high and low half, so
    // they need the upper and lower halves legalized.
    case InstIcmp::Eq:
    case InstIcmp::Ule:
    case InstIcmp::Ne:
    case InstIcmp::Ugt:
      Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem);
    // These two test only the high half's sign bit, so they need only
    // the upper half legalized.  (Intentional fallthrough from above.)
    case InstIcmp::Sge:
    case InstIcmp::Slt:
      Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem);
      break;

    // These two move constants and hence need no legalization.
    case InstIcmp::Uge:
    case InstIcmp::Ult:
      break;
    }
  } else {
    Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem);
    Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem);
  }
  // Optimize comparisons with zero.
  if (isZero(Src1)) {
    Constant *SignMask = Ctx->getConstantInt32(0x80000000);
    Variable *Temp = nullptr;
    switch (Condition) {
    default:
      llvm_unreachable("unexpected condition");
      break;
    case InstIcmp::Eq:
    case InstIcmp::Ule:
      // x == 0 and x <=u 0 both mean "all 64 bits are zero": or the halves
      // together and test for zero.
      // Mov Src0HiRM first, because it was legalized most recently, and will
      // sometimes avoid a move before the OR.
      _mov(Temp, Src0HiRM);
      _or(Temp, Src0LoRM);
      Context.insert<InstFakeUse>(Temp);
      setccOrConsumer(Traits::Cond::Br_e, Dest, Consumer);
      return;
    case InstIcmp::Ne:
    case InstIcmp::Ugt:
      // x != 0 and x >u 0 both mean "some bit is nonzero".
      // Mov Src0HiRM first, because it was legalized most recently, and will
      // sometimes avoid a move before the OR.
      _mov(Temp, Src0HiRM);
      _or(Temp, Src0LoRM);
      Context.insert<InstFakeUse>(Temp);
      setccOrConsumer(Traits::Cond::Br_ne, Dest, Consumer);
      return;
    case InstIcmp::Uge:
      // x >=u 0 is always true.
      movOrConsumer(true, Dest, Consumer);
      return;
    case InstIcmp::Ult:
      // x <u 0 is always false.
      movOrConsumer(false, Dest, Consumer);
      return;
    case InstIcmp::Sgt:
      break;
    case InstIcmp::Sge:
      // x >=s 0 iff the sign bit (bit 63, in the high half) is clear.
      _test(Src0HiRM, SignMask);
      setccOrConsumer(Traits::Cond::Br_e, Dest, Consumer);
      return;
    case InstIcmp::Slt:
      // x <s 0 iff the sign bit is set.
      _test(Src0HiRM, SignMask);
      setccOrConsumer(Traits::Cond::Br_ne, Dest, Consumer);
      return;
    case InstIcmp::Sle:
      break;
    }
  }
  // Handle general compares.
  Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
  Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
  if (Consumer == nullptr) {
    // No consumer: materialize 0/1 into Dest using the label scheme from the
    // comment at the top of this function.
    Constant *Zero = Ctx->getConstantInt(Dest->getType(), 0);
    Constant *One = Ctx->getConstantInt(Dest->getType(), 1);
    InstX86Label *LabelFalse = InstX86Label::create(Func, this);
    InstX86Label *LabelTrue = InstX86Label::create(Func, this);
    _mov(Dest, One);
    _cmp(Src0HiRM, Src1HiRI);
    if (Traits::TableIcmp64[Condition].C1 != Traits::Cond::Br_None)
      _br(Traits::TableIcmp64[Condition].C1, LabelTrue);
    if (Traits::TableIcmp64[Condition].C2 != Traits::Cond::Br_None)
      _br(Traits::TableIcmp64[Condition].C2, LabelFalse);
    _cmp(Src0LoRM, Src1LoRI);
    _br(Traits::TableIcmp64[Condition].C3, LabelTrue);
    Context.insert(LabelFalse);
    _redefined(_mov(Dest, Zero));
    Context.insert(LabelTrue);
    return;
  }
  if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) {
    // Folded branch: jump straight to the br's successors.
    _cmp(Src0HiRM, Src1HiRI);
    if (Traits::TableIcmp64[Condition].C1 != Traits::Cond::Br_None)
      _br(Traits::TableIcmp64[Condition].C1, Br->getTargetTrue());
    if (Traits::TableIcmp64[Condition].C2 != Traits::Cond::Br_None)
      _br(Traits::TableIcmp64[Condition].C2, Br->getTargetFalse());
    _cmp(Src0LoRM, Src1LoRI);
    _br(Traits::TableIcmp64[Condition].C3, Br->getTargetTrue(),
        Br->getTargetFalse());
    return;
  }
  if (auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) {
    // Folded select: move the true operand, then conditionally overwrite it
    // with the false operand (marked as a redefinition for liveness).
    Operand *SrcT = Select->getTrueOperand();
    Operand *SrcF = Select->getFalseOperand();
    Variable *SelectDest = Select->getDest();
    InstX86Label *LabelFalse = InstX86Label::create(Func, this);
    InstX86Label *LabelTrue = InstX86Label::create(Func, this);
    lowerMove(SelectDest, SrcT, false);
    _cmp(Src0HiRM, Src1HiRI);
    if (Traits::TableIcmp64[Condition].C1 != Traits::Cond::Br_None)
      _br(Traits::TableIcmp64[Condition].C1, LabelTrue);
    if (Traits::TableIcmp64[Condition].C2 != Traits::Cond::Br_None)
      _br(Traits::TableIcmp64[Condition].C2, LabelFalse);
    _cmp(Src0LoRM, Src1LoRI);
    _br(Traits::TableIcmp64[Condition].C3, LabelTrue);
    Context.insert(LabelFalse);
    static constexpr bool IsRedefinition = true;
    lowerMove(SelectDest, SrcF, IsRedefinition);
    Context.insert(LabelTrue);
    return;
  }
  llvm::report_fatal_error("Unexpected consumer type");
}
3752
John Porto4a566862016-01-04 09:33:41 -08003753template <typename TraitsType>
3754void TargetX86Base<TraitsType>::setccOrConsumer(BrCond Condition,
3755 Variable *Dest,
3756 const Inst *Consumer) {
David Sehre3984282015-12-15 17:34:55 -08003757 if (Consumer == nullptr) {
David Sehrd9810252015-10-16 13:23:17 -07003758 _setcc(Dest, Condition);
David Sehre3984282015-12-15 17:34:55 -08003759 return;
David Sehrd9810252015-10-16 13:23:17 -07003760 }
David Sehre3984282015-12-15 17:34:55 -08003761 if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) {
3762 _br(Condition, Br->getTargetTrue(), Br->getTargetFalse());
3763 return;
3764 }
3765 if (const auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) {
3766 Operand *SrcT = Select->getTrueOperand();
3767 Operand *SrcF = Select->getFalseOperand();
3768 Variable *SelectDest = Select->getDest();
3769 lowerSelectMove(SelectDest, Condition, SrcT, SrcF);
3770 return;
3771 }
3772 llvm::report_fatal_error("Unexpected consumer type");
David Sehrd9810252015-10-16 13:23:17 -07003773}
3774
John Porto4a566862016-01-04 09:33:41 -08003775template <typename TraitsType>
3776void TargetX86Base<TraitsType>::movOrConsumer(bool IcmpResult, Variable *Dest,
3777 const Inst *Consumer) {
David Sehre3984282015-12-15 17:34:55 -08003778 if (Consumer == nullptr) {
David Sehrd9810252015-10-16 13:23:17 -07003779 _mov(Dest, Ctx->getConstantInt(Dest->getType(), (IcmpResult ? 1 : 0)));
David Sehre3984282015-12-15 17:34:55 -08003780 return;
3781 }
3782 if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) {
David Sehrd9810252015-10-16 13:23:17 -07003783 // TODO(sehr,stichnot): This could be done with a single unconditional
3784 // branch instruction, but subzero doesn't know how to handle the resulting
3785 // control flow graph changes now. Make it do so to eliminate mov and cmp.
3786 _mov(Dest, Ctx->getConstantInt(Dest->getType(), (IcmpResult ? 1 : 0)));
3787 _cmp(Dest, Ctx->getConstantInt(Dest->getType(), 0));
3788 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse());
David Sehre3984282015-12-15 17:34:55 -08003789 return;
David Sehrd9810252015-10-16 13:23:17 -07003790 }
David Sehre3984282015-12-15 17:34:55 -08003791 if (const auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) {
3792 Operand *Src = nullptr;
3793 if (IcmpResult) {
3794 Src = legalize(Select->getTrueOperand(), Legal_Reg | Legal_Imm);
3795 } else {
3796 Src = legalize(Select->getFalseOperand(), Legal_Reg | Legal_Imm);
3797 }
3798 Variable *SelectDest = Select->getDest();
3799 lowerMove(SelectDest, Src, false);
3800 return;
3801 }
3802 llvm::report_fatal_error("Unexpected consumer type");
John Porto1d235422015-08-12 12:37:53 -07003803}
3804
/// Lowers an And/Or arithmetic instruction that is being folded into a
/// branch consumer: the flags set by test/or drive the branch directly, so
/// the arithmetic result itself is never materialized.  The FakeUse/FakeDef
/// insertions keep the liveness bookkeeping consistent for T and Dest.
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerArithAndConsumer(
    const InstArithmetic *Arith, const Inst *Consumer) {
  Variable *T = nullptr;
  Operand *Src0 = legalize(Arith->getSrc(0));
  Operand *Src1 = legalize(Arith->getSrc(1));
  Variable *Dest = Arith->getDest();
  switch (Arith->getOp()) {
  default:
    llvm_unreachable("arithmetic operator not AND or OR");
    break;
  case InstArithmetic::And:
    // "and" folded into a branch is implemented as test, which sets flags
    // without writing a result register.
    _mov(T, Src0);
    // Test cannot have an address in the second position. Since T is
    // guaranteed to be a register and Src1 could be a memory load, ensure
    // that the second argument is a register.
    if (llvm::isa<Constant>(Src1))
      _test(T, Src1);
    else
      _test(Src1, T);
    break;
  case InstArithmetic::Or:
    // "or" sets ZF according to the result, so the flags can drive the
    // branch directly.
    _mov(T, Src0);
    _or(T, Src1);
    break;
  }

  if (Consumer == nullptr) {
    llvm::report_fatal_error("Expected a consumer instruction");
  }
  if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) {
    // Keep T live through the branch and mark Dest as defined even though no
    // real instruction writes it.
    Context.insert<InstFakeUse>(T);
    Context.insert<InstFakeDef>(Dest);
    _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse());
    return;
  }
  llvm::report_fatal_error("Unexpected consumer type");
}
3843
/// Lowers insertelement with one of three strategies, chosen by vector type
/// and instruction-set level:
///   1. direct insert instructions (insertps / pinsrb / pinsrw / pinsrd),
///   2. a two-shufps (or movss for index 0) shuffle sequence for 4-element
///      vectors without SSE4.1,
///   3. spill to a stack slot and store the element into it for 16x8-bit
///      vectors without SSE4.1.
/// The element index must be a compile-time constant (PNaCl IR guarantee).
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerInsertElement(
    const InstInsertElement *Instr) {
  Operand *SourceVectNotLegalized = Instr->getSrc(0);
  Operand *ElementToInsertNotLegalized = Instr->getSrc(1);
  auto *ElementIndex = llvm::dyn_cast<ConstantInteger32>(Instr->getSrc(2));
  // Only constant indices are allowed in PNaCl IR.
  assert(ElementIndex);
  unsigned Index = ElementIndex->getValue();
  assert(Index < typeNumElements(SourceVectNotLegalized->getType()));

  Type Ty = SourceVectNotLegalized->getType();
  Type ElementTy = typeElementType(Ty);
  Type InVectorElementTy = Traits::getInVectorElementType(Ty);

  if (ElementTy == IceType_i1) {
    // Expand the element to the appropriate size for it to be inserted in the
    // vector.
    Variable *Expanded = Func->makeVariable(InVectorElementTy);
    auto *Cast = InstCast::create(Func, InstCast::Zext, Expanded,
                                  ElementToInsertNotLegalized);
    lowerCast(Cast);
    ElementToInsertNotLegalized = Expanded;
  }

  // Strategy 1: pinsrw exists in SSE2 for v8i16/v8i1; the other insert forms
  // require SSE4.1.
  if (Ty == IceType_v8i16 || Ty == IceType_v8i1 ||
      InstructionSet >= Traits::SSE4_1) {
    // Use insertps, pinsrb, pinsrw, or pinsrd.
    Operand *ElementRM =
        legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem);
    Operand *SourceVectRM =
        legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
    Variable *T = makeReg(Ty);
    _movp(T, SourceVectRM);
    if (Ty == IceType_v4f32) {
      // insertps encodes the destination lane in bits 4-5 of the immediate.
      _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4));
    } else {
      // For the pinsrb and pinsrw instructions, when the source operand is a
      // register, it must be a full r32 register like eax, and not ax/al/ah.
      // For filetype=asm, InstX86Pinsr<TraitsType>::emit() compensates for
      // the use
      // of r16 and r8 by converting them through getBaseReg(), while emitIAS()
      // validates that the original and base register encodings are the same.
      if (ElementRM->getType() == IceType_i8 &&
          llvm::isa<Variable>(ElementRM)) {
        // Don't use ah/bh/ch/dh for pinsrb.
        ElementRM = copyToReg8(ElementRM);
      }
      _pinsr(T, ElementRM, Ctx->getConstantInt32(Index));
    }
    _movp(Instr->getDest(), T);
  } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
    // Strategy 2: use shufps or movss.
    Variable *ElementR = nullptr;
    Operand *SourceVectRM =
        legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);

    if (InVectorElementTy == IceType_f32) {
      // ElementR will be in an XMM register since it is floating point.
      ElementR = legalizeToReg(ElementToInsertNotLegalized);
    } else {
      // Copy an integer to an XMM register.
      Operand *T = legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem);
      ElementR = makeReg(Ty);
      _movd(ElementR, T);
    }

    if (Index == 0) {
      // Inserting into lane 0 only needs movss, which merges the low scalar
      // into the destination vector.
      Variable *T = makeReg(Ty);
      _movp(T, SourceVectRM);
      _movss(T, ElementR);
      _movp(Instr->getDest(), T);
      return;
    }

    // shufps treats the source and destination operands as vectors of four
    // doublewords. The destination's two high doublewords are selected from
    // the source operand and the two low doublewords are selected from the
    // (original value of) the destination operand. An insertelement operation
    // can be effected with a sequence of two shufps operations with
    // appropriate masks. In all cases below, Element[0] is being inserted into
    // SourceVectOperand. Indices are ordered from left to right.
    //
    // insertelement into index 1 (result is stored in ElementR):
    //   ElementR := ElementR[0, 0] SourceVectRM[0, 0]
    //   ElementR := ElementR[3, 0] SourceVectRM[2, 3]
    //
    // insertelement into index 2 (result is stored in T):
    //   T := SourceVectRM
    //   ElementR := ElementR[0, 0] T[0, 3]
    //   T := T[0, 1] ElementR[0, 3]
    //
    // insertelement into index 3 (result is stored in T):
    //   T := SourceVectRM
    //   ElementR := ElementR[0, 0] T[0, 2]
    //   T := T[0, 1] ElementR[3, 0]
    //
    // Mask arrays are indexed by (Index - 1); the values encode the shufps
    // lane selectors for the sequences shown above.
    const unsigned char Mask1[3] = {0, 192, 128};
    const unsigned char Mask2[3] = {227, 196, 52};

    Constant *Mask1Constant = Ctx->getConstantInt32(Mask1[Index - 1]);
    Constant *Mask2Constant = Ctx->getConstantInt32(Mask2[Index - 1]);

    if (Index == 1) {
      _shufps(ElementR, SourceVectRM, Mask1Constant);
      _shufps(ElementR, SourceVectRM, Mask2Constant);
      _movp(Instr->getDest(), ElementR);
    } else {
      Variable *T = makeReg(Ty);
      _movp(T, SourceVectRM);
      _shufps(ElementR, T, Mask1Constant);
      _shufps(T, ElementR, Mask2Constant);
      _movp(Instr->getDest(), T);
    }
  } else {
    // Strategy 3: no byte-granularity insert before SSE4.1.
    assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
    // Spill the value to a stack slot and perform the insertion in memory.
    //
    // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when support
    // for legalizing to mem is implemented.
    Variable *Slot = Func->makeVariable(Ty);
    Slot->setMustNotHaveReg();
    _movp(Slot, legalizeToReg(SourceVectNotLegalized));

    // Compute the location of the position to insert in memory.
    unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
    X86OperandMem *Loc =
        getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
    _store(legalizeToReg(ElementToInsertNotLegalized), Loc);

    Variable *T = makeReg(Ty);
    _movp(T, Slot);
    _movp(Instr->getDest(), T);
  }
}
3978
/// Lowers a single intrinsic call by dispatching on the intrinsic ID.
/// Atomic intrinsics are expanded inline (locked instructions plus mfence
/// where visibility is required); the rest map either to dedicated
/// instructions (bswap, sqrtss, ud2, ...) or to runtime helper calls
/// (ctpop, setjmp/longjmp, memcpy/memmove/memset).
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerIntrinsicCall(
    const InstIntrinsicCall *Instr) {
  switch (Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID) {
  case Intrinsics::AtomicCmpxchg: {
    // Args: 0 = ptr, 1 = expected, 2 = desired, 3/4 = memory orderings.
    if (!Intrinsics::isMemoryOrderValid(
            ID, getConstantMemoryOrder(Instr->getArg(3)),
            getConstantMemoryOrder(Instr->getArg(4)))) {
      Func->setError("Unexpected memory ordering for AtomicCmpxchg");
      return;
    }
    Variable *DestPrev = Instr->getDest();
    Operand *PtrToMem = legalize(Instr->getArg(0));
    Operand *Expected = legalize(Instr->getArg(1));
    Operand *Desired = legalize(Instr->getArg(2));
    // First try to fold a following icmp eq + br into the flags already set
    // by the cmpxchg itself.
    if (tryOptimizedCmpxchgCmpBr(DestPrev, PtrToMem, Expected, Desired))
      return;
    lowerAtomicCmpxchg(DestPrev, PtrToMem, Expected, Desired);
    return;
  }
  case Intrinsics::AtomicFence:
    if (!Intrinsics::isMemoryOrderValid(
            ID, getConstantMemoryOrder(Instr->getArg(0)))) {
      Func->setError("Unexpected memory ordering for AtomicFence");
      return;
    }
    _mfence();
    return;
  case Intrinsics::AtomicFenceAll:
    // NOTE: FenceAll should prevent any load/store from being moved across
    // the fence (both atomic and non-atomic). The InstX8632Mfence instruction
    // is currently marked coarsely as "HasSideEffects".
    _mfence();
    return;
  case Intrinsics::AtomicIsLockFree: {
    // X86 is always lock free for 8/16/32/64 bit accesses.
    // TODO(jvoung): Since the result is constant when given a constant byte
    // size, this opens up DCE opportunities.
    Operand *ByteSize = Instr->getArg(0);
    Variable *Dest = Instr->getDest();
    if (auto *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize)) {
      Constant *Result;
      switch (CI->getValue()) {
      default:
        // Some x86-64 processors support the cmpxchg16b instruction, which can
        // make 16-byte operations lock free (when used with the LOCK prefix).
        // However, that's not supported in 32-bit mode, so just return 0 even
        // for large sizes.
        Result = Ctx->getConstantZero(IceType_i32);
        break;
      case 1:
      case 2:
      case 4:
      case 8:
        Result = Ctx->getConstantInt32(1);
        break;
      }
      _mov(Dest, Result);
      return;
    }
    // The PNaCl ABI requires the byte size to be a compile-time constant.
    Func->setError("AtomicIsLockFree byte size should be compile-time const");
    return;
  }
  case Intrinsics::AtomicLoad: {
    // We require the memory address to be naturally aligned. Given that is the
    // case, then normal loads are atomic.
    if (!Intrinsics::isMemoryOrderValid(
            ID, getConstantMemoryOrder(Instr->getArg(1)))) {
      Func->setError("Unexpected memory ordering for AtomicLoad");
      return;
    }
    Variable *Dest = Instr->getDest();
    if (!Traits::Is64Bit) {
      if (auto *Dest64On32 = llvm::dyn_cast<Variable64On32>(Dest)) {
        // Follow what GCC does and use a movq instead of what lowerLoad()
        // normally does (split the load into two). Thus, this skips
        // load/arithmetic op folding. Load/arithmetic folding can't happen
        // anyway, since this is x86-32 and integer arithmetic only happens on
        // 32-bit quantities.
        Variable *T = makeReg(IceType_f64);
        X86OperandMem *Addr = formMemoryOperand(Instr->getArg(0), IceType_f64);
        _movq(T, Addr);
        // Then cast the bits back out of the XMM register to the i64 Dest.
        auto *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T);
        lowerCast(Cast);
        // Make sure that the atomic load isn't elided when unused.
        Context.insert<InstFakeUse>(Dest64On32->getLo());
        Context.insert<InstFakeUse>(Dest64On32->getHi());
        return;
      }
    }
    auto *Load = InstLoad::create(Func, Dest, Instr->getArg(0));
    lowerLoad(Load);
    // Make sure the atomic load isn't elided when unused, by adding a FakeUse.
    // Since lowerLoad may fuse the load w/ an arithmetic instruction, insert
    // the FakeUse on the last-inserted instruction's dest.
    Context.insert<InstFakeUse>(Context.getLastInserted()->getDest());
    return;
  }
  case Intrinsics::AtomicRMW:
    // Args: 0 = operation (constant), 1 = ptr, 2 = value, 3 = memory order.
    if (!Intrinsics::isMemoryOrderValid(
            ID, getConstantMemoryOrder(Instr->getArg(3)))) {
      Func->setError("Unexpected memory ordering for AtomicRMW");
      return;
    }
    lowerAtomicRMW(
        Instr->getDest(),
        static_cast<uint32_t>(
            llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()),
        Instr->getArg(1), Instr->getArg(2));
    return;
  case Intrinsics::AtomicStore: {
    if (!Intrinsics::isMemoryOrderValid(
            ID, getConstantMemoryOrder(Instr->getArg(2)))) {
      Func->setError("Unexpected memory ordering for AtomicStore");
      return;
    }
    // We require the memory address to be naturally aligned. Given that is the
    // case, then normal stores are atomic. Add a fence after the store to make
    // it visible.
    Operand *Value = Instr->getArg(0);
    Operand *Ptr = Instr->getArg(1);
    if (!Traits::Is64Bit && Value->getType() == IceType_i64) {
      // Use a movq instead of what lowerStore() normally does (split the store
      // into two), following what GCC does. Cast the bits from int to an
      // xmm register first.
      Variable *T = makeReg(IceType_f64);
      auto *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value);
      lowerCast(Cast);
      // Then store XMM w/ a movq.
      X86OperandMem *Addr = formMemoryOperand(Ptr, IceType_f64);
      _storeq(T, Addr);
      _mfence();
      return;
    }
    auto *Store = InstStore::create(Func, Value, Ptr);
    lowerStore(Store);
    _mfence();
    return;
  }
  case Intrinsics::Bswap: {
    Variable *Dest = Instr->getDest();
    Operand *Val = Instr->getArg(0);
    // In 32-bit mode, bswap only works on 32-bit arguments, and the argument
    // must be a register. Use rotate left for 16-bit bswap.
    if (!Traits::Is64Bit && Val->getType() == IceType_i64) {
      // i64 on x86-32: bswap each 32-bit half, then swap the halves.
      Val = legalizeUndef(Val);
      Variable *T_Lo = legalizeToReg(loOperand(Val));
      Variable *T_Hi = legalizeToReg(hiOperand(Val));
      auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
      auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      _bswap(T_Lo);
      _bswap(T_Hi);
      _mov(DestLo, T_Hi);
      _mov(DestHi, T_Lo);
    } else if ((Traits::Is64Bit && Val->getType() == IceType_i64) ||
               Val->getType() == IceType_i32) {
      Variable *T = legalizeToReg(Val);
      _bswap(T);
      _mov(Dest, T);
    } else {
      // i16: rotate left by 8 swaps the two bytes.
      assert(Val->getType() == IceType_i16);
      Constant *Eight = Ctx->getConstantInt16(8);
      Variable *T = nullptr;
      Val = legalize(Val);
      _mov(T, Val);
      _rol(T, Eight);
      _mov(Dest, T);
    }
    return;
  }
  case Intrinsics::Ctpop: {
    Variable *Dest = Instr->getDest();
    Variable *T = nullptr;
    Operand *Val = Instr->getArg(0);
    Type ValTy = Val->getType();
    assert(ValTy == IceType_i32 || ValTy == IceType_i64);

    if (!Traits::Is64Bit) {
      T = Dest;
    } else {
      T = makeReg(IceType_i64);
      if (ValTy == IceType_i32) {
        // In x86-64, __popcountsi2 is not defined, so we cheat a bit by
        // converting it to a 64-bit value, and using ctpop_i64. _movzx should
        // ensure we will not have any bits set on Val's upper 32 bits.
        Variable *V = makeReg(IceType_i64);
        _movzx(V, Val);
        Val = V;
      }
      ValTy = IceType_i64;
    }

    InstCall *Call =
        makeHelperCall(ValTy == IceType_i32 ? RuntimeHelper::H_call_ctpop_i32
                                            : RuntimeHelper::H_call_ctpop_i64,
                       T, 1);
    Call->addArg(Val);
    lowerCall(Call);
    // The popcount helpers always return 32-bit values, while the intrinsic's
    // signature matches the native POPCNT instruction and fills a 64-bit reg
    // (in 64-bit mode). Thus, clear the upper bits of the dest just in case
    // the user doesn't do that in the IR. If the user does that in the IR,
    // then this zero'ing instruction is dead and gets optimized out.
    if (!Traits::Is64Bit) {
      assert(T == Dest);
      if (Val->getType() == IceType_i64) {
        auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
        Constant *Zero = Ctx->getConstantZero(IceType_i32);
        _mov(DestHi, Zero);
      }
    } else {
      assert(Val->getType() == IceType_i64);
      // T is 64 bit. It needs to be copied to dest. We need to:
      //
      // T_1.32 = trunc T.64 to i32
      // T_2.64 = zext T_1.32 to i64
      // Dest.<<right_size>> = T_2.<<right_size>>
      //
      // which ensures the upper 32 bits will always be cleared. Just doing a
      //
      // mov Dest.32 = trunc T.32 to i32
      //
      // is dangerous because there's a chance the compiler will optimize this
      // copy out. To use _movzx we need two new registers (one 32-, and
      // another 64-bit wide.)
      Variable *T_1 = makeReg(IceType_i32);
      _mov(T_1, T);
      Variable *T_2 = makeReg(IceType_i64);
      _movzx(T_2, T_1);
      _mov(Dest, T_2);
    }
    return;
  }
  case Intrinsics::Ctlz: {
    // The "is zero undef" parameter is ignored and we always return a
    // well-defined value.
    Operand *Val = legalize(Instr->getArg(0));
    Operand *FirstVal;
    Operand *SecondVal = nullptr;
    if (!Traits::Is64Bit && Val->getType() == IceType_i64) {
      // For ctlz the high word is scanned first, so it goes in FirstVal.
      FirstVal = loOperand(Val);
      SecondVal = hiOperand(Val);
    } else {
      FirstVal = Val;
    }
    constexpr bool IsCttz = false;
    lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
                    SecondVal);
    return;
  }
  case Intrinsics::Cttz: {
    // The "is zero undef" parameter is ignored and we always return a
    // well-defined value.
    Operand *Val = legalize(Instr->getArg(0));
    Operand *FirstVal;
    Operand *SecondVal = nullptr;
    if (!Traits::Is64Bit && Val->getType() == IceType_i64) {
      // For cttz the operand halves are passed in the opposite order to Ctlz.
      FirstVal = hiOperand(Val);
      SecondVal = loOperand(Val);
    } else {
      FirstVal = Val;
    }
    constexpr bool IsCttz = true;
    lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
                    SecondVal);
    return;
  }
  case Intrinsics::Fabs: {
    // fabs = clear the sign bit: AND with a mask of all-ones-but-sign-bit.
    Operand *Src = legalize(Instr->getArg(0));
    Type Ty = Src->getType();
    Variable *Dest = Instr->getDest();
    Variable *T = makeVectorOfFabsMask(Ty);
    // The pand instruction operates on an m128 memory operand, so if Src is an
    // f32 or f64, we need to make sure it's in a register.
    if (isVectorType(Ty)) {
      if (llvm::isa<X86OperandMem>(Src))
        Src = legalizeToReg(Src);
    } else {
      Src = legalizeToReg(Src);
    }
    _pand(T, Src);
    if (isVectorType(Ty))
      _movp(Dest, T);
    else
      _mov(Dest, T);
    return;
  }
  case Intrinsics::Longjmp: {
    InstCall *Call = makeHelperCall(RuntimeHelper::H_call_longjmp, nullptr, 2);
    Call->addArg(Instr->getArg(0));
    Call->addArg(Instr->getArg(1));
    lowerCall(Call);
    return;
  }
  case Intrinsics::Memcpy: {
    lowerMemcpy(Instr->getArg(0), Instr->getArg(1), Instr->getArg(2));
    return;
  }
  case Intrinsics::Memmove: {
    lowerMemmove(Instr->getArg(0), Instr->getArg(1), Instr->getArg(2));
    return;
  }
  case Intrinsics::Memset: {
    lowerMemset(Instr->getArg(0), Instr->getArg(1), Instr->getArg(2));
    return;
  }
  case Intrinsics::NaClReadTP: {
    if (NeedSandboxing) {
      // Sandboxed: read the thread pointer via the target-specific operand.
      Operand *Src =
          dispatchToConcrete(&ConcreteTarget::createNaClReadTPSrcOperand);
      Variable *Dest = Instr->getDest();
      Variable *T = nullptr;
      _mov(T, Src);
      _mov(Dest, T);
    } else {
      // Non-sandboxed: defer to the __nacl_read_tp runtime helper.
      InstCall *Call =
          makeHelperCall(RuntimeHelper::H_call_read_tp, Instr->getDest(), 0);
      lowerCall(Call);
    }
    return;
  }
  case Intrinsics::Setjmp: {
    InstCall *Call =
        makeHelperCall(RuntimeHelper::H_call_setjmp, Instr->getDest(), 1);
    Call->addArg(Instr->getArg(0));
    lowerCall(Call);
    return;
  }
  case Intrinsics::Sqrt: {
    Operand *Src = legalize(Instr->getArg(0));
    Variable *Dest = Instr->getDest();
    Variable *T = makeReg(Dest->getType());
    _sqrtss(T, Src);
    _mov(Dest, T);
    return;
  }
  case Intrinsics::Stacksave: {
    // Copy the current stack pointer into Dest.
    if (!Traits::Is64Bit || !NeedSandboxing) {
      Variable *esp = Func->getTarget()->getPhysicalRegister(getStackReg(),
                                                             Traits::WordType);
      Variable *Dest = Instr->getDest();
      _mov(Dest, esp);
      return;
    }
    // Sandboxed x86-64: the stack pointer is the 32-bit esp.
    Variable *esp = Func->getTarget()->getPhysicalRegister(
        Traits::RegisterSet::Reg_esp, IceType_i32);
    Variable *Dest = Instr->getDest();
    _mov(Dest, esp);

    return;
  }
  case Intrinsics::Stackrestore: {
    // Replace the stack pointer with the previously saved value.
    Operand *Src = Instr->getArg(0);
    _mov_sp(Src);
    return;
  }

  case Intrinsics::Trap:
    _ud2();
    return;
  case Intrinsics::UnknownIntrinsic:
    Func->setError("Should not be lowering UnknownIntrinsic");
    return;
  }
  return;
}
4347
/// Lowers an atomic compare-and-swap into a locked cmpxchg — or a locked
/// cmpxchg8b for i64 on x86-32 — leaving the previous memory value in
/// DestPrev. The accumulator register (al/ax/eax/rax, or edx:eax for
/// cmpxchg8b) is pre-colored as the instruction requires.
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerAtomicCmpxchg(Variable *DestPrev,
                                                   Operand *Ptr,
                                                   Operand *Expected,
                                                   Operand *Desired) {
  Type Ty = Expected->getType();
  if (!Traits::Is64Bit && Ty == IceType_i64) {
    // Reserve the pre-colored registers first, before adding any more
    // infinite-weight variables from formMemoryOperand's legalization.
    Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
    Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
    Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
    Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx);
    // cmpxchg8b compares edx:eax against memory and swaps in ecx:ebx.
    _mov(T_eax, loOperand(Expected));
    _mov(T_edx, hiOperand(Expected));
    _mov(T_ebx, loOperand(Desired));
    _mov(T_ecx, hiOperand(Desired));
    X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
    constexpr bool Locked = true;
    _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
    // The previous memory value comes back in edx:eax.
    auto *DestLo = llvm::cast<Variable>(loOperand(DestPrev));
    auto *DestHi = llvm::cast<Variable>(hiOperand(DestPrev));
    _mov(DestLo, T_eax);
    _mov(DestHi, T_edx);
    return;
  }
  // Pick the width-appropriate accumulator register for cmpxchg.
  RegNumT Eax;
  switch (Ty) {
  default:
    llvm::report_fatal_error("Bad type for cmpxchg");
  case IceType_i64:
    Eax = Traits::getRaxOrDie();
    break;
  case IceType_i32:
    Eax = Traits::RegisterSet::Reg_eax;
    break;
  case IceType_i16:
    Eax = Traits::RegisterSet::Reg_ax;
    break;
  case IceType_i8:
    Eax = Traits::RegisterSet::Reg_al;
    break;
  }
  Variable *T_eax = makeReg(Ty, Eax);
  _mov(T_eax, Expected);
  X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
  Variable *DesiredReg = legalizeToReg(Desired);
  constexpr bool Locked = true;
  _cmpxchg(Addr, T_eax, DesiredReg, Locked);
  _mov(DestPrev, T_eax);
}
4399
John Porto4a566862016-01-04 09:33:41 -08004400template <typename TraitsType>
4401bool TargetX86Base<TraitsType>::tryOptimizedCmpxchgCmpBr(Variable *Dest,
4402 Operand *PtrToMem,
4403 Operand *Expected,
4404 Operand *Desired) {
Jim Stichnothdd6dcfa2016-04-18 12:52:09 -07004405 if (Func->getOptLevel() == Opt_m1)
John Porto7e93c622015-06-23 10:58:57 -07004406 return false;
4407 // Peek ahead a few instructions and see how Dest is used.
4408 // It's very common to have:
4409 //
4410 // %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...)
4411 // [%y_phi = ...] // list of phi stores
4412 // %p = icmp eq i32 %x, %expected
4413 // br i1 %p, label %l1, label %l2
4414 //
4415 // which we can optimize into:
4416 //
4417 // %x = <cmpxchg code>
4418 // [%y_phi = ...] // list of phi stores
4419 // br eq, %l1, %l2
4420 InstList::iterator I = Context.getCur();
4421 // I is currently the InstIntrinsicCall. Peek past that.
4422 // This assumes that the atomic cmpxchg has not been lowered yet,
4423 // so that the instructions seen in the scan from "Cur" is simple.
4424 assert(llvm::isa<InstIntrinsicCall>(*I));
4425 Inst *NextInst = Context.getNextInst(I);
4426 if (!NextInst)
4427 return false;
4428 // There might be phi assignments right before the compare+branch, since this
4429 // could be a backward branch for a loop. This placement of assignments is
4430 // determined by placePhiStores().
John Portoe82b5602016-02-24 15:58:55 -08004431 CfgVector<InstAssign *> PhiAssigns;
Jim Stichnoth54f3d512015-12-11 09:53:00 -08004432 while (auto *PhiAssign = llvm::dyn_cast<InstAssign>(NextInst)) {
John Porto7e93c622015-06-23 10:58:57 -07004433 if (PhiAssign->getDest() == Dest)
4434 return false;
4435 PhiAssigns.push_back(PhiAssign);
4436 NextInst = Context.getNextInst(I);
4437 if (!NextInst)
4438 return false;
4439 }
Jim Stichnoth54f3d512015-12-11 09:53:00 -08004440 if (auto *NextCmp = llvm::dyn_cast<InstIcmp>(NextInst)) {
John Porto7e93c622015-06-23 10:58:57 -07004441 if (!(NextCmp->getCondition() == InstIcmp::Eq &&
4442 ((NextCmp->getSrc(0) == Dest && NextCmp->getSrc(1) == Expected) ||
4443 (NextCmp->getSrc(1) == Dest && NextCmp->getSrc(0) == Expected)))) {
4444 return false;
4445 }
4446 NextInst = Context.getNextInst(I);
4447 if (!NextInst)
4448 return false;
Jim Stichnoth54f3d512015-12-11 09:53:00 -08004449 if (auto *NextBr = llvm::dyn_cast<InstBr>(NextInst)) {
John Porto7e93c622015-06-23 10:58:57 -07004450 if (!NextBr->isUnconditional() &&
4451 NextCmp->getDest() == NextBr->getCondition() &&
4452 NextBr->isLastUse(NextCmp->getDest())) {
4453 lowerAtomicCmpxchg(Dest, PtrToMem, Expected, Desired);
4454 for (size_t i = 0; i < PhiAssigns.size(); ++i) {
4455 // Lower the phi assignments now, before the branch (same placement
4456 // as before).
4457 InstAssign *PhiAssign = PhiAssigns[i];
4458 PhiAssign->setDeleted();
4459 lowerAssign(PhiAssign);
4460 Context.advanceNext();
4461 }
John Porto5d0acff2015-06-30 15:29:21 -07004462 _br(Traits::Cond::Br_e, NextBr->getTargetTrue(),
4463 NextBr->getTargetFalse());
John Porto7e93c622015-06-23 10:58:57 -07004464 // Skip over the old compare and branch, by deleting them.
4465 NextCmp->setDeleted();
4466 NextBr->setDeleted();
4467 Context.advanceNext();
4468 Context.advanceNext();
4469 return true;
4470 }
4471 }
4472 }
4473 return false;
4474}
4475
/// Lowers an AtomicRMW intrinsic. Add/Sub lower to a locked xadd (with a
/// negate first for Sub) and Exchange lowers to xchg, except that the i64
/// forms on x86-32 — like Or/And/Xor always — are expanded into a locked
/// cmpxchg loop via expandAtomicRMWAsCmpxchg(), with Op_Lo/Op_Hi naming the
/// per-half update instructions to emit inside the loop.
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerAtomicRMW(Variable *Dest,
                                               uint32_t Operation, Operand *Ptr,
                                               Operand *Val) {
  bool NeedsCmpxchg = false;
  LowerBinOp Op_Lo = nullptr;
  LowerBinOp Op_Hi = nullptr;
  switch (Operation) {
  default:
    Func->setError("Unknown AtomicRMW operation");
    return;
  case Intrinsics::AtomicAdd: {
    if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
      // All the fall-through paths must set this to true, but use this
      // for asserting.
      NeedsCmpxchg = true;
      Op_Lo = &TargetX86Base<TraitsType>::_add;
      Op_Hi = &TargetX86Base<TraitsType>::_adc;
      break;
    }
    // Native width: lock xadd returns the old value directly.
    X86OperandMem *Addr = formMemoryOperand(Ptr, Dest->getType());
    constexpr bool Locked = true;
    Variable *T = nullptr;
    _mov(T, Val);
    _xadd(Addr, T, Locked);
    _mov(Dest, T);
    return;
  }
  case Intrinsics::AtomicSub: {
    if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
      NeedsCmpxchg = true;
      Op_Lo = &TargetX86Base<TraitsType>::_sub;
      Op_Hi = &TargetX86Base<TraitsType>::_sbb;
      break;
    }
    // Subtract == add of the negated value, so lock xadd still works.
    X86OperandMem *Addr = formMemoryOperand(Ptr, Dest->getType());
    constexpr bool Locked = true;
    Variable *T = nullptr;
    _mov(T, Val);
    _neg(T);
    _xadd(Addr, T, Locked);
    _mov(Dest, T);
    return;
  }
  case Intrinsics::AtomicOr:
    // TODO(jvoung): If Dest is null or dead, then some of these
    // operations do not need an "exchange", but just a locked op.
    // That appears to be "worth" it for sub, or, and, and xor.
    // xadd is probably fine vs lock add for add, and xchg is fine
    // vs an atomic store.
    NeedsCmpxchg = true;
    Op_Lo = &TargetX86Base<TraitsType>::_or;
    Op_Hi = &TargetX86Base<TraitsType>::_or;
    break;
  case Intrinsics::AtomicAnd:
    NeedsCmpxchg = true;
    Op_Lo = &TargetX86Base<TraitsType>::_and;
    Op_Hi = &TargetX86Base<TraitsType>::_and;
    break;
  case Intrinsics::AtomicXor:
    NeedsCmpxchg = true;
    Op_Lo = &TargetX86Base<TraitsType>::_xor;
    Op_Hi = &TargetX86Base<TraitsType>::_xor;
    break;
  case Intrinsics::AtomicExchange:
    if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
      NeedsCmpxchg = true;
      // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values
      // just need to be moved to the ecx and ebx registers.
      Op_Lo = nullptr;
      Op_Hi = nullptr;
      break;
    }
    X86OperandMem *Addr = formMemoryOperand(Ptr, Dest->getType());
    Variable *T = nullptr;
    _mov(T, Val);
    _xchg(Addr, T);
    _mov(Dest, T);
    return;
  }
  // Otherwise, we need a cmpxchg loop.
  (void)NeedsCmpxchg;
  assert(NeedsCmpxchg);
  expandAtomicRMWAsCmpxchg(Op_Lo, Op_Hi, Dest, Ptr, Val);
}
4561
/// Expands an atomic RMW operation that has no single-instruction form into
/// a lock cmpxchg retry loop: load the old value, apply Op_Lo/Op_Hi to a
/// copy, attempt the swap, and branch back on failure. FakeUse instructions
/// keep the loop's re-read operands (Val, the address base) live across the
/// backward branch.
template <typename TraitsType>
void TargetX86Base<TraitsType>::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo,
                                                         LowerBinOp Op_Hi,
                                                         Variable *Dest,
                                                         Operand *Ptr,
                                                         Operand *Val) {
  // Expand a more complex RMW operation as a cmpxchg loop:
  // For 64-bit:
  //   mov     eax, [ptr]
  //   mov     edx, [ptr + 4]
  // .LABEL:
  //   mov     ebx, eax
  //   <Op_Lo> ebx, <desired_adj_lo>
  //   mov     ecx, edx
  //   <Op_Hi> ecx, <desired_adj_hi>
  //   lock cmpxchg8b [ptr]
  //   jne     .LABEL
  //   mov     <dest_lo>, eax
  //   mov     <dest_hi>, edx
  //
  // For 32-bit:
  //   mov     eax, [ptr]
  // .LABEL:
  //   mov     <reg>, eax
  //   op      <reg>, [desired_adj]
  //   lock cmpxchg [ptr], <reg>
  //   jne     .LABEL
  //   mov     <dest>, eax
  //
  // If Op_{Lo,Hi} are nullptr, then just copy the value.
  Val = legalize(Val);
  Type Ty = Val->getType();
  if (!Traits::Is64Bit && Ty == IceType_i64) {
    // i64 on x86-32: cmpxchg8b with its fixed edx:eax / ecx:ebx registers.
    Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
    Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
    X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
    _mov(T_eax, loOperand(Addr));
    _mov(T_edx, hiOperand(Addr));
    Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
    Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx);
    InstX86Label *Label = InstX86Label::create(Func, this);
    const bool IsXchg8b = Op_Lo == nullptr && Op_Hi == nullptr;
    if (!IsXchg8b) {
      // Recompute the desired value from edx:eax on every iteration.
      Context.insert(Label);
      _mov(T_ebx, T_eax);
      (this->*Op_Lo)(T_ebx, loOperand(Val));
      _mov(T_ecx, T_edx);
      (this->*Op_Hi)(T_ecx, hiOperand(Val));
    } else {
      // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi.
      // It just needs the Val loaded into ebx and ecx.
      // That can also be done before the loop.
      _mov(T_ebx, loOperand(Val));
      _mov(T_ecx, hiOperand(Val));
      Context.insert(Label);
    }
    constexpr bool Locked = true;
    _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
    _br(Traits::Cond::Br_ne, Label);
    if (!IsXchg8b) {
      // If Val is a variable, model the extended live range of Val through
      // the end of the loop, since it will be re-used by the loop.
      if (auto *ValVar = llvm::dyn_cast<Variable>(Val)) {
        auto *ValLo = llvm::cast<Variable>(loOperand(ValVar));
        auto *ValHi = llvm::cast<Variable>(hiOperand(ValVar));
        Context.insert<InstFakeUse>(ValLo);
        Context.insert<InstFakeUse>(ValHi);
      }
    } else {
      // For xchg, the loop is slightly smaller and ebx/ecx are used.
      Context.insert<InstFakeUse>(T_ebx);
      Context.insert<InstFakeUse>(T_ecx);
    }
    // The address base (if any) is also reused in the loop.
    if (Variable *Base = Addr->getBase())
      Context.insert<InstFakeUse>(Base);
    auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
    auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    _mov(DestLo, T_eax);
    _mov(DestHi, T_edx);
    return;
  }
  X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
  // Pick the width-appropriate accumulator register for cmpxchg.
  RegNumT Eax;
  switch (Ty) {
  default:
    llvm::report_fatal_error("Bad type for atomicRMW");
  case IceType_i64:
    Eax = Traits::getRaxOrDie();
    break;
  case IceType_i32:
    Eax = Traits::RegisterSet::Reg_eax;
    break;
  case IceType_i16:
    Eax = Traits::RegisterSet::Reg_ax;
    break;
  case IceType_i8:
    Eax = Traits::RegisterSet::Reg_al;
    break;
  }
  Variable *T_eax = makeReg(Ty, Eax);
  _mov(T_eax, Addr);
  auto *Label = Context.insert<InstX86Label>(this);
  // We want to pick a different register for T than Eax, so don't use
  // _mov(T == nullptr, T_eax).
  Variable *T = makeReg(Ty);
  _mov(T, T_eax);
  (this->*Op_Lo)(T, Val);
  constexpr bool Locked = true;
  _cmpxchg(Addr, T_eax, T, Locked);
  _br(Traits::Cond::Br_ne, Label);
  // If Val is a variable, model the extended live range of Val through
  // the end of the loop, since it will be re-used by the loop.
  if (auto *ValVar = llvm::dyn_cast<Variable>(Val)) {
    Context.insert<InstFakeUse>(ValVar);
  }
  // The address base (if any) is also reused in the loop.
  if (Variable *Base = Addr->getBase())
    Context.insert<InstFakeUse>(Base);
  _mov(Dest, T_eax);
}
4683
Andrew Scull9612d322015-07-06 14:53:25 -07004684/// Lowers count {trailing, leading} zeros intrinsic.
4685///
4686/// We could do constant folding here, but that should have
4687/// been done by the front-end/middle-end optimizations.
John Porto4a566862016-01-04 09:33:41 -08004688template <typename TraitsType>
4689void TargetX86Base<TraitsType>::lowerCountZeros(bool Cttz, Type Ty,
4690 Variable *Dest,
4691 Operand *FirstVal,
4692 Operand *SecondVal) {
John Porto7e93c622015-06-23 10:58:57 -07004693 // TODO(jvoung): Determine if the user CPU supports LZCNT (BMI).
4694 // Then the instructions will handle the Val == 0 case much more simply
4695 // and won't require conversion from bit position to number of zeros.
4696 //
4697 // Otherwise:
4698 // bsr IF_NOT_ZERO, Val
John Porto34d276a2016-01-27 06:31:53 -08004699 // mov T_DEST, ((Ty == i32) ? 63 : 127)
John Porto7e93c622015-06-23 10:58:57 -07004700 // cmovne T_DEST, IF_NOT_ZERO
John Porto34d276a2016-01-27 06:31:53 -08004701 // xor T_DEST, ((Ty == i32) ? 31 : 63)
John Porto7e93c622015-06-23 10:58:57 -07004702 // mov DEST, T_DEST
4703 //
4704 // NOTE: T_DEST must be a register because cmov requires its dest to be a
4705 // register. Also, bsf and bsr require their dest to be a register.
4706 //
John Porto34d276a2016-01-27 06:31:53 -08004707 // The xor DEST, C(31|63) converts a bit position to # of leading zeroes.
John Porto7e93c622015-06-23 10:58:57 -07004708 // E.g., for 000... 00001100, bsr will say that the most significant bit
4709 // set is at position 3, while the number of leading zeros is 28. Xor is
John Porto34d276a2016-01-27 06:31:53 -08004710 // like (M - N) for N <= M, and converts 63 to 32, and 127 to 64 (for the
4711 // all-zeros case).
John Porto7e93c622015-06-23 10:58:57 -07004712 //
John Porto34d276a2016-01-27 06:31:53 -08004713 // X8632 only: Similar for 64-bit, but start w/ speculating that the upper 32
4714 // bits are all zero, and compute the result for that case (checking the
4715 // lower 32 bits). Then actually compute the result for the upper bits and
John Porto7e93c622015-06-23 10:58:57 -07004716 // cmov in the result from the lower computation if the earlier speculation
4717 // was correct.
4718 //
4719 // Cttz, is similar, but uses bsf instead, and doesn't require the xor
4720 // bit position conversion, and the speculation is reversed.
John Porto34d276a2016-01-27 06:31:53 -08004721
4722 // TODO(jpp): refactor this method.
John Porto7e93c622015-06-23 10:58:57 -07004723 assert(Ty == IceType_i32 || Ty == IceType_i64);
John Porto3c275ce2015-12-22 08:14:00 -08004724 const Type DestTy = Traits::Is64Bit ? Dest->getType() : IceType_i32;
4725 Variable *T = makeReg(DestTy);
John Porto7e93c622015-06-23 10:58:57 -07004726 Operand *FirstValRM = legalize(FirstVal, Legal_Mem | Legal_Reg);
4727 if (Cttz) {
4728 _bsf(T, FirstValRM);
4729 } else {
4730 _bsr(T, FirstValRM);
4731 }
John Porto3c275ce2015-12-22 08:14:00 -08004732 Variable *T_Dest = makeReg(DestTy);
4733 Constant *_31 = Ctx->getConstantInt32(31);
4734 Constant *_32 = Ctx->getConstantInt(DestTy, 32);
John Porto34d276a2016-01-27 06:31:53 -08004735 Constant *_63 = Ctx->getConstantInt(DestTy, 63);
4736 Constant *_64 = Ctx->getConstantInt(DestTy, 64);
John Porto7e93c622015-06-23 10:58:57 -07004737 if (Cttz) {
John Porto34d276a2016-01-27 06:31:53 -08004738 if (DestTy == IceType_i64) {
4739 _mov(T_Dest, _64);
4740 } else {
4741 _mov(T_Dest, _32);
4742 }
John Porto7e93c622015-06-23 10:58:57 -07004743 } else {
John Porto34d276a2016-01-27 06:31:53 -08004744 Constant *_127 = Ctx->getConstantInt(DestTy, 127);
4745 if (DestTy == IceType_i64) {
4746 _mov(T_Dest, _127);
4747 } else {
4748 _mov(T_Dest, _63);
4749 }
John Porto7e93c622015-06-23 10:58:57 -07004750 }
John Porto5d0acff2015-06-30 15:29:21 -07004751 _cmov(T_Dest, T, Traits::Cond::Br_ne);
John Porto7e93c622015-06-23 10:58:57 -07004752 if (!Cttz) {
John Porto34d276a2016-01-27 06:31:53 -08004753 if (DestTy == IceType_i64) {
4754 // Even though there's a _63 available at this point, that constant might
4755 // not be an i32, which will cause the xor emission to fail.
4756 Constant *_63 = Ctx->getConstantInt32(63);
4757 _xor(T_Dest, _63);
4758 } else {
4759 _xor(T_Dest, _31);
4760 }
John Porto7e93c622015-06-23 10:58:57 -07004761 }
John Porto1d235422015-08-12 12:37:53 -07004762 if (Traits::Is64Bit || Ty == IceType_i32) {
John Porto7e93c622015-06-23 10:58:57 -07004763 _mov(Dest, T_Dest);
4764 return;
4765 }
John Porto3c275ce2015-12-22 08:14:00 -08004766 _add(T_Dest, _32);
Jim Stichnoth54f3d512015-12-11 09:53:00 -08004767 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
4768 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
John Porto7e93c622015-06-23 10:58:57 -07004769 // Will be using "test" on this, so we need a registerized variable.
Andrew Scull97f460d2015-07-21 10:07:42 -07004770 Variable *SecondVar = legalizeToReg(SecondVal);
John Porto7e93c622015-06-23 10:58:57 -07004771 Variable *T_Dest2 = makeReg(IceType_i32);
4772 if (Cttz) {
4773 _bsf(T_Dest2, SecondVar);
4774 } else {
4775 _bsr(T_Dest2, SecondVar);
John Porto3c275ce2015-12-22 08:14:00 -08004776 _xor(T_Dest2, _31);
John Porto7e93c622015-06-23 10:58:57 -07004777 }
4778 _test(SecondVar, SecondVar);
John Porto5d0acff2015-06-30 15:29:21 -07004779 _cmov(T_Dest2, T_Dest, Traits::Cond::Br_e);
John Porto7e93c622015-06-23 10:58:57 -07004780 _mov(DestLo, T_Dest2);
4781 _mov(DestHi, Ctx->getConstantZero(IceType_i32));
4782}
4783
John Porto4a566862016-01-04 09:33:41 -08004784template <typename TraitsType>
4785void TargetX86Base<TraitsType>::typedLoad(Type Ty, Variable *Dest,
4786 Variable *Base, Constant *Offset) {
Jim Stichnoth8ff4b282016-01-04 15:39:06 -08004787 // If Offset is a ConstantRelocatable in Non-SFI mode, we will need to
4788 // legalize Mem properly.
4789 if (Offset)
4790 assert(!llvm::isa<ConstantRelocatable>(Offset));
4791
John Porto4a566862016-01-04 09:33:41 -08004792 auto *Mem = X86OperandMem::create(Func, Ty, Base, Offset);
Andrew Scullcfa628b2015-08-20 14:23:05 -07004793
4794 if (isVectorType(Ty))
4795 _movp(Dest, Mem);
4796 else if (Ty == IceType_f64)
4797 _movq(Dest, Mem);
4798 else
4799 _mov(Dest, Mem);
4800}
4801
John Porto4a566862016-01-04 09:33:41 -08004802template <typename TraitsType>
4803void TargetX86Base<TraitsType>::typedStore(Type Ty, Variable *Value,
4804 Variable *Base, Constant *Offset) {
Jim Stichnoth8ff4b282016-01-04 15:39:06 -08004805 // If Offset is a ConstantRelocatable in Non-SFI mode, we will need to
4806 // legalize Mem properly.
4807 if (Offset)
4808 assert(!llvm::isa<ConstantRelocatable>(Offset));
4809
John Porto4a566862016-01-04 09:33:41 -08004810 auto *Mem = X86OperandMem::create(Func, Ty, Base, Offset);
Andrew Scullcfa628b2015-08-20 14:23:05 -07004811
4812 if (isVectorType(Ty))
4813 _storep(Value, Mem);
4814 else if (Ty == IceType_f64)
4815 _storeq(Value, Mem);
4816 else
4817 _store(Value, Mem);
4818}
4819
John Porto4a566862016-01-04 09:33:41 -08004820template <typename TraitsType>
4821void TargetX86Base<TraitsType>::copyMemory(Type Ty, Variable *Dest,
4822 Variable *Src, int32_t OffsetAmt) {
Andrew Scullcfa628b2015-08-20 14:23:05 -07004823 Constant *Offset = OffsetAmt ? Ctx->getConstantInt32(OffsetAmt) : nullptr;
4824 // TODO(ascull): this or add nullptr test to _movp, _movq
4825 Variable *Data = makeReg(Ty);
4826
4827 typedLoad(Ty, Data, Src, Offset);
4828 typedStore(Ty, Data, Dest, Offset);
4829}
4830
John Porto4a566862016-01-04 09:33:41 -08004831template <typename TraitsType>
4832void TargetX86Base<TraitsType>::lowerMemcpy(Operand *Dest, Operand *Src,
4833 Operand *Count) {
Andrew Scull9df4a372015-08-07 09:19:35 -07004834 // There is a load and store for each chunk in the unroll
Andrew Scull9df4a372015-08-07 09:19:35 -07004835 constexpr uint32_t BytesPerStorep = 16;
Andrew Scull9df4a372015-08-07 09:19:35 -07004836
4837 // Check if the operands are constants
4838 const auto *CountConst = llvm::dyn_cast<const ConstantInteger32>(Count);
4839 const bool IsCountConst = CountConst != nullptr;
4840 const uint32_t CountValue = IsCountConst ? CountConst->getValue() : 0;
4841
Andrew Scullcfa628b2015-08-20 14:23:05 -07004842 if (shouldOptimizeMemIntrins() && IsCountConst &&
4843 CountValue <= BytesPerStorep * Traits::MEMCPY_UNROLL_LIMIT) {
Andrew Scull9df4a372015-08-07 09:19:35 -07004844 // Unlikely, but nothing to do if it does happen
4845 if (CountValue == 0)
4846 return;
4847
4848 Variable *SrcBase = legalizeToReg(Src);
4849 Variable *DestBase = legalizeToReg(Dest);
4850
Andrew Scullcfa628b2015-08-20 14:23:05 -07004851 // Find the largest type that can be used and use it as much as possible in
4852 // reverse order. Then handle any remainder with overlapping copies. Since
4853 // the remainder will be at the end, there will be reduced pressure on the
4854 // memory unit as the accesses to the same memory are far apart.
4855 Type Ty = largestTypeInSize(CountValue);
4856 uint32_t TyWidth = typeWidthInBytes(Ty);
Andrew Scull9df4a372015-08-07 09:19:35 -07004857
Andrew Scullcfa628b2015-08-20 14:23:05 -07004858 uint32_t RemainingBytes = CountValue;
4859 int32_t Offset = (CountValue & ~(TyWidth - 1)) - TyWidth;
4860 while (RemainingBytes >= TyWidth) {
4861 copyMemory(Ty, DestBase, SrcBase, Offset);
4862 RemainingBytes -= TyWidth;
4863 Offset -= TyWidth;
Andrew Scull9df4a372015-08-07 09:19:35 -07004864 }
4865
Andrew Scullcfa628b2015-08-20 14:23:05 -07004866 if (RemainingBytes == 0)
Andrew Scull9df4a372015-08-07 09:19:35 -07004867 return;
Andrew Scull9df4a372015-08-07 09:19:35 -07004868
Andrew Scullcfa628b2015-08-20 14:23:05 -07004869 // Lower the remaining bytes. Adjust to larger types in order to make use
4870 // of overlaps in the copies.
4871 Type LeftOverTy = firstTypeThatFitsSize(RemainingBytes);
4872 Offset = CountValue - typeWidthInBytes(LeftOverTy);
4873 copyMemory(LeftOverTy, DestBase, SrcBase, Offset);
Andrew Scull9df4a372015-08-07 09:19:35 -07004874 return;
4875 }
4876
4877 // Fall back on a function call
Karl Schimpf20070e82016-03-17 13:30:13 -07004878 InstCall *Call = makeHelperCall(RuntimeHelper::H_call_memcpy, nullptr, 3);
Andrew Scull9df4a372015-08-07 09:19:35 -07004879 Call->addArg(Dest);
4880 Call->addArg(Src);
4881 Call->addArg(Count);
4882 lowerCall(Call);
4883}
4884
John Porto4a566862016-01-04 09:33:41 -08004885template <typename TraitsType>
4886void TargetX86Base<TraitsType>::lowerMemmove(Operand *Dest, Operand *Src,
4887 Operand *Count) {
Andrew Scullcfa628b2015-08-20 14:23:05 -07004888 // There is a load and store for each chunk in the unroll
4889 constexpr uint32_t BytesPerStorep = 16;
4890
4891 // Check if the operands are constants
4892 const auto *CountConst = llvm::dyn_cast<const ConstantInteger32>(Count);
4893 const bool IsCountConst = CountConst != nullptr;
4894 const uint32_t CountValue = IsCountConst ? CountConst->getValue() : 0;
4895
4896 if (shouldOptimizeMemIntrins() && IsCountConst &&
4897 CountValue <= BytesPerStorep * Traits::MEMMOVE_UNROLL_LIMIT) {
4898 // Unlikely, but nothing to do if it does happen
4899 if (CountValue == 0)
4900 return;
4901
4902 Variable *SrcBase = legalizeToReg(Src);
4903 Variable *DestBase = legalizeToReg(Dest);
4904
4905 std::tuple<Type, Constant *, Variable *>
4906 Moves[Traits::MEMMOVE_UNROLL_LIMIT];
4907 Constant *Offset;
4908 Variable *Reg;
4909
4910 // Copy the data into registers as the source and destination could overlap
Andrew Scull57e12682015-09-16 11:30:19 -07004911 // so make sure not to clobber the memory. This also means overlapping
4912 // moves can be used as we are taking a safe snapshot of the memory.
Andrew Scullcfa628b2015-08-20 14:23:05 -07004913 Type Ty = largestTypeInSize(CountValue);
4914 uint32_t TyWidth = typeWidthInBytes(Ty);
4915
4916 uint32_t RemainingBytes = CountValue;
4917 int32_t OffsetAmt = (CountValue & ~(TyWidth - 1)) - TyWidth;
4918 size_t N = 0;
4919 while (RemainingBytes >= TyWidth) {
4920 assert(N <= Traits::MEMMOVE_UNROLL_LIMIT);
4921 Offset = Ctx->getConstantInt32(OffsetAmt);
4922 Reg = makeReg(Ty);
4923 typedLoad(Ty, Reg, SrcBase, Offset);
4924 RemainingBytes -= TyWidth;
4925 OffsetAmt -= TyWidth;
4926 Moves[N++] = std::make_tuple(Ty, Offset, Reg);
4927 }
4928
4929 if (RemainingBytes != 0) {
4930 // Lower the remaining bytes. Adjust to larger types in order to make use
4931 // of overlaps in the copies.
4932 assert(N <= Traits::MEMMOVE_UNROLL_LIMIT);
4933 Ty = firstTypeThatFitsSize(RemainingBytes);
4934 Offset = Ctx->getConstantInt32(CountValue - typeWidthInBytes(Ty));
4935 Reg = makeReg(Ty);
4936 typedLoad(Ty, Reg, SrcBase, Offset);
4937 Moves[N++] = std::make_tuple(Ty, Offset, Reg);
4938 }
4939
4940 // Copy the data out into the destination memory
4941 for (size_t i = 0; i < N; ++i) {
4942 std::tie(Ty, Offset, Reg) = Moves[i];
4943 typedStore(Ty, Reg, DestBase, Offset);
4944 }
4945
4946 return;
4947 }
4948
4949 // Fall back on a function call
Karl Schimpf20070e82016-03-17 13:30:13 -07004950 InstCall *Call = makeHelperCall(RuntimeHelper::H_call_memmove, nullptr, 3);
Andrew Scullcfa628b2015-08-20 14:23:05 -07004951 Call->addArg(Dest);
4952 Call->addArg(Src);
4953 Call->addArg(Count);
4954 lowerCall(Call);
4955}
4956
John Porto4a566862016-01-04 09:33:41 -08004957template <typename TraitsType>
4958void TargetX86Base<TraitsType>::lowerMemset(Operand *Dest, Operand *Val,
4959 Operand *Count) {
Andrew Scull9df4a372015-08-07 09:19:35 -07004960 constexpr uint32_t BytesPerStorep = 16;
4961 constexpr uint32_t BytesPerStoreq = 8;
4962 constexpr uint32_t BytesPerStorei32 = 4;
Andrew Scull713dbde2015-08-04 14:25:27 -07004963 assert(Val->getType() == IceType_i8);
4964
4965 // Check if the operands are constants
4966 const auto *CountConst = llvm::dyn_cast<const ConstantInteger32>(Count);
4967 const auto *ValConst = llvm::dyn_cast<const ConstantInteger32>(Val);
4968 const bool IsCountConst = CountConst != nullptr;
4969 const bool IsValConst = ValConst != nullptr;
4970 const uint32_t CountValue = IsCountConst ? CountConst->getValue() : 0;
4971 const uint32_t ValValue = IsValConst ? ValConst->getValue() : 0;
4972
4973 // Unlikely, but nothing to do if it does happen
4974 if (IsCountConst && CountValue == 0)
4975 return;
4976
4977 // TODO(ascull): if the count is constant but val is not it would be possible
4978 // to inline by spreading the value across 4 bytes and accessing subregs e.g.
4979 // eax, ax and al.
Andrew Scullcfa628b2015-08-20 14:23:05 -07004980 if (shouldOptimizeMemIntrins() && IsCountConst && IsValConst) {
Andrew Scull9df4a372015-08-07 09:19:35 -07004981 Variable *Base = nullptr;
Andrew Scullcfa628b2015-08-20 14:23:05 -07004982 Variable *VecReg = nullptr;
Andrew Scull9df4a372015-08-07 09:19:35 -07004983 const uint32_t SpreadValue =
4984 (ValValue << 24) | (ValValue << 16) | (ValValue << 8) | ValValue;
Andrew Scull713dbde2015-08-04 14:25:27 -07004985
Andrew Scull9df4a372015-08-07 09:19:35 -07004986 auto lowerSet = [this, &Base, SpreadValue, &VecReg](Type Ty,
Jim Stichnoth992f91d2015-08-10 11:18:38 -07004987 uint32_t OffsetAmt) {
Andrew Scull9df4a372015-08-07 09:19:35 -07004988 assert(Base != nullptr);
4989 Constant *Offset = OffsetAmt ? Ctx->getConstantInt32(OffsetAmt) : nullptr;
Andrew Scull713dbde2015-08-04 14:25:27 -07004990
Andrew Scull9df4a372015-08-07 09:19:35 -07004991 // TODO(ascull): is 64-bit better with vector or scalar movq?
John Porto4a566862016-01-04 09:33:41 -08004992 auto *Mem = X86OperandMem::create(Func, Ty, Base, Offset);
Andrew Scull9df4a372015-08-07 09:19:35 -07004993 if (isVectorType(Ty)) {
Andrew Scull713dbde2015-08-04 14:25:27 -07004994 assert(VecReg != nullptr);
Andrew Scull713dbde2015-08-04 14:25:27 -07004995 _storep(VecReg, Mem);
Andrew Scullcfa628b2015-08-20 14:23:05 -07004996 } else if (Ty == IceType_f64) {
Andrew Scull713dbde2015-08-04 14:25:27 -07004997 assert(VecReg != nullptr);
Andrew Scull713dbde2015-08-04 14:25:27 -07004998 _storeq(VecReg, Mem);
Andrew Scull9df4a372015-08-07 09:19:35 -07004999 } else {
John Porto3c275ce2015-12-22 08:14:00 -08005000 assert(Ty != IceType_i64);
Andrew Scull9df4a372015-08-07 09:19:35 -07005001 _store(Ctx->getConstantInt(Ty, SpreadValue), Mem);
Andrew Scull713dbde2015-08-04 14:25:27 -07005002 }
5003 };
5004
Andrew Scullcfa628b2015-08-20 14:23:05 -07005005 // Find the largest type that can be used and use it as much as possible in
5006 // reverse order. Then handle any remainder with overlapping copies. Since
5007 // the remainder will be at the end, there will be reduces pressure on the
5008 // memory unit as the access to the same memory are far apart.
5009 Type Ty;
Andrew Scull9df4a372015-08-07 09:19:35 -07005010 if (ValValue == 0 && CountValue >= BytesPerStoreq &&
Andrew Scullcfa628b2015-08-20 14:23:05 -07005011 CountValue <= BytesPerStorep * Traits::MEMCPY_UNROLL_LIMIT) {
5012 // When the value is zero it can be loaded into a vector register cheaply
5013 // using the xor trick.
Andrew Scull9df4a372015-08-07 09:19:35 -07005014 Base = legalizeToReg(Dest);
5015 VecReg = makeVectorOfZeros(IceType_v16i8);
Andrew Scullcfa628b2015-08-20 14:23:05 -07005016 Ty = largestTypeInSize(CountValue);
5017 } else if (CountValue <= BytesPerStorei32 * Traits::MEMCPY_UNROLL_LIMIT) {
5018 // When the value is non-zero or the count is small we can't use vector
5019 // instructions so are limited to 32-bit stores.
5020 Base = legalizeToReg(Dest);
5021 constexpr uint32_t MaxSize = 4;
5022 Ty = largestTypeInSize(CountValue, MaxSize);
Andrew Scull713dbde2015-08-04 14:25:27 -07005023 }
5024
Andrew Scullcfa628b2015-08-20 14:23:05 -07005025 if (Base) {
5026 uint32_t TyWidth = typeWidthInBytes(Ty);
5027
5028 uint32_t RemainingBytes = CountValue;
5029 uint32_t Offset = (CountValue & ~(TyWidth - 1)) - TyWidth;
5030 while (RemainingBytes >= TyWidth) {
5031 lowerSet(Ty, Offset);
5032 RemainingBytes -= TyWidth;
5033 Offset -= TyWidth;
Andrew Scull713dbde2015-08-04 14:25:27 -07005034 }
Andrew Scull9df4a372015-08-07 09:19:35 -07005035
Andrew Scullcfa628b2015-08-20 14:23:05 -07005036 if (RemainingBytes == 0)
5037 return;
5038
5039 // Lower the remaining bytes. Adjust to larger types in order to make use
5040 // of overlaps in the copies.
5041 Type LeftOverTy = firstTypeThatFitsSize(RemainingBytes);
5042 Offset = CountValue - typeWidthInBytes(LeftOverTy);
5043 lowerSet(LeftOverTy, Offset);
Andrew Scull713dbde2015-08-04 14:25:27 -07005044 return;
5045 }
5046 }
5047
5048 // Fall back on calling the memset function. The value operand needs to be
5049 // extended to a stack slot size because the PNaCl ABI requires arguments to
5050 // be at least 32 bits wide.
5051 Operand *ValExt;
5052 if (IsValConst) {
5053 ValExt = Ctx->getConstantInt(stackSlotType(), ValValue);
5054 } else {
5055 Variable *ValExtVar = Func->makeVariable(stackSlotType());
5056 lowerCast(InstCast::create(Func, InstCast::Zext, ValExtVar, Val));
5057 ValExt = ValExtVar;
5058 }
Karl Schimpf20070e82016-03-17 13:30:13 -07005059 InstCall *Call = makeHelperCall(RuntimeHelper::H_call_memset, nullptr, 3);
Andrew Scull713dbde2015-08-04 14:25:27 -07005060 Call->addArg(Dest);
5061 Call->addArg(ValExt);
5062 Call->addArg(Count);
5063 lowerCall(Call);
5064}
5065
John Portoac2388c2016-01-22 07:10:56 -08005066class AddressOptimizer {
5067 AddressOptimizer() = delete;
5068 AddressOptimizer(const AddressOptimizer &) = delete;
5069 AddressOptimizer &operator=(const AddressOptimizer &) = delete;
John Porto7e93c622015-06-23 10:58:57 -07005070
John Portoac2388c2016-01-22 07:10:56 -08005071public:
5072 explicit AddressOptimizer(const Cfg *Func)
5073 : Func(Func), VMetadata(Func->getVMetadata()) {}
5074
5075 inline void dumpAddressOpt(const ConstantRelocatable *const Relocatable,
5076 int32_t Offset, const Variable *Base,
5077 const Variable *Index, uint16_t Shift,
5078 const Inst *Reason) const;
5079
5080 inline const Inst *matchAssign(Variable **Var,
5081 ConstantRelocatable **Relocatable,
5082 int32_t *Offset);
5083
5084 inline const Inst *matchCombinedBaseIndex(Variable **Base, Variable **Index,
5085 uint16_t *Shift);
5086
5087 inline const Inst *matchShiftedIndex(Variable **Index, uint16_t *Shift);
5088
Manasij Mukherjee2e4b9602016-06-21 14:22:43 -07005089 inline const Inst *matchOffsetIndexOrBase(Variable **IndexOrBase,
5090 const uint16_t Shift,
5091 ConstantRelocatable **Relocatable,
5092 int32_t *Offset);
John Portoac2388c2016-01-22 07:10:56 -08005093
5094private:
5095 const Cfg *const Func;
5096 const VariablesMetadata *const VMetadata;
5097
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08005098 static bool isAdd(const Inst *Instr) {
5099 if (auto *Arith = llvm::dyn_cast_or_null<const InstArithmetic>(Instr)) {
John Portoac2388c2016-01-22 07:10:56 -08005100 return (Arith->getOp() == InstArithmetic::Add);
5101 }
5102 return false;
5103 }
5104};
5105
5106void AddressOptimizer::dumpAddressOpt(
5107 const ConstantRelocatable *const Relocatable, int32_t Offset,
5108 const Variable *Base, const Variable *Index, uint16_t Shift,
5109 const Inst *Reason) const {
Jim Stichnoth20b71f52015-06-24 15:52:24 -07005110 if (!BuildDefs::dump())
John Porto7e93c622015-06-23 10:58:57 -07005111 return;
5112 if (!Func->isVerbose(IceV_AddrOpt))
5113 return;
5114 OstreamLocker L(Func->getContext());
5115 Ostream &Str = Func->getContext()->getStrDump();
5116 Str << "Instruction: ";
5117 Reason->dumpDecorated(Func);
5118 Str << " results in Base=";
5119 if (Base)
5120 Base->dump(Func);
5121 else
5122 Str << "<null>";
5123 Str << ", Index=";
5124 if (Index)
5125 Index->dump(Func);
5126 else
5127 Str << "<null>";
David Sehraa0b1a12015-10-27 16:55:40 -07005128 Str << ", Shift=" << Shift << ", Offset=" << Offset
5129 << ", Relocatable=" << Relocatable << "\n";
John Porto7e93c622015-06-23 10:58:57 -07005130}
5131
John Portoac2388c2016-01-22 07:10:56 -08005132const Inst *AddressOptimizer::matchAssign(Variable **Var,
5133 ConstantRelocatable **Relocatable,
5134 int32_t *Offset) {
Andrew Scull57e12682015-09-16 11:30:19 -07005135 // Var originates from Var=SrcVar ==> set Var:=SrcVar
John Portoac2388c2016-01-22 07:10:56 -08005136 if (*Var == nullptr)
5137 return nullptr;
5138 if (const Inst *VarAssign = VMetadata->getSingleDefinition(*Var)) {
5139 assert(!VMetadata->isMultiDef(*Var));
John Porto7e93c622015-06-23 10:58:57 -07005140 if (llvm::isa<InstAssign>(VarAssign)) {
5141 Operand *SrcOp = VarAssign->getSrc(0);
5142 assert(SrcOp);
David Sehraa0b1a12015-10-27 16:55:40 -07005143 if (auto *SrcVar = llvm::dyn_cast<Variable>(SrcOp)) {
John Porto7e93c622015-06-23 10:58:57 -07005144 if (!VMetadata->isMultiDef(SrcVar) &&
5145 // TODO: ensure SrcVar stays single-BB
5146 true) {
John Portoac2388c2016-01-22 07:10:56 -08005147 *Var = SrcVar;
5148 return VarAssign;
John Porto7e93c622015-06-23 10:58:57 -07005149 }
David Sehraa0b1a12015-10-27 16:55:40 -07005150 } else if (auto *Const = llvm::dyn_cast<ConstantInteger32>(SrcOp)) {
5151 int32_t MoreOffset = Const->getValue();
John Portoac2388c2016-01-22 07:10:56 -08005152 if (Utils::WouldOverflowAdd(*Offset, MoreOffset))
5153 return nullptr;
5154 *Var = nullptr;
David Sehraa0b1a12015-10-27 16:55:40 -07005155 Offset += MoreOffset;
John Portoac2388c2016-01-22 07:10:56 -08005156 return VarAssign;
David Sehraa0b1a12015-10-27 16:55:40 -07005157 } else if (auto *AddReloc = llvm::dyn_cast<ConstantRelocatable>(SrcOp)) {
John Portoac2388c2016-01-22 07:10:56 -08005158 if (*Relocatable == nullptr) {
5159 // It is always safe to fold a relocatable through assignment -- the
5160 // assignment frees a slot in the address operand that can be used to
5161 // hold the Sandbox Pointer -- if any.
5162 *Var = nullptr;
5163 *Relocatable = AddReloc;
5164 return VarAssign;
David Sehraa0b1a12015-10-27 16:55:40 -07005165 }
John Porto7e93c622015-06-23 10:58:57 -07005166 }
5167 }
5168 }
John Portoac2388c2016-01-22 07:10:56 -08005169 return nullptr;
John Porto7e93c622015-06-23 10:58:57 -07005170}
5171
John Portoac2388c2016-01-22 07:10:56 -08005172const Inst *AddressOptimizer::matchCombinedBaseIndex(Variable **Base,
5173 Variable **Index,
5174 uint16_t *Shift) {
John Porto7e93c622015-06-23 10:58:57 -07005175 // Index==nullptr && Base is Base=Var1+Var2 ==>
5176 // set Base=Var1, Index=Var2, Shift=0
John Portoac2388c2016-01-22 07:10:56 -08005177 if (*Base == nullptr)
5178 return nullptr;
5179 if (*Index != nullptr)
5180 return nullptr;
5181 auto *BaseInst = VMetadata->getSingleDefinition(*Base);
John Porto7e93c622015-06-23 10:58:57 -07005182 if (BaseInst == nullptr)
John Portoac2388c2016-01-22 07:10:56 -08005183 return nullptr;
5184 assert(!VMetadata->isMultiDef(*Base));
John Porto7e93c622015-06-23 10:58:57 -07005185 if (BaseInst->getSrcSize() < 2)
John Portoac2388c2016-01-22 07:10:56 -08005186 return nullptr;
David Sehraa0b1a12015-10-27 16:55:40 -07005187 if (auto *Var1 = llvm::dyn_cast<Variable>(BaseInst->getSrc(0))) {
John Porto7e93c622015-06-23 10:58:57 -07005188 if (VMetadata->isMultiDef(Var1))
John Portoac2388c2016-01-22 07:10:56 -08005189 return nullptr;
David Sehraa0b1a12015-10-27 16:55:40 -07005190 if (auto *Var2 = llvm::dyn_cast<Variable>(BaseInst->getSrc(1))) {
John Porto7e93c622015-06-23 10:58:57 -07005191 if (VMetadata->isMultiDef(Var2))
John Portoac2388c2016-01-22 07:10:56 -08005192 return nullptr;
John Porto7e93c622015-06-23 10:58:57 -07005193 if (isAdd(BaseInst) &&
5194 // TODO: ensure Var1 and Var2 stay single-BB
5195 true) {
John Portoac2388c2016-01-22 07:10:56 -08005196 *Base = Var1;
5197 *Index = Var2;
5198 *Shift = 0; // should already have been 0
5199 return BaseInst;
John Porto7e93c622015-06-23 10:58:57 -07005200 }
5201 }
5202 }
John Portoac2388c2016-01-22 07:10:56 -08005203 return nullptr;
John Porto7e93c622015-06-23 10:58:57 -07005204}
5205
John Portoac2388c2016-01-22 07:10:56 -08005206const Inst *AddressOptimizer::matchShiftedIndex(Variable **Index,
5207 uint16_t *Shift) {
John Porto7e93c622015-06-23 10:58:57 -07005208 // Index is Index=Var*Const && log2(Const)+Shift<=3 ==>
5209 // Index=Var, Shift+=log2(Const)
John Portoac2388c2016-01-22 07:10:56 -08005210 if (*Index == nullptr)
5211 return nullptr;
5212 auto *IndexInst = VMetadata->getSingleDefinition(*Index);
John Porto7e93c622015-06-23 10:58:57 -07005213 if (IndexInst == nullptr)
John Portoac2388c2016-01-22 07:10:56 -08005214 return nullptr;
5215 assert(!VMetadata->isMultiDef(*Index));
John Porto7e93c622015-06-23 10:58:57 -07005216 if (IndexInst->getSrcSize() < 2)
John Portoac2388c2016-01-22 07:10:56 -08005217 return nullptr;
David Sehraa0b1a12015-10-27 16:55:40 -07005218 if (auto *ArithInst = llvm::dyn_cast<InstArithmetic>(IndexInst)) {
5219 if (auto *Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(0))) {
5220 if (auto *Const =
John Porto7e93c622015-06-23 10:58:57 -07005221 llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(1))) {
David Sehraa0b1a12015-10-27 16:55:40 -07005222 if (VMetadata->isMultiDef(Var) || Const->getType() != IceType_i32)
John Portoac2388c2016-01-22 07:10:56 -08005223 return nullptr;
David Sehraa0b1a12015-10-27 16:55:40 -07005224 switch (ArithInst->getOp()) {
5225 default:
John Portoac2388c2016-01-22 07:10:56 -08005226 return nullptr;
David Sehraa0b1a12015-10-27 16:55:40 -07005227 case InstArithmetic::Mul: {
5228 uint32_t Mult = Const->getValue();
John Porto7e93c622015-06-23 10:58:57 -07005229 uint32_t LogMult;
5230 switch (Mult) {
5231 case 1:
5232 LogMult = 0;
5233 break;
5234 case 2:
5235 LogMult = 1;
5236 break;
5237 case 4:
5238 LogMult = 2;
5239 break;
5240 case 8:
5241 LogMult = 3;
5242 break;
5243 default:
John Portoac2388c2016-01-22 07:10:56 -08005244 return nullptr;
John Porto7e93c622015-06-23 10:58:57 -07005245 }
John Portoac2388c2016-01-22 07:10:56 -08005246 if (*Shift + LogMult <= 3) {
5247 *Index = Var;
5248 *Shift += LogMult;
5249 return IndexInst;
John Porto7e93c622015-06-23 10:58:57 -07005250 }
5251 }
David Sehraa0b1a12015-10-27 16:55:40 -07005252 case InstArithmetic::Shl: {
5253 uint32_t ShiftAmount = Const->getValue();
5254 switch (ShiftAmount) {
5255 case 0:
5256 case 1:
5257 case 2:
5258 case 3:
5259 break;
5260 default:
John Portoac2388c2016-01-22 07:10:56 -08005261 return nullptr;
David Sehraa0b1a12015-10-27 16:55:40 -07005262 }
John Portoac2388c2016-01-22 07:10:56 -08005263 if (*Shift + ShiftAmount <= 3) {
5264 *Index = Var;
5265 *Shift += ShiftAmount;
5266 return IndexInst;
David Sehraa0b1a12015-10-27 16:55:40 -07005267 }
5268 }
5269 }
John Porto7e93c622015-06-23 10:58:57 -07005270 }
5271 }
5272 }
John Portoac2388c2016-01-22 07:10:56 -08005273 return nullptr;
John Porto7e93c622015-06-23 10:58:57 -07005274}
5275
Manasij Mukherjee2e4b9602016-06-21 14:22:43 -07005276const Inst *AddressOptimizer::matchOffsetIndexOrBase(
5277 Variable **IndexOrBase, const uint16_t Shift,
5278 ConstantRelocatable **Relocatable, int32_t *Offset) {
John Porto7e93c622015-06-23 10:58:57 -07005279 // Base is Base=Var+Const || Base is Base=Const+Var ==>
5280 // set Base=Var, Offset+=Const
5281 // Base is Base=Var-Const ==>
5282 // set Base=Var, Offset-=Const
Manasij Mukherjee2e4b9602016-06-21 14:22:43 -07005283 // Index is Index=Var+Const ==>
5284 // set Index=Var, Offset+=(Const<<Shift)
5285 // Index is Index=Const+Var ==>
5286 // set Index=Var, Offset+=(Const<<Shift)
5287 // Index is Index=Var-Const ==>
5288 // set Index=Var, Offset-=(Const<<Shift)
Manasij Mukherjee9dd397d2016-06-27 13:47:38 -07005289 // Treat Index=Var Or Const as Index=Var + Const
5290 // when Var = Var' << N and log2(Const) <= N
5291 // or when Var = (2^M) * (2^N) and log2(Const) <= (M+N)
Manasij Mukherjee2e4b9602016-06-21 14:22:43 -07005292
5293 if (*IndexOrBase == nullptr) {
John Portoac2388c2016-01-22 07:10:56 -08005294 return nullptr;
David Sehraa0b1a12015-10-27 16:55:40 -07005295 }
Manasij Mukherjee2e4b9602016-06-21 14:22:43 -07005296 const Inst *Definition = VMetadata->getSingleDefinition(*IndexOrBase);
5297 if (Definition == nullptr) {
John Portoac2388c2016-01-22 07:10:56 -08005298 return nullptr;
David Sehraa0b1a12015-10-27 16:55:40 -07005299 }
Manasij Mukherjee2e4b9602016-06-21 14:22:43 -07005300 assert(!VMetadata->isMultiDef(*IndexOrBase));
5301 if (auto *ArithInst = llvm::dyn_cast<const InstArithmetic>(Definition)) {
Manasij Mukherjee9dd397d2016-06-27 13:47:38 -07005302 switch (ArithInst->getOp()) {
5303 case InstArithmetic::Add:
5304 case InstArithmetic::Sub:
5305 case InstArithmetic::Or:
5306 break;
5307 default:
John Portoac2388c2016-01-22 07:10:56 -08005308 return nullptr;
Manasij Mukherjee9dd397d2016-06-27 13:47:38 -07005309 }
5310
David Sehraa0b1a12015-10-27 16:55:40 -07005311 Operand *Src0 = ArithInst->getSrc(0);
5312 Operand *Src1 = ArithInst->getSrc(1);
5313 auto *Var0 = llvm::dyn_cast<Variable>(Src0);
5314 auto *Var1 = llvm::dyn_cast<Variable>(Src1);
5315 auto *Const0 = llvm::dyn_cast<ConstantInteger32>(Src0);
5316 auto *Const1 = llvm::dyn_cast<ConstantInteger32>(Src1);
5317 auto *Reloc0 = llvm::dyn_cast<ConstantRelocatable>(Src0);
5318 auto *Reloc1 = llvm::dyn_cast<ConstantRelocatable>(Src1);
Manasij Mukherjee9dd397d2016-06-27 13:47:38 -07005319
5320 bool IsAdd = false;
5321 if (ArithInst->getOp() == InstArithmetic::Or) {
5322 Variable *Var = nullptr;
5323 ConstantInteger32 *Const = nullptr;
5324 if (Var0 && Const1) {
5325 Var = Var0;
5326 Const = Const1;
5327 } else if (Const0 && Var1) {
5328 Var = Var1;
5329 Const = Const0;
5330 } else {
5331 return nullptr;
5332 }
5333 auto *VarDef =
5334 llvm::dyn_cast<InstArithmetic>(VMetadata->getSingleDefinition(Var));
5335 if (VarDef == nullptr)
5336 return nullptr;
5337
5338 SizeT ZeroesAvailable = 0;
5339 if (VarDef->getOp() == InstArithmetic::Shl) {
5340 if (auto *ConstInt =
5341 llvm::dyn_cast<ConstantInteger32>(VarDef->getSrc(1))) {
5342 ZeroesAvailable = ConstInt->getValue();
5343 }
5344 } else if (VarDef->getOp() == InstArithmetic::Mul) {
5345 SizeT PowerOfTwo = 0;
Jim Stichnothd615c862016-06-27 16:11:03 -07005346 if (auto *MultConst =
5347 llvm::dyn_cast<ConstantInteger32>(VarDef->getSrc(0))) {
5348 if (llvm::isPowerOf2_32(MultConst->getValue())) {
5349 PowerOfTwo += MultConst->getValue();
5350 }
Manasij Mukherjee9dd397d2016-06-27 13:47:38 -07005351 }
Jim Stichnothd615c862016-06-27 16:11:03 -07005352 if (auto *MultConst =
5353 llvm::dyn_cast<ConstantInteger32>(VarDef->getSrc(1))) {
5354 if (llvm::isPowerOf2_32(MultConst->getValue())) {
5355 PowerOfTwo += MultConst->getValue();
5356 }
Manasij Mukherjee9dd397d2016-06-27 13:47:38 -07005357 }
5358 ZeroesAvailable = llvm::Log2_32(PowerOfTwo) + 1;
5359 }
5360 SizeT ZeroesNeeded = llvm::Log2_32(Const->getValue()) + 1;
5361 if (ZeroesNeeded == 0 || ZeroesNeeded > ZeroesAvailable)
5362 return nullptr;
5363 IsAdd = true; // treat it as an add if the above conditions hold
5364 } else {
5365 IsAdd = ArithInst->getOp() == InstArithmetic::Add;
5366 }
5367
Manasij Mukherjee2e4b9602016-06-21 14:22:43 -07005368 Variable *NewIndexOrBase = nullptr;
5369 int32_t NewOffset = 0;
John Portoac2388c2016-01-22 07:10:56 -08005370 ConstantRelocatable *NewRelocatable = *Relocatable;
David Sehraa0b1a12015-10-27 16:55:40 -07005371 if (Var0 && Var1)
5372 // TODO(sehr): merge base/index splitting into here.
John Portoac2388c2016-01-22 07:10:56 -08005373 return nullptr;
David Sehraa0b1a12015-10-27 16:55:40 -07005374 if (!IsAdd && Var1)
John Portoac2388c2016-01-22 07:10:56 -08005375 return nullptr;
David Sehraa0b1a12015-10-27 16:55:40 -07005376 if (Var0)
Manasij Mukherjee2e4b9602016-06-21 14:22:43 -07005377 NewIndexOrBase = Var0;
David Sehraa0b1a12015-10-27 16:55:40 -07005378 else if (Var1)
Manasij Mukherjee2e4b9602016-06-21 14:22:43 -07005379 NewIndexOrBase = Var1;
David Sehraa0b1a12015-10-27 16:55:40 -07005380 // Don't know how to add/subtract two relocatables.
John Portoac2388c2016-01-22 07:10:56 -08005381 if ((*Relocatable && (Reloc0 || Reloc1)) || (Reloc0 && Reloc1))
5382 return nullptr;
David Sehraa0b1a12015-10-27 16:55:40 -07005383 // Don't know how to subtract a relocatable.
5384 if (!IsAdd && Reloc1)
John Portoac2388c2016-01-22 07:10:56 -08005385 return nullptr;
David Sehraa0b1a12015-10-27 16:55:40 -07005386 // Incorporate ConstantRelocatables.
5387 if (Reloc0)
5388 NewRelocatable = Reloc0;
5389 else if (Reloc1)
5390 NewRelocatable = Reloc1;
5391 // Compute the updated constant offset.
5392 if (Const0) {
John Porto56958cb2016-01-14 09:18:18 -08005393 const int32_t MoreOffset =
5394 IsAdd ? Const0->getValue() : -Const0->getValue();
Manasij Mukherjee2e4b9602016-06-21 14:22:43 -07005395 if (Utils::WouldOverflowAdd(*Offset + NewOffset, MoreOffset))
John Portoac2388c2016-01-22 07:10:56 -08005396 return nullptr;
David Sehraa0b1a12015-10-27 16:55:40 -07005397 NewOffset += MoreOffset;
John Porto7e93c622015-06-23 10:58:57 -07005398 }
David Sehraa0b1a12015-10-27 16:55:40 -07005399 if (Const1) {
John Porto56958cb2016-01-14 09:18:18 -08005400 const int32_t MoreOffset =
5401 IsAdd ? Const1->getValue() : -Const1->getValue();
Manasij Mukherjee2e4b9602016-06-21 14:22:43 -07005402 if (Utils::WouldOverflowAdd(*Offset + NewOffset, MoreOffset))
John Portoac2388c2016-01-22 07:10:56 -08005403 return nullptr;
David Sehraa0b1a12015-10-27 16:55:40 -07005404 NewOffset += MoreOffset;
5405 }
Manasij Mukherjee2e4b9602016-06-21 14:22:43 -07005406 if (Utils::WouldOverflowAdd(*Offset, NewOffset << Shift))
Manasij Mukherjee9dd397d2016-06-27 13:47:38 -07005407 return nullptr;
Manasij Mukherjee2e4b9602016-06-21 14:22:43 -07005408 *IndexOrBase = NewIndexOrBase;
5409 *Offset += (NewOffset << Shift);
5410 // Shift is always zero if this is called with the base
John Portoac2388c2016-01-22 07:10:56 -08005411 *Relocatable = NewRelocatable;
Manasij Mukherjee2e4b9602016-06-21 14:22:43 -07005412 return Definition;
John Porto7e93c622015-06-23 10:58:57 -07005413 }
John Portoac2388c2016-01-22 07:10:56 -08005414 return nullptr;
John Porto7e93c622015-06-23 10:58:57 -07005415}
5416
John Portoac2388c2016-01-22 07:10:56 -08005417template <typename TypeTraits>
5418typename TargetX86Base<TypeTraits>::X86OperandMem *
5419TargetX86Base<TypeTraits>::computeAddressOpt(const Inst *Instr, Type MemType,
5420 Operand *Addr) {
John Porto7e93c622015-06-23 10:58:57 -07005421 Func->resetCurrentNode();
5422 if (Func->isVerbose(IceV_AddrOpt)) {
5423 OstreamLocker L(Func->getContext());
5424 Ostream &Str = Func->getContext()->getStrDump();
5425 Str << "\nStarting computeAddressOpt for instruction:\n ";
5426 Instr->dumpDecorated(Func);
5427 }
John Portoac2388c2016-01-22 07:10:56 -08005428
5429 OptAddr NewAddr;
5430 NewAddr.Base = llvm::dyn_cast<Variable>(Addr);
5431 if (NewAddr.Base == nullptr)
5432 return nullptr;
5433
Andrew Scull57e12682015-09-16 11:30:19 -07005434 // If the Base has more than one use or is live across multiple blocks, then
5435 // don't go further. Alternatively (?), never consider a transformation that
5436 // would change a variable that is currently *not* live across basic block
5437 // boundaries into one that *is*.
Manasij Mukherjeef47d5202016-07-12 16:59:17 -07005438 if (!getFlags().getLoopInvariantCodeMotion()) {
5439 // Need multi block address opt when licm is enabled.
5440 // Might make sense to restrict to current node and loop header.
5441 if (Func->getVMetadata()->isMultiBlock(
5442 NewAddr.Base) /* || Base->getUseCount() > 1*/)
5443 return nullptr;
5444 }
John Portoac2388c2016-01-22 07:10:56 -08005445 AddressOptimizer AddrOpt(Func);
Karl Schimpfd4699942016-04-02 09:55:31 -07005446 const bool MockBounds = getFlags().getMockBoundsCheck();
David Sehraa0b1a12015-10-27 16:55:40 -07005447 const Inst *Reason = nullptr;
John Portoac2388c2016-01-22 07:10:56 -08005448 bool AddressWasOptimized = false;
5449 // The following unnamed struct identifies the address mode formation steps
5450 // that could potentially create an invalid memory operand (i.e., no free
5451 // slots for RebasePtr.) We add all those variables to this struct so that we
5452 // can use memset() to reset all members to false.
5453 struct {
5454 bool AssignBase = false;
5455 bool AssignIndex = false;
5456 bool OffsetFromBase = false;
5457 bool OffsetFromIndex = false;
5458 bool CombinedBaseIndex = false;
5459 } Skip;
5460 // This points to the boolean in Skip that represents the last folding
5461 // performed. This is used to disable a pattern match that generated an
5462 // invalid address. Without this, the algorithm would never finish.
5463 bool *SkipLastFolding = nullptr;
5464 // NewAddrCheckpoint is used to rollback the address being formed in case an
5465 // invalid address is formed.
5466 OptAddr NewAddrCheckpoint;
5467 Reason = Instr;
David Sehraa0b1a12015-10-27 16:55:40 -07005468 do {
John Portoac2388c2016-01-22 07:10:56 -08005469 if (SandboxingType != ST_None) {
5470 // When sandboxing, we defer the sandboxing of NewAddr to the Concrete
5471 // Target. If our optimization was overly aggressive, then we simply undo
5472 // what the previous iteration did, and set the previous pattern's skip
5473 // bit to true.
5474 if (!legalizeOptAddrForSandbox(&NewAddr)) {
5475 *SkipLastFolding = true;
5476 SkipLastFolding = nullptr;
5477 NewAddr = NewAddrCheckpoint;
5478 Reason = nullptr;
5479 }
5480 }
5481
David Sehraa0b1a12015-10-27 16:55:40 -07005482 if (Reason) {
John Portoac2388c2016-01-22 07:10:56 -08005483 AddrOpt.dumpAddressOpt(NewAddr.Relocatable, NewAddr.Offset, NewAddr.Base,
5484 NewAddr.Index, NewAddr.Shift, Reason);
David Sehraa0b1a12015-10-27 16:55:40 -07005485 AddressWasOptimized = true;
5486 Reason = nullptr;
John Portoac2388c2016-01-22 07:10:56 -08005487 SkipLastFolding = nullptr;
5488 memset(&Skip, 0, sizeof(Skip));
John Porto7e93c622015-06-23 10:58:57 -07005489 }
John Portoac2388c2016-01-22 07:10:56 -08005490
5491 NewAddrCheckpoint = NewAddr;
5492
David Sehraa0b1a12015-10-27 16:55:40 -07005493 // Update Base and Index to follow through assignments to definitions.
John Portoac2388c2016-01-22 07:10:56 -08005494 if (!Skip.AssignBase &&
5495 (Reason = AddrOpt.matchAssign(&NewAddr.Base, &NewAddr.Relocatable,
5496 &NewAddr.Offset))) {
5497 SkipLastFolding = &Skip.AssignBase;
David Sehraa0b1a12015-10-27 16:55:40 -07005498 // Assignments of Base from a Relocatable or ConstantInt32 can result
5499 // in Base becoming nullptr. To avoid code duplication in this loop we
5500 // prefer that Base be non-nullptr if possible.
John Portoac2388c2016-01-22 07:10:56 -08005501 if ((NewAddr.Base == nullptr) && (NewAddr.Index != nullptr) &&
5502 NewAddr.Shift == 0) {
5503 std::swap(NewAddr.Base, NewAddr.Index);
5504 }
David Sehraa0b1a12015-10-27 16:55:40 -07005505 continue;
5506 }
John Portoac2388c2016-01-22 07:10:56 -08005507 if (!Skip.AssignBase &&
5508 (Reason = AddrOpt.matchAssign(&NewAddr.Index, &NewAddr.Relocatable,
5509 &NewAddr.Offset))) {
5510 SkipLastFolding = &Skip.AssignIndex;
David Sehraa0b1a12015-10-27 16:55:40 -07005511 continue;
John Portoac2388c2016-01-22 07:10:56 -08005512 }
John Porto7e93c622015-06-23 10:58:57 -07005513
David Sehraa0b1a12015-10-27 16:55:40 -07005514 if (!MockBounds) {
5515 // Transition from:
5516 // <Relocatable + Offset>(Base) to
5517 // <Relocatable + Offset>(Base, Index)
John Portoac2388c2016-01-22 07:10:56 -08005518 if (!Skip.CombinedBaseIndex &&
5519 (Reason = AddrOpt.matchCombinedBaseIndex(
5520 &NewAddr.Base, &NewAddr.Index, &NewAddr.Shift))) {
5521 SkipLastFolding = &Skip.CombinedBaseIndex;
David Sehraa0b1a12015-10-27 16:55:40 -07005522 continue;
John Portoac2388c2016-01-22 07:10:56 -08005523 }
5524
David Sehraa0b1a12015-10-27 16:55:40 -07005525 // Recognize multiply/shift and update Shift amount.
5526 // Index becomes Index=Var<<Const && Const+Shift<=3 ==>
5527 // Index=Var, Shift+=Const
5528 // Index becomes Index=Const*Var && log2(Const)+Shift<=3 ==>
5529 // Index=Var, Shift+=log2(Const)
John Portoac2388c2016-01-22 07:10:56 -08005530 if ((Reason =
5531 AddrOpt.matchShiftedIndex(&NewAddr.Index, &NewAddr.Shift))) {
David Sehraa0b1a12015-10-27 16:55:40 -07005532 continue;
John Portoac2388c2016-01-22 07:10:56 -08005533 }
5534
David Sehraa0b1a12015-10-27 16:55:40 -07005535 // If Shift is zero, the choice of Base and Index was purely arbitrary.
5536 // Recognize multiply/shift and set Shift amount.
5537 // Shift==0 && Base is Base=Var*Const && log2(Const)+Shift<=3 ==>
5538 // swap(Index,Base)
5539 // Similar for Base=Const*Var and Base=Var<<Const
John Portoac2388c2016-01-22 07:10:56 -08005540 if (NewAddr.Shift == 0 &&
5541 (Reason = AddrOpt.matchShiftedIndex(&NewAddr.Base, &NewAddr.Shift))) {
5542 std::swap(NewAddr.Base, NewAddr.Index);
David Sehraa0b1a12015-10-27 16:55:40 -07005543 continue;
5544 }
5545 }
John Portoac2388c2016-01-22 07:10:56 -08005546
David Sehraa0b1a12015-10-27 16:55:40 -07005547 // Update Offset to reflect additions/subtractions with constants and
5548 // relocatables.
John Porto7e93c622015-06-23 10:58:57 -07005549 // TODO: consider overflow issues with respect to Offset.
Manasij Mukherjee2e4b9602016-06-21 14:22:43 -07005550 if (!Skip.OffsetFromBase && (Reason = AddrOpt.matchOffsetIndexOrBase(
5551 &NewAddr.Base, /*Shift =*/0,
5552 &NewAddr.Relocatable, &NewAddr.Offset))) {
John Portoac2388c2016-01-22 07:10:56 -08005553 SkipLastFolding = &Skip.OffsetFromBase;
David Sehraa0b1a12015-10-27 16:55:40 -07005554 continue;
John Portoac2388c2016-01-22 07:10:56 -08005555 }
Manasij Mukherjee2e4b9602016-06-21 14:22:43 -07005556 if (!Skip.OffsetFromIndex && (Reason = AddrOpt.matchOffsetIndexOrBase(
5557 &NewAddr.Index, NewAddr.Shift,
5558 &NewAddr.Relocatable, &NewAddr.Offset))) {
John Portoac2388c2016-01-22 07:10:56 -08005559 SkipLastFolding = &Skip.OffsetFromIndex;
David Sehr69e92902015-11-04 14:46:29 -08005560 continue;
John Portoac2388c2016-01-22 07:10:56 -08005561 }
5562
David Sehraa0b1a12015-10-27 16:55:40 -07005563 break;
5564 } while (Reason);
John Portoac2388c2016-01-22 07:10:56 -08005565
5566 if (!AddressWasOptimized) {
5567 return nullptr;
5568 }
5569
5570 // Undo any addition of RebasePtr. It will be added back when the mem
5571 // operand is sandboxed.
5572 if (NewAddr.Base == RebasePtr) {
5573 NewAddr.Base = nullptr;
5574 }
5575
5576 if (NewAddr.Index == RebasePtr) {
5577 NewAddr.Index = nullptr;
5578 NewAddr.Shift = 0;
5579 }
5580
5581 Constant *OffsetOp = nullptr;
5582 if (NewAddr.Relocatable == nullptr) {
5583 OffsetOp = Ctx->getConstantInt32(NewAddr.Offset);
5584 } else {
5585 OffsetOp =
5586 Ctx->getConstantSym(NewAddr.Relocatable->getOffset() + NewAddr.Offset,
Jim Stichnoth98ba0062016-03-07 09:26:22 -08005587 NewAddr.Relocatable->getName());
John Portoac2388c2016-01-22 07:10:56 -08005588 }
5589 // Vanilla ICE load instructions should not use the segment registers, and
5590 // computeAddressOpt only works at the level of Variables and Constants, not
5591 // other X86OperandMem, so there should be no mention of segment
5592 // registers there either.
5593 static constexpr auto SegmentReg =
5594 X86OperandMem::SegmentRegisters::DefaultSegment;
5595
5596 return X86OperandMem::create(Func, MemType, NewAddr.Base, OffsetOp,
5597 NewAddr.Index, NewAddr.Shift, SegmentReg);
John Porto7e93c622015-06-23 10:58:57 -07005598}
5599
Jim Stichnothad2989b2015-09-15 10:21:42 -07005600/// Add a mock bounds check on the memory address before using it as a load or
5601/// store operand. The basic idea is that given a memory operand [reg], we
5602/// would first add bounds-check code something like:
5603///
5604/// cmp reg, <lb>
5605/// jl out_of_line_error
5606/// cmp reg, <ub>
5607/// jg out_of_line_error
5608///
5609/// In reality, the specific code will depend on how <lb> and <ub> are
5610/// represented, e.g. an immediate, a global, or a function argument.
5611///
5612/// As such, we need to enforce that the memory operand does not have the form
5613/// [reg1+reg2], because then there is no simple cmp instruction that would
5614/// suffice. However, we consider [reg+offset] to be OK because the offset is
5615/// usually small, and so <ub> could have a safety buffer built in and then we
5616/// could instead branch to a custom out_of_line_error that does the precise
5617/// check and jumps back if it turns out OK.
5618///
5619/// For the purpose of mocking the bounds check, we'll do something like this:
5620///
5621/// cmp reg, 0
5622/// je label
5623/// cmp reg, 1
5624/// je label
5625/// label:
5626///
5627/// Also note that we don't need to add a bounds check to a dereference of a
5628/// simple global variable address.
John Porto4a566862016-01-04 09:33:41 -08005629template <typename TraitsType>
5630void TargetX86Base<TraitsType>::doMockBoundsCheck(Operand *Opnd) {
Karl Schimpfd4699942016-04-02 09:55:31 -07005631 if (!getFlags().getMockBoundsCheck())
Jim Stichnothad2989b2015-09-15 10:21:42 -07005632 return;
John Porto4a566862016-01-04 09:33:41 -08005633 if (auto *Mem = llvm::dyn_cast<X86OperandMem>(Opnd)) {
Jim Stichnothad2989b2015-09-15 10:21:42 -07005634 if (Mem->getIndex()) {
5635 llvm::report_fatal_error("doMockBoundsCheck: Opnd contains index reg");
5636 }
5637 Opnd = Mem->getBase();
5638 }
5639 // At this point Opnd could be nullptr, or Variable, or Constant, or perhaps
5640 // something else. We only care if it is Variable.
5641 auto *Var = llvm::dyn_cast_or_null<Variable>(Opnd);
5642 if (Var == nullptr)
5643 return;
5644 // We use lowerStore() to copy out-args onto the stack. This creates a memory
5645 // operand with the stack pointer as the base register. Don't do bounds
5646 // checks on that.
Jim Stichnoth8aa39662016-02-10 11:20:30 -08005647 if (Var->getRegNum() == getStackReg())
Jim Stichnothad2989b2015-09-15 10:21:42 -07005648 return;
5649
John Porto4a566862016-01-04 09:33:41 -08005650 auto *Label = InstX86Label::create(Func, this);
Jim Stichnothad2989b2015-09-15 10:21:42 -07005651 _cmp(Opnd, Ctx->getConstantZero(IceType_i32));
5652 _br(Traits::Cond::Br_e, Label);
5653 _cmp(Opnd, Ctx->getConstantInt32(1));
5654 _br(Traits::Cond::Br_e, Label);
5655 Context.insert(Label);
5656}
5657
John Porto4a566862016-01-04 09:33:41 -08005658template <typename TraitsType>
5659void TargetX86Base<TraitsType>::lowerLoad(const InstLoad *Load) {
John Porto921856d2015-07-07 11:56:26 -07005660 // A Load instruction can be treated the same as an Assign instruction, after
Jim Stichnoth8ff4b282016-01-04 15:39:06 -08005661 // the source operand is transformed into an X86OperandMem operand. Note that
5662 // the address mode optimization already creates an X86OperandMem operand, so
5663 // it doesn't need another level of transformation.
John Porto7e93c622015-06-23 10:58:57 -07005664 Variable *DestLoad = Load->getDest();
5665 Type Ty = DestLoad->getType();
5666 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty);
Jim Stichnothad2989b2015-09-15 10:21:42 -07005667 doMockBoundsCheck(Src0);
Jim Stichnoth54f3d512015-12-11 09:53:00 -08005668 auto *Assign = InstAssign::create(Func, DestLoad, Src0);
John Porto7e93c622015-06-23 10:58:57 -07005669 lowerAssign(Assign);
5670}
5671
John Porto4a566862016-01-04 09:33:41 -08005672template <typename TraitsType>
Manasij Mukherjee0c704172016-07-21 12:40:24 -07005673void TargetX86Base<TraitsType>::doAddressOptOther() {
5674 // Inverts some Icmp instructions which helps doAddressOptLoad later.
5675 // TODO(manasijm): Refactor to unify the conditions for Var0 and Var1
5676 Inst *Instr = Context.getCur();
5677 auto *VMetadata = Func->getVMetadata();
5678 if (auto *Icmp = llvm::dyn_cast<InstIcmp>(Instr)) {
5679 if (llvm::isa<Constant>(Icmp->getSrc(0)) ||
5680 llvm::isa<Constant>(Icmp->getSrc(1)))
5681 return;
5682 auto *Var0 = llvm::dyn_cast<Variable>(Icmp->getSrc(0));
5683 if (Var0 == nullptr)
5684 return;
5685 if (!VMetadata->isTracked(Var0))
5686 return;
5687 auto *Op0Def = VMetadata->getFirstDefinitionSingleBlock(Var0);
5688 if (Op0Def == nullptr || !llvm::isa<InstLoad>(Op0Def))
5689 return;
5690 if (VMetadata->getLocalUseNode(Var0) != Context.getNode())
5691 return;
5692
5693 auto *Var1 = llvm::dyn_cast<Variable>(Icmp->getSrc(1));
5694 if (Var1 != nullptr && VMetadata->isTracked(Var1)) {
5695 auto *Op1Def = VMetadata->getFirstDefinitionSingleBlock(Var1);
5696 if (Op1Def != nullptr && !VMetadata->isMultiBlock(Var1) &&
5697 llvm::isa<InstLoad>(Op1Def)) {
5698 return; // Both are loads
5699 }
5700 }
5701 Icmp->reverseConditionAndOperands();
5702 }
5703}
5704
5705template <typename TraitsType>
John Porto4a566862016-01-04 09:33:41 -08005706void TargetX86Base<TraitsType>::doAddressOptLoad() {
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08005707 Inst *Instr = Context.getCur();
5708 Operand *Addr = Instr->getSrc(0);
5709 Variable *Dest = Instr->getDest();
5710 if (auto *OptAddr = computeAddressOpt(Instr, Dest->getType(), Addr)) {
5711 Instr->setDeleted();
John Portoac2388c2016-01-22 07:10:56 -08005712 Context.insert<InstLoad>(Dest, OptAddr);
John Porto7e93c622015-06-23 10:58:57 -07005713 }
5714}
5715
John Porto4a566862016-01-04 09:33:41 -08005716template <typename TraitsType>
5717void TargetX86Base<TraitsType>::randomlyInsertNop(float Probability,
5718 RandomNumberGenerator &RNG) {
Qining Luaee5fa82015-08-20 14:59:03 -07005719 RandomNumberGeneratorWrapper RNGW(RNG);
5720 if (RNGW.getTrueWithProbability(Probability)) {
5721 _nop(RNGW(Traits::X86_NUM_NOP_VARIANTS));
John Porto7e93c622015-06-23 10:58:57 -07005722 }
5723}
5724
John Porto4a566862016-01-04 09:33:41 -08005725template <typename TraitsType>
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08005726void TargetX86Base<TraitsType>::lowerPhi(const InstPhi * /*Instr*/) {
John Porto7e93c622015-06-23 10:58:57 -07005727 Func->setError("Phi found in regular instruction list");
5728}
5729
John Porto4a566862016-01-04 09:33:41 -08005730template <typename TraitsType>
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08005731void TargetX86Base<TraitsType>::lowerRet(const InstRet *Instr) {
David Sehr0c68bef2016-01-20 10:00:23 -08005732 Variable *Reg = nullptr;
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08005733 if (Instr->hasRetValue()) {
5734 Operand *RetValue = legalize(Instr->getRetValue());
David Sehr0c68bef2016-01-20 10:00:23 -08005735 const Type ReturnType = RetValue->getType();
5736 assert(isVectorType(ReturnType) || isScalarFloatingType(ReturnType) ||
5737 (ReturnType == IceType_i32) || (ReturnType == IceType_i64));
5738 Reg = moveReturnValueToRegister(RetValue, ReturnType);
5739 }
5740 // Add a ret instruction even if sandboxing is enabled, because addEpilog
5741 // explicitly looks for a ret instruction as a marker for where to insert the
5742 // frame removal instructions.
5743 _ret(Reg);
5744 // Add a fake use of esp to make sure esp stays alive for the entire
5745 // function. Otherwise post-call esp adjustments get dead-code eliminated.
5746 keepEspLiveAtExit();
5747}
5748
John Portoae15f0f2016-04-26 04:26:33 -07005749inline uint32_t makePshufdMask(SizeT Index0, SizeT Index1, SizeT Index2,
5750 SizeT Index3) {
5751 const SizeT Mask = (Index0 & 0x3) | ((Index1 & 0x3) << 2) |
5752 ((Index2 & 0x3) << 4) | ((Index3 & 0x3) << 6);
5753 assert(Mask < 256);
5754 return Mask;
5755}
5756
5757template <typename TraitsType>
5758Variable *TargetX86Base<TraitsType>::lowerShuffleVector_AllFromSameSrc(
5759 Variable *Src, SizeT Index0, SizeT Index1, SizeT Index2, SizeT Index3) {
5760 constexpr SizeT SrcBit = 1 << 2;
5761 assert((Index0 & SrcBit) == (Index1 & SrcBit));
5762 assert((Index0 & SrcBit) == (Index2 & SrcBit));
5763 assert((Index0 & SrcBit) == (Index3 & SrcBit));
5764 (void)SrcBit;
5765
5766 const Type SrcTy = Src->getType();
5767 auto *T = makeReg(SrcTy);
5768 auto *SrcRM = legalize(Src, Legal_Reg | Legal_Mem);
5769 auto *Mask =
5770 Ctx->getConstantInt32(makePshufdMask(Index0, Index1, Index2, Index3));
5771 _pshufd(T, SrcRM, Mask);
5772 return T;
5773}
5774
5775template <typename TraitsType>
5776Variable *TargetX86Base<TraitsType>::lowerShuffleVector_TwoFromSameSrc(
5777 Variable *Src0, SizeT Index0, SizeT Index1, Variable *Src1, SizeT Index2,
5778 SizeT Index3) {
5779 constexpr SizeT SrcBit = 1 << 2;
5780 assert((Index0 & SrcBit) == (Index1 & SrcBit) || (Index1 == IGNORE_INDEX));
5781 assert((Index2 & SrcBit) == (Index3 & SrcBit) || (Index3 == IGNORE_INDEX));
5782 (void)SrcBit;
5783
5784 const Type SrcTy = Src0->getType();
5785 assert(Src1->getType() == SrcTy);
5786 auto *T = makeReg(SrcTy);
5787 auto *Src0R = legalizeToReg(Src0);
5788 auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
5789 auto *Mask =
5790 Ctx->getConstantInt32(makePshufdMask(Index0, Index1, Index2, Index3));
5791 _movp(T, Src0R);
5792 _shufps(T, Src1RM, Mask);
5793 return T;
5794}
5795
template <typename TraitsType>
Variable *TargetX86Base<TraitsType>::lowerShuffleVector_UnifyFromDifferentSrcs(
    Variable *Src0, SizeT Index0, Variable *Src1, SizeT Index1) {
  // Gathers one lane from each source into a single register: lane 0 of the
  // result comes from Src0[Index0], lane 1 from Src1[Index1], and the upper
  // two lanes are don't-care (IGNORE_INDEX).
  return lowerShuffleVector_TwoFromSameSrc(Src0, Index0, IGNORE_INDEX, Src1,
                                           Index1, IGNORE_INDEX);
}
5802
5803inline SizeT makeSrcSwitchMask(SizeT Index0, SizeT Index1, SizeT Index2,
5804 SizeT Index3) {
5805 constexpr SizeT SrcBit = 1 << 2;
5806 const SizeT Index0Bits = ((Index0 & SrcBit) == 0) ? 0 : (1 << 0);
5807 const SizeT Index1Bits = ((Index1 & SrcBit) == 0) ? 0 : (1 << 1);
5808 const SizeT Index2Bits = ((Index2 & SrcBit) == 0) ? 0 : (1 << 2);
5809 const SizeT Index3Bits = ((Index3 & SrcBit) == 0) ? 0 : (1 << 3);
5810 return Index0Bits | Index1Bits | Index2Bits | Index3Bits;
5811}
5812
David Sehr0c68bef2016-01-20 10:00:23 -08005813template <typename TraitsType>
John Portode29f122016-04-26 19:16:07 -07005814GlobalString TargetX86Base<TraitsType>::lowerShuffleVector_NewMaskName() {
5815 GlobalString FuncName = Func->getFunctionName();
5816 const SizeT Id = PshufbMaskCount++;
5817 if (!BuildDefs::dump() || !FuncName.hasStdString()) {
5818 return GlobalString::createWithString(
5819 Ctx,
5820 "$PS" + std::to_string(FuncName.getID()) + "_" + std::to_string(Id));
5821 }
5822 return GlobalString::createWithString(
5823 Ctx, "Pshufb$" + Func->getFunctionName() + "$" + std::to_string(Id));
5824}
5825
// Materializes the 16 byte-lane selectors as a 16-byte constant global and
// returns a relocatable reference to it, for use as the pshufb shuffle-control
// memory operand. The global is appended to the module's global list.
template <typename TraitsType>
ConstantRelocatable *
TargetX86Base<TraitsType>::lowerShuffleVector_CreatePshufbMask(
    int8_t Idx0, int8_t Idx1, int8_t Idx2, int8_t Idx3, int8_t Idx4,
    int8_t Idx5, int8_t Idx6, int8_t Idx7, int8_t Idx8, int8_t Idx9,
    int8_t Idx10, int8_t Idx11, int8_t Idx12, int8_t Idx13, int8_t Idx14,
    int8_t Idx15) {
  static constexpr uint8_t NumElements = 16;
  const char Initializer[NumElements] = {
      Idx0, Idx1, Idx2, Idx3, Idx4, Idx5, Idx6, Idx7,
      Idx8, Idx9, Idx10, Idx11, Idx12, Idx13, Idx14, Idx15,
  };

  // Alignment is taken from a 16-byte vector type so the mask can be used
  // directly as an XMM memory operand.
  static constexpr Type V4VectorType = IceType_v4i32;
  const uint32_t MaskAlignment = typeWidthInBytesOnStack(V4VectorType);
  auto *Mask = VariableDeclaration::create(Func->getGlobalPool());
  GlobalString MaskName = lowerShuffleVector_NewMaskName();
  Mask->setIsConstant(true);
  Mask->addInitializer(VariableDeclaration::DataInitializer::create(
      Func->getGlobalPool(), Initializer, NumElements));
  Mask->setName(MaskName);
  // Mask needs to be 16-byte aligned, or pshufb will seg fault.
  Mask->setAlignment(MaskAlignment);
  Func->addGlobal(Mask);

  // Return a symbol reference (offset 0) to the newly created mask global.
  constexpr RelocOffsetT Offset = 0;
  return llvm::cast<ConstantRelocatable>(Ctx->getConstantSym(Offset, MaskName));
}
5854
// Lowers an arbitrary 16-lane byte shuffle of two sources using two pshufb
// operations. Each source is shuffled against a mask that selects only the
// lanes belonging to that source (other lanes are zeroed via the pshufb
// high-bit convention), and the two partial results are OR'd together.
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerShuffleVector_UsingPshufb(
    Variable *Dest, Operand *Src0, Operand *Src1, int8_t Idx0, int8_t Idx1,
    int8_t Idx2, int8_t Idx3, int8_t Idx4, int8_t Idx5, int8_t Idx6,
    int8_t Idx7, int8_t Idx8, int8_t Idx9, int8_t Idx10, int8_t Idx11,
    int8_t Idx12, int8_t Idx13, int8_t Idx14, int8_t Idx15) {
  const Type DestTy = Dest->getType();
  static constexpr bool NotRebased = false;
  static constexpr Variable *NoBase = nullptr;
  // We use void for the memory operand instead of DestTy because using the
  // latter causes a validation failure: the X86 Inst layer complains that
  // vector mem operands could be under aligned. Thus, using void we avoid the
  // validation error. Note that the mask global declaration is aligned, so it
  // can be used as an XMM mem operand.
  static constexpr Type MaskType = IceType_void;
// IDX_IN_SRC(N, S): if index N selects source S (bit 4 of N is the source
// selector), yield the within-source lane N&0xf; otherwise yield
// CLEAR_ALL_BITS so pshufb zeroes that lane.
#define IDX_IN_SRC(N, S)                                                       \
  ((((N) & (1 << 4)) == (S << 4)) ? ((N)&0xf) : CLEAR_ALL_BITS)
  auto *Mask0M = X86OperandMem::create(
      Func, MaskType, NoBase,
      lowerShuffleVector_CreatePshufbMask(
          IDX_IN_SRC(Idx0, 0), IDX_IN_SRC(Idx1, 0), IDX_IN_SRC(Idx2, 0),
          IDX_IN_SRC(Idx3, 0), IDX_IN_SRC(Idx4, 0), IDX_IN_SRC(Idx5, 0),
          IDX_IN_SRC(Idx6, 0), IDX_IN_SRC(Idx7, 0), IDX_IN_SRC(Idx8, 0),
          IDX_IN_SRC(Idx9, 0), IDX_IN_SRC(Idx10, 0), IDX_IN_SRC(Idx11, 0),
          IDX_IN_SRC(Idx12, 0), IDX_IN_SRC(Idx13, 0), IDX_IN_SRC(Idx14, 0),
          IDX_IN_SRC(Idx15, 0)),
      NotRebased);
  auto *Mask1M = X86OperandMem::create(
      Func, MaskType, NoBase,
      lowerShuffleVector_CreatePshufbMask(
          IDX_IN_SRC(Idx0, 1), IDX_IN_SRC(Idx1, 1), IDX_IN_SRC(Idx2, 1),
          IDX_IN_SRC(Idx3, 1), IDX_IN_SRC(Idx4, 1), IDX_IN_SRC(Idx5, 1),
          IDX_IN_SRC(Idx6, 1), IDX_IN_SRC(Idx7, 1), IDX_IN_SRC(Idx8, 1),
          IDX_IN_SRC(Idx9, 1), IDX_IN_SRC(Idx10, 1), IDX_IN_SRC(Idx11, 1),
          IDX_IN_SRC(Idx12, 1), IDX_IN_SRC(Idx13, 1), IDX_IN_SRC(Idx14, 1),
          IDX_IN_SRC(Idx15, 1)),
      NotRebased);
#undef IDX_IN_SRC
  auto *T0 = makeReg(DestTy);
  auto *T1 = makeReg(DestTy);
  auto *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
  _movp(T0, Src0RM);
  auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
  _movp(T1, Src1RM);

  // Shuffle each source with its own mask, then merge the partial results.
  _pshufb(T1, Mask1M);
  _pshufb(T0, Mask0M);
  _por(T1, T0);
  _movp(Dest, T1);
}
5905
5906template <typename TraitsType>
John Portoa47c11c2016-04-21 05:53:42 -07005907void TargetX86Base<TraitsType>::lowerShuffleVector(
5908 const InstShuffleVector *Instr) {
5909 auto *Dest = Instr->getDest();
5910 const Type DestTy = Dest->getType();
John Portoae15f0f2016-04-26 04:26:33 -07005911 auto *Src0 = llvm::cast<Variable>(Instr->getSrc(0));
5912 auto *Src1 = llvm::cast<Variable>(Instr->getSrc(1));
5913 const SizeT NumElements = typeNumElements(DestTy);
John Portoa47c11c2016-04-21 05:53:42 -07005914
5915 auto *T = makeReg(DestTy);
5916
5917 switch (DestTy) {
5918 default:
John Portode29f122016-04-26 19:16:07 -07005919 llvm::report_fatal_error("Unexpected vector type.");
5920 case IceType_v16i1:
5921 case IceType_v16i8: {
5922 if (InstructionSet < Traits::SSE4_1) {
5923 // TODO(jpp): figure out how to lower with sse2.
5924 break;
5925 }
5926 static constexpr SizeT ExpectedNumElements = 16;
5927 assert(ExpectedNumElements == Instr->getNumIndexes());
5928 (void)ExpectedNumElements;
5929 const SizeT Index0 = Instr->getIndex(0)->getValue();
5930 const SizeT Index1 = Instr->getIndex(1)->getValue();
5931 const SizeT Index2 = Instr->getIndex(2)->getValue();
5932 const SizeT Index3 = Instr->getIndex(3)->getValue();
5933 const SizeT Index4 = Instr->getIndex(4)->getValue();
5934 const SizeT Index5 = Instr->getIndex(5)->getValue();
5935 const SizeT Index6 = Instr->getIndex(6)->getValue();
5936 const SizeT Index7 = Instr->getIndex(7)->getValue();
5937 const SizeT Index8 = Instr->getIndex(8)->getValue();
5938 const SizeT Index9 = Instr->getIndex(9)->getValue();
5939 const SizeT Index10 = Instr->getIndex(10)->getValue();
5940 const SizeT Index11 = Instr->getIndex(11)->getValue();
5941 const SizeT Index12 = Instr->getIndex(12)->getValue();
5942 const SizeT Index13 = Instr->getIndex(13)->getValue();
5943 const SizeT Index14 = Instr->getIndex(14)->getValue();
5944 const SizeT Index15 = Instr->getIndex(15)->getValue();
5945 lowerShuffleVector_UsingPshufb(Dest, Src0, Src1, Index0, Index1, Index2,
5946 Index3, Index4, Index5, Index6, Index7,
5947 Index8, Index9, Index10, Index11, Index12,
5948 Index13, Index14, Index15);
5949 return;
5950 }
5951 case IceType_v8i1:
5952 case IceType_v8i16: {
5953 if (InstructionSet < Traits::SSE4_1) {
5954 // TODO(jpp): figure out how to lower with sse2.
5955 break;
5956 }
5957 static constexpr SizeT ExpectedNumElements = 8;
5958 assert(ExpectedNumElements == Instr->getNumIndexes());
5959 (void)ExpectedNumElements;
5960 const SizeT Index0 = Instr->getIndex(0)->getValue();
5961 const SizeT Index1 = Instr->getIndex(1)->getValue();
5962 const SizeT Index2 = Instr->getIndex(2)->getValue();
5963 const SizeT Index3 = Instr->getIndex(3)->getValue();
5964 const SizeT Index4 = Instr->getIndex(4)->getValue();
5965 const SizeT Index5 = Instr->getIndex(5)->getValue();
5966 const SizeT Index6 = Instr->getIndex(6)->getValue();
5967 const SizeT Index7 = Instr->getIndex(7)->getValue();
5968#define TO_BYTE_INDEX(I) ((I) << 1)
5969 lowerShuffleVector_UsingPshufb(
5970 Dest, Src0, Src1, TO_BYTE_INDEX(Index0), TO_BYTE_INDEX(Index0) + 1,
5971 TO_BYTE_INDEX(Index1), TO_BYTE_INDEX(Index1) + 1, TO_BYTE_INDEX(Index2),
5972 TO_BYTE_INDEX(Index2) + 1, TO_BYTE_INDEX(Index3),
5973 TO_BYTE_INDEX(Index3) + 1, TO_BYTE_INDEX(Index4),
5974 TO_BYTE_INDEX(Index4) + 1, TO_BYTE_INDEX(Index5),
5975 TO_BYTE_INDEX(Index5) + 1, TO_BYTE_INDEX(Index6),
5976 TO_BYTE_INDEX(Index6) + 1, TO_BYTE_INDEX(Index7),
5977 TO_BYTE_INDEX(Index7) + 1);
5978#undef TO_BYTE_INDEX
5979 return;
5980 }
John Portoae15f0f2016-04-26 04:26:33 -07005981 case IceType_v4i1:
5982 case IceType_v4i32:
5983 case IceType_v4f32: {
5984 static constexpr SizeT ExpectedNumElements = 4;
5985 assert(ExpectedNumElements == Instr->getNumIndexes());
5986 const SizeT Index0 = Instr->getIndex(0)->getValue();
5987 const SizeT Index1 = Instr->getIndex(1)->getValue();
5988 const SizeT Index2 = Instr->getIndex(2)->getValue();
5989 const SizeT Index3 = Instr->getIndex(3)->getValue();
5990 Variable *T = nullptr;
5991 switch (makeSrcSwitchMask(Index0, Index1, Index2, Index3)) {
5992#define CASE_SRCS_IN(S0, S1, S2, S3) \
5993 case (((S0) << 0) | ((S1) << 1) | ((S2) << 2) | ((S3) << 3))
5994 CASE_SRCS_IN(0, 0, 0, 0) : {
5995 T = lowerShuffleVector_AllFromSameSrc(Src0, Index0, Index1, Index2,
5996 Index3);
5997 }
5998 break;
5999 CASE_SRCS_IN(0, 0, 0, 1) : {
6000 assert(false && "Following code is untested but likely correct; test "
6001 "and remove assert.");
6002 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index2,
6003 Src1, Index3);
6004 T = lowerShuffleVector_TwoFromSameSrc(Src0, Index0, Index1, Unified,
6005 UNIFIED_INDEX_0, UNIFIED_INDEX_1);
6006 }
6007 break;
6008 CASE_SRCS_IN(0, 0, 1, 0) : {
6009 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index2,
6010 Src0, Index3);
6011 T = lowerShuffleVector_TwoFromSameSrc(Src0, Index0, Index1, Unified,
6012 UNIFIED_INDEX_0, UNIFIED_INDEX_1);
6013 }
6014 break;
6015 CASE_SRCS_IN(0, 0, 1, 1) : {
6016 assert(false && "Following code is untested but likely correct; test "
6017 "and remove assert.");
6018 T = lowerShuffleVector_TwoFromSameSrc(Src0, Index0, Index1, Src1,
6019 Index2, Index3);
6020 }
6021 break;
6022 CASE_SRCS_IN(0, 1, 0, 0) : {
6023 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index0,
6024 Src1, Index1);
6025 T = lowerShuffleVector_TwoFromSameSrc(
6026 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src0, Index2, Index3);
6027 }
6028 break;
6029 CASE_SRCS_IN(0, 1, 0, 1) : {
6030 if (Index0 == 0 && (Index1 - ExpectedNumElements) == 0 && Index2 == 1 &&
6031 (Index3 - ExpectedNumElements) == 1) {
6032 assert(false && "Following code is untested but likely correct; test "
6033 "and remove assert.");
6034 auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
6035 auto *Src0R = legalizeToReg(Src0);
6036 T = makeReg(DestTy);
6037 _movp(T, Src0R);
6038 _punpckl(T, Src1RM);
6039 } else if (Index0 == Index2 && Index1 == Index3) {
6040 assert(false && "Following code is untested but likely correct; test "
6041 "and remove assert.");
6042 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(
6043 Src0, Index0, Src1, Index1);
6044 T = lowerShuffleVector_AllFromSameSrc(
6045 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_0,
6046 UNIFIED_INDEX_1);
6047 } else {
6048 assert(false && "Following code is untested but likely correct; test "
6049 "and remove assert.");
6050 auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs(
6051 Src0, Index0, Src1, Index1);
6052 auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs(
6053 Src0, Index2, Src1, Index3);
6054 T = lowerShuffleVector_TwoFromSameSrc(
6055 Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1,
6056 UNIFIED_INDEX_0, UNIFIED_INDEX_1);
6057 }
6058 }
6059 break;
6060 CASE_SRCS_IN(0, 1, 1, 0) : {
6061 if (Index0 == Index3 && Index1 == Index2) {
6062 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(
6063 Src0, Index0, Src1, Index1);
6064 T = lowerShuffleVector_AllFromSameSrc(
6065 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_1,
6066 UNIFIED_INDEX_0);
6067 } else {
6068 auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs(
6069 Src0, Index0, Src1, Index1);
6070 auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs(
6071 Src1, Index2, Src0, Index3);
6072 T = lowerShuffleVector_TwoFromSameSrc(
6073 Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1,
6074 UNIFIED_INDEX_0, UNIFIED_INDEX_1);
6075 }
6076 }
6077 break;
6078 CASE_SRCS_IN(0, 1, 1, 1) : {
6079 assert(false && "Following code is untested but likely correct; test "
6080 "and remove assert.");
6081 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index0,
6082 Src1, Index1);
6083 T = lowerShuffleVector_TwoFromSameSrc(
6084 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src1, Index2, Index3);
6085 }
6086 break;
6087 CASE_SRCS_IN(1, 0, 0, 0) : {
6088 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index0,
6089 Src0, Index1);
6090 T = lowerShuffleVector_TwoFromSameSrc(
6091 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src0, Index2, Index3);
6092 }
6093 break;
6094 CASE_SRCS_IN(1, 0, 0, 1) : {
6095 if (Index0 == Index3 && Index1 == Index2) {
6096 assert(false && "Following code is untested but likely correct; test "
6097 "and remove assert.");
6098 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(
6099 Src1, Index0, Src0, Index1);
6100 T = lowerShuffleVector_AllFromSameSrc(
6101 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_1,
6102 UNIFIED_INDEX_0);
6103 } else {
6104 assert(false && "Following code is untested but likely correct; test "
6105 "and remove assert.");
6106 auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs(
6107 Src1, Index0, Src0, Index1);
6108 auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs(
6109 Src0, Index2, Src1, Index3);
6110 T = lowerShuffleVector_TwoFromSameSrc(
6111 Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1,
6112 UNIFIED_INDEX_0, UNIFIED_INDEX_1);
6113 }
6114 }
6115 break;
6116 CASE_SRCS_IN(1, 0, 1, 0) : {
6117 if ((Index0 - ExpectedNumElements) == 0 && Index1 == 0 &&
6118 (Index2 - ExpectedNumElements) == 1 && Index3 == 1) {
6119 auto *Src1RM = legalize(Src0, Legal_Reg | Legal_Mem);
6120 auto *Src0R = legalizeToReg(Src1);
6121 T = makeReg(DestTy);
6122 _movp(T, Src0R);
6123 _punpckl(T, Src1RM);
6124 } else if (Index0 == Index2 && Index1 == Index3) {
6125 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(
6126 Src1, Index0, Src0, Index1);
6127 T = lowerShuffleVector_AllFromSameSrc(
6128 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_0,
6129 UNIFIED_INDEX_1);
6130 } else {
6131 auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs(
6132 Src1, Index0, Src0, Index1);
6133 auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs(
6134 Src1, Index2, Src0, Index3);
6135 T = lowerShuffleVector_TwoFromSameSrc(
6136 Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1,
6137 UNIFIED_INDEX_0, UNIFIED_INDEX_1);
6138 }
6139 }
6140 break;
6141 CASE_SRCS_IN(1, 0, 1, 1) : {
6142 assert(false && "Following code is untested but likely correct; test "
6143 "and remove assert.");
6144 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index0,
6145 Src0, Index1);
6146 T = lowerShuffleVector_TwoFromSameSrc(
6147 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src1, Index2, Index3);
6148 }
6149 break;
6150 CASE_SRCS_IN(1, 1, 0, 0) : {
6151 T = lowerShuffleVector_TwoFromSameSrc(Src1, Index0, Index1, Src0,
6152 Index2, Index3);
6153 }
6154 break;
6155 CASE_SRCS_IN(1, 1, 0, 1) : {
6156 assert(false && "Following code is untested but likely correct; test "
6157 "and remove assert.");
6158 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index2,
6159 Src1, Index3);
6160 T = lowerShuffleVector_TwoFromSameSrc(Src1, Index0, Index1, Unified,
6161 UNIFIED_INDEX_0, UNIFIED_INDEX_1);
6162 }
6163 break;
6164 CASE_SRCS_IN(1, 1, 1, 0) : {
6165 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index2,
6166 Src0, Index3);
6167 T = lowerShuffleVector_TwoFromSameSrc(Src1, Index0, Index1, Unified,
6168 UNIFIED_INDEX_0, UNIFIED_INDEX_1);
6169 }
6170 break;
6171 CASE_SRCS_IN(1, 1, 1, 1) : {
6172 assert(false && "Following code is untested but likely correct; test "
6173 "and remove assert.");
6174 T = lowerShuffleVector_AllFromSameSrc(Src1, Index0, Index1, Index2,
6175 Index3);
6176 }
6177 break;
6178#undef CASE_SRCS_IN
6179 }
6180
6181 assert(T != nullptr);
6182 assert(T->getType() == DestTy);
6183 _movp(Dest, T);
6184 return;
6185 } break;
John Portoa47c11c2016-04-21 05:53:42 -07006186 }
6187
6188 // Unoptimized shuffle. Perform a series of inserts and extracts.
6189 Context.insert<InstFakeDef>(T);
John Portoa47c11c2016-04-21 05:53:42 -07006190 const Type ElementType = typeElementType(DestTy);
6191 for (SizeT I = 0; I < Instr->getNumIndexes(); ++I) {
6192 auto *Index = Instr->getIndex(I);
6193 const SizeT Elem = Index->getValue();
6194 auto *ExtElmt = makeReg(ElementType);
6195 if (Elem < NumElements) {
6196 lowerExtractElement(
6197 InstExtractElement::create(Func, ExtElmt, Src0, Index));
6198 } else {
6199 lowerExtractElement(InstExtractElement::create(
6200 Func, ExtElmt, Src1,
6201 Ctx->getConstantInt32(Index->getValue() - NumElements)));
6202 }
6203 auto *NewT = makeReg(DestTy);
6204 lowerInsertElement(InstInsertElement::create(Func, NewT, T, ExtElmt,
6205 Ctx->getConstantInt32(I)));
6206 T = NewT;
6207 }
6208 _movp(Dest, T);
6209}
6210
6211template <typename TraitsType>
John Porto4a566862016-01-04 09:33:41 -08006212void TargetX86Base<TraitsType>::lowerSelect(const InstSelect *Select) {
David Sehre3984282015-12-15 17:34:55 -08006213 Variable *Dest = Select->getDest();
John Porto7e93c622015-06-23 10:58:57 -07006214
David Sehre3984282015-12-15 17:34:55 -08006215 if (isVectorType(Dest->getType())) {
6216 lowerSelectVector(Select);
John Porto7e93c622015-06-23 10:58:57 -07006217 return;
6218 }
6219
David Sehre3984282015-12-15 17:34:55 -08006220 Operand *Condition = Select->getCondition();
John Porto7e93c622015-06-23 10:58:57 -07006221 // Handle folding opportunities.
David Sehre3984282015-12-15 17:34:55 -08006222 if (const Inst *Producer = FoldingInfo.getProducerFor(Condition)) {
John Porto7e93c622015-06-23 10:58:57 -07006223 assert(Producer->isDeleted());
Jim Stichnothcaeaa272016-01-10 12:53:44 -08006224 switch (BoolFolding<Traits>::getProducerKind(Producer)) {
John Porto7e93c622015-06-23 10:58:57 -07006225 default:
6226 break;
Jim Stichnothcaeaa272016-01-10 12:53:44 -08006227 case BoolFolding<Traits>::PK_Icmp32:
6228 case BoolFolding<Traits>::PK_Icmp64: {
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08006229 lowerIcmpAndConsumer(llvm::cast<InstIcmp>(Producer), Select);
David Sehre3984282015-12-15 17:34:55 -08006230 return;
6231 }
Jim Stichnothcaeaa272016-01-10 12:53:44 -08006232 case BoolFolding<Traits>::PK_Fcmp: {
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08006233 lowerFcmpAndConsumer(llvm::cast<InstFcmp>(Producer), Select);
David Sehre3984282015-12-15 17:34:55 -08006234 return;
6235 }
John Porto7e93c622015-06-23 10:58:57 -07006236 }
6237 }
John Porto7e93c622015-06-23 10:58:57 -07006238
David Sehre3984282015-12-15 17:34:55 -08006239 Operand *CmpResult = legalize(Condition, Legal_Reg | Legal_Mem);
6240 Operand *Zero = Ctx->getConstantZero(IceType_i32);
6241 _cmp(CmpResult, Zero);
6242 Operand *SrcT = Select->getTrueOperand();
6243 Operand *SrcF = Select->getFalseOperand();
John Porto4a566862016-01-04 09:33:41 -08006244 const BrCond Cond = Traits::Cond::Br_ne;
David Sehre3984282015-12-15 17:34:55 -08006245 lowerSelectMove(Dest, Cond, SrcT, SrcF);
6246}
6247
/// Lowers a scalar select into moves once the condition has already been
/// lowered to a branch condition Cond. 8-bit and floating-point destinations
/// get explicit control flow (cmov does not support them); other integer
/// widths use a cmov-based sequence, with i64 split into lo/hi halves on
/// 32-bit targets.
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerSelectMove(Variable *Dest, BrCond Cond,
                                                Operand *SrcT, Operand *SrcF) {
  Type DestTy = Dest->getType();
  if (typeWidthInBytes(DestTy) == 1 || isFloatingType(DestTy)) {
    // The cmov instruction doesn't allow 8-bit or FP operands, so we need
    // explicit control flow.
    // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1:
    auto *Label = InstX86Label::create(Func, this);
    SrcT = legalize(SrcT, Legal_Reg | Legal_Imm);
    _mov(Dest, SrcT);
    _br(Cond, Label);
    SrcF = legalize(SrcF, Legal_Reg | Legal_Imm);
    // The second assignment to Dest is intentional; mark it redefined so
    // liveness analysis accepts the double definition.
    _redefined(_mov(Dest, SrcF));
    Context.insert(Label);
    return;
  }
  // mov t, SrcF; cmov_cond t, SrcT; mov dest, t
  // But if SrcT is immediate, we might be able to do better, as the cmov
  // instruction doesn't allow an immediate operand:
  // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t
  if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) {
    // Swap the operands and invert the condition so the constant is moved
    // first and the non-constant goes through cmov.
    std::swap(SrcT, SrcF);
    Cond = InstImpl<TraitsType>::InstX86Base::getOppositeCondition(Cond);
  }
  if (!Traits::Is64Bit && DestTy == IceType_i64) {
    // Replace any undef operands so loOperand()/hiOperand() can split them.
    SrcT = legalizeUndef(SrcT);
    SrcF = legalizeUndef(SrcF);
    // Set the low portion.
    auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
    lowerSelectIntMove(DestLo, Cond, loOperand(SrcT), loOperand(SrcF));
    // Set the high portion.
    auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    lowerSelectIntMove(DestHi, Cond, hiOperand(SrcT), hiOperand(SrcF));
    return;
  }

  assert(DestTy == IceType_i16 || DestTy == IceType_i32 ||
         (Traits::Is64Bit && DestTy == IceType_i64));
  lowerSelectIntMove(Dest, Cond, SrcT, SrcF);
}
6289
John Porto4a566862016-01-04 09:33:41 -08006290template <typename TraitsType>
6291void TargetX86Base<TraitsType>::lowerSelectIntMove(Variable *Dest, BrCond Cond,
6292 Operand *SrcT,
6293 Operand *SrcF) {
John Porto7e93c622015-06-23 10:58:57 -07006294 Variable *T = nullptr;
6295 SrcF = legalize(SrcF);
6296 _mov(T, SrcF);
6297 SrcT = legalize(SrcT, Legal_Reg | Legal_Mem);
6298 _cmov(T, SrcT, Cond);
6299 _mov(Dest, T);
6300}
6301
John Porto4a566862016-01-04 09:33:41 -08006302template <typename TraitsType>
6303void TargetX86Base<TraitsType>::lowerMove(Variable *Dest, Operand *Src,
6304 bool IsRedefinition) {
David Sehre3984282015-12-15 17:34:55 -08006305 assert(Dest->getType() == Src->getType());
6306 assert(!Dest->isRematerializable());
6307 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
6308 Src = legalize(Src);
6309 Operand *SrcLo = loOperand(Src);
6310 Operand *SrcHi = hiOperand(Src);
Jim Stichnoth2d6c8262016-02-07 09:50:27 -08006311 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
6312 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
David Sehre3984282015-12-15 17:34:55 -08006313 Variable *T_Lo = nullptr, *T_Hi = nullptr;
6314 _mov(T_Lo, SrcLo);
6315 _redefined(_mov(DestLo, T_Lo), IsRedefinition);
6316 _mov(T_Hi, SrcHi);
6317 _redefined(_mov(DestHi, T_Hi), IsRedefinition);
6318 } else {
6319 Operand *SrcLegal;
6320 if (Dest->hasReg()) {
6321 // If Dest already has a physical register, then only basic legalization
6322 // is needed, as the source operand can be a register, immediate, or
6323 // memory.
6324 SrcLegal = legalize(Src, Legal_Reg, Dest->getRegNum());
6325 } else {
6326 // If Dest could be a stack operand, then RI must be a physical register
6327 // or a scalar integer immediate.
6328 SrcLegal = legalize(Src, Legal_Reg | Legal_Imm);
6329 }
6330 if (isVectorType(Dest->getType())) {
6331 _redefined(_movp(Dest, SrcLegal), IsRedefinition);
6332 } else {
6333 _redefined(_mov(Dest, SrcLegal), IsRedefinition);
6334 }
6335 }
6336}
6337
John Porto4a566862016-01-04 09:33:41 -08006338template <typename TraitsType>
6339bool TargetX86Base<TraitsType>::lowerOptimizeFcmpSelect(
6340 const InstFcmp *Fcmp, const InstSelect *Select) {
David Sehre3984282015-12-15 17:34:55 -08006341 Operand *CmpSrc0 = Fcmp->getSrc(0);
6342 Operand *CmpSrc1 = Fcmp->getSrc(1);
6343 Operand *SelectSrcT = Select->getTrueOperand();
6344 Operand *SelectSrcF = Select->getFalseOperand();
6345
6346 if (CmpSrc0->getType() != SelectSrcT->getType())
6347 return false;
6348
6349 // TODO(sehr, stichnot): fcmp/select patterns (e,g., minsd/maxss) go here.
6350 InstFcmp::FCond Condition = Fcmp->getCondition();
6351 switch (Condition) {
6352 default:
6353 return false;
6354 case InstFcmp::True:
6355 case InstFcmp::False:
6356 case InstFcmp::Ogt:
6357 case InstFcmp::Olt:
6358 (void)CmpSrc0;
6359 (void)CmpSrc1;
6360 (void)SelectSrcT;
6361 (void)SelectSrcF;
6362 break;
6363 }
6364 return false;
6365}
6366
John Porto4a566862016-01-04 09:33:41 -08006367template <typename TraitsType>
6368void TargetX86Base<TraitsType>::lowerIcmp(const InstIcmp *Icmp) {
David Sehre3984282015-12-15 17:34:55 -08006369 Variable *Dest = Icmp->getDest();
6370 if (isVectorType(Dest->getType())) {
6371 lowerIcmpVector(Icmp);
6372 } else {
6373 constexpr Inst *Consumer = nullptr;
6374 lowerIcmpAndConsumer(Icmp, Consumer);
6375 }
6376}
6377
/// Lowers a vector select. With SSE4.1 this uses blendvps/pblendvb, whose
/// implicit mask operand must live in xmm0; otherwise it falls back to the
/// bitwise expansion (SrcT & mask) | (SrcF & ~mask) via pand/pandn/por.
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerSelectVector(const InstSelect *Instr) {
  Variable *Dest = Instr->getDest();
  Type DestTy = Dest->getType();
  Operand *SrcT = Instr->getTrueOperand();
  Operand *SrcF = Instr->getFalseOperand();
  Operand *Condition = Instr->getCondition();

  if (!isVectorType(DestTy))
    llvm::report_fatal_error("Expected a vector select");

  Type SrcTy = SrcT->getType();
  Variable *T = makeReg(SrcTy);
  Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);
  Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);
  if (InstructionSet >= Traits::SSE4_1) {
    // TODO(wala): If the condition operand is a constant, use blendps or
    // pblendw.
    //
    // Use blendvps or pblendvb to implement select.
    if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||
        SrcTy == IceType_v4f32) {
      Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
      // blendvps reads its mask implicitly from xmm0.
      Variable *xmm0 = makeReg(IceType_v4i32, Traits::RegisterSet::Reg_xmm0);
      _movp(xmm0, ConditionRM);
      // Shift the boolean bit into the sign position; blendvps selects on the
      // sign bit of each mask element.
      _psll(xmm0, Ctx->getConstantInt8(31));
      _movp(T, SrcFRM);
      _blendvps(T, SrcTRM, xmm0);
      _movp(Dest, T);
    } else {
      assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16);
      // Sign-extend the i1 mask elements to full element width; pblendvb
      // selects on the sign bit of each byte, read implicitly from xmm0.
      Type SignExtTy =
          Condition->getType() == IceType_v8i1 ? IceType_v8i16 : IceType_v16i8;
      Variable *xmm0 = makeReg(SignExtTy, Traits::RegisterSet::Reg_xmm0);
      lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition));
      _movp(T, SrcFRM);
      _pblendvb(T, SrcTRM, xmm0);
      _movp(Dest, T);
    }
    return;
  }
  // Lower select without Traits::SSE4.1:
  // a=d?b:c ==>
  //   if elementtype(d) != i1:
  //     d=sext(d);
  //   a=(b&d)|(c&~d);
  Variable *T2 = makeReg(SrcTy);
  // Sign extend the condition operand if applicable.
  if (SrcTy == IceType_v4f32) {
    // The sext operation takes only integer arguments.
    Variable *T3 = Func->makeVariable(IceType_v4i32);
    lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition));
    _movp(T, T3);
  } else if (typeElementType(SrcTy) != IceType_i1) {
    lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition));
  } else {
    // Mask elements are already full-width; just move the condition in.
    Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
    _movp(T, ConditionRM);
  }
  // T = SrcT & mask;  T2 = SrcF & ~mask;  Dest = T | T2.
  _movp(T2, T);
  _pand(T, SrcTRM);
  _pandn(T2, SrcFRM);
  _por(T, T2);
  _movp(Dest, T);

  return;
}
6445
John Porto4a566862016-01-04 09:33:41 -08006446template <typename TraitsType>
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08006447void TargetX86Base<TraitsType>::lowerStore(const InstStore *Instr) {
6448 Operand *Value = Instr->getData();
6449 Operand *Addr = Instr->getAddr();
John Porto4a566862016-01-04 09:33:41 -08006450 X86OperandMem *NewAddr = formMemoryOperand(Addr, Value->getType());
Jim Stichnothad2989b2015-09-15 10:21:42 -07006451 doMockBoundsCheck(NewAddr);
John Porto7e93c622015-06-23 10:58:57 -07006452 Type Ty = NewAddr->getType();
6453
John Porto1d235422015-08-12 12:37:53 -07006454 if (!Traits::Is64Bit && Ty == IceType_i64) {
Jan Voungfbdd2442015-07-15 12:36:20 -07006455 Value = legalizeUndef(Value);
John Porto7e93c622015-06-23 10:58:57 -07006456 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm);
John Porto4a566862016-01-04 09:33:41 -08006457 _store(ValueHi, llvm::cast<X86OperandMem>(hiOperand(NewAddr)));
Jim Stichnothb40595a2016-01-29 06:14:31 -08006458 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm);
John Porto4a566862016-01-04 09:33:41 -08006459 _store(ValueLo, llvm::cast<X86OperandMem>(loOperand(NewAddr)));
John Porto7e93c622015-06-23 10:58:57 -07006460 } else if (isVectorType(Ty)) {
Andrew Scull97f460d2015-07-21 10:07:42 -07006461 _storep(legalizeToReg(Value), NewAddr);
John Porto7e93c622015-06-23 10:58:57 -07006462 } else {
6463 Value = legalize(Value, Legal_Reg | Legal_Imm);
6464 _store(Value, NewAddr);
6465 }
6466}
6467
John Porto4a566862016-01-04 09:33:41 -08006468template <typename TraitsType>
6469void TargetX86Base<TraitsType>::doAddressOptStore() {
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08006470 auto *Instr = llvm::cast<InstStore>(Context.getCur());
6471 Operand *Addr = Instr->getAddr();
6472 Operand *Data = Instr->getData();
6473 if (auto *OptAddr = computeAddressOpt(Instr, Data->getType(), Addr)) {
6474 Instr->setDeleted();
John Portoac2388c2016-01-22 07:10:56 -08006475 auto *NewStore = Context.insert<InstStore>(Data, OptAddr);
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08006476 if (Instr->getDest())
6477 NewStore->setRmwBeacon(Instr->getRmwBeacon());
John Porto7e93c622015-06-23 10:58:57 -07006478 }
6479}
6480
John Porto4a566862016-01-04 09:33:41 -08006481template <typename TraitsType>
6482Operand *TargetX86Base<TraitsType>::lowerCmpRange(Operand *Comparison,
6483 uint64_t Min, uint64_t Max) {
Andrew Scull87f80c12015-07-20 10:19:16 -07006484 // TODO(ascull): 64-bit should not reach here but only because it is not
6485 // implemented yet. This should be able to handle the 64-bit case.
John Porto1d235422015-08-12 12:37:53 -07006486 assert(Traits::Is64Bit || Comparison->getType() != IceType_i64);
Andrew Scull87f80c12015-07-20 10:19:16 -07006487 // Subtracting 0 is a nop so don't do it
6488 if (Min != 0) {
6489 // Avoid clobbering the comparison by copying it
6490 Variable *T = nullptr;
6491 _mov(T, Comparison);
6492 _sub(T, Ctx->getConstantInt32(Min));
6493 Comparison = T;
6494 }
6495
6496 _cmp(Comparison, Ctx->getConstantInt32(Max - Min));
6497
6498 return Comparison;
6499}
6500
John Porto4a566862016-01-04 09:33:41 -08006501template <typename TraitsType>
6502void TargetX86Base<TraitsType>::lowerCaseCluster(const CaseCluster &Case,
6503 Operand *Comparison,
6504 bool DoneCmp,
6505 CfgNode *DefaultTarget) {
Andrew Scull87f80c12015-07-20 10:19:16 -07006506 switch (Case.getKind()) {
6507 case CaseCluster::JumpTable: {
John Porto4a566862016-01-04 09:33:41 -08006508 InstX86Label *SkipJumpTable;
Andrew Scull87f80c12015-07-20 10:19:16 -07006509
6510 Operand *RangeIndex =
6511 lowerCmpRange(Comparison, Case.getLow(), Case.getHigh());
Andrew Scull86df4e92015-07-30 13:54:44 -07006512 if (DefaultTarget == nullptr) {
Andrew Scull87f80c12015-07-20 10:19:16 -07006513 // Skip over jump table logic if comparison not in range and no default
John Porto4a566862016-01-04 09:33:41 -08006514 SkipJumpTable = InstX86Label::create(Func, this);
Andrew Scull87f80c12015-07-20 10:19:16 -07006515 _br(Traits::Cond::Br_a, SkipJumpTable);
Andrew Scull86df4e92015-07-30 13:54:44 -07006516 } else {
6517 _br(Traits::Cond::Br_a, DefaultTarget);
John Porto7e93c622015-06-23 10:58:57 -07006518 }
Andrew Scull87f80c12015-07-20 10:19:16 -07006519
6520 InstJumpTable *JumpTable = Case.getJumpTable();
6521 Context.insert(JumpTable);
6522
6523 // Make sure the index is a register of the same width as the base
6524 Variable *Index;
John Porto56958cb2016-01-14 09:18:18 -08006525 const Type PointerType = getPointerType();
6526 if (RangeIndex->getType() != PointerType) {
6527 Index = makeReg(PointerType);
Jim Stichnothe641e922016-02-29 09:54:55 -08006528 if (RangeIndex->getType() == IceType_i64) {
6529 assert(Traits::Is64Bit);
6530 _mov(Index, RangeIndex); // trunc
6531 } else {
6532 _movzx(Index, RangeIndex);
6533 }
Andrew Scull87f80c12015-07-20 10:19:16 -07006534 } else {
Andrew Scull97f460d2015-07-21 10:07:42 -07006535 Index = legalizeToReg(RangeIndex);
Andrew Scull87f80c12015-07-20 10:19:16 -07006536 }
6537
6538 constexpr RelocOffsetT RelocOffset = 0;
John Portoac2388c2016-01-22 07:10:56 -08006539 constexpr Variable *NoBase = nullptr;
John Porto03077212016-04-05 06:30:21 -07006540 auto JTName = GlobalString::createWithString(Ctx, JumpTable->getName());
Jim Stichnoth467ffe52016-03-29 15:01:06 -07006541 Constant *Offset = Ctx->getConstantSym(RelocOffset, JTName);
John Porto56958cb2016-01-14 09:18:18 -08006542 uint16_t Shift = typeWidthInBytesLog2(PointerType);
Jim Stichnoth8ff4b282016-01-04 15:39:06 -08006543 constexpr auto Segment = X86OperandMem::SegmentRegisters::DefaultSegment;
John Porto56958cb2016-01-14 09:18:18 -08006544
Andrew Scull87f80c12015-07-20 10:19:16 -07006545 Variable *Target = nullptr;
John Porto56958cb2016-01-14 09:18:18 -08006546 if (Traits::Is64Bit && NeedSandboxing) {
John Porto56958cb2016-01-14 09:18:18 -08006547 assert(Index != nullptr && Index->getType() == IceType_i32);
6548 }
John Portoac2388c2016-01-22 07:10:56 -08006549 auto *TargetInMemory = X86OperandMem::create(Func, PointerType, NoBase,
6550 Offset, Index, Shift, Segment);
Andrew Scull86df4e92015-07-30 13:54:44 -07006551 _mov(Target, TargetInMemory);
John Porto56958cb2016-01-14 09:18:18 -08006552
Andrew Scull86df4e92015-07-30 13:54:44 -07006553 lowerIndirectJump(Target);
Andrew Scull87f80c12015-07-20 10:19:16 -07006554
Andrew Scull86df4e92015-07-30 13:54:44 -07006555 if (DefaultTarget == nullptr)
Andrew Scull87f80c12015-07-20 10:19:16 -07006556 Context.insert(SkipJumpTable);
6557 return;
6558 }
6559 case CaseCluster::Range: {
Andrew Scull86df4e92015-07-30 13:54:44 -07006560 if (Case.isUnitRange()) {
Andrew Scull87f80c12015-07-20 10:19:16 -07006561 // Single item
Andrew Scull86df4e92015-07-30 13:54:44 -07006562 if (!DoneCmp) {
6563 Constant *Value = Ctx->getConstantInt32(Case.getLow());
Andrew Scull87f80c12015-07-20 10:19:16 -07006564 _cmp(Comparison, Value);
Andrew Scull86df4e92015-07-30 13:54:44 -07006565 }
6566 _br(Traits::Cond::Br_e, Case.getTarget());
6567 } else if (DoneCmp && Case.isPairRange()) {
6568 // Range of two items with first item aleady compared against
6569 _br(Traits::Cond::Br_e, Case.getTarget());
6570 Constant *Value = Ctx->getConstantInt32(Case.getHigh());
6571 _cmp(Comparison, Value);
6572 _br(Traits::Cond::Br_e, Case.getTarget());
Andrew Scull87f80c12015-07-20 10:19:16 -07006573 } else {
6574 // Range
6575 lowerCmpRange(Comparison, Case.getLow(), Case.getHigh());
Andrew Scull86df4e92015-07-30 13:54:44 -07006576 _br(Traits::Cond::Br_be, Case.getTarget());
Andrew Scull87f80c12015-07-20 10:19:16 -07006577 }
Andrew Scull86df4e92015-07-30 13:54:44 -07006578 if (DefaultTarget != nullptr)
6579 _br(DefaultTarget);
Andrew Scull87f80c12015-07-20 10:19:16 -07006580 return;
6581 }
6582 }
6583}
6584
John Porto4a566862016-01-04 09:33:41 -08006585template <typename TraitsType>
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08006586void TargetX86Base<TraitsType>::lowerSwitch(const InstSwitch *Instr) {
Andrew Scull87f80c12015-07-20 10:19:16 -07006587 // Group cases together and navigate through them with a binary search
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08006588 CaseClusterArray CaseClusters = CaseCluster::clusterizeSwitch(Func, Instr);
6589 Operand *Src0 = Instr->getComparison();
6590 CfgNode *DefaultTarget = Instr->getLabelDefault();
Andrew Scull87f80c12015-07-20 10:19:16 -07006591
6592 assert(CaseClusters.size() != 0); // Should always be at least one
6593
John Porto1d235422015-08-12 12:37:53 -07006594 if (!Traits::Is64Bit && Src0->getType() == IceType_i64) {
Andrew Scull87f80c12015-07-20 10:19:16 -07006595 Src0 = legalize(Src0); // get Base/Index into physical registers
6596 Operand *Src0Lo = loOperand(Src0);
6597 Operand *Src0Hi = hiOperand(Src0);
6598 if (CaseClusters.back().getHigh() > UINT32_MAX) {
6599 // TODO(ascull): handle 64-bit case properly (currently naive version)
6600 // This might be handled by a higher level lowering of switches.
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08006601 SizeT NumCases = Instr->getNumCases();
Andrew Scull87f80c12015-07-20 10:19:16 -07006602 if (NumCases >= 2) {
Andrew Scull97f460d2015-07-21 10:07:42 -07006603 Src0Lo = legalizeToReg(Src0Lo);
6604 Src0Hi = legalizeToReg(Src0Hi);
Andrew Scull87f80c12015-07-20 10:19:16 -07006605 } else {
6606 Src0Lo = legalize(Src0Lo, Legal_Reg | Legal_Mem);
6607 Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem);
6608 }
6609 for (SizeT I = 0; I < NumCases; ++I) {
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08006610 Constant *ValueLo = Ctx->getConstantInt32(Instr->getValue(I));
6611 Constant *ValueHi = Ctx->getConstantInt32(Instr->getValue(I) >> 32);
John Porto4a566862016-01-04 09:33:41 -08006612 InstX86Label *Label = InstX86Label::create(Func, this);
Andrew Scull87f80c12015-07-20 10:19:16 -07006613 _cmp(Src0Lo, ValueLo);
6614 _br(Traits::Cond::Br_ne, Label);
6615 _cmp(Src0Hi, ValueHi);
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08006616 _br(Traits::Cond::Br_e, Instr->getLabel(I));
Andrew Scull87f80c12015-07-20 10:19:16 -07006617 Context.insert(Label);
6618 }
Jim Stichnoth8cfeb692016-02-05 09:50:02 -08006619 _br(Instr->getLabelDefault());
Andrew Scull87f80c12015-07-20 10:19:16 -07006620 return;
6621 } else {
6622 // All the values are 32-bit so just check the operand is too and then
6623 // fall through to the 32-bit implementation. This is a common case.
6624 Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem);
6625 Constant *Zero = Ctx->getConstantInt32(0);
6626 _cmp(Src0Hi, Zero);
Andrew Scull86df4e92015-07-30 13:54:44 -07006627 _br(Traits::Cond::Br_ne, DefaultTarget);
Andrew Scull87f80c12015-07-20 10:19:16 -07006628 Src0 = Src0Lo;
6629 }
John Porto7e93c622015-06-23 10:58:57 -07006630 }
6631
Andrew Scull87f80c12015-07-20 10:19:16 -07006632 // 32-bit lowering
6633
6634 if (CaseClusters.size() == 1) {
6635 // Jump straight to default if needed. Currently a common case as jump
6636 // tables occur on their own.
6637 constexpr bool DoneCmp = false;
Andrew Scull86df4e92015-07-30 13:54:44 -07006638 lowerCaseCluster(CaseClusters.front(), Src0, DoneCmp, DefaultTarget);
Andrew Scull87f80c12015-07-20 10:19:16 -07006639 return;
6640 }
6641
6642 // Going to be using multiple times so get it in a register early
Andrew Scull97f460d2015-07-21 10:07:42 -07006643 Variable *Comparison = legalizeToReg(Src0);
Andrew Scull87f80c12015-07-20 10:19:16 -07006644
6645 // A span is over the clusters
6646 struct SearchSpan {
John Porto4a566862016-01-04 09:33:41 -08006647 SearchSpan(SizeT Begin, SizeT Size, InstX86Label *Label)
Andrew Scull87f80c12015-07-20 10:19:16 -07006648 : Begin(Begin), Size(Size), Label(Label) {}
6649
6650 SizeT Begin;
6651 SizeT Size;
John Porto4a566862016-01-04 09:33:41 -08006652 InstX86Label *Label;
Andrew Scull87f80c12015-07-20 10:19:16 -07006653 };
Andrew Scull8447bba2015-07-23 11:41:18 -07006654 // The stack will only grow to the height of the tree so 12 should be plenty
6655 std::stack<SearchSpan, llvm::SmallVector<SearchSpan, 12>> SearchSpanStack;
Andrew Scull87f80c12015-07-20 10:19:16 -07006656 SearchSpanStack.emplace(0, CaseClusters.size(), nullptr);
6657 bool DoneCmp = false;
6658
6659 while (!SearchSpanStack.empty()) {
6660 SearchSpan Span = SearchSpanStack.top();
6661 SearchSpanStack.pop();
6662
6663 if (Span.Label != nullptr)
6664 Context.insert(Span.Label);
6665
6666 switch (Span.Size) {
6667 case 0:
6668 llvm::report_fatal_error("Invalid SearchSpan size");
6669 break;
6670
6671 case 1:
6672 lowerCaseCluster(CaseClusters[Span.Begin], Comparison, DoneCmp,
Andrew Scull86df4e92015-07-30 13:54:44 -07006673 SearchSpanStack.empty() ? nullptr : DefaultTarget);
Andrew Scull87f80c12015-07-20 10:19:16 -07006674 DoneCmp = false;
6675 break;
6676
Andrew Scull86df4e92015-07-30 13:54:44 -07006677 case 2: {
6678 const CaseCluster *CaseA = &CaseClusters[Span.Begin];
6679 const CaseCluster *CaseB = &CaseClusters[Span.Begin + 1];
6680
6681 // Placing a range last may allow register clobbering during the range
6682 // test. That means there is no need to clone the register. If it is a
6683 // unit range the comparison may have already been done in the binary
6684 // search (DoneCmp) and so it should be placed first. If this is a range
6685 // of two items and the comparison with the low value has already been
6686 // done, comparing with the other element is cheaper than a range test.
6687 // If the low end of the range is zero then there is no subtraction and
6688 // nothing to be gained.
6689 if (!CaseA->isUnitRange() &&
6690 !(CaseA->getLow() == 0 || (DoneCmp && CaseA->isPairRange()))) {
6691 std::swap(CaseA, CaseB);
6692 DoneCmp = false;
6693 }
6694
6695 lowerCaseCluster(*CaseA, Comparison, DoneCmp);
Andrew Scull87f80c12015-07-20 10:19:16 -07006696 DoneCmp = false;
Andrew Scull86df4e92015-07-30 13:54:44 -07006697 lowerCaseCluster(*CaseB, Comparison, DoneCmp,
6698 SearchSpanStack.empty() ? nullptr : DefaultTarget);
6699 } break;
Andrew Scull87f80c12015-07-20 10:19:16 -07006700
6701 default:
6702 // Pick the middle item and branch b or ae
6703 SizeT PivotIndex = Span.Begin + (Span.Size / 2);
6704 const CaseCluster &Pivot = CaseClusters[PivotIndex];
6705 Constant *Value = Ctx->getConstantInt32(Pivot.getLow());
John Porto4a566862016-01-04 09:33:41 -08006706 InstX86Label *Label = InstX86Label::create(Func, this);
Andrew Scull87f80c12015-07-20 10:19:16 -07006707 _cmp(Comparison, Value);
      // TODO(ascull): does it always have to be far?
John Porto4a566862016-01-04 09:33:41 -08006709 _br(Traits::Cond::Br_b, Label, InstX86Br::Far);
Andrew Scull87f80c12015-07-20 10:19:16 -07006710 // Lower the left and (pivot+right) sides, falling through to the right
6711 SearchSpanStack.emplace(Span.Begin, Span.Size / 2, Label);
6712 SearchSpanStack.emplace(PivotIndex, Span.Size - (Span.Size / 2), nullptr);
6713 DoneCmp = true;
6714 break;
6715 }
6716 }
6717
Andrew Scull86df4e92015-07-30 13:54:44 -07006718 _br(DefaultTarget);
John Porto7e93c622015-06-23 10:58:57 -07006719}
6720
Andrew Scull9612d322015-07-06 14:53:25 -07006721/// The following pattern occurs often in lowered C and C++ code:
6722///
6723/// %cmp = fcmp/icmp pred <n x ty> %src0, %src1
6724/// %cmp.ext = sext <n x i1> %cmp to <n x ty>
6725///
6726/// We can eliminate the sext operation by copying the result of pcmpeqd,
Andrew Scull57e12682015-09-16 11:30:19 -07006727/// pcmpgtd, or cmpps (which produce sign extended results) to the result of the
6728/// sext operation.
John Porto4a566862016-01-04 09:33:41 -08006729template <typename TraitsType>
6730void TargetX86Base<TraitsType>::eliminateNextVectorSextInstruction(
John Porto7e93c622015-06-23 10:58:57 -07006731 Variable *SignExtendedResult) {
Jim Stichnoth54f3d512015-12-11 09:53:00 -08006732 if (auto *NextCast =
John Porto7e93c622015-06-23 10:58:57 -07006733 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) {
6734 if (NextCast->getCastKind() == InstCast::Sext &&
6735 NextCast->getSrc(0) == SignExtendedResult) {
6736 NextCast->setDeleted();
Andrew Scull97f460d2015-07-21 10:07:42 -07006737 _movp(NextCast->getDest(), legalizeToReg(SignExtendedResult));
John Porto7e93c622015-06-23 10:58:57 -07006738 // Skip over the instruction.
6739 Context.advanceNext();
6740 }
6741 }
6742}
6743
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerUnreachable(
    const InstUnreachable * /*Instr*/) {
  // Emit ud2 so that control reaching this point traps.
  _ud2();
  // Add a fake use of esp to make sure esp adjustments after the unreachable
  // do not get dead-code eliminated.
  keepEspLiveAtExit();
}
6752
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerBreakpoint(
    const InstBreakpoint * /*Instr*/) {
  // A breakpoint lowers to the single-byte int3 software-breakpoint
  // instruction; the InstBreakpoint carries no other information.
  _int3();
}
6758
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerRMW(const InstX86FakeRMW *RMW) {
  // Lower a fake read-modify-write instruction into a single x86 memory-form
  // arithmetic instruction (e.g. "add [mem], reg/imm") when possible.
  //
  // If the beacon variable's live range does not end in this instruction, then
  // it must end in the modified Store instruction that follows. This means
  // that the original Store instruction is still there, either because the
  // value being stored is used beyond the Store instruction, or because dead
  // code elimination did not happen. In either case, we cancel RMW lowering
  // (and the caller deletes the RMW instruction).
  if (!RMW->isLastUse(RMW->getBeacon()))
    return;
  Operand *Src = RMW->getData();
  Type Ty = Src->getType();
  X86OperandMem *Addr = formMemoryOperand(RMW->getAddr(), Ty);
  doMockBoundsCheck(Addr);
  if (!Traits::Is64Bit && Ty == IceType_i64) {
    // On x86-32 an i64 RMW is split into lo/hi 32-bit halves; add/sub use the
    // carry chain (add/adc, sub/sbb), bitwise ops operate per half.
    Src = legalizeUndef(Src);
    Operand *SrcLo = legalize(loOperand(Src), Legal_Reg | Legal_Imm);
    Operand *SrcHi = legalize(hiOperand(Src), Legal_Reg | Legal_Imm);
    auto *AddrLo = llvm::cast<X86OperandMem>(loOperand(Addr));
    auto *AddrHi = llvm::cast<X86OperandMem>(hiOperand(Addr));
    switch (RMW->getOp()) {
    default:
      // TODO(stichnot): Implement other arithmetic operators.
      break;
    case InstArithmetic::Add:
      _add_rmw(AddrLo, SrcLo);
      _adc_rmw(AddrHi, SrcHi);
      return;
    case InstArithmetic::Sub:
      _sub_rmw(AddrLo, SrcLo);
      _sbb_rmw(AddrHi, SrcHi);
      return;
    case InstArithmetic::And:
      _and_rmw(AddrLo, SrcLo);
      _and_rmw(AddrHi, SrcHi);
      return;
    case InstArithmetic::Or:
      _or_rmw(AddrLo, SrcLo);
      _or_rmw(AddrHi, SrcHi);
      return;
    case InstArithmetic::Xor:
      _xor_rmw(AddrLo, SrcLo);
      _xor_rmw(AddrHi, SrcHi);
      return;
    }
  } else {
    // Types that fit in a single memory operand:
    // x86-32: i8, i16, i32
    // x86-64: i8, i16, i32, i64
    switch (RMW->getOp()) {
    default:
      // TODO(stichnot): Implement other arithmetic operators.
      break;
    case InstArithmetic::Add:
      Src = legalize(Src, Legal_Reg | Legal_Imm);
      _add_rmw(Addr, Src);
      return;
    case InstArithmetic::Sub:
      Src = legalize(Src, Legal_Reg | Legal_Imm);
      _sub_rmw(Addr, Src);
      return;
    case InstArithmetic::And:
      Src = legalize(Src, Legal_Reg | Legal_Imm);
      _and_rmw(Addr, Src);
      return;
    case InstArithmetic::Or:
      Src = legalize(Src, Legal_Reg | Legal_Imm);
      _or_rmw(Addr, Src);
      return;
    case InstArithmetic::Xor:
      Src = legalize(Src, Legal_Reg | Legal_Imm);
      _xor_rmw(Addr, Src);
      return;
    }
  }
  // Every supported operator returns above; falling through means the op was
  // one of the unimplemented "default" cases.
  llvm::report_fatal_error("Couldn't lower RMW instruction");
}
6835
John Porto4a566862016-01-04 09:33:41 -08006836template <typename TraitsType>
6837void TargetX86Base<TraitsType>::lowerOther(const Inst *Instr) {
6838 if (const auto *RMW = llvm::dyn_cast<InstX86FakeRMW>(Instr)) {
John Porto7e93c622015-06-23 10:58:57 -07006839 lowerRMW(RMW);
6840 } else {
6841 TargetLowering::lowerOther(Instr);
6842 }
6843}
6844
Andrew Scull57e12682015-09-16 11:30:19 -07006845/// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to preserve
6846/// integrity of liveness analysis. Undef values are also turned into zeroes,
Jim Stichnoth8ff4b282016-01-04 15:39:06 -08006847/// since loOperand() and hiOperand() don't expect Undef input. Also, in
John Portoac2388c2016-01-22 07:10:56 -08006848/// Non-SFI mode, add a FakeUse(RebasePtr) for every pooled constant operand.
John Porto4a566862016-01-04 09:33:41 -08006849template <typename TraitsType> void TargetX86Base<TraitsType>::prelowerPhis() {
Karl Schimpfd4699942016-04-02 09:55:31 -07006850 if (getFlags().getUseNonsfi()) {
John Portoac2388c2016-01-22 07:10:56 -08006851 assert(RebasePtr);
Jim Stichnoth8ff4b282016-01-04 15:39:06 -08006852 CfgNode *Node = Context.getNode();
John Portoac2388c2016-01-22 07:10:56 -08006853 uint32_t RebasePtrUseCount = 0;
Jim Stichnoth8ff4b282016-01-04 15:39:06 -08006854 for (Inst &I : Node->getPhis()) {
6855 auto *Phi = llvm::dyn_cast<InstPhi>(&I);
6856 if (Phi->isDeleted())
6857 continue;
6858 for (SizeT I = 0; I < Phi->getSrcSize(); ++I) {
6859 Operand *Src = Phi->getSrc(I);
6860 // TODO(stichnot): This over-counts for +0.0, and under-counts for other
6861 // kinds of pooling.
6862 if (llvm::isa<ConstantRelocatable>(Src) ||
6863 llvm::isa<ConstantFloat>(Src) || llvm::isa<ConstantDouble>(Src)) {
John Portoac2388c2016-01-22 07:10:56 -08006864 ++RebasePtrUseCount;
Jim Stichnoth8ff4b282016-01-04 15:39:06 -08006865 }
6866 }
6867 }
John Portoac2388c2016-01-22 07:10:56 -08006868 if (RebasePtrUseCount) {
6869 Node->getInsts().push_front(InstFakeUse::create(Func, RebasePtr));
Jim Stichnoth8ff4b282016-01-04 15:39:06 -08006870 }
6871 }
John Porto1d235422015-08-12 12:37:53 -07006872 if (Traits::Is64Bit) {
6873 // On x86-64 we don't need to prelower phis -- the architecture can handle
6874 // 64-bit integer natively.
6875 return;
6876 }
6877
Andrew Scull57e12682015-09-16 11:30:19 -07006878 // Pause constant blinding or pooling, blinding or pooling will be done later
6879 // during phi lowering assignments
John Porto7e93c622015-06-23 10:58:57 -07006880 BoolFlagSaver B(RandomizationPoolingPaused, true);
John Porto4a566862016-01-04 09:33:41 -08006881 PhiLowering::prelowerPhis32Bit<TargetX86Base<TraitsType>>(
Jan Voung53483692015-07-16 10:47:46 -07006882 this, Context.getNode(), Func);
John Porto7e93c622015-06-23 10:58:57 -07006883}
6884
template <typename TraitsType>
void TargetX86Base<TraitsType>::genTargetHelperCallFor(Inst *Instr) {
  // Inspect the instruction about to be lowered; when the target has no
  // direct lowering for it, replace it with a call to a runtime helper (and
  // delete the original).  In every case that may emit a call, also record
  // the outgoing-argument stack space it will need.
  uint32_t StackArgumentsSize = 0;
  if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) {
    RuntimeHelper HelperID = RuntimeHelper::H_Num;
    Variable *Dest = Arith->getDest();
    Type DestTy = Dest->getType();
    if (!Traits::Is64Bit && DestTy == IceType_i64) {
      // x86-32 has no 64-bit divide/remainder; call the runtime helpers.
      switch (Arith->getOp()) {
      default:
        return;
      case InstArithmetic::Udiv:
        HelperID = RuntimeHelper::H_udiv_i64;
        break;
      case InstArithmetic::Sdiv:
        HelperID = RuntimeHelper::H_sdiv_i64;
        break;
      case InstArithmetic::Urem:
        HelperID = RuntimeHelper::H_urem_i64;
        break;
      case InstArithmetic::Srem:
        HelperID = RuntimeHelper::H_srem_i64;
        break;
      }
    } else if (isVectorType(DestTy)) {
      // Vector ops with no SSE equivalent are scalarized in place rather
      // than calling a helper.  (This Dest shadows the outer one; same value.)
      Variable *Dest = Arith->getDest();
      Operand *Src0 = Arith->getSrc(0);
      Operand *Src1 = Arith->getSrc(1);
      switch (Arith->getOp()) {
      default:
        return;
      case InstArithmetic::Mul:
        // Only v16i8 multiply needs scalarization; other vector multiplies
        // lower directly.
        if (DestTy == IceType_v16i8) {
          scalarizeArithmetic(Arith->getOp(), Dest, Src0, Src1);
          Arith->setDeleted();
        }
        return;
      case InstArithmetic::Shl:
      case InstArithmetic::Lshr:
      case InstArithmetic::Ashr:
      case InstArithmetic::Udiv:
      case InstArithmetic::Urem:
      case InstArithmetic::Sdiv:
      case InstArithmetic::Srem:
      case InstArithmetic::Frem:
        scalarizeArithmetic(Arith->getOp(), Dest, Src0, Src1);
        Arith->setDeleted();
        return;
      }
    } else {
      // Scalar: only floating-point remainder needs a helper.
      switch (Arith->getOp()) {
      default:
        return;
      case InstArithmetic::Frem:
        if (isFloat32Asserting32Or64(DestTy))
          HelperID = RuntimeHelper::H_frem_f32;
        else
          HelperID = RuntimeHelper::H_frem_f64;
      }
    }
    // Emit the two-argument helper call and delete the original arithmetic.
    constexpr SizeT MaxSrcs = 2;
    InstCall *Call = makeHelperCall(HelperID, Dest, MaxSrcs);
    Call->addArg(Arith->getSrc(0));
    Call->addArg(Arith->getSrc(1));
    StackArgumentsSize = getCallStackArgumentsSizeBytes(Call);
    Context.insert(Call);
    Arith->setDeleted();
  } else if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {
    // Casts that the hardware can't do directly (64-bit int <-> FP on
    // x86-32, unsigned conversions, i1-vector bitcasts) become helper calls.
    InstCast::OpKind CastKind = Cast->getCastKind();
    Operand *Src0 = Cast->getSrc(0);
    const Type SrcType = Src0->getType();
    Variable *Dest = Cast->getDest();
    const Type DestTy = Dest->getType();
    RuntimeHelper HelperID = RuntimeHelper::H_Num;
    // CallDest may be widened to i32 below because the PNaCl ABI disallows
    // i8/i16 return types; the result is truncated back after the call.
    Variable *CallDest = Dest;
    switch (CastKind) {
    default:
      return;
    case InstCast::Fptosi:
      if (!Traits::Is64Bit && DestTy == IceType_i64) {
        HelperID = isFloat32Asserting32Or64(SrcType)
                       ? RuntimeHelper::H_fptosi_f32_i64
                       : RuntimeHelper::H_fptosi_f64_i64;
      } else {
        return;
      }
      break;
    case InstCast::Fptoui:
      if (isVectorType(DestTy)) {
        assert(DestTy == IceType_v4i32 && SrcType == IceType_v4f32);
        HelperID = RuntimeHelper::H_fptoui_4xi32_f32;
      } else if (DestTy == IceType_i64 ||
                 (!Traits::Is64Bit && DestTy == IceType_i32)) {
        if (Traits::Is64Bit) {
          HelperID = isFloat32Asserting32Or64(SrcType)
                         ? RuntimeHelper::H_fptoui_f32_i64
                         : RuntimeHelper::H_fptoui_f64_i64;
        } else if (isInt32Asserting32Or64(DestTy)) {
          HelperID = isFloat32Asserting32Or64(SrcType)
                         ? RuntimeHelper::H_fptoui_f32_i32
                         : RuntimeHelper::H_fptoui_f64_i32;
        } else {
          HelperID = isFloat32Asserting32Or64(SrcType)
                         ? RuntimeHelper::H_fptoui_f32_i64
                         : RuntimeHelper::H_fptoui_f64_i64;
        }
      } else {
        return;
      }
      break;
    case InstCast::Sitofp:
      if (!Traits::Is64Bit && SrcType == IceType_i64) {
        HelperID = isFloat32Asserting32Or64(DestTy)
                       ? RuntimeHelper::H_sitofp_i64_f32
                       : RuntimeHelper::H_sitofp_i64_f64;
      } else {
        return;
      }
      break;
    case InstCast::Uitofp:
      if (isVectorType(SrcType)) {
        assert(DestTy == IceType_v4f32 && SrcType == IceType_v4i32);
        HelperID = RuntimeHelper::H_uitofp_4xi32_4xf32;
      } else if (SrcType == IceType_i64 ||
                 (!Traits::Is64Bit && SrcType == IceType_i32)) {
        if (isInt32Asserting32Or64(SrcType)) {
          HelperID = isFloat32Asserting32Or64(DestTy)
                         ? RuntimeHelper::H_uitofp_i32_f32
                         : RuntimeHelper::H_uitofp_i32_f64;
        } else {
          HelperID = isFloat32Asserting32Or64(DestTy)
                         ? RuntimeHelper::H_uitofp_i64_f32
                         : RuntimeHelper::H_uitofp_i64_f64;
        }
      } else {
        return;
      }
      break;
    case InstCast::Bitcast: {
      if (DestTy == Src0->getType())
        return;
      switch (DestTy) {
      default:
        return;
      case IceType_i8:
        assert(Src0->getType() == IceType_v8i1);
        HelperID = RuntimeHelper::H_bitcast_8xi1_i8;
        CallDest = Func->makeVariable(IceType_i32);
        break;
      case IceType_i16:
        assert(Src0->getType() == IceType_v16i1);
        HelperID = RuntimeHelper::H_bitcast_16xi1_i16;
        CallDest = Func->makeVariable(IceType_i32);
        break;
      case IceType_v8i1: {
        assert(Src0->getType() == IceType_i8);
        HelperID = RuntimeHelper::H_bitcast_i8_8xi1;
        Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
        // Arguments to functions are required to be at least 32 bits wide.
        Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0);
        Src0 = Src0AsI32;
      } break;
      case IceType_v16i1: {
        assert(Src0->getType() == IceType_i16);
        HelperID = RuntimeHelper::H_bitcast_i16_16xi1;
        Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
        // Arguments to functions are required to be at least 32 bits wide.
        Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0);
        Src0 = Src0AsI32;
      } break;
      }
    } break;
    }
    constexpr SizeT MaxSrcs = 1;
    InstCall *Call = makeHelperCall(HelperID, CallDest, MaxSrcs);
    Call->addArg(Src0);
    StackArgumentsSize = getCallStackArgumentsSizeBytes(Call);
    Context.insert(Call);
    // The PNaCl ABI disallows i8/i16 return types, so truncate the helper call
    // result to the appropriate type as necessary.
    if (CallDest->getType() != Dest->getType())
      Context.insert<InstCast>(InstCast::Trunc, Dest, CallDest);
    Cast->setDeleted();
  } else if (auto *Intrinsic = llvm::dyn_cast<InstIntrinsicCall>(Instr)) {
    // Intrinsics that may lower to real calls: only the argument/return types
    // are needed here, to pre-compute the stack space the call will use.
    CfgVector<Type> ArgTypes;
    Type ReturnType = IceType_void;
    switch (Intrinsics::IntrinsicID ID = Intrinsic->getIntrinsicInfo().ID) {
    default:
      return;
    case Intrinsics::Ctpop: {
      Operand *Val = Intrinsic->getArg(0);
      Type ValTy = Val->getType();
      if (ValTy == IceType_i64)
        ArgTypes = {IceType_i64};
      else
        ArgTypes = {IceType_i32};
      ReturnType = IceType_i32;
    } break;
    case Intrinsics::Longjmp:
      ArgTypes = {IceType_i32, IceType_i32};
      ReturnType = IceType_void;
      break;
    case Intrinsics::Memcpy:
      ArgTypes = {IceType_i32, IceType_i32, IceType_i32};
      ReturnType = IceType_void;
      break;
    case Intrinsics::Memmove:
      ArgTypes = {IceType_i32, IceType_i32, IceType_i32};
      ReturnType = IceType_void;
      break;
    case Intrinsics::Memset:
      ArgTypes = {IceType_i32, IceType_i32, IceType_i32};
      ReturnType = IceType_void;
      break;
    case Intrinsics::NaClReadTP:
      ReturnType = IceType_i32;
      break;
    case Intrinsics::Setjmp:
      ArgTypes = {IceType_i32};
      ReturnType = IceType_i32;
      break;
    }
    StackArgumentsSize = getCallStackArgumentsSizeBytes(ArgTypes, ReturnType);
  } else if (auto *Call = llvm::dyn_cast<InstCall>(Instr)) {
    StackArgumentsSize = getCallStackArgumentsSizeBytes(Call);
  } else if (auto *Ret = llvm::dyn_cast<InstRet>(Instr)) {
    // Scalar-FP returns need stack space for the x87 fstp/mov sequence.
    if (!Ret->hasRetValue())
      return;
    Operand *RetValue = Ret->getRetValue();
    Type ReturnType = RetValue->getType();
    if (!isScalarFloatingType(ReturnType))
      return;
    StackArgumentsSize = typeWidthInBytes(ReturnType);
  } else {
    return;
  }
  StackArgumentsSize = Traits::applyStackAlignment(StackArgumentsSize);
  updateMaxOutArgsSizeBytes(StackArgumentsSize);
}
7124
template <typename TraitsType>
uint32_t TargetX86Base<TraitsType>::getCallStackArgumentsSizeBytes(
    const CfgVector<Type> &ArgTypes, Type ReturnType) {
  // Compute the stack space (in bytes) needed for the outgoing arguments of a
  // call with the given argument types and return type.  Arguments that fit
  // in XMM or GPR argument registers take no stack space.
  uint32_t OutArgumentsSizeBytes = 0;
  uint32_t XmmArgCount = 0;
  uint32_t GprArgCount = 0;
  for (Type Ty : ArgTypes) {
    // The PNaCl ABI requires the width of arguments to be at least 32 bits.
    assert(typeWidthInBytes(Ty) >= 4);
    if (isVectorType(Ty) && XmmArgCount < Traits::X86_MAX_XMM_ARGS) {
      ++XmmArgCount;
    } else if (isScalarFloatingType(Ty) && Traits::X86_PASS_SCALAR_FP_IN_XMM &&
               XmmArgCount < Traits::X86_MAX_XMM_ARGS) {
      ++XmmArgCount;
    } else if (isScalarIntegerType(Ty) &&
               GprArgCount < Traits::X86_MAX_GPR_ARGS) {
      // The 64 bit ABI allows some integers to be passed in GPRs.
      ++GprArgCount;
    } else {
      // Argument goes on the stack.  Vector arguments are aligned to the
      // stack alignment before being placed.
      if (isVectorType(Ty)) {
        OutArgumentsSizeBytes =
            Traits::applyStackAlignment(OutArgumentsSizeBytes);
      }
      OutArgumentsSizeBytes += typeWidthInBytesOnStack(Ty);
    }
  }
  if (Traits::Is64Bit)
    return OutArgumentsSizeBytes;
  // The 32 bit ABI requires floating point values to be returned on the x87 FP
  // stack. Ensure there is enough space for the fstp/movs for floating returns.
  if (isScalarFloatingType(ReturnType)) {
    OutArgumentsSizeBytes =
        std::max(OutArgumentsSizeBytes,
                 static_cast<uint32_t>(typeWidthInBytesOnStack(ReturnType)));
  }
  return OutArgumentsSizeBytes;
}
7162
John Porto4a566862016-01-04 09:33:41 -08007163template <typename TraitsType>
7164uint32_t TargetX86Base<TraitsType>::getCallStackArgumentsSizeBytes(
7165 const InstCall *Instr) {
David Sehr26217e32015-11-26 13:03:50 -08007166 // Build a vector of the arguments' types.
John Portoe82b5602016-02-24 15:58:55 -08007167 const SizeT NumArgs = Instr->getNumArgs();
7168 CfgVector<Type> ArgTypes;
7169 ArgTypes.reserve(NumArgs);
7170 for (SizeT i = 0; i < NumArgs; ++i) {
David Sehr26217e32015-11-26 13:03:50 -08007171 Operand *Arg = Instr->getArg(i);
7172 ArgTypes.emplace_back(Arg->getType());
7173 }
7174 // Compute the return type (if any);
7175 Type ReturnType = IceType_void;
7176 Variable *Dest = Instr->getDest();
7177 if (Dest != nullptr)
7178 ReturnType = Dest->getType();
7179 return getCallStackArgumentsSizeBytes(ArgTypes, ReturnType);
7180}
7181
template <typename TraitsType>
Variable *TargetX86Base<TraitsType>::makeZeroedRegister(Type Ty,
                                                        RegNumT RegNum) {
  // Materialize a zero of type Ty in a (possibly pre-assigned) register.
  Variable *Reg = makeReg(Ty, RegNum);
  switch (Ty) {
  case IceType_i1:
  case IceType_i8:
  case IceType_i16:
  case IceType_i32:
  case IceType_i64:
    // Conservatively do "mov reg, 0" to avoid modifying FLAGS.
    _mov(Reg, Ctx->getConstantZero(Ty));
    break;
  case IceType_f32:
  case IceType_f64:
    // xorps reads its operand, so a FakeDef is inserted first to keep
    // liveness analysis consistent for the self-xor idiom.
    Context.insert<InstFakeDef>(Reg);
    _xorps(Reg, Reg);
    break;
  default:
    // All vector types use the same pxor instruction.
    assert(isVectorType(Ty));
    Context.insert<InstFakeDef>(Reg);
    _pxor(Reg, Reg);
    break;
  }
  return Reg;
}
7209
Andrew Scull57e12682015-09-16 11:30:19 -07007210// There is no support for loading or emitting vector constants, so the vector
7211// values returned from makeVectorOfZeros, makeVectorOfOnes, etc. are
7212// initialized with register operations.
John Porto7e93c622015-06-23 10:58:57 -07007213//
Andrew Scull57e12682015-09-16 11:30:19 -07007214// TODO(wala): Add limited support for vector constants so that complex
7215// initialization in registers is unnecessary.
John Porto7e93c622015-06-23 10:58:57 -07007216
template <typename TraitsType>
Variable *TargetX86Base<TraitsType>::makeVectorOfZeros(Type Ty,
                                                       RegNumT RegNum) {
  // A zeroed vector register is built the same way as any zeroed register.
  return makeZeroedRegister(Ty, RegNum);
}
7222
template <typename TraitsType>
Variable *TargetX86Base<TraitsType>::makeVectorOfMinusOnes(Type Ty,
                                                           RegNumT RegNum) {
  // Build an all-ones-bits register via the pcmpeq self-compare idiom
  // (x == x is true in every lane, producing all 1 bits).
  Variable *MinusOnes = makeReg(Ty, RegNum);
  // Insert a FakeDef so the live range of MinusOnes is not overestimated.
  Context.insert<InstFakeDef>(MinusOnes);
  if (Ty == IceType_f64)
    // Making a vector of minus ones of type f64 is currently only used for the
    // fabs intrinsic. To use the f64 type to create this mask with pcmpeqq
    // requires SSE 4.1. Since we're just creating a mask, pcmpeqd does the
    // same job and only requires SSE2.
    _pcmpeq(MinusOnes, MinusOnes, IceType_f32);
  else
    _pcmpeq(MinusOnes, MinusOnes);
  return MinusOnes;
}
7239
John Porto4a566862016-01-04 09:33:41 -08007240template <typename TraitsType>
Jim Stichnoth8aa39662016-02-10 11:20:30 -08007241Variable *TargetX86Base<TraitsType>::makeVectorOfOnes(Type Ty, RegNumT RegNum) {
John Porto7e93c622015-06-23 10:58:57 -07007242 Variable *Dest = makeVectorOfZeros(Ty, RegNum);
7243 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
7244 _psub(Dest, MinusOne);
7245 return Dest;
7246}
7247
template <typename TraitsType>
Variable *TargetX86Base<TraitsType>::makeVectorOfHighOrderBits(Type Ty,
                                                               RegNumT RegNum) {
  // Build a vector whose lanes have only the sign (high-order) bit set.
  assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 ||
         Ty == IceType_v16i8);
  if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) {
    // Shift all-ones lanes left by (element width - 1) bits.
    Variable *Reg = makeVectorOfOnes(Ty, RegNum);
    SizeT Shift =
        typeWidthInBytes(typeElementType(Ty)) * Traits::X86_CHAR_BIT - 1;
    _psll(Reg, Ctx->getConstantInt8(Shift));
    return Reg;
  } else {
    // SSE has no left shift operation for vectors of 8 bit integers.
    // Instead, broadcast the 32-bit pattern 0x80808080 across the register.
    constexpr uint32_t HIGH_ORDER_BITS_MASK = 0x80808080;
    Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK);
    Variable *Reg = makeReg(Ty, RegNum);
    _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem));
    _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8));
    return Reg;
  }
}
7269
Andrew Scull57e12682015-09-16 11:30:19 -07007270/// Construct a mask in a register that can be and'ed with a floating-point
7271/// value to mask off its sign bit. The value will be <4 x 0x7fffffff> for f32
7272/// and v4f32, and <2 x 0x7fffffffffffffff> for f64. Construct it as vector of
7273/// ones logically right shifted one bit.
// TODO(stichnot): Fix the "TODO(wala)" note above, i.e. represent vector
// constants in memory so this register construction becomes unnecessary.
John Porto4a566862016-01-04 09:33:41 -08007276template <typename TraitsType>
7277Variable *TargetX86Base<TraitsType>::makeVectorOfFabsMask(Type Ty,
Jim Stichnoth8aa39662016-02-10 11:20:30 -08007278 RegNumT RegNum) {
John Porto7e93c622015-06-23 10:58:57 -07007279 Variable *Reg = makeVectorOfMinusOnes(Ty, RegNum);
7280 _psrl(Reg, Ctx->getConstantInt8(1));
7281 return Reg;
7282}
7283
template <typename TraitsType>
typename TargetX86Base<TraitsType>::X86OperandMem *
TargetX86Base<TraitsType>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot,
                                                        uint32_t Offset) {
  // Form a memory operand of type Ty addressing Slot+Offset, by materializing
  // the slot's address with lea and attaching the constant offset.
  // Ensure that Loc is a stack slot.
  assert(Slot->mustNotHaveReg());
  assert(Slot->getRegNum().hasNoValue());
  // Compute the location of Loc in memory.
  // TODO(wala,stichnot): lea should not
  // be required. The address of the stack slot is known at compile time
  // (although not until after addProlog()).
  constexpr Type PointerType = IceType_i32;
  Variable *Loc = makeReg(PointerType);
  _lea(Loc, Slot);
  Constant *ConstantOffset = Ctx->getConstantInt32(Offset);
  return X86OperandMem::create(Func, Ty, Loc, ConstantOffset);
}
7301
Jim Stichnothc59288b2015-11-09 11:38:40 -08007302/// Lowering helper to copy a scalar integer source operand into some 8-bit GPR.
7303/// Src is assumed to already be legalized. If the source operand is known to
7304/// be a memory or immediate operand, a simple mov will suffice. But if the
7305/// source operand can be a physical register, then it must first be copied into
7306/// a physical register that is truncable to 8-bit, then truncated into a
7307/// physical register that can receive a truncation, and finally copied into the
7308/// result 8-bit register (which in general can be any 8-bit register). For
7309/// example, moving %ebp into %ah may be accomplished as:
7310/// movl %ebp, %edx
7311/// mov_trunc %edx, %dl // this redundant assignment is ultimately elided
7312/// movb %dl, %ah
7313/// On the other hand, moving a memory or immediate operand into ah:
7314/// movb 4(%ebp), %ah
7315/// movb $my_imm, %ah
7316///
7317/// Note #1. On a 64-bit target, the "movb 4(%ebp), %ah" is likely not
7318/// encodable, so RegNum=Reg_ah should NOT be given as an argument. Instead,
Reed Kotler5fa0a5f2016-02-15 20:01:24 -08007319/// use RegNum=RegNumT() and then let the caller do a separate copy into
Jim Stichnothc59288b2015-11-09 11:38:40 -08007320/// Reg_ah.
7321///
7322/// Note #2. ConstantRelocatable operands are also put through this process
7323/// (not truncated directly) because our ELF emitter does R_386_32 relocations
7324/// but not R_386_8 relocations.
7325///
7326/// Note #3. If Src is a Variable, the result will be an infinite-weight i8
7327/// Variable with the RCX86_IsTrunc8Rcvr register class. As such, this helper
7328/// is a convenient way to prevent ah/bh/ch/dh from being an (invalid) argument
7329/// to the pinsrb instruction.
template <typename TraitsType>
Variable *TargetX86Base<TraitsType>::copyToReg8(Operand *Src, RegNumT RegNum) {
  Type Ty = Src->getType();
  assert(isScalarIntegerType(Ty));
  assert(Ty != IceType_i1);
  // The result is an i8 register constrained to the "truncation receiver"
  // class (excludes ah/bh/ch/dh as pinsrb operands; see block comment above).
  Variable *Reg = makeReg(IceType_i8, RegNum);
  Reg->setRegClass(RCX86_IsTrunc8Rcvr);
  if (llvm::isa<Variable>(Src) || llvm::isa<ConstantRelocatable>(Src)) {
    // A register (or relocatable, which must go through a register for
    // relocation reasons) is first copied into a register whose class
    // guarantees it can be truncated to 8 bits.
    Variable *SrcTruncable = makeReg(Ty);
    switch (Ty) {
    case IceType_i64:
      SrcTruncable->setRegClass(RCX86_Is64To8);
      break;
    case IceType_i32:
      SrcTruncable->setRegClass(RCX86_Is32To8);
      break;
    case IceType_i16:
      SrcTruncable->setRegClass(RCX86_Is16To8);
      break;
    default:
      // i8 - just use default register class
      break;
    }
    // Truncate into an 8-bit receiver register, then fall through to the
    // final move below.
    Variable *SrcRcvr = makeReg(IceType_i8);
    SrcRcvr->setRegClass(RCX86_IsTrunc8Rcvr);
    _mov(SrcTruncable, Src);
    _mov(SrcRcvr, SrcTruncable);
    Src = SrcRcvr;
  }
  // Memory and immediate operands can be moved into the 8-bit register
  // directly.
  _mov(Reg, Src);
  return Reg;
}
7362
Andrew Scull9612d322015-07-06 14:53:25 -07007363/// Helper for legalize() to emit the right code to lower an operand to a
7364/// register of the appropriate type.
John Porto4a566862016-01-04 09:33:41 -08007365template <typename TraitsType>
Jim Stichnoth8aa39662016-02-10 11:20:30 -08007366Variable *TargetX86Base<TraitsType>::copyToReg(Operand *Src, RegNumT RegNum) {
John Porto7e93c622015-06-23 10:58:57 -07007367 Type Ty = Src->getType();
7368 Variable *Reg = makeReg(Ty, RegNum);
7369 if (isVectorType(Ty)) {
7370 _movp(Reg, Src);
7371 } else {
7372 _mov(Reg, Src);
7373 }
7374 return Reg;
7375}
7376
/// Transforms the operand From so that it conforms to the Allowed bitmask of
/// Legal_* operand kinds, optionally pinning the result to physical register
/// RegNum. Handles four operand shapes:
///  - X86OperandMem: legalizes the Base/Index components into registers, then
///    applies immediate randomization/pooling to the offset;
///  - Constant: resolves undef, forces non-encodable 64-bit immediates into a
///    register, blinds/pools eligible 32-bit immediates, rebases relocatables
///    under nonsfi, and converts FP constants to pooled memory operands;
///  - Variable: copies into a register when memory is disallowed or a
///    specific register is requested; materializes rematerializable
///    variables with an lea;
///  - anything else is a fatal error.
template <typename TraitsType>
Operand *TargetX86Base<TraitsType>::legalize(Operand *From, LegalMask Allowed,
                                             RegNumT RegNum) {
  const bool UseNonsfi = getFlags().getUseNonsfi();
  const Type Ty = From->getType();
  // Assert that a physical register is allowed. To date, all calls to
  // legalize() allow a physical register. If a physical register needs to be
  // explicitly disallowed, then new code will need to be written to force a
  // spill.
  assert(Allowed & Legal_Reg);
  // If we're asking for a specific physical register, make sure we're not
  // allowing any other operand kinds. (This could be future work, e.g. allow
  // the shl shift amount to be either an immediate or in ecx.)
  assert(RegNum.hasNoValue() || Allowed == Legal_Reg);

  // Substitute with an available infinite-weight variable if possible. Only do
  // this when we are not asking for a specific register, and when the
  // substitution is not locked to a specific register, and when the types
  // match, in order to capture the vast majority of opportunities and avoid
  // corner cases in the lowering.
  if (RegNum.hasNoValue()) {
    if (Variable *Subst = getContext().availabilityGet(From)) {
      // At this point we know there is a potential substitution available.
      if (Subst->mustHaveReg() && !Subst->hasReg()) {
        // At this point we know the substitution will have a register.
        if (From->getType() == Subst->getType()) {
          // At this point we know the substitution's register is compatible.
          return Subst;
        }
      }
    }
  }

  if (auto *Mem = llvm::dyn_cast<X86OperandMem>(From)) {
    // Before doing anything with a Mem operand, we need to ensure that the
    // Base and Index components are in physical registers.
    Variable *Base = Mem->getBase();
    Variable *Index = Mem->getIndex();
    Constant *Offset = Mem->getOffset();
    Variable *RegBase = nullptr;
    Variable *RegIndex = nullptr;
    uint16_t Shift = Mem->getShift();
    if (Base) {
      RegBase = llvm::cast<Variable>(
          legalize(Base, Legal_Reg | Legal_Rematerializable));
    }
    if (Index) {
      // TODO(jpp): perhaps we should only allow Legal_Reg if
      // Base->isRematerializable.
      RegIndex = llvm::cast<Variable>(
          legalize(Index, Legal_Reg | Legal_Rematerializable));
    }

    // Only rebuild the mem operand if a component actually changed.
    if (Base != RegBase || Index != RegIndex) {
      Mem = X86OperandMem::create(Func, Ty, RegBase, Offset, RegIndex, Shift,
                                  Mem->getSegmentRegister());
    }

    // For all Memory Operands, we do randomization/pooling here.
    From = randomizeOrPoolImmediate(Mem);

    if (!(Allowed & Legal_Mem)) {
      From = copyToReg(From, RegNum);
    }
    return From;
  }

  if (auto *Const = llvm::dyn_cast<Constant>(From)) {
    if (llvm::isa<ConstantUndef>(Const)) {
      From = legalizeUndef(Const, RegNum);
      if (isVectorType(Ty))
        return From;
      Const = llvm::cast<Constant>(From);
    }
    // There should be no constants of vector type (other than undef).
    assert(!isVectorType(Ty));

    // If the operand is a 64 bit constant integer we need to legalize it to a
    // register in x86-64.
    if (Traits::Is64Bit) {
      if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Const)) {
        if (!Utils::IsInt(32, C64->getValue())) {
          // The immediate doesn't fit a sign-extended imm32, so it cannot be
          // encoded inline and must live in a register.
          if (RegNum.hasValue()) {
            assert(Traits::getGprForType(IceType_i64, RegNum) == RegNum);
          }
          return copyToReg(Const, RegNum);
        }
      }
    }

    // If the operand is an 32 bit constant integer, we should check whether we
    // need to randomize it or pool it.
    if (auto *C = llvm::dyn_cast<ConstantInteger32>(Const)) {
      Operand *NewConst = randomizeOrPoolImmediate(C, RegNum);
      if (NewConst != Const) {
        return NewConst;
      }
    }

    if (auto *CR = llvm::dyn_cast<ConstantRelocatable>(Const)) {
      // If the operand is a ConstantRelocatable, and Legal_AddrAbs is not
      // specified, and UseNonsfi is indicated, we need to add RebasePtr.
      if (UseNonsfi && !(Allowed & Legal_AddrAbs)) {
        assert(Ty == IceType_i32);
        Variable *NewVar = makeReg(Ty, RegNum);
        auto *Mem = Traits::X86OperandMem::create(Func, Ty, nullptr, CR);
        // LEAs are not automatically sandboxed, thus we explicitly invoke
        // _sandbox_mem_reference.
        _lea(NewVar, _sandbox_mem_reference(Mem));
        From = NewVar;
      }
    } else if (isScalarFloatingType(Ty)) {
      // Convert a scalar floating point constant into an explicit memory
      // operand.
      if (auto *ConstFloat = llvm::dyn_cast<ConstantFloat>(Const)) {
        // +0.0 can be materialized cheaper by zeroing a register (pxor/xorps)
        // than by loading from the constant pool.
        if (Utils::isPositiveZero(ConstFloat->getValue()))
          return makeZeroedRegister(Ty, RegNum);
      } else if (auto *ConstDouble = llvm::dyn_cast<ConstantDouble>(Const)) {
        if (Utils::isPositiveZero(ConstDouble->getValue()))
          return makeZeroedRegister(Ty, RegNum);
      }

      auto *CFrom = llvm::cast<Constant>(From);
      assert(CFrom->getShouldBePooled());
      Constant *Offset = Ctx->getConstantSym(0, CFrom->getLabelName());
      auto *Mem = X86OperandMem::create(Func, Ty, nullptr, Offset);
      From = Mem;
    }

    bool NeedsReg = false;
    if (!(Allowed & Legal_Imm) && !isScalarFloatingType(Ty))
      // Immediate specifically not allowed.
      NeedsReg = true;
    if (!(Allowed & Legal_Mem) && isScalarFloatingType(Ty))
      // On x86, FP constants are lowered to mem operands.
      NeedsReg = true;
    if (NeedsReg) {
      From = copyToReg(From, RegNum);
    }
    return From;
  }

  if (auto *Var = llvm::dyn_cast<Variable>(From)) {
    // Check if the variable is guaranteed a physical register. This can happen
    // either when the variable is pre-colored or when it is assigned infinite
    // weight.
    bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg());
    bool MustRematerialize =
        (Var->isRematerializable() && !(Allowed & Legal_Rematerializable));
    // We need a new physical register for the operand if:
    // - Mem is not allowed and Var isn't guaranteed a physical register, or
    // - RegNum is required and Var->getRegNum() doesn't match, or
    // - Var is a rematerializable variable and rematerializable pass-through is
    //   not allowed (in which case we need an lea instruction).
    if (MustRematerialize) {
      assert(Ty == IceType_i32);
      Variable *NewVar = makeReg(Ty, RegNum);
      // Since Var is rematerializable, the offset will be added when the lea is
      // emitted.
      constexpr Constant *NoOffset = nullptr;
      auto *Mem = X86OperandMem::create(Func, Ty, Var, NoOffset);
      _lea(NewVar, Mem);
      From = NewVar;
    } else if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
               (RegNum.hasValue() && RegNum != Var->getRegNum())) {
      From = copyToReg(From, RegNum);
    }
    return From;
  }

  llvm::report_fatal_error("Unhandled operand kind in legalize()");
  return From;
}
7550
Andrew Scull9612d322015-07-06 14:53:25 -07007551/// Provide a trivial wrapper to legalize() for this common usage.
John Porto4a566862016-01-04 09:33:41 -08007552template <typename TraitsType>
7553Variable *TargetX86Base<TraitsType>::legalizeToReg(Operand *From,
Jim Stichnoth8aa39662016-02-10 11:20:30 -08007554 RegNumT RegNum) {
John Porto7e93c622015-06-23 10:58:57 -07007555 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
7556}
7557
Jan Voungfbdd2442015-07-15 12:36:20 -07007558/// Legalize undef values to concrete values.
John Porto4a566862016-01-04 09:33:41 -08007559template <typename TraitsType>
7560Operand *TargetX86Base<TraitsType>::legalizeUndef(Operand *From,
Jim Stichnoth8aa39662016-02-10 11:20:30 -08007561 RegNumT RegNum) {
Jan Voungfbdd2442015-07-15 12:36:20 -07007562 Type Ty = From->getType();
7563 if (llvm::isa<ConstantUndef>(From)) {
7564 // Lower undefs to zero. Another option is to lower undefs to an
Andrew Scull57e12682015-09-16 11:30:19 -07007565 // uninitialized register; however, using an uninitialized register results
7566 // in less predictable code.
Jan Voungfbdd2442015-07-15 12:36:20 -07007567 //
Andrew Scull57e12682015-09-16 11:30:19 -07007568 // If in the future the implementation is changed to lower undef values to
7569 // uninitialized registers, a FakeDef will be needed:
John Porto1d937a82015-12-17 06:19:34 -08007570 // Context.insert<InstFakeDef>(Reg);
Jan Voungfbdd2442015-07-15 12:36:20 -07007571 // This is in order to ensure that the live range of Reg is not
Andrew Scull57e12682015-09-16 11:30:19 -07007572 // overestimated. If the constant being lowered is a 64 bit value, then
7573 // the result should be split and the lo and hi components will need to go
7574 // in uninitialized registers.
Jan Voungfbdd2442015-07-15 12:36:20 -07007575 if (isVectorType(Ty))
7576 return makeVectorOfZeros(Ty, RegNum);
7577 return Ctx->getConstantZero(Ty);
7578 }
7579 return From;
7580}
7581
Andrew Scull57e12682015-09-16 11:30:19 -07007582/// For the cmp instruction, if Src1 is an immediate, or known to be a physical
7583/// register, we can allow Src0 to be a memory operand. Otherwise, Src0 must be
7584/// copied into a physical register. (Actually, either Src0 or Src1 can be
7585/// chosen for the physical register, but unfortunately we have to commit to one
7586/// or the other before register allocation.)
John Porto4a566862016-01-04 09:33:41 -08007587template <typename TraitsType>
7588Operand *TargetX86Base<TraitsType>::legalizeSrc0ForCmp(Operand *Src0,
7589 Operand *Src1) {
John Porto7e93c622015-06-23 10:58:57 -07007590 bool IsSrc1ImmOrReg = false;
7591 if (llvm::isa<Constant>(Src1)) {
7592 IsSrc1ImmOrReg = true;
Jan Voungfbdd2442015-07-15 12:36:20 -07007593 } else if (auto *Var = llvm::dyn_cast<Variable>(Src1)) {
John Porto7e93c622015-06-23 10:58:57 -07007594 if (Var->hasReg())
7595 IsSrc1ImmOrReg = true;
7596 }
7597 return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg);
7598}
7599
/// Wraps Opnd (a Variable base or a Constant offset) into an X86OperandMem of
/// type Ty, then either fully legalizes it (DoLegalize) or only applies
/// immediate randomization/pooling. NOTE: temporarily pauses immediate
/// randomization/pooling while legalizing a non-relocatable offset so the
/// offset stays a Constant.
template <typename TraitsType>
typename TargetX86Base<TraitsType>::X86OperandMem *
TargetX86Base<TraitsType>::formMemoryOperand(Operand *Opnd, Type Ty,
                                             bool DoLegalize) {
  auto *Mem = llvm::dyn_cast<X86OperandMem>(Opnd);
  // It may be the case that address mode optimization already creates an
  // X86OperandMem, so in that case it wouldn't need another level of
  // transformation.
  if (!Mem) {
    // Opnd is either a base variable or a constant offset, never both.
    auto *Base = llvm::dyn_cast<Variable>(Opnd);
    auto *Offset = llvm::dyn_cast<Constant>(Opnd);
    assert(Base || Offset);
    if (Offset) {
      // During memory operand building, we do not blind or pool the constant
      // offset, we will work on the whole memory operand later as one entity
      // later, this save one instruction. By turning blinding and pooling off,
      // we guarantee legalize(Offset) will return a Constant*.
      if (!llvm::isa<ConstantRelocatable>(Offset)) {
        // RAII saver: restores RandomizationPoolingPaused on scope exit.
        BoolFlagSaver B(RandomizationPoolingPaused, true);

        Offset = llvm::cast<Constant>(legalize(Offset));
      }

      assert(llvm::isa<ConstantInteger32>(Offset) ||
             llvm::isa<ConstantRelocatable>(Offset));
    }
    // Not completely sure whether it's OK to leave IsRebased unset when
    // creating the mem operand. If DoLegalize is true, it will definitely be
    // applied during the legalize() call, but perhaps not during the
    // randomizeOrPoolImmediate() call. In any case, the emit routines will
    // assert that PIC legalization has been applied.
    Mem = X86OperandMem::create(Func, Ty, Base, Offset);
  }
  // Do legalization, which contains randomization/pooling or do
  // randomization/pooling.
  return llvm::cast<X86OperandMem>(DoLegalize ? legalize(Mem)
                                              : randomizeOrPoolImmediate(Mem));
}
7638
John Porto4a566862016-01-04 09:33:41 -08007639template <typename TraitsType>
Jim Stichnoth8aa39662016-02-10 11:20:30 -08007640Variable *TargetX86Base<TraitsType>::makeReg(Type Type, RegNumT RegNum) {
John Porto7e93c622015-06-23 10:58:57 -07007641 // There aren't any 64-bit integer registers for x86-32.
John Porto1d235422015-08-12 12:37:53 -07007642 assert(Traits::Is64Bit || Type != IceType_i64);
John Porto5aeed952015-07-21 13:39:09 -07007643 Variable *Reg = Func->makeVariable(Type);
Reed Kotler5fa0a5f2016-02-15 20:01:24 -08007644 if (RegNum.hasValue())
John Porto7e93c622015-06-23 10:58:57 -07007645 Reg->setRegNum(RegNum);
Reed Kotler5fa0a5f2016-02-15 20:01:24 -08007646 else
7647 Reg->setMustHaveReg();
John Porto7e93c622015-06-23 10:58:57 -07007648 return Reg;
7649}
7650
// TypeForSize[i] is the type occupying exactly 2^i bytes: i8 (1), i16 (2),
// i32 (4), f64 (8), v16i8 (16). Indexed via llvm::findLastSet(Size) in
// largestTypeInSize() and firstTypeThatFitsSize() below.
template <typename TraitsType>
const Type TargetX86Base<TraitsType>::TypeForSize[] = {
    IceType_i8, IceType_i16, IceType_i32, IceType_f64, IceType_v16i8};
John Porto4a566862016-01-04 09:33:41 -08007654template <typename TraitsType>
7655Type TargetX86Base<TraitsType>::largestTypeInSize(uint32_t Size,
7656 uint32_t MaxSize) {
Andrew Scullcfa628b2015-08-20 14:23:05 -07007657 assert(Size != 0);
7658 uint32_t TyIndex = llvm::findLastSet(Size, llvm::ZB_Undefined);
7659 uint32_t MaxIndex = MaxSize == NoSizeLimit
7660 ? llvm::array_lengthof(TypeForSize) - 1
7661 : llvm::findLastSet(MaxSize, llvm::ZB_Undefined);
7662 return TypeForSize[std::min(TyIndex, MaxIndex)];
7663}
7664
John Porto4a566862016-01-04 09:33:41 -08007665template <typename TraitsType>
7666Type TargetX86Base<TraitsType>::firstTypeThatFitsSize(uint32_t Size,
7667 uint32_t MaxSize) {
Andrew Scullcfa628b2015-08-20 14:23:05 -07007668 assert(Size != 0);
7669 uint32_t TyIndex = llvm::findLastSet(Size, llvm::ZB_Undefined);
7670 if (!llvm::isPowerOf2_32(Size))
7671 ++TyIndex;
7672 uint32_t MaxIndex = MaxSize == NoSizeLimit
7673 ? llvm::array_lengthof(TypeForSize) - 1
7674 : llvm::findLastSet(MaxSize, llvm::ZB_Undefined);
7675 return TypeForSize[std::min(TyIndex, MaxIndex)];
7676}
7677
John Porto4a566862016-01-04 09:33:41 -08007678template <typename TraitsType> void TargetX86Base<TraitsType>::postLower() {
Jim Stichnothdd6dcfa2016-04-18 12:52:09 -07007679 if (Func->getOptLevel() == Opt_m1)
John Porto7e93c622015-06-23 10:58:57 -07007680 return;
Jim Stichnoth230d4102015-09-25 17:40:32 -07007681 markRedefinitions();
Jim Stichnoth318f4cd2015-10-01 21:02:37 -07007682 Context.availabilityUpdate();
John Porto7e93c622015-06-23 10:58:57 -07007683}
7684
/// Builds a random permutation of register numbers into Permutation, skipping
/// the registers in ExcludeRegisters; Salt perturbs the randomization.
/// Delegates entirely to the target traits implementation.
template <typename TraitsType>
void TargetX86Base<TraitsType>::makeRandomRegisterPermutation(
    llvm::SmallVectorImpl<RegNumT> &Permutation,
    const SmallBitVector &ExcludeRegisters, uint64_t Salt) const {
  Traits::makeRandomRegisterPermutation(Func, Permutation, ExcludeRegisters,
                                        Salt);
}
7692
John Porto4a566862016-01-04 09:33:41 -08007693template <typename TraitsType>
7694void TargetX86Base<TraitsType>::emit(const ConstantInteger32 *C) const {
Jim Stichnoth20b71f52015-06-24 15:52:24 -07007695 if (!BuildDefs::dump())
John Porto7e93c622015-06-23 10:58:57 -07007696 return;
7697 Ostream &Str = Ctx->getStrEmit();
Jim Stichnoth8ff4b282016-01-04 15:39:06 -08007698 Str << "$" << C->getValue();
John Porto7e93c622015-06-23 10:58:57 -07007699}
7700
John Porto4a566862016-01-04 09:33:41 -08007701template <typename TraitsType>
7702void TargetX86Base<TraitsType>::emit(const ConstantInteger64 *C) const {
John Porto1d235422015-08-12 12:37:53 -07007703 if (!Traits::Is64Bit) {
7704 llvm::report_fatal_error("Not expecting to emit 64-bit integers");
7705 } else {
7706 if (!BuildDefs::dump())
7707 return;
7708 Ostream &Str = Ctx->getStrEmit();
Jim Stichnoth8ff4b282016-01-04 15:39:06 -08007709 Str << "$" << C->getValue();
John Porto1d235422015-08-12 12:37:53 -07007710 }
John Porto7e93c622015-06-23 10:58:57 -07007711}
7712
John Porto4a566862016-01-04 09:33:41 -08007713template <typename TraitsType>
7714void TargetX86Base<TraitsType>::emit(const ConstantFloat *C) const {
Jim Stichnoth20b71f52015-06-24 15:52:24 -07007715 if (!BuildDefs::dump())
John Porto7e93c622015-06-23 10:58:57 -07007716 return;
7717 Ostream &Str = Ctx->getStrEmit();
Jim Stichnoth467ffe52016-03-29 15:01:06 -07007718 Str << C->getLabelName();
John Porto7e93c622015-06-23 10:58:57 -07007719}
7720
John Porto4a566862016-01-04 09:33:41 -08007721template <typename TraitsType>
7722void TargetX86Base<TraitsType>::emit(const ConstantDouble *C) const {
Jim Stichnoth20b71f52015-06-24 15:52:24 -07007723 if (!BuildDefs::dump())
John Porto7e93c622015-06-23 10:58:57 -07007724 return;
7725 Ostream &Str = Ctx->getStrEmit();
Jim Stichnoth467ffe52016-03-29 15:01:06 -07007726 Str << C->getLabelName();
John Porto7e93c622015-06-23 10:58:57 -07007727}
7728
/// Undef values must be legalized away (see legalizeUndef()) before emission;
/// reaching this emitter with one is a fatal error.
template <typename TraitsType>
void TargetX86Base<TraitsType>::emit(const ConstantUndef *) const {
  llvm::report_fatal_error("undef value encountered by emitter.");
}
7733
Jim Stichnoth8ff4b282016-01-04 15:39:06 -08007734template <class Machine>
7735void TargetX86Base<Machine>::emit(const ConstantRelocatable *C) const {
7736 if (!BuildDefs::dump())
7737 return;
Karl Schimpfd4699942016-04-02 09:55:31 -07007738 assert(!getFlags().getUseNonsfi() ||
Jim Stichnoth467ffe52016-03-29 15:01:06 -07007739 C->getName().toString() == GlobalOffsetTable);
Jim Stichnoth8ff4b282016-01-04 15:39:06 -08007740 Ostream &Str = Ctx->getStrEmit();
7741 Str << "$";
7742 emitWithoutPrefix(C);
7743}
7744
/// Randomize (blind) or pool an immediate, per the active
/// -randomize-pool-immediates option. Returns the original Immediate
/// unchanged when the feature is off/paused, when sandboxed x86-64 disables
/// it, or when the constant is not eligible; otherwise returns a register
/// holding the blinded value (RPI_Randomize) or a register loaded from the
/// constant pool (RPI_Pool).
template <typename TraitsType>
Operand *
TargetX86Base<TraitsType>::randomizeOrPoolImmediate(Constant *Immediate,
                                                    RegNumT RegNum) {
  assert(llvm::isa<ConstantInteger32>(Immediate) ||
         llvm::isa<ConstantRelocatable>(Immediate));
  if (getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None ||
      RandomizationPoolingPaused == true) {
    // Immediates randomization/pooling off or paused
    return Immediate;
  }

  if (Traits::Is64Bit && NeedSandboxing) {
    // Immediate randomization/pooling is currently disabled for x86-64
    // sandboxing for it could generate invalid memory operands.
    assert(false &&
           "Constant pooling/randomization is disabled for x8664 sandbox.");
    return Immediate;
  }

  if (!Immediate->shouldBeRandomizedOrPooled()) {
    // the constant Immediate is not eligible for blinding/pooling
    return Immediate;
  }
  Ctx->statsUpdateRPImms();
  switch (getFlags().getRandomizeAndPoolImmediatesOption()) {
  default:
    llvm::report_fatal_error("Unsupported -randomize-pool-immediates option");
  case RPI_Randomize: {
    // blind the constant
    // FROM:
    //  imm
    // TO:
    //  insert: mov imm+cookie, Reg
    //  insert: lea -cookie[Reg], Reg
    //  => Reg
    // If we have already assigned a phy register, we must come from
    // advancedPhiLowering()=>lowerAssign(). In this case we should reuse the
    // assigned register as this assignment is that start of its use-def
    // chain. So we add RegNum argument here. Note we use 'lea' instruction
    // instead of 'xor' to avoid affecting the flags.
    Variable *Reg = makeReg(IceType_i32, RegNum);
    // NOTE(review): this cast assumes only ConstantInteger32 immediates are
    // eligible for randomization; a ConstantRelocatable that passed the
    // eligibility check above would fail this cast -- confirm upstream
    // filtering in shouldBeRandomizedOrPooled().
    auto *Integer = llvm::cast<ConstantInteger32>(Immediate);
    uint32_t Value = Integer->getValue();
    uint32_t Cookie = Func->getConstantBlindingCookie();
    _mov(Reg, Ctx->getConstantInt(IceType_i32, Cookie + Value));
    Constant *Offset = Ctx->getConstantInt(IceType_i32, 0 - Cookie);
    _lea(Reg, X86OperandMem::create(Func, IceType_i32, Reg, Offset));
    if (Immediate->getType() == IceType_i32) {
      return Reg;
    }
    // Narrower immediates need a final truncating copy into the result type.
    Variable *TruncReg = makeReg(Immediate->getType(), RegNum);
    _mov(TruncReg, Reg);
    return TruncReg;
  }
  case RPI_Pool: {
    // pool the constant
    // FROM:
    //  imm
    // TO:
    //  insert: mov $label, Reg
    //  => Reg
    assert(getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool);
    assert(Immediate->getShouldBePooled());
    // if we have already assigned a phy register, we must come from
    // advancedPhiLowering()=>lowerAssign(). In this case we should reuse the
    // assigned register as this assignment is that start of its use-def
    // chain. So we add RegNum argument here.
    Variable *Reg = makeReg(Immediate->getType(), RegNum);
    constexpr RelocOffsetT Offset = 0;
    Constant *Symbol = Ctx->getConstantSym(Offset, Immediate->getLabelName());
    constexpr Variable *NoBase = nullptr;
    X86OperandMem *MemOperand =
        X86OperandMem::create(Func, Immediate->getType(), NoBase, Symbol);
    _mov(Reg, MemOperand);
    return Reg;
  }
  }
}
7825
/// Randomize (blind) or pool the constant offset of a memory operand, per the
/// active -randomize-pool-immediates option. Returns MemOperand unchanged
/// when the feature is off/paused, disabled for sandboxed x86-64, already
/// applied, or the offset is absent/ineligible; otherwise returns a new
/// X86OperandMem whose offset has been folded into a temporary register.
template <typename TraitsType>
typename TargetX86Base<TraitsType>::X86OperandMem *
TargetX86Base<TraitsType>::randomizeOrPoolImmediate(X86OperandMem *MemOperand,
                                                    RegNumT RegNum) {
  assert(MemOperand);
  if (getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None ||
      RandomizationPoolingPaused == true) {
    // immediates randomization/pooling is turned off
    return MemOperand;
  }

  if (Traits::Is64Bit && NeedSandboxing) {
    // Immediate randomization/pooling is currently disabled for x86-64
    // sandboxing for it could generate invalid memory operands.
    assert(false &&
           "Constant pooling/randomization is disabled for x8664 sandbox.");
    return MemOperand;
  }

  // If this memory operand is already a randomized one, we do not randomize it
  // again.
  if (MemOperand->getRandomized())
    return MemOperand;

  // The offset may be null (no displacement), hence dyn_cast_or_null.
  auto *C = llvm::dyn_cast_or_null<Constant>(MemOperand->getOffset());

  if (C == nullptr) {
    return MemOperand;
  }

  if (!C->shouldBeRandomizedOrPooled()) {
    return MemOperand;
  }

  // The offset of this mem operand should be blinded or pooled
  Ctx->statsUpdateRPImms();
  switch (getFlags().getRandomizeAndPoolImmediatesOption()) {
  default:
    llvm::report_fatal_error("Unsupported -randomize-pool-immediates option");
  case RPI_Randomize: {
    // blind the constant offset
    // FROM:
    //  offset[base, index, shift]
    // TO:
    //  insert: lea offset+cookie[base], RegTemp
    //  => -cookie[RegTemp, index, shift]
    // NOTE(review): the dyn_cast below is dereferenced unchecked; it assumes
    // an eligible offset here is always a ConstantInteger32 -- confirm that
    // relocatable offsets never reach the RPI_Randomize path.
    uint32_t Value =
        llvm::dyn_cast<ConstantInteger32>(MemOperand->getOffset())->getValue();
    uint32_t Cookie = Func->getConstantBlindingCookie();
    Constant *Mask1 =
        Ctx->getConstantInt(MemOperand->getOffset()->getType(), Cookie + Value);
    Constant *Mask2 =
        Ctx->getConstantInt(MemOperand->getOffset()->getType(), 0 - Cookie);

    X86OperandMem *TempMemOperand = X86OperandMem::create(
        Func, MemOperand->getType(), MemOperand->getBase(), Mask1);
    // If we have already assigned a physical register, we must come from
    // advancedPhiLowering()=>lowerAssign(). In this case we should reuse
    // the assigned register as this assignment is that start of its
    // use-def chain. So we add RegNum argument here.
    Variable *RegTemp = makeReg(MemOperand->getOffset()->getType(), RegNum);
    _lea(RegTemp, TempMemOperand);

    X86OperandMem *NewMemOperand = X86OperandMem::create(
        Func, MemOperand->getType(), RegTemp, Mask2, MemOperand->getIndex(),
        MemOperand->getShift(), MemOperand->getSegmentRegister(),
        MemOperand->getIsRebased());

    // Label this memory operand as randomized, so we won't randomize it
    // again in case we call legalize() multiple times on this memory
    // operand.
    NewMemOperand->setRandomized(true);
    return NewMemOperand;
  }
  case RPI_Pool: {
    // pool the constant offset
    // FROM:
    //  offset[base, index, shift]
    // TO:
    //  insert: mov $label, RegTemp
    //  insert: lea [base, RegTemp], RegTemp
    //  =>[RegTemp, index, shift]

    // Memory operand should never exist as source operands in phi lowering
    // assignments, so there is no need to reuse any registers here. For
    // phi lowering, we should not ask for new physical registers in
    // general. However, if we do meet Memory Operand during phi lowering,
    // we should not blind or pool the immediates for now.
    if (RegNum.hasValue())
      return MemOperand;
    Variable *RegTemp = makeReg(IceType_i32);
    assert(MemOperand->getOffset()->getShouldBePooled());
    constexpr RelocOffsetT SymOffset = 0;
    Constant *Symbol =
        Ctx->getConstantSym(SymOffset, MemOperand->getOffset()->getLabelName());
    constexpr Variable *NoBase = nullptr;
    X86OperandMem *SymbolOperand = X86OperandMem::create(
        Func, MemOperand->getOffset()->getType(), NoBase, Symbol);
    _mov(RegTemp, SymbolOperand);
    // If we have a base variable here, we should add the lea instruction
    // to add the value of the base variable to RegTemp. If there is no
    // base variable, we won't need this lea instruction.
    if (MemOperand->getBase()) {
      X86OperandMem *CalculateOperand = X86OperandMem::create(
          Func, MemOperand->getType(), MemOperand->getBase(), nullptr, RegTemp,
          0, MemOperand->getSegmentRegister());
      _lea(RegTemp, CalculateOperand);
    }
    // Note: the pooled variant does not set the Randomized flag; the
    // getRandomized() early-out above only guards the blinding path.
    X86OperandMem *NewMemOperand = X86OperandMem::create(
        Func, MemOperand->getType(), RegTemp, nullptr, MemOperand->getIndex(),
        MemOperand->getShift(), MemOperand->getSegmentRegister());
    return NewMemOperand;
  }
  }
}
David Sehr6b80cf12016-01-21 23:16:58 -08007941
7942template <typename TraitsType>
7943void TargetX86Base<TraitsType>::emitJumpTable(
John Porto03077212016-04-05 06:30:21 -07007944 const Cfg *, const InstJumpTable *JumpTable) const {
David Sehr6b80cf12016-01-21 23:16:58 -08007945 if (!BuildDefs::dump())
7946 return;
7947 Ostream &Str = Ctx->getStrEmit();
Karl Schimpfd4699942016-04-02 09:55:31 -07007948 const bool UseNonsfi = getFlags().getUseNonsfi();
Jim Stichnoth467ffe52016-03-29 15:01:06 -07007949 const char *Prefix = UseNonsfi ? ".data.rel.ro." : ".rodata.";
John Porto03077212016-04-05 06:30:21 -07007950 Str << "\t.section\t" << Prefix << JumpTable->getSectionName()
7951 << ",\"a\",@progbits\n"
7952 "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n"
7953 << JumpTable->getName() << ":";
David Sehr6b80cf12016-01-21 23:16:58 -08007954
7955 // On X86 ILP32 pointers are 32-bit hence the use of .long
7956 for (SizeT I = 0; I < JumpTable->getNumTargets(); ++I)
7957 Str << "\n\t.long\t" << JumpTable->getTarget(I)->getAsmName();
7958 Str << "\n";
7959}
7960
7961template <typename TraitsType>
7962template <typename T>
7963void TargetDataX86<TraitsType>::emitConstantPool(GlobalContext *Ctx) {
7964 if (!BuildDefs::dump())
7965 return;
7966 Ostream &Str = Ctx->getStrEmit();
7967 Type Ty = T::Ty;
7968 SizeT Align = typeAlignInBytes(Ty);
7969 ConstantList Pool = Ctx->getConstantPool(Ty);
7970
7971 Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",@progbits," << Align
7972 << "\n";
7973 Str << "\t.align\t" << Align << "\n";
7974
7975 // If reorder-pooled-constants option is set to true, we need to shuffle the
7976 // constant pool before emitting it.
Karl Schimpfd4699942016-04-02 09:55:31 -07007977 if (getFlags().getReorderPooledConstants() && !Pool.empty()) {
David Sehr6b80cf12016-01-21 23:16:58 -08007978 // Use the constant's kind value as the salt for creating random number
7979 // generator.
7980 Operand::OperandKind K = (*Pool.begin())->getKind();
Karl Schimpfd4699942016-04-02 09:55:31 -07007981 RandomNumberGenerator RNG(getFlags().getRandomSeed(),
David Sehr6b80cf12016-01-21 23:16:58 -08007982 RPE_PooledConstantReordering, K);
7983 RandomShuffle(Pool.begin(), Pool.end(),
7984 [&RNG](uint64_t N) { return (uint32_t)RNG.next(N); });
7985 }
7986
7987 for (Constant *C : Pool) {
7988 if (!C->getShouldBePooled())
7989 continue;
7990 auto *Const = llvm::cast<typename T::IceType>(C);
7991 typename T::IceType::PrimType Value = Const->getValue();
7992 // Use memcpy() to copy bits from Value into RawValue in a way that avoids
7993 // breaking strict-aliasing rules.
7994 typename T::PrimitiveIntType RawValue;
7995 memcpy(&RawValue, &Value, sizeof(Value));
7996 char buf[30];
7997 int CharsPrinted =
7998 snprintf(buf, llvm::array_lengthof(buf), T::PrintfString, RawValue);
7999 assert(CharsPrinted >= 0);
8000 assert((size_t)CharsPrinted < llvm::array_lengthof(buf));
8001 (void)CharsPrinted; // avoid warnings if asserts are disabled
Jim Stichnoth467ffe52016-03-29 15:01:06 -07008002 Str << Const->getLabelName();
David Sehr6b80cf12016-01-21 23:16:58 -08008003 Str << ":\n\t" << T::AsmTag << "\t" << buf << "\t/* " << T::TypeName << " "
8004 << Value << " */\n";
8005 }
8006}
8007
8008template <typename TraitsType>
8009void TargetDataX86<TraitsType>::lowerConstants() {
Karl Schimpfd4699942016-04-02 09:55:31 -07008010 if (getFlags().getDisableTranslation())
David Sehr6b80cf12016-01-21 23:16:58 -08008011 return;
Karl Schimpfd4699942016-04-02 09:55:31 -07008012 switch (getFlags().getOutFileType()) {
David Sehr6b80cf12016-01-21 23:16:58 -08008013 case FT_Elf: {
8014 ELFObjectWriter *Writer = Ctx->getObjectWriter();
8015
8016 Writer->writeConstantPool<ConstantInteger32>(IceType_i8);
8017 Writer->writeConstantPool<ConstantInteger32>(IceType_i16);
8018 Writer->writeConstantPool<ConstantInteger32>(IceType_i32);
8019
8020 Writer->writeConstantPool<ConstantFloat>(IceType_f32);
8021 Writer->writeConstantPool<ConstantDouble>(IceType_f64);
8022 } break;
8023 case FT_Asm:
8024 case FT_Iasm: {
8025 OstreamLocker L(Ctx);
8026
8027 emitConstantPool<PoolTypeConverter<uint8_t>>(Ctx);
8028 emitConstantPool<PoolTypeConverter<uint16_t>>(Ctx);
8029 emitConstantPool<PoolTypeConverter<uint32_t>>(Ctx);
8030
8031 emitConstantPool<PoolTypeConverter<float>>(Ctx);
8032 emitConstantPool<PoolTypeConverter<double>>(Ctx);
8033 } break;
8034 }
8035}
8036
8037template <typename TraitsType>
8038void TargetDataX86<TraitsType>::lowerJumpTables() {
Karl Schimpfd4699942016-04-02 09:55:31 -07008039 const bool IsPIC = getFlags().getUseNonsfi();
8040 switch (getFlags().getOutFileType()) {
David Sehr6b80cf12016-01-21 23:16:58 -08008041 case FT_Elf: {
8042 ELFObjectWriter *Writer = Ctx->getObjectWriter();
8043 for (const JumpTableData &JT : Ctx->getJumpTables())
8044 Writer->writeJumpTable(JT, Traits::FK_Abs, IsPIC);
8045 } break;
8046 case FT_Asm:
8047 // Already emitted from Cfg
8048 break;
8049 case FT_Iasm: {
8050 if (!BuildDefs::dump())
8051 return;
8052 Ostream &Str = Ctx->getStrEmit();
Jim Stichnoth467ffe52016-03-29 15:01:06 -07008053 const char *Prefix = IsPIC ? ".data.rel.ro." : ".rodata.";
David Sehr6b80cf12016-01-21 23:16:58 -08008054 for (const JumpTableData &JT : Ctx->getJumpTables()) {
John Porto03077212016-04-05 06:30:21 -07008055 Str << "\t.section\t" << Prefix << JT.getSectionName()
8056 << ",\"a\",@progbits\n"
8057 "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n"
8058 << JT.getName().toString() << ":";
David Sehr6b80cf12016-01-21 23:16:58 -08008059
8060 // On X8664 ILP32 pointers are 32-bit hence the use of .long
8061 for (intptr_t TargetOffset : JT.getTargetOffsets())
8062 Str << "\n\t.long\t" << JT.getFunctionName() << "+" << TargetOffset;
8063 Str << "\n";
8064 }
8065 } break;
8066 }
8067}
8068
8069template <typename TraitsType>
8070void TargetDataX86<TraitsType>::lowerGlobals(
Jim Stichnoth467ffe52016-03-29 15:01:06 -07008071 const VariableDeclarationList &Vars, const std::string &SectionSuffix) {
Karl Schimpfd4699942016-04-02 09:55:31 -07008072 const bool IsPIC = getFlags().getUseNonsfi();
8073 switch (getFlags().getOutFileType()) {
David Sehr6b80cf12016-01-21 23:16:58 -08008074 case FT_Elf: {
8075 ELFObjectWriter *Writer = Ctx->getObjectWriter();
8076 Writer->writeDataSection(Vars, Traits::FK_Abs, SectionSuffix, IsPIC);
8077 } break;
8078 case FT_Asm:
8079 case FT_Iasm: {
David Sehr6b80cf12016-01-21 23:16:58 -08008080 OstreamLocker L(Ctx);
8081 for (const VariableDeclaration *Var : Vars) {
Jim Stichnothdd6dcfa2016-04-18 12:52:09 -07008082 if (getFlags().matchTranslateOnly(Var->getName(), 0)) {
David Sehr6b80cf12016-01-21 23:16:58 -08008083 emitGlobal(*Var, SectionSuffix);
8084 }
8085 }
8086 } break;
8087 }
8088}
John Porto4a566862016-01-04 09:33:41 -08008089} // end of namespace X86NAMESPACE
John Porto7e93c622015-06-23 10:58:57 -07008090} // end of namespace Ice
8091
8092#endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H