blob: f75ca292c1bbb05b77bc04583a19874411faca8e [file] [log] [blame]
Jim Stichnothf7c9a142014-04-29 10:52:43 -07001//===- subzero/src/IceCfg.cpp - Control flow graph implementation ---------===//
2//
3// The Subzero Code Generator
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
Andrew Scull9612d322015-07-06 14:53:25 -07009///
10/// \file
Jim Stichnoth92a6e5b2015-12-02 16:52:44 -080011/// \brief Implements the Cfg class.
Andrew Scull9612d322015-07-06 14:53:25 -070012///
Jim Stichnothf7c9a142014-04-29 10:52:43 -070013//===----------------------------------------------------------------------===//
14
15#include "IceCfg.h"
John Porto67f8de92015-06-25 10:14:17 -070016
17#include "IceAssembler.h"
John Porto36d6aa62016-02-26 07:19:59 -080018#include "IceBitVector.h"
Jim Stichnothf7c9a142014-04-29 10:52:43 -070019#include "IceCfgNode.h"
Jim Stichnoth989a7032014-08-08 10:13:44 -070020#include "IceClFlags.h"
Jim Stichnothf7c9a142014-04-29 10:52:43 -070021#include "IceDefs.h"
Jan Voungec270732015-01-12 17:00:22 -080022#include "IceELFObjectWriter.h"
John Portof8b4cc82015-06-09 18:06:19 -070023#include "IceGlobalInits.h"
Jim Stichnothf7c9a142014-04-29 10:52:43 -070024#include "IceInst.h"
John Portoec3f5652015-08-31 15:07:09 -070025#include "IceInstVarIter.h"
Nicolas Capensa9a92a52016-09-06 12:59:58 -040026#include "IceInstrumentation.h"
Jim Stichnothd97c7df2014-06-04 11:57:08 -070027#include "IceLiveness.h"
Andrew Scullaa6c1092015-09-03 17:50:30 -070028#include "IceLoopAnalyzer.h"
Jim Stichnothf7c9a142014-04-29 10:52:43 -070029#include "IceOperand.h"
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -070030#include "IceTargetLowering.h"
Jim Stichnothf7c9a142014-04-29 10:52:43 -070031
John Porto7bb9cab2016-04-01 05:43:09 -070032#include <memory>
33#include <utility>
34
Jim Stichnothf7c9a142014-04-29 10:52:43 -070035namespace Ice {
36
// Constructs the control-flow graph for one function, owned by GlobalContext
// Ctx. SequenceNumber is the function's position in the input stream; it is
// used for deterministic diagnostics and for seeding randomization.
Cfg::Cfg(GlobalContext *Ctx, uint32_t SequenceNumber)
    // createAllocator() must run first in the initializer list: it installs
    // the arena as the thread-local allocator before any other members are
    // constructed, since they may allocate from it.
    : Allocator(createAllocator()), Ctx(Ctx), SequenceNumber(SequenceNumber),
      VMask(getFlags().getVerbose()), FunctionName(),
      NextInstNumber(Inst::NumberInitial), Live(nullptr) {
  NodeStrings.reset(new StringPool);
  VarStrings.reset(new StringPool);
  // Make this Cfg's arena the active allocator while constructing the
  // target-dependent members below.
  CfgLocalAllocatorScope _(this);
  Target = TargetLowering::createLowering(getFlags().getTargetArch(), this);
  VMetadata.reset(new VariablesMetadata(this));
  TargetAssembler = Target->createAssembler();

  if (getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Randomize) {
    // If -randomize-pool-immediates=randomize, create a random number
    // generator to generate a cookie for constant blinding. The seed mixes in
    // SequenceNumber so each function gets a distinct cookie.
    RandomNumberGenerator RNG(getFlags().getRandomSeed(), RPE_ConstantBlinding,
                              this->SequenceNumber);
    // Draw uniformly over the full uint32_t range (bound is max+1, exclusive).
    ConstantBlindingCookie =
        (uint32_t)RNG.next((uint64_t)std::numeric_limits<uint32_t>::max() + 1);
  }
}
Jim Stichnothf7c9a142014-04-29 10:52:43 -070057
Jim Stichnoth467ffe52016-03-29 15:01:06 -070058Cfg::~Cfg() {
59 assert(CfgAllocatorTraits::current() == nullptr);
Karl Schimpfd4699942016-04-02 09:55:31 -070060 if (getFlags().getDumpStrings()) {
Jim Stichnoth467ffe52016-03-29 15:01:06 -070061 OstreamLocker _(Ctx);
62 Ostream &Str = Ctx->getStrDump();
63 getNodeStrings()->dump(Str);
64 getVarStrings()->dump(Str);
65 }
66}
Jim Stichnothf7c9a142014-04-29 10:52:43 -070067
Nicolas Capensa9a92a52016-09-06 12:59:58 -040068// Called in the initalizer list of Cfg's constructor to create the Allocator
69// and set it as TLS before any other member fields are constructed, since they
70// may depend on it.
71ArenaAllocator *Cfg::createAllocator() {
72 ArenaAllocator *Allocator = new ArenaAllocator();
73 CfgAllocatorTraits::set_current(Allocator);
74 return Allocator;
75}
76
Jim Stichnothb40595a2016-01-29 06:14:31 -080077/// Create a string like "foo(i=123:b=9)" indicating the function name, number
78/// of high-level instructions, and number of basic blocks. This string is only
79/// used for dumping and other diagnostics, and the idea is that given a set of
80/// functions to debug a problem on, it's easy to find the smallest or simplest
81/// function to attack. Note that the counts may change somewhat depending on
82/// what point it is called during the translation passes.
Jim Stichnoth467ffe52016-03-29 15:01:06 -070083std::string Cfg::getFunctionNameAndSize() const {
Jim Stichnothb40595a2016-01-29 06:14:31 -080084 if (!BuildDefs::dump())
Jim Stichnoth467ffe52016-03-29 15:01:06 -070085 return getFunctionName().toString();
Jim Stichnothb40595a2016-01-29 06:14:31 -080086 SizeT NodeCount = 0;
87 SizeT InstCount = 0;
88 for (CfgNode *Node : getNodes()) {
89 ++NodeCount;
90 // Note: deleted instructions are *not* ignored.
91 InstCount += Node->getPhis().size();
92 for (Inst &I : Node->getInsts()) {
93 if (!llvm::isa<InstTarget>(&I))
94 ++InstCount;
95 }
96 }
97 return getFunctionName() + "(i=" + std::to_string(InstCount) + ":b=" +
98 std::to_string(NodeCount) + ")";
99}
100
Jim Stichnoth467ffe52016-03-29 15:01:06 -0700101void Cfg::setError(const std::string &Message) {
Jim Stichnothf7c9a142014-04-29 10:52:43 -0700102 HasError = true;
103 ErrorMessage = Message;
Jim Stichnothf7c9a142014-04-29 10:52:43 -0700104}
105
Jim Stichnoth668a7a32014-12-10 15:32:25 -0800106CfgNode *Cfg::makeNode() {
Jim Stichnothf7c9a142014-04-29 10:52:43 -0700107 SizeT LabelIndex = Nodes.size();
Jim Stichnoth54f3d512015-12-11 09:53:00 -0800108 auto *Node = CfgNode::create(this, LabelIndex);
Jim Stichnothf7c9a142014-04-29 10:52:43 -0700109 Nodes.push_back(Node);
110 return Node;
111}
112
Karl Schimpfac7d7342015-08-06 12:55:23 -0700113void Cfg::swapNodes(NodeList &NewNodes) {
Jim Stichnothe7dbc0b2015-09-15 10:09:24 -0700114 assert(Nodes.size() == NewNodes.size());
Karl Schimpfac7d7342015-08-06 12:55:23 -0700115 Nodes.swap(NewNodes);
116 for (SizeT I = 0, NumNodes = getNumNodes(); I < NumNodes; ++I)
117 Nodes[I]->resetIndex(I);
118}
119
John Portoa83bfde2015-09-18 08:43:02 -0700120template <> Variable *Cfg::makeVariable<Variable>(Type Ty) {
Andrew Scull6d47bcd2015-09-17 17:10:05 -0700121 SizeT Index = Variables.size();
Jaydeep Patil958ddb72016-10-03 07:52:48 -0700122 Variable *Var;
123 if (Target->shouldSplitToVariableVecOn32(Ty)) {
124 Var = VariableVecOn32::create(this, Ty, Index);
125 } else if (Target->shouldSplitToVariable64On32(Ty)) {
126 Var = Variable64On32::create(this, Ty, Index);
127 } else {
128 Var = Variable::create(this, Ty, Index);
129 }
Andrew Scull6d47bcd2015-09-17 17:10:05 -0700130 Variables.push_back(Var);
131 return Var;
132}
133
Jim Stichnothf7c9a142014-04-29 10:52:43 -0700134void Cfg::addArg(Variable *Arg) {
Jim Stichnoth144cdce2014-09-22 16:02:59 -0700135 Arg->setIsArg();
Jim Stichnothf7c9a142014-04-29 10:52:43 -0700136 Args.push_back(Arg);
137}
138
Jim Stichnoth144cdce2014-09-22 16:02:59 -0700139void Cfg::addImplicitArg(Variable *Arg) {
140 Arg->setIsImplicitArg();
141 ImplicitArgs.push_back(Arg);
142}
143
Andrew Scull57e12682015-09-16 11:30:19 -0700144// Returns whether the stack frame layout has been computed yet. This is used
145// for dumping the stack frame location of Variables.
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700146bool Cfg::hasComputedFrame() const { return getTarget()->hasComputedFrame(); }
147
namespace {
// Name prefixes for the globals emitted by block profiling: one holding the
// block's name string, and one holding its execution-count info record.
constexpr char BlockNameGlobalPrefix[] = ".L$profiler$block_name$";
constexpr char BlockStatsGlobalPrefix[] = ".L$profiler$block_info$";
} // end of anonymous namespace
John Portof8b4cc82015-06-09 18:06:19 -0700152
// Emits a constant global holding NodeAsmName's NUL-terminated text, named
// BlockNameGlobalPrefix + NodeAsmName, and appends it to GlobalInits.
void Cfg::createNodeNameDeclaration(const std::string &NodeAsmName) {
  auto *Var = VariableDeclaration::create(GlobalInits.get());
  Var->setName(Ctx, BlockNameGlobalPrefix + NodeAsmName);
  Var->setIsConstant(true);
  // size() + 1 so the initializer includes the trailing NUL byte.
  Var->addInitializer(VariableDeclaration::DataInitializer::create(
      GlobalInits.get(), NodeAsmName.data(), NodeAsmName.size() + 1));
  const SizeT Int64ByteSize = typeWidthInBytes(IceType_i64);
  Var->setAlignment(Int64ByteSize); // Wasteful, 32-bit could use 4 bytes.
  GlobalInits->push_back(Var);
}
163
// Emits the per-node profiling info global, named
// BlockStatsGlobalPrefix + NodeAsmName: an i64-sized zero-initialized counter
// followed by a relocation pointing at NodeNameDeclaration (at offset 0).
// Appends the declaration to GlobalInits.
void Cfg::createBlockProfilingInfoDeclaration(
    const std::string &NodeAsmName, VariableDeclaration *NodeNameDeclaration) {
  auto *Var = VariableDeclaration::create(GlobalInits.get());
  Var->setName(Ctx, BlockStatsGlobalPrefix + NodeAsmName);
  const SizeT Int64ByteSize = typeWidthInBytes(IceType_i64);
  // The counter field, zero-initialized.
  Var->addInitializer(VariableDeclaration::ZeroInitializer::create(
      GlobalInits.get(), Int64ByteSize));

  // Pointer back to the block-name string global.
  const RelocOffsetT NodeNameDeclarationOffset = 0;
  Var->addInitializer(VariableDeclaration::RelocInitializer::create(
      GlobalInits.get(), NodeNameDeclaration,
      {RelocOffset::create(Ctx, NodeNameDeclarationOffset)}));
  Var->setAlignment(Int64ByteSize);
  GlobalInits->push_back(Var);
}
John Portof8b4cc82015-06-09 18:06:19 -0700179
// For every node, creates the block-profiling globals (name string and info
// record) and instruments the node to count its executions. Lazily creates
// the GlobalInits list on first use.
void Cfg::profileBlocks() {
  if (GlobalInits == nullptr)
    GlobalInits.reset(new VariableDeclarationList());

  for (CfgNode *Node : Nodes) {
    const std::string NodeAsmName = Node->getAsmName();
    createNodeNameDeclaration(NodeAsmName);
    // GlobalInits->back() here is the name declaration just appended above.
    createBlockProfilingInfoDeclaration(NodeAsmName, GlobalInits->back());
    // Now back() is the block-info declaration, which holds the counter.
    Node->profileExecutionCount(GlobalInits->back());
  }
}
191
192bool Cfg::isProfileGlobal(const VariableDeclaration &Var) {
Jim Stichnoth467ffe52016-03-29 15:01:06 -0700193 if (!Var.getName().hasStdString())
194 return false;
195 return Var.getName().toString().find(BlockStatsGlobalPrefix) == 0;
John Portof8b4cc82015-06-09 18:06:19 -0700196}
197
// Prepends a zero-argument call to __Sz_profile_summary() at the front of the
// entry node's instruction list. The call(s) to __Sz_profile_summary are
// added by the profiler in functions that cause the program to exit. The
// callee is defined in runtime/szrt_profiler.c.
void Cfg::addCallToProfileSummary() {
  Constant *ProfileSummarySym =
      Ctx->getConstantExternSym(Ctx->getGlobalString("__Sz_profile_summary"));
  constexpr SizeT NumArgs = 0;
  constexpr Variable *Void = nullptr; // no return value
  constexpr bool HasTailCall = false;
  auto *Call =
      InstCall::create(this, NumArgs, Void, ProfileSummarySym, HasTailCall);
  getEntryNode()->getInsts().push_front(Call);
}
211
// Top-level driver that translates this function from initial IR through the
// target's full pass pipeline. Bails out immediately if an error has already
// been recorded via setError().
void Cfg::translate() {
  if (hasError())
    return;
  // Cache the possibly-overridden optimization level once translation begins.
  // It would be nicer to do this in the constructor, but we need to wait until
  // after setFunctionName() has a chance to be called.
  OptimizationLevel =
      getFlags().matchForceO2(getFunctionName(), getSequenceNumber())
          ? Opt_2
          : getFlags().getOptLevel();
  if (BuildDefs::timers()) {
    // Enable focused timing when this function matches -timing-focus.
    if (getFlags().matchTimingFocus(getFunctionName(), getSequenceNumber())) {
      setFocusedTiming();
      getContext()->resetTimer(GlobalContext::TSK_Default);
    }
  }
  if (BuildDefs::dump()) {
    // Status line announcing which function is being translated.
    if (isVerbose(IceV_Status) &&
        getFlags().matchTestStatus(getFunctionName(), getSequenceNumber())) {
      getContext()->getStrDump() << ">>>Translating "
                                 << getFunctionNameAndSize()
                                 << " seq=" << getSequenceNumber() << "\n";
    }
  }
  TimerMarker T_func(getContext(), getFunctionName().toStringOrEmpty());
  TimerMarker T(TimerStack::TT_translate, this);

  dump("Initial CFG");

  if (getFlags().getEnableBlockProfile()) {
    profileBlocks();
    // TODO(jpp): this is fragile, at best. Figure out a better way of
    // detecting exit functions.
    if (getFunctionName().toStringOrEmpty() == "exit") {
      addCallToProfileSummary();
    }
    dump("Profiled CFG");
  }

  // Create the Hi and Lo variables where a split was needed
  for (Variable *Var : Variables) {
    if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Var)) {
      Var64On32->initHiLo(this);
    } else if (auto *VarVecOn32 = llvm::dyn_cast<VariableVecOn32>(Var)) {
      VarVecOn32->initVecElement(this);
    }
  }

  // Instrument the Cfg, e.g. with AddressSanitizer
  if (!BuildDefs::minimal() && getFlags().getSanitizeAddresses()) {
    getContext()->instrumentFunc(this);
    dump("Instrumented CFG");
  }

  // The set of translation passes and their order are determined by the
  // target.
  getTarget()->translate();

  dump("Final output");
  if (getFocusedTiming()) {
    getContext()->dumpLocalTimers(getFunctionName().toString());
  }
}
275
Eric Holk16f80612016-04-04 17:07:42 -0700276void Cfg::fixPhiNodes() {
277 for (auto *Node : Nodes) {
278 // Fix all the phi edges since WASM can't tell how to make them correctly at
279 // the beginning.
280 assert(Node);
281 const auto &InEdges = Node->getInEdges();
282 for (auto &Instr : Node->getPhis()) {
283 auto *Phi = llvm::cast<InstPhi>(&Instr);
284 assert(Phi);
285 for (SizeT i = 0; i < InEdges.size(); ++i) {
286 Phi->setLabel(i, InEdges[i]);
287 }
288 }
289 }
290}
291
// (Re)computes successor and predecessor edges for all nodes, prunes nodes
// unreachable from the entry node, and finally validates phi consistency.
void Cfg::computeInOutEdges() {
  // Compute the out-edges.
  for (CfgNode *Node : Nodes) {
    Node->computeSuccessors();
  }

  // Prune any unreachable nodes before computing in-edges.
  SizeT NumNodes = getNumNodes();
  // Worklist reachability over node indices: Pending holds discovered but
  // unprocessed nodes, Reachable accumulates processed ones.
  BitVector Reachable(NumNodes);
  BitVector Pending(NumNodes);
  Pending.set(getEntryNode()->getIndex());
  while (true) {
    int Index = Pending.find_first();
    if (Index == -1)
      break; // worklist drained
    Pending.reset(Index);
    Reachable.set(Index);
    CfgNode *Node = Nodes[Index];
    assert(Node->getIndex() == (SizeT)Index);
    for (CfgNode *Succ : Node->getOutEdges()) {
      SizeT SuccIndex = Succ->getIndex();
      if (!Reachable.test(SuccIndex))
        Pending.set(SuccIndex);
    }
  }
  // Compact Nodes in place, keeping only reachable nodes, renumbering their
  // indices densely as they are moved down.
  SizeT Dest = 0;
  for (SizeT Source = 0; Source < NumNodes; ++Source) {
    if (Reachable.test(Source)) {
      Nodes[Dest] = Nodes[Source];
      Nodes[Dest]->resetIndex(Dest);
      // Compute the in-edges.
      Nodes[Dest]->computePredecessors();
      ++Dest;
    }
  }
  Nodes.resize(Dest);

  TimerMarker T(TimerStack::TT_phiValidation, this);
  for (CfgNode *Node : Nodes)
    Node->enforcePhiConsistency();
}
333
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700334void Cfg::renumberInstructions() {
Jim Stichnoth8363a062014-10-07 10:02:38 -0700335 TimerMarker T(TimerStack::TT_renumberInstructions, this);
Jim Stichnothe5b73e62014-12-15 09:58:51 -0800336 NextInstNumber = Inst::NumberInitial;
Jim Stichnothf44f3712014-10-01 14:05:51 -0700337 for (CfgNode *Node : Nodes)
338 Node->renumberInstructions();
Jim Stichnoth6f7ad6c2016-01-15 09:52:54 -0800339 // Make sure the entry node is the first node and therefore got the lowest
340 // instruction numbers, to facilitate live range computation of function
341 // arguments. We want to model function arguments as being live on entry to
342 // the function, otherwise an argument whose only use is in the first
343 // instruction will be assigned a trivial live range and the register
344 // allocator will not recognize its live range as overlapping another
345 // variable's live range.
346 assert(Nodes.empty() || (*Nodes.begin() == getEntryNode()));
Jim Stichnothd97c7df2014-06-04 11:57:08 -0700347}
348
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700349// placePhiLoads() must be called before placePhiStores().
350void Cfg::placePhiLoads() {
Jim Stichnoth8363a062014-10-07 10:02:38 -0700351 TimerMarker T(TimerStack::TT_placePhiLoads, this);
Jim Stichnothf44f3712014-10-01 14:05:51 -0700352 for (CfgNode *Node : Nodes)
353 Node->placePhiLoads();
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700354}
355
356// placePhiStores() must be called after placePhiLoads().
357void Cfg::placePhiStores() {
Jim Stichnoth8363a062014-10-07 10:02:38 -0700358 TimerMarker T(TimerStack::TT_placePhiStores, this);
Jim Stichnothf44f3712014-10-01 14:05:51 -0700359 for (CfgNode *Node : Nodes)
360 Node->placePhiStores();
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700361}
362
363void Cfg::deletePhis() {
Jim Stichnoth8363a062014-10-07 10:02:38 -0700364 TimerMarker T(TimerStack::TT_deletePhis, this);
Jim Stichnothf44f3712014-10-01 14:05:51 -0700365 for (CfgNode *Node : Nodes)
366 Node->deletePhis();
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -0700367}
368
// Lowers phis by splitting edges (appending new nodes to the node list),
// incrementally updates liveness/live ranges for the new nodes, and then runs
// the phi-specific register allocation pass.
void Cfg::advancedPhiLowering() {
  TimerMarker T(TimerStack::TT_advancedPhiLowering, this);
  // Clear all previously computed live ranges (but not live-in/live-out bit
  // vectors or last-use markers), because the follow-on register allocation is
  // only concerned with live ranges across the newly created blocks.
  for (Variable *Var : Variables) {
    Var->getLiveRange().reset();
  }
  // This splits edges and appends new nodes to the end of the node list. This
  // can invalidate iterators, so don't use an iterator.
  // Snapshot the counts first so the loops below only cover pre-existing
  // nodes/variables.
  SizeT NumNodes = getNumNodes();
  SizeT NumVars = getNumVariables();
  for (SizeT I = 0; I < NumNodes; ++I)
    Nodes[I]->advancedPhiLowering();

  TimerMarker TT(TimerStack::TT_lowerPhiAssignments, this);
  // "if (true)" selects the incremental liveness update; the else branch is
  // deliberately dead code kept as a brute-force fallback (see its comment).
  if (true) {
    // The following code does an in-place update of liveness and live ranges
    // as a result of adding the new phi edge split nodes.
    getLiveness()->initPhiEdgeSplits(Nodes.begin() + NumNodes,
                                     Variables.begin() + NumVars);
    TimerMarker TTT(TimerStack::TT_liveness, this);
    // Iterate over the newly added nodes to add their liveness info.
    for (auto I = Nodes.begin() + NumNodes, E = Nodes.end(); I != E; ++I) {
      InstNumberT FirstInstNum = getNextInstNumber();
      (*I)->renumberInstructions();
      InstNumberT LastInstNum = getNextInstNumber() - 1;
      (*I)->liveness(getLiveness());
      (*I)->livenessAddIntervals(getLiveness(), FirstInstNum, LastInstNum);
    }
  } else {
    // The following code does a brute-force recalculation of live ranges as a
    // result of adding the new phi edge split nodes. The liveness calculation
    // is particularly expensive because the new nodes are not yet in a proper
    // topological order and so convergence is slower.
    //
    // This code is kept here for reference and can be temporarily enabled in
    // case the incremental code is under suspicion.
    renumberInstructions();
    liveness(Liveness_Intervals);
    getVMetadata()->init(VMK_All);
  }
  Target->regAlloc(RAK_Phi);
}
413
// Find a reasonable placement for nodes that have not yet been placed, while
// maintaining the same relative ordering among already placed nodes.
void Cfg::reorderNodes() {
  // TODO(ascull): it would be nice if the switch tests were always followed by
  // the default case to allow for fall through.
  using PlacedList = CfgList<CfgNode *>;
  PlacedList Placed;      // Nodes with relative placement locked down
  PlacedList Unreachable; // Unreachable nodes
  // NoPlace (== Placed.end()) is the sentinel meaning "not yet placed".
  PlacedList::iterator NoPlace = Placed.end();
  // Keep track of where each node has been tentatively placed so that we can
  // manage insertions into the middle.
  CfgVector<PlacedList::iterator> PlaceIndex(Nodes.size(), NoPlace);
  for (CfgNode *Node : Nodes) {
    // The "do ... while(0);" construct is to factor out the --PlaceIndex and
    // assert() statements before moving to the next node.
    do {
      if (Node != getEntryNode() && Node->getInEdges().empty()) {
        // The node has essentially been deleted since it is not a successor of
        // any other node.
        Unreachable.push_back(Node);
        PlaceIndex[Node->getIndex()] = Unreachable.end();
        Node->setNeedsPlacement(false);
        continue;
      }
      if (!Node->needsPlacement()) {
        // Add to the end of the Placed list.
        Placed.push_back(Node);
        PlaceIndex[Node->getIndex()] = Placed.end();
        continue;
      }
      Node->setNeedsPlacement(false);
      // Assume for now that the unplaced node is from edge-splitting and
      // therefore has 1 in-edge and 1 out-edge (actually, possibly more than 1
      // in-edge if the predecessor node was contracted). If this changes in
      // the future, rethink the strategy.
      assert(Node->getInEdges().size() >= 1);
      assert(Node->hasSingleOutEdge());

      // If it's a (non-critical) edge where the successor has a single
      // in-edge, then place it before the successor.
      CfgNode *Succ = Node->getOutEdges().front();
      if (Succ->getInEdges().size() == 1 &&
          PlaceIndex[Succ->getIndex()] != NoPlace) {
        Placed.insert(PlaceIndex[Succ->getIndex()], Node);
        PlaceIndex[Node->getIndex()] = PlaceIndex[Succ->getIndex()];
        continue;
      }

      // Otherwise, place it after the (first) predecessor.
      CfgNode *Pred = Node->getInEdges().front();
      auto PredPosition = PlaceIndex[Pred->getIndex()];
      // It shouldn't be the case that PredPosition==NoPlace, but if that
      // somehow turns out to be true, we just insert Node before
      // PredPosition=NoPlace=Placed.end() .
      if (PredPosition != NoPlace)
        ++PredPosition;
      Placed.insert(PredPosition, Node);
      PlaceIndex[Node->getIndex()] = PredPosition;
    } while (0);

    // Every branch above recorded the iterator one past Node's position;
    // step back so PlaceIndex points at Node itself, and verify.
    --PlaceIndex[Node->getIndex()];
    assert(*PlaceIndex[Node->getIndex()] == Node);
  }

  // Reorder Nodes according to the built-up lists.
  NodeList Reordered;
  Reordered.reserve(Placed.size() + Unreachable.size());
  for (CfgNode *Node : Placed)
    Reordered.push_back(Node);
  for (CfgNode *Node : Unreachable)
    Reordered.push_back(Node);
  assert(getNumNodes() == Reordered.size());
  swapNodes(Reordered);
}
488
namespace {
// Recursive randomized DFS: appends the nodes reachable from Node to
// PostOrder in post-order, visiting each node's out-edges in RNG-shuffled
// order. ToVisit is indexed by node index and tracks not-yet-visited nodes;
// bits are cleared as nodes are reached.
void getRandomPostOrder(CfgNode *Node, BitVector &ToVisit,
                        Ice::NodeList &PostOrder,
                        Ice::RandomNumberGenerator *RNG) {
  assert(ToVisit[Node->getIndex()]);
  ToVisit[Node->getIndex()] = false;
  // Copy the out-edges so shuffling doesn't disturb the node itself.
  NodeList Outs = Node->getOutEdges();
  Ice::RandomShuffle(Outs.begin(), Outs.end(),
                     [RNG](int N) { return RNG->next(N); });
  for (CfgNode *Next : Outs) {
    if (ToVisit[Next->getIndex()])
      getRandomPostOrder(Next, ToVisit, PostOrder, RNG);
  }
  PostOrder.push_back(Node);
}
} // end of anonymous namespace
505
// Randomly reorders the basic blocks (when -reorder-basic-blocks is enabled)
// via a randomized reverse-post-order walk from the entry node; unreachable
// nodes are appended at the end in their original relative order.
void Cfg::shuffleNodes() {
  if (!getFlags().getReorderBasicBlocks())
    return;

  NodeList ReversedReachable;
  NodeList Unreachable;
  BitVector ToVisit(Nodes.size(), true);
  // Create Random number generator for function reordering, seeded with this
  // function's SequenceNumber for determinism per function.
  RandomNumberGenerator RNG(getFlags().getRandomSeed(),
                            RPE_BasicBlockReordering, SequenceNumber);
  // Traverse from entry node.
  getRandomPostOrder(getEntryNode(), ToVisit, ReversedReachable, &RNG);
  // Collect the unreachable nodes (those the traversal never cleared).
  for (CfgNode *Node : Nodes)
    if (ToVisit[Node->getIndex()])
      Unreachable.push_back(Node);
  // Copy the layout list to the Nodes: reversing the post-order yields a
  // (randomized) reverse post-order, with the entry node first.
  NodeList Shuffled;
  Shuffled.reserve(ReversedReachable.size() + Unreachable.size());
  for (CfgNode *Node : reverse_range(ReversedReachable))
    Shuffled.push_back(Node);
  for (CfgNode *Node : Unreachable)
    Shuffled.push_back(Node);
  assert(Nodes.size() == Shuffled.size());
  swapNodes(Shuffled);

  dump("After basic block shuffling");
}
534
Manasij Mukherjee53c8fbd2016-08-01 15:40:42 -0700535void Cfg::localCSE(bool AssumeSSA) {
Manasij Mukherjee032c3152016-05-24 14:25:04 -0700536 // Performs basic-block local common-subexpression elimination
537 // If we have
538 // t1 = op b c
539 // t2 = op b c
540 // This pass will replace future references to t2 in a basic block by t1
541 // Points to note:
Manasij Mukherjee53c8fbd2016-08-01 15:40:42 -0700542 // 1. Assumes SSA by default. To change this, use -lcse=no-ssa
543 // This is needed if this pass is moved to a point later in the pipeline.
544 // If variables have a single definition (in the node), CSE can work just
545 // on the basis of an equality compare on instructions (sans Dest). When
546 // variables can be updated (hence, non-SSA) the result of a previous
547 // instruction which used that variable as an operand can not be reused.
Manasij Mukherjee032c3152016-05-24 14:25:04 -0700548 // 2. Leaves removal of instructions to DCE.
549 // 3. Only enabled on arithmetic instructions. pnacl-clang (-O2) is expected
550 // to take care of cases not arising from GEP simplification.
Manasij Mukherjee53c8fbd2016-08-01 15:40:42 -0700551 // 4. By default, a single pass is made over each basic block. Control this
Manasij Mukherjee032c3152016-05-24 14:25:04 -0700552 // with -lcse-max-iters=N
553
554 TimerMarker T(TimerStack::TT_localCse, this);
555 struct VariableHash {
556 size_t operator()(const Variable *Var) const { return Var->hashValue(); }
557 };
558
559 struct InstHash {
560 size_t operator()(const Inst *Instr) const {
561 auto Kind = Instr->getKind();
562 auto Result =
563 std::hash<typename std::underlying_type<Inst::InstKind>::type>()(
564 Kind);
565 for (SizeT i = 0; i < Instr->getSrcSize(); ++i) {
566 Result ^= Instr->getSrc(i)->hashValue();
567 }
568 return Result;
569 }
570 };
571 struct InstEq {
572 bool srcEq(const Operand *A, const Operand *B) const {
573 if (llvm::isa<Variable>(A) || llvm::isa<Constant>(A))
574 return (A == B);
575 return false;
576 }
577 bool operator()(const Inst *InstrA, const Inst *InstrB) const {
578 if ((InstrA->getKind() != InstrB->getKind()) ||
579 (InstrA->getSrcSize() != InstrB->getSrcSize()))
580 return false;
581
582 if (auto *A = llvm::dyn_cast<InstArithmetic>(InstrA)) {
583 auto *B = llvm::cast<InstArithmetic>(InstrB);
584 // A, B are guaranteed to be of the same 'kind' at this point
585 // So, dyn_cast is not needed
586 if (A->getOp() != B->getOp())
587 return false;
588 }
589 // Does not enter loop if different kind or number of operands
590 for (SizeT i = 0; i < InstrA->getSrcSize(); ++i) {
591 if (!srcEq(InstrA->getSrc(i), InstrB->getSrc(i)))
592 return false;
593 }
594 return true;
595 }
596 };
597
598 for (CfgNode *Node : getNodes()) {
599 CfgUnorderedSet<Inst *, InstHash, InstEq> Seen;
Manasij Mukherjee53c8fbd2016-08-01 15:40:42 -0700600 // Stores currently available instructions.
Manasij Mukherjee032c3152016-05-24 14:25:04 -0700601
602 CfgUnorderedMap<Variable *, Variable *, VariableHash> Replacements;
603 // Combining the above two into a single data structure might consume less
604 // memory but will be slower i.e map of Instruction -> Set of Variables
605
606 CfgUnorderedMap<Variable *, std::vector<Inst *>, VariableHash> Dependency;
Manasij Mukherjee53c8fbd2016-08-01 15:40:42 -0700607 // Maps a variable to the Instructions that depend on it.
608 // a = op1 b c
609 // x = op2 c d
610 // Will result in the map : b -> {a}, c -> {a, x}, d -> {x}
611 // Not necessary for SSA as dependencies will never be invalidated, and the
612 // container will use minimal memory when left unused.
Manasij Mukherjee032c3152016-05-24 14:25:04 -0700613
Manasij Mukherjee53c8fbd2016-08-01 15:40:42 -0700614 auto IterCount = getFlags().getLocalCseMaxIterations();
Manasij Mukherjee032c3152016-05-24 14:25:04 -0700615
Manasij Mukherjee53c8fbd2016-08-01 15:40:42 -0700616 for (uint32_t i = 0; i < IterCount; ++i) {
617 // TODO(manasijm): Stats on IterCount -> performance
Manasij Mukherjee032c3152016-05-24 14:25:04 -0700618 for (Inst &Instr : Node->getInsts()) {
619 if (Instr.isDeleted() || !llvm::isa<InstArithmetic>(&Instr))
620 continue;
Manasij Mukherjee53c8fbd2016-08-01 15:40:42 -0700621 if (!AssumeSSA) {
622 // Invalidate replacements
623 auto Iter = Replacements.find(Instr.getDest());
624 if (Iter != Replacements.end()) {
625 Replacements.erase(Iter);
626 }
Manasij Mukherjee032c3152016-05-24 14:25:04 -0700627
Manasij Mukherjee53c8fbd2016-08-01 15:40:42 -0700628 // Invalidate 'seen' instructions whose operands were just updated.
629 auto DepIter = Dependency.find(Instr.getDest());
630 if (DepIter != Dependency.end()) {
631 for (auto *DepInst : DepIter->second) {
632 Seen.erase(DepInst);
633 }
Manasij Mukherjee032c3152016-05-24 14:25:04 -0700634 }
635 }
Manasij Mukherjee032c3152016-05-24 14:25:04 -0700636
637 // Replace - doing this before checking for repetitions might enable
Manasij Mukherjee53c8fbd2016-08-01 15:40:42 -0700638 // more optimizations
Manasij Mukherjee032c3152016-05-24 14:25:04 -0700639 for (SizeT i = 0; i < Instr.getSrcSize(); ++i) {
640 auto *Opnd = Instr.getSrc(i);
641 if (auto *Var = llvm::dyn_cast<Variable>(Opnd)) {
642 if (Replacements.find(Var) != Replacements.end()) {
643 Instr.replaceSource(i, Replacements[Var]);
644 }
645 }
646 }
647
648 // Check for repetitions
649 auto SeenIter = Seen.find(&Instr);
650 if (SeenIter != Seen.end()) { // seen before
651 const Inst *Found = *SeenIter;
652 Replacements[Instr.getDest()] = Found->getDest();
653 } else { // new
654 Seen.insert(&Instr);
655
Manasij Mukherjee53c8fbd2016-08-01 15:40:42 -0700656 if (!AssumeSSA) {
657 // Update dependencies
658 for (SizeT i = 0; i < Instr.getSrcSize(); ++i) {
659 auto *Opnd = Instr.getSrc(i);
660 if (auto *Var = llvm::dyn_cast<Variable>(Opnd)) {
661 Dependency[Var].push_back(&Instr);
662 }
Manasij Mukherjee032c3152016-05-24 14:25:04 -0700663 }
664 }
665 }
666 }
667 }
668 }
669}
670
Manasij Mukherjeef47d5202016-07-12 16:59:17 -0700671void Cfg::loopInvariantCodeMotion() {
672 TimerMarker T(TimerStack::TT_loopInvariantCodeMotion, this);
673 // Does not introduce new nodes as of now.
Manasij Mukherjeeadf352b2016-07-19 13:31:36 -0700674 for (auto &Loop : LoopInfo) {
675 CfgNode *Header = Loop.Header;
Manasij Mukherjeef47d5202016-07-12 16:59:17 -0700676 assert(Header);
677 if (Header->getLoopNestDepth() < 1)
Manasij Mukherjeeadf352b2016-07-19 13:31:36 -0700678 return;
679 CfgNode *PreHeader = Loop.PreHeader;
Manasij Mukherjeef47d5202016-07-12 16:59:17 -0700680 if (PreHeader == nullptr || PreHeader->getInsts().size() == 0) {
Manasij Mukherjeeadf352b2016-07-19 13:31:36 -0700681 return; // try next loop
Manasij Mukherjeef47d5202016-07-12 16:59:17 -0700682 }
683
684 auto &Insts = PreHeader->getInsts();
685 auto &LastInst = Insts.back();
686 Insts.pop_back();
687
Manasij Mukherjeeadf352b2016-07-19 13:31:36 -0700688 for (auto *Inst : findLoopInvariantInstructions(Loop.Body)) {
Manasij Mukherjeef47d5202016-07-12 16:59:17 -0700689 PreHeader->appendInst(Inst);
690 }
691 PreHeader->appendInst(&LastInst);
692 }
693}
694
Manasij Mukherjeeadf352b2016-07-19 13:31:36 -0700695CfgVector<Inst *>
696Cfg::findLoopInvariantInstructions(const CfgUnorderedSet<SizeT> &Body) {
Manasij Mukherjeef47d5202016-07-12 16:59:17 -0700697 CfgUnorderedSet<Inst *> InvariantInsts;
698 CfgUnorderedSet<Variable *> InvariantVars;
699 for (auto *Var : getArgs()) {
700 InvariantVars.insert(Var);
701 }
702 bool Changed = false;
703 do {
704 Changed = false;
Manasij Mukherjeeadf352b2016-07-19 13:31:36 -0700705 for (auto NodeIndex : Body) {
Manasij Mukherjeef47d5202016-07-12 16:59:17 -0700706 auto *Node = Nodes[NodeIndex];
707 CfgVector<std::reference_wrapper<Inst>> Insts(Node->getInsts().begin(),
708 Node->getInsts().end());
709
710 for (auto &InstRef : Insts) {
711 auto &Inst = InstRef.get();
712 if (Inst.isDeleted() ||
713 InvariantInsts.find(&Inst) != InvariantInsts.end())
714 continue;
715 switch (Inst.getKind()) {
716 case Inst::InstKind::Alloca:
717 case Inst::InstKind::Br:
718 case Inst::InstKind::Ret:
719 case Inst::InstKind::Phi:
720 case Inst::InstKind::Call:
721 case Inst::InstKind::IntrinsicCall:
722 case Inst::InstKind::Load:
723 case Inst::InstKind::Store:
724 case Inst::InstKind::Switch:
725 continue;
726 default:
727 break;
728 }
729
730 bool IsInvariant = true;
731 for (SizeT i = 0; i < Inst.getSrcSize(); ++i) {
732 if (auto *Var = llvm::dyn_cast<Variable>(Inst.getSrc(i))) {
733 if (InvariantVars.find(Var) == InvariantVars.end()) {
734 IsInvariant = false;
735 }
736 }
737 }
738 if (IsInvariant) {
739 Changed = true;
740 InvariantInsts.insert(&Inst);
741 Node->getInsts().remove(Inst);
742 if (Inst.getDest() != nullptr) {
743 InvariantVars.insert(Inst.getDest());
744 }
745 }
746 }
747 }
748 } while (Changed);
749
750 CfgVector<Inst *> InstVector(InvariantInsts.begin(), InvariantInsts.end());
751 std::sort(InstVector.begin(), InstVector.end(),
752 [](Inst *A, Inst *B) { return A->getNumber() < B->getNumber(); });
753 return InstVector;
754}
755
Manasij Mukherjee45f51a22016-06-27 16:12:37 -0700756void Cfg::shortCircuitJumps() {
757 // Split Nodes whenever an early jump is possible.
758 // __N :
759 // a = <something>
760 // Instruction 1 without side effect
761 // ... b = <something> ...
762 // Instruction N without side effect
763 // t1 = or a b
764 // br t1 __X __Y
765 //
766 // is transformed into:
767 // __N :
768 // a = <something>
769 // br a __X __N_ext
770 //
771 // __N_ext :
772 // Instruction 1 without side effect
773 // ... b = <something> ...
774 // Instruction N without side effect
775 // br b __X __Y
776 // (Similar logic for AND, jump to false instead of true target.)
777
778 TimerMarker T(TimerStack::TT_shortCircuit, this);
779 getVMetadata()->init(VMK_Uses);
780 auto NodeStack = this->getNodes();
781 CfgUnorderedMap<SizeT, CfgVector<CfgNode *>> Splits;
782 while (!NodeStack.empty()) {
783 auto *Node = NodeStack.back();
784 NodeStack.pop_back();
785 auto NewNode = Node->shortCircuit();
786 if (NewNode) {
787 NodeStack.push_back(NewNode);
788 NodeStack.push_back(Node);
789 Splits[Node->getIndex()].push_back(NewNode);
790 }
791 }
792
793 // Insert nodes in the right place
794 NodeList NewList;
795 NewList.reserve(Nodes.size());
796 CfgUnorderedSet<SizeT> Inserted;
797 for (auto *Node : Nodes) {
798 if (Inserted.find(Node->getIndex()) != Inserted.end())
799 continue; // already inserted
800 NodeList Stack{Node};
801 while (!Stack.empty()) {
802 auto *Current = Stack.back();
803 Stack.pop_back();
804 Inserted.insert(Current->getIndex());
805 NewList.push_back(Current);
806 for (auto *Next : Splits[Current->getIndex()]) {
807 Stack.push_back(Next);
808 }
809 }
810 }
811
812 SizeT NodeIndex = 0;
813 for (auto *Node : NewList) {
814 Node->resetIndex(NodeIndex++);
815 }
816 Nodes = NewList;
817}
818
Manasij Mukherjee5bcc6ca2016-08-04 14:24:58 -0700819void Cfg::floatConstantCSE() {
820 // Load multiple uses of a floating point constant (between two call
821 // instructions or block start/end) into a variable before its first use.
822 // t1 = b + 1.0
823 // t2 = c + 1.0
824 // Gets transformed to:
825 // t0 = 1.0
826 // t0_1 = t0
827 // t1 = b + t0_1
828 // t2 = c + t0_1
829 // Call instructions reset the procedure, but use the same variable, just in
830 // case it got a register. We are assuming floating point registers are not
831 // callee saved in general. Example, continuing from before:
832 // result = call <some function>
833 // t3 = d + 1.0
834 // Gets transformed to:
835 // result = call <some function>
836 // t0_2 = t0
837 // t3 = d + t0_2
838 // TODO(manasijm, stichnot): Figure out how to 'link' t0 to the stack slot of
839 // 1.0. When t0 does not get a register, introducing an extra assignment
840 // statement does not make sense. The relevant portion is marked below.
841
842 TimerMarker _(TimerStack::TT_floatConstantCse, this);
843 for (CfgNode *Node : getNodes()) {
844
845 CfgUnorderedMap<Constant *, Variable *> ConstCache;
846 auto Current = Node->getInsts().begin();
847 auto End = Node->getInsts().end();
848 while (Current != End) {
Jim Stichnothefdf4122016-08-17 09:12:52 -0700849 CfgUnorderedMap<Constant *, CfgVector<InstList::iterator>> FloatUses;
Manasij Mukherjee5bcc6ca2016-08-04 14:24:58 -0700850 if (llvm::isa<InstCall>(iteratorToInst(Current))) {
851 ++Current;
852 assert(Current != End);
853 // Block should not end with a call
854 }
855 while (Current != End && !llvm::isa<InstCall>(iteratorToInst(Current))) {
Nicolas Capense1e17832016-11-24 23:38:33 -0500856 if (!Current->isDeleted()) {
857 for (SizeT i = 0; i < Current->getSrcSize(); ++i) {
858 if (auto *Const = llvm::dyn_cast<Constant>(Current->getSrc(i))) {
859 if (Const->getType() == IceType_f32 ||
860 Const->getType() == IceType_f64) {
861 FloatUses[Const].push_back(Current);
862 }
Manasij Mukherjee5bcc6ca2016-08-04 14:24:58 -0700863 }
864 }
865 }
Nicolas Capense1e17832016-11-24 23:38:33 -0500866 ++Current;
Manasij Mukherjee5bcc6ca2016-08-04 14:24:58 -0700867 }
868 for (auto &Pair : FloatUses) {
869 static constexpr SizeT MinUseThreshold = 3;
870 if (Pair.second.size() < MinUseThreshold)
871 continue;
872 // Only consider constants with at least `MinUseThreshold` uses
873 auto &Insts = Node->getInsts();
874
875 if (ConstCache.find(Pair.first) == ConstCache.end()) {
876 // Saw a constant (which is used at least twice) for the first time
877 auto *NewVar = makeVariable(Pair.first->getType());
878 // NewVar->setLinkedTo(Pair.first);
879 // TODO(manasijm): Plumbing for linking to an Operand.
880 auto *Assign = InstAssign::create(Node->getCfg(), NewVar, Pair.first);
881 Insts.insert(Pair.second[0], Assign);
882 ConstCache[Pair.first] = NewVar;
883 }
884
885 auto *NewVar = makeVariable(Pair.first->getType());
886 NewVar->setLinkedTo(ConstCache[Pair.first]);
887 auto *Assign =
888 InstAssign::create(Node->getCfg(), NewVar, ConstCache[Pair.first]);
889
890 Insts.insert(Pair.second[0], Assign);
Jim Stichnothefdf4122016-08-17 09:12:52 -0700891 for (auto InstUse : Pair.second) {
Manasij Mukherjee5bcc6ca2016-08-04 14:24:58 -0700892 for (SizeT i = 0; i < InstUse->getSrcSize(); ++i) {
893 if (auto *Const = llvm::dyn_cast<Constant>(InstUse->getSrc(i))) {
894 if (Const == Pair.first) {
895 InstUse->replaceSource(i, NewVar);
896 }
897 }
898 }
899 }
900 }
901 }
902 }
903}
904
Matt Wala45a06232014-07-09 16:33:22 -0700905void Cfg::doArgLowering() {
Jim Stichnoth8363a062014-10-07 10:02:38 -0700906 TimerMarker T(TimerStack::TT_doArgLowering, this);
Matt Wala45a06232014-07-09 16:33:22 -0700907 getTarget()->lowerArguments();
908}
909
Thomas Lively1fd80c72016-06-27 14:47:21 -0700910void Cfg::sortAndCombineAllocas(CfgVector<InstAlloca *> &Allocas,
David Sehr4318a412015-11-11 15:01:55 -0800911 uint32_t CombinedAlignment, InstList &Insts,
912 AllocaBaseVariableType BaseVariableType) {
David Sehre39d0ca2015-11-06 11:25:41 -0800913 if (Allocas.empty())
914 return;
David Sehr4318a412015-11-11 15:01:55 -0800915 // Sort by decreasing alignment.
Thomas Lively1fd80c72016-06-27 14:47:21 -0700916 std::sort(Allocas.begin(), Allocas.end(), [](InstAlloca *A1, InstAlloca *A2) {
917 uint32_t Align1 = A1->getAlignInBytes();
918 uint32_t Align2 = A2->getAlignInBytes();
919 if (Align1 == Align2)
Thomas Livelycbd3dbc2016-08-09 15:02:35 -0700920 return A1->getNumber() < A2->getNumber();
Thomas Lively1fd80c72016-06-27 14:47:21 -0700921 else
922 return Align1 > Align2;
Jim Stichnothc59288b2015-11-09 11:38:40 -0800923 });
David Sehr4318a412015-11-11 15:01:55 -0800924 // Process the allocas in order of decreasing stack alignment. This allows
925 // us to pack less-aligned pieces after more-aligned ones, resulting in less
926 // stack growth. It also allows there to be at most one stack alignment "and"
927 // instruction for a whole list of allocas.
928 uint32_t CurrentOffset = 0;
929 CfgVector<int32_t> Offsets;
Jim Stichnothc59288b2015-11-09 11:38:40 -0800930 for (Inst *Instr : Allocas) {
David Sehre39d0ca2015-11-06 11:25:41 -0800931 auto *Alloca = llvm::cast<InstAlloca>(Instr);
David Sehr4318a412015-11-11 15:01:55 -0800932 // Adjust the size of the allocation up to the next multiple of the
933 // object's alignment.
934 uint32_t Alignment = std::max(Alloca->getAlignInBytes(), 1u);
935 auto *ConstSize =
936 llvm::dyn_cast<ConstantInteger32>(Alloca->getSizeInBytes());
937 uint32_t Size = Utils::applyAlignment(ConstSize->getValue(), Alignment);
938 if (BaseVariableType == BVT_FramePointer) {
939 // Addressing is relative to the frame pointer. Subtract the offset after
940 // adding the size of the alloca, because it grows downwards from the
941 // frame pointer.
Stefan Maksimovic298d14e2017-01-11 05:58:27 -0800942 Offsets.push_back(Target->getFramePointerOffset(CurrentOffset, Size));
David Sehr4318a412015-11-11 15:01:55 -0800943 } else {
944 // Addressing is relative to the stack pointer or to a user pointer. Add
945 // the offset before adding the size of the object, because it grows
John Porto614140e2015-11-23 11:43:13 -0800946 // upwards from the stack pointer. In addition, if the addressing is
947 // relative to the stack pointer, we need to add the pre-computed max out
948 // args size bytes.
949 const uint32_t OutArgsOffsetOrZero =
950 (BaseVariableType == BVT_StackPointer)
951 ? getTarget()->maxOutArgsSizeBytes()
952 : 0;
953 Offsets.push_back(CurrentOffset + OutArgsOffsetOrZero);
David Sehr4318a412015-11-11 15:01:55 -0800954 }
955 // Update the running offset of the fused alloca region.
956 CurrentOffset += Size;
957 }
958 // Round the offset up to the alignment granularity to use as the size.
959 uint32_t TotalSize = Utils::applyAlignment(CurrentOffset, CombinedAlignment);
960 // Ensure every alloca was assigned an offset.
961 assert(Allocas.size() == Offsets.size());
David Sehr2f3b8ec2015-11-16 16:51:39 -0800962
963 switch (BaseVariableType) {
964 case BVT_UserPointer: {
965 Variable *BaseVariable = makeVariable(IceType_i32);
966 for (SizeT i = 0; i < Allocas.size(); ++i) {
967 auto *Alloca = llvm::cast<InstAlloca>(Allocas[i]);
David Sehr4318a412015-11-11 15:01:55 -0800968 // Emit a new addition operation to replace the alloca.
969 Operand *AllocaOffset = Ctx->getConstantInt32(Offsets[i]);
970 InstArithmetic *Add =
971 InstArithmetic::create(this, InstArithmetic::Add, Alloca->getDest(),
972 BaseVariable, AllocaOffset);
973 Insts.push_front(Add);
David Sehr2f3b8ec2015-11-16 16:51:39 -0800974 Alloca->setDeleted();
975 }
976 Operand *AllocaSize = Ctx->getConstantInt32(TotalSize);
977 InstAlloca *CombinedAlloca =
978 InstAlloca::create(this, BaseVariable, AllocaSize, CombinedAlignment);
979 CombinedAlloca->setKnownFrameOffset();
980 Insts.push_front(CombinedAlloca);
981 } break;
982 case BVT_StackPointer:
983 case BVT_FramePointer: {
984 for (SizeT i = 0; i < Allocas.size(); ++i) {
985 auto *Alloca = llvm::cast<InstAlloca>(Allocas[i]);
David Sehr4318a412015-11-11 15:01:55 -0800986 // Emit a fake definition of the rematerializable variable.
987 Variable *Dest = Alloca->getDest();
Jim Stichnoth54f3d512015-12-11 09:53:00 -0800988 auto *Def = InstFakeDef::create(this, Dest);
David Sehr2f3b8ec2015-11-16 16:51:39 -0800989 if (BaseVariableType == BVT_StackPointer)
990 Dest->setRematerializable(getTarget()->getStackReg(), Offsets[i]);
991 else
992 Dest->setRematerializable(getTarget()->getFrameReg(), Offsets[i]);
David Sehr4318a412015-11-11 15:01:55 -0800993 Insts.push_front(Def);
David Sehr2f3b8ec2015-11-16 16:51:39 -0800994 Alloca->setDeleted();
David Sehr4318a412015-11-11 15:01:55 -0800995 }
David Sehr2f3b8ec2015-11-16 16:51:39 -0800996 // Allocate the fixed area in the function prolog.
997 getTarget()->reserveFixedAllocaArea(TotalSize, CombinedAlignment);
David Sehr4318a412015-11-11 15:01:55 -0800998 } break;
David Sehr4318a412015-11-11 15:01:55 -0800999 }
David Sehre39d0ca2015-11-06 11:25:41 -08001000}
1001
David Sehr4318a412015-11-11 15:01:55 -08001002void Cfg::processAllocas(bool SortAndCombine) {
Jim Stichnothb88d8c82016-03-11 15:33:00 -08001003 TimerMarker _(TimerStack::TT_alloca, this);
David Sehre39d0ca2015-11-06 11:25:41 -08001004 const uint32_t StackAlignment = getTarget()->getStackAlignment();
1005 CfgNode *EntryNode = getEntryNode();
Eric Holk67c7c412016-04-15 13:05:37 -07001006 assert(EntryNode);
David Sehre39d0ca2015-11-06 11:25:41 -08001007 // LLVM enforces power of 2 alignment.
1008 assert(llvm::isPowerOf2_32(StackAlignment));
Nicolas Capens4e679e52017-01-12 17:01:06 -05001009 // If the ABI's stack alignment is smaller than the vector size (16 bytes),
1010 // conservatively use a frame pointer to allow for explicit alignment of the
1011 // stack pointer. This needs to happen before register allocation so the frame
1012 // pointer can be reserved.
1013 if (getTarget()->needsStackPointerAlignment()) {
1014 getTarget()->setHasFramePointer();
1015 }
David Sehr4318a412015-11-11 15:01:55 -08001016 // Determine if there are large alignment allocations in the entry block or
1017 // dynamic allocations (variable size in the entry block).
1018 bool HasLargeAlignment = false;
1019 bool HasDynamicAllocation = false;
David Sehre39d0ca2015-11-06 11:25:41 -08001020 for (Inst &Instr : EntryNode->getInsts()) {
Thomas Lively1fd80c72016-06-27 14:47:21 -07001021 if (Instr.isDeleted())
1022 continue;
David Sehre39d0ca2015-11-06 11:25:41 -08001023 if (auto *Alloca = llvm::dyn_cast<InstAlloca>(&Instr)) {
David Sehre39d0ca2015-11-06 11:25:41 -08001024 uint32_t AlignmentParam = Alloca->getAlignInBytes();
David Sehr4318a412015-11-11 15:01:55 -08001025 if (AlignmentParam > StackAlignment)
1026 HasLargeAlignment = true;
1027 if (llvm::isa<Constant>(Alloca->getSizeInBytes()))
1028 Alloca->setKnownFrameOffset();
1029 else {
1030 HasDynamicAllocation = true;
1031 // If Allocas are not sorted, the first dynamic allocation causes
1032 // later Allocas to be at unknown offsets relative to the stack/frame.
1033 if (!SortAndCombine)
1034 break;
1035 }
David Sehre39d0ca2015-11-06 11:25:41 -08001036 }
1037 }
David Sehr4318a412015-11-11 15:01:55 -08001038 // Don't do the heavyweight sorting and layout for low optimization levels.
1039 if (!SortAndCombine)
1040 return;
1041 // Any alloca outside the entry block is a dynamic allocation.
David Sehre39d0ca2015-11-06 11:25:41 -08001042 for (CfgNode *Node : Nodes) {
1043 if (Node == EntryNode)
1044 continue;
1045 for (Inst &Instr : Node->getInsts()) {
Thomas Lively1fd80c72016-06-27 14:47:21 -07001046 if (Instr.isDeleted())
1047 continue;
David Sehre39d0ca2015-11-06 11:25:41 -08001048 if (llvm::isa<InstAlloca>(&Instr)) {
1049 // Allocations outside the entry block require a frame pointer.
David Sehr4318a412015-11-11 15:01:55 -08001050 HasDynamicAllocation = true;
David Sehre39d0ca2015-11-06 11:25:41 -08001051 break;
1052 }
1053 }
David Sehr4318a412015-11-11 15:01:55 -08001054 if (HasDynamicAllocation)
David Sehre39d0ca2015-11-06 11:25:41 -08001055 break;
1056 }
1057 // Mark the target as requiring a frame pointer.
David Sehr4318a412015-11-11 15:01:55 -08001058 if (HasLargeAlignment || HasDynamicAllocation)
David Sehre39d0ca2015-11-06 11:25:41 -08001059 getTarget()->setHasFramePointer();
David Sehr4318a412015-11-11 15:01:55 -08001060 // Collect the Allocas into the two vectors.
1061 // Allocas in the entry block that have constant size and alignment less
1062 // than or equal to the function's stack alignment.
Thomas Lively1fd80c72016-06-27 14:47:21 -07001063 CfgVector<InstAlloca *> FixedAllocas;
David Sehr4318a412015-11-11 15:01:55 -08001064 // Allocas in the entry block that have constant size and alignment greater
1065 // than the function's stack alignment.
Thomas Lively1fd80c72016-06-27 14:47:21 -07001066 CfgVector<InstAlloca *> AlignedAllocas;
David Sehr2f3b8ec2015-11-16 16:51:39 -08001067 // Maximum alignment used by any alloca.
David Sehr4318a412015-11-11 15:01:55 -08001068 uint32_t MaxAlignment = StackAlignment;
1069 for (Inst &Instr : EntryNode->getInsts()) {
Thomas Lively1fd80c72016-06-27 14:47:21 -07001070 if (Instr.isDeleted())
1071 continue;
David Sehr4318a412015-11-11 15:01:55 -08001072 if (auto *Alloca = llvm::dyn_cast<InstAlloca>(&Instr)) {
1073 if (!llvm::isa<Constant>(Alloca->getSizeInBytes()))
1074 continue;
1075 uint32_t AlignmentParam = Alloca->getAlignInBytes();
1076 // For default align=0, set it to the real value 1, to avoid any
1077 // bit-manipulation problems below.
1078 AlignmentParam = std::max(AlignmentParam, 1u);
1079 assert(llvm::isPowerOf2_32(AlignmentParam));
1080 if (HasDynamicAllocation && AlignmentParam > StackAlignment) {
1081 // If we have both dynamic allocations and large stack alignments,
1082 // high-alignment allocations are pulled out with their own base.
1083 AlignedAllocas.push_back(Alloca);
1084 } else {
1085 FixedAllocas.push_back(Alloca);
1086 }
1087 MaxAlignment = std::max(AlignmentParam, MaxAlignment);
1088 }
1089 }
David Sehre39d0ca2015-11-06 11:25:41 -08001090 // Add instructions to the head of the entry block in reverse order.
1091 InstList &Insts = getEntryNode()->getInsts();
David Sehr4318a412015-11-11 15:01:55 -08001092 if (HasDynamicAllocation && HasLargeAlignment) {
Nicolas Capens4e679e52017-01-12 17:01:06 -05001093 // We are using a frame pointer, but fixed large-alignment alloca addresses
David Sehr4318a412015-11-11 15:01:55 -08001094 // do not have a known offset from either the stack or frame pointer.
1095 // They grow up from a user pointer from an alloca.
1096 sortAndCombineAllocas(AlignedAllocas, MaxAlignment, Insts, BVT_UserPointer);
David Sehr2f3b8ec2015-11-16 16:51:39 -08001097 // Fixed size allocas are addressed relative to the frame pointer.
1098 sortAndCombineAllocas(FixedAllocas, StackAlignment, Insts,
1099 BVT_FramePointer);
1100 } else {
1101 // Otherwise, fixed size allocas are addressed relative to the stack unless
1102 // there are dynamic allocas.
1103 const AllocaBaseVariableType BasePointerType =
1104 (HasDynamicAllocation ? BVT_FramePointer : BVT_StackPointer);
1105 sortAndCombineAllocas(FixedAllocas, MaxAlignment, Insts, BasePointerType);
David Sehr4318a412015-11-11 15:01:55 -08001106 }
Jim Stichnoth3607b6c2015-11-13 14:28:23 -08001107 if (!FixedAllocas.empty() || !AlignedAllocas.empty())
1108 // No use calling findRematerializable() unless there is some
1109 // rematerializable alloca instruction to seed it.
1110 findRematerializable();
1111}
1112
1113namespace {
1114
1115// Helpers for findRematerializable(). For each of them, if a suitable
1116// rematerialization is found, the instruction's Dest variable is set to be
1117// rematerializable and it returns true, otherwise it returns false.
1118
1119bool rematerializeArithmetic(const Inst *Instr) {
1120 // Check that it's an Arithmetic instruction with an Add operation.
1121 auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr);
1122 if (Arith == nullptr || Arith->getOp() != InstArithmetic::Add)
1123 return false;
1124 // Check that Src(0) is rematerializable.
1125 auto *Src0Var = llvm::dyn_cast<Variable>(Arith->getSrc(0));
1126 if (Src0Var == nullptr || !Src0Var->isRematerializable())
1127 return false;
1128 // Check that Src(1) is an immediate.
1129 auto *Src1Imm = llvm::dyn_cast<ConstantInteger32>(Arith->getSrc(1));
1130 if (Src1Imm == nullptr)
1131 return false;
1132 Arith->getDest()->setRematerializable(
1133 Src0Var->getRegNum(), Src0Var->getStackOffset() + Src1Imm->getValue());
1134 return true;
1135}
1136
1137bool rematerializeAssign(const Inst *Instr) {
1138 // An InstAssign only originates from an inttoptr or ptrtoint instruction,
1139 // which never occurs in a MINIMAL build.
1140 if (BuildDefs::minimal())
1141 return false;
1142 // Check that it's an Assign instruction.
1143 if (!llvm::isa<InstAssign>(Instr))
1144 return false;
1145 // Check that Src(0) is rematerializable.
1146 auto *Src0Var = llvm::dyn_cast<Variable>(Instr->getSrc(0));
1147 if (Src0Var == nullptr || !Src0Var->isRematerializable())
1148 return false;
1149 Instr->getDest()->setRematerializable(Src0Var->getRegNum(),
1150 Src0Var->getStackOffset());
1151 return true;
1152}
1153
1154bool rematerializeCast(const Inst *Instr) {
1155 // An pointer-type bitcast never occurs in a MINIMAL build.
1156 if (BuildDefs::minimal())
1157 return false;
1158 // Check that it's a Cast instruction with a Bitcast operation.
1159 auto *Cast = llvm::dyn_cast<InstCast>(Instr);
1160 if (Cast == nullptr || Cast->getCastKind() != InstCast::Bitcast)
1161 return false;
1162 // Check that Src(0) is rematerializable.
1163 auto *Src0Var = llvm::dyn_cast<Variable>(Cast->getSrc(0));
1164 if (Src0Var == nullptr || !Src0Var->isRematerializable())
1165 return false;
1166 // Check that Dest and Src(0) have the same type.
1167 Variable *Dest = Cast->getDest();
1168 if (Dest->getType() != Src0Var->getType())
1169 return false;
1170 Dest->setRematerializable(Src0Var->getRegNum(), Src0Var->getStackOffset());
1171 return true;
1172}
1173
1174} // end of anonymous namespace
1175
1176/// Scan the function to find additional rematerializable variables. This is
1177/// possible when the source operand of an InstAssignment is a rematerializable
1178/// variable, or the same for a pointer-type InstCast::Bitcast, or when an
1179/// InstArithmetic is an add of a rematerializable variable and an immediate.
1180/// Note that InstAssignment instructions and pointer-type InstCast::Bitcast
1181/// instructions generally only come about from the IceConverter's treatment of
1182/// inttoptr, ptrtoint, and bitcast instructions. TODO(stichnot): Consider
1183/// other possibilities, however unlikely, such as InstArithmetic::Sub, or
1184/// commutativity.
1185void Cfg::findRematerializable() {
1186 // Scan the instructions in order, and repeat until no new opportunities are
1187 // found. It may take more than one iteration because a variable's defining
1188 // block may happen to come after a block where it is used, depending on the
1189 // CfgNode linearization order.
1190 bool FoundNewAssignment;
1191 do {
1192 FoundNewAssignment = false;
1193 for (CfgNode *Node : getNodes()) {
1194 // No need to process Phi instructions.
1195 for (Inst &Instr : Node->getInsts()) {
1196 if (Instr.isDeleted())
1197 continue;
1198 Variable *Dest = Instr.getDest();
1199 if (Dest == nullptr || Dest->isRematerializable())
1200 continue;
1201 if (rematerializeArithmetic(&Instr) || rematerializeAssign(&Instr) ||
1202 rematerializeCast(&Instr)) {
1203 FoundNewAssignment = true;
1204 }
1205 }
1206 }
1207 } while (FoundNewAssignment);
David Sehre39d0ca2015-11-06 11:25:41 -08001208}
1209
Jim Stichnothd97c7df2014-06-04 11:57:08 -07001210void Cfg::doAddressOpt() {
Jim Stichnoth8363a062014-10-07 10:02:38 -07001211 TimerMarker T(TimerStack::TT_doAddressOpt, this);
Jim Stichnothf44f3712014-10-01 14:05:51 -07001212 for (CfgNode *Node : Nodes)
1213 Node->doAddressOpt();
Jim Stichnothd97c7df2014-06-04 11:57:08 -07001214}
1215
John Portoa47c11c2016-04-21 05:53:42 -07001216namespace {
1217// ShuffleVectorUtils implements helper functions for rematerializing
1218// shufflevector instructions from a sequence of extractelement/insertelement
1219// instructions. It looks for the following pattern:
1220//
1221// %t0 = extractelement A, %n0
1222// %t1 = extractelement B, %n1
1223// %t2 = extractelement C, %n2
1224// ...
1225// %tN = extractelement N, %nN
1226// %d0 = insertelement undef, %t0, 0
1227// %d1 = insertelement %d0, %t1, 1
1228// %d2 = insertelement %d1, %t2, 2
1229// ...
1230// %dest = insertelement %d_N-1, %tN, N
1231//
1232// where N is num_element(typeof(%dest)), and A, B, C, ... N are at most two
1233// distinct variables.
1234namespace ShuffleVectorUtils {
1235// findAllInserts is used when searching for all the insertelements that are
1236// used in a shufflevector operation. This function works recursively, when
1237// invoked with I = i, the function assumes Insts[i] is the last found
1238// insertelement in the chain. The next insertelement insertruction is saved in
1239// Insts[i+1].
1240bool findAllInserts(Cfg *Func, GlobalContext *Ctx, VariablesMetadata *VM,
1241 CfgVector<const Inst *> *Insts, SizeT I = 0) {
1242 const bool Verbose = BuildDefs::dump() && Func->isVerbose(IceV_ShufMat);
1243
1244 if (I > Insts->size()) {
1245 if (Verbose) {
1246 Ctx->getStrDump() << "\tToo many inserts.\n";
1247 }
1248 return false;
1249 }
1250
1251 const auto *LastInsert = Insts->at(I);
1252 assert(llvm::isa<InstInsertElement>(LastInsert));
1253
1254 if (I == Insts->size() - 1) {
1255 // Matching against undef is not really needed because the value in Src(0)
1256 // will be totally overwritten. We still enforce it anyways because the
1257 // PNaCl toolchain generates the bitcode with it.
1258 if (!llvm::isa<ConstantUndef>(LastInsert->getSrc(0))) {
1259 if (Verbose) {
1260 Ctx->getStrDump() << "\tSrc0 is not undef: " << I << " "
1261 << Insts->size();
1262 LastInsert->dump(Func);
1263 Ctx->getStrDump() << "\n";
1264 }
1265 return false;
1266 }
1267
1268 // The following loop ensures that the insertelements are sorted. In theory,
1269 // we could relax this restriction and allow any order. As long as each
1270 // index appears exactly once, this chain is still a candidate for becoming
1271 // a shufflevector. The Insts vector is traversed backwards because the
1272 // instructions are "enqueued" in reverse order.
1273 int32_t ExpectedElement = 0;
1274 for (const auto *I : reverse_range(*Insts)) {
1275 if (llvm::cast<ConstantInteger32>(I->getSrc(2))->getValue() !=
1276 ExpectedElement) {
1277 return false;
1278 }
1279 ++ExpectedElement;
1280 }
1281 return true;
1282 }
1283
1284 const auto *Src0V = llvm::cast<Variable>(LastInsert->getSrc(0));
1285 const auto *Def = VM->getSingleDefinition(Src0V);
1286
1287 // Only optimize if the first operand in
1288 //
1289 // Dest = insertelement A, B, 10
1290 //
1291 // is singly-def'ed.
1292 if (Def == nullptr) {
1293 if (Verbose) {
1294 Ctx->getStrDump() << "\tmulti-def: ";
1295 (*Insts)[I]->dump(Func);
1296 Ctx->getStrDump() << "\n";
1297 }
1298 return false;
1299 }
1300
1301 // We also require the (single) definition to come from an insertelement
1302 // instruction.
1303 if (!llvm::isa<InstInsertElement>(Def)) {
1304 if (Verbose) {
1305 Ctx->getStrDump() << "\tnot insert element: ";
1306 Def->dump(Func);
1307 Ctx->getStrDump() << "\n";
1308 }
1309 return false;
1310 }
1311
1312 // Everything seems fine, so we save Def in Insts, and delegate the decision
1313 // to findAllInserts.
1314 (*Insts)[I + 1] = Def;
1315
1316 return findAllInserts(Func, Ctx, VM, Insts, I + 1);
1317}
1318
1319// insertsLastElement returns true if Insert is inserting an element in the last
1320// position of a vector.
1321bool insertsLastElement(const Inst &Insert) {
1322 const Type DestTy = Insert.getDest()->getType();
1323 assert(isVectorType(DestTy));
1324 const SizeT Elem =
1325 llvm::cast<ConstantInteger32>(Insert.getSrc(2))->getValue();
1326 return Elem == typeNumElements(DestTy) - 1;
1327}
1328
// findAllExtracts goes over all the insertelement instructions that are
// candidates to be replaced by a shufflevector, and searches for all the
// definitions of the elements being inserted. If all of the elements are the
// result of an extractelement instruction, and all of the extractelements
// operate on at most two different sources, then the instructions can be
// replaced by a shufflevector.
//
// On success, *Src0 and *Src1 hold the (at most two) distinct source vectors
// of the extractelements, and (*Extracts)[I] is the extractelement defining
// the element inserted by Insts[I]. If only one source vector was seen, *Src1
// is set equal to *Src0 to simplify downstream handling. Returns false (with
// the output parameters in an unspecified state) if the pattern does not
// match; when dumping is enabled, the reason is written to the dump stream.
bool findAllExtracts(Cfg *Func, GlobalContext *Ctx, VariablesMetadata *VM,
                     const CfgVector<const Inst *> &Insts, Variable **Src0,
                     Variable **Src1, CfgVector<const Inst *> *Extracts) {
  const bool Verbose = BuildDefs::dump() && Func->isVerbose(IceV_ShufMat);

  *Src0 = nullptr;
  *Src1 = nullptr;
  assert(Insts.size() > 0);
  for (SizeT I = 0; I < Insts.size(); ++I) {
    const auto *Insert = Insts.at(I);
    // The element being inserted (operand 1) must itself be a variable so we
    // can chase its (single) definition.
    const auto *Src1V = llvm::dyn_cast<Variable>(Insert->getSrc(1));
    if (Src1V == nullptr) {
      if (Verbose) {
        Ctx->getStrDump() << "src(1) is not a variable: ";
        Insert->dump(Func);
        Ctx->getStrDump() << "\n";
      }
      return false;
    }

    // Multi-def elements cannot be traced back to one extractelement.
    const auto *Def = VM->getSingleDefinition(Src1V);
    if (Def == nullptr) {
      if (Verbose) {
        Ctx->getStrDump() << "multi-def src(1): ";
        Insert->dump(Func);
        Ctx->getStrDump() << "\n";
      }
      return false;
    }

    if (!llvm::isa<InstExtractElement>(Def)) {
      if (Verbose) {
        Ctx->getStrDump() << "not extractelement: ";
        Def->dump(Func);
        Ctx->getStrDump() << "\n";
      }
      return false;
    }

    // Track the (at most two) distinct vectors the extracts read from.
    auto *Src = llvm::cast<Variable>(Def->getSrc(0));
    if (*Src0 == nullptr) {
      // No sources yet. Save Src to Src0.
      *Src0 = Src;
    } else if (*Src1 == nullptr) {
      // We already have a source, so we might save Src in Src1 -- but only if
      // Src0 is not Src.
      if (*Src0 != Src) {
        *Src1 = Src;
      }
    } else if (Src != *Src0 && Src != *Src1) {
      // More than two sources, so we can't rematerialize the shufflevector
      // instruction.
      if (Verbose) {
        Ctx->getStrDump() << "Can't shuffle more than two sources.\n";
      }
      return false;
    }

    (*Extracts)[I] = Def;
  }

  // We should have seen at least one source operand.
  assert(*Src0 != nullptr);

  // If a second source was not seen, then we just make Src1 = Src0 to simplify
  // things down stream. This should not matter, as all of the indexes in the
  // shufflevector instruction will point to Src0.
  if (*Src1 == nullptr) {
    *Src1 = *Src0;
  }

  return true;
}
1408
1409} // end of namespace ShuffleVectorUtils
1410} // end of anonymous namespace
1411
// Pattern-matches chains of insertelement instructions and, when a whole
// chain can be expressed as a shuffle of at most two source vectors, emits a
// single InstShuffleVector in its place. The matching is delegated to
// ShuffleVectorUtils::findAllInserts() (collects the full insertelement
// chain) and ShuffleVectorUtils::findAllExtracts() (verifies each inserted
// element comes from an extractelement over at most two sources).
void Cfg::materializeVectorShuffles() {
  const bool Verbose = BuildDefs::dump() && isVerbose(IceV_ShufMat);

  // When verbose, hold the dump-stream lock for the duration of the pass.
  std::unique_ptr<OstreamLocker> L;
  if (Verbose) {
    L.reset(new OstreamLocker(getContext()));
    getContext()->getStrDump() << "\nShuffle materialization:\n";
  }

  // MaxVectorElements is the maximum number of elements in the vector types
  // handled by Subzero. We use it to create the Inserts and Extracts vectors
  // with the appropriate size, thus avoiding resize() calls.
  const SizeT MaxVectorElements = typeNumElements(IceType_v16i8);
  CfgVector<const Inst *> Inserts(MaxVectorElements);
  CfgVector<const Inst *> Extracts(MaxVectorElements);

  TimerMarker T(TimerStack::TT_materializeVectorShuffles, this);
  for (CfgNode *Node : Nodes) {
    for (auto &Instr : Node->getInsts()) {
      if (!llvm::isa<InstInsertElement>(Instr)) {
        continue;
      }
      if (!ShuffleVectorUtils::insertsLastElement(Instr)) {
        // To avoid wasting time, we only start the pattern match at the last
        // insertelement instruction -- and go backwards from there.
        continue;
      }
      if (Verbose) {
        getContext()->getStrDump() << "\tCandidate: ";
        Instr.dump(this);
        getContext()->getStrDump() << "\n";
      }
      // Slot 0 holds the last insert; findAllInserts fills in the rest of the
      // chain in reverse order.
      Inserts.resize(typeNumElements(Instr.getDest()->getType()));
      Inserts[0] = &Instr;
      if (!ShuffleVectorUtils::findAllInserts(this, getContext(),
                                              VMetadata.get(), &Inserts)) {
        // If we fail to find a sequence of insertelements, we stop the
        // optimization.
        if (Verbose) {
          getContext()->getStrDump() << "\tFalse alarm.\n";
        }
        continue;
      }
      if (Verbose) {
        getContext()->getStrDump() << "\tFound the following insertelement: \n";
        for (auto *I : reverse_range(Inserts)) {
          getContext()->getStrDump() << "\t\t";
          I->dump(this);
          getContext()->getStrDump() << "\n";
        }
      }
      Extracts.resize(Inserts.size());
      Variable *Src0;
      Variable *Src1;
      if (!ShuffleVectorUtils::findAllExtracts(this, getContext(),
                                               VMetadata.get(), Inserts, &Src0,
                                               &Src1, &Extracts)) {
        // If we fail to match the definitions of the insertelements' sources
        // with extractelement instructions -- or if those instructions operate
        // on more than two different variables -- we stop the optimization.
        if (Verbose) {
          getContext()->getStrDump() << "\tFailed to match extractelements.\n";
        }
        continue;
      }
      if (Verbose) {
        getContext()->getStrDump()
            << "\tFound the following insert/extract element pairs: \n";
        for (SizeT I = 0; I < Inserts.size(); ++I) {
          const SizeT Pos = Inserts.size() - I - 1;
          getContext()->getStrDump() << "\t\tInsert : ";
          Inserts[Pos]->dump(this);
          getContext()->getStrDump() << "\n\t\tExtract: ";
          Extracts[Pos]->dump(this);
          getContext()->getStrDump() << "\n";
        }
      }

      assert(Src0 != nullptr);
      assert(Src1 != nullptr);

      auto *ShuffleVector =
          InstShuffleVector::create(this, Instr.getDest(), Src0, Src1);
      assert(ShuffleVector->getSrc(0) == Src0);
      assert(ShuffleVector->getSrc(1) == Src1);
      // Build the index list in element order (Pos runs from low to high).
      // Indexes into Src1 are offset by the vector width, matching the
      // two-operand shufflevector convention.
      for (SizeT I = 0; I < Extracts.size(); ++I) {
        const SizeT Pos = Extracts.size() - I - 1;
        auto *Index = llvm::cast<ConstantInteger32>(Extracts[Pos]->getSrc(1));
        if (Src0 == Extracts[Pos]->getSrc(0)) {
          ShuffleVector->addIndex(Index);
        } else {
          ShuffleVector->addIndex(llvm::cast<ConstantInteger32>(
              Ctx->getConstantInt32(Index->getValue() + Extracts.size())));
        }
      }

      if (Verbose) {
        getContext()->getStrDump() << "Created: ";
        ShuffleVector->dump(this);
        getContext()->getStrDump() << "\n";
      }

      // Replace the final insertelement with the shufflevector: mark it
      // deleted and insert the new instruction at its position.
      Instr.setDeleted();
      auto &LoweringContext = getTarget()->getContext();
      LoweringContext.setInsertPoint(instToIterator(&Instr));
      LoweringContext.insert(ShuffleVector);
    }
  }
}
1521
Matt Walac3302742014-08-15 16:21:56 -07001522void Cfg::doNopInsertion() {
Karl Schimpfd4699942016-04-02 09:55:31 -07001523 if (!getFlags().getShouldDoNopInsertion())
Qining Lu969f6a32015-07-31 09:58:34 -07001524 return;
Jim Stichnoth8363a062014-10-07 10:02:38 -07001525 TimerMarker T(TimerStack::TT_doNopInsertion, this);
Karl Schimpfd4699942016-04-02 09:55:31 -07001526 RandomNumberGenerator RNG(getFlags().getRandomSeed(), RPE_NopInsertion,
Qining Luaee5fa82015-08-20 14:59:03 -07001527 SequenceNumber);
Jim Stichnothf44f3712014-10-01 14:05:51 -07001528 for (CfgNode *Node : Nodes)
Qining Luaee5fa82015-08-20 14:59:03 -07001529 Node->doNopInsertion(RNG);
Matt Walac3302742014-08-15 16:21:56 -07001530}
1531
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001532void Cfg::genCode() {
Jim Stichnoth8363a062014-10-07 10:02:38 -07001533 TimerMarker T(TimerStack::TT_genCode, this);
Jim Stichnothf44f3712014-10-01 14:05:51 -07001534 for (CfgNode *Node : Nodes)
1535 Node->genCode();
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001536}
1537
1538// Compute the stack frame layout.
1539void Cfg::genFrame() {
Jim Stichnoth8363a062014-10-07 10:02:38 -07001540 TimerMarker T(TimerStack::TT_genFrame, this);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001541 getTarget()->addProlog(Entry);
Jim Stichnothf44f3712014-10-01 14:05:51 -07001542 for (CfgNode *Node : Nodes)
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001543 if (Node->getHasReturn())
1544 getTarget()->addEpilog(Node);
Jim Stichnoth5bc2b1d2014-05-22 13:38:48 -07001545}
1546
// Computes the loop-nest information for this Cfg and caches it in LoopInfo.
void Cfg::generateLoopInfo() {
  TimerMarker T(TimerStack::TT_computeLoopNestDepth, this);
  LoopInfo = ComputeLoopInfo(this);
}
1551
Andrew Scull57e12682015-09-16 11:30:19 -07001552// This is a lightweight version of live-range-end calculation. Marks the last
Andrew Scullaa6c1092015-09-03 17:50:30 -07001553// use of only those variables whose definition and uses are completely with a
Andrew Scull57e12682015-09-16 11:30:19 -07001554// single block. It is a quick single pass and doesn't need to iterate until
Andrew Scullaa6c1092015-09-03 17:50:30 -07001555// convergence.
Jim Stichnothd97c7df2014-06-04 11:57:08 -07001556void Cfg::livenessLightweight() {
Jim Stichnoth8363a062014-10-07 10:02:38 -07001557 TimerMarker T(TimerStack::TT_livenessLightweight, this);
Jim Stichnoth877b04e2014-10-15 15:13:06 -07001558 getVMetadata()->init(VMK_Uses);
Jim Stichnothf44f3712014-10-01 14:05:51 -07001559 for (CfgNode *Node : Nodes)
1560 Node->livenessLightweight();
Jim Stichnothd97c7df2014-06-04 11:57:08 -07001561}
1562
// Performs liveness analysis, iterating over the nodes until the per-block
// liveness sets converge, then deletes dead instructions (including Phis).
// In Liveness_Intervals mode it additionally rebuilds each Variable's live
// range from the per-node first/last instruction numbers.
void Cfg::liveness(LivenessMode Mode) {
  TimerMarker T(TimerStack::TT_liveness, this);
  // Destroying the previous (if any) Liveness information clears the Liveness
  // allocator TLS pointer.
  Live = nullptr;
  Live = Liveness::create(this, Mode);

  getVMetadata()->init(VMK_Uses);
  Live->init();

  // Initialize with all nodes needing to be processed.
  BitVector NeedToProcess(Nodes.size(), true);
  while (NeedToProcess.any()) {
    // Iterate in reverse topological order to speed up convergence.
    for (CfgNode *Node : reverse_range(Nodes)) {
      if (NeedToProcess[Node->getIndex()]) {
        NeedToProcess[Node->getIndex()] = false;
        bool Changed = Node->liveness(getLiveness());
        if (Changed) {
          // If the beginning-of-block liveness changed since the last
          // iteration, mark all in-edges as needing to be processed.
          for (CfgNode *Pred : Node->getInEdges())
            NeedToProcess[Pred->getIndex()] = true;
        }
      }
    }
  }
  if (Mode == Liveness_Intervals) {
    // Reset each variable's live range.
    for (Variable *Var : Variables)
      Var->resetLiveRange();
  }
  // Make a final pass over each node to delete dead instructions, collect the
  // first and last instruction numbers, and add live range segments for that
  // node.
  for (CfgNode *Node : Nodes) {
    InstNumberT FirstInstNum = Inst::NumberSentinel;
    InstNumberT LastInstNum = Inst::NumberSentinel;
    // Phis are numbered before regular instructions, so scan them first.
    for (Inst &I : Node->getPhis()) {
      I.deleteIfDead();
      if (Mode == Liveness_Intervals && !I.isDeleted()) {
        if (FirstInstNum == Inst::NumberSentinel)
          FirstInstNum = I.getNumber();
        assert(I.getNumber() > LastInstNum);
        LastInstNum = I.getNumber();
      }
    }
    for (Inst &I : Node->getInsts()) {
      I.deleteIfDead();
      if (Mode == Liveness_Intervals && !I.isDeleted()) {
        if (FirstInstNum == Inst::NumberSentinel)
          FirstInstNum = I.getNumber();
        assert(I.getNumber() > LastInstNum);
        LastInstNum = I.getNumber();
      }
    }
    if (Mode == Liveness_Intervals) {
      // Special treatment for live in-args. Their liveness needs to extend
      // beyond the beginning of the function, otherwise an arg whose only use
      // is in the first instruction will end up having the trivial live range
      // [2,2) and will *not* interfere with other arguments. So if the first
      // instruction of the method is "r=arg1+arg2", both args may be assigned
      // the same register. This is accomplished by extending the entry block's
      // instruction range from [2,n) to [1,n) which will transform the
      // problematic [2,2) live ranges into [1,2). This extension works because
      // the entry node is guaranteed to have the lowest instruction numbers.
      if (Node == getEntryNode()) {
        FirstInstNum = Inst::NumberExtended;
        // Just in case the entry node somehow contains no instructions...
        if (LastInstNum == Inst::NumberSentinel)
          LastInstNum = FirstInstNum;
      }
      // If this node somehow contains no instructions, don't bother trying to
      // add liveness intervals for it, because variables that are live-in and
      // live-out will have a bogus interval added.
      if (FirstInstNum != Inst::NumberSentinel)
        Node->livenessAddIntervals(getLiveness(), FirstInstNum, LastInstNum);
    }
  }
}
1643
// Traverse every Variable of every Inst and verify that it appears within the
// Variable's computed live range. Returns true if all checks pass; otherwise
// dumps a diagnostic for each violation and returns false.
bool Cfg::validateLiveness() const {
  TimerMarker T(TimerStack::TT_validateLiveness, this);
  bool Valid = true;
  OstreamLocker L(Ctx);
  Ostream &Str = Ctx->getStrDump();
  for (CfgNode *Node : Nodes) {
    // Track the first non-deleted instruction of the block, which gets an
    // exception in the range-begin check below.
    Inst *FirstInst = nullptr;
    for (Inst &Instr : Node->getInsts()) {
      if (Instr.isDeleted())
        continue;
      if (FirstInst == nullptr)
        FirstInst = &Instr;
      InstNumberT InstNumber = Instr.getNumber();
      if (Variable *Dest = Instr.getDest()) {
        if (!Dest->getIgnoreLiveness()) {
          bool Invalid = false;
          constexpr bool IsDest = true;
          // The definition point must lie inside Dest's live range.
          if (!Dest->getLiveRange().containsValue(InstNumber, IsDest))
            Invalid = true;
          // Check that this instruction actually *begins* Dest's live range,
          // by checking that Dest is not live in the previous instruction. As
          // a special exception, we don't check this for the first instruction
          // of the block, because a Phi temporary may be live at the end of
          // the previous block, and if it is also assigned in the first
          // instruction of this block, the adjacent live ranges get merged.
          if (&Instr != FirstInst && !Instr.isDestRedefined() &&
              Dest->getLiveRange().containsValue(InstNumber - 1, IsDest))
            Invalid = true;
          if (Invalid) {
            Valid = false;
            Str << "Liveness error: inst " << Instr.getNumber() << " dest ";
            Dest->dump(this);
            Str << " live range " << Dest->getLiveRange() << "\n";
          }
        }
      }
      // Every source variable use must also lie inside its live range.
      FOREACH_VAR_IN_INST(Var, Instr) {
        static constexpr bool IsDest = false;
        if (!Var->getIgnoreLiveness() &&
            !Var->getLiveRange().containsValue(InstNumber, IsDest)) {
          Valid = false;
          Str << "Liveness error: inst " << Instr.getNumber() << " var ";
          Var->dump(this);
          Str << " live range " << Var->getLiveRange() << "\n";
        }
      }
    }
  }
  return Valid;
}
1696
Jim Stichnoth336f6c42014-10-30 15:01:31 -07001697void Cfg::contractEmptyNodes() {
Andrew Scullaa6c1092015-09-03 17:50:30 -07001698 // If we're decorating the asm output with register liveness info, this
1699 // information may become corrupted or incorrect after contracting nodes that
1700 // contain only redundant assignments. As such, we disable this pass when
1701 // DecorateAsm is specified. This may make the resulting code look more
1702 // branchy, but it should have no effect on the register assignments.
Karl Schimpfd4699942016-04-02 09:55:31 -07001703 if (getFlags().getDecorateAsm())
Jim Stichnoth3d44fe82014-11-01 10:10:18 -07001704 return;
Jim Stichnoth336f6c42014-10-30 15:01:31 -07001705 for (CfgNode *Node : Nodes) {
1706 Node->contractIfEmpty();
1707 }
1708}
1709
Jim Stichnothff9c7062014-09-18 04:50:49 -07001710void Cfg::doBranchOpt() {
Jim Stichnoth8363a062014-10-07 10:02:38 -07001711 TimerMarker T(TimerStack::TT_doBranchOpt, this);
Jim Stichnothf44f3712014-10-01 14:05:51 -07001712 for (auto I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
Andrew Scull713278a2015-07-22 17:17:02 -07001713 auto NextNode = I + 1;
Jim Stichnothae953202014-12-20 06:17:49 -08001714 (*I)->doBranchOpt(NextNode == E ? nullptr : *NextNode);
Jim Stichnothff9c7062014-09-18 04:50:49 -07001715 }
1716}
1717
Andrew Scull86df4e92015-07-30 13:54:44 -07001718void Cfg::markNodesForSandboxing() {
1719 for (const InstJumpTable *JT : JumpTables)
1720 for (SizeT I = 0; I < JT->getNumTargets(); ++I)
1721 JT->getTarget(I)->setNeedsAlignment();
1722}
1723
Jim Stichnothf7c9a142014-04-29 10:52:43 -07001724// ======================== Dump routines ======================== //
1725
Andrew Scull57e12682015-09-16 11:30:19 -07001726// emitTextHeader() is not target-specific (apart from what is abstracted by
1727// the Assembler), so it is defined here rather than in the target lowering
1728// class.
Jim Stichnoth467ffe52016-03-29 15:01:06 -07001729void Cfg::emitTextHeader(GlobalString Name, GlobalContext *Ctx,
Jim Stichnothbbca7542015-02-11 16:08:31 -08001730 const Assembler *Asm) {
Jim Stichnoth20b71f52015-06-24 15:52:24 -07001731 if (!BuildDefs::dump())
Jim Stichnoth729dbd02015-02-25 14:48:43 -08001732 return;
Jan Voung0faec4c2014-11-05 17:29:56 -08001733 Ostream &Str = Ctx->getStrEmit();
1734 Str << "\t.text\n";
Karl Schimpfd4699942016-04-02 09:55:31 -07001735 if (getFlags().getFunctionSections())
Jim Stichnoth98ba0062016-03-07 09:26:22 -08001736 Str << "\t.section\t.text." << Name << ",\"ax\",%progbits\n";
Karl Schimpfd4699942016-04-02 09:55:31 -07001737 if (!Asm->getInternal() || getFlags().getDisableInternal()) {
Jim Stichnoth98ba0062016-03-07 09:26:22 -08001738 Str << "\t.globl\t" << Name << "\n";
1739 Str << "\t.type\t" << Name << ",%function\n";
Jan Voung0faec4c2014-11-05 17:29:56 -08001740 }
Andrew Scull86df4e92015-07-30 13:54:44 -07001741 Str << "\t" << Asm->getAlignDirective() << " "
Jan Voungb2d50842015-05-12 09:53:50 -07001742 << Asm->getBundleAlignLog2Bytes() << ",0x";
Jan Voung08c3bcd2014-12-01 17:55:16 -08001743 for (uint8_t I : Asm->getNonExecBundlePadding())
Jan Voung0faec4c2014-11-05 17:29:56 -08001744 Str.write_hex(I);
1745 Str << "\n";
Jim Stichnoth98ba0062016-03-07 09:26:22 -08001746 Str << Name << ":\n";
Jan Voung0faec4c2014-11-05 17:29:56 -08001747}
1748
Andrew Scull86df4e92015-07-30 13:54:44 -07001749void Cfg::emitJumpTables() {
Karl Schimpfd4699942016-04-02 09:55:31 -07001750 switch (getFlags().getOutFileType()) {
Andrew Scull86df4e92015-07-30 13:54:44 -07001751 case FT_Elf:
1752 case FT_Iasm: {
Andrew Scull57e12682015-09-16 11:30:19 -07001753 // The emission needs to be delayed until the after the text section so
1754 // save the offsets in the global context.
Andrew Scull86df4e92015-07-30 13:54:44 -07001755 for (const InstJumpTable *JumpTable : JumpTables) {
John Porto03077212016-04-05 06:30:21 -07001756 Ctx->addJumpTableData(JumpTable->toJumpTableData(getAssembler()));
Andrew Scull86df4e92015-07-30 13:54:44 -07001757 }
1758 } break;
1759 case FT_Asm: {
1760 // Emit the assembly directly so we don't need to hang on to all the names
1761 for (const InstJumpTable *JumpTable : JumpTables)
1762 getTarget()->emitJumpTable(this, JumpTable);
1763 } break;
Andrew Scull86df4e92015-07-30 13:54:44 -07001764 }
1765}
1766
// Emits the function as textual assembly: the text header, an optional block
// of symbolic stack-offset definitions (under -decorate-asm), each node's
// instructions (with bundle-alignment directives where sandboxing requires
// them), and finally the jump tables.
void Cfg::emit() {
  if (!BuildDefs::dump())
    return;
  TimerMarker T(TimerStack::TT_emitAsm, this);
  // -decorate-asm needs up-to-date liveness info, so recompute it first.
  if (getFlags().getDecorateAsm()) {
    renumberInstructions();
    getVMetadata()->init(VMK_Uses);
    liveness(Liveness_Basic);
    dump("After recomputing liveness for -decorate-asm");
  }
  OstreamLocker L(Ctx);
  Ostream &Str = Ctx->getStrEmit();
  const Assembler *Asm = getAssembler<>();
  const bool NeedSandboxing = getFlags().getUseSandboxing();

  emitTextHeader(FunctionName, Ctx, Asm);
  // Emit "<symbolic offset> = <numeric offset>" lines for stack variables so
  // the decorated asm can refer to them by name.
  if (getFlags().getDecorateAsm()) {
    for (Variable *Var : getVariables()) {
      if (Var->hasKnownStackOffset() && !Var->isRematerializable()) {
        Str << "\t" << Var->getSymbolicStackOffset() << " = "
            << Var->getStackOffset() << "\n";
      }
    }
  }
  for (CfgNode *Node : Nodes) {
    // Sandboxed targets require certain nodes to start on a bundle boundary.
    if (NeedSandboxing && Node->needsAlignment()) {
      Str << "\t" << Asm->getAlignDirective() << " "
          << Asm->getBundleAlignLog2Bytes() << "\n";
    }
    Node->emit(this);
  }
  emitJumpTables();
  Str << "\n";
}
1801
Jan Voung0faec4c2014-11-05 17:29:56 -08001802void Cfg::emitIAS() {
Karl Schimpfb6e9b892016-03-08 12:27:12 -08001803 TimerMarker T(TimerStack::TT_emitAsm, this);
Andrew Scull57e12682015-09-16 11:30:19 -07001804 // The emitIAS() routines emit into the internal assembler buffer, so there's
1805 // no need to lock the streams.
Karl Schimpfd4699942016-04-02 09:55:31 -07001806 const bool NeedSandboxing = getFlags().getUseSandboxing();
Andrew Scull86df4e92015-07-30 13:54:44 -07001807 for (CfgNode *Node : Nodes) {
1808 if (NeedSandboxing && Node->needsAlignment())
1809 getAssembler()->alignCfgNode();
Jan Voung0faec4c2014-11-05 17:29:56 -08001810 Node->emitIAS(this);
Andrew Scull86df4e92015-07-30 13:54:44 -07001811 }
1812 emitJumpTables();
Jan Voung0faec4c2014-11-05 17:29:56 -08001813}
1814
John Portoa3984a12016-04-01 11:14:30 -07001815size_t Cfg::getTotalMemoryMB() const {
1816 constexpr size_t _1MB = 1024 * 1024;
1817 assert(Allocator != nullptr);
1818 assert(CfgAllocatorTraits::current() == Allocator.get());
1819 return Allocator->getTotalMemory() / _1MB;
1820}
1821
1822size_t Cfg::getLivenessMemoryMB() const {
1823 constexpr size_t _1MB = 1024 * 1024;
1824 if (Live == nullptr) {
1825 return 0;
1826 }
1827 return Live->getAllocator()->getTotalMemory() / _1MB;
Jim Stichnoth1bdb7352016-02-29 16:58:15 -08001828}
1829
// Dumps the IR with an optional introductory message. Output is gated on the
// various IceV_* verbosity flags: the function signature and body under
// IceV_Instructions, memory stats under IceV_Mem, and per-variable liveness
// summaries under IceV_Liveness.
void Cfg::dump(const char *Message) {
  if (!BuildDefs::dump())
    return;
  if (!isVerbose())
    return;
  OstreamLocker L(Ctx);
  Ostream &Str = Ctx->getStrDump();
  if (Message[0])
    Str << "================ " << Message << " ================\n";
  if (isVerbose(IceV_Mem)) {
    Str << "Memory size = " << getTotalMemoryMB() << " MB\n";
  }
  setCurrentNode(getEntryNode());
  // Print function name+args
  if (isVerbose(IceV_Instructions)) {
    Str << "define ";
    if (getInternal() && !getFlags().getDisableInternal())
      Str << "internal ";
    Str << ReturnType << " @" << getFunctionName() << "(";
    for (SizeT i = 0; i < Args.size(); ++i) {
      if (i > 0)
        Str << ", ";
      Str << Args[i]->getType() << " ";
      Args[i]->dump(this);
    }
    // Append an extra copy of the function name here, in order to print its
    // size stats but not mess up lit tests.
    Str << ") { # " << getFunctionNameAndSize() << "\n";
  }
  resetCurrentNode();
  if (isVerbose(IceV_Liveness)) {
    // Print summary info about variables
    for (Variable *Var : Variables) {
      Str << "// multiblock=";
      if (getVMetadata()->isTracked(Var))
        Str << getVMetadata()->isMultiBlock(Var);
      else
        Str << "?";
      Str << " defs=";
      bool FirstPrint = true;
      // The first definition is only tracked when the metadata kind is not
      // VMK_Uses; the full definition list only under VMK_All.
      if (VMetadata->getKind() != VMK_Uses) {
        if (const Inst *FirstDef = VMetadata->getFirstDefinition(Var)) {
          Str << FirstDef->getNumber();
          FirstPrint = false;
        }
      }
      if (VMetadata->getKind() == VMK_All) {
        for (const Inst *Instr : VMetadata->getLatterDefinitions(Var)) {
          if (!FirstPrint)
            Str << ",";
          Str << Instr->getNumber();
          FirstPrint = false;
        }
      }
      Str << " weight=" << Var->getWeight(this) << " ";
      Var->dump(this);
      Str << " LIVE=" << Var->getLiveRange() << "\n";
    }
  }
  // Print each basic block
  for (CfgNode *Node : Nodes)
    Node->dump(this);
  if (isVerbose(IceV_Instructions))
    Str << "}\n";
}
1896
1897} // end of namespace Ice