//===- X86VZeroUpper.cpp - AVX vzeroupper instruction inserter ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the pass which inserts x86 AVX vzeroupper instructions
// before calls to SSE-encoded functions. This avoids the transition-latency
// penalty when transferring control between AVX-encoded instructions and the
// legacy SSE encoding mode.
//
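// For example, after an AVX instruction such as
//
//   vaddps %ymm1, %ymm2, %ymm0
//
// the upper halves of the YMM registers are "dirty". If control then reaches
// code using legacy SSE encodings (e.g. a library routine compiled without
// AVX), the processor may pay an expensive state transition. Executing
// vzeroupper before such a call moves the processor back to the clean state,
// so neither the SSE code nor later AVX code pays the penalty.
//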
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>

using namespace llvm;

#define DEBUG_TYPE "x86-vzeroupper"

STATISTIC(NumVZU, "Number of vzeroupper instructions inserted");

namespace {

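  /// Machine function pass that tracks, per basic block, whether the upper
  /// YMM/ZMM register state may be dirty and inserts VZEROUPPER before calls
  /// and returns that could transfer control to legacy-SSE code.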
  class VZeroUpperInserter : public MachineFunctionPass {
  public:
    VZeroUpperInserter() : MachineFunctionPass(ID) {}

    bool runOnMachineFunction(MachineFunction &MF) override;

    MachineFunctionProperties getRequiredProperties() const override {
      return MachineFunctionProperties().set(
          MachineFunctionProperties::Property::NoVRegs);
    }

    StringRef getPassName() const override { return "X86 vzeroupper inserter"; }

  private:
    void processBasicBlock(MachineBasicBlock &MBB);
    void insertVZeroUpper(MachineBasicBlock::iterator I,
                          MachineBasicBlock &MBB);
    void addDirtySuccessor(MachineBasicBlock &MBB);

    using BlockExitState = enum { PASS_THROUGH, EXITS_CLEAN, EXITS_DIRTY };

    static const char* getBlockExitStateName(BlockExitState ST);

    // Core algorithm state:
    // BlockState - Each block is either:
    //   - PASS_THROUGH: There are neither YMM/ZMM dirtying instructions nor
    //                   vzeroupper instructions in this block.
    //   - EXITS_CLEAN: There is (or will be) a vzeroupper instruction in this
    //                  block that will ensure that YMM/ZMM is clean on exit.
    //   - EXITS_DIRTY: An instruction in the block dirties YMM/ZMM and no
    //                  subsequent vzeroupper in the block clears it.
    //
    // AddedToDirtySuccessors - This flag is raised when a block is added to
    //                          the DirtySuccessors list to ensure that it's
    //                          not added multiple times.
    //
    // FirstUnguardedCall - Records the location of the first unguarded call
    //                      in each basic block that may need to be guarded by
    //                      a vzeroupper. We won't know whether it actually
    //                      needs to be guarded until we discover a
    //                      predecessor that is EXITS_DIRTY.
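    //
    // For illustration: suppose BB0 uses %ymm0 and falls through to BB1,
    // which contains only scalar code and one call. Processing marks BB0 as
    // EXITS_DIRTY and records BB1's call as its FirstUnguardedCall. Because
    // BB0 exits dirty, BB1 is placed on the DirtySuccessors worklist, and the
    // recorded call later has a vzeroupper inserted in front of it.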
    struct BlockState {
      BlockExitState ExitState = PASS_THROUGH;
      bool AddedToDirtySuccessors = false;
      MachineBasicBlock::iterator FirstUnguardedCall;

      BlockState() = default;
    };

    using BlockStateMap = SmallVector<BlockState, 8>;
    using DirtySuccessorsWorkList = SmallVector<MachineBasicBlock *, 8>;

    BlockStateMap BlockStates;
    DirtySuccessorsWorkList DirtySuccessors;
    bool EverMadeChange;
    bool IsX86INTR;
    const TargetInstrInfo *TII;

    static char ID;
  };

} // end anonymous namespace

char VZeroUpperInserter::ID = 0;

FunctionPass *llvm::createX86IssueVZeroUpperPass() {
  return new VZeroUpperInserter();
}

#ifndef NDEBUG
const char* VZeroUpperInserter::getBlockExitStateName(BlockExitState ST) {
  switch (ST) {
  case PASS_THROUGH: return "Pass-through";
  case EXITS_DIRTY: return "Exits-dirty";
  case EXITS_CLEAN: return "Exits-clean";
  }
  llvm_unreachable("Invalid block exit state.");
}
#endif

/// VZEROUPPER cleans state that is related to Y/ZMM0-15 only.
/// Thus, there is no need to check for Y/ZMM16 and above.
static bool isYmmOrZmmReg(unsigned Reg) {
  return (Reg >= X86::YMM0 && Reg <= X86::YMM15) ||
         (Reg >= X86::ZMM0 && Reg <= X86::ZMM15);
}

static bool checkFnHasLiveInYmmOrZmm(MachineRegisterInfo &MRI) {
  for (std::pair<unsigned, unsigned> LI : MRI.liveins())
    if (isYmmOrZmmReg(LI.first))
      return true;

  return false;
}

static bool clobbersAllYmmAndZmmRegs(const MachineOperand &MO) {
  for (unsigned reg = X86::YMM0; reg <= X86::YMM15; ++reg) {
    if (!MO.clobbersPhysReg(reg))
      return false;
  }
  for (unsigned reg = X86::ZMM0; reg <= X86::ZMM15; ++reg) {
    if (!MO.clobbersPhysReg(reg))
      return false;
  }
  return true;
}

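/// Return true if MI uses a YMM/ZMM register in the range checked by
/// isYmmOrZmmReg. A call whose register mask does not clobber all of
/// YMM0-15/ZMM0-15 is also treated conservatively as a YMM/ZMM use, since the
/// upper register state can remain live across such a call.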
static bool hasYmmOrZmmReg(MachineInstr &MI) {
  for (const MachineOperand &MO : MI.operands()) {
    if (MI.isCall() && MO.isRegMask() && !clobbersAllYmmAndZmmRegs(MO))
      return true;
    if (!MO.isReg())
      continue;
    if (MO.isDebug())
      continue;
    if (isYmmOrZmmReg(MO.getReg()))
      return true;
  }
  return false;
}

/// Check if given call instruction has a RegMask operand.
static bool callHasRegMask(MachineInstr &MI) {
  assert(MI.isCall() && "Can only be called on call instructions.");
  for (const MachineOperand &MO : MI.operands()) {
    if (MO.isRegMask())
      return true;
  }
  return false;
}

/// Insert a vzeroupper instruction before I.
void VZeroUpperInserter::insertVZeroUpper(MachineBasicBlock::iterator I,
                                          MachineBasicBlock &MBB) {
  DebugLoc dl = I->getDebugLoc();
  BuildMI(MBB, I, dl, TII->get(X86::VZEROUPPER));
  ++NumVZU;
  EverMadeChange = true;
}

/// Add MBB to the DirtySuccessors list if it hasn't already been added.
void VZeroUpperInserter::addDirtySuccessor(MachineBasicBlock &MBB) {
  if (!BlockStates[MBB.getNumber()].AddedToDirtySuccessors) {
    DirtySuccessors.push_back(&MBB);
    BlockStates[MBB.getNumber()].AddedToDirtySuccessors = true;
  }
}

/// Loop over all of the instructions in the basic block, inserting vzeroupper
/// instructions before function calls.
void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) {
  // Start by assuming that the block is PASS_THROUGH which implies no
  // unguarded calls.
  BlockExitState CurState = PASS_THROUGH;
  BlockStates[MBB.getNumber()].FirstUnguardedCall = MBB.end();

  for (MachineInstr &MI : MBB) {
    bool IsCall = MI.isCall();
    bool IsReturn = MI.isReturn();
    bool IsControlFlow = IsCall || IsReturn;

    // No need for vzeroupper before iret in an interrupt handler function;
    // the epilogue will restore the YMM/ZMM registers if needed.
    if (IsX86INTR && IsReturn)
      continue;

    // An existing VZERO* instruction resets the state.
    if (MI.getOpcode() == X86::VZEROALL || MI.getOpcode() == X86::VZEROUPPER) {
      CurState = EXITS_CLEAN;
      continue;
    }

    // Shortcut: don't need to check regular instructions in dirty state.
    if (!IsControlFlow && CurState == EXITS_DIRTY)
      continue;

    if (hasYmmOrZmmReg(MI)) {
      // We found a ymm/zmm-using instruction; this could be an AVX/AVX512
      // instruction, or it could be control flow.
      CurState = EXITS_DIRTY;
      continue;
    }

    // Check for control-flow out of the current function (which might
    // indirectly execute SSE instructions).
    if (!IsControlFlow)
      continue;

    // If the call has no RegMask, skip it as well. This usually happens for
    // helper-function calls (such as '_chkstk' and '_ftol2') where the
    // standard calling convention is not used: no RegMask marks the clobbered
    // registers, and register usage (def/implicit-def/use) is well-defined
    // and explicitly specified.
    if (IsCall && !callHasRegMask(MI))
      continue;

    // The VZEROUPPER instruction resets the upper 128 bits of YMM0-YMM15
    // registers. In addition, the processor changes back to Clean state, after
    // which execution of SSE instructions or AVX instructions has no transition
    // penalty. Add the VZEROUPPER instruction before any function call/return
    // that might execute SSE code.
    // FIXME: In some cases, we may want to move the VZEROUPPER into a
    // predecessor block.
    if (CurState == EXITS_DIRTY) {
      // After the inserted VZEROUPPER the state becomes clean again, but
      // other YMM/ZMM uses may appear before subsequent calls or even before
      // the end of the BB.
      insertVZeroUpper(MI, MBB);
      CurState = EXITS_CLEAN;
255 } else if (CurState == PASS_THROUGH) {
256 // If this block is currently in pass-through state and we encounter a
257 // call then whether we need a vzeroupper or not depends on whether this
258 // block has successors that exit dirty. Record the location of the call,
259 // and set the state to EXITS_CLEAN, but do not insert the vzeroupper yet.
260 // It will be inserted later if necessary.
      BlockStates[MBB.getNumber()].FirstUnguardedCall = MI;
      CurState = EXITS_CLEAN;
    }
  }

  LLVM_DEBUG(dbgs() << "MBB #" << MBB.getNumber() << " exit state: "
                    << getBlockExitStateName(CurState) << '\n');

  if (CurState == EXITS_DIRTY)
    for (MachineBasicBlock *Succ : MBB.successors())
      addDirtySuccessor(*Succ);

  BlockStates[MBB.getNumber()].ExitState = CurState;
}

/// Loop over all of the basic blocks, inserting vzeroupper instructions before
/// function calls.
bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
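  // Nothing to do without AVX. Subtargets that report fast partial YMM/ZMM
  // writes are not expected to pay the SSE/AVX transition penalty either, so
  // the pass also bails out for them.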
  if (!ST.hasAVX() || ST.hasFastPartialYMMorZMMWrite())
    return false;
  TII = ST.getInstrInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  EverMadeChange = false;
  IsX86INTR = MF.getFunction().getCallingConv() == CallingConv::X86_INTR;

  bool FnHasLiveInYmmOrZmm = checkFnHasLiveInYmmOrZmm(MRI);

  // Fast check: if the function doesn't use any ymm/zmm registers, we don't
  // need to insert any VZEROUPPER instructions. This is constant-time, so it
  // is cheap in the common case of no ymm/zmm use.
  bool YmmOrZmmUsed = FnHasLiveInYmmOrZmm;
  const TargetRegisterClass *RCs[2] = {&X86::VR256RegClass, &X86::VR512RegClass};
  for (auto *RC : RCs) {
    if (!YmmOrZmmUsed) {
      for (TargetRegisterClass::iterator i = RC->begin(), e = RC->end(); i != e;
           i++) {
        if (!MRI.reg_nodbg_empty(*i)) {
          YmmOrZmmUsed = true;
          break;
        }
      }
    }
  }
  if (!YmmOrZmmUsed) {
    return false;
  }

  assert(BlockStates.empty() && DirtySuccessors.empty() &&
         "X86VZeroUpper state should be clear");
  BlockStates.resize(MF.getNumBlockIDs());

  // Process all blocks. This will compute block exit states, record the first
  // unguarded call in each block, and add successors of dirty blocks to the
  // DirtySuccessors list.
  for (MachineBasicBlock &MBB : MF)
    processBasicBlock(MBB);

  // If any YMM/ZMM regs are live-in to this function, add the entry block to
  // the DirtySuccessors list.
  if (FnHasLiveInYmmOrZmm)
    addDirtySuccessor(MF.front());

  // Re-visit all blocks that are successors of EXITS_DIRTY blocks. Add
  // vzeroupper instructions to unguarded calls, and propagate EXITS_DIRTY
  // through PASS_THROUGH blocks.
  while (!DirtySuccessors.empty()) {
    MachineBasicBlock &MBB = *DirtySuccessors.back();
    DirtySuccessors.pop_back();
    BlockState &BBState = BlockStates[MBB.getNumber()];

    // MBB is a successor of a dirty block, so its first call needs to be
    // guarded.
    if (BBState.FirstUnguardedCall != MBB.end())
      insertVZeroUpper(BBState.FirstUnguardedCall, MBB);

    // If this successor was a pass-through block, then it is now dirty. Its
    // successors need to be added to the worklist (if they haven't been
    // already).
    if (BBState.ExitState == PASS_THROUGH) {
      LLVM_DEBUG(dbgs() << "MBB #" << MBB.getNumber()
                        << " was Pass-through, is now Dirty-out.\n");
      for (MachineBasicBlock *Succ : MBB.successors())
        addDirtySuccessor(*Succ);
    }
  }

  BlockStates.clear();
  return EverMadeChange;
}