| Eugene Zelenko | 60433b6 | 2017-10-05 00:33:50 +0000 | [diff] [blame] | 1 | //===- X86VZeroUpper.cpp - AVX vzeroupper instruction inserter ------------===// |
| Bruno Cardoso Lopes | 2a3ffb5 | 2011-08-23 01:14:17 +0000 | [diff] [blame] | 2 | // |
| Chandler Carruth | 2946cd7 | 2019-01-19 08:50:56 +0000 | [diff] [blame] | 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| Bruno Cardoso Lopes | 2a3ffb5 | 2011-08-23 01:14:17 +0000 | [diff] [blame] | 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file defines the pass which inserts x86 AVX vzeroupper instructions |
| 10 | // before calls to SSE encoded functions. This avoids transition latency |
| Andrea Di Biagio | 4f8bdcb | 2015-02-07 13:56:20 +0000 | [diff] [blame] | 11 | // penalty when transferring control between AVX encoded instructions and old |
| Bruno Cardoso Lopes | 2a3ffb5 | 2011-08-23 01:14:17 +0000 | [diff] [blame] | 12 | // SSE encoding mode. |
| 13 | // |
| 14 | //===----------------------------------------------------------------------===// |
| 15 | |
| Bruno Cardoso Lopes | 2a3ffb5 | 2011-08-23 01:14:17 +0000 | [diff] [blame] | 16 | #include "X86.h" |
| 17 | #include "X86InstrInfo.h" |
| Elena Demikhovsky | 52e4a0e | 2014-01-05 10:46:09 +0000 | [diff] [blame] | 18 | #include "X86Subtarget.h" |
| Eugene Zelenko | 60433b6 | 2017-10-05 00:33:50 +0000 | [diff] [blame] | 19 | #include "llvm/ADT/SmallVector.h" |
| Bruno Cardoso Lopes | 2a3ffb5 | 2011-08-23 01:14:17 +0000 | [diff] [blame] | 20 | #include "llvm/ADT/Statistic.h" |
| Eugene Zelenko | 60433b6 | 2017-10-05 00:33:50 +0000 | [diff] [blame] | 21 | #include "llvm/CodeGen/MachineBasicBlock.h" |
| 22 | #include "llvm/CodeGen/MachineFunction.h" |
| Bruno Cardoso Lopes | 2a3ffb5 | 2011-08-23 01:14:17 +0000 | [diff] [blame] | 23 | #include "llvm/CodeGen/MachineFunctionPass.h" |
| Eugene Zelenko | 60433b6 | 2017-10-05 00:33:50 +0000 | [diff] [blame] | 24 | #include "llvm/CodeGen/MachineInstr.h" |
| Bruno Cardoso Lopes | 2a3ffb5 | 2011-08-23 01:14:17 +0000 | [diff] [blame] | 25 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
| Eugene Zelenko | 60433b6 | 2017-10-05 00:33:50 +0000 | [diff] [blame] | 26 | #include "llvm/CodeGen/MachineOperand.h" |
| Eli Friedman | 8f24960 | 2011-11-04 23:46:11 +0000 | [diff] [blame] | 27 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
| David Blaikie | 3f833ed | 2017-11-08 01:01:31 +0000 | [diff] [blame] | 28 | #include "llvm/CodeGen/TargetInstrInfo.h" |
| David Blaikie | b3bde2e | 2017-11-17 01:07:10 +0000 | [diff] [blame] | 29 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
| Eugene Zelenko | 60433b6 | 2017-10-05 00:33:50 +0000 | [diff] [blame] | 30 | #include "llvm/IR/CallingConv.h" |
| 31 | #include "llvm/IR/DebugLoc.h" |
| 32 | #include "llvm/IR/Function.h" |
| Eli Friedman | 8f24960 | 2011-11-04 23:46:11 +0000 | [diff] [blame] | 33 | #include "llvm/Support/Debug.h" |
| Eugene Zelenko | 60433b6 | 2017-10-05 00:33:50 +0000 | [diff] [blame] | 34 | #include "llvm/Support/ErrorHandling.h" |
| Eli Friedman | 8f24960 | 2011-11-04 23:46:11 +0000 | [diff] [blame] | 35 | #include "llvm/Support/raw_ostream.h" |
| Eugene Zelenko | 60433b6 | 2017-10-05 00:33:50 +0000 | [diff] [blame] | 36 | #include <cassert> |
| 37 | |
| Bruno Cardoso Lopes | 2a3ffb5 | 2011-08-23 01:14:17 +0000 | [diff] [blame] | 38 | using namespace llvm; |
| 39 | |
| Chandler Carruth | 84e68b2 | 2014-04-22 02:41:26 +0000 | [diff] [blame] | 40 | #define DEBUG_TYPE "x86-vzeroupper" |
| 41 | |
| Bruno Cardoso Lopes | 2a3ffb5 | 2011-08-23 01:14:17 +0000 | [diff] [blame] | 42 | STATISTIC(NumVZU, "Number of vzeroupper instructions inserted"); |
| 43 | |
| 44 | namespace { |
| Lang Hames | 7c8189c | 2014-03-17 01:22:54 +0000 | [diff] [blame] | 45 | |
| 46 | class VZeroUpperInserter : public MachineFunctionPass { |
| 47 | public: |
| Bruno Cardoso Lopes | 2a3ffb5 | 2011-08-23 01:14:17 +0000 | [diff] [blame] | 48 | VZeroUpperInserter() : MachineFunctionPass(ID) {} |
| Eugene Zelenko | 60433b6 | 2017-10-05 00:33:50 +0000 | [diff] [blame] | 49 | |
| Craig Topper | 2d9361e | 2014-03-09 07:44:38 +0000 | [diff] [blame] | 50 | bool runOnMachineFunction(MachineFunction &MF) override; |
| Eugene Zelenko | 60433b6 | 2017-10-05 00:33:50 +0000 | [diff] [blame] | 51 | |
| Derek Schuff | 1dbf7a5 | 2016-04-04 17:09:25 +0000 | [diff] [blame] | 52 | MachineFunctionProperties getRequiredProperties() const override { |
| 53 | return MachineFunctionProperties().set( |
| Matthias Braun | 1eb4736 | 2016-08-25 01:27:13 +0000 | [diff] [blame] | 54 | MachineFunctionProperties::Property::NoVRegs); |
| Derek Schuff | 1dbf7a5 | 2016-04-04 17:09:25 +0000 | [diff] [blame] | 55 | } |
| Eugene Zelenko | 60433b6 | 2017-10-05 00:33:50 +0000 | [diff] [blame] | 56 | |
| Mehdi Amini | 117296c | 2016-10-01 02:56:57 +0000 | [diff] [blame] | 57 | StringRef getPassName() const override { return "X86 vzeroupper inserter"; } |
| Bruno Cardoso Lopes | 2a3ffb5 | 2011-08-23 01:14:17 +0000 | [diff] [blame] | 58 | |
| 59 | private: |
| Lang Hames | 7c8189c | 2014-03-17 01:22:54 +0000 | [diff] [blame] | 60 | void processBasicBlock(MachineBasicBlock &MBB); |
| 61 | void insertVZeroUpper(MachineBasicBlock::iterator I, |
| 62 | MachineBasicBlock &MBB); |
| 63 | void addDirtySuccessor(MachineBasicBlock &MBB); |
| Eli Friedman | 8f24960 | 2011-11-04 23:46:11 +0000 | [diff] [blame] | 64 | |
| Eugene Zelenko | 60433b6 | 2017-10-05 00:33:50 +0000 | [diff] [blame] | 65 | using BlockExitState = enum { PASS_THROUGH, EXITS_CLEAN, EXITS_DIRTY }; |
| 66 | |
| Lang Hames | 7c8189c | 2014-03-17 01:22:54 +0000 | [diff] [blame] | 67 | static const char* getBlockExitStateName(BlockExitState ST); |
| Eli Friedman | 8f24960 | 2011-11-04 23:46:11 +0000 | [diff] [blame] | 68 | |
| Lang Hames | 7c8189c | 2014-03-17 01:22:54 +0000 | [diff] [blame] | 69 | // Core algorithm state: |
| 70 | // BlockState - Each block is either: |
| Amjad Aboud | 4f97751 | 2017-03-03 09:03:24 +0000 | [diff] [blame] | 71 | // - PASS_THROUGH: There are neither YMM/ZMM dirtying instructions nor |
| Lang Hames | 7c8189c | 2014-03-17 01:22:54 +0000 | [diff] [blame] | 72 | // vzeroupper instructions in this block. |
| 73 | // - EXITS_CLEAN: There is (or will be) a vzeroupper instruction in this |
| Amjad Aboud | 4f97751 | 2017-03-03 09:03:24 +0000 | [diff] [blame] | 74 | // block that will ensure that YMM/ZMM is clean on exit. |
| 75 | // - EXITS_DIRTY: An instruction in the block dirties YMM/ZMM and no |
| Lang Hames | 7c8189c | 2014-03-17 01:22:54 +0000 | [diff] [blame] | 76 | // subsequent vzeroupper in the block clears it. |
| Eli Friedman | 8f24960 | 2011-11-04 23:46:11 +0000 | [diff] [blame] | 77 | // |
| Lang Hames | 7c8189c | 2014-03-17 01:22:54 +0000 | [diff] [blame] | 78 | // AddedToDirtySuccessors - This flag is raised when a block is added to the |
| 79 | // DirtySuccessors list to ensure that it's not |
| 80 | // added multiple times. |
| Eli Friedman | 8f24960 | 2011-11-04 23:46:11 +0000 | [diff] [blame] | 81 | // |
| Lang Hames | 7c8189c | 2014-03-17 01:22:54 +0000 | [diff] [blame] | 82 | // FirstUnguardedCall - Records the location of the first unguarded call in |
| 83 | // each basic block that may need to be guarded by a |
| 84 | // vzeroupper. We won't know whether it actually needs |
| 85 | // to be guarded until we discover a predecessor that |
| 86 | // is DIRTY_OUT. |
| 87 | struct BlockState { |
| Eugene Zelenko | 60433b6 | 2017-10-05 00:33:50 +0000 | [diff] [blame] | 88 | BlockExitState ExitState = PASS_THROUGH; |
| 89 | bool AddedToDirtySuccessors = false; |
| Lang Hames | 7c8189c | 2014-03-17 01:22:54 +0000 | [diff] [blame] | 90 | MachineBasicBlock::iterator FirstUnguardedCall; |
| Eugene Zelenko | 60433b6 | 2017-10-05 00:33:50 +0000 | [diff] [blame] | 91 | |
| 92 | BlockState() = default; |
| Eli Friedman | 8f24960 | 2011-11-04 23:46:11 +0000 | [diff] [blame] | 93 | }; |
| Eugene Zelenko | 60433b6 | 2017-10-05 00:33:50 +0000 | [diff] [blame] | 94 | |
| 95 | using BlockStateMap = SmallVector<BlockState, 8>; |
| 96 | using DirtySuccessorsWorkList = SmallVector<MachineBasicBlock *, 8>; |
| Eli Friedman | 8f24960 | 2011-11-04 23:46:11 +0000 | [diff] [blame] | 97 | |
| Lang Hames | 7c8189c | 2014-03-17 01:22:54 +0000 | [diff] [blame] | 98 | BlockStateMap BlockStates; |
| 99 | DirtySuccessorsWorkList DirtySuccessors; |
| 100 | bool EverMadeChange; |
| Amjad Aboud | 719325fe1 | 2016-03-01 11:32:03 +0000 | [diff] [blame] | 101 | bool IsX86INTR; |
| Lang Hames | 7c8189c | 2014-03-17 01:22:54 +0000 | [diff] [blame] | 102 | const TargetInstrInfo *TII; |
| Eli Friedman | 8f24960 | 2011-11-04 23:46:11 +0000 | [diff] [blame] | 103 | |
| Lang Hames | 7c8189c | 2014-03-17 01:22:54 +0000 | [diff] [blame] | 104 | static char ID; |
| Bruno Cardoso Lopes | 2a3ffb5 | 2011-08-23 01:14:17 +0000 | [diff] [blame] | 105 | }; |
| Lang Hames | 7c8189c | 2014-03-17 01:22:54 +0000 | [diff] [blame] | 106 | |
| Eugene Zelenko | 60433b6 | 2017-10-05 00:33:50 +0000 | [diff] [blame] | 107 | } // end anonymous namespace |
| 108 | |
| 109 | char VZeroUpperInserter::ID = 0; |
| Bruno Cardoso Lopes | 2a3ffb5 | 2011-08-23 01:14:17 +0000 | [diff] [blame] | 110 | |
| 111 | FunctionPass *llvm::createX86IssueVZeroUpperPass() { |
| 112 | return new VZeroUpperInserter(); |
| 113 | } |
| 114 | |
| Craig Topper | 3eb6ff9 | 2017-03-22 06:07:58 +0000 | [diff] [blame] | 115 | #ifndef NDEBUG |
| Lang Hames | 7c8189c | 2014-03-17 01:22:54 +0000 | [diff] [blame] | 116 | const char* VZeroUpperInserter::getBlockExitStateName(BlockExitState ST) { |
| 117 | switch (ST) { |
| 118 | case PASS_THROUGH: return "Pass-through"; |
| 119 | case EXITS_DIRTY: return "Exits-dirty"; |
| 120 | case EXITS_CLEAN: return "Exits-clean"; |
| 121 | } |
| 122 | llvm_unreachable("Invalid block exit state."); |
| 123 | } |
| Craig Topper | 3eb6ff9 | 2017-03-22 06:07:58 +0000 | [diff] [blame] | 124 | #endif |
| Lang Hames | 7c8189c | 2014-03-17 01:22:54 +0000 | [diff] [blame] | 125 | |
| Amjad Aboud | 4f97751 | 2017-03-03 09:03:24 +0000 | [diff] [blame] | 126 | /// VZEROUPPER cleans state that is related to Y/ZMM0-15 only. |
| 127 | /// Thus, there is no need to check for Y/ZMM16 and above. |
| 128 | static bool isYmmOrZmmReg(unsigned Reg) { |
| 129 | return (Reg >= X86::YMM0 && Reg <= X86::YMM15) || |
| 130 | (Reg >= X86::ZMM0 && Reg <= X86::ZMM15); |
| Eli Friedman | 8f24960 | 2011-11-04 23:46:11 +0000 | [diff] [blame] | 131 | } |
| 132 | |
| Amjad Aboud | 4f97751 | 2017-03-03 09:03:24 +0000 | [diff] [blame] | 133 | static bool checkFnHasLiveInYmmOrZmm(MachineRegisterInfo &MRI) { |
| Krzysztof Parzyszek | 72518ea | 2017-10-16 19:08:41 +0000 | [diff] [blame] | 134 | for (std::pair<unsigned, unsigned> LI : MRI.liveins()) |
| 135 | if (isYmmOrZmmReg(LI.first)) |
| Eli Friedman | 8f24960 | 2011-11-04 23:46:11 +0000 | [diff] [blame] | 136 | return true; |
| 137 | |
| 138 | return false; |
| 139 | } |
| 140 | |
| Amjad Aboud | 4f97751 | 2017-03-03 09:03:24 +0000 | [diff] [blame] | 141 | static bool clobbersAllYmmAndZmmRegs(const MachineOperand &MO) { |
| Elena Demikhovsky | 52e4a0e | 2014-01-05 10:46:09 +0000 | [diff] [blame] | 142 | for (unsigned reg = X86::YMM0; reg <= X86::YMM15; ++reg) { |
| Elena Demikhovsky | 9e0df7c | 2013-02-13 08:02:04 +0000 | [diff] [blame] | 143 | if (!MO.clobbersPhysReg(reg)) |
| 144 | return false; |
| 145 | } |
| Amjad Aboud | 4f97751 | 2017-03-03 09:03:24 +0000 | [diff] [blame] | 146 | for (unsigned reg = X86::ZMM0; reg <= X86::ZMM15; ++reg) { |
| 147 | if (!MO.clobbersPhysReg(reg)) |
| 148 | return false; |
| 149 | } |
| Elena Demikhovsky | 9e0df7c | 2013-02-13 08:02:04 +0000 | [diff] [blame] | 150 | return true; |
| 151 | } |
| 152 | |
| Amjad Aboud | 4f97751 | 2017-03-03 09:03:24 +0000 | [diff] [blame] | 153 | static bool hasYmmOrZmmReg(MachineInstr &MI) { |
| Duncan P. N. Exon Smith | 7b4c18e | 2016-07-12 03:18:50 +0000 | [diff] [blame] | 154 | for (const MachineOperand &MO : MI.operands()) { |
| Amjad Aboud | 4f97751 | 2017-03-03 09:03:24 +0000 | [diff] [blame] | 155 | if (MI.isCall() && MO.isRegMask() && !clobbersAllYmmAndZmmRegs(MO)) |
| Elena Demikhovsky | 9e0df7c | 2013-02-13 08:02:04 +0000 | [diff] [blame] | 156 | return true; |
| Eli Friedman | 8f24960 | 2011-11-04 23:46:11 +0000 | [diff] [blame] | 157 | if (!MO.isReg()) |
| 158 | continue; |
| 159 | if (MO.isDebug()) |
| 160 | continue; |
| Amjad Aboud | 4f97751 | 2017-03-03 09:03:24 +0000 | [diff] [blame] | 161 | if (isYmmOrZmmReg(MO.getReg())) |
| Eli Friedman | 8f24960 | 2011-11-04 23:46:11 +0000 | [diff] [blame] | 162 | return true; |
| 163 | } |
| 164 | return false; |
| 165 | } |
| 166 | |
| Amjad Aboud | 4f97751 | 2017-03-03 09:03:24 +0000 | [diff] [blame] | 167 | /// Check if given call instruction has a RegMask operand. |
| 168 | static bool callHasRegMask(MachineInstr &MI) { |
| Duncan P. N. Exon Smith | 7b4c18e | 2016-07-12 03:18:50 +0000 | [diff] [blame] | 169 | assert(MI.isCall() && "Can only be called on call instructions."); |
| 170 | for (const MachineOperand &MO : MI.operands()) { |
| Amjad Aboud | 4f97751 | 2017-03-03 09:03:24 +0000 | [diff] [blame] | 171 | if (MO.isRegMask()) |
| 172 | return true; |
| Michael Liao | 14b0284 | 2013-12-03 09:17:32 +0000 | [diff] [blame] | 173 | } |
| 174 | return false; |
| 175 | } |
| 176 | |
| Sanjay Patel | 8bc63b2 | 2016-05-20 16:46:01 +0000 | [diff] [blame] | 177 | /// Insert a vzeroupper instruction before I. |
| Lang Hames | 7c8189c | 2014-03-17 01:22:54 +0000 | [diff] [blame] | 178 | void VZeroUpperInserter::insertVZeroUpper(MachineBasicBlock::iterator I, |
| Sanjay Patel | 8bc63b2 | 2016-05-20 16:46:01 +0000 | [diff] [blame] | 179 | MachineBasicBlock &MBB) { |
| Lang Hames | 7c8189c | 2014-03-17 01:22:54 +0000 | [diff] [blame] | 180 | DebugLoc dl = I->getDebugLoc(); |
| 181 | BuildMI(MBB, I, dl, TII->get(X86::VZEROUPPER)); |
| 182 | ++NumVZU; |
| 183 | EverMadeChange = true; |
| 184 | } |
| Bruno Cardoso Lopes | 2a3ffb5 | 2011-08-23 01:14:17 +0000 | [diff] [blame] | 185 | |
| Sanjay Patel | 8bc63b2 | 2016-05-20 16:46:01 +0000 | [diff] [blame] | 186 | /// Add MBB to the DirtySuccessors list if it hasn't already been added. |
| Lang Hames | 7c8189c | 2014-03-17 01:22:54 +0000 | [diff] [blame] | 187 | void VZeroUpperInserter::addDirtySuccessor(MachineBasicBlock &MBB) { |
| 188 | if (!BlockStates[MBB.getNumber()].AddedToDirtySuccessors) { |
| 189 | DirtySuccessors.push_back(&MBB); |
| 190 | BlockStates[MBB.getNumber()].AddedToDirtySuccessors = true; |
| Bruno Cardoso Lopes | 2a3ffb5 | 2011-08-23 01:14:17 +0000 | [diff] [blame] | 191 | } |
| Bruno Cardoso Lopes | 2a3ffb5 | 2011-08-23 01:14:17 +0000 | [diff] [blame] | 192 | } |
| 193 | |
| Sanjay Patel | 8bc63b2 | 2016-05-20 16:46:01 +0000 | [diff] [blame] | 194 | /// Loop over all of the instructions in the basic block, inserting vzeroupper |
| 195 | /// instructions before function calls. |
| Lang Hames | 7c8189c | 2014-03-17 01:22:54 +0000 | [diff] [blame] | 196 | void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) { |
| Sanjay Patel | 5496a23 | 2016-05-20 17:07:19 +0000 | [diff] [blame] | 197 | // Start by assuming that the block is PASS_THROUGH which implies no unguarded |
| Lang Hames | 7c8189c | 2014-03-17 01:22:54 +0000 | [diff] [blame] | 198 | // calls. |
| 199 | BlockExitState CurState = PASS_THROUGH; |
| 200 | BlockStates[MBB.getNumber()].FirstUnguardedCall = MBB.end(); |
| Eli Friedman | 8f24960 | 2011-11-04 23:46:11 +0000 | [diff] [blame] | 201 | |
| Duncan P. N. Exon Smith | 7b4c18e | 2016-07-12 03:18:50 +0000 | [diff] [blame] | 202 | for (MachineInstr &MI : MBB) { |
| Amjad Aboud | 4f97751 | 2017-03-03 09:03:24 +0000 | [diff] [blame] | 203 | bool IsCall = MI.isCall(); |
| 204 | bool IsReturn = MI.isReturn(); |
| 205 | bool IsControlFlow = IsCall || IsReturn; |
| 206 | |
| Amjad Aboud | 719325fe1 | 2016-03-01 11:32:03 +0000 | [diff] [blame] | 207 | // No need for vzeroupper before iret in interrupt handler function, |
| Amjad Aboud | 4f97751 | 2017-03-03 09:03:24 +0000 | [diff] [blame] | 208 | // epilogue will restore YMM/ZMM registers if needed. |
| 209 | if (IsX86INTR && IsReturn) |
| 210 | continue; |
| Bruno Cardoso Lopes | 2a3ffb5 | 2011-08-23 01:14:17 +0000 | [diff] [blame] | 211 | |
| Sanjay Patel | 3955360 | 2016-05-25 16:39:47 +0000 | [diff] [blame] | 212 | // An existing VZERO* instruction resets the state. |
| Duncan P. N. Exon Smith | 7b4c18e | 2016-07-12 03:18:50 +0000 | [diff] [blame] | 213 | if (MI.getOpcode() == X86::VZEROALL || MI.getOpcode() == X86::VZEROUPPER) { |
| Sanjay Patel | 3955360 | 2016-05-25 16:39:47 +0000 | [diff] [blame] | 214 | CurState = EXITS_CLEAN; |
| 215 | continue; |
| 216 | } |
| 217 | |
| Chad Rosier | 24c19d2 | 2012-08-01 18:39:17 +0000 | [diff] [blame] | 218 | // Shortcut: don't need to check regular instructions in dirty state. |
| Amjad Aboud | 4f97751 | 2017-03-03 09:03:24 +0000 | [diff] [blame] | 219 | if (!IsControlFlow && CurState == EXITS_DIRTY) |
| Eli Friedman | 8f24960 | 2011-11-04 23:46:11 +0000 | [diff] [blame] | 220 | continue; |
| 221 | |
| Amjad Aboud | 4f97751 | 2017-03-03 09:03:24 +0000 | [diff] [blame] | 222 | if (hasYmmOrZmmReg(MI)) { |
| 223 | // We found a ymm/zmm-using instruction; this could be an AVX/AVX512 |
| 224 | // instruction, or it could be control flow. |
| Lang Hames | 7c8189c | 2014-03-17 01:22:54 +0000 | [diff] [blame] | 225 | CurState = EXITS_DIRTY; |
| Eli Friedman | 8f24960 | 2011-11-04 23:46:11 +0000 | [diff] [blame] | 226 | continue; |
| 227 | } |
| 228 | |
| 229 | // Check for control-flow out of the current function (which might |
| 230 | // indirectly execute SSE instructions). |
| Amjad Aboud | 4f97751 | 2017-03-03 09:03:24 +0000 | [diff] [blame] | 231 | if (!IsControlFlow) |
| Eli Friedman | 8f24960 | 2011-11-04 23:46:11 +0000 | [diff] [blame] | 232 | continue; |
| 233 | |
| Amjad Aboud | 4f97751 | 2017-03-03 09:03:24 +0000 | [diff] [blame] | 234 | // If the call has no RegMask, skip it as well. It usually happens on |
| 235 | // helper function calls (such as '_chkstk', '_ftol2') where standard |
| 236 | // calling convention is not used (RegMask is not used to mark register |
| Francis Visoiu Mistrih | a8a83d1 | 2017-12-07 10:40:31 +0000 | [diff] [blame] | 237 | // clobbered and register usage (def/implicit-def/use) is well-defined and |
| Amjad Aboud | 4f97751 | 2017-03-03 09:03:24 +0000 | [diff] [blame] | 238 | // explicitly specified. |
| 239 | if (IsCall && !callHasRegMask(MI)) |
| Michael Liao | 14b0284 | 2013-12-03 09:17:32 +0000 | [diff] [blame] | 240 | continue; |
| 241 | |
| Amjad Aboud | 4f97751 | 2017-03-03 09:03:24 +0000 | [diff] [blame] | 242 | // The VZEROUPPER instruction resets the upper 128 bits of YMM0-YMM15 |
| Sanjay Patel | 5496a23 | 2016-05-20 17:07:19 +0000 | [diff] [blame] | 243 | // registers. In addition, the processor changes back to Clean state, after |
| 244 | // which execution of SSE instructions or AVX instructions has no transition |
| 245 | // penalty. Add the VZEROUPPER instruction before any function call/return |
| 246 | // that might execute SSE code. |
| Eli Friedman | 8f24960 | 2011-11-04 23:46:11 +0000 | [diff] [blame] | 247 | // FIXME: In some cases, we may want to move the VZEROUPPER into a |
| 248 | // predecessor block. |
| Lang Hames | 7c8189c | 2014-03-17 01:22:54 +0000 | [diff] [blame] | 249 | if (CurState == EXITS_DIRTY) { |
| Eli Friedman | 8f24960 | 2011-11-04 23:46:11 +0000 | [diff] [blame] | 250 | // After the inserted VZEROUPPER the state becomes clean again, but |
| Amjad Aboud | 4f97751 | 2017-03-03 09:03:24 +0000 | [diff] [blame] | 251 | // other YMM/ZMM may appear before other subsequent calls or even before |
| Eli Friedman | 8f24960 | 2011-11-04 23:46:11 +0000 | [diff] [blame] | 252 | // the end of the BB. |
| Duncan P. N. Exon Smith | 7b4c18e | 2016-07-12 03:18:50 +0000 | [diff] [blame] | 253 | insertVZeroUpper(MI, MBB); |
| Lang Hames | 7c8189c | 2014-03-17 01:22:54 +0000 | [diff] [blame] | 254 | CurState = EXITS_CLEAN; |
| 255 | } else if (CurState == PASS_THROUGH) { |
| 256 | // If this block is currently in pass-through state and we encounter a |
| 257 | // call then whether we need a vzeroupper or not depends on whether this |
| 258 | // block has successors that exit dirty. Record the location of the call, |
| 259 | // and set the state to EXITS_CLEAN, but do not insert the vzeroupper yet. |
| 260 | // It will be inserted later if necessary. |
| Duncan P. N. Exon Smith | 7b4c18e | 2016-07-12 03:18:50 +0000 | [diff] [blame] | 261 | BlockStates[MBB.getNumber()].FirstUnguardedCall = MI; |
| Lang Hames | 7c8189c | 2014-03-17 01:22:54 +0000 | [diff] [blame] | 262 | CurState = EXITS_CLEAN; |
| Bruno Cardoso Lopes | 2a3ffb5 | 2011-08-23 01:14:17 +0000 | [diff] [blame] | 263 | } |
| 264 | } |
| 265 | |
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 266 | LLVM_DEBUG(dbgs() << "MBB #" << MBB.getNumber() << " exit state: " |
| 267 | << getBlockExitStateName(CurState) << '\n'); |
| Eli Friedman | 8f24960 | 2011-11-04 23:46:11 +0000 | [diff] [blame] | 268 | |
| Lang Hames | 7c8189c | 2014-03-17 01:22:54 +0000 | [diff] [blame] | 269 | if (CurState == EXITS_DIRTY) |
| 270 | for (MachineBasicBlock::succ_iterator SI = MBB.succ_begin(), |
| 271 | SE = MBB.succ_end(); |
| 272 | SI != SE; ++SI) |
| 273 | addDirtySuccessor(**SI); |
| Eli Friedman | 8f24960 | 2011-11-04 23:46:11 +0000 | [diff] [blame] | 274 | |
| Lang Hames | 7c8189c | 2014-03-17 01:22:54 +0000 | [diff] [blame] | 275 | BlockStates[MBB.getNumber()].ExitState = CurState; |
| 276 | } |
| Eli Friedman | 8f24960 | 2011-11-04 23:46:11 +0000 | [diff] [blame] | 277 | |
| Sanjay Patel | 8bc63b2 | 2016-05-20 16:46:01 +0000 | [diff] [blame] | 278 | /// Loop over all of the basic blocks, inserting vzeroupper instructions before |
| 279 | /// function calls. |
| Lang Hames | 7c8189c | 2014-03-17 01:22:54 +0000 | [diff] [blame] | 280 | bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) { |
| Eric Christopher | 05b8197 | 2015-02-02 17:38:43 +0000 | [diff] [blame] | 281 | const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>(); |
| Amjad Aboud | 4f97751 | 2017-03-03 09:03:24 +0000 | [diff] [blame] | 282 | if (!ST.hasAVX() || ST.hasFastPartialYMMorZMMWrite()) |
| Lang Hames | 7c8189c | 2014-03-17 01:22:54 +0000 | [diff] [blame] | 283 | return false; |
| Eric Christopher | 05b8197 | 2015-02-02 17:38:43 +0000 | [diff] [blame] | 284 | TII = ST.getInstrInfo(); |
| Lang Hames | 7c8189c | 2014-03-17 01:22:54 +0000 | [diff] [blame] | 285 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
| 286 | EverMadeChange = false; |
| Matthias Braun | f1caa28 | 2017-12-15 22:22:58 +0000 | [diff] [blame] | 287 | IsX86INTR = MF.getFunction().getCallingConv() == CallingConv::X86_INTR; |
| Lang Hames | 7c8189c | 2014-03-17 01:22:54 +0000 | [diff] [blame] | 288 | |
| Amjad Aboud | 4f97751 | 2017-03-03 09:03:24 +0000 | [diff] [blame] | 289 | bool FnHasLiveInYmmOrZmm = checkFnHasLiveInYmmOrZmm(MRI); |
| Matthias Braun | ada0adf | 2015-01-08 00:33:48 +0000 | [diff] [blame] | 290 | |
| Amjad Aboud | 4f97751 | 2017-03-03 09:03:24 +0000 | [diff] [blame] | 291 | // Fast check: if the function doesn't use any ymm/zmm registers, we don't |
| 292 | // need to insert any VZEROUPPER instructions. This is constant-time, so it |
| 293 | // is cheap in the common case of no ymm/zmm use. |
| 294 | bool YmmOrZmmUsed = FnHasLiveInYmmOrZmm; |
| 295 | const TargetRegisterClass *RCs[2] = {&X86::VR256RegClass, &X86::VR512RegClass}; |
| 296 | for (auto *RC : RCs) { |
| 297 | if (!YmmOrZmmUsed) { |
| 298 | for (TargetRegisterClass::iterator i = RC->begin(), e = RC->end(); i != e; |
| 299 | i++) { |
| 300 | if (!MRI.reg_nodbg_empty(*i)) { |
| 301 | YmmOrZmmUsed = true; |
| 302 | break; |
| 303 | } |
| Matthias Braun | ada0adf | 2015-01-08 00:33:48 +0000 | [diff] [blame] | 304 | } |
| Lang Hames | 7c8189c | 2014-03-17 01:22:54 +0000 | [diff] [blame] | 305 | } |
| 306 | } |
| Amjad Aboud | 4f97751 | 2017-03-03 09:03:24 +0000 | [diff] [blame] | 307 | if (!YmmOrZmmUsed) { |
| Lang Hames | 7c8189c | 2014-03-17 01:22:54 +0000 | [diff] [blame] | 308 | return false; |
| 309 | } |
| 310 | |
| 311 | assert(BlockStates.empty() && DirtySuccessors.empty() && |
| 312 | "X86VZeroUpper state should be clear"); |
| 313 | BlockStates.resize(MF.getNumBlockIDs()); |
| 314 | |
| 315 | // Process all blocks. This will compute block exit states, record the first |
| 316 | // unguarded call in each block, and add successors of dirty blocks to the |
| 317 | // DirtySuccessors list. |
| Sanjay Patel | fbca70d | 2015-05-05 21:20:52 +0000 | [diff] [blame] | 318 | for (MachineBasicBlock &MBB : MF) |
| 319 | processBasicBlock(MBB); |
| Lang Hames | 7c8189c | 2014-03-17 01:22:54 +0000 | [diff] [blame] | 320 | |
| Amjad Aboud | 4f97751 | 2017-03-03 09:03:24 +0000 | [diff] [blame] | 321 | // If any YMM/ZMM regs are live-in to this function, add the entry block to |
| 322 | // the DirtySuccessors list |
| 323 | if (FnHasLiveInYmmOrZmm) |
| Lang Hames | 7c8189c | 2014-03-17 01:22:54 +0000 | [diff] [blame] | 324 | addDirtySuccessor(MF.front()); |
| 325 | |
| Sanjay Patel | 8099fb7e | 2016-05-23 18:01:20 +0000 | [diff] [blame] | 326 | // Re-visit all blocks that are successors of EXITS_DIRTY blocks. Add |
| Lang Hames | 7c8189c | 2014-03-17 01:22:54 +0000 | [diff] [blame] | 327 | // vzeroupper instructions to unguarded calls, and propagate EXITS_DIRTY |
| 328 | // through PASS_THROUGH blocks. |
| 329 | while (!DirtySuccessors.empty()) { |
| 330 | MachineBasicBlock &MBB = *DirtySuccessors.back(); |
| 331 | DirtySuccessors.pop_back(); |
| 332 | BlockState &BBState = BlockStates[MBB.getNumber()]; |
| 333 | |
| 334 | // MBB is a successor of a dirty block, so its first call needs to be |
| 335 | // guarded. |
| 336 | if (BBState.FirstUnguardedCall != MBB.end()) |
| 337 | insertVZeroUpper(BBState.FirstUnguardedCall, MBB); |
| 338 | |
| Sanjay Patel | 5496a23 | 2016-05-20 17:07:19 +0000 | [diff] [blame] | 339 | // If this successor was a pass-through block, then it is now dirty. Its |
| Lang Hames | 7c8189c | 2014-03-17 01:22:54 +0000 | [diff] [blame] | 340 | // successors need to be added to the worklist (if they haven't been |
| 341 | // already). |
| 342 | if (BBState.ExitState == PASS_THROUGH) { |
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 343 | LLVM_DEBUG(dbgs() << "MBB #" << MBB.getNumber() |
| 344 | << " was Pass-through, is now Dirty-out.\n"); |
| Sanjay Patel | 13a0d49 | 2016-05-23 18:00:50 +0000 | [diff] [blame] | 345 | for (MachineBasicBlock *Succ : MBB.successors()) |
| 346 | addDirtySuccessor(*Succ); |
| Lang Hames | 7c8189c | 2014-03-17 01:22:54 +0000 | [diff] [blame] | 347 | } |
| 348 | } |
| 349 | |
| 350 | BlockStates.clear(); |
| 351 | return EverMadeChange; |
| Bruno Cardoso Lopes | 2a3ffb5 | 2011-08-23 01:14:17 +0000 | [diff] [blame] | 352 | } |