//===- SIMemoryLegalizer.cpp ----------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Memory legalizer - implements memory model. More information can be
/// found here:
///   http://llvm.org/docs/AMDGPUUsage.html#memory-model
//
//===----------------------------------------------------------------------===//
16
17#include "AMDGPU.h"
18#include "AMDGPUMachineModuleInfo.h"
19#include "AMDGPUSubtarget.h"
Eugene Zelenkoc8fbf6f2017-08-10 00:46:15 +000020#include "SIDefines.h"
21#include "SIInstrInfo.h"
Konstantin Zhuravlyove9a5a772017-07-21 21:19:23 +000022#include "Utils/AMDGPUBaseInfo.h"
Eugene Zelenkoc8fbf6f2017-08-10 00:46:15 +000023#include "llvm/ADT/None.h"
24#include "llvm/ADT/Optional.h"
25#include "llvm/CodeGen/MachineBasicBlock.h"
26#include "llvm/CodeGen/MachineFunction.h"
Konstantin Zhuravlyove9a5a772017-07-21 21:19:23 +000027#include "llvm/CodeGen/MachineFunctionPass.h"
28#include "llvm/CodeGen/MachineInstrBuilder.h"
Eugene Zelenkoc8fbf6f2017-08-10 00:46:15 +000029#include "llvm/CodeGen/MachineMemOperand.h"
30#include "llvm/CodeGen/MachineModuleInfo.h"
31#include "llvm/CodeGen/MachineOperand.h"
32#include "llvm/IR/DebugLoc.h"
Konstantin Zhuravlyove9a5a772017-07-21 21:19:23 +000033#include "llvm/IR/DiagnosticInfo.h"
Eugene Zelenkoc8fbf6f2017-08-10 00:46:15 +000034#include "llvm/IR/Function.h"
35#include "llvm/IR/LLVMContext.h"
36#include "llvm/MC/MCInstrDesc.h"
37#include "llvm/Pass.h"
38#include "llvm/Support/AtomicOrdering.h"
39#include <cassert>
40#include <list>
Konstantin Zhuravlyove9a5a772017-07-21 21:19:23 +000041
42using namespace llvm;
43using namespace llvm::AMDGPU;
44
// DEBUG_TYPE is the pass argument handed to INITIALIZE_PASS; PASS_NAME is the
// human-readable name handed to INITIALIZE_PASS and returned by getPassName().
#define DEBUG_TYPE "si-memory-legalizer"
#define PASS_NAME "SI Memory Legalizer"
47
48namespace {
49
50class SIMemoryLegalizer final : public MachineFunctionPass {
51private:
52 struct AtomicInfo final {
53 SyncScope::ID SSID = SyncScope::System;
54 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent;
55 AtomicOrdering FailureOrdering = AtomicOrdering::SequentiallyConsistent;
56
Eugene Zelenkoc8fbf6f2017-08-10 00:46:15 +000057 AtomicInfo() = default;
Konstantin Zhuravlyove9a5a772017-07-21 21:19:23 +000058
59 AtomicInfo(SyncScope::ID SSID,
60 AtomicOrdering Ordering,
61 AtomicOrdering FailureOrdering)
62 : SSID(SSID),
63 Ordering(Ordering),
64 FailureOrdering(FailureOrdering) {}
65
66 AtomicInfo(const MachineMemOperand *MMO)
67 : SSID(MMO->getSyncScopeID()),
68 Ordering(MMO->getOrdering()),
69 FailureOrdering(MMO->getFailureOrdering()) {}
70 };
71
72 /// \brief LLVM context.
73 LLVMContext *CTX = nullptr;
Eugene Zelenkoc8fbf6f2017-08-10 00:46:15 +000074
Konstantin Zhuravlyove9a5a772017-07-21 21:19:23 +000075 /// \brief Machine module info.
76 const AMDGPUMachineModuleInfo *MMI = nullptr;
Eugene Zelenkoc8fbf6f2017-08-10 00:46:15 +000077
Konstantin Zhuravlyove9a5a772017-07-21 21:19:23 +000078 /// \brief Instruction info.
79 const SIInstrInfo *TII = nullptr;
80
81 /// \brief Immediate for "vmcnt(0)".
82 unsigned Vmcnt0Immediate = 0;
Eugene Zelenkoc8fbf6f2017-08-10 00:46:15 +000083
Konstantin Zhuravlyove9a5a772017-07-21 21:19:23 +000084 /// \brief Opcode for cache invalidation instruction (L1).
85 unsigned Wbinvl1Opcode = 0;
86
87 /// \brief List of atomic pseudo instructions.
88 std::list<MachineBasicBlock::iterator> AtomicPseudoMIs;
89
90 /// \brief Inserts "buffer_wbinvl1_vol" instruction \p Before or after \p MI.
91 /// Always returns true.
92 bool insertBufferWbinvl1Vol(MachineBasicBlock::iterator &MI,
93 bool Before = true) const;
94 /// \brief Inserts "s_waitcnt vmcnt(0)" instruction \p Before or after \p MI.
95 /// Always returns true.
96 bool insertWaitcntVmcnt0(MachineBasicBlock::iterator &MI,
97 bool Before = true) const;
98
99 /// \brief Sets GLC bit if present in \p MI. Returns true if \p MI is
100 /// modified, false otherwise.
101 bool setGLC(const MachineBasicBlock::iterator &MI) const;
102
103 /// \brief Removes all processed atomic pseudo instructions from the current
104 /// function. Returns true if current function is modified, false otherwise.
105 bool removeAtomicPseudoMIs();
106
107 /// \brief Reports unknown synchronization scope used in \p MI to LLVM
108 /// context.
109 void reportUnknownSynchScope(const MachineBasicBlock::iterator &MI);
110
111 /// \returns Atomic fence info if \p MI is an atomic fence operation,
112 /// "None" otherwise.
113 Optional<AtomicInfo> getAtomicFenceInfo(
114 const MachineBasicBlock::iterator &MI) const;
115 /// \returns Atomic load info if \p MI is an atomic load operation,
116 /// "None" otherwise.
117 Optional<AtomicInfo> getAtomicLoadInfo(
118 const MachineBasicBlock::iterator &MI) const;
119 /// \returns Atomic store info if \p MI is an atomic store operation,
120 /// "None" otherwise.
121 Optional<AtomicInfo> getAtomicStoreInfo(
122 const MachineBasicBlock::iterator &MI) const;
123 /// \returns Atomic cmpxchg info if \p MI is an atomic cmpxchg operation,
124 /// "None" otherwise.
125 Optional<AtomicInfo> getAtomicCmpxchgInfo(
126 const MachineBasicBlock::iterator &MI) const;
127 /// \returns Atomic rmw info if \p MI is an atomic rmw operation,
128 /// "None" otherwise.
129 Optional<AtomicInfo> getAtomicRmwInfo(
130 const MachineBasicBlock::iterator &MI) const;
131
132 /// \brief Expands atomic fence operation \p MI. Returns true if
133 /// instructions are added/deleted or \p MI is modified, false otherwise.
134 bool expandAtomicFence(const AtomicInfo &AI,
135 MachineBasicBlock::iterator &MI);
136 /// \brief Expands atomic load operation \p MI. Returns true if
137 /// instructions are added/deleted or \p MI is modified, false otherwise.
138 bool expandAtomicLoad(const AtomicInfo &AI,
139 MachineBasicBlock::iterator &MI);
140 /// \brief Expands atomic store operation \p MI. Returns true if
141 /// instructions are added/deleted or \p MI is modified, false otherwise.
142 bool expandAtomicStore(const AtomicInfo &AI,
143 MachineBasicBlock::iterator &MI);
144 /// \brief Expands atomic cmpxchg operation \p MI. Returns true if
145 /// instructions are added/deleted or \p MI is modified, false otherwise.
146 bool expandAtomicCmpxchg(const AtomicInfo &AI,
147 MachineBasicBlock::iterator &MI);
148 /// \brief Expands atomic rmw operation \p MI. Returns true if
149 /// instructions are added/deleted or \p MI is modified, false otherwise.
150 bool expandAtomicRmw(const AtomicInfo &AI,
151 MachineBasicBlock::iterator &MI);
152
153public:
154 static char ID;
155
Eugene Zelenkoc8fbf6f2017-08-10 00:46:15 +0000156 SIMemoryLegalizer() : MachineFunctionPass(ID) {}
Konstantin Zhuravlyove9a5a772017-07-21 21:19:23 +0000157
158 void getAnalysisUsage(AnalysisUsage &AU) const override {
159 AU.setPreservesCFG();
160 MachineFunctionPass::getAnalysisUsage(AU);
161 }
162
163 StringRef getPassName() const override {
164 return PASS_NAME;
165 }
166
167 bool runOnMachineFunction(MachineFunction &MF) override;
168};
169
170} // end namespace anonymous
171
172bool SIMemoryLegalizer::insertBufferWbinvl1Vol(MachineBasicBlock::iterator &MI,
173 bool Before) const {
174 MachineBasicBlock &MBB = *MI->getParent();
175 DebugLoc DL = MI->getDebugLoc();
176
177 if (!Before)
178 ++MI;
179
180 BuildMI(MBB, MI, DL, TII->get(Wbinvl1Opcode));
181
182 if (!Before)
183 --MI;
184
185 return true;
186}
187
188bool SIMemoryLegalizer::insertWaitcntVmcnt0(MachineBasicBlock::iterator &MI,
189 bool Before) const {
190 MachineBasicBlock &MBB = *MI->getParent();
191 DebugLoc DL = MI->getDebugLoc();
192
193 if (!Before)
194 ++MI;
195
196 BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(Vmcnt0Immediate);
197
198 if (!Before)
199 --MI;
200
201 return true;
202}
203
204bool SIMemoryLegalizer::setGLC(const MachineBasicBlock::iterator &MI) const {
205 int GLCIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::glc);
206 if (GLCIdx == -1)
207 return false;
208
209 MachineOperand &GLC = MI->getOperand(GLCIdx);
210 if (GLC.getImm() == 1)
211 return false;
212
213 GLC.setImm(1);
214 return true;
215}
216
217bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
218 if (AtomicPseudoMIs.empty())
219 return false;
220
221 for (auto &MI : AtomicPseudoMIs)
222 MI->eraseFromParent();
223
224 AtomicPseudoMIs.clear();
225 return true;
226}
227
228void SIMemoryLegalizer::reportUnknownSynchScope(
229 const MachineBasicBlock::iterator &MI) {
230 DiagnosticInfoUnsupported Diag(*MI->getParent()->getParent()->getFunction(),
231 "Unsupported synchronization scope");
232 CTX->diagnose(Diag);
233}
234
235Optional<SIMemoryLegalizer::AtomicInfo> SIMemoryLegalizer::getAtomicFenceInfo(
236 const MachineBasicBlock::iterator &MI) const {
237 assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
238
239 if (MI->getOpcode() != AMDGPU::ATOMIC_FENCE)
240 return None;
241
242 SyncScope::ID SSID =
243 static_cast<SyncScope::ID>(MI->getOperand(1).getImm());
244 AtomicOrdering Ordering =
245 static_cast<AtomicOrdering>(MI->getOperand(0).getImm());
246 return AtomicInfo(SSID, Ordering, AtomicOrdering::NotAtomic);
247}
248
249Optional<SIMemoryLegalizer::AtomicInfo> SIMemoryLegalizer::getAtomicLoadInfo(
250 const MachineBasicBlock::iterator &MI) const {
251 assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
252
253 if (!(MI->mayLoad() && !MI->mayStore()))
254 return None;
255 if (!MI->hasOneMemOperand())
256 return AtomicInfo();
257
258 const MachineMemOperand *MMO = *MI->memoperands_begin();
259 if (!MMO->isAtomic())
260 return None;
261
262 return AtomicInfo(MMO);
263}
264
265Optional<SIMemoryLegalizer::AtomicInfo> SIMemoryLegalizer::getAtomicStoreInfo(
266 const MachineBasicBlock::iterator &MI) const {
267 assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
268
269 if (!(!MI->mayLoad() && MI->mayStore()))
270 return None;
271 if (!MI->hasOneMemOperand())
272 return AtomicInfo();
273
274 const MachineMemOperand *MMO = *MI->memoperands_begin();
275 if (!MMO->isAtomic())
276 return None;
277
278 return AtomicInfo(MMO);
279}
280
281Optional<SIMemoryLegalizer::AtomicInfo> SIMemoryLegalizer::getAtomicCmpxchgInfo(
282 const MachineBasicBlock::iterator &MI) const {
283 assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
284
285 if (!(MI->mayLoad() && MI->mayStore()))
286 return None;
287 if (!MI->hasOneMemOperand())
288 return AtomicInfo();
289
290 const MachineMemOperand *MMO = *MI->memoperands_begin();
291 if (!MMO->isAtomic())
292 return None;
293 if (MMO->getFailureOrdering() == AtomicOrdering::NotAtomic)
294 return None;
295
296 return AtomicInfo(MMO);
297}
298
299Optional<SIMemoryLegalizer::AtomicInfo> SIMemoryLegalizer::getAtomicRmwInfo(
300 const MachineBasicBlock::iterator &MI) const {
301 assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
302
303 if (!(MI->mayLoad() && MI->mayStore()))
304 return None;
305 if (!MI->hasOneMemOperand())
306 return AtomicInfo();
307
308 const MachineMemOperand *MMO = *MI->memoperands_begin();
309 if (!MMO->isAtomic())
310 return None;
311 if (MMO->getFailureOrdering() != AtomicOrdering::NotAtomic)
312 return None;
313
314 return AtomicInfo(MMO);
315}
316
317bool SIMemoryLegalizer::expandAtomicFence(const AtomicInfo &AI,
318 MachineBasicBlock::iterator &MI) {
319 assert(MI->getOpcode() == AMDGPU::ATOMIC_FENCE);
320
321 bool Changed = false;
322 if (AI.SSID == SyncScope::System ||
323 AI.SSID == MMI->getAgentSSID()) {
324 if (AI.Ordering == AtomicOrdering::Acquire ||
325 AI.Ordering == AtomicOrdering::Release ||
326 AI.Ordering == AtomicOrdering::AcquireRelease ||
327 AI.Ordering == AtomicOrdering::SequentiallyConsistent)
328 Changed |= insertWaitcntVmcnt0(MI);
329
330 if (AI.Ordering == AtomicOrdering::Acquire ||
331 AI.Ordering == AtomicOrdering::AcquireRelease ||
332 AI.Ordering == AtomicOrdering::SequentiallyConsistent)
333 Changed |= insertBufferWbinvl1Vol(MI);
334
335 AtomicPseudoMIs.push_back(MI);
336 return Changed;
337 } else if (AI.SSID == SyncScope::SingleThread ||
338 AI.SSID == MMI->getWorkgroupSSID() ||
339 AI.SSID == MMI->getWavefrontSSID()) {
340 AtomicPseudoMIs.push_back(MI);
341 return Changed;
342 } else {
343 reportUnknownSynchScope(MI);
344 return Changed;
345 }
346}
347
348bool SIMemoryLegalizer::expandAtomicLoad(const AtomicInfo &AI,
349 MachineBasicBlock::iterator &MI) {
350 assert(MI->mayLoad() && !MI->mayStore());
351
352 bool Changed = false;
353 if (AI.SSID == SyncScope::System ||
354 AI.SSID == MMI->getAgentSSID()) {
355 if (AI.Ordering == AtomicOrdering::Acquire ||
356 AI.Ordering == AtomicOrdering::SequentiallyConsistent)
357 Changed |= setGLC(MI);
358
359 if (AI.Ordering == AtomicOrdering::SequentiallyConsistent)
360 Changed |= insertWaitcntVmcnt0(MI);
361
362 if (AI.Ordering == AtomicOrdering::Acquire ||
363 AI.Ordering == AtomicOrdering::SequentiallyConsistent) {
364 Changed |= insertWaitcntVmcnt0(MI, false);
365 Changed |= insertBufferWbinvl1Vol(MI, false);
366 }
367
368 return Changed;
369 } else if (AI.SSID == SyncScope::SingleThread ||
370 AI.SSID == MMI->getWorkgroupSSID() ||
371 AI.SSID == MMI->getWavefrontSSID()) {
372 return Changed;
373 } else {
374 reportUnknownSynchScope(MI);
375 return Changed;
376 }
377}
378
379bool SIMemoryLegalizer::expandAtomicStore(const AtomicInfo &AI,
380 MachineBasicBlock::iterator &MI) {
381 assert(!MI->mayLoad() && MI->mayStore());
382
383 bool Changed = false;
384 if (AI.SSID == SyncScope::System ||
385 AI.SSID == MMI->getAgentSSID()) {
386 if (AI.Ordering == AtomicOrdering::Release ||
387 AI.Ordering == AtomicOrdering::SequentiallyConsistent)
388 Changed |= insertWaitcntVmcnt0(MI);
389
390 return Changed;
391 } else if (AI.SSID == SyncScope::SingleThread ||
392 AI.SSID == MMI->getWorkgroupSSID() ||
393 AI.SSID == MMI->getWavefrontSSID()) {
394 return Changed;
395 } else {
396 reportUnknownSynchScope(MI);
397 return Changed;
398 }
399}
400
401bool SIMemoryLegalizer::expandAtomicCmpxchg(const AtomicInfo &AI,
402 MachineBasicBlock::iterator &MI) {
403 assert(MI->mayLoad() && MI->mayStore());
404
405 bool Changed = false;
406 if (AI.SSID == SyncScope::System ||
407 AI.SSID == MMI->getAgentSSID()) {
408 if (AI.Ordering == AtomicOrdering::Release ||
409 AI.Ordering == AtomicOrdering::AcquireRelease ||
410 AI.Ordering == AtomicOrdering::SequentiallyConsistent ||
411 AI.FailureOrdering == AtomicOrdering::SequentiallyConsistent)
412 Changed |= insertWaitcntVmcnt0(MI);
413
414 if (AI.Ordering == AtomicOrdering::Acquire ||
415 AI.Ordering == AtomicOrdering::AcquireRelease ||
416 AI.Ordering == AtomicOrdering::SequentiallyConsistent ||
417 AI.FailureOrdering == AtomicOrdering::Acquire ||
418 AI.FailureOrdering == AtomicOrdering::SequentiallyConsistent) {
419 Changed |= insertWaitcntVmcnt0(MI, false);
420 Changed |= insertBufferWbinvl1Vol(MI, false);
421 }
422
423 return Changed;
424 } else if (AI.SSID == SyncScope::SingleThread ||
425 AI.SSID == MMI->getWorkgroupSSID() ||
426 AI.SSID == MMI->getWavefrontSSID()) {
427 Changed |= setGLC(MI);
428 return Changed;
429 } else {
430 reportUnknownSynchScope(MI);
431 return Changed;
432 }
433}
434
435bool SIMemoryLegalizer::expandAtomicRmw(const AtomicInfo &AI,
436 MachineBasicBlock::iterator &MI) {
437 assert(MI->mayLoad() && MI->mayStore());
438
439 bool Changed = false;
440 if (AI.SSID == SyncScope::System ||
441 AI.SSID == MMI->getAgentSSID()) {
442 if (AI.Ordering == AtomicOrdering::Release ||
443 AI.Ordering == AtomicOrdering::AcquireRelease ||
444 AI.Ordering == AtomicOrdering::SequentiallyConsistent)
445 Changed |= insertWaitcntVmcnt0(MI);
446
447 if (AI.Ordering == AtomicOrdering::Acquire ||
448 AI.Ordering == AtomicOrdering::AcquireRelease ||
449 AI.Ordering == AtomicOrdering::SequentiallyConsistent) {
450 Changed |= insertWaitcntVmcnt0(MI, false);
451 Changed |= insertBufferWbinvl1Vol(MI, false);
452 }
453
454 return Changed;
455 } else if (AI.SSID == SyncScope::SingleThread ||
456 AI.SSID == MMI->getWorkgroupSSID() ||
457 AI.SSID == MMI->getWavefrontSSID()) {
458 Changed |= setGLC(MI);
459 return Changed;
460 } else {
461 reportUnknownSynchScope(MI);
462 return Changed;
463 }
464}
465
466bool SIMemoryLegalizer::runOnMachineFunction(MachineFunction &MF) {
467 bool Changed = false;
468 const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
469 const IsaInfo::IsaVersion IV = IsaInfo::getIsaVersion(ST.getFeatureBits());
470
471 CTX = &MF.getFunction()->getContext();
472 MMI = &MF.getMMI().getObjFileInfo<AMDGPUMachineModuleInfo>();
473 TII = ST.getInstrInfo();
474
475 Vmcnt0Immediate =
476 AMDGPU::encodeWaitcnt(IV, 0, getExpcntBitMask(IV), getLgkmcntBitMask(IV));
477 Wbinvl1Opcode = ST.getGeneration() <= AMDGPUSubtarget::SOUTHERN_ISLANDS ?
478 AMDGPU::BUFFER_WBINVL1 : AMDGPU::BUFFER_WBINVL1_VOL;
479
480 for (auto &MBB : MF) {
481 for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {
482 if (!(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic))
483 continue;
484
485 if (const auto &AI = getAtomicFenceInfo(MI))
486 Changed |= expandAtomicFence(AI.getValue(), MI);
487 else if (const auto &AI = getAtomicLoadInfo(MI))
488 Changed |= expandAtomicLoad(AI.getValue(), MI);
489 else if (const auto &AI = getAtomicStoreInfo(MI))
490 Changed |= expandAtomicStore(AI.getValue(), MI);
491 else if (const auto &AI = getAtomicCmpxchgInfo(MI))
492 Changed |= expandAtomicCmpxchg(AI.getValue(), MI);
493 else if (const auto &AI = getAtomicRmwInfo(MI))
494 Changed |= expandAtomicRmw(AI.getValue(), MI);
495 }
496 }
497
498 Changed |= removeAtomicPseudoMIs();
499 return Changed;
500}
501
502INITIALIZE_PASS(SIMemoryLegalizer, DEBUG_TYPE, PASS_NAME, false, false)
503
504char SIMemoryLegalizer::ID = 0;
505char &llvm::SIMemoryLegalizerID = SIMemoryLegalizer::ID;
506
507FunctionPass *llvm::createSIMemoryLegalizerPass() {
508 return new SIMemoryLegalizer();
509}