//===- SIMemoryLegalizer.cpp ----------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Memory legalizer - implements memory model. More information can be
/// found here:
///   http://llvm.org/docs/AMDGPUUsage.html#memory-model
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUMachineModuleInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <list>

using namespace llvm;
using namespace llvm::AMDGPU;

#define DEBUG_TYPE "si-memory-legalizer"
#define PASS_NAME "SI Memory Legalizer"

namespace {

LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

/// Memory operation flags. Can be ORed together.
enum class SIMemOp {
  NONE = 0u,
  LOAD = 1u << 0,
  STORE = 1u << 1,
  LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ STORE)
};
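// For example, a fence that must order both loads and stores passes
// SIMemOp::LOAD | SIMemOp::STORE to insertWait() below; the
// LLVM_MARK_AS_BITMASK_ENUM marker above is what enables the bitwise
// operators for this enum class.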

/// Position to insert a new instruction relative to an existing
/// instruction.
enum class Position {
  BEFORE,
  AFTER
};

/// The atomic synchronization scopes supported by the AMDGPU target.
enum class SIAtomicScope {
  NONE,
  SINGLETHREAD,
  WAVEFRONT,
  WORKGROUP,
  AGENT,
  SYSTEM
};

/// The distinct address spaces supported by the AMDGPU target for
/// atomic memory operations. Can be ORed together.
enum class SIAtomicAddrSpace {
  NONE = 0u,
  GLOBAL = 1u << 0,
  LDS = 1u << 1,
  SCRATCH = 1u << 2,
  GDS = 1u << 3,
  OTHER = 1u << 4,

  /// The address spaces that can be accessed by a FLAT instruction.
  FLAT = GLOBAL | LDS | SCRATCH,

  /// The address spaces that support atomic instructions.
  ATOMIC = GLOBAL | LDS | SCRATCH | GDS,

  /// All address spaces.
  ALL = GLOBAL | LDS | SCRATCH | GDS | OTHER,

  LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ ALL)
};
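// A membership test compares the masked value against NONE; e.g.
//   (AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE
// holds for any access that may touch global memory, including one made
// through a FLAT address.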

/// Sets named bit \p BitName to "true" if present in instruction \p MI.
/// \returns True if \p MI is modified, false otherwise.
template <uint16_t BitName>
bool enableNamedBit(const MachineBasicBlock::iterator &MI) {
  int BitIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), BitName);
  if (BitIdx == -1)
    return false;

  MachineOperand &Bit = MI->getOperand(BitIdx);
  if (Bit.getImm() != 0)
    return false;

  Bit.setImm(1);
  return true;
}
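// Typical instantiation, as used by the cache controls below:
//   enableNamedBit<AMDGPU::OpName::glc>(MI);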

class SIMemOpInfo final {
private:

  friend class SIMemOpAccess;

  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
  AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
  SIAtomicScope Scope = SIAtomicScope::SYSTEM;
  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
  SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsCrossAddressSpaceOrdering = false;
  bool IsNonTemporal = false;

  SIMemOpInfo(AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent,
              SIAtomicScope Scope = SIAtomicScope::SYSTEM,
              SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::ATOMIC,
              SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::ALL,
              bool IsCrossAddressSpaceOrdering = true,
              AtomicOrdering FailureOrdering =
                AtomicOrdering::SequentiallyConsistent,
              bool IsNonTemporal = false)
    : Ordering(Ordering), FailureOrdering(FailureOrdering),
      Scope(Scope), OrderingAddrSpace(OrderingAddrSpace),
      InstrAddrSpace(InstrAddrSpace),
      IsCrossAddressSpaceOrdering(IsCrossAddressSpaceOrdering),
      IsNonTemporal(IsNonTemporal) {
    // There is also no cross address space ordering if the ordering
    // address space is the same as the instruction address space and
    // only contains a single address space.
    if ((OrderingAddrSpace == InstrAddrSpace) &&
        isPowerOf2_32(uint32_t(InstrAddrSpace)))
      IsCrossAddressSpaceOrdering = false;
  }
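  // For example, an atomic that accesses only LDS has
  // InstrAddrSpace == SIAtomicAddrSpace::LDS, a single set bit, so the
  // isPowerOf2_32 test in the constructor clears IsCrossAddressSpaceOrdering.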

public:
  /// \returns Atomic synchronization scope of the machine instruction used to
  /// create this SIMemOpInfo.
  SIAtomicScope getScope() const {
    return Scope;
  }

  /// \returns Ordering constraint of the machine instruction used to
  /// create this SIMemOpInfo.
  AtomicOrdering getOrdering() const {
    return Ordering;
  }

  /// \returns Failure ordering constraint of the machine instruction used to
  /// create this SIMemOpInfo.
  AtomicOrdering getFailureOrdering() const {
    return FailureOrdering;
  }

  /// \returns The address spaces accessed by the machine
  /// instruction used to create this SIMemOpInfo.
  SIAtomicAddrSpace getInstrAddrSpace() const {
    return InstrAddrSpace;
  }

  /// \returns The address spaces that must be ordered by the machine
  /// instruction used to create this SIMemOpInfo.
  SIAtomicAddrSpace getOrderingAddrSpace() const {
    return OrderingAddrSpace;
  }

  /// \returns True iff memory ordering of operations on
  /// different address spaces is required.
  bool getIsCrossAddressSpaceOrdering() const {
    return IsCrossAddressSpaceOrdering;
  }

  /// \returns True if memory access of the machine instruction used to
  /// create this SIMemOpInfo is non-temporal, false otherwise.
  bool isNonTemporal() const {
    return IsNonTemporal;
  }

  /// \returns True if ordering constraint of the machine instruction used to
  /// create this SIMemOpInfo is unordered or higher, false otherwise.
  bool isAtomic() const {
    return Ordering != AtomicOrdering::NotAtomic;
  }

};

class SIMemOpAccess final {
private:

  AMDGPUAS SIAddrSpaceInfo;
  AMDGPUMachineModuleInfo *MMI = nullptr;

  /// Reports unsupported message \p Msg for \p MI to LLVM context.
  void reportUnsupported(const MachineBasicBlock::iterator &MI,
                         const char *Msg) const;

  /// Inspects the target synchronization scope \p SSID and determines
  /// the SI atomic scope it corresponds to, the address spaces it
  /// covers, and whether the memory ordering applies between address
  /// spaces.
  Optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
  toSIAtomicScope(SyncScope::ID SSID, SIAtomicAddrSpace InstrScope) const;

  /// \returns A bit set of the address spaces accessed by address space \p AS.
  SIAtomicAddrSpace toSIAtomicAddrSpace(unsigned AS) const;

  /// \returns Info constructed from \p MI, which has at least one machine
  /// memory operand.
  Optional<SIMemOpInfo> constructFromMIWithMMO(
      const MachineBasicBlock::iterator &MI) const;

public:
  /// Construct class to support accessing the machine memory operands
  /// of instructions in the machine function \p MF.
  SIMemOpAccess(MachineFunction &MF);

  /// \returns Load info if \p MI is a load operation, "None" otherwise.
  Optional<SIMemOpInfo> getLoadInfo(
      const MachineBasicBlock::iterator &MI) const;

  /// \returns Store info if \p MI is a store operation, "None" otherwise.
  Optional<SIMemOpInfo> getStoreInfo(
      const MachineBasicBlock::iterator &MI) const;

  /// \returns Atomic fence info if \p MI is an atomic fence operation,
  /// "None" otherwise.
  Optional<SIMemOpInfo> getAtomicFenceInfo(
      const MachineBasicBlock::iterator &MI) const;

  /// \returns Atomic cmpxchg/rmw info if \p MI is an atomic cmpxchg or
  /// rmw operation, "None" otherwise.
  Optional<SIMemOpInfo> getAtomicCmpxchgOrRmwInfo(
      const MachineBasicBlock::iterator &MI) const;
};

class SICacheControl {
protected:

  /// Instruction info.
  const SIInstrInfo *TII = nullptr;

  IsaInfo::IsaVersion IV;

  SICacheControl(const GCNSubtarget &ST);

public:

  /// Create a cache control for the subtarget \p ST.
  static std::unique_ptr<SICacheControl> create(const GCNSubtarget &ST);

  /// Update \p MI memory load instruction to bypass any caches up to
  /// the \p Scope memory scope for address spaces \p AddrSpace.
  /// Return true iff the instruction was modified.
  virtual bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                                     SIAtomicScope Scope,
                                     SIAtomicAddrSpace AddrSpace) const = 0;

  /// Update \p MI memory instruction to indicate it is
  /// nontemporal. Return true iff the instruction was modified.
  virtual bool enableNonTemporal(const MachineBasicBlock::iterator &MI)
    const = 0;

  /// Inserts any necessary instructions at position \p Pos relative
  /// to instruction \p MI to ensure any caches associated with
  /// address spaces \p AddrSpace for memory scopes up to memory scope
  /// \p Scope are invalidated. Returns true iff any instructions are
  /// inserted.
  virtual bool insertCacheInvalidate(MachineBasicBlock::iterator &MI,
                                     SIAtomicScope Scope,
                                     SIAtomicAddrSpace AddrSpace,
                                     Position Pos) const = 0;

  /// Inserts any necessary instructions at position \p Pos relative
  /// to instruction \p MI to ensure memory instructions of kind \p Op
  /// associated with address spaces \p AddrSpace have completed as
  /// observed by other memory instructions executing in memory scope
  /// \p Scope. \p IsCrossAddrSpaceOrdering indicates if the memory
  /// ordering is between address spaces. Returns true iff any
  /// instructions are inserted.
  virtual bool insertWait(MachineBasicBlock::iterator &MI,
                          SIAtomicScope Scope,
                          SIAtomicAddrSpace AddrSpace,
                          SIMemOp Op,
                          bool IsCrossAddrSpaceOrdering,
                          Position Pos) const = 0;

  /// Virtual destructor to allow derivations to be deleted.
  virtual ~SICacheControl() = default;

};

class SIGfx6CacheControl : public SICacheControl {
protected:

  /// Sets GLC bit to "true" if present in \p MI. Returns true if \p MI
  /// is modified, false otherwise.
  bool enableGLCBit(const MachineBasicBlock::iterator &MI) const {
    return enableNamedBit<AMDGPU::OpName::glc>(MI);
  }

  /// Sets SLC bit to "true" if present in \p MI. Returns true if \p MI
  /// is modified, false otherwise.
  bool enableSLCBit(const MachineBasicBlock::iterator &MI) const {
    return enableNamedBit<AMDGPU::OpName::slc>(MI);
  }

public:

  SIGfx6CacheControl(const GCNSubtarget &ST) : SICacheControl(ST) {}

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override;

  bool enableNonTemporal(const MachineBasicBlock::iterator &MI) const override;

  bool insertCacheInvalidate(MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace,
                             Position Pos) const override;

  bool insertWait(MachineBasicBlock::iterator &MI,
                  SIAtomicScope Scope,
                  SIAtomicAddrSpace AddrSpace,
                  SIMemOp Op,
                  bool IsCrossAddrSpaceOrdering,
                  Position Pos) const override;
};

class SIGfx7CacheControl : public SIGfx6CacheControl {
public:

  SIGfx7CacheControl(const GCNSubtarget &ST) : SIGfx6CacheControl(ST) {}

  bool insertCacheInvalidate(MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace,
                             Position Pos) const override;

};

class SIMemoryLegalizer final : public MachineFunctionPass {
private:

  /// Cache Control.
  std::unique_ptr<SICacheControl> CC = nullptr;

  /// List of atomic pseudo instructions.
  std::list<MachineBasicBlock::iterator> AtomicPseudoMIs;

  /// Return true iff instruction \p MI is an atomic instruction that
  /// returns a result.
  bool isAtomicRet(const MachineInstr &MI) const {
    return AMDGPU::getAtomicNoRetOp(MI.getOpcode()) != -1;
  }

  /// Removes all processed atomic pseudo instructions from the current
  /// function. Returns true if current function is modified, false otherwise.
  bool removeAtomicPseudoMIs();

  /// Expands load operation \p MI. Returns true if instructions are
  /// added/deleted or \p MI is modified, false otherwise.
  bool expandLoad(const SIMemOpInfo &MOI,
                  MachineBasicBlock::iterator &MI);
  /// Expands store operation \p MI. Returns true if instructions are
  /// added/deleted or \p MI is modified, false otherwise.
  bool expandStore(const SIMemOpInfo &MOI,
                   MachineBasicBlock::iterator &MI);
  /// Expands atomic fence operation \p MI. Returns true if
  /// instructions are added/deleted or \p MI is modified, false otherwise.
  bool expandAtomicFence(const SIMemOpInfo &MOI,
                         MachineBasicBlock::iterator &MI);
  /// Expands atomic cmpxchg or rmw operation \p MI. Returns true if
  /// instructions are added/deleted or \p MI is modified, false otherwise.
  bool expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
                                MachineBasicBlock::iterator &MI);

public:
  static char ID;

  SIMemoryLegalizer() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override {
    return PASS_NAME;
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
};

} // end anonymous namespace

void SIMemOpAccess::reportUnsupported(const MachineBasicBlock::iterator &MI,
                                      const char *Msg) const {
  const Function &Func = MI->getParent()->getParent()->getFunction();
  DiagnosticInfoUnsupported Diag(Func, Msg, MI->getDebugLoc());
  Func.getContext().diagnose(Diag);
}

Optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
SIMemOpAccess::toSIAtomicScope(SyncScope::ID SSID,
                               SIAtomicAddrSpace InstrScope) const {
  /// TODO: For now assume the OpenCL memory model, which treats each
  /// address space as having a separate happens-before relation. An
  /// instruction therefore only has ordering with respect to the
  /// address spaces it accesses, and accessing multiple address spaces
  /// does not require ordering of operations between them.
  if (SSID == SyncScope::System)
    return std::make_tuple(SIAtomicScope::SYSTEM,
                           SIAtomicAddrSpace::ATOMIC & InstrScope,
                           false);
  if (SSID == MMI->getAgentSSID())
    return std::make_tuple(SIAtomicScope::AGENT,
                           SIAtomicAddrSpace::ATOMIC & InstrScope,
                           false);
  if (SSID == MMI->getWorkgroupSSID())
    return std::make_tuple(SIAtomicScope::WORKGROUP,
                           SIAtomicAddrSpace::ATOMIC & InstrScope,
                           false);
  if (SSID == MMI->getWavefrontSSID())
    return std::make_tuple(SIAtomicScope::WAVEFRONT,
                           SIAtomicAddrSpace::ATOMIC & InstrScope,
                           false);
  if (SSID == SyncScope::SingleThread)
    return std::make_tuple(SIAtomicScope::SINGLETHREAD,
                           SIAtomicAddrSpace::ATOMIC & InstrScope,
                           false);
  /// TODO: To support the HSA memory model, additional memory scopes
  /// that do require cross address space ordering need to be added.
  return None;
}
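// For example, an atomic at OpenCL workgroup scope on a flat pointer maps
// to (SIAtomicScope::WORKGROUP,
//     SIAtomicAddrSpace::ATOMIC & SIAtomicAddrSpace::FLAT, false).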

SIAtomicAddrSpace SIMemOpAccess::toSIAtomicAddrSpace(unsigned AS) const {
  if (AS == SIAddrSpaceInfo.FLAT_ADDRESS)
    return SIAtomicAddrSpace::FLAT;
  if (AS == SIAddrSpaceInfo.GLOBAL_ADDRESS)
    return SIAtomicAddrSpace::GLOBAL;
  if (AS == SIAddrSpaceInfo.LOCAL_ADDRESS)
    return SIAtomicAddrSpace::LDS;
  if (AS == SIAddrSpaceInfo.PRIVATE_ADDRESS)
    return SIAtomicAddrSpace::SCRATCH;
  if (AS == SIAddrSpaceInfo.REGION_ADDRESS)
    return SIAtomicAddrSpace::GDS;

  return SIAtomicAddrSpace::OTHER;
}

SIMemOpAccess::SIMemOpAccess(MachineFunction &MF) {
  SIAddrSpaceInfo = getAMDGPUAS(MF.getTarget());
  MMI = &MF.getMMI().getObjFileInfo<AMDGPUMachineModuleInfo>();
}

Optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getNumMemOperands() > 0);

  SyncScope::ID SSID = SyncScope::SingleThread;
  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
  AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
  SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsNonTemporal = true;

  // Validator should check whether or not MMOs cover the entire set of
  // locations accessed by the memory instruction.
  for (const auto &MMO : MI->memoperands()) {
    IsNonTemporal &= MMO->isNonTemporal();
    InstrAddrSpace |=
      toSIAtomicAddrSpace(MMO->getPointerInfo().getAddrSpace());
    AtomicOrdering OpOrdering = MMO->getOrdering();
    if (OpOrdering != AtomicOrdering::NotAtomic) {
      const auto &IsSyncScopeInclusion =
        MMI->isSyncScopeInclusion(SSID, MMO->getSyncScopeID());
      if (!IsSyncScopeInclusion) {
        reportUnsupported(MI,
          "Unsupported non-inclusive atomic synchronization scope");
        return None;
      }

      SSID = IsSyncScopeInclusion.getValue() ? SSID : MMO->getSyncScopeID();
      Ordering =
        isStrongerThan(Ordering, OpOrdering) ?
          Ordering : MMO->getOrdering();
      assert(MMO->getFailureOrdering() != AtomicOrdering::Release &&
             MMO->getFailureOrdering() != AtomicOrdering::AcquireRelease);
      FailureOrdering =
        isStrongerThan(FailureOrdering, MMO->getFailureOrdering()) ?
          FailureOrdering : MMO->getFailureOrdering();
    }
  }

  SIAtomicScope Scope = SIAtomicScope::NONE;
  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsCrossAddressSpaceOrdering = false;
  if (Ordering != AtomicOrdering::NotAtomic) {
    auto ScopeOrNone = toSIAtomicScope(SSID, InstrAddrSpace);
    if (!ScopeOrNone) {
      reportUnsupported(MI, "Unsupported atomic synchronization scope");
      return None;
    }
    std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
      ScopeOrNone.getValue();
    if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
        ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace)) {
      reportUnsupported(MI, "Unsupported atomic address space");
      return None;
    }
  }
  return SIMemOpInfo(Ordering, Scope, OrderingAddrSpace, InstrAddrSpace,
                     IsCrossAddressSpaceOrdering, FailureOrdering,
                     IsNonTemporal);
}

Optional<SIMemOpInfo> SIMemOpAccess::getLoadInfo(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (!(MI->mayLoad() && !MI->mayStore()))
    return None;

  // Be conservative if there are no memory operands.
  if (MI->getNumMemOperands() == 0)
    return SIMemOpInfo();

  return constructFromMIWithMMO(MI);
}

Optional<SIMemOpInfo> SIMemOpAccess::getStoreInfo(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (!(!MI->mayLoad() && MI->mayStore()))
    return None;

  // Be conservative if there are no memory operands.
  if (MI->getNumMemOperands() == 0)
    return SIMemOpInfo();

  return constructFromMIWithMMO(MI);
}

Optional<SIMemOpInfo> SIMemOpAccess::getAtomicFenceInfo(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (MI->getOpcode() != AMDGPU::ATOMIC_FENCE)
    return None;

  AtomicOrdering Ordering =
    static_cast<AtomicOrdering>(MI->getOperand(0).getImm());

  SyncScope::ID SSID = static_cast<SyncScope::ID>(MI->getOperand(1).getImm());
  auto ScopeOrNone = toSIAtomicScope(SSID, SIAtomicAddrSpace::ATOMIC);
  if (!ScopeOrNone) {
    reportUnsupported(MI, "Unsupported atomic synchronization scope");
    return None;
  }

  SIAtomicScope Scope = SIAtomicScope::NONE;
  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsCrossAddressSpaceOrdering = false;
  std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
    ScopeOrNone.getValue();

  if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
      ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace)) {
    reportUnsupported(MI, "Unsupported atomic address space");
    return None;
  }

  return SIMemOpInfo(Ordering, Scope, OrderingAddrSpace,
                     SIAtomicAddrSpace::ATOMIC, IsCrossAddressSpaceOrdering);
}

Optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (!(MI->mayLoad() && MI->mayStore()))
    return None;

  // Be conservative if there are no memory operands.
  if (MI->getNumMemOperands() == 0)
    return SIMemOpInfo();

  return constructFromMIWithMMO(MI);
}

SICacheControl::SICacheControl(const GCNSubtarget &ST) {
  TII = ST.getInstrInfo();
  IV = IsaInfo::getIsaVersion(ST.getFeatureBits());
}

/* static */
std::unique_ptr<SICacheControl> SICacheControl::create(const GCNSubtarget &ST) {
  GCNSubtarget::Generation Generation = ST.getGeneration();
  if (Generation <= AMDGPUSubtarget::SOUTHERN_ISLANDS)
    return make_unique<SIGfx6CacheControl>(ST);
  return make_unique<SIGfx7CacheControl>(ST);
}
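// The two cache controls currently differ only in the invalidate
// instruction they emit: SIGfx6CacheControl uses BUFFER_WBINVL1 while
// SIGfx7CacheControl (CI and later) uses BUFFER_WBINVL1_VOL; see the
// insertCacheInvalidate implementations below.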

bool SIGfx6CacheControl::enableLoadCacheBypass(
    const MachineBasicBlock::iterator &MI,
    SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  assert(MI->mayLoad() && !MI->mayStore());
  bool Changed = false;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    /// TODO: Do not set glc for rmw atomic operations as they
    /// implicitly bypass the L1 cache.

    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      Changed |= enableGLCBit(MI);
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No cache to bypass.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  /// The scratch address space does not need the global memory caches
  /// to be bypassed as all memory operations by the same thread are
  /// sequentially consistent, and no other thread can access scratch
  /// memory.

  /// Other address spaces do not have a cache.

  return Changed;
}

bool SIGfx6CacheControl::enableNonTemporal(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->mayLoad() ^ MI->mayStore());
  bool Changed = false;

  /// TODO: Do not enableGLCBit if rmw atomic.
  Changed |= enableGLCBit(MI);
  Changed |= enableSLCBit(MI);

  return Changed;
}

bool SIGfx6CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
                                               SIAtomicScope Scope,
                                               SIAtomicAddrSpace AddrSpace,
                                               Position Pos) const {
  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBINVL1));
      Changed = true;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No cache to invalidate.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  /// The scratch address space does not need the global memory cache
  /// to be flushed as all memory operations by the same thread are
  /// sequentially consistent, and no other thread can access scratch
  /// memory.

  /// Other address spaces do not have a cache.

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}

bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
                                    SIAtomicScope Scope,
                                    SIAtomicAddrSpace AddrSpace,
                                    SIMemOp Op,
                                    bool IsCrossAddrSpaceOrdering,
                                    Position Pos) const {
  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  bool VMCnt = false;
  bool LGKMCnt = false;
  bool EXPCnt = false;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      VMCnt = true;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The L1 cache keeps all memory operations in order for
      // wavefronts in the same work-group.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
      // If no cross address space ordering then an LDS waitcnt is not
      // needed as LDS operations for all waves are executed in a
      // total global ordering as observed by all waves. Required if
      // also synchronizing with global/GDS memory as LDS operations
      // could be reordered with respect to later global/GDS memory
      // operations of the same wave.
      LGKMCnt = IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The LDS keeps all memory operations in order for
      // the same wavefront.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // If no cross address space ordering then a GDS waitcnt is not
      // needed as GDS operations for all waves are executed in a
      // total global ordering as observed by all waves. Required if
      // also synchronizing with global/LDS memory as GDS operations
      // could be reordered with respect to later global/LDS memory
      // operations of the same wave.
      EXPCnt = IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The GDS keeps all memory operations in order for
      // the same work-group.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (VMCnt || LGKMCnt || EXPCnt) {
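    // Each counter that must be waited on is encoded as zero; a counter
    // that need not be waited on is encoded with its full (no-wait) bit
    // mask. For example, waiting only on vector memory operations emits
    // "s_waitcnt vmcnt(0)".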
    unsigned WaitCntImmediate =
      AMDGPU::encodeWaitcnt(IV,
                            VMCnt ? 0 : getVmcntBitMask(IV),
                            EXPCnt ? 0 : getExpcntBitMask(IV),
                            LGKMCnt ? 0 : getLgkmcntBitMask(IV));
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(WaitCntImmediate);
    Changed = true;
  }

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}

bool SIGfx7CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
                                               SIAtomicScope Scope,
                                               SIAtomicAddrSpace AddrSpace,
                                               Position Pos) const {
  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBINVL1_VOL));
      Changed = true;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No cache to invalidate.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  /// The scratch address space does not need the global memory cache
  /// to be flushed as all memory operations by the same thread are
  /// sequentially consistent, and no other thread can access scratch
  /// memory.

  /// Other address spaces do not have a cache.

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}

bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
  if (AtomicPseudoMIs.empty())
    return false;

  for (auto &MI : AtomicPseudoMIs)
    MI->eraseFromParent();

  AtomicPseudoMIs.clear();
  return true;
}

bool SIMemoryLegalizer::expandLoad(const SIMemOpInfo &MOI,
                                   MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && !MI->mayStore());

  bool Changed = false;

  if (MOI.isAtomic()) {
    if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
        MOI.getOrdering() == AtomicOrdering::Acquire ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
      Changed |= CC->enableLoadCacheBypass(MI, MOI.getScope(),
                                           MOI.getOrderingAddrSpace());
    }

    if (MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertWait(MI, MOI.getScope(),
                                MOI.getOrderingAddrSpace(),
                                SIMemOp::LOAD | SIMemOp::STORE,
                                MOI.getIsCrossAddressSpaceOrdering(),
                                Position::BEFORE);

    if (MOI.getOrdering() == AtomicOrdering::Acquire ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
      Changed |= CC->insertWait(MI, MOI.getScope(),
                                MOI.getInstrAddrSpace(),
                                SIMemOp::LOAD,
                                MOI.getIsCrossAddressSpaceOrdering(),
                                Position::AFTER);
      Changed |= CC->insertCacheInvalidate(MI, MOI.getScope(),
                                           MOI.getOrderingAddrSpace(),
                                           Position::AFTER);
    }

    return Changed;
  }

  // Atomic instructions do not have the nontemporal attribute.
  if (MOI.isNonTemporal()) {
    Changed |= CC->enableNonTemporal(MI);
    return Changed;
  }

  return Changed;
}

bool SIMemoryLegalizer::expandStore(const SIMemOpInfo &MOI,
                                    MachineBasicBlock::iterator &MI) {
  assert(!MI->mayLoad() && MI->mayStore());

  bool Changed = false;

  if (MOI.isAtomic()) {
    if (MOI.getOrdering() == AtomicOrdering::Release ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertWait(MI, MOI.getScope(),
                                MOI.getOrderingAddrSpace(),
                                SIMemOp::LOAD | SIMemOp::STORE,
                                MOI.getIsCrossAddressSpaceOrdering(),
                                Position::BEFORE);

    return Changed;
  }

  // Atomic instructions do not have the nontemporal attribute.
  if (MOI.isNonTemporal()) {
    Changed |= CC->enableNonTemporal(MI);
    return Changed;
  }

  return Changed;
}

bool SIMemoryLegalizer::expandAtomicFence(const SIMemOpInfo &MOI,
                                          MachineBasicBlock::iterator &MI) {
  assert(MI->getOpcode() == AMDGPU::ATOMIC_FENCE);

  AtomicPseudoMIs.push_back(MI);
  bool Changed = false;

  if (MOI.isAtomic()) {
    if (MOI.getOrdering() == AtomicOrdering::Acquire ||
        MOI.getOrdering() == AtomicOrdering::Release ||
        MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
      /// TODO: This relies on a barrier always generating a waitcnt
      /// for LDS to ensure it is not reordered with the completion of
      /// the preceding LDS operations. If the barrier had a memory
      /// ordering and memory scope, then the library would not need to
      /// generate a fence. Support for the barrier could be added in
      /// this file, and SIInsertWaitcnts.cpp could then stop
      /// unconditionally adding a waitcnt before an S_BARRIER.
      Changed |= CC->insertWait(MI, MOI.getScope(),
                                MOI.getOrderingAddrSpace(),
                                SIMemOp::LOAD | SIMemOp::STORE,
                                MOI.getIsCrossAddressSpaceOrdering(),
                                Position::BEFORE);

    if (MOI.getOrdering() == AtomicOrdering::Acquire ||
        MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertCacheInvalidate(MI, MOI.getScope(),
                                           MOI.getOrderingAddrSpace(),
                                           Position::BEFORE);

    return Changed;
  }

  return Changed;
}

bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
                                                 MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && MI->mayStore());

  bool Changed = false;

  if (MOI.isAtomic()) {
    if (MOI.getOrdering() == AtomicOrdering::Release ||
        MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
        MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertWait(MI, MOI.getScope(),
                                MOI.getOrderingAddrSpace(),
                                SIMemOp::LOAD | SIMemOp::STORE,
                                MOI.getIsCrossAddressSpaceOrdering(),
                                Position::BEFORE);

    if (MOI.getOrdering() == AtomicOrdering::Acquire ||
        MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
        MOI.getFailureOrdering() == AtomicOrdering::Acquire ||
        MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent) {
      Changed |= CC->insertWait(MI, MOI.getScope(),
                                MOI.getOrderingAddrSpace(),
                                isAtomicRet(*MI) ? SIMemOp::LOAD :
                                                   SIMemOp::STORE,
                                MOI.getIsCrossAddressSpaceOrdering(),
                                Position::AFTER);
      Changed |= CC->insertCacheInvalidate(MI, MOI.getScope(),
                                           MOI.getOrderingAddrSpace(),
                                           Position::AFTER);
    }

    return Changed;
  }

  return Changed;
}

bool SIMemoryLegalizer::runOnMachineFunction(MachineFunction &MF) {
  bool Changed = false;

  SIMemOpAccess MOA(MF);
  CC = SICacheControl::create(MF.getSubtarget<GCNSubtarget>());

  for (auto &MBB : MF) {
    for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {
      if (!(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic))
        continue;

      if (const auto &MOI = MOA.getLoadInfo(MI))
        Changed |= expandLoad(MOI.getValue(), MI);
      else if (const auto &MOI = MOA.getStoreInfo(MI))
        Changed |= expandStore(MOI.getValue(), MI);
      else if (const auto &MOI = MOA.getAtomicFenceInfo(MI))
        Changed |= expandAtomicFence(MOI.getValue(), MI);
      else if (const auto &MOI = MOA.getAtomicCmpxchgOrRmwInfo(MI))
        Changed |= expandAtomicCmpxchgOrRmw(MOI.getValue(), MI);
    }
  }

  Changed |= removeAtomicPseudoMIs();
  return Changed;
}

INITIALIZE_PASS(SIMemoryLegalizer, DEBUG_TYPE, PASS_NAME, false, false)

char SIMemoryLegalizer::ID = 0;
char &llvm::SIMemoryLegalizerID = SIMemoryLegalizer::ID;

FunctionPass *llvm::createSIMemoryLegalizerPass() {
  return new SIMemoryLegalizer();
}