blob: a5bada2890d2c6f2ae047c94bfa557b1400d2f94 [file] [log] [blame]
Eugene Zelenkod96089b2017-02-14 00:33:36 +00001//===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
Tom Stellard347ac792015-06-26 21:15:07 +00002//
Chandler Carruth2946cd72019-01-19 08:50:56 +00003// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Tom Stellard347ac792015-06-26 21:15:07 +00006//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
10#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
11
Yaxun Liu1a14bfa2017-03-27 14:04:01 +000012#include "AMDGPU.h"
Tom Stellard347ac792015-06-26 21:15:07 +000013#include "AMDKernelCodeT.h"
Matt Arsenault4bd72362016-12-10 00:39:12 +000014#include "SIDefines.h"
Eugene Zelenkod96089b2017-02-14 00:33:36 +000015#include "llvm/ADT/StringRef.h"
16#include "llvm/IR/CallingConv.h"
17#include "llvm/MC/MCInstrDesc.h"
Scott Linder1e8c2c72018-06-21 19:38:56 +000018#include "llvm/Support/AMDHSAKernelDescriptor.h"
Eugene Zelenkod96089b2017-02-14 00:33:36 +000019#include "llvm/Support/Compiler.h"
20#include "llvm/Support/ErrorHandling.h"
Konstantin Zhuravlyov71e43ee2018-09-12 18:50:47 +000021#include "llvm/Support/TargetParser.h"
Eugene Zelenkod96089b2017-02-14 00:33:36 +000022#include <cstdint>
Konstantin Zhuravlyov9c05b2b2017-10-14 15:40:33 +000023#include <string>
Eugene Zelenkod96089b2017-02-14 00:33:36 +000024#include <utility>
Matt Arsenault4bd72362016-12-10 00:39:12 +000025
Tom Stellard347ac792015-06-26 21:15:07 +000026namespace llvm {
27
Matt Arsenault894e53d2017-07-26 20:39:42 +000028class Argument;
Tim Renouf4f703f52018-08-21 11:07:10 +000029class AMDGPUSubtarget;
Tom Stellard347ac792015-06-26 21:15:07 +000030class FeatureBitset;
Tom Stellardac00eb52015-12-15 16:26:16 +000031class Function;
Tim Renouf4f703f52018-08-21 11:07:10 +000032class GCNSubtarget;
Tom Stellarde3b5aea2015-12-02 17:00:42 +000033class GlobalValue;
Tom Stellarde135ffd2015-09-25 21:41:28 +000034class MCContext;
Krzysztof Parzyszekc8715502016-10-19 17:40:36 +000035class MCRegisterClass;
Sam Kolton1eeb11b2016-09-09 14:44:04 +000036class MCRegisterInfo;
Tom Stellarde135ffd2015-09-25 21:41:28 +000037class MCSection;
Tom Stellard2b65ed32015-12-21 18:44:27 +000038class MCSubtargetInfo;
Scott Linder1e8c2c72018-06-21 19:38:56 +000039class MachineMemOperand;
Eugene Zelenkod96089b2017-02-14 00:33:36 +000040class Triple;
Tom Stellard347ac792015-06-26 21:15:07 +000041
42namespace AMDGPU {
Nicolai Haehnle0ab200b2018-06-21 13:36:44 +000043
/// One entry describing a GCN buffer format.
///
/// Entries are retrieved through the getGcnBufferFormatInfo() overloads
/// declared later in this header: either by the (\c BitsPerComp,
/// \c NumComponents, \c NumFormat) triple or by \c Format alone.
///
/// NOTE(review): the member order presumably has to stay in sync with the
/// table rows that initialize this struct - confirm before reordering.
struct GcnBufferFormatInfo {
  /// Format identifier (key of the single-argument lookup).
  unsigned Format;
  /// Bits per component (part of the three-argument lookup key).
  unsigned BitsPerComp;
  /// Number of components (part of the three-argument lookup key).
  unsigned NumComponents;
  /// Numeric format (part of the three-argument lookup key).
  unsigned NumFormat;
  /// Data format value associated with this entry.
  unsigned DataFormat;
};
51
Nicolai Haehnle0ab200b2018-06-21 13:36:44 +000052#define GET_MIMGBaseOpcode_DECL
Nicolai Haehnle7a9c03f2018-06-21 13:36:57 +000053#define GET_MIMGDim_DECL
Nicolai Haehnle0ab200b2018-06-21 13:36:44 +000054#define GET_MIMGEncoding_DECL
Ryan Taylor894c8fd2018-08-01 12:12:01 +000055#define GET_MIMGLZMapping_DECL
Piotr Sobczak9b11e932019-06-10 15:58:51 +000056#define GET_MIMGMIPMapping_DECL
Nicolai Haehnle0ab200b2018-06-21 13:36:44 +000057#include "AMDGPUGenSearchableTables.inc"
58
Konstantin Zhuravlyov9f89ede2017-02-08 14:05:23 +000059namespace IsaInfo {
Sam Koltona3ec5c12016-10-07 14:46:06 +000060
Konstantin Zhuravlyov9f89ede2017-02-08 14:05:23 +000061enum {
62 // The closed Vulkan driver sets 96, which limits the wave count to 8 but
63 // doesn't spill SGPRs as much as when 80 is set.
Konstantin Zhuravlyovc72ece62018-05-16 20:47:48 +000064 FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
65 TRAP_NUM_SGPRS = 16
Konstantin Zhuravlyov9f89ede2017-02-08 14:05:23 +000066};
67
Adrian Prantl5f8f34e42018-05-01 15:54:18 +000068/// Streams isa version string for given subtarget \p STI into \p Stream.
Konstantin Zhuravlyov9c05b2b2017-10-14 15:40:33 +000069void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream);
70
Konstantin Zhuravlyov00f2cb12018-06-12 18:02:46 +000071/// \returns True if given subtarget \p STI supports code object version 3,
Konstantin Zhuravlyoveda425e2017-10-14 15:59:07 +000072/// false otherwise.
Konstantin Zhuravlyov00f2cb12018-06-12 18:02:46 +000073bool hasCodeObjectV3(const MCSubtargetInfo *STI);
Konstantin Zhuravlyoveda425e2017-10-14 15:59:07 +000074
Konstantin Zhuravlyov71e43ee2018-09-12 18:50:47 +000075/// \returns Wavefront size for given subtarget \p STI.
76unsigned getWavefrontSize(const MCSubtargetInfo *STI);
Konstantin Zhuravlyov9f89ede2017-02-08 14:05:23 +000077
Konstantin Zhuravlyov71e43ee2018-09-12 18:50:47 +000078/// \returns Local memory size in bytes for given subtarget \p STI.
79unsigned getLocalMemorySize(const MCSubtargetInfo *STI);
Konstantin Zhuravlyov9f89ede2017-02-08 14:05:23 +000080
81/// \returns Number of execution units per compute unit for given subtarget \p
Konstantin Zhuravlyov71e43ee2018-09-12 18:50:47 +000082/// STI.
83unsigned getEUsPerCU(const MCSubtargetInfo *STI);
Konstantin Zhuravlyov9f89ede2017-02-08 14:05:23 +000084
85/// \returns Maximum number of work groups per compute unit for given subtarget
Konstantin Zhuravlyov71e43ee2018-09-12 18:50:47 +000086/// \p STI and limited by given \p FlatWorkGroupSize.
87unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
Konstantin Zhuravlyov9f89ede2017-02-08 14:05:23 +000088 unsigned FlatWorkGroupSize);
89
90/// \returns Maximum number of waves per compute unit for given subtarget \p
Konstantin Zhuravlyov71e43ee2018-09-12 18:50:47 +000091/// STI without any kind of limitation.
92unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI);
Konstantin Zhuravlyov9f89ede2017-02-08 14:05:23 +000093
94/// \returns Maximum number of waves per compute unit for given subtarget \p
Konstantin Zhuravlyov71e43ee2018-09-12 18:50:47 +000095/// STI and limited by given \p FlatWorkGroupSize.
96unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
Konstantin Zhuravlyov9f89ede2017-02-08 14:05:23 +000097 unsigned FlatWorkGroupSize);
98
99/// \returns Minimum number of waves per execution unit for given subtarget \p
Konstantin Zhuravlyov71e43ee2018-09-12 18:50:47 +0000100/// STI.
101unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);
Konstantin Zhuravlyov9f89ede2017-02-08 14:05:23 +0000102
103/// \returns Maximum number of waves per execution unit for given subtarget \p
Konstantin Zhuravlyov71e43ee2018-09-12 18:50:47 +0000104/// STI without any kind of limitation.
Stanislav Mekhanoshin7b5a54e2019-07-19 21:29:51 +0000105unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);
Konstantin Zhuravlyov9f89ede2017-02-08 14:05:23 +0000106
107/// \returns Maximum number of waves per execution unit for given subtarget \p
Konstantin Zhuravlyov71e43ee2018-09-12 18:50:47 +0000108/// STI and limited by given \p FlatWorkGroupSize.
109unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
Konstantin Zhuravlyov9f89ede2017-02-08 14:05:23 +0000110 unsigned FlatWorkGroupSize);
111
Konstantin Zhuravlyov71e43ee2018-09-12 18:50:47 +0000112/// \returns Minimum flat work group size for given subtarget \p STI.
113unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);
Konstantin Zhuravlyov9f89ede2017-02-08 14:05:23 +0000114
Konstantin Zhuravlyov71e43ee2018-09-12 18:50:47 +0000115/// \returns Maximum flat work group size for given subtarget \p STI.
116unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);
Konstantin Zhuravlyov9f89ede2017-02-08 14:05:23 +0000117
Konstantin Zhuravlyov71e43ee2018-09-12 18:50:47 +0000118/// \returns Number of waves per work group for given subtarget \p STI and
Konstantin Zhuravlyov9f89ede2017-02-08 14:05:23 +0000119/// limited by given \p FlatWorkGroupSize.
Konstantin Zhuravlyov71e43ee2018-09-12 18:50:47 +0000120unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
Konstantin Zhuravlyov9f89ede2017-02-08 14:05:23 +0000121 unsigned FlatWorkGroupSize);
122
Konstantin Zhuravlyov71e43ee2018-09-12 18:50:47 +0000123/// \returns SGPR allocation granularity for given subtarget \p STI.
124unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);
Konstantin Zhuravlyov9f89ede2017-02-08 14:05:23 +0000125
Konstantin Zhuravlyov71e43ee2018-09-12 18:50:47 +0000126/// \returns SGPR encoding granularity for given subtarget \p STI.
127unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);
Konstantin Zhuravlyov9f89ede2017-02-08 14:05:23 +0000128
Konstantin Zhuravlyov71e43ee2018-09-12 18:50:47 +0000129/// \returns Total number of SGPRs for given subtarget \p STI.
130unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);
Konstantin Zhuravlyov9f89ede2017-02-08 14:05:23 +0000131
Konstantin Zhuravlyov71e43ee2018-09-12 18:50:47 +0000132/// \returns Addressable number of SGPRs for given subtarget \p STI.
133unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);
Konstantin Zhuravlyov9f89ede2017-02-08 14:05:23 +0000134
135/// \returns Minimum number of SGPRs that meets the given number of waves per
Konstantin Zhuravlyov71e43ee2018-09-12 18:50:47 +0000136/// execution unit requirement for given subtarget \p STI.
137unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
Konstantin Zhuravlyov9f89ede2017-02-08 14:05:23 +0000138
139/// \returns Maximum number of SGPRs that meets the given number of waves per
Konstantin Zhuravlyov71e43ee2018-09-12 18:50:47 +0000140/// execution unit requirement for given subtarget \p STI.
141unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
Konstantin Zhuravlyov9f89ede2017-02-08 14:05:23 +0000142 bool Addressable);
143
Scott Linder1e8c2c72018-06-21 19:38:56 +0000144/// \returns Number of extra SGPRs implicitly required by given subtarget \p
Konstantin Zhuravlyov71e43ee2018-09-12 18:50:47 +0000145/// STI when the given special registers are used.
146unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
Scott Linder1e8c2c72018-06-21 19:38:56 +0000147 bool FlatScrUsed, bool XNACKUsed);
148
149/// \returns Number of extra SGPRs implicitly required by given subtarget \p
Konstantin Zhuravlyov71e43ee2018-09-12 18:50:47 +0000150/// STI when the given special registers are used. XNACK is inferred from
151/// \p STI.
152unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
Scott Linder1e8c2c72018-06-21 19:38:56 +0000153 bool FlatScrUsed);
154
Konstantin Zhuravlyov71e43ee2018-09-12 18:50:47 +0000155/// \returns Number of SGPR blocks needed for given subtarget \p STI when
Scott Linder1e8c2c72018-06-21 19:38:56 +0000156/// \p NumSGPRs are used. \p NumSGPRs should already include any special
157/// register counts.
Konstantin Zhuravlyov71e43ee2018-09-12 18:50:47 +0000158unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
Scott Linder1e8c2c72018-06-21 19:38:56 +0000159
Konstantin Zhuravlyov71e43ee2018-09-12 18:50:47 +0000160/// \returns VGPR allocation granularity for given subtarget \p STI.
Stanislav Mekhanoshin8bcc9bb2019-06-13 19:18:29 +0000161///
162/// For subtargets which support it, \p EnableWavefrontSize32 should match
163/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
164unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
165 Optional<bool> EnableWavefrontSize32 = None);
Konstantin Zhuravlyov9f89ede2017-02-08 14:05:23 +0000166
Konstantin Zhuravlyov71e43ee2018-09-12 18:50:47 +0000167/// \returns VGPR encoding granularity for given subtarget \p STI.
Stanislav Mekhanoshin8bcc9bb2019-06-13 19:18:29 +0000168///
169/// For subtargets which support it, \p EnableWavefrontSize32 should match
170/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
171unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
172 Optional<bool> EnableWavefrontSize32 = None);
Konstantin Zhuravlyov9f89ede2017-02-08 14:05:23 +0000173
Konstantin Zhuravlyov71e43ee2018-09-12 18:50:47 +0000174/// \returns Total number of VGPRs for given subtarget \p STI.
175unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
Konstantin Zhuravlyov9f89ede2017-02-08 14:05:23 +0000176
Konstantin Zhuravlyov71e43ee2018-09-12 18:50:47 +0000177/// \returns Addressable number of VGPRs for given subtarget \p STI.
178unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);
Konstantin Zhuravlyov9f89ede2017-02-08 14:05:23 +0000179
180/// \returns Minimum number of VGPRs that meets given number of waves per
Konstantin Zhuravlyov71e43ee2018-09-12 18:50:47 +0000181/// execution unit requirement for given subtarget \p STI.
182unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
Konstantin Zhuravlyov9f89ede2017-02-08 14:05:23 +0000183
184/// \returns Maximum number of VGPRs that meets given number of waves per
Konstantin Zhuravlyov71e43ee2018-09-12 18:50:47 +0000185/// execution unit requirement for given subtarget \p STI.
186unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
Konstantin Zhuravlyov9f89ede2017-02-08 14:05:23 +0000187
Konstantin Zhuravlyov71e43ee2018-09-12 18:50:47 +0000188/// \returns Number of VGPR blocks needed for given subtarget \p STI when
Scott Linder1e8c2c72018-06-21 19:38:56 +0000189/// \p NumVGPRs are used.
Stanislav Mekhanoshin8bcc9bb2019-06-13 19:18:29 +0000190///
191/// For subtargets which support it, \p EnableWavefrontSize32 should match the
192/// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
193unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs,
194 Optional<bool> EnableWavefrontSize32 = None);
Scott Linder1e8c2c72018-06-21 19:38:56 +0000195
Eugene Zelenkod96089b2017-02-14 00:33:36 +0000196} // end namespace IsaInfo
Konstantin Zhuravlyov9f89ede2017-02-08 14:05:23 +0000197
198LLVM_READONLY
199int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
200
Ryan Taylor9ab812d2019-06-26 17:34:57 +0000201LLVM_READONLY
202int getSOPPWithRelaxation(uint16_t Opcode);
203
Nicolai Haehnle7a9c03f2018-06-21 13:36:57 +0000204struct MIMGBaseOpcodeInfo {
205 MIMGBaseOpcode BaseOpcode;
206 bool Store;
207 bool Atomic;
208 bool AtomicX2;
209 bool Sampler;
David Stuttardf77079f2019-01-14 11:55:24 +0000210 bool Gather4;
Nicolai Haehnle7a9c03f2018-06-21 13:36:57 +0000211
212 uint8_t NumExtraArgs;
213 bool Gradients;
214 bool Coordinates;
215 bool LodOrClampOrMip;
216 bool HasD16;
217};
218
219LLVM_READONLY
220const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);
221
222struct MIMGDimInfo {
223 MIMGDim Dim;
224 uint8_t NumCoords;
225 uint8_t NumGradients;
226 bool DA;
Stanislav Mekhanoshin692560d2019-05-01 16:32:58 +0000227 uint8_t Encoding;
228 const char *AsmSuffix;
Nicolai Haehnle7a9c03f2018-06-21 13:36:57 +0000229};
230
231LLVM_READONLY
Stanislav Mekhanoshin956b0be2019-04-25 18:53:41 +0000232const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum);
233
234LLVM_READONLY
235const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc);
236
237LLVM_READONLY
238const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix);
Nicolai Haehnle7a9c03f2018-06-21 13:36:57 +0000239
Ryan Taylor894c8fd2018-08-01 12:12:01 +0000240struct MIMGLZMappingInfo {
241 MIMGBaseOpcode L;
242 MIMGBaseOpcode LZ;
243};
244
Piotr Sobczak9b11e932019-06-10 15:58:51 +0000245struct MIMGMIPMappingInfo {
246 MIMGBaseOpcode MIP;
247 MIMGBaseOpcode NONMIP;
248};
249
Ryan Taylor894c8fd2018-08-01 12:12:01 +0000250LLVM_READONLY
251const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);
252
Nicolai Haehnle7a9c03f2018-06-21 13:36:57 +0000253LLVM_READONLY
Piotr Sobczak9b11e932019-06-10 15:58:51 +0000254const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned L);
255
256LLVM_READONLY
Nicolai Haehnle7a9c03f2018-06-21 13:36:57 +0000257int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
258 unsigned VDataDwords, unsigned VAddrDwords);
259
Matt Arsenaultcad7fa82017-12-13 21:07:51 +0000260LLVM_READONLY
Nicolai Haehnle0ab200b2018-06-21 13:36:44 +0000261int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);
Nicolai Haehnlef2674312018-06-21 13:36:01 +0000262
/// Description of one concrete MIMG instruction opcode. Entries are retrieved
/// with getMIMGInfo(); the remaining members correspond to the arguments of
/// getMIMGOpcode() (base opcode, encoding, vdata dwords, vaddr dwords).
///
/// NOTE(review): the member order likely has to match the generated table
/// rows - confirm before reordering.
struct MIMGInfo {
  /// The instruction opcode this entry describes.
  uint16_t Opcode;
  /// Base opcode of the instruction.
  uint16_t BaseOpcode;
  /// MIMG encoding of the instruction.
  uint8_t MIMGEncoding;
  /// Number of dwords of the vdata operand.
  uint8_t VDataDwords;
  /// Number of dwords of the vaddr operand.
  uint8_t VAddrDwords;
};
270
271LLVM_READONLY
272const MIMGInfo *getMIMGInfo(unsigned Opc);
273
Nicolai Haehnlef2674312018-06-21 13:36:01 +0000274LLVM_READONLY
Piotr Sobczak265e94e2019-10-02 17:22:36 +0000275int getMTBUFBaseOpcode(unsigned Opc);
276
277LLVM_READONLY
278int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);
279
280LLVM_READONLY
281int getMTBUFElements(unsigned Opc);
282
283LLVM_READONLY
284bool getMTBUFHasVAddr(unsigned Opc);
285
286LLVM_READONLY
287bool getMTBUFHasSrsrc(unsigned Opc);
288
289LLVM_READONLY
290bool getMTBUFHasSoffset(unsigned Opc);
291
292LLVM_READONLY
Neil Henning76504a42018-12-12 16:15:21 +0000293int getMUBUFBaseOpcode(unsigned Opc);
294
295LLVM_READONLY
Matt Arsenaultcfdc2b92019-08-18 00:20:43 +0000296int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);
Neil Henning76504a42018-12-12 16:15:21 +0000297
298LLVM_READONLY
Matt Arsenaultcfdc2b92019-08-18 00:20:43 +0000299int getMUBUFElements(unsigned Opc);
Neil Henning76504a42018-12-12 16:15:21 +0000300
301LLVM_READONLY
302bool getMUBUFHasVAddr(unsigned Opc);
303
304LLVM_READONLY
305bool getMUBUFHasSrsrc(unsigned Opc);
306
307LLVM_READONLY
308bool getMUBUFHasSoffset(unsigned Opc);
309
310LLVM_READONLY
Piotr Sobczak4a801172019-11-20 22:30:02 +0100311const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
312 uint8_t NumComponents,
313 uint8_t NumFormat,
314 const MCSubtargetInfo &STI);
315LLVM_READONLY
316const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
317 const MCSubtargetInfo &STI);
318
319LLVM_READONLY
Matt Arsenaultcad7fa82017-12-13 21:07:51 +0000320int getMCOpcode(uint16_t Opcode, unsigned Gen);
321
Tom Stellardff7416b2015-06-26 21:58:31 +0000322void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
Konstantin Zhuravlyov71e43ee2018-09-12 18:50:47 +0000323 const MCSubtargetInfo *STI);
Tom Stellard9760f032015-12-03 03:34:32 +0000324
Stanislav Mekhanoshincee607e2019-04-24 17:03:15 +0000325amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
326 const MCSubtargetInfo *STI);
Scott Linder1e8c2c72018-06-21 19:38:56 +0000327
Konstantin Zhuravlyov435151a2017-11-01 19:12:38 +0000328bool isGroupSegment(const GlobalValue *GV);
329bool isGlobalSegment(const GlobalValue *GV);
330bool isReadOnlySegment(const GlobalValue *GV);
Tom Stellarde3b5aea2015-12-02 17:00:42 +0000331
Konstantin Zhuravlyov08326b62016-10-20 18:12:38 +0000332/// \returns True if constants should be emitted to .text section for given
333/// target triple \p TT, false otherwise.
334bool shouldEmitConstantsToTextSection(const Triple &TT);
335
Konstantin Zhuravlyov1d650262016-09-06 20:22:28 +0000336/// \returns Integer value requested using \p F's \p Name attribute.
337///
338/// \returns \p Default if attribute is not present.
339///
340/// \returns \p Default and emits error if requested value cannot be converted
341/// to integer.
Matt Arsenault83002722016-05-12 02:45:18 +0000342int getIntegerAttribute(const Function &F, StringRef Name, int Default);
343
Konstantin Zhuravlyov1d650262016-09-06 20:22:28 +0000344/// \returns A pair of integer values requested using \p F's \p Name attribute
345/// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
346/// is false).
347///
348/// \returns \p Default if attribute is not present.
349///
350/// \returns \p Default and emits error if one of the requested values cannot be
351/// converted to integer, or \p OnlyFirstRequired is false and "second" value is
352/// not present.
353std::pair<int, int> getIntegerPairAttribute(const Function &F,
354 StringRef Name,
355 std::pair<int, int> Default,
356 bool OnlyFirstRequired = false);
357
Nicolai Haehnle1a94cbb2018-11-29 11:06:06 +0000358/// Represents the counter values to wait for in an s_waitcnt instruction.
359///
360/// Large values (including the maximum possible integer) can be used to
361/// represent "don't care" waits.
362struct Waitcnt {
363 unsigned VmCnt = ~0u;
364 unsigned ExpCnt = ~0u;
365 unsigned LgkmCnt = ~0u;
Stanislav Mekhanoshin956b0be2019-04-25 18:53:41 +0000366 unsigned VsCnt = ~0u;
Nicolai Haehnle1a94cbb2018-11-29 11:06:06 +0000367
368 Waitcnt() {}
Stanislav Mekhanoshin956b0be2019-04-25 18:53:41 +0000369 Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
370 : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt), VsCnt(VsCnt) {}
Nicolai Haehnle1a94cbb2018-11-29 11:06:06 +0000371
Stanislav Mekhanoshin956b0be2019-04-25 18:53:41 +0000372 static Waitcnt allZero(const IsaVersion &Version) {
373 return Waitcnt(0, 0, 0, Version.Major >= 10 ? 0 : ~0u);
374 }
375 static Waitcnt allZeroExceptVsCnt() { return Waitcnt(0, 0, 0, ~0u); }
376
377 bool hasWait() const {
378 return VmCnt != ~0u || ExpCnt != ~0u || LgkmCnt != ~0u || VsCnt != ~0u;
379 }
Nicolai Haehnle1a94cbb2018-11-29 11:06:06 +0000380
381 bool dominates(const Waitcnt &Other) const {
382 return VmCnt <= Other.VmCnt && ExpCnt <= Other.ExpCnt &&
Stanislav Mekhanoshin956b0be2019-04-25 18:53:41 +0000383 LgkmCnt <= Other.LgkmCnt && VsCnt <= Other.VsCnt;
Nicolai Haehnle1a94cbb2018-11-29 11:06:06 +0000384 }
385
386 Waitcnt combined(const Waitcnt &Other) const {
387 return Waitcnt(std::min(VmCnt, Other.VmCnt), std::min(ExpCnt, Other.ExpCnt),
Stanislav Mekhanoshin956b0be2019-04-25 18:53:41 +0000388 std::min(LgkmCnt, Other.LgkmCnt),
389 std::min(VsCnt, Other.VsCnt));
Nicolai Haehnle1a94cbb2018-11-29 11:06:06 +0000390 }
391};
392
Konstantin Zhuravlyovcdd45472016-10-11 18:58:22 +0000393/// \returns Vmcnt bit mask for given isa \p Version.
Konstantin Zhuravlyov71e43ee2018-09-12 18:50:47 +0000394unsigned getVmcntBitMask(const IsaVersion &Version);
Konstantin Zhuravlyov836cbff2016-09-30 17:01:40 +0000395
Konstantin Zhuravlyovcdd45472016-10-11 18:58:22 +0000396/// \returns Expcnt bit mask for given isa \p Version.
Konstantin Zhuravlyov71e43ee2018-09-12 18:50:47 +0000397unsigned getExpcntBitMask(const IsaVersion &Version);
Konstantin Zhuravlyov836cbff2016-09-30 17:01:40 +0000398
Konstantin Zhuravlyovcdd45472016-10-11 18:58:22 +0000399/// \returns Lgkmcnt bit mask for given isa \p Version.
Konstantin Zhuravlyov71e43ee2018-09-12 18:50:47 +0000400unsigned getLgkmcntBitMask(const IsaVersion &Version);
Konstantin Zhuravlyov9f89ede2017-02-08 14:05:23 +0000401
402/// \returns Waitcnt bit mask for given isa \p Version.
Konstantin Zhuravlyov71e43ee2018-09-12 18:50:47 +0000403unsigned getWaitcntBitMask(const IsaVersion &Version);
Konstantin Zhuravlyov836cbff2016-09-30 17:01:40 +0000404
Konstantin Zhuravlyovcdd45472016-10-11 18:58:22 +0000405/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
Konstantin Zhuravlyov71e43ee2018-09-12 18:50:47 +0000406unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);
Konstantin Zhuravlyov836cbff2016-09-30 17:01:40 +0000407
Konstantin Zhuravlyovcdd45472016-10-11 18:58:22 +0000408/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
Konstantin Zhuravlyov71e43ee2018-09-12 18:50:47 +0000409unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);
Konstantin Zhuravlyovcdd45472016-10-11 18:58:22 +0000410
411/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
Konstantin Zhuravlyov71e43ee2018-09-12 18:50:47 +0000412unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);
Konstantin Zhuravlyovcdd45472016-10-11 18:58:22 +0000413
Adrian Prantl5f8f34e42018-05-01 15:54:18 +0000414/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
Konstantin Zhuravlyovcdd45472016-10-11 18:58:22 +0000415/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
416/// \p Lgkmcnt respectively.
417///
418/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
Matt Arsenaulte823d922017-02-18 18:29:53 +0000419/// \p Vmcnt = \p Waitcnt[3:0] (pre-gfx9 only)
420/// \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14] (gfx9+ only)
Konstantin Zhuravlyovcdd45472016-10-11 18:58:22 +0000421/// \p Expcnt = \p Waitcnt[6:4]
Stanislav Mekhanoshin956b0be2019-04-25 18:53:41 +0000422/// \p Lgkmcnt = \p Waitcnt[11:8] (pre-gfx10 only)
423/// \p Lgkmcnt = \p Waitcnt[13:8] (gfx10+ only)
Konstantin Zhuravlyov71e43ee2018-09-12 18:50:47 +0000424void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
Konstantin Zhuravlyovcdd45472016-10-11 18:58:22 +0000425 unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
426
Nicolai Haehnle1a94cbb2018-11-29 11:06:06 +0000427Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);
428
Konstantin Zhuravlyovcdd45472016-10-11 18:58:22 +0000429/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
Konstantin Zhuravlyov71e43ee2018-09-12 18:50:47 +0000430unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
Konstantin Zhuravlyov9f89ede2017-02-08 14:05:23 +0000431 unsigned Vmcnt);
Konstantin Zhuravlyovcdd45472016-10-11 18:58:22 +0000432
433/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
Konstantin Zhuravlyov71e43ee2018-09-12 18:50:47 +0000434unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
Konstantin Zhuravlyov9f89ede2017-02-08 14:05:23 +0000435 unsigned Expcnt);
Konstantin Zhuravlyovcdd45472016-10-11 18:58:22 +0000436
437/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
Konstantin Zhuravlyov71e43ee2018-09-12 18:50:47 +0000438unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
Konstantin Zhuravlyov9f89ede2017-02-08 14:05:23 +0000439 unsigned Lgkmcnt);
Konstantin Zhuravlyovcdd45472016-10-11 18:58:22 +0000440
Adrian Prantl5f8f34e42018-05-01 15:54:18 +0000441/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
Konstantin Zhuravlyovcdd45472016-10-11 18:58:22 +0000442/// \p Version.
443///
444/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
Matt Arsenaulte823d922017-02-18 18:29:53 +0000445/// Waitcnt[3:0] = \p Vmcnt (pre-gfx9 only)
446/// Waitcnt[3:0] = \p Vmcnt[3:0] (gfx9+ only)
447/// Waitcnt[6:4] = \p Expcnt
Stanislav Mekhanoshin956b0be2019-04-25 18:53:41 +0000448/// Waitcnt[11:8] = \p Lgkmcnt (pre-gfx10 only)
449/// Waitcnt[13:8] = \p Lgkmcnt (gfx10+ only)
Matt Arsenaulte823d922017-02-18 18:29:53 +0000450/// Waitcnt[15:14] = \p Vmcnt[5:4] (gfx9+ only)
Konstantin Zhuravlyovcdd45472016-10-11 18:58:22 +0000451///
452/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
453/// isa \p Version.
Konstantin Zhuravlyov71e43ee2018-09-12 18:50:47 +0000454unsigned encodeWaitcnt(const IsaVersion &Version,
Konstantin Zhuravlyovcdd45472016-10-11 18:58:22 +0000455 unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);
Konstantin Zhuravlyov836cbff2016-09-30 17:01:40 +0000456
Nicolai Haehnle1a94cbb2018-11-29 11:06:06 +0000457unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
458
Dmitry Preobrazhensky1fca3b12019-06-13 12:46:37 +0000459namespace Hwreg {
460
461LLVM_READONLY
462int64_t getHwregId(const StringRef Name);
463
464LLVM_READNONE
465bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI);
466
467LLVM_READNONE
468bool isValidHwreg(int64_t Id);
469
470LLVM_READNONE
471bool isValidHwregOffset(int64_t Offset);
472
473LLVM_READNONE
474bool isValidHwregWidth(int64_t Width);
475
476LLVM_READNONE
Dmitry Preobrazhensky2eff0312019-07-08 14:27:37 +0000477uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width);
Dmitry Preobrazhensky1fca3b12019-06-13 12:46:37 +0000478
479LLVM_READNONE
480StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI);
481
482void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width);
483
484} // namespace Hwreg
485
Dmitry Preobrazhensky1d572ce2019-06-28 14:14:02 +0000486namespace SendMsg {
487
488LLVM_READONLY
489int64_t getMsgId(const StringRef Name);
490
491LLVM_READONLY
492int64_t getMsgOpId(int64_t MsgId, const StringRef Name);
493
494LLVM_READNONE
495StringRef getMsgName(int64_t MsgId);
496
497LLVM_READNONE
498StringRef getMsgOpName(int64_t MsgId, int64_t OpId);
499
500LLVM_READNONE
501bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI, bool Strict = true);
502
503LLVM_READNONE
504bool isValidMsgOp(int64_t MsgId, int64_t OpId, bool Strict = true);
505
506LLVM_READNONE
507bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, bool Strict = true);
508
509LLVM_READNONE
510bool msgRequiresOp(int64_t MsgId);
511
512LLVM_READNONE
513bool msgSupportsStream(int64_t MsgId, int64_t OpId);
514
515void decodeMsg(unsigned Val,
516 uint16_t &MsgId,
517 uint16_t &OpId,
518 uint16_t &StreamId);
519
520LLVM_READNONE
Dmitry Preobrazhenskye1eb25f2019-06-28 16:28:46 +0000521uint64_t encodeMsg(uint64_t MsgId,
522 uint64_t OpId,
523 uint64_t StreamId);
Dmitry Preobrazhensky1d572ce2019-06-28 14:14:02 +0000524
525} // namespace SendMsg
526
527
Marek Olsakfccabaf2016-01-13 11:45:36 +0000528unsigned getInitialPSInputAddr(const Function &F);
529
Matt Arsenaulte622dc32017-04-11 22:29:24 +0000530LLVM_READNONE
531bool isShader(CallingConv::ID CC);
532
533LLVM_READNONE
534bool isCompute(CallingConv::ID CC);
535
536LLVM_READNONE
537bool isEntryFunctionCC(CallingConv::ID CC);
538
Matt Arsenaultefa9f4b2017-04-11 22:29:28 +0000539// FIXME: Remove this when calling conventions cleaned up
540LLVM_READNONE
541inline bool isKernel(CallingConv::ID CC) {
542 switch (CC) {
Matt Arsenaultefa9f4b2017-04-11 22:29:28 +0000543 case CallingConv::AMDGPU_KERNEL:
544 case CallingConv::SPIR_KERNEL:
545 return true;
546 default:
547 return false;
548 }
549}
Tom Stellardac00eb52015-12-15 16:26:16 +0000550
Dmitry Preobrazhensky3afbd822018-01-10 14:22:19 +0000551bool hasXNACK(const MCSubtargetInfo &STI);
Konstantin Zhuravlyov108927b2018-11-05 22:44:19 +0000552bool hasSRAMECC(const MCSubtargetInfo &STI);
Dmitry Preobrazhenskye3271ae2018-02-05 12:45:43 +0000553bool hasMIMG_R128(const MCSubtargetInfo &STI);
Dmitry Preobrazhensky0a1ff462018-02-05 14:18:53 +0000554bool hasPackedD16(const MCSubtargetInfo &STI);
Dmitry Preobrazhenskye3271ae2018-02-05 12:45:43 +0000555
Tom Stellard2b65ed32015-12-21 18:44:27 +0000556bool isSI(const MCSubtargetInfo &STI);
557bool isCI(const MCSubtargetInfo &STI);
558bool isVI(const MCSubtargetInfo &STI);
Sam Koltonf7659d712017-05-23 10:08:55 +0000559bool isGFX9(const MCSubtargetInfo &STI);
Stanislav Mekhanoshincee607e2019-04-24 17:03:15 +0000560bool isGFX10(const MCSubtargetInfo &STI);
Sam Koltonf7659d712017-05-23 10:08:55 +0000561
/// \returns True if \p Reg is a scalar register (SGPR).
Sam Koltonf7659d712017-05-23 10:08:55 +0000563bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);
Tom Stellard2b65ed32015-12-21 18:44:27 +0000564
Adrian Prantl5f8f34e42018-05-01 15:54:18 +0000565/// Is there any intersection between registers
Dmitry Preobrazhenskydc4ac822017-06-21 14:41:34 +0000566bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI);
567
Tom Stellard2b65ed32015-12-21 18:44:27 +0000568/// If \p Reg is a pseudo reg, return the correct hardware register given
569/// \p STI otherwise return \p Reg.
570unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);
571
Adrian Prantl5f8f34e42018-05-01 15:54:18 +0000572/// Convert hardware register \p Reg to a pseudo register
Dmitry Preobrazhensky03880f82017-03-03 14:31:06 +0000573LLVM_READNONE
574unsigned mc2PseudoReg(unsigned Reg);
575
Adrian Prantl5f8f34e42018-05-01 15:54:18 +0000576/// Can this operand also contain immediate values?
Sam Kolton1eeb11b2016-09-09 14:44:04 +0000577bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);
578
Adrian Prantl5f8f34e42018-05-01 15:54:18 +0000579/// Is this floating-point operand?
Sam Kolton1eeb11b2016-09-09 14:44:04 +0000580bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);
581
/// Does this operand support only inlinable literals?
Sam Kolton1eeb11b2016-09-09 14:44:04 +0000583bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);
584
/// Get the size in bits of a register from the register class identified by
/// \p RCID.
unsigned getRegBitWidth(unsigned RCID);
587
Adrian Prantl5f8f34e42018-05-01 15:54:18 +0000588/// Get the size in bits of a register from the register class \p RC.
Krzysztof Parzyszekc8715502016-10-19 17:40:36 +0000589unsigned getRegBitWidth(const MCRegisterClass &RC);
590
Adrian Prantl5f8f34e42018-05-01 15:54:18 +0000591/// Get size of register operand
Sam Kolton1eeb11b2016-09-09 14:44:04 +0000592unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
593 unsigned OpNo);
594
Matt Arsenault4bd72362016-12-10 00:39:12 +0000595LLVM_READNONE
596inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
597 switch (OpInfo.OperandType) {
598 case AMDGPU::OPERAND_REG_IMM_INT32:
599 case AMDGPU::OPERAND_REG_IMM_FP32:
600 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
601 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
Stanislav Mekhanoshin50d7f4642019-07-09 21:43:09 +0000602 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
603 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
Matt Arsenault4bd72362016-12-10 00:39:12 +0000604 return 4;
605
606 case AMDGPU::OPERAND_REG_IMM_INT64:
607 case AMDGPU::OPERAND_REG_IMM_FP64:
608 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
609 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
610 return 8;
611
612 case AMDGPU::OPERAND_REG_IMM_INT16:
613 case AMDGPU::OPERAND_REG_IMM_FP16:
614 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
615 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
Matt Arsenault9be7b0d2017-02-27 18:49:11 +0000616 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
617 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
Stanislav Mekhanoshin50d7f4642019-07-09 21:43:09 +0000618 case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
619 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
620 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
621 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
Stanislav Mekhanoshin956b0be2019-04-25 18:53:41 +0000622 case AMDGPU::OPERAND_REG_IMM_V2INT16:
623 case AMDGPU::OPERAND_REG_IMM_V2FP16:
Matt Arsenault4bd72362016-12-10 00:39:12 +0000624 return 2;
625
626 default:
627 llvm_unreachable("unhandled operand type");
628 }
629}
630
631LLVM_READNONE
632inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
633 return getOperandSize(Desc.OpInfo[OpNo]);
634}
635
Adrian Prantl5f8f34e42018-05-01 15:54:18 +0000636/// Is this literal inlinable
Matt Arsenault26faed32016-12-05 22:26:17 +0000637LLVM_READNONE
638bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);
639
640LLVM_READNONE
641bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);
642
Matt Arsenault4bd72362016-12-10 00:39:12 +0000643LLVM_READNONE
644bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);
Sam Kolton1eeb11b2016-09-09 14:44:04 +0000645
Matt Arsenault9be7b0d2017-02-27 18:49:11 +0000646LLVM_READNONE
647bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);
648
Matt Arsenault894e53d2017-07-26 20:39:42 +0000649bool isArgPassedInSGPR(const Argument *Arg);
Tom Stellard08efb7e2017-01-27 18:41:14 +0000650
651/// \returns The encoding that will be used for \p ByteOffset in the SMRD
652/// offset field.
653int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
654
655/// \returns true if this offset is small enough to fit in the SMRD
656/// offset field. \p ByteOffset should be the offset in bytes and
657/// not the encoded offset.
658bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
659
Tim Renouf4f703f52018-08-21 11:07:10 +0000660bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
Nicolai Haehnlea7b00052018-11-30 22:55:38 +0000661 const GCNSubtarget *Subtarget, uint32_t Align = 4);
Tim Renouf4f703f52018-08-21 11:07:10 +0000662
/// \returns true if the intrinsic identified by \p IntrID is divergent.
bool isIntrinsicSourceOfDivergence(unsigned IntrID);
665
Matt Arsenault055e4dc2019-03-29 19:14:54 +0000666// Track defaults for fields in the MODE registser.
667struct SIModeRegisterDefaults {
668 /// Floating point opcodes that support exception flag gathering quiet and
669 /// propagate signaling NaN inputs per IEEE 754-2008. Min_dx10 and max_dx10
670 /// become IEEE 754- 2008 compliant due to signaling NaN propagation and
671 /// quieting.
672 bool IEEE : 1;
673
674 /// Used by the vector ALU to force DX10-style treatment of NaNs: when set,
675 /// clamp NaN to zero; otherwise, pass NaN through.
676 bool DX10Clamp : 1;
677
Matt Arsenault19e7f8a2019-10-27 23:38:52 -0700678 /// If this is set, neither input or output denormals are flushed for most f32
679 /// instructions.
680 ///
681 /// TODO: Split into separate input and output fields if necessary like the
682 /// control bits really provide?
683 bool FP32Denormals : 1;
684
685 /// If this is set, neither input or output denormals are flushed for both f64
686 /// and f16/v2f16 instructions.
687 bool FP64FP16Denormals : 1;
Matt Arsenault055e4dc2019-03-29 19:14:54 +0000688
689 SIModeRegisterDefaults() :
690 IEEE(true),
Matt Arsenault19e7f8a2019-10-27 23:38:52 -0700691 DX10Clamp(true),
692 FP32Denormals(true),
693 FP64FP16Denormals(true) {}
Matt Arsenault055e4dc2019-03-29 19:14:54 +0000694
Matt Arsenaultdb0ed3e2019-10-31 18:50:30 -0700695 // FIXME: Should not depend on the subtarget
696 SIModeRegisterDefaults(const Function &F, const GCNSubtarget &ST);
Matt Arsenault055e4dc2019-03-29 19:14:54 +0000697
698 static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
Matt Arsenault19e7f8a2019-10-27 23:38:52 -0700699 const bool IsCompute = AMDGPU::isCompute(CC);
700
Matt Arsenault055e4dc2019-03-29 19:14:54 +0000701 SIModeRegisterDefaults Mode;
702 Mode.DX10Clamp = true;
Matt Arsenault19e7f8a2019-10-27 23:38:52 -0700703 Mode.IEEE = IsCompute;
704 Mode.FP32Denormals = false; // FIXME: Should be on by default.
705 Mode.FP64FP16Denormals = true;
Matt Arsenault055e4dc2019-03-29 19:14:54 +0000706 return Mode;
707 }
708
709 bool operator ==(const SIModeRegisterDefaults Other) const {
Matt Arsenault19e7f8a2019-10-27 23:38:52 -0700710 return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp &&
711 FP32Denormals == Other.FP32Denormals &&
712 FP64FP16Denormals == Other.FP64FP16Denormals;
Matt Arsenault055e4dc2019-03-29 19:14:54 +0000713 }
714
Matt Arsenaultdb0ed3e2019-10-31 18:50:30 -0700715 /// Returns true if a flag is compatible if it's enabled in the callee, but
716 /// disabled in the caller.
717 static bool oneWayCompatible(bool CallerMode, bool CalleeMode) {
718 return CallerMode == CalleeMode || (CallerMode && !CalleeMode);
719 }
720
Matt Arsenault055e4dc2019-03-29 19:14:54 +0000721 // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode should
722 // be able to override.
723 bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const {
Matt Arsenaultdb0ed3e2019-10-31 18:50:30 -0700724 if (DX10Clamp != CalleeMode.DX10Clamp)
725 return false;
726 if (IEEE != CalleeMode.IEEE)
727 return false;
728
729 // Allow inlining denormals enabled into denormals flushed functions.
730 return oneWayCompatible(FP64FP16Denormals, CalleeMode.FP64FP16Denormals) &&
731 oneWayCompatible(FP32Denormals, CalleeMode.FP32Denormals);
Matt Arsenault055e4dc2019-03-29 19:14:54 +0000732 }
733};
734
Tom Stellard347ac792015-06-26 21:15:07 +0000735} // end namespace AMDGPU
736} // end namespace llvm
737
Eugene Zelenkod96089b2017-02-14 00:33:36 +0000738#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H