blob: f94aa0fe8cc66547dd03d04fbf3f911189c788f6 [file] [log] [blame]
Erich Keaneebba5922017-07-21 22:37:03 +00001//===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements AMDGPU TargetInfo objects.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPU.h"
15#include "clang/Basic/Builtins.h"
16#include "clang/Basic/LangOptions.h"
17#include "clang/Basic/MacroBuilder.h"
18#include "clang/Basic/TargetBuiltins.h"
19#include "clang/Frontend/CodeGenOptions.h"
20#include "llvm/ADT/StringSwitch.h"
21
22using namespace clang;
23using namespace clang::targets;
24
25namespace clang {
26namespace targets {
27
28// If you edit the description strings, make sure you update
29// getPointerWidthV().
30
// LLVM data layout description used for the r600 (non-amdgcn) architecture.
// The adjacent literals concatenate into a single string; they are split by
// component only for readability.
static const char *const DataLayoutStringR600 =
    "e"                                       // little-endian
    "-p:32:32"                                // default pointer size/alignment
    "-i64:64"                                 // i64 alignment
    "-v16:16-v24:32-v32:32-v48:64-v96:128"    // vector alignments
    "-v192:256-v256:256-v512:512"
    "-v1024:1024-v2048:2048"
    "-n32:64"                                 // native integer widths
    "-A5";                                    // alloca address space
Erich Keaneebba5922017-07-21 22:37:03 +000034
// LLVM data layout description used for amdgcn when the triple does not
// select the "generic is zero" address space numbering. The adjacent literals
// concatenate into a single string; note there is no trailing "-A<n>"
// component here.
static const char *const DataLayoutStringSIPrivateIsZero =
    "e"                                       // little-endian
    "-p:32:32"                                // address space 0 pointers
    "-p1:64:64-p2:64:64-p3:32:32"             // per-address-space pointers
    "-p4:64:64-p5:32:32-p6:32:32"
    "-i64:64"                                 // i64 alignment
    "-v16:16-v24:32-v32:32-v48:64-v96:128"    // vector alignments
    "-v192:256-v256:256-v512:512"
    "-v1024:1024-v2048:2048"
    "-n32:64";                                // native integer widths
39
// LLVM data layout description used for amdgcn when the triple selects the
// "generic is zero" address space numbering. The adjacent literals
// concatenate into a single string.
static const char *const DataLayoutStringSIGenericIsZero =
    "e"                                       // little-endian
    "-p:64:64"                                // address space 0 pointers
    "-p1:64:64-p2:32:32-p3:32:32"             // per-address-space pointers
    "-p4:64:64-p5:32:32-p6:32:32"
    "-i64:64"                                 // i64 alignment
    "-v16:16-v24:32-v32:32-v48:64-v96:128"    // vector alignments
    "-v192:256-v256:256-v512:512"
    "-v1024:1024-v2048:2048"
    "-n32:64"                                 // native integer widths
    "-A5";                                    // alloca address space
44
Alexander Richardson6d989432017-10-15 18:48:14 +000045static const LangASMap AMDGPUPrivIsZeroDefIsGenMap = {
Erich Keaneebba5922017-07-21 22:37:03 +000046 4, // Default
47 1, // opencl_global
48 3, // opencl_local
Yaxun Liu651bd732018-02-13 18:01:21 +000049 4, // opencl_constant
Yaxun Liub7318e02017-10-13 03:37:48 +000050 0, // opencl_private
Erich Keaneebba5922017-07-21 22:37:03 +000051 4, // opencl_generic
52 1, // cuda_device
Yaxun Liu651bd732018-02-13 18:01:21 +000053 4, // cuda_constant
Erich Keaneebba5922017-07-21 22:37:03 +000054 3 // cuda_shared
55};
56
Alexander Richardson6d989432017-10-15 18:48:14 +000057static const LangASMap AMDGPUGenIsZeroDefIsGenMap = {
Erich Keaneebba5922017-07-21 22:37:03 +000058 0, // Default
59 1, // opencl_global
60 3, // opencl_local
Yaxun Liu651bd732018-02-13 18:01:21 +000061 4, // opencl_constant
Yaxun Liub7318e02017-10-13 03:37:48 +000062 5, // opencl_private
Erich Keaneebba5922017-07-21 22:37:03 +000063 0, // opencl_generic
64 1, // cuda_device
Yaxun Liu651bd732018-02-13 18:01:21 +000065 4, // cuda_constant
Erich Keaneebba5922017-07-21 22:37:03 +000066 3 // cuda_shared
67};
68
Alexander Richardson6d989432017-10-15 18:48:14 +000069static const LangASMap AMDGPUPrivIsZeroDefIsPrivMap = {
Erich Keaneebba5922017-07-21 22:37:03 +000070 0, // Default
71 1, // opencl_global
72 3, // opencl_local
Yaxun Liu651bd732018-02-13 18:01:21 +000073 4, // opencl_constant
Yaxun Liub7318e02017-10-13 03:37:48 +000074 0, // opencl_private
Erich Keaneebba5922017-07-21 22:37:03 +000075 4, // opencl_generic
76 1, // cuda_device
Yaxun Liu651bd732018-02-13 18:01:21 +000077 4, // cuda_constant
Erich Keaneebba5922017-07-21 22:37:03 +000078 3 // cuda_shared
79};
80
Alexander Richardson6d989432017-10-15 18:48:14 +000081static const LangASMap AMDGPUGenIsZeroDefIsPrivMap = {
Erich Keaneebba5922017-07-21 22:37:03 +000082 5, // Default
83 1, // opencl_global
84 3, // opencl_local
Yaxun Liu651bd732018-02-13 18:01:21 +000085 4, // opencl_constant
Yaxun Liub7318e02017-10-13 03:37:48 +000086 5, // opencl_private
Erich Keaneebba5922017-07-21 22:37:03 +000087 0, // opencl_generic
88 1, // cuda_device
Yaxun Liu651bd732018-02-13 18:01:21 +000089 4, // cuda_constant
Erich Keaneebba5922017-07-21 22:37:03 +000090 3 // cuda_shared
91};
92} // namespace targets
93} // namespace clang
94
95const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
96#define BUILTIN(ID, TYPE, ATTRS) \
97 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
98#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \
99 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
100#include "clang/Basic/BuiltinsAMDGPU.def"
101};
102
103const char *const AMDGPUTargetInfo::GCCRegNames[] = {
104 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
105 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
106 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
107 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
108 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
109 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
110 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
111 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
112 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
113 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
114 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
115 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
116 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
117 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
118 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
119 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
120 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
121 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
122 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
123 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
124 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
125 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
126 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
127 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
128 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
129 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
130 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
131 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
132 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
133 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
134 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
135 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
136 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
137 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
138 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
139 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
140 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
141 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
142 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
143 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
144 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
145 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
146 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
Matt Arsenaulte7da1362018-02-09 16:58:41 +0000147 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
Erich Keaneebba5922017-07-21 22:37:03 +0000148 "flat_scratch_lo", "flat_scratch_hi"
149};
150
151ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
152 return llvm::makeArrayRef(GCCRegNames);
153}
154
155bool AMDGPUTargetInfo::initFeatureMap(
156 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
157 const std::vector<std::string> &FeatureVec) const {
158
159 // XXX - What does the member GPU mean if device name string passed here?
160 if (getTriple().getArch() == llvm::Triple::amdgcn) {
161 if (CPU.empty())
162 CPU = "tahiti";
163
164 switch (parseAMDGCNName(CPU)) {
165 case GK_GFX6:
166 case GK_GFX7:
167 break;
168
169 case GK_GFX9:
170 Features["gfx9-insts"] = true;
171 LLVM_FALLTHROUGH;
172 case GK_GFX8:
173 Features["s-memrealtime"] = true;
174 Features["16-bit-insts"] = true;
175 Features["dpp"] = true;
176 break;
177
178 case GK_NONE:
179 return false;
180 default:
181 llvm_unreachable("unhandled subtarget");
182 }
183 } else {
184 if (CPU.empty())
185 CPU = "r600";
186
187 switch (parseR600Name(CPU)) {
188 case GK_R600:
189 case GK_R700:
190 case GK_EVERGREEN:
191 case GK_NORTHERN_ISLANDS:
192 break;
193 case GK_R600_DOUBLE_OPS:
194 case GK_R700_DOUBLE_OPS:
195 case GK_EVERGREEN_DOUBLE_OPS:
196 case GK_CAYMAN:
Matt Arsenaulte4f62802017-12-05 03:51:26 +0000197 // TODO: Add fp64 when implemented.
Erich Keaneebba5922017-07-21 22:37:03 +0000198 break;
199 case GK_NONE:
200 return false;
201 default:
202 llvm_unreachable("unhandled subtarget");
203 }
204 }
205
206 return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
207}
208
209void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts,
210 TargetOptions &TargetOpts) const {
211 bool hasFP32Denormals = false;
212 bool hasFP64Denormals = false;
213 for (auto &I : TargetOpts.FeaturesAsWritten) {
214 if (I == "+fp32-denormals" || I == "-fp32-denormals")
215 hasFP32Denormals = true;
216 if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals")
217 hasFP64Denormals = true;
218 }
219 if (!hasFP32Denormals)
220 TargetOpts.Features.push_back(
221 (Twine(hasFullSpeedFMAF32(TargetOpts.CPU) && !CGOpts.FlushDenorm
222 ? '+'
223 : '-') +
224 Twine("fp32-denormals"))
225 .str());
226 // Always do not flush fp64 or fp16 denorms.
227 if (!hasFP64Denormals && hasFP64)
228 TargetOpts.Features.push_back("+fp64-fp16-denormals");
229}
230
Erich Keanee44bdb32018-02-08 23:16:55 +0000231
232constexpr AMDGPUTargetInfo::NameGPUKind AMDGPUTargetInfo::R600Names[];
233constexpr AMDGPUTargetInfo::NameGPUKind AMDGPUTargetInfo::AMDGCNNames[];
Erich Keaneebba5922017-07-21 22:37:03 +0000234AMDGPUTargetInfo::GPUKind AMDGPUTargetInfo::parseR600Name(StringRef Name) {
Erich Keanee44bdb32018-02-08 23:16:55 +0000235 const auto *Result = llvm::find_if(
236 R600Names, [Name](const NameGPUKind &Kind) { return Kind.Name == Name; });
237
238 if (Result == std::end(R600Names))
239 return GK_NONE;
240 return Result->Kind;
Erich Keaneebba5922017-07-21 22:37:03 +0000241}
242
243AMDGPUTargetInfo::GPUKind AMDGPUTargetInfo::parseAMDGCNName(StringRef Name) {
Erich Keanee44bdb32018-02-08 23:16:55 +0000244 const auto *Result =
245 llvm::find_if(AMDGCNNames, [Name](const NameGPUKind &Kind) {
246 return Kind.Name == Name;
247 });
248
249 if (Result == std::end(AMDGCNNames))
250 return GK_NONE;
251 return Result->Kind;
252}
253
254void AMDGPUTargetInfo::fillValidCPUList(
255 SmallVectorImpl<StringRef> &Values) const {
256 if (getTriple().getArch() == llvm::Triple::amdgcn)
257 llvm::for_each(AMDGCNNames, [&Values](const NameGPUKind &Kind) {
258 Values.emplace_back(Kind.Name);});
259 else
260 llvm::for_each(R600Names, [&Values](const NameGPUKind &Kind) {
261 Values.emplace_back(Kind.Name);});
Erich Keaneebba5922017-07-21 22:37:03 +0000262}
263
264void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
265 if (isGenericZero(getTriple())) {
266 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUGenIsZeroDefIsPrivMap
267 : &AMDGPUGenIsZeroDefIsGenMap;
268 } else {
269 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUPrivIsZeroDefIsPrivMap
270 : &AMDGPUPrivIsZeroDefIsGenMap;
271 }
272}
273
274AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
275 const TargetOptions &Opts)
Jan Veselycda72c92017-10-19 20:40:13 +0000276 : TargetInfo(Triple),
277 GPU(isAMDGCN(Triple) ? GK_GFX6 : parseR600Name(Opts.CPU)),
Erich Keaneebba5922017-07-21 22:37:03 +0000278 hasFP64(false), hasFMAF(false), hasLDEXPF(false),
279 AS(isGenericZero(Triple)) {
280 if (getTriple().getArch() == llvm::Triple::amdgcn) {
281 hasFP64 = true;
282 hasFMAF = true;
283 hasLDEXPF = true;
284 }
Jan Veselycda72c92017-10-19 20:40:13 +0000285 if (getTriple().getArch() == llvm::Triple::r600) {
286 if (GPU == GK_EVERGREEN_DOUBLE_OPS || GPU == GK_CAYMAN) {
287 hasFMAF = true;
288 }
289 }
Erich Keaneebba5922017-07-21 22:37:03 +0000290 auto IsGenericZero = isGenericZero(Triple);
291 resetDataLayout(getTriple().getArch() == llvm::Triple::amdgcn
292 ? (IsGenericZero ? DataLayoutStringSIGenericIsZero
293 : DataLayoutStringSIPrivateIsZero)
294 : DataLayoutStringR600);
295 assert(DataLayout->getAllocaAddrSpace() == AS.Private);
296
297 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
298 Triple.getEnvironment() == llvm::Triple::OpenCL ||
299 Triple.getEnvironmentName() == "amdgizcl" ||
300 !isAMDGCN(Triple));
301 UseAddrSpaceMapMangling = true;
302
303 // Set pointer width and alignment for target address space 0.
304 PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
305 if (getMaxPointerWidth() == 64) {
306 LongWidth = LongAlign = 64;
307 SizeType = UnsignedLong;
308 PtrDiffType = SignedLong;
309 IntPtrType = SignedLong;
310 }
Yaxun Liu39195062017-08-04 18:16:31 +0000311
312 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
Erich Keaneebba5922017-07-21 22:37:03 +0000313}
314
315void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
316 TargetInfo::adjust(Opts);
317 setAddressSpaceMap(Opts.OpenCL || !isAMDGCN(getTriple()));
318}
319
320ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
321 return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
322 Builtin::FirstTSBuiltin);
323}
324
325void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
326 MacroBuilder &Builder) const {
327 if (getTriple().getArch() == llvm::Triple::amdgcn)
328 Builder.defineMacro("__AMDGCN__");
329 else
330 Builder.defineMacro("__R600__");
331
332 if (hasFMAF)
333 Builder.defineMacro("__HAS_FMAF__");
334 if (hasLDEXPF)
335 Builder.defineMacro("__HAS_LDEXPF__");
336 if (hasFP64)
337 Builder.defineMacro("__HAS_FP64__");
338}