blob: 481630c0fa455102b9a7d98cd02507b017645236 [file] [log] [blame]
//===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements AMDGPU TargetInfo objects.
//
//===----------------------------------------------------------------------===//
12
13#include "AMDGPU.h"
14#include "clang/Basic/Builtins.h"
Richard Trieu63688182018-12-11 03:18:39 +000015#include "clang/Basic/CodeGenOptions.h"
Erich Keaneebba5922017-07-21 22:37:03 +000016#include "clang/Basic/LangOptions.h"
17#include "clang/Basic/MacroBuilder.h"
18#include "clang/Basic/TargetBuiltins.h"
Erich Keaneebba5922017-07-21 22:37:03 +000019#include "llvm/ADT/StringSwitch.h"
Bjorn Pettersson78424e52019-10-21 17:58:14 +000020#include "llvm/IR/DataLayout.h"
Erich Keaneebba5922017-07-21 22:37:03 +000021
22using namespace clang;
23using namespace clang::targets;
24
25namespace clang {
26namespace targets {
27
28// If you edit the description strings, make sure you update
29// getPointerWidthV().
30
31static const char *const DataLayoutStringR600 =
32 "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
Matt Arsenaultb130ea52018-03-27 19:26:51 +000033 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
Erich Keaneebba5922017-07-21 22:37:03 +000034
Yaxun Liu1578a0a2018-03-05 17:50:10 +000035static const char *const DataLayoutStringAMDGCN =
Yaxun Liu651bd732018-02-13 18:01:21 +000036 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
Erich Keaneebba5922017-07-21 22:37:03 +000037 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
Michael Liao3c2aadb2019-03-18 18:11:37 +000038 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
39 "-ni:7";
Erich Keaneebba5922017-07-21 22:37:03 +000040
Yaxun Liu1578a0a2018-03-05 17:50:10 +000041const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
42 Generic, // Default
43 Global, // opencl_global
44 Local, // opencl_local
45 Constant, // opencl_constant
46 Private, // opencl_private
47 Generic, // opencl_generic
48 Global, // cuda_device
49 Constant, // cuda_constant
50 Local // cuda_shared
Erich Keaneebba5922017-07-21 22:37:03 +000051};
52
Yaxun Liu1578a0a2018-03-05 17:50:10 +000053const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
54 Private, // Default
55 Global, // opencl_global
56 Local, // opencl_local
57 Constant, // opencl_constant
58 Private, // opencl_private
59 Generic, // opencl_generic
60 Global, // cuda_device
61 Constant, // cuda_constant
62 Local // cuda_shared
Erich Keaneebba5922017-07-21 22:37:03 +000063};
64} // namespace targets
65} // namespace clang
66
67const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
68#define BUILTIN(ID, TYPE, ATTRS) \
69 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
70#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \
71 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
72#include "clang/Basic/BuiltinsAMDGPU.def"
73};
74
75const char *const AMDGPUTargetInfo::GCCRegNames[] = {
76 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
77 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
78 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
79 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
80 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
81 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
82 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
83 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
84 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
85 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
86 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
87 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
88 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
89 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
90 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
91 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
92 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
93 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
94 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
95 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
96 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
97 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
98 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
99 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
100 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
101 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
102 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
103 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
104 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
105 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
106 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
107 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
108 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
109 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
110 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
111 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
112 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
113 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
114 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
115 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
116 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
117 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
118 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
Matt Arsenaulte7da1362018-02-09 16:58:41 +0000119 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
Erich Keaneebba5922017-07-21 22:37:03 +0000120 "flat_scratch_lo", "flat_scratch_hi"
121};
122
123ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
124 return llvm::makeArrayRef(GCCRegNames);
125}
126
127bool AMDGPUTargetInfo::initFeatureMap(
128 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
129 const std::vector<std::string> &FeatureVec) const {
130
Matt Arsenaultb666e732018-08-21 16:13:29 +0000131 using namespace llvm::AMDGPU;
132
Erich Keaneebba5922017-07-21 22:37:03 +0000133 // XXX - What does the member GPU mean if device name string passed here?
Konstantin Zhuravlyovd6b34532018-02-27 21:48:05 +0000134 if (isAMDGCN(getTriple())) {
Matt Arsenaultb666e732018-08-21 16:13:29 +0000135 switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
Stanislav Mekhanoshincafccd72019-06-14 00:33:59 +0000136 case GK_GFX1012:
137 case GK_GFX1011:
138 Features["dot1-insts"] = true;
139 Features["dot2-insts"] = true;
140 Features["dot5-insts"] = true;
141 Features["dot6-insts"] = true;
142 LLVM_FALLTHROUGH;
Stanislav Mekhanoshin91792f12019-05-13 23:15:59 +0000143 case GK_GFX1010:
144 Features["dl-insts"] = true;
Matt Arsenaultfc849252019-06-22 01:30:00 +0000145 Features["ci-insts"] = true;
Matt Arsenault281f2e22019-09-05 03:00:43 +0000146 Features["flat-address-space"] = true;
Stanislav Mekhanoshin91792f12019-05-13 23:15:59 +0000147 Features["16-bit-insts"] = true;
148 Features["dpp"] = true;
Matt Arsenaultfc849252019-06-22 01:30:00 +0000149 Features["gfx8-insts"] = true;
Stanislav Mekhanoshin91792f12019-05-13 23:15:59 +0000150 Features["gfx9-insts"] = true;
151 Features["gfx10-insts"] = true;
152 Features["s-memrealtime"] = true;
153 break;
Stanislav Mekhanoshin0cfd75a2019-07-09 18:19:00 +0000154 case GK_GFX908:
155 Features["dot3-insts"] = true;
156 Features["dot4-insts"] = true;
157 Features["dot5-insts"] = true;
158 Features["dot6-insts"] = true;
159 LLVM_FALLTHROUGH;
Matt Arsenaultd2da3c22018-04-30 19:08:27 +0000160 case GK_GFX906:
161 Features["dl-insts"] = true;
Stanislav Mekhanoshin1607a372019-02-09 00:34:41 +0000162 Features["dot1-insts"] = true;
163 Features["dot2-insts"] = true;
Matt Arsenaultd2da3c22018-04-30 19:08:27 +0000164 LLVM_FALLTHROUGH;
Konstantin Zhuravlyov06570952018-10-24 19:07:56 +0000165 case GK_GFX909:
Matt Arsenaultd2da3c22018-04-30 19:08:27 +0000166 case GK_GFX904:
Konstantin Zhuravlyovd6b34532018-02-27 21:48:05 +0000167 case GK_GFX902:
168 case GK_GFX900:
Erich Keaneebba5922017-07-21 22:37:03 +0000169 Features["gfx9-insts"] = true;
170 LLVM_FALLTHROUGH;
Konstantin Zhuravlyovd6b34532018-02-27 21:48:05 +0000171 case GK_GFX810:
172 case GK_GFX803:
173 case GK_GFX802:
174 case GK_GFX801:
Stanislav Mekhanoshin1d9f2862019-04-05 18:25:00 +0000175 Features["gfx8-insts"] = true;
Erich Keaneebba5922017-07-21 22:37:03 +0000176 Features["16-bit-insts"] = true;
177 Features["dpp"] = true;
Konstantin Zhuravlyovd6b34532018-02-27 21:48:05 +0000178 Features["s-memrealtime"] = true;
Matt Arsenault24f39242018-08-07 07:49:04 +0000179 LLVM_FALLTHROUGH;
Konstantin Zhuravlyovd6b34532018-02-27 21:48:05 +0000180 case GK_GFX704:
181 case GK_GFX703:
182 case GK_GFX702:
183 case GK_GFX701:
184 case GK_GFX700:
Matt Arsenault24f39242018-08-07 07:49:04 +0000185 Features["ci-insts"] = true;
Matt Arsenault281f2e22019-09-05 03:00:43 +0000186 Features["flat-address-space"] = true;
Matt Arsenault24f39242018-08-07 07:49:04 +0000187 LLVM_FALLTHROUGH;
Konstantin Zhuravlyovd6b34532018-02-27 21:48:05 +0000188 case GK_GFX601:
189 case GK_GFX600:
190 break;
Erich Keaneebba5922017-07-21 22:37:03 +0000191 case GK_NONE:
Stanislav Mekhanoshinc17705b72019-08-14 20:55:15 +0000192 break;
Erich Keaneebba5922017-07-21 22:37:03 +0000193 default:
Konstantin Zhuravlyovd6b34532018-02-27 21:48:05 +0000194 llvm_unreachable("Unhandled GPU!");
Erich Keaneebba5922017-07-21 22:37:03 +0000195 }
196 } else {
197 if (CPU.empty())
198 CPU = "r600";
199
Matt Arsenaultb666e732018-08-21 16:13:29 +0000200 switch (llvm::AMDGPU::parseArchR600(CPU)) {
Erich Keaneebba5922017-07-21 22:37:03 +0000201 case GK_CAYMAN:
Konstantin Zhuravlyovd6b34532018-02-27 21:48:05 +0000202 case GK_CYPRESS:
203 case GK_RV770:
204 case GK_RV670:
Matt Arsenaulte4f62802017-12-05 03:51:26 +0000205 // TODO: Add fp64 when implemented.
Erich Keaneebba5922017-07-21 22:37:03 +0000206 break;
Konstantin Zhuravlyovd6b34532018-02-27 21:48:05 +0000207 case GK_TURKS:
208 case GK_CAICOS:
209 case GK_BARTS:
210 case GK_SUMO:
211 case GK_REDWOOD:
212 case GK_JUNIPER:
213 case GK_CEDAR:
214 case GK_RV730:
215 case GK_RV710:
216 case GK_RS880:
217 case GK_R630:
218 case GK_R600:
219 break;
Erich Keaneebba5922017-07-21 22:37:03 +0000220 default:
Konstantin Zhuravlyovd6b34532018-02-27 21:48:05 +0000221 llvm_unreachable("Unhandled GPU!");
Erich Keaneebba5922017-07-21 22:37:03 +0000222 }
223 }
224
225 return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
226}
227
228void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts,
229 TargetOptions &TargetOpts) const {
230 bool hasFP32Denormals = false;
231 bool hasFP64Denormals = false;
Matt Arsenaultb666e732018-08-21 16:13:29 +0000232
Erich Keaneebba5922017-07-21 22:37:03 +0000233 for (auto &I : TargetOpts.FeaturesAsWritten) {
234 if (I == "+fp32-denormals" || I == "-fp32-denormals")
235 hasFP32Denormals = true;
236 if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals")
237 hasFP64Denormals = true;
238 }
239 if (!hasFP32Denormals)
240 TargetOpts.Features.push_back(
Matt Arsenaultb666e732018-08-21 16:13:29 +0000241 (Twine(hasFastFMAF() && hasFullRateDenormalsF32() && !CGOpts.FlushDenorm
242 ? '+' : '-') + Twine("fp32-denormals"))
Erich Keaneebba5922017-07-21 22:37:03 +0000243 .str());
244 // Always do not flush fp64 or fp16 denorms.
Matt Arsenaultb666e732018-08-21 16:13:29 +0000245 if (!hasFP64Denormals && hasFP64())
Erich Keaneebba5922017-07-21 22:37:03 +0000246 TargetOpts.Features.push_back("+fp64-fp16-denormals");
247}
248
Erich Keanee44bdb32018-02-08 23:16:55 +0000249void AMDGPUTargetInfo::fillValidCPUList(
250 SmallVectorImpl<StringRef> &Values) const {
Konstantin Zhuravlyovd6b34532018-02-27 21:48:05 +0000251 if (isAMDGCN(getTriple()))
Matt Arsenaultb666e732018-08-21 16:13:29 +0000252 llvm::AMDGPU::fillValidArchListAMDGCN(Values);
Erich Keanee44bdb32018-02-08 23:16:55 +0000253 else
Matt Arsenaultb666e732018-08-21 16:13:29 +0000254 llvm::AMDGPU::fillValidArchListR600(Values);
Erich Keaneebba5922017-07-21 22:37:03 +0000255}
256
257void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
Yaxun Liu1578a0a2018-03-05 17:50:10 +0000258 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
Erich Keaneebba5922017-07-21 22:37:03 +0000259}
260
261AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
262 const TargetOptions &Opts)
Yaxun Liu1578a0a2018-03-05 17:50:10 +0000263 : TargetInfo(Triple),
Matt Arsenaultb666e732018-08-21 16:13:29 +0000264 GPUKind(isAMDGCN(Triple) ?
265 llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
266 llvm::AMDGPU::parseArchR600(Opts.CPU)),
267 GPUFeatures(isAMDGCN(Triple) ?
268 llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
269 llvm::AMDGPU::getArchAttrR600(GPUKind)) {
Yaxun Liu1578a0a2018-03-05 17:50:10 +0000270 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
271 : DataLayoutStringR600);
272 assert(DataLayout->getAllocaAddrSpace() == Private);
Erich Keaneebba5922017-07-21 22:37:03 +0000273
274 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
Erich Keaneebba5922017-07-21 22:37:03 +0000275 !isAMDGCN(Triple));
276 UseAddrSpaceMapMangling = true;
277
Yaxun Liu44697012019-04-30 18:35:37 +0000278 HasLegalHalfType = true;
279 HasFloat16 = true;
280
Erich Keaneebba5922017-07-21 22:37:03 +0000281 // Set pointer width and alignment for target address space 0.
282 PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
283 if (getMaxPointerWidth() == 64) {
284 LongWidth = LongAlign = 64;
285 SizeType = UnsignedLong;
286 PtrDiffType = SignedLong;
287 IntPtrType = SignedLong;
288 }
Yaxun Liu39195062017-08-04 18:16:31 +0000289
290 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
Erich Keaneebba5922017-07-21 22:37:03 +0000291}
292
293void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
294 TargetInfo::adjust(Opts);
Yaxun Liu1578a0a2018-03-05 17:50:10 +0000295 // ToDo: There are still a few places using default address space as private
296 // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
297 // can be removed from the following line.
298 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
299 !isAMDGCN(getTriple()));
Erich Keaneebba5922017-07-21 22:37:03 +0000300}
301
302ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
303 return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
304 Builtin::FirstTSBuiltin);
305}
306
307void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
308 MacroBuilder &Builder) const {
Konstantin Zhuravlyovcf717612018-02-15 02:37:04 +0000309 Builder.defineMacro("__AMD__");
310 Builder.defineMacro("__AMDGPU__");
311
Konstantin Zhuravlyovd6b34532018-02-27 21:48:05 +0000312 if (isAMDGCN(getTriple()))
Erich Keaneebba5922017-07-21 22:37:03 +0000313 Builder.defineMacro("__AMDGCN__");
314 else
315 Builder.defineMacro("__R600__");
316
Matt Arsenaultb666e732018-08-21 16:13:29 +0000317 if (GPUKind != llvm::AMDGPU::GK_NONE) {
318 StringRef CanonName = isAMDGCN(getTriple()) ?
319 getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
320 Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
321 }
Konstantin Zhuravlyovcf717612018-02-15 02:37:04 +0000322
Konstantin Zhuravlyovd6b34532018-02-27 21:48:05 +0000323 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
324 // removed in the near future.
Matt Arsenaultb666e732018-08-21 16:13:29 +0000325 if (hasFMAF())
Erich Keaneebba5922017-07-21 22:37:03 +0000326 Builder.defineMacro("__HAS_FMAF__");
Matt Arsenaultb666e732018-08-21 16:13:29 +0000327 if (hasFastFMAF())
Konstantin Zhuravlyovd6b34532018-02-27 21:48:05 +0000328 Builder.defineMacro("FP_FAST_FMAF");
Matt Arsenaultb666e732018-08-21 16:13:29 +0000329 if (hasLDEXPF())
Erich Keaneebba5922017-07-21 22:37:03 +0000330 Builder.defineMacro("__HAS_LDEXPF__");
Matt Arsenaultb666e732018-08-21 16:13:29 +0000331 if (hasFP64())
Erich Keaneebba5922017-07-21 22:37:03 +0000332 Builder.defineMacro("__HAS_FP64__");
Matt Arsenaultb666e732018-08-21 16:13:29 +0000333 if (hasFastFMA())
Konstantin Zhuravlyovd6b34532018-02-27 21:48:05 +0000334 Builder.defineMacro("FP_FAST_FMA");
Erich Keaneebba5922017-07-21 22:37:03 +0000335}
Yaxun Liu95f2ca52019-01-30 12:26:54 +0000336
337void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
Yaxun Liu277e0642019-01-31 21:57:51 +0000338 assert(HalfFormat == Aux->HalfFormat);
339 assert(FloatFormat == Aux->FloatFormat);
340 assert(DoubleFormat == Aux->DoubleFormat);
341
342 // On x86_64 long double is 80-bit extended precision format, which is
343 // not supported by AMDGPU. 128-bit floating point format is also not
344 // supported by AMDGPU. Therefore keep its own format for these two types.
345 auto SaveLongDoubleFormat = LongDoubleFormat;
346 auto SaveFloat128Format = Float128Format;
Yaxun Liu95f2ca52019-01-30 12:26:54 +0000347 copyAuxTarget(Aux);
Yaxun Liu277e0642019-01-31 21:57:51 +0000348 LongDoubleFormat = SaveLongDoubleFormat;
349 Float128Format = SaveFloat128Format;
Yaxun Liu95f2ca52019-01-30 12:26:54 +0000350}