blob: 7ad34f4db1d37f716d4d84a640469bd77c60f572 [file] [log] [blame]
Tom Stellard45bb48e2015-06-13 03:28:10 +00001//===-- AMDGPUAsmPrinter.cpp - AMDGPU Assembly printer -------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11///
12/// The AMDGPUAsmPrinter is used to print both assembly string and also binary
13/// code. When passed an MCAsmStreamer it prints assembly and when passed
14/// an MCObjectStreamer it outputs binary code.
15//
16//===----------------------------------------------------------------------===//
17//
18
19#include "AMDGPUAsmPrinter.h"
Tom Stellard347ac792015-06-26 21:15:07 +000020#include "MCTargetDesc/AMDGPUTargetStreamer.h"
Tom Stellard45bb48e2015-06-13 03:28:10 +000021#include "InstPrinter/AMDGPUInstPrinter.h"
Tom Stellard347ac792015-06-26 21:15:07 +000022#include "Utils/AMDGPUBaseInfo.h"
Tom Stellard45bb48e2015-06-13 03:28:10 +000023#include "AMDGPU.h"
24#include "AMDKernelCodeT.h"
25#include "AMDGPUSubtarget.h"
26#include "R600Defines.h"
27#include "R600MachineFunctionInfo.h"
28#include "R600RegisterInfo.h"
29#include "SIDefines.h"
30#include "SIMachineFunctionInfo.h"
Matt Arsenaulta9720c62016-06-20 17:51:32 +000031#include "SIInstrInfo.h"
Tom Stellard45bb48e2015-06-13 03:28:10 +000032#include "SIRegisterInfo.h"
33#include "llvm/CodeGen/MachineFrameInfo.h"
Matt Arsenaultff982412016-06-20 18:13:04 +000034#include "llvm/IR/DiagnosticInfo.h"
Tom Stellard45bb48e2015-06-13 03:28:10 +000035#include "llvm/MC/MCContext.h"
36#include "llvm/MC/MCSectionELF.h"
37#include "llvm/MC/MCStreamer.h"
38#include "llvm/Support/ELF.h"
39#include "llvm/Support/MathExtras.h"
40#include "llvm/Support/TargetRegistry.h"
41#include "llvm/Target/TargetLoweringObjectFile.h"
Yaxun Liua711cc72016-07-16 05:09:21 +000042#include "AMDGPURuntimeMetadata.h"
Tom Stellard45bb48e2015-06-13 03:28:10 +000043
Yaxun Liua711cc72016-07-16 05:09:21 +000044using namespace ::AMDGPU;
Tom Stellard45bb48e2015-06-13 03:28:10 +000045using namespace llvm;
46
47// TODO: This should get the default rounding mode from the kernel. We just set
48// the default here, but this could change if the OpenCL rounding mode pragmas
49// are used.
50//
51// The denormal mode here should match what is reported by the OpenCL runtime
52// for the CL_FP_DENORM bit from CL_DEVICE_{HALF|SINGLE|DOUBLE}_FP_CONFIG, but
53// can also be overridden to flush with the -cl-denorms-are-zero compiler flag.
54//
55// AMD OpenCL only sets flush none and reports CL_FP_DENORM for double
56// precision, and leaves single precision to flush all and does not report
57// CL_FP_DENORM for CL_DEVICE_SINGLE_FP_CONFIG. Mesa's OpenCL currently reports
58// CL_FP_DENORM for both.
59//
60// FIXME: It seems some instructions do not support single precision denormals
61// regardless of the mode (exp_*_f32, rcp_*_f32, rsq_*_f32, rsq_*f32, sqrt_f32,
62// and sin_f32, cos_f32 on most parts).
63
64// We want to use these instructions, and using fp32 denormals also causes
65// instructions to run at the double precision rate for the device so it's
66// probably best to just report no single precision denormals.
67static uint32_t getFPMode(const MachineFunction &F) {
Matt Arsenault43e92fe2016-06-24 06:30:11 +000068 const SISubtarget& ST = F.getSubtarget<SISubtarget>();
Tom Stellard45bb48e2015-06-13 03:28:10 +000069 // TODO: Is there any real use for the flush in only / flush out only modes?
70
71 uint32_t FP32Denormals =
72 ST.hasFP32Denormals() ? FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT;
73
74 uint32_t FP64Denormals =
75 ST.hasFP64Denormals() ? FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT;
76
77 return FP_ROUND_MODE_SP(FP_ROUND_ROUND_TO_NEAREST) |
78 FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_NEAREST) |
79 FP_DENORM_MODE_SP(FP32Denormals) |
80 FP_DENORM_MODE_DP(FP64Denormals);
81}
82
83static AsmPrinter *
84createAMDGPUAsmPrinterPass(TargetMachine &tm,
85 std::unique_ptr<MCStreamer> &&Streamer) {
86 return new AMDGPUAsmPrinter(tm, std::move(Streamer));
87}
88
89extern "C" void LLVMInitializeAMDGPUAsmPrinter() {
90 TargetRegistry::RegisterAsmPrinter(TheAMDGPUTarget, createAMDGPUAsmPrinterPass);
91 TargetRegistry::RegisterAsmPrinter(TheGCNTarget, createAMDGPUAsmPrinterPass);
92}
93
94AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM,
95 std::unique_ptr<MCStreamer> Streamer)
Matt Arsenault11f74022016-10-06 17:19:11 +000096 : AsmPrinter(TM, std::move(Streamer)) {}
Tom Stellard45bb48e2015-06-13 03:28:10 +000097
Mehdi Amini117296c2016-10-01 02:56:57 +000098StringRef AMDGPUAsmPrinter::getPassName() const {
Matt Arsenaultf9245b72016-07-22 17:01:25 +000099 return "AMDGPU Assembly Printer";
100}
101
Tom Stellardf4218372016-01-12 17:18:17 +0000102void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) {
103 if (TM.getTargetTriple().getOS() != Triple::AMDHSA)
104 return;
105
106 // Need to construct an MCSubtargetInfo here in case we have no functions
107 // in the module.
108 std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo(
109 TM.getTargetTriple().str(), TM.getTargetCPU(),
110 TM.getTargetFeatureString()));
111
112 AMDGPUTargetStreamer *TS =
113 static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
114
Tom Stellard418beb72016-07-13 14:23:33 +0000115 TS->EmitDirectiveHSACodeObjectVersion(2, 1);
Tom Stellardfcfaea42016-05-05 17:03:33 +0000116
Tom Stellardf4218372016-01-12 17:18:17 +0000117 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI->getFeatureBits());
118 TS->EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, ISA.Stepping,
119 "AMD", "AMDGPU");
Yaxun Liua711cc72016-07-16 05:09:21 +0000120 emitStartOfRuntimeMetadata(M);
Tom Stellardf4218372016-01-12 17:18:17 +0000121}
122
Matt Arsenault6bc43d82016-10-06 16:20:41 +0000123bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough(
124 const MachineBasicBlock *MBB) const {
125 if (!AsmPrinter::isBlockOnlyReachableByFallthrough(MBB))
126 return false;
127
128 if (MBB->empty())
129 return true;
130
131 // If this is a block implementing a long branch, an expression relative to
132 // the start of the block is needed. to the start of the block.
133 // XXX - Is there a smarter way to check this?
134 return (MBB->back().getOpcode() != AMDGPU::S_SETPC_B64);
135}
136
137
Tom Stellardf151a452015-06-26 21:14:58 +0000138void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
139 const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
140 SIProgramInfo KernelInfo;
Tom Stellard0b76fc4c2016-09-16 21:34:26 +0000141 if (STM.isAmdCodeObjectV2()) {
Tom Stellardf151a452015-06-26 21:14:58 +0000142 getSIProgramInfo(KernelInfo, *MF);
143 EmitAmdKernelCodeT(*MF, KernelInfo);
144 }
145}
146
Tom Stellard1e1b05d2015-11-06 11:45:14 +0000147void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {
148 const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
149 const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
Tom Stellard0b76fc4c2016-09-16 21:34:26 +0000150 if (MFI->isKernel() && STM.isAmdCodeObjectV2()) {
Tom Stellard1e1b05d2015-11-06 11:45:14 +0000151 AMDGPUTargetStreamer *TS =
152 static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
Tom Stellard1b9748c2016-09-26 17:29:25 +0000153 SmallString<128> SymbolName;
154 getNameWithPrefix(SymbolName, MF->getFunction()),
155 TS->EmitAMDGPUSymbolType(SymbolName, ELF::STT_AMDGPU_HSA_KERNEL);
Tom Stellard1e1b05d2015-11-06 11:45:14 +0000156 }
157
158 AsmPrinter::EmitFunctionEntryLabel();
159}
160
Tom Stellarde3b5aea2015-12-02 17:00:42 +0000161void AMDGPUAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
162
Tom Stellard00f2f912015-12-02 19:47:57 +0000163 // Group segment variables aren't emitted in HSA.
164 if (AMDGPU::isGroupSegment(GV))
165 return;
166
Tom Stellardfcfaea42016-05-05 17:03:33 +0000167 AsmPrinter::EmitGlobalVariable(GV);
Tom Stellarde3b5aea2015-12-02 17:00:42 +0000168}
169
Tom Stellard45bb48e2015-06-13 03:28:10 +0000170bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
171
172 // The starting address of all shader programs must be 256 bytes aligned.
173 MF.setAlignment(8);
174
175 SetupMachineFunction(MF);
176
177 MCContext &Context = getObjFileLowering().getContext();
178 MCSectionELF *ConfigSection =
179 Context.getELFSection(".AMDGPU.config", ELF::SHT_PROGBITS, 0);
180 OutStreamer->SwitchSection(ConfigSection);
181
182 const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
183 SIProgramInfo KernelInfo;
Tom Stellardf151a452015-06-26 21:14:58 +0000184 if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
Matt Arsenault297ae312015-08-15 00:12:39 +0000185 getSIProgramInfo(KernelInfo, MF);
Tom Stellardf151a452015-06-26 21:14:58 +0000186 if (!STM.isAmdHsaOS()) {
Tom Stellardf151a452015-06-26 21:14:58 +0000187 EmitProgramInfoSI(MF, KernelInfo);
188 }
Tom Stellard45bb48e2015-06-13 03:28:10 +0000189 } else {
190 EmitProgramInfoR600(MF);
191 }
192
193 DisasmLines.clear();
194 HexLines.clear();
195 DisasmLineMaxLen = 0;
196
197 EmitFunctionBody();
198
199 if (isVerbose()) {
200 MCSectionELF *CommentSection =
201 Context.getELFSection(".AMDGPU.csdata", ELF::SHT_PROGBITS, 0);
202 OutStreamer->SwitchSection(CommentSection);
203
204 if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
205 OutStreamer->emitRawComment(" Kernel info:", false);
206 OutStreamer->emitRawComment(" codeLenInByte = " + Twine(KernelInfo.CodeLen),
207 false);
208 OutStreamer->emitRawComment(" NumSgprs: " + Twine(KernelInfo.NumSGPR),
209 false);
210 OutStreamer->emitRawComment(" NumVgprs: " + Twine(KernelInfo.NumVGPR),
211 false);
212 OutStreamer->emitRawComment(" FloatMode: " + Twine(KernelInfo.FloatMode),
213 false);
214 OutStreamer->emitRawComment(" IeeeMode: " + Twine(KernelInfo.IEEEMode),
215 false);
216 OutStreamer->emitRawComment(" ScratchSize: " + Twine(KernelInfo.ScratchSize),
217 false);
Matt Arsenaultfd8ab092016-04-14 22:11:51 +0000218 OutStreamer->emitRawComment(" LDSByteSize: " + Twine(KernelInfo.LDSSize) +
219 " bytes/workgroup (compile time only)", false);
Matt Arsenaultd41c0db2015-11-05 05:27:07 +0000220
Konstantin Zhuravlyov1d650262016-09-06 20:22:28 +0000221 OutStreamer->emitRawComment(" SGPRBlocks: " +
222 Twine(KernelInfo.SGPRBlocks), false);
223 OutStreamer->emitRawComment(" VGPRBlocks: " +
224 Twine(KernelInfo.VGPRBlocks), false);
225
226 OutStreamer->emitRawComment(" NumSGPRsForWavesPerEU: " +
227 Twine(KernelInfo.NumSGPRsForWavesPerEU), false);
228 OutStreamer->emitRawComment(" NumVGPRsForWavesPerEU: " +
229 Twine(KernelInfo.NumVGPRsForWavesPerEU), false);
230
Konstantin Zhuravlyov1d99c4d2016-04-26 15:43:14 +0000231 OutStreamer->emitRawComment(" ReservedVGPRFirst: " + Twine(KernelInfo.ReservedVGPRFirst),
232 false);
233 OutStreamer->emitRawComment(" ReservedVGPRCount: " + Twine(KernelInfo.ReservedVGPRCount),
234 false);
235
Konstantin Zhuravlyovf2f3d142016-06-25 03:11:28 +0000236 if (MF.getSubtarget<SISubtarget>().debuggerEmitPrologue()) {
237 OutStreamer->emitRawComment(" DebuggerWavefrontPrivateSegmentOffsetSGPR: s" +
238 Twine(KernelInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR), false);
239 OutStreamer->emitRawComment(" DebuggerPrivateSegmentBufferSGPR: s" +
240 Twine(KernelInfo.DebuggerPrivateSegmentBufferSGPR), false);
241 }
242
Matt Arsenaultd41c0db2015-11-05 05:27:07 +0000243 OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:USER_SGPR: " +
Matt Arsenault8246d4a2015-11-11 00:27:46 +0000244 Twine(G_00B84C_USER_SGPR(KernelInfo.ComputePGMRSrc2)),
Matt Arsenaultd41c0db2015-11-05 05:27:07 +0000245 false);
Matt Arsenault8246d4a2015-11-11 00:27:46 +0000246 OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_X_EN: " +
247 Twine(G_00B84C_TGID_X_EN(KernelInfo.ComputePGMRSrc2)),
248 false);
249 OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_Y_EN: " +
250 Twine(G_00B84C_TGID_Y_EN(KernelInfo.ComputePGMRSrc2)),
251 false);
252 OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_Z_EN: " +
253 Twine(G_00B84C_TGID_Z_EN(KernelInfo.ComputePGMRSrc2)),
254 false);
255 OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: " +
256 Twine(G_00B84C_TIDIG_COMP_CNT(KernelInfo.ComputePGMRSrc2)),
257 false);
258
Tom Stellard45bb48e2015-06-13 03:28:10 +0000259 } else {
260 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
261 OutStreamer->emitRawComment(
Matt Arsenaultf9245b72016-07-22 17:01:25 +0000262 Twine("SQ_PGM_RESOURCES:STACK_SIZE = " + Twine(MFI->CFStackSize)));
Tom Stellard45bb48e2015-06-13 03:28:10 +0000263 }
264 }
265
266 if (STM.dumpCode()) {
267
268 OutStreamer->SwitchSection(
269 Context.getELFSection(".AMDGPU.disasm", ELF::SHT_NOTE, 0));
270
271 for (size_t i = 0; i < DisasmLines.size(); ++i) {
272 std::string Comment(DisasmLineMaxLen - DisasmLines[i].size(), ' ');
273 Comment += " ; " + HexLines[i] + "\n";
274
275 OutStreamer->EmitBytes(StringRef(DisasmLines[i]));
276 OutStreamer->EmitBytes(StringRef(Comment));
277 }
278 }
279
Yaxun Liua711cc72016-07-16 05:09:21 +0000280 emitRuntimeMetadata(*MF.getFunction());
281
Tom Stellard45bb48e2015-06-13 03:28:10 +0000282 return false;
283}
284
285void AMDGPUAsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) {
286 unsigned MaxGPR = 0;
287 bool killPixel = false;
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000288 const R600Subtarget &STM = MF.getSubtarget<R600Subtarget>();
289 const R600RegisterInfo *RI = STM.getRegisterInfo();
Tom Stellard45bb48e2015-06-13 03:28:10 +0000290 const R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
291
292 for (const MachineBasicBlock &MBB : MF) {
293 for (const MachineInstr &MI : MBB) {
294 if (MI.getOpcode() == AMDGPU::KILLGT)
295 killPixel = true;
296 unsigned numOperands = MI.getNumOperands();
297 for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
298 const MachineOperand &MO = MI.getOperand(op_idx);
299 if (!MO.isReg())
300 continue;
301 unsigned HWReg = RI->getEncodingValue(MO.getReg()) & 0xff;
302
303 // Register with value > 127 aren't GPR
304 if (HWReg > 127)
305 continue;
306 MaxGPR = std::max(MaxGPR, HWReg);
307 }
308 }
309 }
310
311 unsigned RsrcReg;
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000312 if (STM.getGeneration() >= R600Subtarget::EVERGREEN) {
Tom Stellard45bb48e2015-06-13 03:28:10 +0000313 // Evergreen / Northern Islands
Nicolai Haehnledf3a20c2016-04-06 19:40:20 +0000314 switch (MF.getFunction()->getCallingConv()) {
Justin Bognercd1d5aa2016-08-17 20:30:52 +0000315 default: LLVM_FALLTHROUGH;
Nicolai Haehnledf3a20c2016-04-06 19:40:20 +0000316 case CallingConv::AMDGPU_CS: RsrcReg = R_0288D4_SQ_PGM_RESOURCES_LS; break;
317 case CallingConv::AMDGPU_GS: RsrcReg = R_028878_SQ_PGM_RESOURCES_GS; break;
318 case CallingConv::AMDGPU_PS: RsrcReg = R_028844_SQ_PGM_RESOURCES_PS; break;
319 case CallingConv::AMDGPU_VS: RsrcReg = R_028860_SQ_PGM_RESOURCES_VS; break;
Tom Stellard45bb48e2015-06-13 03:28:10 +0000320 }
321 } else {
322 // R600 / R700
Nicolai Haehnledf3a20c2016-04-06 19:40:20 +0000323 switch (MF.getFunction()->getCallingConv()) {
Justin Bognercd1d5aa2016-08-17 20:30:52 +0000324 default: LLVM_FALLTHROUGH;
325 case CallingConv::AMDGPU_GS: LLVM_FALLTHROUGH;
326 case CallingConv::AMDGPU_CS: LLVM_FALLTHROUGH;
Nicolai Haehnledf3a20c2016-04-06 19:40:20 +0000327 case CallingConv::AMDGPU_VS: RsrcReg = R_028868_SQ_PGM_RESOURCES_VS; break;
328 case CallingConv::AMDGPU_PS: RsrcReg = R_028850_SQ_PGM_RESOURCES_PS; break;
Tom Stellard45bb48e2015-06-13 03:28:10 +0000329 }
330 }
331
332 OutStreamer->EmitIntValue(RsrcReg, 4);
333 OutStreamer->EmitIntValue(S_NUM_GPRS(MaxGPR + 1) |
Matt Arsenaultf9245b72016-07-22 17:01:25 +0000334 S_STACK_SIZE(MFI->CFStackSize), 4);
Tom Stellard45bb48e2015-06-13 03:28:10 +0000335 OutStreamer->EmitIntValue(R_02880C_DB_SHADER_CONTROL, 4);
336 OutStreamer->EmitIntValue(S_02880C_KILL_ENABLE(killPixel), 4);
337
Nicolai Haehnledf3a20c2016-04-06 19:40:20 +0000338 if (AMDGPU::isCompute(MF.getFunction()->getCallingConv())) {
Tom Stellard45bb48e2015-06-13 03:28:10 +0000339 OutStreamer->EmitIntValue(R_0288E8_SQ_LDS_ALLOC, 4);
Matt Arsenault52ef4012016-07-26 16:45:58 +0000340 OutStreamer->EmitIntValue(alignTo(MFI->getLDSSize(), 4) >> 2, 4);
Tom Stellard45bb48e2015-06-13 03:28:10 +0000341 }
342}
343
344void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
345 const MachineFunction &MF) const {
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000346 const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
Tom Stellard45bb48e2015-06-13 03:28:10 +0000347 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
348 uint64_t CodeSize = 0;
349 unsigned MaxSGPR = 0;
350 unsigned MaxVGPR = 0;
351 bool VCCUsed = false;
352 bool FlatUsed = false;
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000353 const SIRegisterInfo *RI = STM.getRegisterInfo();
354 const SIInstrInfo *TII = STM.getInstrInfo();
Tom Stellard45bb48e2015-06-13 03:28:10 +0000355
356 for (const MachineBasicBlock &MBB : MF) {
357 for (const MachineInstr &MI : MBB) {
358 // TODO: CodeSize should account for multiple functions.
Matt Arsenaultc5746862015-08-12 09:04:44 +0000359
360 // TODO: Should we count size of debug info?
361 if (MI.isDebugValue())
362 continue;
363
Matt Arsenault10c17ca2016-10-06 10:13:23 +0000364 if (isVerbose())
365 CodeSize += TII->getInstSizeInBytes(MI);
Tom Stellard45bb48e2015-06-13 03:28:10 +0000366
367 unsigned numOperands = MI.getNumOperands();
368 for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
369 const MachineOperand &MO = MI.getOperand(op_idx);
370 unsigned width = 0;
371 bool isSGPR = false;
372
Matt Arsenaultd2c75892015-10-01 21:51:59 +0000373 if (!MO.isReg())
Tom Stellard45bb48e2015-06-13 03:28:10 +0000374 continue;
Tom Stellard45bb48e2015-06-13 03:28:10 +0000375
Matt Arsenaultd2c75892015-10-01 21:51:59 +0000376 unsigned reg = MO.getReg();
Tom Stellard45bb48e2015-06-13 03:28:10 +0000377 switch (reg) {
Tom Stellard45bb48e2015-06-13 03:28:10 +0000378 case AMDGPU::EXEC:
Nicolai Haehnle74839372016-04-19 21:58:17 +0000379 case AMDGPU::EXEC_LO:
380 case AMDGPU::EXEC_HI:
Matt Arsenaultd2c75892015-10-01 21:51:59 +0000381 case AMDGPU::SCC:
Tom Stellard45bb48e2015-06-13 03:28:10 +0000382 case AMDGPU::M0:
383 continue;
Matt Arsenaultd2c75892015-10-01 21:51:59 +0000384
385 case AMDGPU::VCC:
386 case AMDGPU::VCC_LO:
387 case AMDGPU::VCC_HI:
388 VCCUsed = true;
389 continue;
390
391 case AMDGPU::FLAT_SCR:
392 case AMDGPU::FLAT_SCR_LO:
393 case AMDGPU::FLAT_SCR_HI:
394 FlatUsed = true;
395 continue;
396
Artem Tamazoveb4d5a92016-04-13 16:18:41 +0000397 case AMDGPU::TBA:
398 case AMDGPU::TBA_LO:
399 case AMDGPU::TBA_HI:
400 case AMDGPU::TMA:
401 case AMDGPU::TMA_LO:
402 case AMDGPU::TMA_HI:
Matt Arsenaultb06db8f2016-07-26 21:03:36 +0000403 llvm_unreachable("trap handler registers should not be used");
Artem Tamazoveb4d5a92016-04-13 16:18:41 +0000404
Matt Arsenaultd2c75892015-10-01 21:51:59 +0000405 default:
406 break;
Tom Stellard45bb48e2015-06-13 03:28:10 +0000407 }
408
409 if (AMDGPU::SReg_32RegClass.contains(reg)) {
Matt Arsenaultb06db8f2016-07-26 21:03:36 +0000410 assert(!AMDGPU::TTMP_32RegClass.contains(reg) &&
411 "trap handler registers should not be used");
Tom Stellard45bb48e2015-06-13 03:28:10 +0000412 isSGPR = true;
413 width = 1;
414 } else if (AMDGPU::VGPR_32RegClass.contains(reg)) {
415 isSGPR = false;
416 width = 1;
417 } else if (AMDGPU::SReg_64RegClass.contains(reg)) {
Matt Arsenaultb06db8f2016-07-26 21:03:36 +0000418 assert(!AMDGPU::TTMP_64RegClass.contains(reg) &&
419 "trap handler registers should not be used");
Tom Stellard45bb48e2015-06-13 03:28:10 +0000420 isSGPR = true;
421 width = 2;
422 } else if (AMDGPU::VReg_64RegClass.contains(reg)) {
423 isSGPR = false;
424 width = 2;
425 } else if (AMDGPU::VReg_96RegClass.contains(reg)) {
426 isSGPR = false;
427 width = 3;
428 } else if (AMDGPU::SReg_128RegClass.contains(reg)) {
429 isSGPR = true;
430 width = 4;
431 } else if (AMDGPU::VReg_128RegClass.contains(reg)) {
432 isSGPR = false;
433 width = 4;
434 } else if (AMDGPU::SReg_256RegClass.contains(reg)) {
435 isSGPR = true;
436 width = 8;
437 } else if (AMDGPU::VReg_256RegClass.contains(reg)) {
438 isSGPR = false;
439 width = 8;
440 } else if (AMDGPU::SReg_512RegClass.contains(reg)) {
441 isSGPR = true;
442 width = 16;
443 } else if (AMDGPU::VReg_512RegClass.contains(reg)) {
444 isSGPR = false;
445 width = 16;
446 } else {
447 llvm_unreachable("Unknown register class");
448 }
449 unsigned hwReg = RI->getEncodingValue(reg) & 0xff;
450 unsigned maxUsed = hwReg + width - 1;
451 if (isSGPR) {
452 MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR;
453 } else {
454 MaxVGPR = maxUsed > MaxVGPR ? maxUsed : MaxVGPR;
455 }
456 }
457 }
458 }
459
Nicolai Haehnle3c05d6d2016-01-07 17:10:20 +0000460 unsigned ExtraSGPRs = 0;
461
462 if (VCCUsed)
463 ExtraSGPRs = 2;
464
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000465 if (STM.getGeneration() < SISubtarget::VOLCANIC_ISLANDS) {
Nicolai Haehnle3c05d6d2016-01-07 17:10:20 +0000466 if (FlatUsed)
467 ExtraSGPRs = 4;
468 } else {
469 if (STM.isXNACKEnabled())
470 ExtraSGPRs = 4;
Tom Stellard45bb48e2015-06-13 03:28:10 +0000471
Nicolai Haehnle5b504972016-01-04 23:35:53 +0000472 if (FlatUsed)
Nicolai Haehnle3c05d6d2016-01-07 17:10:20 +0000473 ExtraSGPRs = 6;
Tom Stellardcaaa3aa2015-12-17 17:05:09 +0000474 }
Tom Stellard45bb48e2015-06-13 03:28:10 +0000475
Konstantin Zhuravlyov29ddd2b2016-05-24 18:37:18 +0000476 // Record first reserved register and reserved register count fields, and
477 // update max register counts if "amdgpu-debugger-reserve-regs" attribute was
Konstantin Zhuravlyov1d650262016-09-06 20:22:28 +0000478 // requested.
479 ProgInfo.ReservedVGPRFirst = STM.debuggerReserveRegs() ? MaxVGPR + 1 : 0;
480 ProgInfo.ReservedVGPRCount = RI->getNumDebuggerReservedVGPRs(STM);
Konstantin Zhuravlyov1d99c4d2016-04-26 15:43:14 +0000481
Konstantin Zhuravlyovf2f3d142016-06-25 03:11:28 +0000482 // Update DebuggerWavefrontPrivateSegmentOffsetSGPR and
483 // DebuggerPrivateSegmentBufferSGPR fields if "amdgpu-debugger-emit-prologue"
Konstantin Zhuravlyov1d650262016-09-06 20:22:28 +0000484 // attribute was requested.
Konstantin Zhuravlyovf2f3d142016-06-25 03:11:28 +0000485 if (STM.debuggerEmitPrologue()) {
486 ProgInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR =
487 RI->getHWRegIndex(MFI->getScratchWaveOffsetReg());
488 ProgInfo.DebuggerPrivateSegmentBufferSGPR =
489 RI->getHWRegIndex(MFI->getScratchRSrcReg());
490 }
491
Konstantin Zhuravlyov1d650262016-09-06 20:22:28 +0000492 // Account for extra SGPRs and VGPRs reserved for debugger use.
493 MaxSGPR += ExtraSGPRs;
494 MaxVGPR += RI->getNumDebuggerReservedVGPRs(STM);
495
Tom Stellard45bb48e2015-06-13 03:28:10 +0000496 // We found the maximum register index. They start at 0, so add one to get the
497 // number of registers.
498 ProgInfo.NumVGPR = MaxVGPR + 1;
499 ProgInfo.NumSGPR = MaxSGPR + 1;
500
Konstantin Zhuravlyov1d650262016-09-06 20:22:28 +0000501 // Adjust number of registers used to meet default/requested minimum/maximum
502 // number of waves per execution unit request.
503 ProgInfo.NumSGPRsForWavesPerEU = std::max(
504 ProgInfo.NumSGPR, RI->getMinNumSGPRs(STM, MFI->getMaxWavesPerEU()));
505 ProgInfo.NumVGPRsForWavesPerEU = std::max(
506 ProgInfo.NumVGPR, RI->getMinNumVGPRs(MFI->getMaxWavesPerEU()));
507
Tom Stellard45bb48e2015-06-13 03:28:10 +0000508 if (STM.hasSGPRInitBug()) {
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000509 if (ProgInfo.NumSGPR > SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG) {
Matt Arsenault417c93e2015-06-17 20:55:25 +0000510 LLVMContext &Ctx = MF.getFunction()->getContext();
Matt Arsenaultff982412016-06-20 18:13:04 +0000511 DiagnosticInfoResourceLimit Diag(*MF.getFunction(),
512 "SGPRs with SGPR init bug",
513 ProgInfo.NumSGPR, DS_Error);
514 Ctx.diagnose(Diag);
Matt Arsenault417c93e2015-06-17 20:55:25 +0000515 }
Tom Stellard45bb48e2015-06-13 03:28:10 +0000516
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000517 ProgInfo.NumSGPR = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
Konstantin Zhuravlyov1d650262016-09-06 20:22:28 +0000518 ProgInfo.NumSGPRsForWavesPerEU = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
Tom Stellard45bb48e2015-06-13 03:28:10 +0000519 }
520
Matt Arsenault41003af2015-11-30 21:16:07 +0000521 if (MFI->NumUserSGPRs > STM.getMaxNumUserSGPRs()) {
522 LLVMContext &Ctx = MF.getFunction()->getContext();
Matt Arsenaultff982412016-06-20 18:13:04 +0000523 DiagnosticInfoResourceLimit Diag(*MF.getFunction(), "user SGPRs",
524 MFI->NumUserSGPRs, DS_Error);
525 Ctx.diagnose(Diag);
Matt Arsenault41003af2015-11-30 21:16:07 +0000526 }
527
Matt Arsenault52ef4012016-07-26 16:45:58 +0000528 if (MFI->getLDSSize() > static_cast<unsigned>(STM.getLocalMemorySize())) {
Matt Arsenault1c4d0ef2016-04-28 19:37:35 +0000529 LLVMContext &Ctx = MF.getFunction()->getContext();
Matt Arsenaultff982412016-06-20 18:13:04 +0000530 DiagnosticInfoResourceLimit Diag(*MF.getFunction(), "local memory",
Matt Arsenault52ef4012016-07-26 16:45:58 +0000531 MFI->getLDSSize(), DS_Error);
Matt Arsenaultff982412016-06-20 18:13:04 +0000532 Ctx.diagnose(Diag);
Matt Arsenault1c4d0ef2016-04-28 19:37:35 +0000533 }
534
Konstantin Zhuravlyov1d650262016-09-06 20:22:28 +0000535 // SGPRBlocks is actual number of SGPR blocks minus 1.
536 ProgInfo.SGPRBlocks = alignTo(ProgInfo.NumSGPRsForWavesPerEU,
537 RI->getSGPRAllocGranule());
538 ProgInfo.SGPRBlocks = ProgInfo.SGPRBlocks / RI->getSGPRAllocGranule() - 1;
539
540 // VGPRBlocks is actual number of VGPR blocks minus 1.
541 ProgInfo.VGPRBlocks = alignTo(ProgInfo.NumVGPRsForWavesPerEU,
542 RI->getVGPRAllocGranule());
543 ProgInfo.VGPRBlocks = ProgInfo.VGPRBlocks / RI->getVGPRAllocGranule() - 1;
544
Tom Stellard45bb48e2015-06-13 03:28:10 +0000545 // Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
546 // register.
547 ProgInfo.FloatMode = getFPMode(MF);
548
Tom Stellard45bb48e2015-06-13 03:28:10 +0000549 ProgInfo.IEEEMode = 0;
550
Matt Arsenault7293f982016-01-28 20:53:35 +0000551 // Make clamp modifier on NaN input returns 0.
552 ProgInfo.DX10Clamp = 1;
Tom Stellard45bb48e2015-06-13 03:28:10 +0000553
Matthias Braun941a7052016-07-28 18:40:00 +0000554 const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
555 ProgInfo.ScratchSize = FrameInfo.getStackSize();
Tom Stellard45bb48e2015-06-13 03:28:10 +0000556
557 ProgInfo.FlatUsed = FlatUsed;
558 ProgInfo.VCCUsed = VCCUsed;
559 ProgInfo.CodeLen = CodeSize;
560
561 unsigned LDSAlignShift;
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000562 if (STM.getGeneration() < SISubtarget::SEA_ISLANDS) {
Tom Stellard45bb48e2015-06-13 03:28:10 +0000563 // LDS is allocated in 64 dword blocks.
564 LDSAlignShift = 8;
565 } else {
566 // LDS is allocated in 128 dword blocks.
567 LDSAlignShift = 9;
568 }
569
Konstantin Zhuravlyov1d650262016-09-06 20:22:28 +0000570 unsigned LDSSpillSize =
571 MFI->LDSWaveSpillSize * MFI->getMaxFlatWorkGroupSize();
Tom Stellard45bb48e2015-06-13 03:28:10 +0000572
Matt Arsenault52ef4012016-07-26 16:45:58 +0000573 ProgInfo.LDSSize = MFI->getLDSSize() + LDSSpillSize;
Tom Stellard45bb48e2015-06-13 03:28:10 +0000574 ProgInfo.LDSBlocks =
Aaron Ballmanef0fe1e2016-03-30 21:30:00 +0000575 alignTo(ProgInfo.LDSSize, 1ULL << LDSAlignShift) >> LDSAlignShift;
Tom Stellard45bb48e2015-06-13 03:28:10 +0000576
577 // Scratch is allocated in 256 dword blocks.
578 unsigned ScratchAlignShift = 10;
579 // We need to program the hardware with the amount of scratch memory that
580 // is used by the entire wave. ProgInfo.ScratchSize is the amount of
581 // scratch memory used per thread.
582 ProgInfo.ScratchBlocks =
Rui Ueyamada00f2f2016-01-14 21:06:47 +0000583 alignTo(ProgInfo.ScratchSize * STM.getWavefrontSize(),
Aaron Ballmanef0fe1e2016-03-30 21:30:00 +0000584 1ULL << ScratchAlignShift) >>
Rui Ueyamada00f2f2016-01-14 21:06:47 +0000585 ScratchAlignShift;
Tom Stellard45bb48e2015-06-13 03:28:10 +0000586
587 ProgInfo.ComputePGMRSrc1 =
588 S_00B848_VGPRS(ProgInfo.VGPRBlocks) |
589 S_00B848_SGPRS(ProgInfo.SGPRBlocks) |
590 S_00B848_PRIORITY(ProgInfo.Priority) |
591 S_00B848_FLOAT_MODE(ProgInfo.FloatMode) |
592 S_00B848_PRIV(ProgInfo.Priv) |
593 S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp) |
Matt Arsenault26f8f3d2015-11-30 21:16:03 +0000594 S_00B848_DEBUG_MODE(ProgInfo.DebugMode) |
Tom Stellard45bb48e2015-06-13 03:28:10 +0000595 S_00B848_IEEE_MODE(ProgInfo.IEEEMode);
596
Matt Arsenault26f8f3d2015-11-30 21:16:03 +0000597 // 0 = X, 1 = XY, 2 = XYZ
598 unsigned TIDIGCompCnt = 0;
599 if (MFI->hasWorkItemIDZ())
600 TIDIGCompCnt = 2;
601 else if (MFI->hasWorkItemIDY())
602 TIDIGCompCnt = 1;
603
Tom Stellard45bb48e2015-06-13 03:28:10 +0000604 ProgInfo.ComputePGMRSrc2 =
605 S_00B84C_SCRATCH_EN(ProgInfo.ScratchBlocks > 0) |
Matt Arsenault26f8f3d2015-11-30 21:16:03 +0000606 S_00B84C_USER_SGPR(MFI->getNumUserSGPRs()) |
607 S_00B84C_TGID_X_EN(MFI->hasWorkGroupIDX()) |
608 S_00B84C_TGID_Y_EN(MFI->hasWorkGroupIDY()) |
609 S_00B84C_TGID_Z_EN(MFI->hasWorkGroupIDZ()) |
610 S_00B84C_TG_SIZE_EN(MFI->hasWorkGroupInfo()) |
611 S_00B84C_TIDIG_COMP_CNT(TIDIGCompCnt) |
612 S_00B84C_EXCP_EN_MSB(0) |
613 S_00B84C_LDS_SIZE(ProgInfo.LDSBlocks) |
614 S_00B84C_EXCP_EN(0);
Tom Stellard45bb48e2015-06-13 03:28:10 +0000615}
616
Nicolai Haehnledf3a20c2016-04-06 19:40:20 +0000617static unsigned getRsrcReg(CallingConv::ID CallConv) {
618 switch (CallConv) {
Justin Bognercd1d5aa2016-08-17 20:30:52 +0000619 default: LLVM_FALLTHROUGH;
Nicolai Haehnledf3a20c2016-04-06 19:40:20 +0000620 case CallingConv::AMDGPU_CS: return R_00B848_COMPUTE_PGM_RSRC1;
621 case CallingConv::AMDGPU_GS: return R_00B228_SPI_SHADER_PGM_RSRC1_GS;
622 case CallingConv::AMDGPU_PS: return R_00B028_SPI_SHADER_PGM_RSRC1_PS;
623 case CallingConv::AMDGPU_VS: return R_00B128_SPI_SHADER_PGM_RSRC1_VS;
Tom Stellard45bb48e2015-06-13 03:28:10 +0000624 }
625}
626
627void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
628 const SIProgramInfo &KernelInfo) {
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000629 const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
Tom Stellard45bb48e2015-06-13 03:28:10 +0000630 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
Nicolai Haehnledf3a20c2016-04-06 19:40:20 +0000631 unsigned RsrcReg = getRsrcReg(MF.getFunction()->getCallingConv());
Tom Stellard45bb48e2015-06-13 03:28:10 +0000632
Nicolai Haehnledf3a20c2016-04-06 19:40:20 +0000633 if (AMDGPU::isCompute(MF.getFunction()->getCallingConv())) {
Tom Stellard45bb48e2015-06-13 03:28:10 +0000634 OutStreamer->EmitIntValue(R_00B848_COMPUTE_PGM_RSRC1, 4);
635
636 OutStreamer->EmitIntValue(KernelInfo.ComputePGMRSrc1, 4);
637
638 OutStreamer->EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4);
639 OutStreamer->EmitIntValue(KernelInfo.ComputePGMRSrc2, 4);
640
641 OutStreamer->EmitIntValue(R_00B860_COMPUTE_TMPRING_SIZE, 4);
642 OutStreamer->EmitIntValue(S_00B860_WAVESIZE(KernelInfo.ScratchBlocks), 4);
643
644 // TODO: Should probably note flat usage somewhere. SC emits a "FlatPtr32 =
645 // 0" comment but I don't see a corresponding field in the register spec.
646 } else {
647 OutStreamer->EmitIntValue(RsrcReg, 4);
648 OutStreamer->EmitIntValue(S_00B028_VGPRS(KernelInfo.VGPRBlocks) |
649 S_00B028_SGPRS(KernelInfo.SGPRBlocks), 4);
Nicolai Haehnledf3a20c2016-04-06 19:40:20 +0000650 if (STM.isVGPRSpillingEnabled(*MF.getFunction())) {
Tom Stellard45bb48e2015-06-13 03:28:10 +0000651 OutStreamer->EmitIntValue(R_0286E8_SPI_TMPRING_SIZE, 4);
652 OutStreamer->EmitIntValue(S_0286E8_WAVESIZE(KernelInfo.ScratchBlocks), 4);
653 }
654 }
655
Nicolai Haehnledf3a20c2016-04-06 19:40:20 +0000656 if (MF.getFunction()->getCallingConv() == CallingConv::AMDGPU_PS) {
Tom Stellard45bb48e2015-06-13 03:28:10 +0000657 OutStreamer->EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4);
658 OutStreamer->EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(KernelInfo.LDSBlocks), 4);
659 OutStreamer->EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);
Marek Olsakfccabaf2016-01-13 11:45:36 +0000660 OutStreamer->EmitIntValue(MFI->PSInputEna, 4);
661 OutStreamer->EmitIntValue(R_0286D0_SPI_PS_INPUT_ADDR, 4);
662 OutStreamer->EmitIntValue(MFI->getPSInputAddr(), 4);
Tom Stellard45bb48e2015-06-13 03:28:10 +0000663 }
Marek Olsak0532c192016-07-13 17:35:15 +0000664
665 OutStreamer->EmitIntValue(R_SPILLED_SGPRS, 4);
666 OutStreamer->EmitIntValue(MFI->getNumSpilledSGPRs(), 4);
667 OutStreamer->EmitIntValue(R_SPILLED_VGPRS, 4);
668 OutStreamer->EmitIntValue(MFI->getNumSpilledVGPRs(), 4);
Tom Stellard45bb48e2015-06-13 03:28:10 +0000669}
670
Matt Arsenault24ee0782016-02-12 02:40:47 +0000671// This is supposed to be log2(Size)
672static amd_element_byte_size_t getElementByteSizeValue(unsigned Size) {
673 switch (Size) {
674 case 4:
675 return AMD_ELEMENT_4_BYTES;
676 case 8:
677 return AMD_ELEMENT_8_BYTES;
678 case 16:
679 return AMD_ELEMENT_16_BYTES;
680 default:
681 llvm_unreachable("invalid private_element_size");
682 }
683}
684
Tom Stellard45bb48e2015-06-13 03:28:10 +0000685void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
Tom Stellardff7416b2015-06-26 21:58:31 +0000686 const SIProgramInfo &KernelInfo) const {
Tom Stellard45bb48e2015-06-13 03:28:10 +0000687 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000688 const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
Tom Stellard45bb48e2015-06-13 03:28:10 +0000689 amd_kernel_code_t header;
690
Tom Stellardff7416b2015-06-26 21:58:31 +0000691 AMDGPU::initDefaultAMDKernelCodeT(header, STM.getFeatureBits());
Tom Stellard45bb48e2015-06-13 03:28:10 +0000692
693 header.compute_pgm_resource_registers =
694 KernelInfo.ComputePGMRSrc1 |
695 (KernelInfo.ComputePGMRSrc2 << 32);
Matt Arsenault26f8f3d2015-11-30 21:16:03 +0000696 header.code_properties = AMD_CODE_PROPERTY_IS_PTR64;
697
Matt Arsenault24ee0782016-02-12 02:40:47 +0000698
699 AMD_HSA_BITS_SET(header.code_properties,
700 AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE,
701 getElementByteSizeValue(STM.getMaxPrivateElementSize()));
702
Matt Arsenault26f8f3d2015-11-30 21:16:03 +0000703 if (MFI->hasPrivateSegmentBuffer()) {
704 header.code_properties |=
705 AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
706 }
707
708 if (MFI->hasDispatchPtr())
709 header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
710
711 if (MFI->hasQueuePtr())
712 header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
713
714 if (MFI->hasKernargSegmentPtr())
715 header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
716
717 if (MFI->hasDispatchID())
718 header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
719
720 if (MFI->hasFlatScratchInit())
721 header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
722
723 // TODO: Private segment size
724
725 if (MFI->hasGridWorkgroupCountX()) {
726 header.code_properties |=
727 AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X;
728 }
729
730 if (MFI->hasGridWorkgroupCountY()) {
731 header.code_properties |=
732 AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y;
733 }
734
735 if (MFI->hasGridWorkgroupCountZ()) {
736 header.code_properties |=
737 AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z;
738 }
Tom Stellard45bb48e2015-06-13 03:28:10 +0000739
Tom Stellard48f29f22015-11-26 00:43:29 +0000740 if (MFI->hasDispatchPtr())
741 header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
742
Konstantin Zhuravlyovf2f3d142016-06-25 03:11:28 +0000743 if (STM.debuggerSupported())
744 header.code_properties |= AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED;
745
Nicolai Haehnle5b504972016-01-04 23:35:53 +0000746 if (STM.isXNACKEnabled())
747 header.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
748
Matt Arsenault52ef4012016-07-26 16:45:58 +0000749 // FIXME: Should use getKernArgSize
Tom Stellarde88bbc32016-09-23 01:33:26 +0000750 header.kernarg_segment_byte_size =
751 STM.getKernArgSegmentSize(MFI->getABIArgOffset());
Tom Stellard45bb48e2015-06-13 03:28:10 +0000752 header.wavefront_sgpr_count = KernelInfo.NumSGPR;
753 header.workitem_vgpr_count = KernelInfo.NumVGPR;
Tom Stellarda4953072015-12-15 22:55:30 +0000754 header.workitem_private_segment_byte_size = KernelInfo.ScratchSize;
Tom Stellard7750f4e2015-12-15 23:15:25 +0000755 header.workgroup_group_segment_byte_size = KernelInfo.LDSSize;
Konstantin Zhuravlyov1d99c4d2016-04-26 15:43:14 +0000756 header.reserved_vgpr_first = KernelInfo.ReservedVGPRFirst;
757 header.reserved_vgpr_count = KernelInfo.ReservedVGPRCount;
Tom Stellard45bb48e2015-06-13 03:28:10 +0000758
Konstantin Zhuravlyovf2f3d142016-06-25 03:11:28 +0000759 if (STM.debuggerEmitPrologue()) {
760 header.debug_wavefront_private_segment_offset_sgpr =
761 KernelInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR;
762 header.debug_private_segment_buffer_sgpr =
763 KernelInfo.DebuggerPrivateSegmentBufferSGPR;
764 }
765
Tom Stellardff7416b2015-06-26 21:58:31 +0000766 AMDGPUTargetStreamer *TS =
767 static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
Tom Stellardfcfaea42016-05-05 17:03:33 +0000768
769 OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
Tom Stellardff7416b2015-06-26 21:58:31 +0000770 TS->EmitAMDKernelCodeT(header);
Tom Stellard45bb48e2015-06-13 03:28:10 +0000771}
772
773bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
774 unsigned AsmVariant,
775 const char *ExtraCode, raw_ostream &O) {
776 if (ExtraCode && ExtraCode[0]) {
777 if (ExtraCode[1] != 0)
778 return true; // Unknown modifier.
779
780 switch (ExtraCode[0]) {
781 default:
782 // See if this is a generic print operand
783 return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
784 case 'r':
785 break;
786 }
787 }
788
789 AMDGPUInstPrinter::printRegOperand(MI->getOperand(OpNo).getReg(), O,
790 *TM.getSubtargetImpl(*MF->getFunction())->getRegisterInfo());
791 return false;
792}
Yaxun Liua711cc72016-07-16 05:09:21 +0000793
794// Emit a key and an integer value for runtime metadata.
Matt Arsenaultb06db8f2016-07-26 21:03:36 +0000795static void emitRuntimeMDIntValue(MCStreamer &Streamer,
Yaxun Liua711cc72016-07-16 05:09:21 +0000796 RuntimeMD::Key K, uint64_t V,
797 unsigned Size) {
Matt Arsenaultb06db8f2016-07-26 21:03:36 +0000798 Streamer.EmitIntValue(K, 1);
799 Streamer.EmitIntValue(V, Size);
Yaxun Liua711cc72016-07-16 05:09:21 +0000800}
801
802// Emit a key and a string value for runtime metadata.
Matt Arsenaultb06db8f2016-07-26 21:03:36 +0000803static void emitRuntimeMDStringValue(MCStreamer &Streamer,
Yaxun Liua711cc72016-07-16 05:09:21 +0000804 RuntimeMD::Key K, StringRef S) {
Matt Arsenaultb06db8f2016-07-26 21:03:36 +0000805 Streamer.EmitIntValue(K, 1);
806 Streamer.EmitIntValue(S.size(), 4);
807 Streamer.EmitBytes(S);
Yaxun Liua711cc72016-07-16 05:09:21 +0000808}
809
810// Emit a key and three integer values for runtime metadata.
811// The three integer values are obtained from MDNode \p Node;
Matt Arsenaultb06db8f2016-07-26 21:03:36 +0000812static void emitRuntimeMDThreeIntValues(MCStreamer &Streamer,
Yaxun Liua711cc72016-07-16 05:09:21 +0000813 RuntimeMD::Key K, MDNode *Node,
814 unsigned Size) {
Matt Arsenaultb06db8f2016-07-26 21:03:36 +0000815 assert(Node->getNumOperands() == 3);
816
817 Streamer.EmitIntValue(K, 1);
818 for (const MDOperand &Op : Node->operands()) {
819 const ConstantInt *CI = mdconst::extract<ConstantInt>(Op);
820 Streamer.EmitIntValue(CI->getZExtValue(), Size);
821 }
Yaxun Liua711cc72016-07-16 05:09:21 +0000822}
823
824void AMDGPUAsmPrinter::emitStartOfRuntimeMetadata(const Module &M) {
825 OutStreamer->SwitchSection(getObjFileLowering().getContext()
826 .getELFSection(RuntimeMD::SectionName, ELF::SHT_PROGBITS, 0));
827
Matt Arsenaultb06db8f2016-07-26 21:03:36 +0000828 emitRuntimeMDIntValue(*OutStreamer, RuntimeMD::KeyMDVersion,
Yaxun Liua711cc72016-07-16 05:09:21 +0000829 RuntimeMD::MDVersion << 8 | RuntimeMD::MDRevision, 2);
830 if (auto MD = M.getNamedMetadata("opencl.ocl.version")) {
Matt Arsenaultb06db8f2016-07-26 21:03:36 +0000831 if (MD->getNumOperands() != 0) {
Yaxun Liu4b1d9f72016-07-20 14:38:06 +0000832 auto Node = MD->getOperand(0);
833 if (Node->getNumOperands() > 1) {
Matt Arsenaultb06db8f2016-07-26 21:03:36 +0000834 emitRuntimeMDIntValue(*OutStreamer, RuntimeMD::KeyLanguage,
Yaxun Liu4b1d9f72016-07-20 14:38:06 +0000835 RuntimeMD::OpenCL_C, 1);
836 uint16_t Major = mdconst::extract<ConstantInt>(Node->getOperand(0))
837 ->getZExtValue();
838 uint16_t Minor = mdconst::extract<ConstantInt>(Node->getOperand(1))
839 ->getZExtValue();
Matt Arsenaultb06db8f2016-07-26 21:03:36 +0000840 emitRuntimeMDIntValue(*OutStreamer, RuntimeMD::KeyLanguageVersion,
Yaxun Liu4b1d9f72016-07-20 14:38:06 +0000841 Major * 100 + Minor * 10, 2);
842 }
843 }
Yaxun Liua711cc72016-07-16 05:09:21 +0000844 }
Yaxun Liu63891402016-09-07 17:44:00 +0000845
846 if (auto MD = M.getNamedMetadata("llvm.printf.fmts")) {
847 for (unsigned I = 0; I < MD->getNumOperands(); ++I) {
848 auto Node = MD->getOperand(I);
849 if (Node->getNumOperands() > 0)
850 emitRuntimeMDStringValue(*OutStreamer, RuntimeMD::KeyPrintfInfo,
851 cast<MDString>(Node->getOperand(0))->getString());
852 }
853 }
Yaxun Liua711cc72016-07-16 05:09:21 +0000854}
855
Matt Arsenaultb06db8f2016-07-26 21:03:36 +0000856static std::string getOCLTypeName(Type *Ty, bool Signed) {
Yaxun Liua711cc72016-07-16 05:09:21 +0000857 switch (Ty->getTypeID()) {
Matt Arsenaultb06db8f2016-07-26 21:03:36 +0000858 case Type::HalfTyID:
859 return "half";
860 case Type::FloatTyID:
861 return "float";
862 case Type::DoubleTyID:
863 return "double";
Yaxun Liua711cc72016-07-16 05:09:21 +0000864 case Type::IntegerTyID: {
Matt Arsenaultb06db8f2016-07-26 21:03:36 +0000865 if (!Signed)
866 return (Twine('u') + getOCLTypeName(Ty, true)).str();
867 unsigned BW = Ty->getIntegerBitWidth();
Yaxun Liua711cc72016-07-16 05:09:21 +0000868 switch (BW) {
869 case 8:
870 return "char";
871 case 16:
872 return "short";
873 case 32:
874 return "int";
875 case 64:
876 return "long";
877 default:
878 return (Twine('i') + Twine(BW)).str();
879 }
880 }
Matt Arsenaultb06db8f2016-07-26 21:03:36 +0000881 case Type::VectorTyID: {
882 VectorType *VecTy = cast<VectorType>(Ty);
883 Type *EleTy = VecTy->getElementType();
884 unsigned Size = VecTy->getVectorNumElements();
885 return (Twine(getOCLTypeName(EleTy, Signed)) + Twine(Size)).str();
886 }
Yaxun Liua711cc72016-07-16 05:09:21 +0000887 default:
Yaxun Liu86c052232016-08-04 19:45:00 +0000888 return "unknown";
Yaxun Liua711cc72016-07-16 05:09:21 +0000889 }
890}
891
892static RuntimeMD::KernelArg::ValueType getRuntimeMDValueType(
Matt Arsenaultb06db8f2016-07-26 21:03:36 +0000893 Type *Ty, StringRef TypeName) {
894 switch (Ty->getTypeID()) {
895 case Type::HalfTyID:
Yaxun Liua711cc72016-07-16 05:09:21 +0000896 return RuntimeMD::KernelArg::F16;
Matt Arsenaultb06db8f2016-07-26 21:03:36 +0000897 case Type::FloatTyID:
Yaxun Liua711cc72016-07-16 05:09:21 +0000898 return RuntimeMD::KernelArg::F32;
Matt Arsenaultb06db8f2016-07-26 21:03:36 +0000899 case Type::DoubleTyID:
Yaxun Liua711cc72016-07-16 05:09:21 +0000900 return RuntimeMD::KernelArg::F64;
Matt Arsenaultb06db8f2016-07-26 21:03:36 +0000901 case Type::IntegerTyID: {
Yaxun Liua711cc72016-07-16 05:09:21 +0000902 bool Signed = !TypeName.startswith("u");
Matt Arsenaultb06db8f2016-07-26 21:03:36 +0000903 switch (Ty->getIntegerBitWidth()) {
Yaxun Liua711cc72016-07-16 05:09:21 +0000904 case 8:
905 return Signed ? RuntimeMD::KernelArg::I8 : RuntimeMD::KernelArg::U8;
906 case 16:
907 return Signed ? RuntimeMD::KernelArg::I16 : RuntimeMD::KernelArg::U16;
908 case 32:
909 return Signed ? RuntimeMD::KernelArg::I32 : RuntimeMD::KernelArg::U32;
910 case 64:
911 return Signed ? RuntimeMD::KernelArg::I64 : RuntimeMD::KernelArg::U64;
912 default:
Matt Arsenaultb06db8f2016-07-26 21:03:36 +0000913 // Runtime does not recognize other integer types. Report as struct type.
Yaxun Liua711cc72016-07-16 05:09:21 +0000914 return RuntimeMD::KernelArg::Struct;
915 }
Matt Arsenaultb06db8f2016-07-26 21:03:36 +0000916 }
917 case Type::VectorTyID:
918 return getRuntimeMDValueType(Ty->getVectorElementType(), TypeName);
919 case Type::PointerTyID:
920 return getRuntimeMDValueType(Ty->getPointerElementType(), TypeName);
921 default:
Yaxun Liua711cc72016-07-16 05:09:21 +0000922 return RuntimeMD::KernelArg::Struct;
Matt Arsenaultb06db8f2016-07-26 21:03:36 +0000923 }
Yaxun Liua711cc72016-07-16 05:09:21 +0000924}
925
Yaxun Liu63891402016-09-07 17:44:00 +0000926static RuntimeMD::KernelArg::AddressSpaceQualifer getRuntimeAddrSpace(
927 AMDGPUAS::AddressSpaces A) {
928 switch (A) {
929 case AMDGPUAS::GLOBAL_ADDRESS:
930 return RuntimeMD::KernelArg::Global;
931 case AMDGPUAS::CONSTANT_ADDRESS:
932 return RuntimeMD::KernelArg::Constant;
933 case AMDGPUAS::LOCAL_ADDRESS:
934 return RuntimeMD::KernelArg::Local;
935 case AMDGPUAS::FLAT_ADDRESS:
936 return RuntimeMD::KernelArg::Generic;
937 case AMDGPUAS::REGION_ADDRESS:
938 return RuntimeMD::KernelArg::Region;
939 default:
940 return RuntimeMD::KernelArg::Private;
941 }
942}
943
944static void emitRuntimeMetadataForKernelArg(const DataLayout &DL,
945 MCStreamer &OutStreamer, Type *T,
946 RuntimeMD::KernelArg::Kind Kind,
947 StringRef BaseTypeName = "", StringRef TypeName = "",
948 StringRef ArgName = "", StringRef TypeQual = "", StringRef AccQual = "") {
949 // Emit KeyArgBegin.
950 OutStreamer.EmitIntValue(RuntimeMD::KeyArgBegin, 1);
951
952 // Emit KeyArgSize and KeyArgAlign.
953 emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgSize,
954 DL.getTypeAllocSize(T), 4);
955 emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgAlign,
956 DL.getABITypeAlignment(T), 4);
957 if (auto PT = dyn_cast<PointerType>(T)) {
958 auto ET = PT->getElementType();
959 if (PT->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && ET->isSized())
960 emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgPointeeAlign,
961 DL.getABITypeAlignment(ET), 4);
962 }
963
964 // Emit KeyArgTypeName.
965 if (!TypeName.empty())
966 emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyArgTypeName, TypeName);
967
968 // Emit KeyArgName.
969 if (!ArgName.empty())
970 emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyArgName, ArgName);
971
972 // Emit KeyArgIsVolatile, KeyArgIsRestrict, KeyArgIsConst and KeyArgIsPipe.
973 SmallVector<StringRef, 1> SplitQ;
974 TypeQual.split(SplitQ, " ", -1, false /* Drop empty entry */);
975
976 for (StringRef KeyName : SplitQ) {
977 auto Key = StringSwitch<RuntimeMD::Key>(KeyName)
978 .Case("volatile", RuntimeMD::KeyArgIsVolatile)
979 .Case("restrict", RuntimeMD::KeyArgIsRestrict)
980 .Case("const", RuntimeMD::KeyArgIsConst)
981 .Case("pipe", RuntimeMD::KeyArgIsPipe)
982 .Default(RuntimeMD::KeyNull);
983 OutStreamer.EmitIntValue(Key, 1);
984 }
985
986 // Emit KeyArgKind.
987 emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgKind, Kind, 1);
988
989 // Emit KeyArgValueType.
990 emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgValueType,
991 getRuntimeMDValueType(T, BaseTypeName), 2);
992
993 // Emit KeyArgAccQual.
994 if (!AccQual.empty()) {
995 auto AQ = StringSwitch<RuntimeMD::KernelArg::AccessQualifer>(AccQual)
996 .Case("read_only", RuntimeMD::KernelArg::ReadOnly)
997 .Case("write_only", RuntimeMD::KernelArg::WriteOnly)
998 .Case("read_write", RuntimeMD::KernelArg::ReadWrite)
999 .Default(RuntimeMD::KernelArg::None);
1000 emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgAccQual, AQ, 1);
1001 }
1002
1003 // Emit KeyArgAddrQual.
1004 if (auto *PT = dyn_cast<PointerType>(T))
1005 emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgAddrQual,
1006 getRuntimeAddrSpace(static_cast<AMDGPUAS::AddressSpaces>(
1007 PT->getAddressSpace())), 1);
1008
1009 // Emit KeyArgEnd
1010 OutStreamer.EmitIntValue(RuntimeMD::KeyArgEnd, 1);
1011}
1012
Yaxun Liua711cc72016-07-16 05:09:21 +00001013void AMDGPUAsmPrinter::emitRuntimeMetadata(const Function &F) {
1014 if (!F.getMetadata("kernel_arg_type"))
1015 return;
1016
1017 MCContext &Context = getObjFileLowering().getContext();
1018 OutStreamer->SwitchSection(
1019 Context.getELFSection(RuntimeMD::SectionName, ELF::SHT_PROGBITS, 0));
1020 OutStreamer->EmitIntValue(RuntimeMD::KeyKernelBegin, 1);
Matt Arsenaultb06db8f2016-07-26 21:03:36 +00001021 emitRuntimeMDStringValue(*OutStreamer, RuntimeMD::KeyKernelName, F.getName());
Yaxun Liua711cc72016-07-16 05:09:21 +00001022
Yaxun Liu63891402016-09-07 17:44:00 +00001023 const DataLayout &DL = F.getParent()->getDataLayout();
Matt Arsenaultb06db8f2016-07-26 21:03:36 +00001024 for (auto &Arg : F.args()) {
Yaxun Liua711cc72016-07-16 05:09:21 +00001025 unsigned I = Arg.getArgNo();
Matt Arsenaultb06db8f2016-07-26 21:03:36 +00001026 Type *T = Arg.getType();
Yaxun Liua711cc72016-07-16 05:09:21 +00001027 auto TypeName = dyn_cast<MDString>(F.getMetadata(
Yaxun Liu63891402016-09-07 17:44:00 +00001028 "kernel_arg_type")->getOperand(I))->getString();
1029 auto BaseTypeName = cast<MDString>(F.getMetadata(
1030 "kernel_arg_base_type")->getOperand(I))->getString();
1031 StringRef ArgName;
1032 if (auto ArgNameMD = F.getMetadata("kernel_arg_name"))
1033 ArgName = cast<MDString>(ArgNameMD->getOperand(I))->getString();
Yaxun Liua711cc72016-07-16 05:09:21 +00001034 auto TypeQual = cast<MDString>(F.getMetadata(
Yaxun Liu63891402016-09-07 17:44:00 +00001035 "kernel_arg_type_qual")->getOperand(I))->getString();
1036 auto AccQual = cast<MDString>(F.getMetadata(
1037 "kernel_arg_access_qual")->getOperand(I))->getString();
1038 RuntimeMD::KernelArg::Kind Kind;
1039 if (TypeQual.find("pipe") != StringRef::npos)
1040 Kind = RuntimeMD::KernelArg::Pipe;
1041 else Kind = StringSwitch<RuntimeMD::KernelArg::Kind>(BaseTypeName)
Yaxun Liua711cc72016-07-16 05:09:21 +00001042 .Case("sampler_t", RuntimeMD::KernelArg::Sampler)
1043 .Case("queue_t", RuntimeMD::KernelArg::Queue)
1044 .Cases("image1d_t", "image1d_array_t", "image1d_buffer_t",
1045 "image2d_t" , "image2d_array_t", RuntimeMD::KernelArg::Image)
1046 .Cases("image2d_depth_t", "image2d_array_depth_t",
1047 "image2d_msaa_t", "image2d_array_msaa_t",
1048 "image2d_msaa_depth_t", RuntimeMD::KernelArg::Image)
1049 .Cases("image2d_array_msaa_depth_t", "image3d_t",
1050 RuntimeMD::KernelArg::Image)
Yaxun Liu63891402016-09-07 17:44:00 +00001051 .Default(isa<PointerType>(T) ?
1052 (T->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ?
1053 RuntimeMD::KernelArg::DynamicSharedPointer :
1054 RuntimeMD::KernelArg::GlobalBuffer) :
1055 RuntimeMD::KernelArg::ByValue);
1056 emitRuntimeMetadataForKernelArg(DL, *OutStreamer, T,
1057 Kind, BaseTypeName, TypeName, ArgName, TypeQual, AccQual);
1058 }
Yaxun Liua711cc72016-07-16 05:09:21 +00001059
Yaxun Liu63891402016-09-07 17:44:00 +00001060 // Emit hidden kernel arguments for OpenCL kernels.
1061 if (F.getParent()->getNamedMetadata("opencl.ocl.version")) {
1062 auto Int64T = Type::getInt64Ty(F.getContext());
1063 emitRuntimeMetadataForKernelArg(DL, *OutStreamer, Int64T,
1064 RuntimeMD::KernelArg::HiddenGlobalOffsetX);
1065 emitRuntimeMetadataForKernelArg(DL, *OutStreamer, Int64T,
1066 RuntimeMD::KernelArg::HiddenGlobalOffsetY);
1067 emitRuntimeMetadataForKernelArg(DL, *OutStreamer, Int64T,
1068 RuntimeMD::KernelArg::HiddenGlobalOffsetZ);
Yaxun Liu90658ff2016-09-07 18:31:11 +00001069 if (F.getParent()->getNamedMetadata("llvm.printf.fmts")) {
Yaxun Liu63891402016-09-07 17:44:00 +00001070 auto Int8PtrT = Type::getInt8PtrTy(F.getContext(),
1071 RuntimeMD::KernelArg::Global);
1072 emitRuntimeMetadataForKernelArg(DL, *OutStreamer, Int8PtrT,
1073 RuntimeMD::KernelArg::HiddenPrintfBuffer);
Matt Arsenaultb06db8f2016-07-26 21:03:36 +00001074 }
Yaxun Liua711cc72016-07-16 05:09:21 +00001075 }
1076
1077 // Emit KeyReqdWorkGroupSize, KeyWorkGroupSizeHint, and KeyVecTypeHint.
Matt Arsenaultb06db8f2016-07-26 21:03:36 +00001078 if (auto RWGS = F.getMetadata("reqd_work_group_size")) {
1079 emitRuntimeMDThreeIntValues(*OutStreamer, RuntimeMD::KeyReqdWorkGroupSize,
Yaxun Liua711cc72016-07-16 05:09:21 +00001080 RWGS, 4);
Matt Arsenaultb06db8f2016-07-26 21:03:36 +00001081 }
1082
1083 if (auto WGSH = F.getMetadata("work_group_size_hint")) {
1084 emitRuntimeMDThreeIntValues(*OutStreamer, RuntimeMD::KeyWorkGroupSizeHint,
Yaxun Liua711cc72016-07-16 05:09:21 +00001085 WGSH, 4);
Matt Arsenaultb06db8f2016-07-26 21:03:36 +00001086 }
1087
Yaxun Liua711cc72016-07-16 05:09:21 +00001088 if (auto VTH = F.getMetadata("vec_type_hint")) {
1089 auto TypeName = getOCLTypeName(cast<ValueAsMetadata>(
1090 VTH->getOperand(0))->getType(), mdconst::extract<ConstantInt>(
1091 VTH->getOperand(1))->getZExtValue());
Matt Arsenaultb06db8f2016-07-26 21:03:36 +00001092 emitRuntimeMDStringValue(*OutStreamer, RuntimeMD::KeyVecTypeHint, TypeName);
Yaxun Liua711cc72016-07-16 05:09:21 +00001093 }
1094
1095 // Emit KeyKernelEnd
1096 OutStreamer->EmitIntValue(RuntimeMD::KeyKernelEnd, 1);
1097}