blob: 0b7477db2f3aa5fb2138886aeece4fb960ced423 [file] [log] [blame]
//===-- AMDGPUAsmPrinter.cpp - AMDGPU Assembly printer --------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11///
12/// The AMDGPUAsmPrinter is used to print both assembly string and also binary
13/// code. When passed an MCAsmStreamer it prints assembly and when passed
14/// an MCObjectStreamer it outputs binary code.
15//
16//===----------------------------------------------------------------------===//
17//
18
19#include "AMDGPUAsmPrinter.h"
Tom Stellard347ac792015-06-26 21:15:07 +000020#include "MCTargetDesc/AMDGPUTargetStreamer.h"
Tom Stellard45bb48e2015-06-13 03:28:10 +000021#include "InstPrinter/AMDGPUInstPrinter.h"
Tom Stellard347ac792015-06-26 21:15:07 +000022#include "Utils/AMDGPUBaseInfo.h"
Tom Stellard45bb48e2015-06-13 03:28:10 +000023#include "AMDGPU.h"
24#include "AMDKernelCodeT.h"
25#include "AMDGPUSubtarget.h"
26#include "R600Defines.h"
27#include "R600MachineFunctionInfo.h"
28#include "R600RegisterInfo.h"
29#include "SIDefines.h"
30#include "SIMachineFunctionInfo.h"
31#include "SIRegisterInfo.h"
32#include "llvm/CodeGen/MachineFrameInfo.h"
33#include "llvm/MC/MCContext.h"
34#include "llvm/MC/MCSectionELF.h"
35#include "llvm/MC/MCStreamer.h"
36#include "llvm/Support/ELF.h"
37#include "llvm/Support/MathExtras.h"
38#include "llvm/Support/TargetRegistry.h"
39#include "llvm/Target/TargetLoweringObjectFile.h"
40
41using namespace llvm;
42
43// TODO: This should get the default rounding mode from the kernel. We just set
44// the default here, but this could change if the OpenCL rounding mode pragmas
45// are used.
46//
47// The denormal mode here should match what is reported by the OpenCL runtime
48// for the CL_FP_DENORM bit from CL_DEVICE_{HALF|SINGLE|DOUBLE}_FP_CONFIG, but
// can also be overridden to flush with the -cl-denorms-are-zero compiler flag.
50//
51// AMD OpenCL only sets flush none and reports CL_FP_DENORM for double
52// precision, and leaves single precision to flush all and does not report
53// CL_FP_DENORM for CL_DEVICE_SINGLE_FP_CONFIG. Mesa's OpenCL currently reports
54// CL_FP_DENORM for both.
55//
56// FIXME: It seems some instructions do not support single precision denormals
57// regardless of the mode (exp_*_f32, rcp_*_f32, rsq_*_f32, rsq_*f32, sqrt_f32,
58// and sin_f32, cos_f32 on most parts).
59
60// We want to use these instructions, and using fp32 denormals also causes
61// instructions to run at the double precision rate for the device so it's
62// probably best to just report no single precision denormals.
63static uint32_t getFPMode(const MachineFunction &F) {
64 const AMDGPUSubtarget& ST = F.getSubtarget<AMDGPUSubtarget>();
65 // TODO: Is there any real use for the flush in only / flush out only modes?
66
67 uint32_t FP32Denormals =
68 ST.hasFP32Denormals() ? FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT;
69
70 uint32_t FP64Denormals =
71 ST.hasFP64Denormals() ? FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT;
72
73 return FP_ROUND_MODE_SP(FP_ROUND_ROUND_TO_NEAREST) |
74 FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_NEAREST) |
75 FP_DENORM_MODE_SP(FP32Denormals) |
76 FP_DENORM_MODE_DP(FP64Denormals);
77}
78
79static AsmPrinter *
80createAMDGPUAsmPrinterPass(TargetMachine &tm,
81 std::unique_ptr<MCStreamer> &&Streamer) {
82 return new AMDGPUAsmPrinter(tm, std::move(Streamer));
83}
84
85extern "C" void LLVMInitializeAMDGPUAsmPrinter() {
86 TargetRegistry::RegisterAsmPrinter(TheAMDGPUTarget, createAMDGPUAsmPrinterPass);
87 TargetRegistry::RegisterAsmPrinter(TheGCNTarget, createAMDGPUAsmPrinterPass);
88}
89
// The AMDGPU asm printer keeps no state of its own; it simply forwards the
// target machine and streamer to the base AsmPrinter.
AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM,
                                   std::unique_ptr<MCStreamer> Streamer)
    : AsmPrinter(TM, std::move(Streamer)) {}
93
Tom Stellardf151a452015-06-26 21:14:58 +000094void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
95 const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
96 SIProgramInfo KernelInfo;
97 if (STM.isAmdHsaOS()) {
98 getSIProgramInfo(KernelInfo, *MF);
99 EmitAmdKernelCodeT(*MF, KernelInfo);
100 }
101}
102
Tom Stellard1e1b05d2015-11-06 11:45:14 +0000103void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {
104 const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
105 const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
106 if (MFI->isKernel() && STM.isAmdHsaOS()) {
107 AMDGPUTargetStreamer *TS =
108 static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
109 TS->EmitAMDGPUSymbolType(CurrentFnSym->getName(),
110 ELF::STT_AMDGPU_HSA_KERNEL);
111 }
112
113 AsmPrinter::EmitFunctionEntryLabel();
114}
115
Tom Stellard00f2f912015-12-02 19:47:57 +0000116static bool isModuleLinkage(const GlobalValue *GV) {
117 switch (GV->getLinkage()) {
118 case GlobalValue::InternalLinkage:
119 case GlobalValue::CommonLinkage:
120 return true;
121 case GlobalValue::ExternalLinkage:
122 return false;
123 default: llvm_unreachable("unknown linkage type");
124 }
125}
126
Tom Stellarde3b5aea2015-12-02 17:00:42 +0000127void AMDGPUAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
128
Tom Stellard29dd05e2015-12-15 22:39:36 +0000129 if (TM.getTargetTriple().getOS() != Triple::AMDHSA) {
130 AsmPrinter::EmitGlobalVariable(GV);
131 return;
132 }
133
134 if (GV->isDeclaration() || GV->getLinkage() == GlobalValue::PrivateLinkage) {
Tom Stellard00f2f912015-12-02 19:47:57 +0000135 AsmPrinter::EmitGlobalVariable(GV);
136 return;
137 }
138
139 // Group segment variables aren't emitted in HSA.
140 if (AMDGPU::isGroupSegment(GV))
141 return;
142
143 AMDGPUTargetStreamer *TS =
144 static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
145 if (isModuleLinkage(GV)) {
146 TS->EmitAMDGPUHsaModuleScopeGlobal(GV->getName());
147 } else {
148 TS->EmitAMDGPUHsaProgramScopeGlobal(GV->getName());
149 }
150
151 const DataLayout &DL = getDataLayout();
152 OutStreamer->PushSection();
153 OutStreamer->SwitchSection(
154 getObjFileLowering().SectionForGlobal(GV, *Mang, TM));
155 MCSymbol *GVSym = getSymbol(GV);
156 const Constant *C = GV->getInitializer();
157 OutStreamer->EmitLabel(GVSym);
158 EmitGlobalConstant(DL, C);
159 OutStreamer->PopSection();
Tom Stellarde3b5aea2015-12-02 17:00:42 +0000160}
161
// Main per-function driver: emits the .AMDGPU.config program info words (SI+
// or R600 path), the function body, verbose .AMDGPU.csdata comments, and the
// optional .AMDGPU.disasm dump.
bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {

  // The starting address of all shader programs must be 256 bytes aligned.
  // setAlignment takes a log2 value, so 8 means 1 << 8 == 256 bytes.
  MF.setAlignment(8);

  SetupMachineFunction(MF);

  MCContext &Context = getObjFileLowering().getContext();
  MCSectionELF *ConfigSection =
      Context.getELFSection(".AMDGPU.config", ELF::SHT_PROGBITS, 0);
  OutStreamer->SwitchSection(ConfigSection);

  const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
  SIProgramInfo KernelInfo;
  if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
    // GCN path: compute program info; the raw register words are only
    // emitted for non-HSA (HSA emits them via amd_kernel_code_t instead —
    // see EmitFunctionBodyStart).
    getSIProgramInfo(KernelInfo, MF);
    if (!STM.isAmdHsaOS()) {
      EmitProgramInfoSI(MF, KernelInfo);
    }
    // Emit directives
    AMDGPUTargetStreamer *TS =
        static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
    TS->EmitDirectiveHSACodeObjectVersion(1, 0);
    AMDGPU::IsaVersion ISA = STM.getIsaVersion();
    TS->EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, ISA.Stepping,
                                      "AMD", "AMDGPU");
  } else {
    EmitProgramInfoR600(MF);
  }

  // Reset the per-function disassembly capture used by dumpCode() below.
  DisasmLines.clear();
  HexLines.clear();
  DisasmLineMaxLen = 0;

  EmitFunctionBody();

  if (isVerbose()) {
    // Emit human-readable kernel statistics as raw comments in .AMDGPU.csdata.
    MCSectionELF *CommentSection =
        Context.getELFSection(".AMDGPU.csdata", ELF::SHT_PROGBITS, 0);
    OutStreamer->SwitchSection(CommentSection);

    if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
      OutStreamer->emitRawComment(" Kernel info:", false);
      OutStreamer->emitRawComment(" codeLenInByte = " + Twine(KernelInfo.CodeLen),
                                  false);
      OutStreamer->emitRawComment(" NumSgprs: " + Twine(KernelInfo.NumSGPR),
                                  false);
      OutStreamer->emitRawComment(" NumVgprs: " + Twine(KernelInfo.NumVGPR),
                                  false);
      OutStreamer->emitRawComment(" FloatMode: " + Twine(KernelInfo.FloatMode),
                                  false);
      OutStreamer->emitRawComment(" IeeeMode: " + Twine(KernelInfo.IEEEMode),
                                  false);
      OutStreamer->emitRawComment(" ScratchSize: " + Twine(KernelInfo.ScratchSize),
                                  false);

      // Decode selected COMPUTE_PGM_RSRC2 fields for readability.
      OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:USER_SGPR: " +
                                  Twine(G_00B84C_USER_SGPR(KernelInfo.ComputePGMRSrc2)),
                                  false);
      OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_X_EN: " +
                                  Twine(G_00B84C_TGID_X_EN(KernelInfo.ComputePGMRSrc2)),
                                  false);
      OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_Y_EN: " +
                                  Twine(G_00B84C_TGID_Y_EN(KernelInfo.ComputePGMRSrc2)),
                                  false);
      OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_Z_EN: " +
                                  Twine(G_00B84C_TGID_Z_EN(KernelInfo.ComputePGMRSrc2)),
                                  false);
      OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: " +
                                  Twine(G_00B84C_TIDIG_COMP_CNT(KernelInfo.ComputePGMRSrc2)),
                                  false);

    } else {
      R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
      OutStreamer->emitRawComment(
          Twine("SQ_PGM_RESOURCES:STACK_SIZE = " + Twine(MFI->StackSize)));
    }
  }

  if (STM.dumpCode()) {
    // Emit the captured disassembly with the instruction encodings appended
    // as aligned hex comments.
    OutStreamer->SwitchSection(
        Context.getELFSection(".AMDGPU.disasm", ELF::SHT_NOTE, 0));

    for (size_t i = 0; i < DisasmLines.size(); ++i) {
      std::string Comment(DisasmLineMaxLen - DisasmLines[i].size(), ' ');
      Comment += " ; " + HexLines[i] + "\n";

      OutStreamer->EmitBytes(StringRef(DisasmLines[i]));
      OutStreamer->EmitBytes(StringRef(Comment));
    }
  }

  return false;
}
257
// Emit the .AMDGPU.config register/value word pairs for R600-family
// (pre-GCN) targets: scan the function for the highest GPR used and for
// KILLGT, then write the SQ_PGM_RESOURCES and DB_SHADER_CONTROL pairs.
void AMDGPUAsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) {
  unsigned MaxGPR = 0;
  bool killPixel = false;
  const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
  const R600RegisterInfo *RI =
      static_cast<const R600RegisterInfo *>(STM.getRegisterInfo());
  const R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();

  // Walk every operand of every instruction to find the largest hardware
  // GPR index actually referenced.
  for (const MachineBasicBlock &MBB : MF) {
    for (const MachineInstr &MI : MBB) {
      if (MI.getOpcode() == AMDGPU::KILLGT)
        killPixel = true;
      unsigned numOperands = MI.getNumOperands();
      for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
        const MachineOperand &MO = MI.getOperand(op_idx);
        if (!MO.isReg())
          continue;
        unsigned HWReg = RI->getEncodingValue(MO.getReg()) & 0xff;

        // Register with value > 127 aren't GPR
        if (HWReg > 127)
          continue;
        MaxGPR = std::max(MaxGPR, HWReg);
      }
    }
  }

  // Pick the SQ_PGM_RESOURCES register for this shader type; the register
  // addresses differ between R600/R700 and Evergreen/Northern Islands.
  unsigned RsrcReg;
  if (STM.getGeneration() >= AMDGPUSubtarget::EVERGREEN) {
    // Evergreen / Northern Islands
    switch (MFI->getShaderType()) {
    default: // Fall through
    case ShaderType::COMPUTE:  RsrcReg = R_0288D4_SQ_PGM_RESOURCES_LS; break;
    case ShaderType::GEOMETRY: RsrcReg = R_028878_SQ_PGM_RESOURCES_GS; break;
    case ShaderType::PIXEL:    RsrcReg = R_028844_SQ_PGM_RESOURCES_PS; break;
    case ShaderType::VERTEX:   RsrcReg = R_028860_SQ_PGM_RESOURCES_VS; break;
    }
  } else {
    // R600 / R700
    switch (MFI->getShaderType()) {
    default: // Fall through
    case ShaderType::GEOMETRY: // Fall through
    case ShaderType::COMPUTE:  // Fall through
    case ShaderType::VERTEX:   RsrcReg = R_028868_SQ_PGM_RESOURCES_VS; break;
    case ShaderType::PIXEL:    RsrcReg = R_028850_SQ_PGM_RESOURCES_PS; break;
    }
  }

  // Emit (register, value) pairs consumed by the driver/loader.
  OutStreamer->EmitIntValue(RsrcReg, 4);
  OutStreamer->EmitIntValue(S_NUM_GPRS(MaxGPR + 1) |
                            S_STACK_SIZE(MFI->StackSize), 4);
  OutStreamer->EmitIntValue(R_02880C_DB_SHADER_CONTROL, 4);
  OutStreamer->EmitIntValue(S_02880C_KILL_ENABLE(killPixel), 4);

  if (MFI->getShaderType() == ShaderType::COMPUTE) {
    // LDS allocation is expressed in 4-byte units for SQ_LDS_ALLOC.
    OutStreamer->EmitIntValue(R_0288E8_SQ_LDS_ALLOC, 4);
    OutStreamer->EmitIntValue(RoundUpToAlignment(MFI->LDSSize, 4) >> 2, 4);
  }
}
317
// Compute the SI (GCN) program info for MF: scan for the maximum SGPR/VGPR
// indices actually used, derive register-block counts, LDS and scratch
// sizes, and pack the COMPUTE_PGM_RSRC1/RSRC2 register values.
void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
                                        const MachineFunction &MF) const {
  const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  uint64_t CodeSize = 0;
  unsigned MaxSGPR = 0;
  unsigned MaxVGPR = 0;
  bool VCCUsed = false;
  bool FlatUsed = false;
  const SIRegisterInfo *RI =
      static_cast<const SIRegisterInfo *>(STM.getRegisterInfo());

  for (const MachineBasicBlock &MBB : MF) {
    for (const MachineInstr &MI : MBB) {
      // TODO: CodeSize should account for multiple functions.

      // TODO: Should we count size of debug info?
      if (MI.isDebugValue())
        continue;

      // FIXME: This is reporting 0 for many instructions.
      CodeSize += MI.getDesc().Size;

      unsigned numOperands = MI.getNumOperands();
      for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
        const MachineOperand &MO = MI.getOperand(op_idx);
        unsigned width = 0;
        bool isSGPR = false;

        if (!MO.isReg())
          continue;

        unsigned reg = MO.getReg();
        // Special registers are not counted toward the SGPR/VGPR totals,
        // but VCC and FLAT_SCR usage is remembered so their SGPR pairs can
        // be reserved below.
        switch (reg) {
        case AMDGPU::EXEC:
        case AMDGPU::SCC:
        case AMDGPU::M0:
          continue;

        case AMDGPU::VCC:
        case AMDGPU::VCC_LO:
        case AMDGPU::VCC_HI:
          VCCUsed = true;
          continue;

        case AMDGPU::FLAT_SCR:
        case AMDGPU::FLAT_SCR_LO:
        case AMDGPU::FLAT_SCR_HI:
          FlatUsed = true;
          continue;

        default:
          break;
        }

        // Classify the register and determine how many 32-bit registers it
        // spans (width in dwords).
        if (AMDGPU::SReg_32RegClass.contains(reg)) {
          isSGPR = true;
          width = 1;
        } else if (AMDGPU::VGPR_32RegClass.contains(reg)) {
          isSGPR = false;
          width = 1;
        } else if (AMDGPU::SReg_64RegClass.contains(reg)) {
          isSGPR = true;
          width = 2;
        } else if (AMDGPU::VReg_64RegClass.contains(reg)) {
          isSGPR = false;
          width = 2;
        } else if (AMDGPU::VReg_96RegClass.contains(reg)) {
          isSGPR = false;
          width = 3;
        } else if (AMDGPU::SReg_128RegClass.contains(reg)) {
          isSGPR = true;
          width = 4;
        } else if (AMDGPU::VReg_128RegClass.contains(reg)) {
          isSGPR = false;
          width = 4;
        } else if (AMDGPU::SReg_256RegClass.contains(reg)) {
          isSGPR = true;
          width = 8;
        } else if (AMDGPU::VReg_256RegClass.contains(reg)) {
          isSGPR = false;
          width = 8;
        } else if (AMDGPU::SReg_512RegClass.contains(reg)) {
          isSGPR = true;
          width = 16;
        } else if (AMDGPU::VReg_512RegClass.contains(reg)) {
          isSGPR = false;
          width = 16;
        } else {
          llvm_unreachable("Unknown register class");
        }
        // Track the highest hardware register index touched by this operand.
        unsigned hwReg = RI->getEncodingValue(reg) & 0xff;
        unsigned maxUsed = hwReg + width - 1;
        if (isSGPR) {
          MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR;
        } else {
          MaxVGPR = maxUsed > MaxVGPR ? maxUsed : MaxVGPR;
        }
      }
    }
  }

  // Reserve the SGPR pairs backing VCC and FLAT_SCRATCH when used.
  if (VCCUsed)
    MaxSGPR += 2;

  if (FlatUsed)
    MaxSGPR += 2;

  // We found the maximum register index. They start at 0, so add one to get the
  // number of registers.
  ProgInfo.NumVGPR = MaxVGPR + 1;
  ProgInfo.NumSGPR = MaxSGPR + 1;

  // Hardware with the SGPR init bug must always report the fixed SGPR count;
  // exceeding it is a hard error.
  if (STM.hasSGPRInitBug()) {
    if (ProgInfo.NumSGPR > AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG) {
      LLVMContext &Ctx = MF.getFunction()->getContext();
      Ctx.emitError("too many SGPRs used with the SGPR init bug");
    }

    ProgInfo.NumSGPR = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
  }

  if (MFI->NumUserSGPRs > STM.getMaxNumUserSGPRs()) {
    LLVMContext &Ctx = MF.getFunction()->getContext();
    Ctx.emitError("too many user SGPRs used");
  }

  // Register counts are reported to hardware in blocks of 4 VGPRs / 8 SGPRs.
  ProgInfo.VGPRBlocks = (ProgInfo.NumVGPR - 1) / 4;
  ProgInfo.SGPRBlocks = (ProgInfo.NumSGPR - 1) / 8;
  // Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
  // register.
  ProgInfo.FloatMode = getFPMode(MF);

  // XXX: Not quite sure what this does, but sc seems to unset this.
  ProgInfo.IEEEMode = 0;

  // Do not clamp NAN to 0.
  ProgInfo.DX10Clamp = 0;

  const MachineFrameInfo *FrameInfo = MF.getFrameInfo();
  ProgInfo.ScratchSize = FrameInfo->estimateStackSize(MF);

  ProgInfo.FlatUsed = FlatUsed;
  ProgInfo.VCCUsed = VCCUsed;
  ProgInfo.CodeLen = CodeSize;

  unsigned LDSAlignShift;
  if (STM.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) {
    // LDS is allocated in 64 dword blocks.
    LDSAlignShift = 8;
  } else {
    // LDS is allocated in 128 dword blocks.
    LDSAlignShift = 9;
  }

  // LDS spill space is per work-item, so scale by the maximum work group size.
  unsigned LDSSpillSize = MFI->LDSWaveSpillSize *
                          MFI->getMaximumWorkGroupSize(MF);

  ProgInfo.LDSSize = MFI->LDSSize + LDSSpillSize;
  ProgInfo.LDSBlocks =
      RoundUpToAlignment(ProgInfo.LDSSize, 1 << LDSAlignShift) >> LDSAlignShift;

  // Scratch is allocated in 256 dword blocks.
  unsigned ScratchAlignShift = 10;
  // We need to program the hardware with the amount of scratch memory that
  // is used by the entire wave.  ProgInfo.ScratchSize is the amount of
  // scratch memory used per thread.
  ProgInfo.ScratchBlocks =
      RoundUpToAlignment(ProgInfo.ScratchSize * STM.getWavefrontSize(),
                         1 << ScratchAlignShift) >> ScratchAlignShift;

  // Pack COMPUTE_PGM_RSRC1 from the fields computed above.
  ProgInfo.ComputePGMRSrc1 =
      S_00B848_VGPRS(ProgInfo.VGPRBlocks) |
      S_00B848_SGPRS(ProgInfo.SGPRBlocks) |
      S_00B848_PRIORITY(ProgInfo.Priority) |
      S_00B848_FLOAT_MODE(ProgInfo.FloatMode) |
      S_00B848_PRIV(ProgInfo.Priv) |
      S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp) |
      S_00B848_DEBUG_MODE(ProgInfo.DebugMode) |
      S_00B848_IEEE_MODE(ProgInfo.IEEEMode);

  // 0 = X, 1 = XY, 2 = XYZ
  unsigned TIDIGCompCnt = 0;
  if (MFI->hasWorkItemIDZ())
    TIDIGCompCnt = 2;
  else if (MFI->hasWorkItemIDY())
    TIDIGCompCnt = 1;

  // Pack COMPUTE_PGM_RSRC2 from the machine function's ABI requirements.
  ProgInfo.ComputePGMRSrc2 =
      S_00B84C_SCRATCH_EN(ProgInfo.ScratchBlocks > 0) |
      S_00B84C_USER_SGPR(MFI->getNumUserSGPRs()) |
      S_00B84C_TGID_X_EN(MFI->hasWorkGroupIDX()) |
      S_00B84C_TGID_Y_EN(MFI->hasWorkGroupIDY()) |
      S_00B84C_TGID_Z_EN(MFI->hasWorkGroupIDZ()) |
      S_00B84C_TG_SIZE_EN(MFI->hasWorkGroupInfo()) |
      S_00B84C_TIDIG_COMP_CNT(TIDIGCompCnt) |
      S_00B84C_EXCP_EN_MSB(0) |
      S_00B84C_LDS_SIZE(ProgInfo.LDSBlocks) |
      S_00B84C_EXCP_EN(0);
}
518
519static unsigned getRsrcReg(unsigned ShaderType) {
520 switch (ShaderType) {
521 default: // Fall through
522 case ShaderType::COMPUTE: return R_00B848_COMPUTE_PGM_RSRC1;
523 case ShaderType::GEOMETRY: return R_00B228_SPI_SHADER_PGM_RSRC1_GS;
524 case ShaderType::PIXEL: return R_00B028_SPI_SHADER_PGM_RSRC1_PS;
525 case ShaderType::VERTEX: return R_00B128_SPI_SHADER_PGM_RSRC1_VS;
526 }
527}
528
// Emit the .AMDGPU.config register/value word pairs for GCN targets (non-HSA
// path): PGM_RSRC1/RSRC2 and scratch sizing, plus PS-specific state.
void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
                                         const SIProgramInfo &KernelInfo) {
  const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  unsigned RsrcReg = getRsrcReg(MFI->getShaderType());

  if (MFI->getShaderType() == ShaderType::COMPUTE) {
    // Compute kernels get the full RSRC1/RSRC2 pair plus the scratch
    // (TMPRING) wave size.
    OutStreamer->EmitIntValue(R_00B848_COMPUTE_PGM_RSRC1, 4);

    OutStreamer->EmitIntValue(KernelInfo.ComputePGMRSrc1, 4);

    OutStreamer->EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4);
    OutStreamer->EmitIntValue(KernelInfo.ComputePGMRSrc2, 4);

    OutStreamer->EmitIntValue(R_00B860_COMPUTE_TMPRING_SIZE, 4);
    OutStreamer->EmitIntValue(S_00B860_WAVESIZE(KernelInfo.ScratchBlocks), 4);

    // TODO: Should probably note flat usage somewhere. SC emits a "FlatPtr32 =
    // 0" comment but I don't see a corresponding field in the register spec.
  } else {
    // Graphics shaders only report register-block counts here; scratch is
    // emitted only when VGPR spilling is enabled.
    OutStreamer->EmitIntValue(RsrcReg, 4);
    OutStreamer->EmitIntValue(S_00B028_VGPRS(KernelInfo.VGPRBlocks) |
                              S_00B028_SGPRS(KernelInfo.SGPRBlocks), 4);
    if (STM.isVGPRSpillingEnabled(MFI)) {
      OutStreamer->EmitIntValue(R_0286E8_SPI_TMPRING_SIZE, 4);
      OutStreamer->EmitIntValue(S_0286E8_WAVESIZE(KernelInfo.ScratchBlocks), 4);
    }
  }

  if (MFI->getShaderType() == ShaderType::PIXEL) {
    // Pixel shaders additionally report extra LDS size and the PS input
    // enable mask.
    OutStreamer->EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4);
    OutStreamer->EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(KernelInfo.LDSBlocks), 4);
    OutStreamer->EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);
    OutStreamer->EmitIntValue(MFI->PSInputAddr, 4);
  }
}
565
566void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
Tom Stellardff7416b2015-06-26 21:58:31 +0000567 const SIProgramInfo &KernelInfo) const {
Tom Stellard45bb48e2015-06-13 03:28:10 +0000568 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
569 const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
570 amd_kernel_code_t header;
571
Tom Stellardff7416b2015-06-26 21:58:31 +0000572 AMDGPU::initDefaultAMDKernelCodeT(header, STM.getFeatureBits());
Tom Stellard45bb48e2015-06-13 03:28:10 +0000573
574 header.compute_pgm_resource_registers =
575 KernelInfo.ComputePGMRSrc1 |
576 (KernelInfo.ComputePGMRSrc2 << 32);
Matt Arsenault26f8f3d2015-11-30 21:16:03 +0000577 header.code_properties = AMD_CODE_PROPERTY_IS_PTR64;
578
579 if (MFI->hasPrivateSegmentBuffer()) {
580 header.code_properties |=
581 AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
582 }
583
584 if (MFI->hasDispatchPtr())
585 header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
586
587 if (MFI->hasQueuePtr())
588 header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
589
590 if (MFI->hasKernargSegmentPtr())
591 header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
592
593 if (MFI->hasDispatchID())
594 header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
595
596 if (MFI->hasFlatScratchInit())
597 header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
598
599 // TODO: Private segment size
600
601 if (MFI->hasGridWorkgroupCountX()) {
602 header.code_properties |=
603 AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X;
604 }
605
606 if (MFI->hasGridWorkgroupCountY()) {
607 header.code_properties |=
608 AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y;
609 }
610
611 if (MFI->hasGridWorkgroupCountZ()) {
612 header.code_properties |=
613 AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z;
614 }
Tom Stellard45bb48e2015-06-13 03:28:10 +0000615
Tom Stellard48f29f22015-11-26 00:43:29 +0000616 if (MFI->hasDispatchPtr())
617 header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
618
Tom Stellard45bb48e2015-06-13 03:28:10 +0000619 header.kernarg_segment_byte_size = MFI->ABIArgOffset;
Tom Stellard45bb48e2015-06-13 03:28:10 +0000620 header.wavefront_sgpr_count = KernelInfo.NumSGPR;
621 header.workitem_vgpr_count = KernelInfo.NumVGPR;
622
Tom Stellardff7416b2015-06-26 21:58:31 +0000623 AMDGPUTargetStreamer *TS =
624 static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
625 TS->EmitAMDKernelCodeT(header);
Tom Stellard45bb48e2015-06-13 03:28:10 +0000626}
627
628bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
629 unsigned AsmVariant,
630 const char *ExtraCode, raw_ostream &O) {
631 if (ExtraCode && ExtraCode[0]) {
632 if (ExtraCode[1] != 0)
633 return true; // Unknown modifier.
634
635 switch (ExtraCode[0]) {
636 default:
637 // See if this is a generic print operand
638 return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
639 case 'r':
640 break;
641 }
642 }
643
644 AMDGPUInstPrinter::printRegOperand(MI->getOperand(OpNo).getReg(), O,
645 *TM.getSubtargetImpl(*MF->getFunction())->getRegisterInfo());
646 return false;
647}