blob: 1a0a7db9b1cdd3d139f6c8e5491f1baf8bad8866 [file] [log] [blame]
Tom Stellard75aadc22012-12-11 21:25:42 +00001//===-- AMDGPUAsmPrinter.cpp - AMDGPU Assembly printer -------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11///
12/// The AMDGPUAsmPrinter is used to print both assembly string and also binary
13/// code. When passed an MCAsmStreamer it prints assembly and when passed
14/// an MCObjectStreamer it outputs binary code.
15//
16//===----------------------------------------------------------------------===//
17//
18
Tom Stellard75aadc22012-12-11 21:25:42 +000019#include "AMDGPUAsmPrinter.h"
20#include "AMDGPU.h"
Tom Stellard2e59a452014-06-13 01:32:00 +000021#include "AMDGPUSubtarget.h"
Tom Stellard043de4c2013-05-06 17:50:51 +000022#include "R600Defines.h"
Vincent Lejeune117f0752013-04-23 17:34:12 +000023#include "R600MachineFunctionInfo.h"
Vincent Lejeune98a73802013-04-17 15:17:25 +000024#include "R600RegisterInfo.h"
Benjamin Kramerd78bb462013-05-23 17:10:37 +000025#include "SIDefines.h"
26#include "SIMachineFunctionInfo.h"
27#include "SIRegisterInfo.h"
Tom Stellardb02094e2014-07-21 15:45:01 +000028#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard3a7beafb32013-04-15 17:51:30 +000029#include "llvm/MC/MCContext.h"
30#include "llvm/MC/MCSectionELF.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000031#include "llvm/MC/MCStreamer.h"
Tom Stellard3a7beafb32013-04-15 17:51:30 +000032#include "llvm/Support/ELF.h"
Tom Stellardc026e8b2013-06-28 15:47:08 +000033#include "llvm/Support/MathExtras.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000034#include "llvm/Support/TargetRegistry.h"
Chandler Carruthbe810232013-01-02 10:22:59 +000035#include "llvm/Target/TargetLoweringObjectFile.h"
Tom Stellard75aadc22012-12-11 21:25:42 +000036
37using namespace llvm;
38
Matt Arsenault0989d512014-06-26 17:22:30 +000039// TODO: This should get the default rounding mode from the kernel. We just set
40// the default here, but this could change if the OpenCL rounding mode pragmas
41// are used.
42//
43// The denormal mode here should match what is reported by the OpenCL runtime
44// for the CL_FP_DENORM bit from CL_DEVICE_{HALF|SINGLE|DOUBLE}_FP_CONFIG, but
45// can also be overridden to flush with the -cl-denorms-are-zero compiler flag.
46//
47// AMD OpenCL only sets flush none and reports CL_FP_DENORM for double
48// precision, and leaves single precision to flush all and does not report
49// CL_FP_DENORM for CL_DEVICE_SINGLE_FP_CONFIG. Mesa's OpenCL currently reports
50// CL_FP_DENORM for both.
Matt Arsenaultc6ae7b42014-07-14 23:40:43 +000051//
52// FIXME: It seems some instructions do not support single precision denormals
53// regardless of the mode (exp_*_f32, rcp_*_f32, rsq_*_f32, rsq_*f32, sqrt_f32,
54// and sin_f32, cos_f32 on most parts).
55
56// We want to use these instructions, and using fp32 denormals also causes
57// instructions to run at the double precision rate for the device so it's
58// probably best to just report no single precision denormals.
Matt Arsenaultf171cf22014-07-14 23:40:49 +000059static uint32_t getFPMode(const MachineFunction &F) {
60 const AMDGPUSubtarget& ST = F.getTarget().getSubtarget<AMDGPUSubtarget>();
61 // TODO: Is there any real use for the flush in only / flush out only modes?
62
63 uint32_t FP32Denormals =
64 ST.hasFP32Denormals() ? FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT;
65
66 uint32_t FP64Denormals =
67 ST.hasFP64Denormals() ? FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT;
68
Matt Arsenault0989d512014-06-26 17:22:30 +000069 return FP_ROUND_MODE_SP(FP_ROUND_ROUND_TO_NEAREST) |
70 FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_NEAREST) |
Matt Arsenaultf171cf22014-07-14 23:40:49 +000071 FP_DENORM_MODE_SP(FP32Denormals) |
72 FP_DENORM_MODE_DP(FP64Denormals);
Matt Arsenault0989d512014-06-26 17:22:30 +000073}
Tom Stellard75aadc22012-12-11 21:25:42 +000074
75static AsmPrinter *createAMDGPUAsmPrinterPass(TargetMachine &tm,
76 MCStreamer &Streamer) {
77 return new AMDGPUAsmPrinter(tm, Streamer);
78}
79
80extern "C" void LLVMInitializeR600AsmPrinter() {
81 TargetRegistry::RegisterAsmPrinter(TheAMDGPUTarget, createAMDGPUAsmPrinterPass);
82}
83
Tom Stellarded699252013-10-12 05:02:51 +000084AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
Matt Arsenault89cc49f2013-12-05 05:15:35 +000085 : AsmPrinter(TM, Streamer) {
Rafael Espindola277f9062014-01-31 22:14:06 +000086 DisasmEnabled = TM.getSubtarget<AMDGPUSubtarget>().dumpCode();
Tom Stellarded699252013-10-12 05:02:51 +000087}
88
Tom Stellard067c8152014-07-21 14:01:14 +000089void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) {
90
91 // This label is used to mark the end of the .text section.
92 const TargetLoweringObjectFile &TLOF = getObjFileLowering();
93 OutStreamer.SwitchSection(TLOF.getTextSection());
94 MCSymbol *EndOfTextLabel =
95 OutContext.GetOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME));
96 OutStreamer.EmitLabel(EndOfTextLabel);
97}
98
Tom Stellard75aadc22012-12-11 21:25:42 +000099bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
Tom Stellard75aadc22012-12-11 21:25:42 +0000100 SetupMachineFunction(MF);
Matt Arsenault89cc49f2013-12-05 05:15:35 +0000101
Tom Stellard79243d92014-10-01 17:15:17 +0000102 EmitFunctionHeader();
Vincent Lejeune98a73802013-04-17 15:17:25 +0000103
Tom Stellarded699252013-10-12 05:02:51 +0000104 MCContext &Context = getObjFileLowering().getContext();
105 const MCSectionELF *ConfigSection = Context.getELFSection(".AMDGPU.config",
Tom Stellard34e40682013-04-24 23:56:14 +0000106 ELF::SHT_PROGBITS, 0,
Vincent Lejeune98a73802013-04-17 15:17:25 +0000107 SectionKind::getReadOnly());
108 OutStreamer.SwitchSection(ConfigSection);
Matt Arsenault89cc49f2013-12-05 05:15:35 +0000109
Tom Stellarded699252013-10-12 05:02:51 +0000110 const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
Matt Arsenault89cc49f2013-12-05 05:15:35 +0000111 SIProgramInfo KernelInfo;
Tom Stellarda6c6e1b2013-06-07 20:37:48 +0000112 if (STM.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
Matt Arsenaulte500e322014-04-15 22:40:47 +0000113 getSIProgramInfo(KernelInfo, MF);
Matt Arsenault89cc49f2013-12-05 05:15:35 +0000114 EmitProgramInfoSI(MF, KernelInfo);
Vincent Lejeune98a73802013-04-17 15:17:25 +0000115 } else {
116 EmitProgramInfoR600(MF);
Tom Stellard75aadc22012-12-11 21:25:42 +0000117 }
Tom Stellarded699252013-10-12 05:02:51 +0000118
119 DisasmLines.clear();
120 HexLines.clear();
121 DisasmLineMaxLen = 0;
122
Tom Stellard3a7beafb32013-04-15 17:51:30 +0000123 OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
Tom Stellard75aadc22012-12-11 21:25:42 +0000124 EmitFunctionBody();
Tom Stellarded699252013-10-12 05:02:51 +0000125
Rafael Espindola887541f2014-01-31 22:08:19 +0000126 if (isVerbose()) {
Matt Arsenault89cc49f2013-12-05 05:15:35 +0000127 const MCSectionELF *CommentSection
128 = Context.getELFSection(".AMDGPU.csdata",
129 ELF::SHT_PROGBITS, 0,
130 SectionKind::getReadOnly());
131 OutStreamer.SwitchSection(CommentSection);
132
Matt Arsenaulte500e322014-04-15 22:40:47 +0000133 if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
Rafael Espindola98f5b542014-01-27 00:19:41 +0000134 OutStreamer.emitRawComment(" Kernel info:", false);
Matt Arsenaulte500e322014-04-15 22:40:47 +0000135 OutStreamer.emitRawComment(" codeLenInByte = " + Twine(KernelInfo.CodeLen),
136 false);
Rafael Espindola98f5b542014-01-27 00:19:41 +0000137 OutStreamer.emitRawComment(" NumSgprs: " + Twine(KernelInfo.NumSGPR),
Rafael Espindolabcf890b2014-01-27 00:16:00 +0000138 false);
Rafael Espindola98f5b542014-01-27 00:19:41 +0000139 OutStreamer.emitRawComment(" NumVgprs: " + Twine(KernelInfo.NumVGPR),
Rafael Espindolabcf890b2014-01-27 00:16:00 +0000140 false);
Matt Arsenault0989d512014-06-26 17:22:30 +0000141 OutStreamer.emitRawComment(" FloatMode: " + Twine(KernelInfo.FloatMode),
142 false);
143 OutStreamer.emitRawComment(" IeeeMode: " + Twine(KernelInfo.IEEEMode),
144 false);
Tom Stellardb02094e2014-07-21 15:45:01 +0000145 OutStreamer.emitRawComment(" ScratchSize: " + Twine(KernelInfo.ScratchSize),
146 false);
Tom Stellard08b6af92014-01-22 21:55:35 +0000147 } else {
148 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Rafael Espindola887541f2014-01-31 22:08:19 +0000149 OutStreamer.emitRawComment(
Tom Stellard08b6af92014-01-22 21:55:35 +0000150 Twine("SQ_PGM_RESOURCES:STACK_SIZE = " + Twine(MFI->StackSize)));
151 }
Matt Arsenault89cc49f2013-12-05 05:15:35 +0000152 }
153
Tom Stellarded699252013-10-12 05:02:51 +0000154 if (STM.dumpCode()) {
155#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
156 MF.dump();
157#endif
158
159 if (DisasmEnabled) {
160 OutStreamer.SwitchSection(Context.getELFSection(".AMDGPU.disasm",
161 ELF::SHT_NOTE, 0,
162 SectionKind::getReadOnly()));
163
164 for (size_t i = 0; i < DisasmLines.size(); ++i) {
165 std::string Comment(DisasmLineMaxLen - DisasmLines[i].size(), ' ');
166 Comment += " ; " + HexLines[i] + "\n";
167
168 OutStreamer.EmitBytes(StringRef(DisasmLines[i]));
169 OutStreamer.EmitBytes(StringRef(Comment));
170 }
171 }
172 }
173
Tom Stellard75aadc22012-12-11 21:25:42 +0000174 return false;
175}
176
Matt Arsenaultd32dbb62014-07-13 03:06:43 +0000177void AMDGPUAsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) {
Vincent Lejeune98a73802013-04-17 15:17:25 +0000178 unsigned MaxGPR = 0;
Vincent Lejeune4a0beb52013-04-30 00:13:13 +0000179 bool killPixel = false;
Eric Christopherd9134482014-08-04 21:25:23 +0000180 const R600RegisterInfo *RI = static_cast<const R600RegisterInfo *>(
181 TM.getSubtargetImpl()->getRegisterInfo());
Matt Arsenaultd32dbb62014-07-13 03:06:43 +0000182 const R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellard043de4c2013-05-06 17:50:51 +0000183 const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
Vincent Lejeune98a73802013-04-17 15:17:25 +0000184
Matt Arsenaultd32dbb62014-07-13 03:06:43 +0000185 for (const MachineBasicBlock &MBB : MF) {
186 for (const MachineInstr &MI : MBB) {
Vincent Lejeune4a0beb52013-04-30 00:13:13 +0000187 if (MI.getOpcode() == AMDGPU::KILLGT)
188 killPixel = true;
Vincent Lejeune98a73802013-04-17 15:17:25 +0000189 unsigned numOperands = MI.getNumOperands();
190 for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
Matt Arsenaultd32dbb62014-07-13 03:06:43 +0000191 const MachineOperand &MO = MI.getOperand(op_idx);
Vincent Lejeune98a73802013-04-17 15:17:25 +0000192 if (!MO.isReg())
193 continue;
194 unsigned HWReg = RI->getEncodingValue(MO.getReg()) & 0xff;
195
196 // Register with value > 127 aren't GPR
197 if (HWReg > 127)
198 continue;
199 MaxGPR = std::max(MaxGPR, HWReg);
200 }
201 }
202 }
Tom Stellard043de4c2013-05-06 17:50:51 +0000203
204 unsigned RsrcReg;
Tom Stellarda6c6e1b2013-06-07 20:37:48 +0000205 if (STM.getGeneration() >= AMDGPUSubtarget::EVERGREEN) {
Tom Stellard043de4c2013-05-06 17:50:51 +0000206 // Evergreen / Northern Islands
Matt Arsenault762af962014-07-13 03:06:39 +0000207 switch (MFI->getShaderType()) {
Tom Stellard043de4c2013-05-06 17:50:51 +0000208 default: // Fall through
209 case ShaderType::COMPUTE: RsrcReg = R_0288D4_SQ_PGM_RESOURCES_LS; break;
210 case ShaderType::GEOMETRY: RsrcReg = R_028878_SQ_PGM_RESOURCES_GS; break;
211 case ShaderType::PIXEL: RsrcReg = R_028844_SQ_PGM_RESOURCES_PS; break;
212 case ShaderType::VERTEX: RsrcReg = R_028860_SQ_PGM_RESOURCES_VS; break;
213 }
214 } else {
215 // R600 / R700
Matt Arsenault762af962014-07-13 03:06:39 +0000216 switch (MFI->getShaderType()) {
Tom Stellard043de4c2013-05-06 17:50:51 +0000217 default: // Fall through
218 case ShaderType::GEOMETRY: // Fall through
219 case ShaderType::COMPUTE: // Fall through
220 case ShaderType::VERTEX: RsrcReg = R_028868_SQ_PGM_RESOURCES_VS; break;
221 case ShaderType::PIXEL: RsrcReg = R_028850_SQ_PGM_RESOURCES_PS; break;
222 }
223 }
224
225 OutStreamer.EmitIntValue(RsrcReg, 4);
226 OutStreamer.EmitIntValue(S_NUM_GPRS(MaxGPR + 1) |
227 S_STACK_SIZE(MFI->StackSize), 4);
228 OutStreamer.EmitIntValue(R_02880C_DB_SHADER_CONTROL, 4);
229 OutStreamer.EmitIntValue(S_02880C_KILL_ENABLE(killPixel), 4);
Tom Stellardc026e8b2013-06-28 15:47:08 +0000230
Matt Arsenault762af962014-07-13 03:06:39 +0000231 if (MFI->getShaderType() == ShaderType::COMPUTE) {
Tom Stellardc026e8b2013-06-28 15:47:08 +0000232 OutStreamer.EmitIntValue(R_0288E8_SQ_LDS_ALLOC, 4);
233 OutStreamer.EmitIntValue(RoundUpToAlignment(MFI->LDSSize, 4) >> 2, 4);
234 }
Vincent Lejeune98a73802013-04-17 15:17:25 +0000235}
236
Matt Arsenaulte500e322014-04-15 22:40:47 +0000237void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
Matt Arsenaultd32dbb62014-07-13 03:06:43 +0000238 const MachineFunction &MF) const {
Matt Arsenaulte500e322014-04-15 22:40:47 +0000239 uint64_t CodeSize = 0;
Tom Stellard75aadc22012-12-11 21:25:42 +0000240 unsigned MaxSGPR = 0;
241 unsigned MaxVGPR = 0;
242 bool VCCUsed = false;
Matt Arsenault3f981402014-09-15 15:41:53 +0000243 bool FlatUsed = false;
Eric Christopherd9134482014-08-04 21:25:23 +0000244 const SIRegisterInfo *RI = static_cast<const SIRegisterInfo *>(
245 TM.getSubtargetImpl()->getRegisterInfo());
Tom Stellard75aadc22012-12-11 21:25:42 +0000246
Matt Arsenaultd32dbb62014-07-13 03:06:43 +0000247 for (const MachineBasicBlock &MBB : MF) {
248 for (const MachineInstr &MI : MBB) {
Matt Arsenaulte500e322014-04-15 22:40:47 +0000249 // TODO: CodeSize should account for multiple functions.
250 CodeSize += MI.getDesc().Size;
251
Tom Stellard75aadc22012-12-11 21:25:42 +0000252 unsigned numOperands = MI.getNumOperands();
253 for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
Matt Arsenaultd32dbb62014-07-13 03:06:43 +0000254 const MachineOperand &MO = MI.getOperand(op_idx);
Tom Stellard75aadc22012-12-11 21:25:42 +0000255 unsigned width = 0;
256 bool isSGPR = false;
Matt Arsenaulta64ee172014-01-08 21:47:14 +0000257
Tom Stellard75aadc22012-12-11 21:25:42 +0000258 if (!MO.isReg()) {
259 continue;
260 }
Matt Arsenaulta64ee172014-01-08 21:47:14 +0000261 unsigned reg = MO.getReg();
Tom Stellardfbe435d2014-03-17 17:03:51 +0000262 if (reg == AMDGPU::VCC || reg == AMDGPU::VCC_LO ||
263 reg == AMDGPU::VCC_HI) {
Tom Stellard75aadc22012-12-11 21:25:42 +0000264 VCCUsed = true;
265 continue;
Matt Arsenault3f981402014-09-15 15:41:53 +0000266 } else if (reg == AMDGPU::FLAT_SCR ||
267 reg == AMDGPU::FLAT_SCR_LO ||
268 reg == AMDGPU::FLAT_SCR_HI) {
269 FlatUsed = true;
270 continue;
Tom Stellard75aadc22012-12-11 21:25:42 +0000271 }
Matt Arsenault65864e32013-10-22 21:11:31 +0000272
Tom Stellard75aadc22012-12-11 21:25:42 +0000273 switch (reg) {
274 default: break;
Matt Arsenault65864e32013-10-22 21:11:31 +0000275 case AMDGPU::SCC:
Tom Stellard75aadc22012-12-11 21:25:42 +0000276 case AMDGPU::EXEC:
Tom Stellard75aadc22012-12-11 21:25:42 +0000277 case AMDGPU::M0:
278 continue;
279 }
280
281 if (AMDGPU::SReg_32RegClass.contains(reg)) {
282 isSGPR = true;
283 width = 1;
284 } else if (AMDGPU::VReg_32RegClass.contains(reg)) {
285 isSGPR = false;
286 width = 1;
287 } else if (AMDGPU::SReg_64RegClass.contains(reg)) {
288 isSGPR = true;
289 width = 2;
290 } else if (AMDGPU::VReg_64RegClass.contains(reg)) {
291 isSGPR = false;
292 width = 2;
Christian Konig8b1ed282013-04-10 08:39:16 +0000293 } else if (AMDGPU::VReg_96RegClass.contains(reg)) {
294 isSGPR = false;
295 width = 3;
Tom Stellard75aadc22012-12-11 21:25:42 +0000296 } else if (AMDGPU::SReg_128RegClass.contains(reg)) {
297 isSGPR = true;
298 width = 4;
299 } else if (AMDGPU::VReg_128RegClass.contains(reg)) {
300 isSGPR = false;
301 width = 4;
302 } else if (AMDGPU::SReg_256RegClass.contains(reg)) {
303 isSGPR = true;
304 width = 8;
Tom Stellard538ceeb2013-02-07 17:02:09 +0000305 } else if (AMDGPU::VReg_256RegClass.contains(reg)) {
306 isSGPR = false;
307 width = 8;
Tom Stellarda66cafa2013-10-23 00:44:12 +0000308 } else if (AMDGPU::SReg_512RegClass.contains(reg)) {
309 isSGPR = true;
310 width = 16;
Tom Stellard538ceeb2013-02-07 17:02:09 +0000311 } else if (AMDGPU::VReg_512RegClass.contains(reg)) {
312 isSGPR = false;
313 width = 16;
Tom Stellard75aadc22012-12-11 21:25:42 +0000314 } else {
Matt Arsenaulteaa3a7e2013-12-10 21:37:42 +0000315 llvm_unreachable("Unknown register class");
Tom Stellard75aadc22012-12-11 21:25:42 +0000316 }
Matt Arsenaulta64ee172014-01-08 21:47:14 +0000317 unsigned hwReg = RI->getEncodingValue(reg) & 0xff;
318 unsigned maxUsed = hwReg + width - 1;
Tom Stellard75aadc22012-12-11 21:25:42 +0000319 if (isSGPR) {
320 MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR;
321 } else {
322 MaxVGPR = maxUsed > MaxVGPR ? maxUsed : MaxVGPR;
323 }
324 }
325 }
326 }
Matt Arsenault89cc49f2013-12-05 05:15:35 +0000327
328 if (VCCUsed)
Tom Stellard75aadc22012-12-11 21:25:42 +0000329 MaxSGPR += 2;
Matt Arsenault89cc49f2013-12-05 05:15:35 +0000330
Matt Arsenault3f981402014-09-15 15:41:53 +0000331 if (FlatUsed)
332 MaxSGPR += 2;
333
Matt Arsenault362f3452014-09-11 22:51:37 +0000334 // We found the maximum register index. They start at 0, so add one to get the
335 // number of registers.
336 ProgInfo.NumVGPR = MaxVGPR + 1;
337 ProgInfo.NumSGPR = MaxSGPR + 1;
Matt Arsenault0989d512014-06-26 17:22:30 +0000338
339 // Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
340 // register.
341 ProgInfo.FloatMode = getFPMode(MF);
342
343 // XXX: Not quite sure what this does, but sc seems to unset this.
344 ProgInfo.IEEEMode = 0;
345
346 // Do not clamp NAN to 0.
347 ProgInfo.DX10Clamp = 0;
348
Tom Stellardb02094e2014-07-21 15:45:01 +0000349 const MachineFrameInfo *FrameInfo = MF.getFrameInfo();
350 ProgInfo.ScratchSize = FrameInfo->estimateStackSize(MF);
351
Matt Arsenault3f981402014-09-15 15:41:53 +0000352 ProgInfo.FlatUsed = FlatUsed;
353 ProgInfo.VCCUsed = VCCUsed;
Matt Arsenault0989d512014-06-26 17:22:30 +0000354 ProgInfo.CodeLen = CodeSize;
Matt Arsenault89cc49f2013-12-05 05:15:35 +0000355}
356
Matt Arsenaultd32dbb62014-07-13 03:06:43 +0000357void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
Matt Arsenault89cc49f2013-12-05 05:15:35 +0000358 const SIProgramInfo &KernelInfo) {
359 const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
Matt Arsenaultd32dbb62014-07-13 03:06:43 +0000360 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
Matt Arsenault0989d512014-06-26 17:22:30 +0000361
Tom Stellardcb97e3a2013-04-15 17:51:35 +0000362 unsigned RsrcReg;
Matt Arsenault762af962014-07-13 03:06:39 +0000363 switch (MFI->getShaderType()) {
Tom Stellardcb97e3a2013-04-15 17:51:35 +0000364 default: // Fall through
365 case ShaderType::COMPUTE: RsrcReg = R_00B848_COMPUTE_PGM_RSRC1; break;
366 case ShaderType::GEOMETRY: RsrcReg = R_00B228_SPI_SHADER_PGM_RSRC1_GS; break;
367 case ShaderType::PIXEL: RsrcReg = R_00B028_SPI_SHADER_PGM_RSRC1_PS; break;
368 case ShaderType::VERTEX: RsrcReg = R_00B128_SPI_SHADER_PGM_RSRC1_VS; break;
369 }
370
Tom Stellard6e1ee472013-10-29 16:37:28 +0000371 unsigned LDSAlignShift;
372 if (STM.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) {
Matt Arsenault0989d512014-06-26 17:22:30 +0000373 // LDS is allocated in 64 dword blocks.
Tom Stellard6e1ee472013-10-29 16:37:28 +0000374 LDSAlignShift = 8;
375 } else {
Matt Arsenault0989d512014-06-26 17:22:30 +0000376 // LDS is allocated in 128 dword blocks.
Tom Stellard6e1ee472013-10-29 16:37:28 +0000377 LDSAlignShift = 9;
378 }
Matt Arsenault0989d512014-06-26 17:22:30 +0000379
Tom Stellard96468902014-09-24 01:33:17 +0000380 unsigned LDSSpillSize = MFI->LDSWaveSpillSize *
381 MFI->getMaximumWorkGroupSize(MF);
382
Tom Stellard6e1ee472013-10-29 16:37:28 +0000383 unsigned LDSBlocks =
Tom Stellard96468902014-09-24 01:33:17 +0000384 RoundUpToAlignment(MFI->LDSSize + LDSSpillSize,
385 1 << LDSAlignShift) >> LDSAlignShift;
Tom Stellard6e1ee472013-10-29 16:37:28 +0000386
Tom Stellardb02094e2014-07-21 15:45:01 +0000387 // Scratch is allocated in 256 dword blocks.
388 unsigned ScratchAlignShift = 10;
389 // We need to program the hardware with the amount of scratch memory that
390 // is used by the entire wave. KernelInfo.ScratchSize is the amount of
391 // scratch memory used per thread.
392 unsigned ScratchBlocks =
393 RoundUpToAlignment(KernelInfo.ScratchSize * STM.getWavefrontSize(),
394 1 << ScratchAlignShift) >> ScratchAlignShift;
395
Tom Stellardff795902014-09-19 20:42:37 +0000396 unsigned VGPRBlocks = (KernelInfo.NumVGPR - 1) / 4;
397 unsigned SGPRBlocks = (KernelInfo.NumSGPR - 1) / 8;
398
Matt Arsenault762af962014-07-13 03:06:39 +0000399 if (MFI->getShaderType() == ShaderType::COMPUTE) {
Matt Arsenault0989d512014-06-26 17:22:30 +0000400 OutStreamer.EmitIntValue(R_00B848_COMPUTE_PGM_RSRC1, 4);
401
402 const uint32_t ComputePGMRSrc1 =
Tom Stellardff795902014-09-19 20:42:37 +0000403 S_00B848_VGPRS(VGPRBlocks) |
404 S_00B848_SGPRS(SGPRBlocks) |
Matt Arsenault0989d512014-06-26 17:22:30 +0000405 S_00B848_PRIORITY(KernelInfo.Priority) |
406 S_00B848_FLOAT_MODE(KernelInfo.FloatMode) |
407 S_00B848_PRIV(KernelInfo.Priv) |
408 S_00B848_DX10_CLAMP(KernelInfo.DX10Clamp) |
409 S_00B848_IEEE_MODE(KernelInfo.DebugMode) |
410 S_00B848_IEEE_MODE(KernelInfo.IEEEMode);
411
412 OutStreamer.EmitIntValue(ComputePGMRSrc1, 4);
413
Michel Danzer49812b52013-07-10 16:37:07 +0000414 OutStreamer.EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4);
Tom Stellardb02094e2014-07-21 15:45:01 +0000415 const uint32_t ComputePGMRSrc2 =
416 S_00B84C_LDS_SIZE(LDSBlocks) |
417 S_00B02C_SCRATCH_EN(ScratchBlocks > 0);
418
419 OutStreamer.EmitIntValue(ComputePGMRSrc2, 4);
420
421 OutStreamer.EmitIntValue(R_00B860_COMPUTE_TMPRING_SIZE, 4);
422 OutStreamer.EmitIntValue(S_00B860_WAVESIZE(ScratchBlocks), 4);
Matt Arsenault3f981402014-09-15 15:41:53 +0000423
424 // TODO: Should probably note flat usage somewhere. SC emits a "FlatPtr32 =
425 // 0" comment but I don't see a corresponding field in the register spec.
Matt Arsenault0989d512014-06-26 17:22:30 +0000426 } else {
427 OutStreamer.EmitIntValue(RsrcReg, 4);
Tom Stellardff795902014-09-19 20:42:37 +0000428 OutStreamer.EmitIntValue(S_00B028_VGPRS(VGPRBlocks) |
429 S_00B028_SGPRS(SGPRBlocks), 4);
Michel Danzer49812b52013-07-10 16:37:07 +0000430 }
Matt Arsenault0989d512014-06-26 17:22:30 +0000431
Matt Arsenault762af962014-07-13 03:06:39 +0000432 if (MFI->getShaderType() == ShaderType::PIXEL) {
Michel Danzer49812b52013-07-10 16:37:07 +0000433 OutStreamer.EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4);
Tom Stellard6e1ee472013-10-29 16:37:28 +0000434 OutStreamer.EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(LDSBlocks), 4);
Tom Stellardcb97e3a2013-04-15 17:51:35 +0000435 OutStreamer.EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);
436 OutStreamer.EmitIntValue(MFI->PSInputAddr, 4);
437 }
Tom Stellard75aadc22012-12-11 21:25:42 +0000438}