[AMDGPU] gfx1010 s_code_end generation
Also add some missing metadata in the streamer.
Differential Revision: https://reviews.llvm.org/D61531
llvm-svn: 359937
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 95302ff..76f7034 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -295,6 +295,12 @@
bool AMDGPUAsmPrinter::doFinalization(Module &M) {
CallGraphResourceInfo.clear();
+
+ if (AMDGPU::isGFX10(*getGlobalSTI())) { // GFX10 only: append the s_code_end padding.
+ OutStreamer->SwitchSection(getObjFileLowering().getTextSection()); // Emit the padding into the text section.
+ getTargetStreamer()->EmitCodeEnd(); // The bool result is not checked here.
+ }
+
return AsmPrinter::doFinalization(M);
}
@@ -928,6 +934,11 @@
1ULL << ScratchAlignShift) >>
ScratchAlignShift;
+ if (getIsaVersion(getGlobalSTI()->getCPU()).Major >= 10) {
+ ProgInfo.WgpMode = STM.isCuModeEnabled() ? 0 : 1;
+ ProgInfo.MemOrdered = 1;
+ }
+
ProgInfo.ComputePGMRSrc1 =
S_00B848_VGPRS(ProgInfo.VGPRBlocks) |
S_00B848_SGPRS(ProgInfo.SGPRBlocks) |
@@ -936,7 +947,9 @@
S_00B848_PRIV(ProgInfo.Priv) |
S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp) |
S_00B848_DEBUG_MODE(ProgInfo.DebugMode) |
- S_00B848_IEEE_MODE(ProgInfo.IEEEMode);
+ S_00B848_IEEE_MODE(ProgInfo.IEEEMode) |
+ S_00B848_WGP_MODE(ProgInfo.WgpMode) |
+ S_00B848_MEM_ORDERED(ProgInfo.MemOrdered);
// 0 = X, 1 = XY, 2 = XYZ
unsigned TIDIGCompCnt = 0;
@@ -1077,7 +1090,7 @@
Out.compute_pgm_resource_registers =
CurrentProgramInfo.ComputePGMRSrc1 |
(CurrentProgramInfo.ComputePGMRSrc2 << 32);
- Out.code_properties = AMD_CODE_PROPERTY_IS_PTR64;
+ Out.code_properties |= AMD_CODE_PROPERTY_IS_PTR64;
if (CurrentProgramInfo.DynamicCallStack)
Out.code_properties |= AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK;
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index b40bda9..bab9f4d 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -235,6 +235,13 @@
return true;
}
+bool AMDGPUTargetAsmStreamer::EmitCodeEnd() { // Writes the s_code_end padding as two assembler directives; always returns true.
+ const uint32_t Encoded_s_code_end = 0xbf9f0000; // 32-bit fill word; per its name, the s_code_end encoding.
+ OS << "\t.p2alignl 6, " << Encoded_s_code_end << '\n'; // Align to 2^6 = 64 bytes, padding with the fill word.
+ OS << "\t.fill 32, 4, " << Encoded_s_code_end << '\n'; // Then 32 further 4-byte copies of the fill word.
+ return true;
+}
+
void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
const MCSubtargetInfo &STI, StringRef KernelName,
const amdhsa::kernel_descriptor_t &KD, uint64_t NextVGPR, uint64_t NextSGPR,
@@ -552,6 +559,18 @@
return true;
}
+bool AMDGPUTargetELFStreamer::EmitCodeEnd() { // Object-file counterpart of the asm streamer's EmitCodeEnd; always returns true.
+ const uint32_t Encoded_s_code_end = 0xbf9f0000; // 32-bit fill word; per its name, the s_code_end encoding.
+
+ MCStreamer &OS = getStreamer();
+ OS.PushSection(); // Paired with the PopSection() below.
+ OS.EmitValueToAlignment(64, Encoded_s_code_end, 4); // 64-byte alignment with 4-byte fill values; mirrors ".p2alignl 6".
+ for (unsigned I = 0; I < 32; ++I) // 32 trailing fill words; mirrors ".fill 32, 4".
+ OS.EmitIntValue(Encoded_s_code_end, 4);
+ OS.PopSection();
+ return true;
+}
+
void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
const MCSubtargetInfo &STI, StringRef KernelName,
const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
index c1436b3..9c52199 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
@@ -74,6 +74,9 @@
/// \returns True on success, false on failure.
virtual bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) = 0;
+ /// Emit the s_code_end padding sequence. \returns True on success, false on failure.
+ virtual bool EmitCodeEnd() = 0;
+
virtual void EmitAmdhsaKernelDescriptor(
const MCSubtargetInfo &STI, StringRef KernelName,
const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
@@ -113,6 +116,9 @@
/// \returns True on success, false on failure.
bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) override;
+ /// Emit the s_code_end padding sequence. \returns True on success, false on failure.
+ bool EmitCodeEnd() override;
+
void EmitAmdhsaKernelDescriptor(
const MCSubtargetInfo &STI, StringRef KernelName,
const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
@@ -155,6 +161,9 @@
/// \returns True on success, false on failure.
bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) override;
+ /// Emit the s_code_end padding sequence. \returns True on success, false on failure.
+ bool EmitCodeEnd() override;
+
void EmitAmdhsaKernelDescriptor(
const MCSubtargetInfo &STI, StringRef KernelName,
const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
diff --git a/llvm/lib/Target/AMDGPU/SIProgramInfo.h b/llvm/lib/Target/AMDGPU/SIProgramInfo.h
index 0b47591..168f05f 100644
--- a/llvm/lib/Target/AMDGPU/SIProgramInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIProgramInfo.h
@@ -28,6 +28,8 @@
uint32_t DX10Clamp = 0;
uint32_t DebugMode = 0;
uint32_t IEEEMode = 0;
+ uint32_t WgpMode = 0; // GFX10+
+ uint32_t MemOrdered = 0; // GFX10+
uint64_t ScratchSize = 0;
uint64_t ComputePGMRSrc1 = 0;