[AMDGPU] gfx1010 wave32 metadata
Differential Revision: https://reviews.llvm.org/D63207
llvm-svn: 363577
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 3b1faac..03c6a67 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -361,6 +361,10 @@
KernelCodeProperties |=
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
}
+ if (MF.getSubtarget<GCNSubtarget>().isWave32()) {
+ KernelCodeProperties |=
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
+ }
return KernelCodeProperties;
}
@@ -1081,6 +1085,10 @@
MD->setSpiPsInputEna(MFI->getPSInputEnable());
MD->setSpiPsInputAddr(MFI->getPSInputAddr());
}
+
+ const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
+ if (STM.isWave32())
+ MD->setWave32(MF.getFunction().getCallingConv());
}
// This is supposed to be log2(Size)
diff --git a/llvm/lib/Target/AMDGPU/AMDKernelCodeT.h b/llvm/lib/Target/AMDGPU/AMDKernelCodeT.h
index 9a2cb95..3e658a1 100644
--- a/llvm/lib/Target/AMDGPU/AMDKernelCodeT.h
+++ b/llvm/lib/Target/AMDGPU/AMDKernelCodeT.h
@@ -126,8 +126,12 @@
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_WIDTH = 1,
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_SHIFT,
- AMD_CODE_PROPERTY_RESERVED1_SHIFT = 10,
- AMD_CODE_PROPERTY_RESERVED1_WIDTH = 6,
+ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT = 10,
+ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_WIDTH = 1,
+ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32 = ((1 << AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT,
+
+ AMD_CODE_PROPERTY_RESERVED1_SHIFT = 11,
+ AMD_CODE_PROPERTY_RESERVED1_WIDTH = 5,
AMD_CODE_PROPERTY_RESERVED1 = ((1 << AMD_CODE_PROPERTY_RESERVED1_WIDTH) - 1) << AMD_CODE_PROPERTY_RESERVED1_SHIFT,
/// Control wave ID base counter for GDS ordered-append. Used to set
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 54dfb11..02a1569 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -3433,6 +3433,14 @@
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
Val, ValRange);
UserSGPRCount += 1;
+ } else if (ID == ".amdhsa_wavefront_size32") {
+ if (IVersion.Major < 10)
+ return getParser().Error(IDRange.Start, "directive requires gfx10+",
+ IDRange);
+ EnableWavefrontSize32 = Val;
+ PARSE_BITS_ENTRY(KD.kernel_code_properties,
+ KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
+ Val, ValRange);
} else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
PARSE_BITS_ENTRY(
KD.compute_pgm_rsrc2,
@@ -3680,6 +3688,30 @@
}
Lex();
+ if (ID == "enable_wavefront_size32") {
+ if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
+ if (!isGFX10())
+ return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
+ if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
+ return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
+ } else {
+ if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
+ return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
+ }
+ }
+
+ if (ID == "wavefront_size") {
+ if (Header.wavefront_size == 5) {
+ if (!isGFX10())
+ return TokError("wavefront_size=5 is only allowed on GFX10+");
+ if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
+ return TokError("wavefront_size=5 requires +WavefrontSize32");
+ } else if (Header.wavefront_size == 6) {
+ if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
+ return TokError("wavefront_size=6 requires +WavefrontSize64");
+ }
+ }
+
if (ID == "enable_wgp_mode") {
if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 9789d73..b675004 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -284,6 +284,10 @@
PRINT_FIELD(OS, ".amdhsa_user_sgpr_private_segment_size", KD,
kernel_code_properties,
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
+ if (IVersion.Major >= 10)
+ PRINT_FIELD(OS, ".amdhsa_wavefront_size32", KD,
+ kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
PRINT_FIELD(
OS, ".amdhsa_system_sgpr_private_segment_wavefront_offset", KD,
compute_pgm_rsrc2,
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 2db372f..7ba325b 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -457,6 +457,10 @@
Header.private_segment_alignment = 4;
if (Version.Major >= 10) {
+ if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
+ Header.wavefront_size = 5;
+ Header.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
+ }
Header.compute_pgm_resource_registers |=
S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
S_00B848_MEM_ORDERED(1);
@@ -480,6 +484,9 @@
AMDHSA_BITS_SET(KD.compute_pgm_rsrc2,
amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
if (Version.Major >= 10) {
+ AMDHSA_BITS_SET(KD.kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
+ STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1 : 0);
AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE,
STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1);
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
index 0fd3523..db20d5c 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
@@ -233,6 +233,29 @@
getHwStage(CC)[".scratch_memory_size"] = MsgPackDoc.getNode(Val);
}
+// Set the hardware register bit in PAL metadata to enable wave32 on the
+// shader of the given calling convention.
+void AMDGPUPALMetadata::setWave32(unsigned CC) {
+ switch (CC) {
+ case CallingConv::AMDGPU_HS:
+ setRegister(PALMD::R_A2D5_VGT_SHADER_STAGES_EN, S_028B54_HS_W32_EN(1));
+ break;
+ case CallingConv::AMDGPU_GS:
+ setRegister(PALMD::R_A2D5_VGT_SHADER_STAGES_EN, S_028B54_GS_W32_EN(1));
+ break;
+ case CallingConv::AMDGPU_VS:
+ setRegister(PALMD::R_A2D5_VGT_SHADER_STAGES_EN, S_028B54_VS_W32_EN(1));
+ break;
+ case CallingConv::AMDGPU_PS:
+ setRegister(PALMD::R_A1B6_SPI_PS_IN_CONTROL, S_0286D8_PS_W32_EN(1));
+ break;
+ case CallingConv::AMDGPU_CS:
+ setRegister(PALMD::R_2E00_COMPUTE_DISPATCH_INITIATOR,
+ S_00B800_CS_W32_EN(1));
+ break;
+ }
+}
+
// Convert a register number to name, for display by toString().
// Returns nullptr if none.
static const char *getRegisterName(unsigned RegNum) {
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
index dbef2cc..0f17c15 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
@@ -80,6 +80,10 @@
// Set the scratch size in the metadata.
void setScratchSize(unsigned CC, unsigned Val);
+ // Set the hardware register bit in PAL metadata to enable wave32 on the
+ // shader of the given calling convention.
+ void setWave32(unsigned CC);
+
// Emit the accumulated PAL metadata as asm directives.
// This is called from AMDGPUTargetAsmStreamer::Finish().
void toString(std::string &S);
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
index 2d7857e..95ad3f3 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
@@ -109,6 +109,7 @@
CODEPROP(enable_sgpr_grid_workgroup_count_x, ENABLE_SGPR_GRID_WORKGROUP_COUNT_X),
CODEPROP(enable_sgpr_grid_workgroup_count_y, ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y),
CODEPROP(enable_sgpr_grid_workgroup_count_z, ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z),
+CODEPROP(enable_wavefront_size32, ENABLE_WAVEFRONT_SIZE32),
CODEPROP(enable_ordered_append_gds, ENABLE_ORDERED_APPEND_GDS),
CODEPROP(private_element_size, PRIVATE_ELEMENT_SIZE),
CODEPROP(is_ptr64, IS_PTR64),