[AMDGPU] Update assembler for HSA Code Object v3

Update AMDGPU assembler syntax behind the code-object-v3 feature:

* Replace/rename most AMDGPU assembler directives/symbols and document them.
* Provide more diagnostics (e.g. values out of range, missing values, repeated
  values).
* Provide path for backwards compatibility, even with underlying descriptor
  changes.

Differential Revision: https://reviews.llvm.org/D47736

llvm-svn: 335281
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 6ae561d..31e2885 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -42,6 +42,7 @@
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/AMDGPUMetadata.h"
+#include "llvm/Support/AMDHSAKernelDescriptor.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -61,6 +62,7 @@
 
 using namespace llvm;
 using namespace llvm::AMDGPU;
+using namespace llvm::amdhsa;
 
 namespace {
 
@@ -845,6 +847,27 @@
 
 private:
   bool ParseAsAbsoluteExpression(uint32_t &Ret);
+  bool OutOfRangeError(SMRange Range);
+  /// Calculate VGPR/SGPR blocks required for given target, reserved
+  /// registers, and user-specified NextFreeXGPR values.
+  ///
+  /// \param Features [in] Target features, used for bug corrections.
+  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
+  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
+  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
+  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
+  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
+  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
+  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
+  /// \param VGPRBlocks [out] Result VGPR block count.
+  /// \param SGPRBlocks [out] Result SGPR block count.
+  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
+                          bool FlatScrUsed, bool XNACKUsed,
+                          unsigned NextFreeVGPR, SMRange VGPRRange,
+                          unsigned NextFreeSGPR, SMRange SGPRRange,
+                          unsigned &VGPRBlocks, unsigned &SGPRBlocks);
+  bool ParseDirectiveAMDGCNTarget();
+  bool ParseDirectiveAMDHSAKernel();
   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
   bool ParseDirectiveHSACodeObjectVersion();
   bool ParseDirectiveHSACodeObjectISA();
@@ -863,6 +886,10 @@
   bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
                            unsigned& RegNum, unsigned& RegWidth,
                            unsigned *DwordRegIndex);
+  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
+  void initializeGprCountSymbol(RegisterKind RegKind);
+  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
+                             unsigned RegWidth);
   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
@@ -896,15 +923,25 @@
       AMDGPU::IsaInfo::IsaVersion ISA =
           AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
       MCContext &Ctx = getContext();
-      MCSymbol *Sym =
-          Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
-      Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
-      Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
-      Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
-      Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
-      Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
+      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
+        MCSymbol *Sym =
+            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
+        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
+      } else {
+        MCSymbol *Sym =
+            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
+        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
+        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
+        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
+        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
+        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
+      }
+      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
+        initializeGprCountSymbol(IS_VGPR);
+        initializeGprCountSymbol(IS_SGPR);
+      } else
+        KernelScope.initialize(getContext());
     }
-    KernelScope.initialize(getContext());
   }
 
   bool hasXNACK() const {
@@ -1769,6 +1806,54 @@
   return true;
 }
 
+Optional<StringRef>
+AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
+  switch (RegKind) {
+  case IS_VGPR:
+    return StringRef(".amdgcn.next_free_vgpr");
+  case IS_SGPR:
+    return StringRef(".amdgcn.next_free_sgpr");
+  default:
+    return None;
+  }
+}
+
+void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
+  auto SymbolName = getGprCountSymbolName(RegKind);
+  assert(SymbolName && "initializing invalid register kind");
+  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
+  Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
+}
+
+bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
+                                            unsigned DwordRegIndex,
+                                            unsigned RegWidth) {
+  // Symbols are only defined for GCN targets
+  if (AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()).Major < 6)
+    return true;
+
+  auto SymbolName = getGprCountSymbolName(RegKind);
+  if (!SymbolName)
+    return true;
+  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
+
+  int64_t NewMax = DwordRegIndex + RegWidth - 1;
+  int64_t OldCount;
+
+  if (!Sym->isVariable())
+    return !Error(getParser().getTok().getLoc(),
+                  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
+  if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
+    return !Error(
+        getParser().getTok().getLoc(),
+        ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
+
+  if (OldCount <= NewMax)
+    Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
+
+  return true;
+}
+
 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
   const auto &Tok = Parser.getTok();
   SMLoc StartLoc = Tok.getLoc();
@@ -1779,7 +1864,11 @@
   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
     return nullptr;
   }
-  KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
+  if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
+    if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth))
+      return nullptr;
+  } else
+    KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false);
 }
 
@@ -2538,6 +2627,320 @@
   return false;
 }
 
+bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
+  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
+    return TokError("directive only supported for amdgcn architecture");
+
+  std::string Target;
+
+  SMLoc TargetStart = getTok().getLoc();
+  if (getParser().parseEscapedString(Target))
+    return true;
+  SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
+
+  std::string ExpectedTarget;
+  raw_string_ostream ExpectedTargetOS(ExpectedTarget);
+  IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
+
+  if (Target != ExpectedTargetOS.str())
+    return getParser().Error(TargetRange.Start, "target must match options",
+                             TargetRange);
+
+  getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
+  return false;
+}
+
+bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
+  return getParser().Error(Range.Start, "value out of range", Range);
+}
+
+bool AMDGPUAsmParser::calculateGPRBlocks(
+    const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
+    bool XNACKUsed, unsigned NextFreeVGPR, SMRange VGPRRange,
+    unsigned NextFreeSGPR, SMRange SGPRRange, unsigned &VGPRBlocks,
+    unsigned &SGPRBlocks) {
+  // TODO(scott.linder): These calculations are duplicated from
+  // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
+  IsaInfo::IsaVersion Version = IsaInfo::getIsaVersion(Features);
+
+  unsigned NumVGPRs = NextFreeVGPR;
+  unsigned NumSGPRs = NextFreeSGPR;
+  unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(Features);
+
+  if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
+      NumSGPRs > MaxAddressableNumSGPRs)
+    return OutOfRangeError(SGPRRange);
+
+  NumSGPRs +=
+      IsaInfo::getNumExtraSGPRs(Features, VCCUsed, FlatScrUsed, XNACKUsed);
+
+  if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
+      NumSGPRs > MaxAddressableNumSGPRs)
+    return OutOfRangeError(SGPRRange);
+
+  if (Features.test(FeatureSGPRInitBug))
+    NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
+
+  VGPRBlocks = IsaInfo::getNumVGPRBlocks(Features, NumVGPRs);
+  SGPRBlocks = IsaInfo::getNumSGPRBlocks(Features, NumSGPRs);
+
+  return false;
+}
+
+bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
+  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
+    return TokError("directive only supported for amdgcn architecture");
+
+  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
+    return TokError("directive only supported for amdhsa OS");
+
+  StringRef KernelName;
+  if (getParser().parseIdentifier(KernelName))
+    return true;
+
+  kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor();
+
+  StringSet<> Seen;
+
+  IsaInfo::IsaVersion IVersion =
+      IsaInfo::getIsaVersion(getSTI().getFeatureBits());
+
+  SMRange VGPRRange;
+  uint64_t NextFreeVGPR = 0;
+  SMRange SGPRRange;
+  uint64_t NextFreeSGPR = 0;
+  unsigned UserSGPRCount = 0;
+  bool ReserveVCC = true;
+  bool ReserveFlatScr = true;
+  bool ReserveXNACK = hasXNACK();
+
+  while (true) {
+    while (getLexer().is(AsmToken::EndOfStatement))
+      Lex();
+
+    if (getLexer().isNot(AsmToken::Identifier))
+      return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
+
+    StringRef ID = getTok().getIdentifier();
+    SMRange IDRange = getTok().getLocRange();
+    Lex();
+
+    if (ID == ".end_amdhsa_kernel")
+      break;
+
+    if (Seen.find(ID) != Seen.end())
+      return TokError(".amdhsa_ directives cannot be repeated");
+    Seen.insert(ID);
+
+    SMLoc ValStart = getTok().getLoc();
+    int64_t IVal;
+    if (getParser().parseAbsoluteExpression(IVal))
+      return true;
+    SMLoc ValEnd = getTok().getLoc();
+    SMRange ValRange = SMRange(ValStart, ValEnd);
+
+    if (IVal < 0)
+      return OutOfRangeError(ValRange);
+
+    uint64_t Val = IVal;
+
+#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
+  if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
+    return OutOfRangeError(RANGE);                                             \
+  AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
+
+    if (ID == ".amdhsa_group_segment_fixed_size") {
+      if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
+        return OutOfRangeError(ValRange);
+      KD.group_segment_fixed_size = Val;
+    } else if (ID == ".amdhsa_private_segment_fixed_size") {
+      if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
+        return OutOfRangeError(ValRange);
+      KD.private_segment_fixed_size = Val;
+    } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
+      PARSE_BITS_ENTRY(KD.kernel_code_properties,
+                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
+                       Val, ValRange);
+      UserSGPRCount++;
+    } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
+      PARSE_BITS_ENTRY(KD.kernel_code_properties,
+                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
+                       ValRange);
+      UserSGPRCount++;
+    } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
+      PARSE_BITS_ENTRY(KD.kernel_code_properties,
+                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
+                       ValRange);
+      UserSGPRCount++;
+    } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
+      PARSE_BITS_ENTRY(KD.kernel_code_properties,
+                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
+                       Val, ValRange);
+      UserSGPRCount++;
+    } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
+      PARSE_BITS_ENTRY(KD.kernel_code_properties,
+                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
+                       ValRange);
+      UserSGPRCount++;
+    } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
+      PARSE_BITS_ENTRY(KD.kernel_code_properties,
+                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
+                       ValRange);
+      UserSGPRCount++;
+    } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
+      PARSE_BITS_ENTRY(KD.kernel_code_properties,
+                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
+                       Val, ValRange);
+      UserSGPRCount++;
+    } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
+      PARSE_BITS_ENTRY(
+          KD.compute_pgm_rsrc2,
+          COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
+          ValRange);
+    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
+      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
+                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
+                       ValRange);
+    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
+      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
+                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
+                       ValRange);
+    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
+      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
+                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
+                       ValRange);
+    } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
+      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
+                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
+                       ValRange);
+    } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
+      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
+                       COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
+                       ValRange);
+    } else if (ID == ".amdhsa_next_free_vgpr") {
+      VGPRRange = ValRange;
+      NextFreeVGPR = Val;
+    } else if (ID == ".amdhsa_next_free_sgpr") {
+      SGPRRange = ValRange;
+      NextFreeSGPR = Val;
+    } else if (ID == ".amdhsa_reserve_vcc") {
+      if (!isUInt<1>(Val))
+        return OutOfRangeError(ValRange);
+      ReserveVCC = Val;
+    } else if (ID == ".amdhsa_reserve_flat_scratch") {
+      if (IVersion.Major < 7)
+        return getParser().Error(IDRange.Start, "directive requires gfx7+",
+                                 IDRange);
+      if (!isUInt<1>(Val))
+        return OutOfRangeError(ValRange);
+      ReserveFlatScr = Val;
+    } else if (ID == ".amdhsa_reserve_xnack_mask") {
+      if (IVersion.Major < 8)
+        return getParser().Error(IDRange.Start, "directive requires gfx8+",
+                                 IDRange);
+      if (!isUInt<1>(Val))
+        return OutOfRangeError(ValRange);
+      ReserveXNACK = Val;
+    } else if (ID == ".amdhsa_float_round_mode_32") {
+      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
+                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
+    } else if (ID == ".amdhsa_float_round_mode_16_64") {
+      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
+                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
+    } else if (ID == ".amdhsa_float_denorm_mode_32") {
+      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
+                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
+    } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
+      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
+                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
+                       ValRange);
+    } else if (ID == ".amdhsa_dx10_clamp") {
+      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
+                       COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
+    } else if (ID == ".amdhsa_ieee_mode") {
+      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
+                       Val, ValRange);
+    } else if (ID == ".amdhsa_fp16_overflow") {
+      if (IVersion.Major < 9)
+        return getParser().Error(IDRange.Start, "directive requires gfx9+",
+                                 IDRange);
+      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
+                       ValRange);
+    } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
+      PARSE_BITS_ENTRY(
+          KD.compute_pgm_rsrc2,
+          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
+          ValRange);
+    } else if (ID == ".amdhsa_exception_fp_denorm_src") {
+      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
+                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
+                       Val, ValRange);
+    } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
+      PARSE_BITS_ENTRY(
+          KD.compute_pgm_rsrc2,
+          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
+          ValRange);
+    } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
+      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
+                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
+                       Val, ValRange);
+    } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
+      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
+                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
+                       Val, ValRange);
+    } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
+      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
+                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
+                       Val, ValRange);
+    } else if (ID == ".amdhsa_exception_int_div_zero") {
+      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
+                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
+                       Val, ValRange);
+    } else {
+      return getParser().Error(IDRange.Start,
+                               "unknown .amdhsa_kernel directive", IDRange);
+    }
+
+#undef PARSE_BITS_ENTRY
+  }
+
+  if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
+    return TokError(".amdhsa_next_free_vgpr directive is required");
+
+  if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
+    return TokError(".amdhsa_next_free_sgpr directive is required");
+
+  unsigned VGPRBlocks;
+  unsigned SGPRBlocks;
+  if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
+                         ReserveXNACK, NextFreeVGPR, VGPRRange, NextFreeSGPR,
+                         SGPRRange, VGPRBlocks, SGPRBlocks))
+    return true;
+
+  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
+          VGPRBlocks))
+    return OutOfRangeError(VGPRRange);
+  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
+                  COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
+
+  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
+          SGPRBlocks))
+    return OutOfRangeError(SGPRRange);
+  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
+                  COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
+                  SGPRBlocks);
+
+  if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
+    return TokError("too many user SGPRs enabled");
+  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
+                  UserSGPRCount);
+
+  getTargetStreamer().EmitAmdhsaKernelDescriptor(
+      getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
+      ReserveFlatScr, ReserveXNACK);
+  return false;
+}
+
 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
   uint32_t Major;
   uint32_t Minor;
@@ -2657,7 +3060,8 @@
   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
                                            ELF::STT_AMDGPU_HSA_KERNEL);
   Lex();
-  KernelScope.initialize(getContext());
+  if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
+    KernelScope.initialize(getContext());
   return false;
 }
 
@@ -2761,20 +3165,28 @@
 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
   StringRef IDVal = DirectiveID.getString();
 
-  if (IDVal == ".hsa_code_object_version")
-    return ParseDirectiveHSACodeObjectVersion();
+  if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
+    if (IDVal == ".amdgcn_target")
+      return ParseDirectiveAMDGCNTarget();
 
-  if (IDVal == ".hsa_code_object_isa")
-    return ParseDirectiveHSACodeObjectISA();
+    if (IDVal == ".amdhsa_kernel")
+      return ParseDirectiveAMDHSAKernel();
+  } else {
+    if (IDVal == ".hsa_code_object_version")
+      return ParseDirectiveHSACodeObjectVersion();
 
-  if (IDVal == ".amd_kernel_code_t")
-    return ParseDirectiveAMDKernelCodeT();
+    if (IDVal == ".hsa_code_object_isa")
+      return ParseDirectiveHSACodeObjectISA();
 
-  if (IDVal == ".amdgpu_hsa_kernel")
-    return ParseDirectiveAMDGPUHsaKernel();
+    if (IDVal == ".amd_kernel_code_t")
+      return ParseDirectiveAMDKernelCodeT();
 
-  if (IDVal == ".amd_amdgpu_isa")
-    return ParseDirectiveISAVersion();
+    if (IDVal == ".amdgpu_hsa_kernel")
+      return ParseDirectiveAMDGPUHsaKernel();
+
+    if (IDVal == ".amd_amdgpu_isa")
+      return ParseDirectiveISAVersion();
+  }
 
   if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
     return ParseDirectiveHSAMetadata();