[AMDGPU] Disassembler code refactored + error messages.

Idea behind this change is to make code shorter and as much common for all targets as possible. Let's even accept more code than is valid for a particular target, leaving it for the assembler to sort out.

64bit instructions decoding added.

Error\warning messages on unrecognized instructions operands added, InstPrinter allowed to print invalid operands helping to find invalid/unsupported code.

The change is massive and hard to compare with previous version, so it makes sense just to take a look on the new version. As a bonus, with a few TD changes following, it disassembles the majority of instructions. Currently it fully disassembles >300K binary source of some blas kernel.

Previous TODOs were saved whenever possible.

Patch by: Valery Pykhtin

Differential Revision: http://reviews.llvm.org/D17720

llvm-svn: 262332
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index a885010..c88f5eb 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -22,10 +22,12 @@
 #include "AMDGPURegisterInfo.h"
 #include "Utils/AMDGPUBaseInfo.h"
 
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCFixedLenDisassembler.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCInstrDesc.h"
 #include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Endian.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/TargetRegistry.h"
 
@@ -37,69 +39,39 @@
 typedef llvm::MCDisassembler::DecodeStatus DecodeStatus;
 
 
-static DecodeStatus DecodeVGPR_32RegisterClass(MCInst &Inst, unsigned Imm,
-                                               uint64_t Addr, const void *Decoder) {
-  const AMDGPUDisassembler *Dis =
-    static_cast<const AMDGPUDisassembler *>(Decoder);
-  return Dis->DecodeVGPR_32RegisterClass(Inst, Imm, Addr);
+inline static MCDisassembler::DecodeStatus
+addOperand(MCInst &Inst, const MCOperand& Opnd) {
+  Inst.addOperand(Opnd);
+  return Opnd.isValid() ?
+    MCDisassembler::Success :
+    MCDisassembler::SoftFail;
 }
 
-static DecodeStatus DecodeVS_32RegisterClass(MCInst &Inst, unsigned Imm,
-                                             uint64_t Addr, const void *Decoder) {
-  const AMDGPUDisassembler *Dis =
-    static_cast<const AMDGPUDisassembler *>(Decoder);
-  return Dis->DecodeVS_32RegisterClass(Inst, Imm, Addr);
+#define DECODE_OPERAND2(RegClass, DecName) \
+static DecodeStatus Decode##RegClass##RegisterClass(MCInst &Inst, \
+                                                    unsigned Imm, \
+                                                    uint64_t /*Addr*/, \
+                                                    const void *Decoder) { \
+  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); \
+  return addOperand(Inst, DAsm->decodeOperand_##DecName(Imm)); \
 }
 
-static DecodeStatus DecodeVS_64RegisterClass(MCInst &Inst, unsigned Imm,
-                                             uint64_t Addr, const void *Decoder) {
-  const AMDGPUDisassembler *Dis =
-    static_cast<const AMDGPUDisassembler *>(Decoder);
-  return Dis->DecodeVS_64RegisterClass(Inst, Imm, Addr);
-}
+#define DECODE_OPERAND(RegClass) DECODE_OPERAND2(RegClass, RegClass)
 
-static DecodeStatus DecodeVReg_64RegisterClass(MCInst &Inst, unsigned Imm,
-                                               uint64_t Addr, const void *Decoder) {
-  const AMDGPUDisassembler *Dis =
-    static_cast<const AMDGPUDisassembler *>(Decoder);
-  return Dis->DecodeVReg_64RegisterClass(Inst, Imm, Addr);
-}
+DECODE_OPERAND(VGPR_32)
+DECODE_OPERAND(VS_32)
+DECODE_OPERAND(VS_64)
 
-static DecodeStatus DecodeVReg_96RegisterClass(MCInst &Inst, unsigned Imm,
-                                               uint64_t Addr, const void *Decoder) {
-  // ToDo
-  return MCDisassembler::Fail;
-}
+DECODE_OPERAND(VReg_64)
+DECODE_OPERAND(VReg_96)
+DECODE_OPERAND(VReg_128)
 
-static DecodeStatus DecodeVReg_128RegisterClass(MCInst &Inst, unsigned Imm,
-                                                uint64_t Addr, const void *Decoder) {
-  // ToDo
-  return MCDisassembler::Fail;
-}
-
-static DecodeStatus DecodeSReg_32RegisterClass(MCInst &Inst, unsigned Imm,
-                                               uint64_t Addr, const void *Decoder) {
-  // ToDo
-  return MCDisassembler::Fail;
-}
-
-static DecodeStatus DecodeSReg_64RegisterClass(MCInst &Inst, unsigned Imm,
-                                               uint64_t Addr, const void *Decoder) {
-  // ToDo
-  return MCDisassembler::Fail;
-}
-
-static DecodeStatus DecodeSReg_128RegisterClass(MCInst &Inst, unsigned Imm,
-                                                uint64_t Addr, const void *Decoder) {
-  // ToDo
-  return MCDisassembler::Fail;
-}
-
-static DecodeStatus DecodeSReg_256RegisterClass(MCInst &Inst, unsigned Imm,
-                                                uint64_t Addr, const void *Decoder) {
-  // ToDo
-  return MCDisassembler::Fail;
-}
+DECODE_OPERAND(SGPR_32)
+DECODE_OPERAND(SReg_32)
+DECODE_OPERAND(SReg_64)
+DECODE_OPERAND(SReg_128)
+DECODE_OPERAND(SReg_256)
+DECODE_OPERAND(SReg_512)
 
 #define GET_SUBTARGETINFO_ENUM
 #include "AMDGPUGenSubtargetInfo.inc"
@@ -111,8 +83,31 @@
 //
 //===----------------------------------------------------------------------===//
 
+static inline uint32_t eatB32(ArrayRef<uint8_t>& Bytes) {
+  assert(Bytes.size() >= sizeof eatB32(Bytes));
+  const auto Res = support::endian::read32le(Bytes.data());
+  Bytes = Bytes.slice(sizeof Res);
+  return Res;
+}
+
+DecodeStatus AMDGPUDisassembler::tryDecodeInst(const uint8_t* Table,
+                                               MCInst &MI,
+                                               uint64_t Inst,
+                                               uint64_t Address) const {
+  assert(MI.getOpcode() == 0);
+  assert(MI.getNumOperands() == 0);
+  MCInst TmpInst;
+  const auto SavedBytes = Bytes;
+  if (decodeInstruction(Table, TmpInst, Inst, Address, this, STI)) {
+    MI = TmpInst;
+    return MCDisassembler::Success;
+  }
+  Bytes = SavedBytes;
+  return MCDisassembler::Fail;
+}
+
 DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
-                                                ArrayRef<uint8_t> Bytes,
+                                                ArrayRef<uint8_t> Bytes_,
                                                 uint64_t Address,
                                                 raw_ostream &WS,
                                                 raw_ostream &CS) const {
@@ -121,333 +116,258 @@
   // ToDo: AMDGPUDisassembler supports only VI ISA.
   assert(AMDGPU::isVI(STI) && "Can disassemble only VI ISA.");
 
-  HasLiteral = false;
-  this->Bytes = Bytes;
+  const unsigned MaxInstBytesNum = (std::min)((size_t)8, Bytes_.size());
+  Bytes = Bytes_.slice(0, MaxInstBytesNum);
 
-  // Try decode 32-bit instruction
-  if (Bytes.size() < 4) {
-    Size = 0;
-    return MCDisassembler::Fail;
-  }
-  uint32_t Insn =
-      (Bytes[3] << 24) | (Bytes[2] << 16) | (Bytes[1] << 8) | (Bytes[0] << 0);
+  DecodeStatus Res = MCDisassembler::Fail;
+  do {
+    // ToDo: better to switch enc len using some bit predicate
+    // but it is unknown yet, so try all we can
+    if (Bytes.size() < 4) break;
+    const uint32_t DW = eatB32(Bytes);
+    Res = tryDecodeInst(DecoderTableVI32, MI, DW, Address);
+    if (Res) break;
 
-  // Calling the auto-generated decoder function.
-  DecodeStatus Result =
-      decodeInstruction(DecoderTableVI32, MI, Insn, Address, this, STI);
-  if (Result != MCDisassembler::Success) {
-      Size = 0;
-      return MCDisassembler::Fail;
-  }
-  if (HasLiteral == true) {
-    Size = 8;
-    HasLiteral = false;
-  } else {
-    Size = 4;
-  }
+    Res = tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address);
+    if (Res) break;
 
-  return MCDisassembler::Success;
+    if (Bytes.size() < 4) break;
+    const uint64_t QW = ((uint64_t)eatB32(Bytes) << 32) | DW;
+    Res = tryDecodeInst(DecoderTableVI64, MI, QW, Address);
+    if (Res) break;
+
+    Res = tryDecodeInst(DecoderTableAMDGPU64, MI, QW, Address);
+  } while (false);
+
+  Size = Res ? (MaxInstBytesNum - Bytes.size()) : 0;
+  return Res;
 }
 
-DecodeStatus AMDGPUDisassembler::DecodeImmedFloat(unsigned Imm, uint32_t &F) const {
-  // ToDo: case 248: 1/(2*PI) - is allowed only on VI
-  // ToDo: AMDGPUInstPrinter does not support 1/(2*PI). It consider 1/(2*PI) as
-  // literal constant.
-  switch(Imm) {
-  case 240: F = FloatToBits(0.5f); return MCDisassembler::Success;
-  case 241: F = FloatToBits(-0.5f); return MCDisassembler::Success;
-  case 242: F = FloatToBits(1.0f); return MCDisassembler::Success;
-  case 243: F = FloatToBits(-1.0f); return MCDisassembler::Success;
-  case 244: F = FloatToBits(2.0f); return MCDisassembler::Success;
-  case 245: F = FloatToBits(-2.0f); return MCDisassembler::Success;
-  case 246: F = FloatToBits(4.0f); return MCDisassembler::Success;
-  case 247: F = FloatToBits(-4.0f); return MCDisassembler::Success;
-  case 248: F = 0x3e22f983; return MCDisassembler::Success; // 1/(2*PI)
-  default: return MCDisassembler::Fail;
-  }
+const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
+  return getContext().getRegisterInfo()->
+    getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
 }
 
-DecodeStatus AMDGPUDisassembler::DecodeImmedDouble(unsigned Imm, uint64_t &D) const {
-  switch(Imm) {
-  case 240: D = DoubleToBits(0.5); return MCDisassembler::Success;
-  case 241: D = DoubleToBits(-0.5); return MCDisassembler::Success;
-  case 242: D = DoubleToBits(1.0); return MCDisassembler::Success;
-  case 243: D = DoubleToBits(-1.0); return MCDisassembler::Success;
-  case 244: D = DoubleToBits(2.0); return MCDisassembler::Success;
-  case 245: D = DoubleToBits(-2.0); return MCDisassembler::Success;
-  case 246: D = DoubleToBits(4.0); return MCDisassembler::Success;
-  case 247: D = DoubleToBits(-4.0); return MCDisassembler::Success;
-  case 248: D = 0x3fc45f306dc9c882; return MCDisassembler::Success; // 1/(2*PI)
-  default: return MCDisassembler::Fail;
-  }
+inline
+MCOperand AMDGPUDisassembler::errOperand(unsigned V,
+                                         const Twine& ErrMsg) const {
+  *CommentStream << "Error: " + ErrMsg;
+
+  // ToDo: add support for error operands to MCInst.h
+  // return MCOperand::createError(V);
+  return MCOperand();
 }
 
-DecodeStatus AMDGPUDisassembler::DecodeImmedInteger(unsigned Imm,
-                                                    int64_t &I) const {
-  if ((Imm >= 128) && (Imm <= 192)) {
-    I = Imm - 128;
-    return MCDisassembler::Success;
-  } else if ((Imm >= 193) && (Imm <= 208)) {
-    I = 192 - Imm;
-    return MCDisassembler::Success;
-  }
-  return MCDisassembler::Fail;
+inline
+MCOperand AMDGPUDisassembler::createRegOperand(unsigned int RegId) const {
+  return MCOperand::createReg(RegId);
 }
 
-DecodeStatus AMDGPUDisassembler::DecodeVgprRegister(unsigned Val,
-                                                    unsigned &RegID,
-                                                    unsigned Size) const {
-  if (Val > (256 - Size / 32)) {
-    return MCDisassembler::Fail;
-  }
-  unsigned RegClassID;
-  switch (Size) {
-  case 32: RegClassID = AMDGPU::VGPR_32RegClassID; break;
-  case 64: RegClassID = AMDGPU::VReg_64RegClassID; break;
-  case 96: RegClassID = AMDGPU::VReg_96RegClassID; break;
-  case 128: RegClassID = AMDGPU::VReg_128RegClassID; break;
-  case 256: RegClassID = AMDGPU::VReg_256RegClassID; break;
-  case 512: RegClassID = AMDGPU::VReg_512RegClassID; break;
-  default:
-    return MCDisassembler::Fail;
-  }
-
-  RegID = AMDGPUMCRegisterClasses[RegClassID].getRegister(Val);
-  return MCDisassembler::Success;
+inline
+MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
+                                               unsigned Val) const {
+  const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
+  if (Val >= RegCl.getNumRegs())
+    return errOperand(Val, Twine(getRegClassName(RegClassID)) +
+                           ": unknown register " + Twine(Val));
+  return createRegOperand(RegCl.getRegister(Val));
 }
 
-DecodeStatus AMDGPUDisassembler::DecodeSgprRegister(unsigned Val,
-                                                    unsigned &RegID,
-                                                    unsigned Size) const {
+inline
+MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
+                                                unsigned Val) const {
   // ToDo: SI/CI have 104 SGPRs, VI - 102
-  unsigned RegClassID;
-
-  switch (Size) {
-  case 32:
-    if (Val > 101) {
-      return MCDisassembler::Fail;
-    }
-    RegClassID = AMDGPU::SGPR_32RegClassID;
-    break;
-  case 64:
-    if ((Val % 2 != 0) || (Val > 100)) {
-      return MCDisassembler::Fail;
-    }
-    Val /= 2;
-    RegClassID = AMDGPU::SGPR_64RegClassID;
-    break;
-  case 128:
-    // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
-    // this bundle?
-    if ((Val % 4 != 0) || (Val > 96)) {
-      return MCDisassembler::Fail;
-    }
-    Val /= 4;
-    RegClassID = AMDGPU::SReg_128RegClassID;
-    break;
-  case 256:
-    // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
-    // this bundle?
-    if ((Val % 4 != 0) || (Val > 92)) {
-      return MCDisassembler::Fail;
-    }
-    Val /= 4;
-    RegClassID = AMDGPU::SReg_256RegClassID;
-    break;
-  case 512:
-    // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
-    // this bundle?
-    if ((Val % 4 != 0) || (Val > 84)) {
-      return MCDisassembler::Fail;
-    }
-    Val /= 4;
-    RegClassID = AMDGPU::SReg_512RegClassID;
-    break;
-  default:
-    return MCDisassembler::Fail;
+  // Valery: here we accepting as much as we can, let assembler sort it out
+  int shift = 0;
+  switch (SRegClassID) {
+  case AMDGPU::SGPR_32RegClassID:
+  case AMDGPU::SReg_32RegClassID: break;
+  case AMDGPU::SGPR_64RegClassID:
+  case AMDGPU::SReg_64RegClassID:  shift = 1; break;
+  case AMDGPU::SReg_128RegClassID:
+  // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
+  // this bundle?
+  case AMDGPU::SReg_256RegClassID:
+  // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
+  // this bundle?
+  case AMDGPU::SReg_512RegClassID: shift = 2; break;
+  // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
+  // this bundle?
+  default: assert(false); break;
   }
-
-  RegID = AMDGPUMCRegisterClasses[RegClassID].getRegister(Val);
-  return MCDisassembler::Success;
+  if (Val % (1 << shift))
+    *CommentStream << "Warning: " << getRegClassName(SRegClassID)
+                   << ": scalar reg isn't aligned " << Val;
+  return createRegOperand(SRegClassID, Val >> shift);
 }
 
-DecodeStatus AMDGPUDisassembler::DecodeSrc32Register(unsigned Val,
-                                                     unsigned &RegID) const {
-  // ToDo: deal with out-of range registers
-  using namespace AMDGPU;
-  if (Val <= 101) {
-    return DecodeSgprRegister(Val, RegID, 32);
-  } else if ((Val >= 256) && (Val <= 511)) {
-    return DecodeVgprRegister(Val - 256, RegID, 32);
-  } else {
-    switch(Val) {
-    case 102: RegID = getMCReg(FLAT_SCR_LO, STI); return MCDisassembler::Success;
-    case 103: RegID = getMCReg(FLAT_SCR_HI, STI); return MCDisassembler::Success;
-    // ToDo: no support for xnack_mask_lo/_hi register
-    case 104:
-    case 105: return MCDisassembler::Fail;
-    case 106: RegID = getMCReg(VCC_LO, STI); return MCDisassembler::Success;
-    case 107: RegID = getMCReg(VCC_HI, STI); return MCDisassembler::Success;
-    // ToDo: no support for tba_lo/_hi register
-    case 108:
-    case 109: return MCDisassembler::Fail;
-    // ToDo: no support for tma_lo/_hi register
-    case 110:
-    case 111: return MCDisassembler::Fail;
-    // ToDo: no support for ttmp[0:11] register
-    case 112:
-    case 113:
-    case 114:
-    case 115:
-    case 116:
-    case 117:
-    case 118:
-    case 119:
-    case 120:
-    case 121:
-    case 122:
-    case 123: return MCDisassembler::Fail;
-    case 124: RegID = getMCReg(M0, STI); return MCDisassembler::Success;
-    case 126: RegID = getMCReg(EXEC_LO, STI); return MCDisassembler::Success;
-    case 127: RegID = getMCReg(EXEC_HI, STI); return MCDisassembler::Success;
-    // ToDo: no support for vccz register
-    case 251: return MCDisassembler::Fail;
-    // ToDo: no support for execz register
-    case 252: return MCDisassembler::Fail;
-    case 253: RegID = getMCReg(SCC, STI); return MCDisassembler::Success;
-    default: return MCDisassembler::Fail;
-    }
-  }
-  return MCDisassembler::Fail;
+MCOperand AMDGPUDisassembler::decodeOperand_VS_32(unsigned Val) const {
+  return decodeSrcOp(OP32, Val);
 }
 
-DecodeStatus AMDGPUDisassembler::DecodeSrc64Register(unsigned Val,
-                                                     unsigned &RegID) const {
-  // ToDo: deal with out-of range registers
-  using namespace AMDGPU;
-  if (Val <= 101) {
-    return DecodeSgprRegister(Val, RegID, 64);
-  } else if ((Val >= 256) && (Val <= 511)) {
-    return DecodeVgprRegister(Val - 256, RegID, 64);
-  } else {
-    switch(Val) {
-    case 102: RegID = getMCReg(FLAT_SCR, STI); return MCDisassembler::Success;
-    case 106: RegID = getMCReg(VCC, STI); return MCDisassembler::Success;
-    case 126: RegID = getMCReg(EXEC, STI); return MCDisassembler::Success;
-    default: return MCDisassembler::Fail;
-    }
-  }
-  return MCDisassembler::Fail;
+MCOperand AMDGPUDisassembler::decodeOperand_VS_64(unsigned Val) const {
+  return decodeSrcOp(OP64, Val);
 }
 
-DecodeStatus AMDGPUDisassembler::DecodeLiteralConstant(MCInst &Inst,
-                                                       uint64_t &Literal) const {
+MCOperand AMDGPUDisassembler::decodeOperand_VGPR_32(unsigned Val) const {
+  return createRegOperand(AMDGPU::VGPR_32RegClassID, Val);
+}
+
+MCOperand AMDGPUDisassembler::decodeOperand_VReg_64(unsigned Val) const {
+  return createRegOperand(AMDGPU::VReg_64RegClassID, Val);
+}
+
+MCOperand AMDGPUDisassembler::decodeOperand_VReg_96(unsigned Val) const {
+  return createRegOperand(AMDGPU::VReg_96RegClassID, Val);
+}
+
+MCOperand AMDGPUDisassembler::decodeOperand_VReg_128(unsigned Val) const {
+  return createRegOperand(AMDGPU::VReg_128RegClassID, Val);
+}
+
+MCOperand AMDGPUDisassembler::decodeOperand_SGPR_32(unsigned Val) const {
+  return createSRegOperand(AMDGPU::SGPR_32RegClassID, Val);
+}
+
+MCOperand AMDGPUDisassembler::decodeOperand_SReg_32(unsigned Val) const {
+  // table-gen generated disassembler doesn't care about operand types
+  // leaving only registry class so SSrc_32 operand turns into SReg_32
+  // and therefore we accept immediates and literals here as well
+  return decodeSrcOp(OP32, Val);
+}
+
+MCOperand AMDGPUDisassembler::decodeOperand_SReg_64(unsigned Val) const {
+  // see decodeOperand_SReg_32 comment
+  return decodeSrcOp(OP64, Val);
+}
+
+MCOperand AMDGPUDisassembler::decodeOperand_SReg_128(unsigned Val) const {
+  return createSRegOperand(AMDGPU::SReg_128RegClassID, Val);
+}
+
+MCOperand AMDGPUDisassembler::decodeOperand_SReg_256(unsigned Val) const {
+  return createSRegOperand(AMDGPU::SReg_256RegClassID, Val);
+}
+
+MCOperand AMDGPUDisassembler::decodeOperand_SReg_512(unsigned Val) const {
+  return createSRegOperand(AMDGPU::SReg_512RegClassID, Val);
+}
+
+
+MCOperand AMDGPUDisassembler::decodeLiteralConstant() const {
   // For now all literal constants are supposed to be unsigned integer
   // ToDo: deal with signed/unsigned 64-bit integer constants
   // ToDo: deal with float/double constants
-  if (Bytes.size() < 8) {
-    return MCDisassembler::Fail;
+  if (Bytes.size() < 4)
+    return errOperand(0, "cannot read literal, inst bytes left " +
+                         Twine(Bytes.size()));
+  return MCOperand::createImm(eatB32(Bytes));
+}
+
+MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
+  assert(Imm >= 128 && Imm <= 208);
+  return MCOperand::createImm((Imm <= 192) ? (Imm - 128) : (192 - Imm));
+}
+
+MCOperand AMDGPUDisassembler::decodeFPImmed(bool Is32, unsigned Imm) {
+  assert(Imm >= 240 && Imm <= 248);
+  // ToDo: case 248: 1/(2*PI) - is allowed only on VI
+  // ToDo: AMDGPUInstPrinter does not support 1/(2*PI). It consider 1/(2*PI) as
+  // literal constant.
+  float V = 0.0f;
+  switch (Imm) {
+  case 240: V =  0.5f; break;
+  case 241: V = -0.5f; break;
+  case 242: V =  1.0f; break;
+  case 243: V = -1.0f; break;
+  case 244: V =  2.0f; break;
+  case 245: V = -2.0f; break;
+  case 246: V =  4.0f; break;
+  case 247: V = -4.0f; break;
+  case 248: return MCOperand::createImm(Is32 ?         // 1/(2*PI)
+                                          0x3e22f983 :
+                                          0x3fc45f306dc9c882);
+  default: break;
   }
-  Literal =
-    0 | (Bytes[7] << 24) | (Bytes[6] << 16) | (Bytes[5] << 8) | (Bytes[4] << 0);
-  return MCDisassembler::Success;
+  return MCOperand::createImm(Is32? FloatToBits(V) : DoubleToBits(V));
 }
 
-DecodeStatus AMDGPUDisassembler::DecodeVGPR_32RegisterClass(llvm::MCInst &Inst,
-                                                            unsigned Imm,
-                                                            uint64_t Addr) const {
-  unsigned RegID;
-  if (DecodeVgprRegister(Imm, RegID) == MCDisassembler::Success) {
-    Inst.addOperand(MCOperand::createReg(RegID));
-    return MCDisassembler::Success;
+MCOperand AMDGPUDisassembler::decodeSrcOp(bool Is32, unsigned Val) const {
+  using namespace AMDGPU;
+  assert(Val < 512); // enum9
+
+  if (Val >= 256)
+    return createRegOperand(Is32 ? VGPR_32RegClassID : VReg_64RegClassID,
+                            Val - 256);
+  if (Val <= 101)
+    return createSRegOperand(Is32 ? SGPR_32RegClassID : SGPR_64RegClassID,
+                             Val);
+
+  if (Val >= 128 && Val <= 208)
+    return decodeIntImmed(Val);
+
+  if (Val >= 240 && Val <= 248)
+    return decodeFPImmed(Is32, Val);
+
+  if (Val == 255)
+    return decodeLiteralConstant();
+
+  return Is32 ? decodeSpecialReg32(Val) : decodeSpecialReg64(Val);
+}
+
+MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
+  using namespace AMDGPU;
+  switch (Val) {
+  case 102: return createRegOperand(getMCReg(FLAT_SCR_LO, STI));
+  case 103: return createRegOperand(getMCReg(FLAT_SCR_HI, STI));
+    // ToDo: no support for xnack_mask_lo/_hi register
+  case 104:
+  case 105: break;
+  case 106: return createRegOperand(VCC_LO);
+  case 107: return createRegOperand(VCC_HI);
+    // ToDo: no support for tba_lo/_hi register
+  case 108:
+  case 109: break;
+    // ToDo: no support for tma_lo/_hi register
+  case 110:
+  case 111: break;
+    // ToDo: no support for ttmp[0:11] register
+  case 112:
+  case 113:
+  case 114:
+  case 115:
+  case 116:
+  case 117:
+  case 118:
+  case 119:
+  case 120:
+  case 121:
+  case 122:
+  case 123: break;
+  case 124: return createRegOperand(M0);
+  case 126: return createRegOperand(EXEC_LO);
+  case 127: return createRegOperand(EXEC_HI);
+    // ToDo: no support for vccz register
+  case 251: break;
+    // ToDo: no support for execz register
+  case 252: break;
+  case 253: return createRegOperand(SCC);
+  default: break;
   }
-  return MCDisassembler::Fail;
+  return errOperand(Val, "unknown operand encoding " + Twine(Val));
 }
 
-DecodeStatus AMDGPUDisassembler::DecodeVSRegisterClass(MCInst &Inst,
-                                                       unsigned Imm,
-                                                       uint64_t Addr,
-                                                       bool Is32) const {
-  // ToDo: different opcodes allow different formats of this operands
-  if ((Imm >= 128) && (Imm <= 208)) {
-    // immediate integer
-    int64_t Val;
-    if (DecodeImmedInteger(Imm, Val) == MCDisassembler::Success) {
-      Inst.addOperand(MCOperand::createImm(Val));
-      return MCDisassembler::Success;
-    }
-  } else if ((Imm >= 240) && (Imm <= 248)) {
-    // immediate float/double
-    uint64_t Val;
-    DecodeStatus status;
-    if (Is32) {
-      uint32_t Val32;
-      status = DecodeImmedFloat(Imm, Val32);
-      Val = static_cast<uint64_t>(Val32);
-    } else {
-      status = DecodeImmedDouble(Imm, Val);
-    }
-    if (status == MCDisassembler::Success) {
-      Inst.addOperand(MCOperand::createImm(Val));
-      return MCDisassembler::Success;
-    }
-  } else if (Imm == 254) {
-    // LDS direct
-    // ToDo: implement LDS direct read
-  } else if (Imm == 255) {
-    // literal constant
-    HasLiteral = true;
-    uint64_t Literal;
-    if (DecodeLiteralConstant(Inst, Literal) == MCDisassembler::Success) {
-      Inst.addOperand(MCOperand::createImm(Literal));
-      return MCDisassembler::Success;
-    }
-    return MCDisassembler::Fail;
-  } else if ((Imm == 125) ||
-             ((Imm >= 209) && (Imm <= 239)) ||
-             (Imm == 249) ||
-             (Imm == 250) ||
-             (Imm >= 512)) {
-    // reserved
-    return MCDisassembler::Fail;
-  } else {
-    // register
-    unsigned RegID;
-    DecodeStatus status = Is32 ? DecodeSrc32Register(Imm, RegID)
-                               : DecodeSrc64Register(Imm, RegID);
-    if (status == MCDisassembler::Success) {
-      Inst.addOperand(MCOperand::createReg(RegID));
-      return MCDisassembler::Success;
-    }
+MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
+  using namespace AMDGPU;
+  switch (Val) {
+  case 102: return createRegOperand(getMCReg(FLAT_SCR, STI));
+  case 106: return createRegOperand(VCC);
+  case 126: return createRegOperand(EXEC);
+  default: break;
   }
-  return MCDisassembler::Fail;
+  return errOperand(Val, "unknown operand encoding " + Twine(Val));
 }
 
-DecodeStatus AMDGPUDisassembler::DecodeVS_32RegisterClass(MCInst &Inst,
-                                                          unsigned Imm,
-                                                          uint64_t Addr) const {
-  return DecodeVSRegisterClass(Inst, Imm, Addr, true);
-}
-
-DecodeStatus AMDGPUDisassembler::DecodeVS_64RegisterClass(MCInst &Inst,
-                                                          unsigned Imm,
-                                                          uint64_t Addr) const {
-  return DecodeVSRegisterClass(Inst, Imm, Addr, false);
-}
-
-DecodeStatus AMDGPUDisassembler::DecodeVReg_64RegisterClass(llvm::MCInst &Inst,
-                                                            unsigned Imm,
-                                                            uint64_t Addr) const {
-  unsigned RegID;
-  if (DecodeVgprRegister(Imm, RegID, 64) == MCDisassembler::Success) {
-    Inst.addOperand(MCOperand::createReg(RegID));
-    return MCDisassembler::Success;
-  }
-  return MCDisassembler::Fail;
-}
-
-
-
 static MCDisassembler *createAMDGPUDisassembler(const Target &T,
                                                 const MCSubtargetInfo &STI,
                                                 MCContext &Ctx) {