[WebAssembly] Initial Disassembler.
This implements a new table-gen emitter to create tables for
a wasm disassembler, and a dissassembler to use them.
Comes with 2 tests, that tests a few instructions manually. Is also able to
disassemble large .wasm files with objdump reasonably.
Not working so well, to be addressed in followups:
- objdump appears to be passing an incorrect starting point.
- since the disassembler works an instruction at a time, and it is
disassembling stack instruction, it has no idea of pseudo register assignments.
These registers are required for the instruction printing code that follows.
For now, all such registers appear in the output as $0.
Patch by Wouter van Oortmerssen
Differential Revision: https://reviews.llvm.org/D45848
llvm-svn: 332052
diff --git a/llvm/lib/Target/WebAssembly/CMakeLists.txt b/llvm/lib/Target/WebAssembly/CMakeLists.txt
index c761867..41b7694 100644
--- a/llvm/lib/Target/WebAssembly/CMakeLists.txt
+++ b/llvm/lib/Target/WebAssembly/CMakeLists.txt
@@ -3,6 +3,7 @@
tablegen(LLVM WebAssemblyGenAsmMatcher.inc -gen-asm-matcher)
tablegen(LLVM WebAssemblyGenAsmWriter.inc -gen-asm-writer)
tablegen(LLVM WebAssemblyGenDAGISel.inc -gen-dag-isel)
+tablegen(LLVM WebAssemblyGenDisassemblerTables.inc -gen-disassembler)
tablegen(LLVM WebAssemblyGenFastISel.inc -gen-fast-isel)
tablegen(LLVM WebAssemblyGenInstrInfo.inc -gen-instr-info)
tablegen(LLVM WebAssemblyGenMCCodeEmitter.inc -gen-emitter)
diff --git a/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp b/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
index f75832f..2f09602 100644
--- a/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
+++ b/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
@@ -19,16 +19,23 @@
#include "WebAssembly.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
+#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Endian.h"
+#include "llvm/Support/LEB128.h"
#include "llvm/Support/TargetRegistry.h"
+
using namespace llvm;
#define DEBUG_TYPE "wasm-disassembler"
+using DecodeStatus = MCDisassembler::DecodeStatus;
+
+#include "WebAssemblyGenDisassemblerTables.inc"
+
namespace {
class WebAssemblyDisassembler final : public MCDisassembler {
std::unique_ptr<const MCInstrInfo> MCII;
@@ -60,11 +67,120 @@
createWebAssemblyDisassembler);
}
+static int nextByte(ArrayRef<uint8_t> Bytes, uint64_t &Size) {
+ if (Size >= Bytes.size())
+ return -1;
+ auto V = Bytes[Size];
+ Size++;
+ return V;
+}
+
+static bool parseLEBImmediate(MCInst &MI, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, bool Signed) {
+ unsigned N = 0;
+ const char *Error = nullptr;
+ auto Val = Signed ? decodeSLEB128(Bytes.data() + Size, &N,
+ Bytes.data() + Bytes.size(), &Error)
+ : static_cast<int64_t>(
+ decodeULEB128(Bytes.data() + Size, &N,
+ Bytes.data() + Bytes.size(), &Error));
+ if (Error)
+ return false;
+ Size += N;
+ MI.addOperand(MCOperand::createImm(Val));
+ return true;
+}
+
+template <typename T>
+bool parseFPImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) {
+ if (Size + sizeof(T) > Bytes.size())
+ return false;
+ T Val;
+ memcpy(&Val, Bytes.data() + Size, sizeof(T));
+ support::endian::byte_swap<T, support::endianness::little>(Val);
+ Size += sizeof(T);
+ MI.addOperand(MCOperand::createFPImm(static_cast<double>(Val)));
+ return true;
+}
+
MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t /*Address*/,
- raw_ostream &OS, raw_ostream &CS) const {
-
- // TODO: Implement disassembly.
-
- return MCDisassembler::Fail;
+ raw_ostream & /*OS*/, raw_ostream &CS) const {
+ CommentStream = &CS;
+ Size = 0;
+ auto Opc = nextByte(Bytes, Size);
+ if (Opc < 0)
+ return MCDisassembler::Fail;
+ const auto *WasmInst = &InstructionTable0[Opc];
+ // If this is a prefix byte, indirect to another table.
+ if (WasmInst->ET == ET_Prefix) {
+ WasmInst = nullptr;
+ // Linear search, so far only 2 entries.
+ for (auto PT = PrefixTable; PT->Table; PT++) {
+ if (PT->Prefix == Opc) {
+ WasmInst = PT->Table;
+ break;
+ }
+ }
+ if (!WasmInst)
+ return MCDisassembler::Fail;
+ Opc = nextByte(Bytes, Size);
+ if (Opc < 0)
+ return MCDisassembler::Fail;
+ WasmInst += Opc;
+ }
+ if (WasmInst->ET == ET_Unused)
+ return MCDisassembler::Fail;
+ // At this point we must have a valid instruction to decode.
+ assert(WasmInst->ET == ET_Instruction);
+ MI.setOpcode(WasmInst->Opcode);
+ // Parse any operands.
+ for (uint8_t OPI = 0; OPI < WasmInst->NumOperands; OPI++) {
+ switch (WasmInst->Operands[OPI]) {
+ // ULEB operands:
+ case WebAssembly::OPERAND_BASIC_BLOCK:
+ case WebAssembly::OPERAND_LOCAL:
+ case WebAssembly::OPERAND_GLOBAL:
+ case WebAssembly::OPERAND_FUNCTION32:
+ case WebAssembly::OPERAND_OFFSET32:
+ case WebAssembly::OPERAND_P2ALIGN:
+ case WebAssembly::OPERAND_TYPEINDEX:
+ case MCOI::OPERAND_IMMEDIATE: {
+ if (!parseLEBImmediate(MI, Size, Bytes, false))
+ return MCDisassembler::Fail;
+ break;
+ }
+ // SLEB operands:
+ case WebAssembly::OPERAND_I32IMM:
+ case WebAssembly::OPERAND_I64IMM:
+ case WebAssembly::OPERAND_SIGNATURE: {
+ if (!parseLEBImmediate(MI, Size, Bytes, true))
+ return MCDisassembler::Fail;
+ break;
+ }
+ // FP operands.
+ case WebAssembly::OPERAND_F32IMM: {
+ if (!parseFPImmediate<float>(MI, Size, Bytes))
+ return MCDisassembler::Fail;
+ break;
+ }
+ case WebAssembly::OPERAND_F64IMM: {
+ if (!parseFPImmediate<double>(MI, Size, Bytes))
+ return MCDisassembler::Fail;
+ break;
+ }
+ case MCOI::OPERAND_REGISTER: {
+ // These are NOT actually in the instruction stream, but MC is going to
+ // expect operands to be present for them!
+ // FIXME: can MC re-generate register assignments or do we have to
+ // do this? Since this function decodes a single instruction, we don't
+ // have the proper context for tracking an operand stack here.
+ MI.addOperand(MCOperand::createReg(0));
+ break;
+ }
+ default:
+ llvm_unreachable("Unknown operand type in WebAssemblyDisassembler");
+ }
+ }
+ return MCDisassembler::Success;
}
diff --git a/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp b/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp
index 6629ce6..10fa798 100644
--- a/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp
+++ b/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp
@@ -46,7 +46,7 @@
void WebAssemblyInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
StringRef Annot,
- const MCSubtargetInfo & /*STI*/) {
+ const MCSubtargetInfo &STI) {
// Print the instruction (this uses the AsmStrings from the .td files).
printInstruction(MI, OS);
@@ -194,20 +194,16 @@
}
}
-void
-WebAssemblyInstPrinter::printWebAssemblyP2AlignOperand(const MCInst *MI,
- unsigned OpNo,
- raw_ostream &O) {
+void WebAssemblyInstPrinter::printWebAssemblyP2AlignOperand(
+ const MCInst *MI, unsigned OpNo, raw_ostream &O) {
int64_t Imm = MI->getOperand(OpNo).getImm();
if (Imm == WebAssembly::GetDefaultP2Align(MI->getOpcode()))
return;
O << ":p2align=" << Imm;
}
-void
-WebAssemblyInstPrinter::printWebAssemblySignatureOperand(const MCInst *MI,
- unsigned OpNo,
- raw_ostream &O) {
+void WebAssemblyInstPrinter::printWebAssemblySignatureOperand(
+ const MCInst *MI, unsigned OpNo, raw_ostream &O) {
int64_t Imm = MI->getOperand(OpNo).getImm();
switch (WebAssembly::ExprType(Imm)) {
case WebAssembly::ExprType::Void: break;
diff --git a/llvm/lib/Target/WebAssembly/WebAssembly.td b/llvm/lib/Target/WebAssembly/WebAssembly.td
index ad1549f..2f301da 100644
--- a/llvm/lib/Target/WebAssembly/WebAssembly.td
+++ b/llvm/lib/Target/WebAssembly/WebAssembly.td
@@ -82,7 +82,15 @@
let ShouldEmitMatchRegisterName = 0;
}
+def WebAssemblyAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ int PassSubtarget = 0;
+ int Variant = 0;
+ bit isMCAsmWriter = 1;
+}
+
def WebAssembly : Target {
let InstructionSet = WebAssemblyInstrInfo;
let AssemblyParsers = [WebAssemblyAsmParser];
+ let AssemblyWriters = [WebAssemblyAsmWriter];
}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td
index c667f93..d59aba4 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td
@@ -57,6 +57,10 @@
}
} // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1
+// This is technically a control-flow instruction, since all it affects is the
+// IP.
+def NOP : I<(outs), (ins), [], "nop", 0x01>;
+
// Placemarkers to indicate the start or end of a block or loop scope.
// These use/clobber VALUE_STACK to prevent them from being moved into the
// middle of an expression tree.