Fix oatdump crash on arm64/arm code.
Also adds 16 bit literal information.

Rationale:
When "run-away" instructions are disassembled, the literal
addresses may go out of range, causing oatdump to crash.
This CL guards memory access against the full memory range
allocated to assembly instructions and data (it is possible
but not really necessary to refine this a bit). Out of range
arguments are now displayed as (?) to denote the issue, which
is a lot nicer than crashing.

BUG=28670871

Change-Id: I51e9b6a6a99162546fe31059f14278e8980451c2
diff --git a/disassembler/disassembler.h b/disassembler/disassembler.h
index b99e5c2..b080315 100644
--- a/disassembler/disassembler.h
+++ b/disassembler/disassembler.h
@@ -31,16 +31,23 @@
   // Should the disassembler print absolute or relative addresses.
   const bool absolute_addresses_;
 
-  // Base addess for calculating relative code offsets when absolute_addresses_ is false.
+  // Base address for calculating relative code offsets when absolute_addresses_ is false.
   const uint8_t* const base_address_;
 
+  // End address (exclusive);
+  const uint8_t* const end_address_;
+
   // If set, the disassembler is allowed to look at load targets in literal
   // pools.
   const bool can_read_literals_;
 
-  DisassemblerOptions(bool absolute_addresses, const uint8_t* base_address,
+  DisassemblerOptions(bool absolute_addresses,
+                      const uint8_t* base_address,
+                      const uint8_t* end_address,
                       bool can_read_literals)
-      : absolute_addresses_(absolute_addresses), base_address_(base_address),
+      : absolute_addresses_(absolute_addresses),
+        base_address_(base_address),
+        end_address_(end_address),
         can_read_literals_(can_read_literals) {}
 
  private:
diff --git a/disassembler/disassembler_arm.cc b/disassembler/disassembler_arm.cc
index bcb0438..286faf2 100644
--- a/disassembler/disassembler_arm.cc
+++ b/disassembler/disassembler_arm.cc
@@ -418,7 +418,12 @@
   return os << static_cast<int>(type);
 }
 
-void DumpThumb2Literal(std::ostream& args, const uint8_t* instr_ptr, uint32_t U, uint32_t imm32,
+void DumpThumb2Literal(std::ostream& args,
+                       const uint8_t* instr_ptr,
+                       const uintptr_t lo_adr,
+                       const uintptr_t hi_adr,
+                       uint32_t U,
+                       uint32_t imm32,
                        T2LitType type) {
   // Literal offsets (imm32) are not required to be aligned so we may need unaligned access.
   typedef const int16_t unaligned_int16_t __attribute__ ((aligned (1)));
@@ -428,8 +433,16 @@
   typedef const int64_t unaligned_int64_t __attribute__ ((aligned (1)));
   typedef const uint64_t unaligned_uint64_t __attribute__ ((aligned (1)));
 
+  // Get address of literal. Bail if not within expected buffer range to
+  // avoid trying to fetch invalid literals (we can encounter this when
+  // interpreting raw data as instructions).
   uintptr_t pc = RoundDown(reinterpret_cast<intptr_t>(instr_ptr) + 4, 4);
   uintptr_t lit_adr = U ? pc + imm32 : pc - imm32;
+  if (lit_adr < lo_adr || lit_adr >= hi_adr) {
+    args << "  ; (?)";
+    return;
+  }
+
   args << "  ; ";
   switch (type) {
     case kT2LitUByte:
@@ -482,6 +495,10 @@
     return DumpThumb16(os, instr_ptr);
   }
 
+  // Set valid address range of backing buffer.
+  const uintptr_t lo_adr = reinterpret_cast<intptr_t>(GetDisassemblerOptions()->base_address_);
+  const uintptr_t hi_adr = reinterpret_cast<intptr_t>(GetDisassemblerOptions()->end_address_);
+
   uint32_t op2 = (instr >> 20) & 0x7F;
   std::ostringstream opcode;
   std::ostringstream args;
@@ -824,7 +841,7 @@
                 args << d << ", [" << Rn << ", #" << ((U == 1) ? "" : "-")
                      << (imm8 << 2) << "]";
                 if (Rn.r == 15 && U == 1) {
-                  DumpThumb2Literal(args, instr_ptr, U, imm8 << 2, kT2LitHexLong);
+                  DumpThumb2Literal(args, instr_ptr, lo_adr, hi_adr, U, imm8 << 2, kT2LitHexLong);
                 }
               } else if (Rn.r == 13 && W == 1 && U == L) {  // VPUSH/VPOP
                 opcode << (L == 1 ? "vpop" : "vpush");
@@ -1410,7 +1427,7 @@
               };
               DCHECK_LT(op2 >> 1, arraysize(lit_type));
               DCHECK_NE(lit_type[op2 >> 1], kT2LitInvalid);
-              DumpThumb2Literal(args, instr_ptr, U, imm12, lit_type[op2 >> 1]);
+              DumpThumb2Literal(args, instr_ptr, lo_adr, hi_adr, U, imm12, lit_type[op2 >> 1]);
             }
           } else if ((instr & 0xFC0) == 0) {
             opcode << ldr_str << sign << type << ".w";
@@ -1711,10 +1728,13 @@
           break;
       }
     } else if (opcode1 == 0x12 || opcode1 == 0x13) {  // 01001x
+      const uintptr_t lo_adr = reinterpret_cast<intptr_t>(GetDisassemblerOptions()->base_address_);
+      const uintptr_t hi_adr = reinterpret_cast<intptr_t>(GetDisassemblerOptions()->end_address_);
       ThumbRegister Rt(instr, 8);
       uint16_t imm8 = instr & 0xFF;
       opcode << "ldr";
       args << Rt << ", [pc, #" << (imm8 << 2) << "]";
+      DumpThumb2Literal(args, instr_ptr, lo_adr, hi_adr, /*U*/ 1u, imm8 << 2, kT2LitHexWord);
     } else if ((opcode1 >= 0x14 && opcode1 <= 0x17) ||  // 0101xx
                (opcode1 >= 0x18 && opcode1 <= 0x1f) ||  // 011xxx
                (opcode1 >= 0x20 && opcode1 <= 0x27)) {  // 100xxx
diff --git a/disassembler/disassembler_arm64.cc b/disassembler/disassembler_arm64.cc
index 5f88714..6a9afe5 100644
--- a/disassembler/disassembler_arm64.cc
+++ b/disassembler/disassembler_arm64.cc
@@ -63,9 +63,17 @@
     return;
   }
 
+  // Get address of literal. Bail if not within expected buffer range to
+  // avoid trying to fetch invalid literals (we can encounter this when
+  // interpreting raw data as instructions).
   void* data_address = instr->LiteralAddress<void*>();
-  vixl::Instr op = instr->Mask(vixl::LoadLiteralMask);
+  if (data_address < base_address_ || data_address >= end_address_) {
+    AppendToOutput(" (?)");
+    return;
+  }
 
+  // Output information on literal.
+  vixl::Instr op = instr->Mask(vixl::LoadLiteralMask);
   switch (op) {
     case vixl::LDR_w_lit:
     case vixl::LDR_x_lit:
diff --git a/disassembler/disassembler_arm64.h b/disassembler/disassembler_arm64.h
index 44fa53f..a4e5ee8 100644
--- a/disassembler/disassembler_arm64.h
+++ b/disassembler/disassembler_arm64.h
@@ -30,8 +30,11 @@
 
 class CustomDisassembler FINAL : public vixl::Disassembler {
  public:
-  explicit CustomDisassembler(DisassemblerOptions* options) :
-      vixl::Disassembler(), read_literals_(options->can_read_literals_) {
+  explicit CustomDisassembler(DisassemblerOptions* options)
+      : vixl::Disassembler(),
+        read_literals_(options->can_read_literals_),
+        base_address_(options->base_address_),
+        end_address_(options->end_address_) {
     if (!options->absolute_addresses_) {
       MapCodeAddress(0, reinterpret_cast<const vixl::Instruction*>(options->base_address_));
     }
@@ -55,6 +58,10 @@
   //           true | 0x72681558: 1c000acb  ldr s11, pc+344 (addr 0x726816b0)
   //          false | 0x72681558: 1c000acb  ldr s11, pc+344 (addr 0x726816b0) (3.40282e+38)
   const bool read_literals_;
+
+  // Valid address range: [base_address_, end_address_)
+  const void* const base_address_;
+  const void* const end_address_;
 };
 
 class DisassemblerArm64 FINAL : public Disassembler {