Ensure stack maps are 4-byte aligned.

With the recent move to gcc 4.9, we are hitting alignment
SIGBUS crashes on ARM. The reason is that gcc will merge two consecutive
32-bit loads into one 64-bit load, and the resulting instruction (ldrd)
will fault if the data is not 4-byte aligned.
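
A hypothetical pattern (not the actual crashing code) illustrating the
kind of load gcc 4.9 may fuse:

    #include <cstdint>

    struct Entry {
      uint32_t dex_pc;
      uint32_t native_pc_offset;
    };

    uint64_t ReadBoth(const Entry* entry) {
      // gcc may fuse these two adjacent 32-bit loads into a single 64-bit
      // load (ldrd on ARM); ldrd faults if 'entry' is not 4-byte aligned.
      return static_cast<uint64_t>(entry->dex_pc) |
             (static_cast<uint64_t>(entry->native_pc_offset) << 32);
    }

Rounding every stack map size up to a multiple of 4 (see
ComputeAlignedStackMapSize in the diff below) guarantees such a fused load
always sees a 4-byte-aligned address.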

Also removed the emission of the mapping table when a method is optimized.
The information can be found in the StackMap itself.
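
Conceptually, a pc-to-dex query can be answered straight from the stack
maps. A minimal sketch with placeholder types (not ART's real classes):

    #include <cstdint>
    #include <vector>

    // Placeholder type, not ART's real StackMap class.
    struct SimpleStackMap {
      uint32_t dex_pc;
      uint32_t native_pc_offset;
    };

    // Returns the dex pc recorded for 'sought_offset', or 0xFFFFFFFF
    // (standing in for DexFile::kDexNoIndex) when no map matches.
    uint32_t ToDexPc(const std::vector<SimpleStackMap>& maps,
                     uint32_t sought_offset) {
      for (const SimpleStackMap& map : maps) {
        if (map.native_pc_offset == sought_offset) {
          return map.dex_pc;
        }
      }
      return 0xFFFFFFFFu;
    }

The hunk in ArtMethod::ToDexPc below does the equivalent via
GetStackMap(sought_offset).GetDexPc().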

Change-Id: Icf79406c18a3f4db3c05d52fc2c0dd2e35bf0f8f
diff --git a/runtime/mirror/art_method.cc b/runtime/mirror/art_method.cc
index 1729686..1a13f93 100644
--- a/runtime/mirror/art_method.cc
+++ b/runtime/mirror/art_method.cc
@@ -204,7 +204,14 @@
     // Portable doesn't use the machine pc, we just use dex pc instead.
     return static_cast<uint32_t>(pc);
   }
+
   const void* entry_point = GetQuickOatEntryPoint(sizeof(void*));
+  uint32_t sought_offset = pc - reinterpret_cast<uintptr_t>(entry_point);
+  if (IsOptimized(sizeof(void*))) {
+    uint32_t ret = GetStackMap(sought_offset).GetDexPc();
+    return ret;
+  }
+
   MappingTable table(entry_point != nullptr ?
       GetMappingTable(EntryPointToCodePointer(entry_point), sizeof(void*)) : nullptr);
   if (table.TotalSize() == 0) {
@@ -213,7 +220,6 @@
     DCHECK(IsNative() || IsCalleeSaveMethod() || IsProxyMethod()) << PrettyMethod(this);
     return DexFile::kDexNoIndex;   // Special no mapping case
   }
-  uint32_t sought_offset = pc - reinterpret_cast<uintptr_t>(entry_point);
   // Assume the caller wants a pc-to-dex mapping so check here first.
   typedef MappingTable::PcToDexIterator It;
   for (It cur = table.PcToDexBegin(), end = table.PcToDexEnd(); cur != end; ++cur) {
diff --git a/runtime/mirror/art_method.h b/runtime/mirror/art_method.h
index 2107944..64663ed 100644
--- a/runtime/mirror/art_method.h
+++ b/runtime/mirror/art_method.h
@@ -151,7 +151,8 @@
     // Temporary solution for detecting if a method has been optimized: the compiler
     // does not create a GC map. Instead, the vmap table contains the stack map
     // (as in stack_map.h).
-    return GetEntryPointFromQuickCompiledCodePtrSize(pointer_size) != nullptr
+    return !IsNative()
+        && GetEntryPointFromQuickCompiledCodePtrSize(pointer_size) != nullptr
         && GetQuickOatCodePointer(pointer_size) != nullptr
         && GetNativeGcMap(pointer_size) == nullptr;
   }
diff --git a/runtime/stack_map.h b/runtime/stack_map.h
index a58ecab..7cc3e57 100644
--- a/runtime/stack_map.h
+++ b/runtime/stack_map.h
@@ -19,6 +19,7 @@
 
 #include "base/bit_vector.h"
 #include "memory_region.h"
+#include "utils.h"
 
 namespace art {
 
@@ -199,6 +200,11 @@
        && region_.size() == other.region_.size();
   }
 
+  static size_t ComputeAlignedStackMapSize(size_t stack_mask_size) {
+    // On ARM, the stack maps must be 4-byte aligned.
+    return RoundUp(StackMap::kFixedSize + stack_mask_size, 4);
+  }
+
  private:
   static constexpr int kDexPcOffset = 0;
   static constexpr int kNativePcOffsetOffset = kDexPcOffset + sizeof(uint32_t);
@@ -262,7 +268,7 @@
   }
 
   size_t StackMapSize() const {
-    return StackMap::kFixedSize + GetStackMaskSize();
+    return StackMap::ComputeAlignedStackMapSize(GetStackMaskSize());
   }
 
   DexRegisterMap GetDexRegisterMapOf(StackMap stack_map, uint32_t number_of_dex_registers) {