Cache memory regions in ProcessMinidump and use the linux maps as the source of the information if available

Breakpad creates minidump files that sometimes have:
- linux maps textual content
- no MemoryInfoList

Right now, unless the file has a MemoryInfoList, we get no region information.

This patch:

- reads and caches the memory region info one time and sorts it for easy subsequent access
- gets the region info from the best available source, in this order:
  - linux maps info (if available)
  - MemoryInfoList (if available)
  - MemoryList or Memory64List
- returns memory region info for the gaps between regions (including before the first and after the last)

Differential Revision: https://reviews.llvm.org/D55522

llvm-svn: 349182
diff --git a/lldb/source/Plugins/Process/Utility/CMakeLists.txt b/lldb/source/Plugins/Process/Utility/CMakeLists.txt
index b43756a..e36ce4d 100644
--- a/lldb/source/Plugins/Process/Utility/CMakeLists.txt
+++ b/lldb/source/Plugins/Process/Utility/CMakeLists.txt
@@ -5,6 +5,7 @@
   HistoryThread.cpp
   HistoryUnwind.cpp
   InferiorCallPOSIX.cpp
+  LinuxProcMaps.cpp
   LinuxSignals.cpp
   MipsLinuxSignals.cpp
   NativeRegisterContextRegisterInfo.cpp
diff --git a/lldb/source/Plugins/Process/Utility/LinuxProcMaps.cpp b/lldb/source/Plugins/Process/Utility/LinuxProcMaps.cpp
new file mode 100644
index 0000000..d45bf6d
--- /dev/null
+++ b/lldb/source/Plugins/Process/Utility/LinuxProcMaps.cpp
@@ -0,0 +1,113 @@
+//===-- LinuxProcMaps.cpp ---------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LinuxProcMaps.h"
+#include "llvm/ADT/StringRef.h"
+#include "lldb/Target/MemoryRegionInfo.h"
+#include "lldb/Utility/Status.h"
+#include "lldb/Utility/StringExtractor.h"
+
+using namespace lldb_private;
+
+static Status
+ParseMemoryRegionInfoFromProcMapsLine(llvm::StringRef maps_line,
+                                      MemoryRegionInfo &memory_region_info) {
+  memory_region_info.Clear();
+  
+  StringExtractor line_extractor(maps_line);
+  
+  // Format: {address_start_hex}-{address_end_hex} perms offset dev inode
+  // pathname. perms is "rwxp": each letter is present if set, '-' if not;
+  // the final character is p=private or s=shared.
+  
+  // Parse out the starting address
+  lldb::addr_t start_address = line_extractor.GetHexMaxU64(false, 0);
+  
+  // Parse out hyphen separating start and end address from range.
+  if (!line_extractor.GetBytesLeft() || (line_extractor.GetChar() != '-'))
+    return Status(
+        "malformed /proc/{pid}/maps entry, missing dash between address range");
+  
+  // Parse out the ending address
+  lldb::addr_t end_address = line_extractor.GetHexMaxU64(false, start_address);
+  
+  // Parse out the space after the address.
+  if (!line_extractor.GetBytesLeft() || (line_extractor.GetChar() != ' '))
+    return Status(
+        "malformed /proc/{pid}/maps entry, missing space after range");
+  
+  // Save the range.
+  memory_region_info.GetRange().SetRangeBase(start_address);
+  memory_region_info.GetRange().SetRangeEnd(end_address);
+  
+  // Any memory region in /proc/{pid}/maps is by definition mapped into the
+  // process.
+  memory_region_info.SetMapped(MemoryRegionInfo::OptionalBool::eYes);
+  
+  // Parse out each permission entry.
+  if (line_extractor.GetBytesLeft() < 4)
+    return Status("malformed /proc/{pid}/maps entry, missing some portion of "
+                  "permissions");
+  
+  // Handle read permission.
+  const char read_perm_char = line_extractor.GetChar();
+  if (read_perm_char == 'r')
+    memory_region_info.SetReadable(MemoryRegionInfo::OptionalBool::eYes);
+  else if (read_perm_char == '-')
+    memory_region_info.SetReadable(MemoryRegionInfo::OptionalBool::eNo);
+  else
+    return Status("unexpected /proc/{pid}/maps read permission char");
+  
+  // Handle write permission.
+  const char write_perm_char = line_extractor.GetChar();
+  if (write_perm_char == 'w')
+    memory_region_info.SetWritable(MemoryRegionInfo::OptionalBool::eYes);
+  else if (write_perm_char == '-')
+    memory_region_info.SetWritable(MemoryRegionInfo::OptionalBool::eNo);
+  else
+    return Status("unexpected /proc/{pid}/maps write permission char");
+  
+  // Handle execute permission.
+  const char exec_perm_char = line_extractor.GetChar();
+  if (exec_perm_char == 'x')
+    memory_region_info.SetExecutable(MemoryRegionInfo::OptionalBool::eYes);
+  else if (exec_perm_char == '-')
+    memory_region_info.SetExecutable(MemoryRegionInfo::OptionalBool::eNo);
+  else
+    return Status("unexpected /proc/{pid}/maps exec permission char");
+  
+  line_extractor.GetChar();              // Read the private bit
+  line_extractor.SkipSpaces();           // Skip the separator
+  line_extractor.GetHexMaxU64(false, 0); // Read the offset
+  line_extractor.GetHexMaxU64(false, 0); // Read the major device number
+  line_extractor.GetChar();              // Read the device id separator
+  line_extractor.GetHexMaxU64(false, 0); // Read the minor device number
+  line_extractor.SkipSpaces();           // Skip the separator
+  line_extractor.GetU64(0, 10);          // Read the inode number
+  
+  line_extractor.SkipSpaces();
+  const char *name = line_extractor.Peek();
+  if (name)
+    memory_region_info.SetName(name);
+  
+  return Status();
+}
+
+void lldb_private::ParseLinuxMapRegions(llvm::StringRef linux_map,
+                                        LinuxMapCallback const &callback) {
+  llvm::StringRef lines(linux_map);
+  llvm::StringRef line;
+  while (!lines.empty()) {
+    std::tie(line, lines) = lines.split('\n');
+    MemoryRegionInfo region;
+    Status error = ParseMemoryRegionInfoFromProcMapsLine(line, region);
+    if (!callback(region, error))
+      break;
+  }
+}
diff --git a/lldb/source/Plugins/Process/Utility/LinuxProcMaps.h b/lldb/source/Plugins/Process/Utility/LinuxProcMaps.h
new file mode 100644
index 0000000..e6eabb2
--- /dev/null
+++ b/lldb/source/Plugins/Process/Utility/LinuxProcMaps.h
@@ -0,0 +1,28 @@
+//===-- LinuxProcMaps.h -----------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef liblldb_LinuxProcMaps_H_
+#define liblldb_LinuxProcMaps_H_
+
+#include "lldb/lldb-forward.h"
+#include "llvm/ADT/StringRef.h"
+#include <functional>
+
+
+namespace lldb_private {
+
+typedef std::function<bool(const lldb_private::MemoryRegionInfo &,
+                           const lldb_private::Status &)> LinuxMapCallback;
+
+void ParseLinuxMapRegions(llvm::StringRef linux_map,
+                          LinuxMapCallback const &callback);
+
+} // namespace lldb_private
+
+#endif // liblldb_LinuxProcMaps_H_
diff --git a/lldb/source/Plugins/Process/minidump/MinidumpParser.cpp b/lldb/source/Plugins/Process/minidump/MinidumpParser.cpp
index adaa01c..bae5989 100644
--- a/lldb/source/Plugins/Process/minidump/MinidumpParser.cpp
+++ b/lldb/source/Plugins/Process/minidump/MinidumpParser.cpp
@@ -13,12 +13,14 @@
 
 #include "lldb/Target/MemoryRegionInfo.h"
 #include "lldb/Utility/LLDBAssert.h"
+#include "Plugins/Process/Utility/LinuxProcMaps.h"
 
 // C includes
 // C++ includes
 #include <algorithm>
 #include <map>
 #include <vector>
+#include <utility>
 
 using namespace lldb_private;
 using namespace minidump;
@@ -410,72 +412,147 @@
   return range->range_ref.slice(offset, overlap);
 }
 
-llvm::Optional<MemoryRegionInfo>
-MinidumpParser::GetMemoryRegionInfo(lldb::addr_t load_addr) {
-  MemoryRegionInfo info;
-  llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::MemoryInfoList);
+static bool
+CreateRegionsCacheFromLinuxMaps(MinidumpParser &parser,
+                                std::vector<MemoryRegionInfo> &regions) {
+  auto data = parser.GetStream(MinidumpStreamType::LinuxMaps);
   if (data.empty())
-    return llvm::None;
+    return false;
+  ParseLinuxMapRegions(llvm::toStringRef(data),
+                       [&](const lldb_private::MemoryRegionInfo &region,
+                           const lldb_private::Status &status) -> bool {
+    if (status.Success())
+      regions.push_back(region);
+    return true;
+  });
+  return !regions.empty();
+}
 
-  std::vector<const MinidumpMemoryInfo *> mem_info_list =
-      MinidumpMemoryInfo::ParseMemoryInfoList(data);
+static bool
+CreateRegionsCacheFromMemoryInfoList(MinidumpParser &parser,
+                                     std::vector<MemoryRegionInfo> &regions) {
+  auto data = parser.GetStream(MinidumpStreamType::MemoryInfoList);
+  if (data.empty())
+    return false;
+  auto mem_info_list = MinidumpMemoryInfo::ParseMemoryInfoList(data);
   if (mem_info_list.empty())
-    return llvm::None;
-
-  const auto yes = MemoryRegionInfo::eYes;
-  const auto no = MemoryRegionInfo::eNo;
-
-  const MinidumpMemoryInfo *next_entry = nullptr;
+    return false;
+  constexpr auto yes = MemoryRegionInfo::eYes;
+  constexpr auto no = MemoryRegionInfo::eNo;
+  regions.reserve(mem_info_list.size());
   for (const auto &entry : mem_info_list) {
-    const auto head = entry->base_address;
-    const auto tail = head + entry->region_size;
-
-    if (head <= load_addr && load_addr < tail) {
-      info.GetRange().SetRangeBase(
-          (entry->state != uint32_t(MinidumpMemoryInfoState::MemFree))
-              ? head
-              : load_addr);
-      info.GetRange().SetRangeEnd(tail);
-
-      const uint32_t PageNoAccess =
-          static_cast<uint32_t>(MinidumpMemoryProtectionContants::PageNoAccess);
-      info.SetReadable((entry->protect & PageNoAccess) == 0 ? yes : no);
-
-      const uint32_t PageWritable =
-          static_cast<uint32_t>(MinidumpMemoryProtectionContants::PageWritable);
-      info.SetWritable((entry->protect & PageWritable) != 0 ? yes : no);
-
-      const uint32_t PageExecutable = static_cast<uint32_t>(
-          MinidumpMemoryProtectionContants::PageExecutable);
-      info.SetExecutable((entry->protect & PageExecutable) != 0 ? yes : no);
-
-      const uint32_t MemFree =
-          static_cast<uint32_t>(MinidumpMemoryInfoState::MemFree);
-      info.SetMapped((entry->state != MemFree) ? yes : no);
-
-      return info;
-    } else if (head > load_addr &&
-               (next_entry == nullptr || head < next_entry->base_address)) {
-      // In case there is no region containing load_addr keep track of the
-      // nearest region after load_addr so we can return the distance to it.
-      next_entry = entry;
-    }
+    MemoryRegionInfo region;
+    region.GetRange().SetRangeBase(entry->base_address);
+    region.GetRange().SetByteSize(entry->region_size);
+    region.SetReadable(entry->isReadable() ? yes : no);
+    region.SetWritable(entry->isWritable() ? yes : no);
+    region.SetExecutable(entry->isExecutable() ? yes : no);
+    region.SetMapped(entry->isMapped() ? yes : no);
+    regions.push_back(region);
   }
+  return !regions.empty();
+}
 
-  // No containing region found. Create an unmapped region that extends to the
-  // next region or LLDB_INVALID_ADDRESS
-  info.GetRange().SetRangeBase(load_addr);
-  info.GetRange().SetRangeEnd((next_entry != nullptr) ? next_entry->base_address
-                                                      : LLDB_INVALID_ADDRESS);
-  info.SetReadable(no);
-  info.SetWritable(no);
-  info.SetExecutable(no);
-  info.SetMapped(no);
+static bool
+CreateRegionsCacheFromMemoryList(MinidumpParser &parser,
+                                 std::vector<MemoryRegionInfo> &regions) {
+  auto data = parser.GetStream(MinidumpStreamType::MemoryList);
+  if (data.empty())
+    return false;
+  auto memory_list = MinidumpMemoryDescriptor::ParseMemoryList(data);
+  if (memory_list.empty())
+    return false;
+  regions.reserve(memory_list.size());
+  for (const auto &memory_desc : memory_list) {
+    if (memory_desc.memory.data_size == 0)
+      continue;
+    MemoryRegionInfo region;
+    region.GetRange().SetRangeBase(memory_desc.start_of_memory_range);
+    region.GetRange().SetByteSize(memory_desc.memory.data_size);
+    region.SetReadable(MemoryRegionInfo::eYes);
+    region.SetMapped(MemoryRegionInfo::eYes);
+    regions.push_back(region);
+  }
+  regions.shrink_to_fit();
+  return !regions.empty();
+}
 
-  // Note that the memory info list doesn't seem to contain ranges in kernel
-  // space, so if you're walking a stack that has kernel frames, the stack may
-  // appear truncated.
-  return info;
+static bool
+CreateRegionsCacheFromMemory64List(MinidumpParser &parser,
+                                   std::vector<MemoryRegionInfo> &regions) {
+  llvm::ArrayRef<uint8_t> data =
+      parser.GetStream(MinidumpStreamType::Memory64List);
+  if (data.empty())
+    return false;
+  llvm::ArrayRef<MinidumpMemoryDescriptor64> memory64_list;
+  uint64_t base_rva;
+  std::tie(memory64_list, base_rva) =
+      MinidumpMemoryDescriptor64::ParseMemory64List(data);
+  
+  if (memory64_list.empty())
+    return false;
+    
+  regions.reserve(memory64_list.size());
+  for (const auto &memory_desc : memory64_list) {
+    if (memory_desc.data_size == 0)
+      continue;
+    MemoryRegionInfo region;
+    region.GetRange().SetRangeBase(memory_desc.start_of_memory_range);
+    region.GetRange().SetByteSize(memory_desc.data_size);
+    region.SetReadable(MemoryRegionInfo::eYes);
+    region.SetMapped(MemoryRegionInfo::eYes);
+    regions.push_back(region);
+  }
+  regions.shrink_to_fit();
+  return !regions.empty();
+}
+
+MemoryRegionInfo
+MinidumpParser::FindMemoryRegion(lldb::addr_t load_addr) const {
+  auto begin = m_regions.begin();
+  auto end = m_regions.end();
+  auto pos = std::lower_bound(begin, end, load_addr);
+  if (pos != end && pos->GetRange().Contains(load_addr))
+    return *pos;
+  
+  MemoryRegionInfo region;
+  if (pos == begin)
+    region.GetRange().SetRangeBase(0);
+  else {
+    auto prev = pos - 1;
+    if (prev->GetRange().Contains(load_addr))
+      return *prev;
+    region.GetRange().SetRangeBase(prev->GetRange().GetRangeEnd());
+  }
+  if (pos == end)
+    region.GetRange().SetRangeEnd(UINT64_MAX);
+  else
+    region.GetRange().SetRangeEnd(pos->GetRange().GetRangeBase());
+  region.SetReadable(MemoryRegionInfo::eNo);
+  region.SetWritable(MemoryRegionInfo::eNo);
+  region.SetExecutable(MemoryRegionInfo::eNo);
+  region.SetMapped(MemoryRegionInfo::eNo);
+  return region;
+}
+
+MemoryRegionInfo
+MinidumpParser::GetMemoryRegionInfo(lldb::addr_t load_addr) {
+  if (!m_parsed_regions) {
+    m_parsed_regions = true;
+    // We haven't cached our memory regions yet, so create the region cache
+    // now; this happens only once. We create the cache from the best source
+    // available. We start with the linux maps since they are the most
+    // complete and have names for the regions. Next we try the
+    // MemoryInfoList since it has read/write/execute/map data, and then fall
+    // back to the MemoryList and Memory64List to just get a list of the
+    // memory that is mapped in this core file.
+    if (!CreateRegionsCacheFromLinuxMaps(*this, m_regions))
+      if (!CreateRegionsCacheFromMemoryInfoList(*this, m_regions))
+        if (!CreateRegionsCacheFromMemoryList(*this, m_regions))
+          CreateRegionsCacheFromMemory64List(*this, m_regions);
+    std::sort(m_regions.begin(), m_regions.end());
+  }
+  return FindMemoryRegion(load_addr);
 }
 
 Status MinidumpParser::Initialize() {
diff --git a/lldb/source/Plugins/Process/minidump/MinidumpParser.h b/lldb/source/Plugins/Process/minidump/MinidumpParser.h
index c08fe91..bc819cd 100644
--- a/lldb/source/Plugins/Process/minidump/MinidumpParser.h
+++ b/lldb/source/Plugins/Process/minidump/MinidumpParser.h
@@ -1,5 +1,4 @@
-//===-- MinidumpParser.h -----------------------------------------*- C++
-//-*-===//
+//===-- MinidumpParser.h -----------------------------------------*- C++-*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -86,7 +85,7 @@
 
   llvm::ArrayRef<uint8_t> GetMemory(lldb::addr_t addr, size_t size);
 
-  llvm::Optional<MemoryRegionInfo> GetMemoryRegionInfo(lldb::addr_t);
+  MemoryRegionInfo GetMemoryRegionInfo(lldb::addr_t load_addr);
 
   // Perform consistency checks and initialize internal data structures
   Status Initialize();
@@ -94,10 +93,14 @@
 private:
   MinidumpParser(const lldb::DataBufferSP &data_buf_sp);
 
+  MemoryRegionInfo FindMemoryRegion(lldb::addr_t load_addr) const;
+
 private:
   lldb::DataBufferSP m_data_sp;
   llvm::DenseMap<uint32_t, MinidumpLocationDescriptor> m_directory_map;
   ArchSpec m_arch;
+  std::vector<MemoryRegionInfo> m_regions;
+  bool m_parsed_regions = false;
 };
 
 } // end namespace minidump
diff --git a/lldb/source/Plugins/Process/minidump/MinidumpTypes.h b/lldb/source/Plugins/Process/minidump/MinidumpTypes.h
index b5952f9..82e126d 100644
--- a/lldb/source/Plugins/Process/minidump/MinidumpTypes.h
+++ b/lldb/source/Plugins/Process/minidump/MinidumpTypes.h
@@ -256,25 +256,6 @@
 static_assert(sizeof(MinidumpMemoryInfoListHeader) == 16,
               "sizeof MinidumpMemoryInfoListHeader is not correct!");
 
-// Reference:
-// https://msdn.microsoft.com/en-us/library/windows/desktop/ms680386(v=vs.85).aspx
-struct MinidumpMemoryInfo {
-  llvm::support::ulittle64_t base_address;
-  llvm::support::ulittle64_t allocation_base;
-  llvm::support::ulittle32_t allocation_protect;
-  llvm::support::ulittle32_t alignment1;
-  llvm::support::ulittle64_t region_size;
-  llvm::support::ulittle32_t state;
-  llvm::support::ulittle32_t protect;
-  llvm::support::ulittle32_t type;
-  llvm::support::ulittle32_t alignment2;
-
-  static std::vector<const MinidumpMemoryInfo *>
-  ParseMemoryInfoList(llvm::ArrayRef<uint8_t> &data);
-};
-static_assert(sizeof(MinidumpMemoryInfo) == 48,
-              "sizeof MinidumpMemoryInfo is not correct!");
-
 enum class MinidumpMemoryInfoState : uint32_t {
   MemCommit = 0x1000,
   MemFree = 0x10000,
@@ -311,6 +292,45 @@
 };
 
 // Reference:
+// https://msdn.microsoft.com/en-us/library/windows/desktop/ms680386(v=vs.85).aspx
+struct MinidumpMemoryInfo {
+  llvm::support::ulittle64_t base_address;
+  llvm::support::ulittle64_t allocation_base;
+  llvm::support::ulittle32_t allocation_protect;
+  llvm::support::ulittle32_t alignment1;
+  llvm::support::ulittle64_t region_size;
+  llvm::support::ulittle32_t state;
+  llvm::support::ulittle32_t protect;
+  llvm::support::ulittle32_t type;
+  llvm::support::ulittle32_t alignment2;
+
+  static std::vector<const MinidumpMemoryInfo *>
+  ParseMemoryInfoList(llvm::ArrayRef<uint8_t> &data);
+
+  bool isReadable() const {
+    const auto mask = MinidumpMemoryProtectionContants::PageNoAccess;
+    return (static_cast<uint32_t>(mask) & protect) == 0;
+  }
+
+  bool isWritable() const {
+    const auto mask = MinidumpMemoryProtectionContants::PageWritable;
+    return (static_cast<uint32_t>(mask) & protect) != 0;
+  }
+
+  bool isExecutable() const {
+    const auto mask = MinidumpMemoryProtectionContants::PageExecutable;
+    return (static_cast<uint32_t>(mask) & protect) != 0;
+  }
+  
+  bool isMapped() const {
+    return state != static_cast<uint32_t>(MinidumpMemoryInfoState::MemFree);
+  }
+};
+
+static_assert(sizeof(MinidumpMemoryInfo) == 48,
+              "sizeof MinidumpMemoryInfo is not correct!");
+
+// Reference:
 // https://msdn.microsoft.com/en-us/library/windows/desktop/ms680517(v=vs.85).aspx
 struct MinidumpThread {
   llvm::support::ulittle32_t thread_id;
diff --git a/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp b/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp
index 97e1376..1744b65 100644
--- a/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp
+++ b/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp
@@ -284,14 +284,8 @@
 
 Status ProcessMinidump::GetMemoryRegionInfo(lldb::addr_t load_addr,
                                             MemoryRegionInfo &range_info) {
-  Status error;
-  auto info = m_minidump_parser.GetMemoryRegionInfo(load_addr);
-  if (!info) {
-    error.SetErrorString("No valid MemoryRegionInfo found!");
-    return error;
-  }
-  range_info = info.getValue();
-  return error;
+  range_info = m_minidump_parser.GetMemoryRegionInfo(load_addr);
+  return Status();
 }
 
 void ProcessMinidump::Clear() { Process::m_thread_list.Clear(); }