[codeview,pdb] Try really hard to conserve memory when reading.

PDBs can be extremely large.  We're already mapping the entire
PDB into the process's address space, but to make matters worse
the blocks of the PDB are not arranged contiguously.  So, when
we have something like an array or a string embedded into the
stream, we have to make a copy.  Since it's convenient to use
traditional data structures to iterate and manipulate these
records, we need the memory to be contiguous.

As a result of this, we were using roughly twice as much memory
as the file size of the PDB, because every stream was copied
out and re-stitched together contiguously.

This patch addresses this by improving the MappedBlockStream
to allocate from a BumpPtrAllocator only when a request spans
discontiguous blocks.  Furthermore, it introduces some data
structures backed by a stream which can iterate over both
fixed and variable length records of a PDB.  Since everything
is backed by a stream and not a buffer, we can read almost
everything from the PDB with zero copies.

Differential Revision: http://reviews.llvm.org/D20654
Reviewed By: ruiu

llvm-svn: 270951
diff --git a/llvm/lib/DebugInfo/PDB/Raw/MappedBlockStream.cpp b/llvm/lib/DebugInfo/PDB/Raw/MappedBlockStream.cpp
index 09d7198..a3db147 100644
--- a/llvm/lib/DebugInfo/PDB/Raw/MappedBlockStream.cpp
+++ b/llvm/lib/DebugInfo/PDB/Raw/MappedBlockStream.cpp
@@ -23,6 +23,69 @@
   }
 }
 
+Error MappedBlockStream::readBytes(uint32_t Offset, uint32_t Size,
+                                   ArrayRef<uint8_t> &Buffer) const {
+  // Make sure we aren't trying to read beyond the end of the stream.  Note
+  // that Buffer is an out parameter here, so its incoming size is
+  // meaningless; the requested read length is Size.
+  if (Size > StreamLength)
+    return make_error<RawError>(raw_error_code::insufficient_buffer);
+  if (Offset > StreamLength - Size)
+    return make_error<RawError>(raw_error_code::insufficient_buffer);
+
+  // Fast path: if the requested range lies in physically contiguous blocks,
+  // return a zero-copy reference directly into the mapped file.
+  if (tryReadContiguously(Offset, Size, Buffer))
+    return Error::success();
+
+  // Discontiguous read.  See if we already stitched this range together.
+  auto CacheIter = CacheMap.find(Offset);
+  if (CacheIter != CacheMap.end()) {
+    // In a more general solution, we would need to guarantee that the
+    // cached allocation is at least the requested size.  In practice, since
+    // these are CodeView / PDB records, we know they are always formatted
+    // the same way and never change, so we should never be requesting two
+    // allocations from the same address with different sizes.
+    Buffer = ArrayRef<uint8_t>(CacheIter->second, Size);
+    return Error::success();
+  }
+
+  // Otherwise allocate a large enough buffer in the pool, memcpy the data
+  // into it, and return an ArrayRef to that.
+  uint8_t *WriteBuffer = Pool.Allocate<uint8_t>(Size);
+
+  if (auto EC = readBytes(Offset, MutableArrayRef<uint8_t>(WriteBuffer, Size)))
+    return EC;
+  // Remember the stitched copy so repeated reads of the same record reuse it.
+  CacheMap.insert(std::make_pair(Offset, WriteBuffer));
+  Buffer = ArrayRef<uint8_t>(WriteBuffer, Size);
+  return Error::success();
+}
+
+bool MappedBlockStream::tryReadContiguously(uint32_t Offset, uint32_t Size,
+                                            ArrayRef<uint8_t> &Buffer) const {
+  // Attempt to fulfill the request with a reference directly into the stream.
+  // This can work even if the request crosses a block boundary, provided that
+  // all subsequent blocks are contiguous.  For example, a 10k read with a 4k
+  // block size can be filled with a reference if, from the starting offset,
+  // 3 blocks in a row are contiguous.
+  uint32_t BlockNum = Offset / Pdb.getBlockSize();
+  uint32_t OffsetInBlock = Offset % Pdb.getBlockSize();
+  // Bytes serviced by the first block (the read may begin mid-block).
+  uint32_t BytesFromFirstBlock =
+      std::min(Size, Pdb.getBlockSize() - OffsetInBlock);
+  // Number of whole blocks needed after the first, rounding the remainder
+  // up to a full block.
+  uint32_t NumAdditionalBlocks =
+      llvm::alignTo(Size - BytesFromFirstBlock, Pdb.getBlockSize()) /
+      Pdb.getBlockSize();
+
+  // Check that every block covering the request maps to consecutive file
+  // addresses: E tracks the expected address while I walks the block list.
+  uint32_t RequiredContiguousBlocks = NumAdditionalBlocks + 1;
+  uint32_t E = BlockList[BlockNum];
+  for (uint32_t I = 0; I < RequiredContiguousBlocks; ++I, ++E) {
+    if (BlockList[I + BlockNum] != E)
+      return false;
+  }
+
+  // All blocks are physically adjacent, so one pointer into the mapped file
+  // covers the entire request.  Skip the leading bytes within the first block
+  // and hand back a zero-copy view of Size bytes.
+  uint32_t FirstBlockAddr = BlockList[BlockNum];
+  StringRef Str = Pdb.getBlockData(FirstBlockAddr, Pdb.getBlockSize());
+  Str = Str.drop_front(OffsetInBlock);
+  Buffer =
+      ArrayRef<uint8_t>(reinterpret_cast<const uint8_t *>(Str.data()), Size);
+  return true;
+}
+
 Error MappedBlockStream::readBytes(uint32_t Offset,
                                    MutableArrayRef<uint8_t> Buffer) const {
   uint32_t BlockNum = Offset / Pdb.getBlockSize();
@@ -54,27 +117,5 @@
   }
 
   return Error::success();
-}
 
-Error MappedBlockStream::getArrayRef(uint32_t Offset, ArrayRef<uint8_t> &Buffer,
-                                     uint32_t Length) const {
-  uint32_t BlockNum = Offset / Pdb.getBlockSize();
-  uint32_t OffsetInBlock = Offset % Pdb.getBlockSize();
-  uint32_t BytesAvailableInBlock = Pdb.getBlockSize() - OffsetInBlock;
-
-  // If this is the last block in the stream, not all of the data is valid.
-  if (BlockNum == BlockList.size() - 1) {
-    uint32_t AllocatedBytesInBlock = StreamLength % Pdb.getBlockSize();
-    if (AllocatedBytesInBlock < BytesAvailableInBlock)
-      BytesAvailableInBlock = AllocatedBytesInBlock;
-  }
-  if (BytesAvailableInBlock < Length)
-    return make_error<RawError>(raw_error_code::feature_unsupported);
-
-  uint32_t StreamBlockAddr = BlockList[BlockNum];
-  StringRef Data = Pdb.getBlockData(StreamBlockAddr, Pdb.getBlockSize());
-  Data = Data.substr(OffsetInBlock, Length);
-
-  Buffer = ArrayRef<uint8_t>(Data.bytes_begin(), Data.bytes_end());
-  return Error::success();
 }