Bitcode: Change reader interface to take memory buffers.

As proposed on llvm-dev:
http://lists.llvm.org/pipermail/llvm-dev/2016-October/106595.html

This change also fixes an API oddity where BitstreamCursor::Read() would
return zero for the first read past the end of the bitstream, but would
call report_fatal_error for subsequent reads. Now every read past the end
calls report_fatal_error. Clients have been updated to check for the end
of the bitstream before reading from it.

I also needed to add padding to the invalid bitcode tests in
test/Bitcode/. This is because the streaming interface did not check that
the file size is a multiple of 4, whereas the memory buffer interface that
these tests now go through does.

Differential Revision: https://reviews.llvm.org/D26219

llvm-svn: 285773
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index f224b04..710187a 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -61,14 +61,12 @@
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
-#include "llvm/Support/DataStream.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/ErrorOr.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/StreamingMemoryObject.h"
 #include <algorithm>
 #include <cassert>
 #include <cstddef>
@@ -232,29 +230,19 @@
 
 class BitcodeReaderBase {
 protected:
-  BitcodeReaderBase() = default;
   BitcodeReaderBase(MemoryBuffer *Buffer) : Buffer(Buffer) {}
 
   std::unique_ptr<MemoryBuffer> Buffer;
   std::unique_ptr<BitstreamReader> StreamFile;
   BitstreamCursor Stream;
 
-  std::error_code initStream(std::unique_ptr<DataStreamer> Streamer);
-  std::error_code initStreamFromBuffer();
-  std::error_code initLazyStream(std::unique_ptr<DataStreamer> Streamer);
+  std::error_code initStream();
 
   virtual std::error_code error(const Twine &Message) = 0;
   virtual ~BitcodeReaderBase() = default;
 };
 
-std::error_code
-BitcodeReaderBase::initStream(std::unique_ptr<DataStreamer> Streamer) {
-  if (Streamer)
-    return initLazyStream(std::move(Streamer));
-  return initStreamFromBuffer();
-}
-
-std::error_code BitcodeReaderBase::initStreamFromBuffer() {
+std::error_code BitcodeReaderBase::initStream() {
   const unsigned char *BufPtr = (const unsigned char*)Buffer->getBufferStart();
   const unsigned char *BufEnd = BufPtr+Buffer->getBufferSize();
 
@@ -267,39 +255,12 @@
     if (SkipBitcodeWrapperHeader(BufPtr, BufEnd, true))
       return error("Invalid bitcode wrapper header");
 
-  StreamFile.reset(new BitstreamReader(BufPtr, BufEnd));
+  StreamFile.reset(new BitstreamReader(ArrayRef<uint8_t>(BufPtr, BufEnd)));
   Stream.init(&*StreamFile);
 
   return std::error_code();
 }
 
-std::error_code
-BitcodeReaderBase::initLazyStream(std::unique_ptr<DataStreamer> Streamer) {
-  // Check and strip off the bitcode wrapper; BitstreamReader expects never to
-  // see it.
-  auto OwnedBytes =
-      llvm::make_unique<StreamingMemoryObject>(std::move(Streamer));
-  StreamingMemoryObject &Bytes = *OwnedBytes;
-  StreamFile = llvm::make_unique<BitstreamReader>(std::move(OwnedBytes));
-  Stream.init(&*StreamFile);
-
-  unsigned char buf[16];
-  if (Bytes.readBytes(buf, 16, 0) != 16)
-    return error("Invalid bitcode signature");
-
-  if (!isBitcode(buf, buf + 16))
-    return error("Invalid bitcode signature");
-
-  if (isBitcodeWrapper(buf, buf + 4)) {
-    const unsigned char *bitcodeStart = buf;
-    const unsigned char *bitcodeEnd = buf + 16;
-    SkipBitcodeWrapperHeader(bitcodeStart, bitcodeEnd, false);
-    Bytes.dropLeadingBytes(bitcodeStart - buf);
-    Bytes.setKnownObjectSize(bitcodeEnd - bitcodeStart);
-  }
-  return std::error_code();
-}
-
 class BitcodeReader : public BitcodeReaderBase, public GVMaterializer {
   LLVMContext &Context;
   Module *TheModule = nullptr;
@@ -399,7 +360,6 @@
   std::error_code error(const Twine &Message) override;
 
   BitcodeReader(MemoryBuffer *Buffer, LLVMContext &Context);
-  BitcodeReader(LLVMContext &Context);
   ~BitcodeReader() override { freeState(); }
 
   std::error_code materializeForwardReferencedFunctions();
@@ -414,8 +374,7 @@
 
   /// \brief Main interface to parsing a bitcode buffer.
   /// \returns true if an error occurred.
-  std::error_code parseBitcodeInto(std::unique_ptr<DataStreamer> Streamer,
-                                   Module *M,
+  std::error_code parseBitcodeInto(Module *M,
                                    bool ShouldLazyLoadMetadata = false);
 
   /// \brief Cheap mechanism to just extract module triple
@@ -638,8 +597,7 @@
 
   /// \brief Main interface to parsing a bitcode buffer.
   /// \returns true if an error occurred.
-  std::error_code parseSummaryIndexInto(std::unique_ptr<DataStreamer> Streamer,
-                                        ModuleSummaryIndex *I);
+  std::error_code parseSummaryIndexInto(ModuleSummaryIndex *I);
 
 private:
   std::error_code parseModule();
@@ -706,9 +664,6 @@
     : BitcodeReaderBase(Buffer), Context(Context), ValueList(Context),
       MetadataList(Context) {}
 
-BitcodeReader::BitcodeReader(LLVMContext &Context)
-    : Context(Context), ValueList(Context), MetadataList(Context) {}
-
 std::error_code BitcodeReader::materializeForwardReferencedFunctions() {
   if (WillMaterializeAllForwardRefs)
     return std::error_code();
@@ -2166,10 +2121,6 @@
   SimpleBitstreamCursor R(*StreamFile);
   R.jumpToPointer(Lengths.begin());
 
-  // Ensure that Blob doesn't get invalidated, even if this is reading from
-  // a StreamingMemoryObject with corrupt data.
-  R.setArtificialByteLimit(R.getCurrentByteNo() + StringsOffset);
-
   StringRef Strings = Blob.drop_front(StringsOffset);
   do {
     if (R.AtEndOfStream())
@@ -4203,12 +4154,11 @@
   return true;
 }
 
-std::error_code
-BitcodeReader::parseBitcodeInto(std::unique_ptr<DataStreamer> Streamer,
-                                Module *M, bool ShouldLazyLoadMetadata) {
+std::error_code BitcodeReader::parseBitcodeInto(Module *M,
+                                                bool ShouldLazyLoadMetadata) {
   TheModule = M;
 
-  if (std::error_code EC = initStream(std::move(Streamer)))
+  if (std::error_code EC = initStream())
     return EC;
 
   // Sniff for the signature.
@@ -4282,7 +4232,7 @@
 }
 
 ErrorOr<std::string> BitcodeReader::parseTriple() {
-  if (std::error_code EC = initStream(nullptr))
+  if (std::error_code EC = initStream())
     return EC;
 
   // Sniff for the signature.
@@ -4317,7 +4267,7 @@
 }
 
 ErrorOr<std::string> BitcodeReader::parseIdentificationBlock() {
-  if (std::error_code EC = initStream(nullptr))
+  if (std::error_code EC = initStream())
     return EC;
 
   // Sniff for the signature.
@@ -4367,7 +4317,7 @@
 }
 
 ErrorOr<bool> BitcodeReader::hasObjCCategory() {
-  if (std::error_code EC = initStream(nullptr))
+  if (std::error_code EC = initStream())
     return EC;
 
   // Sniff for the signature.
@@ -5955,7 +5905,8 @@
 ModuleSummaryIndexBitcodeReader::ModuleSummaryIndexBitcodeReader(
     MemoryBuffer *Buffer, DiagnosticHandlerFunction DiagnosticHandler,
     bool CheckGlobalValSummaryPresenceOnly)
-    : BitcodeReaderBase(Buffer), DiagnosticHandler(std::move(DiagnosticHandler)),
+    : BitcodeReaderBase(Buffer),
+      DiagnosticHandler(std::move(DiagnosticHandler)),
       CheckGlobalValSummaryPresenceOnly(CheckGlobalValSummaryPresenceOnly) {}
 
 void ModuleSummaryIndexBitcodeReader::freeState() { Buffer = nullptr; }
@@ -6555,11 +6506,11 @@
 }
 
 // Parse the function info index from the bitcode streamer into the given index.
-std::error_code ModuleSummaryIndexBitcodeReader::parseSummaryIndexInto(
-    std::unique_ptr<DataStreamer> Streamer, ModuleSummaryIndex *I) {
+std::error_code
+ModuleSummaryIndexBitcodeReader::parseSummaryIndexInto(ModuleSummaryIndex *I) {
   TheIndex = I;
 
-  if (std::error_code EC = initStream(std::move(Streamer)))
+  if (std::error_code EC = initStream())
     return EC;
 
   // Sniff for the signature.
@@ -6624,8 +6575,7 @@
 //===----------------------------------------------------------------------===//
 
 static ErrorOr<std::unique_ptr<Module>>
-getBitcodeModuleImpl(std::unique_ptr<DataStreamer> Streamer, StringRef Name,
-                     BitcodeReader *R, LLVMContext &Context,
+getBitcodeModuleImpl(StringRef Name, BitcodeReader *R, LLVMContext &Context,
                      bool MaterializeAll, bool ShouldLazyLoadMetadata) {
   std::unique_ptr<Module> M = llvm::make_unique<Module>(Name, Context);
   M->setMaterializer(R);
@@ -6636,8 +6586,7 @@
   };
 
   // Delay parsing Metadata if ShouldLazyLoadMetadata is true.
-  if (std::error_code EC = R->parseBitcodeInto(std::move(Streamer), M.get(),
-                                               ShouldLazyLoadMetadata))
+  if (std::error_code EC = R->parseBitcodeInto(M.get(), ShouldLazyLoadMetadata))
     return cleanupOnError(EC);
 
   if (MaterializeAll) {
@@ -6667,7 +6616,7 @@
   BitcodeReader *R = new BitcodeReader(Buffer.get(), Context);
 
   ErrorOr<std::unique_ptr<Module>> Ret =
-      getBitcodeModuleImpl(nullptr, Buffer->getBufferIdentifier(), R, Context,
+      getBitcodeModuleImpl(Buffer->getBufferIdentifier(), R, Context,
                            MaterializeAll, ShouldLazyLoadMetadata);
   if (!Ret)
     return Ret;
@@ -6683,17 +6632,6 @@
                                   ShouldLazyLoadMetadata);
 }
 
-ErrorOr<std::unique_ptr<Module>>
-llvm::getStreamedBitcodeModule(StringRef Name,
-                               std::unique_ptr<DataStreamer> Streamer,
-                               LLVMContext &Context) {
-  std::unique_ptr<Module> M = llvm::make_unique<Module>(Name, Context);
-  BitcodeReader *R = new BitcodeReader(Context);
-
-  return getBitcodeModuleImpl(std::move(Streamer), Name, R, Context, false,
-                              false);
-}
-
 ErrorOr<std::unique_ptr<Module>> llvm::parseBitcodeFile(MemoryBufferRef Buffer,
                                                         LLVMContext &Context) {
   std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(Buffer, false);
@@ -6746,7 +6684,7 @@
     return EC;
   };
 
-  if (std::error_code EC = R.parseSummaryIndexInto(nullptr, Index.get()))
+  if (std::error_code EC = R.parseSummaryIndexInto(Index.get()))
     return cleanupOnError(EC);
 
   Buf.release(); // The ModuleSummaryIndexBitcodeReader owns it now.
@@ -6765,7 +6703,7 @@
     return false;
   };
 
-  if (std::error_code EC = R.parseSummaryIndexInto(nullptr, nullptr))
+  if (std::error_code EC = R.parseSummaryIndexInto(nullptr))
     return cleanupOnError(EC);
 
   Buf.release(); // The ModuleSummaryIndexBitcodeReader owns it now.