Enable streaming of bitcode

This CL delays reading of function bodies from initial parse until
materialization, allowing overlap of compilation with bitcode download.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@149918 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt
index 322d32f..6cec47d 100644
--- a/lib/Support/CMakeLists.txt
+++ b/lib/Support/CMakeLists.txt
@@ -16,6 +16,7 @@
   ConstantRange.cpp
   CrashRecoveryContext.cpp
   DataExtractor.cpp
+  DataStream.cpp
   Debug.cpp
   DeltaAlgorithm.cpp
   DAGDeltaAlgorithm.cpp
@@ -42,6 +43,7 @@
   SmallVector.cpp
   SourceMgr.cpp
   Statistic.cpp
+  StreamableMemoryObject.cpp
   StringExtras.cpp
   StringMap.cpp
   StringPool.cpp
diff --git a/lib/Support/DataStream.cpp b/lib/Support/DataStream.cpp
new file mode 100644
index 0000000..fa8edc7
--- /dev/null
+++ b/lib/Support/DataStream.cpp
@@ -0,0 +1,96 @@
+//===--- llvm/Support/DataStream.cpp - Lazy streamed Data               ---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements DataStreamer, which fetches bytes of Data from
+// a stream source. It provides support for streaming (lazy reading) of
+// bitcode. An example implementation of streaming from a file or stdin
+// is included.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "Data-stream"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/DataStream.h"
+#include "llvm/Support/system_error.h"
+#include <string>
+#include <cerrno>
+#include <cstdio>
+#if !defined(_MSC_VER) && !defined(__MINGW32__)
+#include <unistd.h>
+#else
+#include <io.h>
+#endif
+#include <fcntl.h>
+using namespace llvm;
+
+// Interface goals:
+// * StreamableMemoryObject doesn't care about complexities like using
+//   threads/async callbacks to actually overlap download+compile
+// * Don't want to duplicate Data in memory
+// * Don't need to know total Data len in advance
+// Non-goals:
+// StreamableMemoryObject already has random access so this interface only does
+// in-order streaming (no arbitrary seeking, else we'd have to buffer all the
+// Data here in addition to MemoryObject).  This also means that if we want
+// to be able to to free Data, BitstreamBytes/BitcodeReader will implement it
+
+STATISTIC(NumStreamFetches, "Number of calls to Data stream fetch");
+
+namespace llvm {
+DataStreamer::~DataStreamer() {}
+}
+
+namespace {
+
+const static error_code success;
+
+// Very simple stream backed by a file. Mostly useful for stdin and debugging;
+// actual file access is probably still best done with mmap.
+class DataFileStreamer : public DataStreamer {
+ int Fd;
+public:
+  DataFileStreamer() : Fd(0) {}
+  virtual ~DataFileStreamer() {
+    close(Fd);
+  }
+  virtual size_t GetBytes(unsigned char *buf, size_t len) {
+    NumStreamFetches++;
+    return read(Fd, buf, len);
+  }
+
+  error_code OpenFile(const std::string &Filename) {
+    int OpenFlags = O_RDONLY;
+#ifdef O_BINARY
+    OpenFlags |= O_BINARY;  // Open input file in binary mode on win32.
+#endif
+    if (Filename == "-")
+      Fd = 0;
+    else
+      Fd = ::open(Filename.c_str(), OpenFlags);
+    if (Fd == -1) return error_code(errno, posix_category());
+      return success;
+  }
+};
+
+}
+
+namespace llvm {
+DataStreamer *getDataFileStreamer(const std::string &Filename,
+                                  std::string *StrError) {
+  DataFileStreamer *s = new DataFileStreamer();
+  error_code e = s->OpenFile(Filename);
+  if (e != success) {
+    *StrError = std::string("Could not open ") + Filename + ": " +
+        e.message() + "\n";
+    return NULL;
+  }
+  return s;
+}
+
+}
diff --git a/lib/Support/MemoryObject.cpp b/lib/Support/MemoryObject.cpp
index b20ab89..c82f46a 100644
--- a/lib/Support/MemoryObject.cpp
+++ b/lib/Support/MemoryObject.cpp
@@ -16,7 +16,7 @@
 int MemoryObject::readBytes(uint64_t address,
                             uint64_t size,
                             uint8_t* buf,
-                            uint64_t* copied) const {
+                            uint64_t* copied) {
   uint64_t current = address;
   uint64_t limit = getBase() + getExtent();
 
diff --git a/lib/Support/StreamableMemoryObject.cpp b/lib/Support/StreamableMemoryObject.cpp
new file mode 100644
index 0000000..2b9d7ad
--- /dev/null
+++ b/lib/Support/StreamableMemoryObject.cpp
@@ -0,0 +1,137 @@
+//===- StreamableMemoryObject.cpp - Streamable data interface - -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/StreamableMemoryObject.h"
+#include <cassert>
+#include <cstring>
+
+
+using namespace llvm;
+
+namespace {
+
+class RawMemoryObject : public StreamableMemoryObject {
+public:
+  RawMemoryObject(const unsigned char *Start, const unsigned char *End) :
+    FirstChar(Start), LastChar(End) {
+    assert(LastChar > FirstChar && "Invalid start/end range");
+  }
+
+  virtual uint64_t getBase() const { return 0; }
+  virtual uint64_t getExtent() { return LastChar - FirstChar; }
+  virtual int readByte(uint64_t address, uint8_t* ptr);
+  virtual int readBytes(uint64_t address,
+                        uint64_t size,
+                        uint8_t* buf,
+                        uint64_t* copied);
+  virtual const uint8_t *getPointer(uint64_t address, uint64_t size);
+  virtual bool isValidAddress(uint64_t address) {return validAddress(address);}
+  virtual bool isObjectEnd(uint64_t address) {return objectEnd(address);}
+
+private:
+  const uint8_t* const FirstChar;
+  const uint8_t* const LastChar;
+
+  // These are implemented as inline functions here to avoid multiple virtual
+  // calls per public function
+  bool validAddress(uint64_t address) {
+    return static_cast<ptrdiff_t>(address) < LastChar - FirstChar;
+  }
+  bool objectEnd(uint64_t address) {
+    return static_cast<ptrdiff_t>(address) == LastChar - FirstChar;
+  }
+
+  RawMemoryObject(const RawMemoryObject&);  // DO NOT IMPLEMENT
+  void operator=(const RawMemoryObject&);  // DO NOT IMPLEMENT
+};
+
+int RawMemoryObject::readByte(uint64_t address, uint8_t* ptr) {
+  if (!validAddress(address)) return -1;
+  *ptr = *((uint8_t *)(uintptr_t)(address + FirstChar));
+  return 0;
+}
+
+int RawMemoryObject::readBytes(uint64_t address,
+                               uint64_t size,
+                               uint8_t* buf,
+                               uint64_t* copied) {
+  if (!validAddress(address) || !validAddress(address + size - 1)) return -1;
+  memcpy(buf, (uint8_t *)(uintptr_t)(address + FirstChar), size);
+  if (copied) *copied = size;
+  return size;
+}
+
+const uint8_t *RawMemoryObject::getPointer(uint64_t address, uint64_t size) {
+  return FirstChar + address;
+}
+} // anonymous namespace
+
+namespace llvm {
+// If the bitcode has a header, then its size is known, and we don't have to
+// block until we actually want to read it.
+bool StreamingMemoryObject::isValidAddress(uint64_t address) {
+  if (ObjectSize && address < ObjectSize) return true;
+    return fetchToPos(address);
+}
+
+bool StreamingMemoryObject::isObjectEnd(uint64_t address) {
+  if (ObjectSize) return address == ObjectSize;
+  fetchToPos(address);
+  return address == ObjectSize && address != 0;
+}
+
+uint64_t StreamingMemoryObject::getExtent() {
+  if (ObjectSize) return ObjectSize;
+  size_t pos = BytesRead + kChunkSize;
+  // keep fetching until we run out of bytes
+  while (fetchToPos(pos)) pos += kChunkSize;
+  return ObjectSize;
+}
+
+int StreamingMemoryObject::readByte(uint64_t address, uint8_t* ptr) {
+  if (!fetchToPos(address)) return -1;
+  *ptr = Bytes[address + BytesSkipped];
+  return 0;
+}
+
+int StreamingMemoryObject::readBytes(uint64_t address,
+                                     uint64_t size,
+                                     uint8_t* buf,
+                                     uint64_t* copied) {
+  if (!fetchToPos(address + size - 1)) return -1;
+  memcpy(buf, &Bytes[address + BytesSkipped], size);
+  if (copied) *copied = size;
+  return 0;
+}
+
+bool StreamingMemoryObject::dropLeadingBytes(size_t s) {
+  if (BytesRead < s) return true;
+  BytesSkipped = s;
+  BytesRead -= s;
+  return false;
+}
+
+void StreamingMemoryObject::setKnownObjectSize(size_t size) {
+  ObjectSize = size;
+  Bytes.reserve(size);
+}
+
+StreamableMemoryObject *getNonStreamedMemoryObject(
+    const unsigned char *Start, const unsigned char *End) {
+  return new RawMemoryObject(Start, End);
+}
+
+StreamableMemoryObject::~StreamableMemoryObject() { }
+
+StreamingMemoryObject::StreamingMemoryObject(DataStreamer *streamer) :
+  Bytes(kChunkSize), Streamer(streamer), BytesRead(0), BytesSkipped(0),
+  ObjectSize(0), EOFReached(false) {
+  BytesRead = streamer->GetBytes(&Bytes[0], kChunkSize);
+}
+}