Stage two of getting CFE top correct.


git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@39734 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/Basic/Diagnostic.cpp b/Basic/Diagnostic.cpp
new file mode 100644
index 0000000..155b6fc
--- /dev/null
+++ b/Basic/Diagnostic.cpp
@@ -0,0 +1,147 @@
+//===--- Diagnostic.cpp - C Language Family Diagnostic Handling -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file implements the Diagnostic-related interfaces.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/SourceLocation.h"
+#include <cassert>
+using namespace clang;
+
+/// Flag values for diagnostics.  The class lives in the low three bits.
+enum {
+  // Diagnostic classes; every class below ERROR is a remappable non-error.
+  NOTE       = 1,
+  WARNING    = 2,
+  EXTENSION  = 3,
+  ERROR      = 4,
+  FATAL      = 5,
+  class_mask = 7   // Mask that extracts the class from a flags byte.
+};
+
+/// DiagnosticFlags - The flag bits for each diagnostic, indexed by diagnostic
+/// ID and generated from the FLAGS column of DiagnosticKinds.def.  Read-only.
+static const unsigned char DiagnosticFlags[] = {
+#define DIAG(ENUM,FLAGS,DESC) FLAGS,
+#include "clang/Basic/DiagnosticKinds.def"
+  0
+};
+
+/// getDiagClass - Look up the flags of DiagID and return only the class bits
+/// (one of NOTE, WARNING, EXTENSION, ERROR or FATAL).
+static unsigned getDiagClass(unsigned DiagID) {
+  assert(diag::NUM_DIAGNOSTICS > DiagID && "Diagnostic ID out of range!");
+  return class_mask & DiagnosticFlags[DiagID];
+}
+
+/// DiagnosticText - The English message for each diagnostic, indexed by
+/// diagnostic ID (DESC column of DiagnosticKinds.def).  Should be localized.
+static const char * const DiagnosticText[] = {
+#define DIAG(ENUM,FLAGS,DESC) DESC,
+#include "clang/Basic/DiagnosticKinds.def"
+  0
+};
+
+Diagnostic::Diagnostic(DiagnosticClient &client) : Client(client) {
+  // Start with no errors seen and nothing reported.
+  NumErrors = 0;
+  NumDiagnostics = 0;
+  ErrorOccurred = false;
+
+  // Zero every per-diagnostic mapping, i.e. reset them all to MAP_DEFAULT.
+  memset(DiagMappings, 0, sizeof(DiagMappings));
+  // The global overrides stay off until the client turns them on.
+  ErrorOnExtensions = WarnOnExtensions = WarningsAsErrors = false;
+}
+
+/// isNoteWarningOrExtension - Return true if the class DiagID was declared
+/// with (before any mapping is applied) is Note, Warning, or Extension.
+bool Diagnostic::isNoteWarningOrExtension(unsigned DiagID) {
+  return !(getDiagClass(DiagID) >= ERROR);
+}
+
+
+/// getDescription - Return the message text for the given diagnostic ID.
+const char *Diagnostic::getDescription(unsigned DiagID) {
+  assert(diag::NUM_DIAGNOSTICS > DiagID && "Diagnostic ID out of range!");
+  const char *const Text = DiagnosticText[DiagID];
+  return Text;
+}
+
+/// getDiagnosticLevel - Work out the Level that DiagID should be reported at,
+/// taking into account the per-diagnostic mapping and the global extension /
+/// warnings-as-errors settings configured on this Diagnostic object.
+Diagnostic::Level Diagnostic::getDiagnosticLevel(unsigned DiagID) const {
+  unsigned Class = getDiagClass(DiagID);
+
+  // Only notes, warnings and extensions can be remapped by the client; errors
+  // and fatal errors always keep their class.
+  if (Class < ERROR) {
+    switch (getDiagnosticMapping((diag::kind)DiagID)) {
+    case diag::MAP_IGNORE:  return Ignored;
+    case diag::MAP_WARNING: Class = WARNING; break;
+    case diag::MAP_ERROR:   Class = ERROR; break;
+    case diag::MAP_DEFAULT: break;
+    }
+  }
+
+  // Extensions are silently dropped unless the client asked to see them as
+  // warnings or errors; the error setting wins if both are on.
+  if (Class == EXTENSION) {
+    if (!ErrorOnExtensions && !WarnOnExtensions)
+      return Ignored;
+    Class = ErrorOnExtensions ? ERROR : WARNING;
+  }
+
+  // If warnings are to be treated as errors, promote them now.  This also
+  // covers extensions that were just turned into warnings above.
+  if (WarningsAsErrors && Class == WARNING)
+    Class = ERROR;
+
+  // Translate the internal class into the public Level enum.
+  switch (Class) {
+  default: assert(0 && "Unknown diagnostic class!");
+  case NOTE:        return Diagnostic::Note;
+  case WARNING:     return Diagnostic::Warning;
+  case ERROR:       return Diagnostic::Error;
+  case FATAL:       return Diagnostic::Fatal;
+  }
+}
+
+/// Report - Classify DiagID (a member of the diag::kind enum) and, unless it is
+/// ignored, hand it to the client.  Errors are counted even if the client then
+/// chooses to suppress the message.
+void Diagnostic::Report(SourceLocation Pos, unsigned DiagID,
+                        const std::string *Strs, unsigned NumStrs,
+                        const SourceRange *Ranges, unsigned NumRanges) {
+  // Map the diagnostic to the level it should be reported at.
+  Diagnostic::Level Lvl = getDiagnosticLevel(DiagID);
+
+  // Diagnostics mapped to 'ignored' are dropped without any bookkeeping.
+  if (Lvl == Diagnostic::Ignored)
+    return;
+
+  // Errors and fatal errors are recorded before the client can filter them.
+  if (Lvl >= Diagnostic::Error) {
+    ++NumErrors;
+    ErrorOccurred = true;
+  }
+
+  // Give the client a chance to suppress the diagnostic at this location.
+  if (!Client.IgnoreDiagnostic(Lvl, Pos)) {
+    // Not suppressed: deliver it and count it as emitted.
+    Client.HandleDiagnostic(Lvl, Pos, (diag::kind)DiagID, Strs, NumStrs,
+                            Ranges, NumRanges);
+    ++NumDiagnostics;
+  }
+}
+
+DiagnosticClient::~DiagnosticClient() {}  // Out-of-line destructor definition.
diff --git a/Basic/FileManager.cpp b/Basic/FileManager.cpp
new file mode 100644
index 0000000..9886e03
--- /dev/null
+++ b/Basic/FileManager.cpp
@@ -0,0 +1,169 @@
+//===--- FileManager.cpp - File System Probing and Caching ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file implements the FileManager interface.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: This should index all interesting directories with dirent calls.
+//  getdirentries ?
+//  opendir/readdir_r/closedir ?
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Basic/FileManager.h"
+#include "llvm/ADT/SmallString.h"
+#include <iostream>
+using namespace clang;
+
+// FIXME: Enhance libsystem to support inode and other fields.
+#include <sys/stat.h>
+
+
+/// NON_EXISTANT_DIR - Sentinel pointer value, distinct from null, that marks a
+/// directory name which was already looked up and does not exist on the disk.
+#define NON_EXISTANT_DIR reinterpret_cast<DirectoryEntry*>(intptr_t(-1))
+
+/// getDirectory - Return the DirectoryEntry for the directory named by
+/// [NameStart, NameEnd), consulting and updating the lookup cache.  Returns
+/// null if the name does not refer to an existing directory.
+const DirectoryEntry *FileManager::getDirectory(const char *NameStart,
+                                                const char *NameEnd) {
+  ++NumDirLookups;
+  llvm::StringMapEntry<DirectoryEntry *> &CacheSlot =
+    DirEntries.GetOrCreateValue(NameStart, NameEnd);
+
+  // A non-null slot means this name was resolved before; the sentinel marks a
+  // name that was previously found not to be a directory.
+  if (DirectoryEntry *Cached = CacheSlot.getValue()) {
+    if (Cached == NON_EXISTANT_DIR)
+      return 0;
+    return Cached;
+  }
+
+  ++NumDirCacheMisses;
+
+  // Pessimistically record the name as missing; this is overwritten below if
+  // the directory turns out to exist.
+  CacheSlot.setValue(NON_EXISTANT_DIR);
+
+  // The map key is a null-terminated copy of the name, suitable for stat().
+  const char *DirName = CacheSlot.getKeyData();
+
+  struct stat StatBuf;
+  if (stat(DirName, &StatBuf) != 0)      // Could not stat the path.
+    return 0;
+  if (!S_ISDIR(StatBuf.st_mode))         // Exists, but is not a directory.
+    return 0;
+
+  // Directories are uniqued by (device, inode), so two names for the same
+  // directory (e.g. through a symlink) share a single DirectoryEntry.
+  DirectoryEntry &Unique =
+    UniqueDirs[std::make_pair(StatBuf.st_dev, StatBuf.st_ino)];
+  CacheSlot.setValue(&Unique);
+
+  // A freshly created entry has no name yet; give it the interned key string.
+  if (!Unique.getName())
+    Unique.Name = DirName;
+  return &Unique;
+}
+
+/// NON_EXISTANT_FILE - Sentinel pointer value, distinct from null, that marks
+/// a filename which was already looked up and does not exist on the disk.
+#define NON_EXISTANT_FILE reinterpret_cast<FileEntry*>(intptr_t(-1))
+
+/// getFile - Lookup, cache, and verify the file named by [NameStart, NameEnd).
+/// Returns null if the file doesn't exist, names a directory, or lives in a
+/// directory that doesn't exist.  Results (including failures) are cached.
+const FileEntry *FileManager::getFile(const char *NameStart,
+                                      const char *NameEnd) {
+  ++NumFileLookups;
+
+  // Find or create the cache slot for this name.
+  llvm::StringMapEntry<FileEntry *> &NamedFileEnt =
+    FileEntries.GetOrCreateValue(NameStart, NameEnd);
+
+  // A non-null value means we have already resolved (or failed on) this name.
+  if (NamedFileEnt.getValue())
+    return NamedFileEnt.getValue() == NON_EXISTANT_FILE
+                 ? 0 : NamedFileEnt.getValue();
+
+  ++NumFileCacheMisses;
+
+  // By default, initialize it to invalid.
+  NamedFileEnt.setValue(NON_EXISTANT_FILE);
+
+  // Figure out what directory it is in.  If the string contains a / in it,
+  // the directory is everything before the last one.
+  // FIXME: this logic should be in sys::Path.
+  const char *SlashPos = NameEnd-1;
+  while (SlashPos >= NameStart && SlashPos[0] != '/')
+    --SlashPos;
+
+  const DirectoryEntry *DirInfo;
+  if (SlashPos < NameStart) {
+    // Use the current directory if file has no path component.
+    const char *Name = ".";
+    DirInfo = getDirectory(Name, Name+1);
+  } else if (SlashPos == NameEnd-1)
+    return 0;       // If filename ends with a /, it's a directory.
+  else if (SlashPos == NameStart)
+    // The only slash is the leading one ("/foo"): the directory is the root,
+    // not the empty string, which stat() would reject.
+    DirInfo = getDirectory(NameStart, NameStart+1);
+  else
+    DirInfo = getDirectory(NameStart, SlashPos);
+
+  if (DirInfo == 0)  // Directory doesn't exist, file can't exist.
+    return 0;
+
+  // Get the null-terminated file name as stored as the key of the
+  // FileEntries map.
+  const char *InterndFileName = NamedFileEnt.getKeyData();
+
+  // FIXME: Use the directory info to prune this, before doing the stat syscall.
+  // FIXME: This will reduce the # syscalls.
+
+  // Check to see if the file exists and is not a directory.
+  struct stat StatBuf;
+  if (stat(InterndFileName, &StatBuf) ||   // Error stat'ing.
+      S_ISDIR(StatBuf.st_mode)) {           // A directory?
+    // The NON_EXISTANT_FILE marker set above remains cached for this path.
+    return 0;
+  }
+
+  // It exists.  See if we have already opened a file with the same inode.
+  // This occurs when one file is symlinked to another, for example.
+  FileEntry &UFE = UniqueFiles[std::make_pair(StatBuf.st_dev, StatBuf.st_ino)];
+
+  NamedFileEnt.setValue(&UFE);
+  if (UFE.getName())  // Already have an entry with this inode, return it.
+    return &UFE;
+
+  // Otherwise, we don't have this file yet, add it.  The name points back at
+  // the interned 'FileEntries' key.
+  UFE.Name    = InterndFileName;
+  UFE.Size    = StatBuf.st_size;
+  UFE.ModTime = StatBuf.st_mtime;
+  UFE.Dir     = DirInfo;
+  UFE.UID     = NextFileUID++;
+  return &UFE;
+}
+
+void FileManager::PrintStats() const {
+  // Dump the uniquing-table sizes followed by the lookup/cache-miss counters.
+  std::cerr << "\n*** File Manager Stats:\n"
+            << UniqueFiles.size() << " files found, "
+            << UniqueDirs.size() << " dirs found.\n"
+            << NumDirLookups << " dir lookups, "
+            << NumDirCacheMisses << " dir cache misses.\n"
+            << NumFileLookups << " file lookups, "
+            << NumFileCacheMisses << " file cache misses.\n";
+  //std::cerr << PagesMapped << BytesOfPagesMapped << FSLookups;
+}
diff --git a/Basic/Makefile b/Basic/Makefile
new file mode 100644
index 0000000..1db0a7f
--- /dev/null
+++ b/Basic/Makefile
@@ -0,0 +1,22 @@
+##===- clang/Basic/Makefile --------------------------------*- Makefile -*-===##
+# 
+#                     The LLVM Compiler Infrastructure
+#
+# This file was developed by Chris Lattner and is distributed under
+# the University of Illinois Open Source License. See LICENSE.TXT for details.
+# 
+##===----------------------------------------------------------------------===##
+#
+#  This implements the Basic library for the C-Language front-end.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME := clangBasic
+BUILD_ARCHIVE = 1
+CXXFLAGS = -fno-rtti
+# Add the include directory next to this one to the header search path.
+CPPFLAGS += -I$(PROJ_SRC_DIR)/../include
+# Pull in the shared build rules from the directory LEVEL points at.
+include $(LEVEL)/Makefile.common
+
diff --git a/Basic/SourceManager.cpp b/Basic/SourceManager.cpp
new file mode 100644
index 0000000..f6148c16
--- /dev/null
+++ b/Basic/SourceManager.cpp
@@ -0,0 +1,370 @@
+//===--- SourceManager.cpp - Track and cache source files -----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file implements the SourceManager interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/FileManager.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/System/Path.h"
+#include <algorithm>
+#include <iostream>
+using namespace clang;
+using namespace SrcMgr;
+using llvm::MemoryBuffer;
+
+SourceManager::~SourceManager() {
+  // Free the buffer and line table of every file and memory-buffer record.
+  std::map<const FileEntry *, FileInfo>::iterator FI = FileInfos.begin();
+  for (; FI != FileInfos.end(); ++FI) {
+    delete[] FI->second.SourceLineCache;
+    delete FI->second.Buffer;
+  }
+  std::list<InfoRec>::iterator MI = MemBufferInfos.begin();
+  for (; MI != MemBufferInfos.end(); ++MI) {
+    delete[] MI->second.SourceLineCache;
+    delete MI->second.Buffer;
+  }
+}
+
+
+// FIXME: REMOVE THESE
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/fcntl.h>
+#include <cerrno>
+
+static const MemoryBuffer *ReadFileFast(const FileEntry *FileEnt) {
+#if 0
+  // FIXME: Reintroduce this and zap this function once the common llvm stuff
+  // is fast for the small case.
+  return MemoryBuffer::getFile(FileEnt->getName(), strlen(FileEnt->getName()),
+                               FileEnt->getSize());
+#endif
+  
+  // If the file is larger than some threshold, use 'read', otherwise use mmap.
+  if (FileEnt->getSize() >= 4096*4)
+    return MemoryBuffer::getFile(FileEnt->getName(), strlen(FileEnt->getName()),
+                                 0, FileEnt->getSize());
+  
+  MemoryBuffer *SB = MemoryBuffer::getNewUninitMemBuffer(FileEnt->getSize(),
+                                                         FileEnt->getName());
+  char *BufPtr = const_cast<char*>(SB->getBufferStart());
+  
+  int FD = ::open(FileEnt->getName(), O_RDONLY);
+  if (FD == -1) {
+    delete SB;
+    return 0;
+  }
+  
+  unsigned BytesLeft = FileEnt->getSize();
+  while (BytesLeft) {
+    ssize_t NumRead = ::read(FD, BufPtr, BytesLeft);
+    if (NumRead != -1) {
+      BytesLeft -= NumRead;
+      BufPtr += NumRead;
+    } else if (errno == EINTR) {
+      // try again
+    } else {
+      // error reading.
+      close(FD);
+      delete SB;
+      return 0;
+    }
+  }
+  close(FD);
+  
+  return SB;
+}
+
+
+/// getInfoRec - Return the cached InfoRec for FileEnt, reading the file and
+/// creating the record on first use.  Returns null if the file can't be read.
+const InfoRec *
+SourceManager::getInfoRec(const FileEntry *FileEnt) {
+  assert(FileEnt && "Didn't specify a file entry to use?");
+  // lower_bound yields either the existing record or the insertion hint.
+  std::map<const FileEntry *, FileInfo>::iterator Hint =
+    FileInfos.lower_bound(FileEnt);
+  const bool AlreadyKnown = Hint != FileInfos.end() && Hint->first == FileEnt;
+  if (AlreadyKnown)
+    return &*Hint;
+
+  // First request for this file: load its contents.
+  const MemoryBuffer *Contents = ReadFileFast(FileEnt);
+  if (!Contents)
+    return 0;
+
+  // Insert a blank record at the hint and fill it in; no line table yet.
+  std::map<const FileEntry *, FileInfo>::iterator NewRec =
+    FileInfos.insert(Hint, std::make_pair(FileEnt, FileInfo()));
+  NewRec->second.Buffer = Contents;
+  NewRec->second.SourceLineCache = 0;
+  NewRec->second.NumLines = 0;
+  return &*NewRec;
+}
+
+
+/// createMemBufferInfoRec - Append a fresh info record wrapping Buffer to
+/// MemBufferInfos and return a pointer to it.  This does no caching.
+const InfoRec *
+SourceManager::createMemBufferInfoRec(const MemoryBuffer *Buffer) {
+  // Build the record with no line table computed yet.
+  FileInfo Fresh;
+  Fresh.NumLines = 0;
+  Fresh.SourceLineCache = 0;
+  Fresh.Buffer = Buffer;
+  MemBufferInfos.push_back(InfoRec(0, Fresh));
+  return &MemBufferInfos.back();
+}
+
+
+/// createFileID - Create the FileID(s) for the specified InfoRec and include
+/// position, returning the first one.  This works regardless of whether the
+/// InfoRec corresponds to a file or some other input source.  A buffer too
+/// large for one FilePos field is given several consecutive FileIDs.
+unsigned SourceManager::createFileID(const InfoRec *File,
+                                     SourceLocation IncludePos) {
+  // If the buffer is really large (e.g. it's a large .i file), we may not be
+  // able to fit an arbitrary position in it in the FilePos field.  To handle
+  // this, one FileID is created for each chunk of the buffer that fits in a
+  // FilePos field.  An ordinary buffer needs just a single chunk.
+  const unsigned ChunkSize = 1 << SourceLocation::FilePosBits;
+  unsigned Remaining = File->second.Buffer->getBufferSize();
+
+  // File IDs are one more than their index in the FileIDs vector, so the ID
+  // of the first chunk is the size before insertion plus one.
+  const unsigned FirstID = FileIDs.size()+1;
+
+  for (unsigned ChunkNo = 0; ; ++ChunkNo) {
+    FileIDs.push_back(FileIDInfo::getNormalBuffer(IncludePos, ChunkNo, File));
+
+    // Stop once the bytes still unaccounted for (plus one) fit in the chunk
+    // that was just added.
+    if (Remaining+1 < ChunkSize)
+      break;
+    Remaining -= ChunkSize;
+  }
+
+  // Every ID handed out must be representable in FileIDBits bits.
+  assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) &&
+         "Ran out of file ID's!");
+  return FirstID;
+}
+
+/// getInstantiationLoc - Return a new SourceLocation that encodes the fact
+/// that a token whose characters live at PhysLoc should actually be
+/// referenced from InstantLoc.
+SourceLocation SourceManager::getInstantiationLoc(SourceLocation PhysLoc,
+                                                  SourceLocation InstantLoc) {
+  const unsigned PhysFID = PhysLoc.getFileID();
+  assert(getFIDInfo(PhysFID)->IDType !=
+         SrcMgr::FileIDInfo::MacroExpansion &&
+         "Location instantiated in a macro?");
+
+  // Resolve InstantLoc down to a real logical location.
+  InstantLoc = getLogicalLoc(InstantLoc);
+
+  // A single-entry cache remembers the previous request; a repeat of it
+  // reuses the FileID that was allocated last time.
+  const bool SameAsLast = PhysFID == LastInstantiationLoc_MacroFID &&
+                          InstantLoc == LastInstantiationLoc_InstantLoc;
+  unsigned ResultFID;
+  if (SameAsLast) {
+    ResultFID = LastInstantiationLoc_Result;
+  } else {
+    // Add a FileID for this.  FIXME: should cache these!
+    FileIDs.push_back(FileIDInfo::getMacroExpansion(InstantLoc, PhysFID));
+    ResultFID = FileIDs.size();
+    // Remember this in the single-entry cache for next time.
+    LastInstantiationLoc_MacroFID   = PhysFID;
+    LastInstantiationLoc_InstantLoc = InstantLoc;
+    LastInstantiationLoc_Result     = ResultFID;
+  }
+  return SourceLocation(ResultFID, PhysLoc.getRawFilePos());
+}
+
+
+
+/// getCharacterData - Return a pointer to the character that SL designates
+/// inside the MemoryBuffer belonging to its file ID.
+const char *SourceManager::getCharacterData(SourceLocation SL) const {
+  // Note that this is a hot function in the getSpelling() path, which is
+  // heavily used by -E mode.
+  const unsigned FID = SL.getFileID();
+  assert(FID != 0 && "Invalid source location!");
+  const char *BufStart = getFileInfo(FID)->Buffer->getBufferStart();
+  return BufStart + getFilePos(SL);
+}
+
+/// getIncludeLoc - Return the IncludeLoc recorded for FileID.  For a macro
+/// expansion ID, the answer comes from the file ID holding the macro's tokens.
+SourceLocation SourceManager::getIncludeLoc(unsigned FileID) const {
+  const SrcMgr::FileIDInfo *Info = getFIDInfo(FileID);
+  if (Info->IDType != SrcMgr::FileIDInfo::MacroExpansion)
+    return Info->IncludeLoc;
+
+  // Macro expansion: hop to the entry named by MacroTokenFileID (IDs are one
+  // more than the vector index) and use its include location instead.
+  return FileIDs[Info->u.MacroTokenFileID-1].IncludeLoc;
+}
+
+
+/// getColumnNumber - Return the 1-based column of Loc within its physical
+/// line, found by scanning back to the previous newline.  This is much cheaper
+/// to compute than the line number.  Returns zero if Loc has no file ID.
+unsigned SourceManager::getColumnNumber(SourceLocation Loc) const {
+  Loc = getLogicalLoc(Loc);
+  const unsigned FID = Loc.getFileID();
+  if (!FID) return 0;
+
+  const unsigned FilePos = getFilePos(Loc);
+  const char *Text = getBuffer(FID)->getBufferStart();
+  // Count characters back from FilePos until a '\n', '\r' or the buffer start.
+  unsigned Distance = 0;
+  for (; Distance != FilePos; ++Distance)
+    if (Text[FilePos-Distance-1] == '\n' || Text[FilePos-Distance-1] == '\r')
+      break;
+  return Distance+1;
+}
+
+/// getSourceName - Return the identifier of the buffer that holds Loc's
+/// logical location, or an empty string if Loc has no file ID.
+std::string SourceManager::getSourceName(SourceLocation Loc) {
+  Loc = getLogicalLoc(Loc);
+  const unsigned FID = Loc.getFileID();
+  if (FID != 0)
+    return getFileInfo(FID)->Buffer->getBufferIdentifier();
+  return "";
+}
+
+
+/// getLineNumber - Given a SourceLocation, return the 1-based physical line
+/// number of its logical location, or 0 if it has no file ID.  The first call
+/// for a buffer builds and caches a table of line offsets, so this is not
+/// cheap: use only when about to emit a diagnostic.
+unsigned SourceManager::getLineNumber(SourceLocation Loc) {
+  Loc = getLogicalLoc(Loc);
+  unsigned FileID = Loc.getFileID();
+  if (FileID == 0) return 0;
+  FileInfo *FileInfo = getFileInfo(FileID);
+  
+  // If this is the first use of line information for this buffer, compute the
+  // SourceLineCache for it on demand.
+  if (FileInfo->SourceLineCache == 0) {
+    const MemoryBuffer *Buffer = FileInfo->Buffer;
+    
+    // Find the file offsets of all of the *physical* source lines.  This does
+    // not look at trigraphs, escaped newlines, or anything else tricky.
+    std::vector<unsigned> LineOffsets;
+    
+    // Line #1 starts at char 0.
+    LineOffsets.push_back(0);
+    
+    const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart();
+    const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd();
+    unsigned Offs = 0;
+    while (1) {
+      // Skip over the contents of the line.
+      // TODO: Vectorize this?  This is very performance sensitive for programs
+      // with lots of diagnostics and in -E mode.
+      const unsigned char *NextBuf = (const unsigned char *)Buf;
+      while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0')
+        ++NextBuf;
+      Offs += NextBuf-Buf;
+      Buf = NextBuf;
+      
+      if (Buf[0] == '\n' || Buf[0] == '\r') {
+        // If this is \n\r or \r\n, skip both characters.
+        if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1])
+          ++Offs, ++Buf;
+        ++Offs, ++Buf;
+        LineOffsets.push_back(Offs);
+      } else {
+        // Otherwise, this is a null.  If end of file, exit.
+        if (Buf == End) break;
+        // Otherwise, it is a null embedded in the buffer: skip it.
+        ++Offs, ++Buf;
+      }
+    }
+    LineOffsets.push_back(Offs);  // Final entry: the offset where scanning ended.
+    
+    // Copy the offsets into the FileInfo structure.
+    FileInfo->NumLines = LineOffsets.size();
+    FileInfo->SourceLineCache = new unsigned[LineOffsets.size()];
+    std::copy(LineOffsets.begin(), LineOffsets.end(),
+              FileInfo->SourceLineCache);
+  }
+
+  // Okay, we know we have a line number table.  Do a binary search to find the
+  // line number that this character position lands on.
+  unsigned NumLines = FileInfo->NumLines;
+  unsigned *SourceLineCache = FileInfo->SourceLineCache;
+    
+  // TODO: If this is performance sensitive, we could try doing simple radix
+  // type approaches to make good (tight?) initial guesses based on the
+  // assumption that all lines are the same average size.
+  unsigned *Pos = std::lower_bound(SourceLineCache, SourceLineCache+NumLines,
+                                   getFilePos(Loc)+1);
+  return Pos-SourceLineCache;
+}
+
+/// getSourceFilePos - This method returns the *logical* offset from the start
+/// of the file that the specified SourceLocation represents.  That is the
+/// position of the *logical* character data, not the physical file position:
+/// for a location inside a macro expansion, the result is where the macro was
+/// instantiated, not where the characters of the macro's definition can be
+/// found.
+unsigned SourceManager::getSourceFilePos(SourceLocation Loc) const {
+  // Peel off macro expansion IDs, replacing Loc with the location recorded in
+  // the expansion's IncludeLoc, until a non-macro file ID is reached.
+  for (;;) {
+    const SrcMgr::FileIDInfo *Info = getFIDInfo(Loc.getFileID());
+    if (Info->IDType != SrcMgr::FileIDInfo::MacroExpansion)
+      break;
+    Loc = Info->IncludeLoc;
+  }
+  return getFilePos(Loc);
+}
+
+
+/// PrintStats - Print statistics to stderr.
+void SourceManager::PrintStats() const {
+  std::cerr << "\n*** Source Manager Stats:\n"
+            << FileInfos.size() << " files mapped, " << MemBufferInfos.size()
+            << " mem buffers mapped, " << FileIDs.size()
+            << " file ID's allocated.\n";
+
+  // Count how many of the allocated file IDs are of each kind.
+  unsigned NumBuffers = 0, NumMacros = 0;
+  for (unsigned Idx = 0, End = FileIDs.size(); Idx != End; ++Idx) {
+    switch (FileIDs[Idx].IDType) {
+    case FileIDInfo::NormalBuffer:   ++NumBuffers; break;
+    case FileIDInfo::MacroExpansion: ++NumMacros;  break;
+    default: assert(0 && "Unknown FileID!");
+    }
+  }
+  std::cerr << "  " << NumBuffers << " normal buffer FileID's, "
+            << NumMacros << " macro expansion FileID's.\n";
+
+  // Total the file-backed buffer sizes and count files with a line table.
+  unsigned NumLineNumsComputed = 0;
+  unsigned NumFileBytesMapped = 0;
+  std::map<const FileEntry *, FileInfo>::const_iterator I = FileInfos.begin();
+  for (; I != FileInfos.end(); ++I) {
+    if (I->second.SourceLineCache)
+      ++NumLineNumsComputed;
+    NumFileBytesMapped += I->second.Buffer->getBufferSize();
+  }
+  std::cerr << NumFileBytesMapped << " bytes of files mapped, "
+            << NumLineNumsComputed << " files with line #'s computed.\n";
+}
diff --git a/Basic/TargetInfo.cpp b/Basic/TargetInfo.cpp
new file mode 100644
index 0000000..008e99b
--- /dev/null
+++ b/Basic/TargetInfo.cpp
@@ -0,0 +1,223 @@
+//===--- TargetInfo.cpp - Information about Target machine ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file implements the TargetInfo and TargetInfoImpl interfaces.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Basic/TargetInfo.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/AST/Builtins.h"
+#include <map>
+#include <set>
+using namespace clang;
+
+void TargetInfoImpl::ANCHOR() {} // Empty out-of-line virtual method for class.
+
+
+/// DiagnoseNonPortability - Mark the translation unit as non-portable and, if
+/// a Diagnostic object is attached and Loc is valid, report DiagKind at Loc.
+void TargetInfo::DiagnoseNonPortability(SourceLocation Loc, unsigned DiagKind) {
+  NonPortable = true;   // Recorded even when nothing ends up being reported.
+  if (!Diag || !Loc.isValid()) return;
+  Diag->Report(Loc, DiagKind);
+}
+
+/// GetTargetDefineMap - Collect Target's #defines into Map, keyed by macro
+/// name, for easy lookup.  A define without '=' gets the value "1".
+static void GetTargetDefineMap(const TargetInfoImpl *Target,
+                               std::map<std::string, std::string> &Map) {
+  std::vector<std::string> Defines;
+  Target->getTargetDefines(Defines);
+  // Visit the list back to front, as insert() never replaces an existing key.
+  for (unsigned i = Defines.size(); i != 0; --i) {
+    const std::string &Def = Defines[i-1];
+    const char *Str = Def.c_str();
+    const char *Equal = strchr(Str, '=');
+    if (Equal == 0) {
+      // No explicit value: remember "macroname=1".
+      Map.insert(std::make_pair(Def, std::string("1")));
+      continue;
+    }
+    // Split at the '=': the name precedes it, the value follows it.
+    Map.insert(std::make_pair(std::string(Str, Equal),
+                              std::string(Equal+1, Str+Def.size())));
+  }
+}
+
+/// getTargetDefines - Appends the target-specific #define values for this
+/// target set to the specified buffer, one preprocessor directive per line.
+void TargetInfo::getTargetDefines(std::vector<char> &Buffer) {
+  // This is tricky in the face of secondary targets.  Specifically,
+  // target-specific #defines that are present and identical across all
+  // secondary targets are turned into #defines, #defines that are present in
+  // the primary target but are missing or different in the secondary targets
+  // are turned into #define_target, and #defines that are not defined in the
+  // primary, but are defined in a secondary are turned into
+  // #define_other_target.  This allows the preprocessor to correctly track uses
+  // of target-specific macros.
+  
+  // Get the set of primary #defines.
+  std::map<std::string, std::string> PrimaryDefines;
+  GetTargetDefineMap(PrimaryTarget, PrimaryDefines);
+  
+  // If we have no secondary targets, be a bit more efficient.
+  if (SecondaryTargets.empty()) {
+    for (std::map<std::string, std::string>::iterator I = 
+           PrimaryDefines.begin(), E = PrimaryDefines.end(); I != E; ++I) {
+      // With no secondary targets there is nothing to disagree with, so every
+      // define is emitted as a plain #define.
+      const char *Command = "#define ";
+      Buffer.insert(Buffer.end(), Command, Command+strlen(Command));
+      
+      // Insert "defname defvalue\n".
+      Buffer.insert(Buffer.end(), I->first.begin(), I->first.end());
+      Buffer.push_back(' ');
+      Buffer.insert(Buffer.end(), I->second.begin(), I->second.end());
+      Buffer.push_back('\n');
+    }
+    return;
+  }
+  
+  // Get the sets of secondary #defines.
+  std::vector<std::map<std::string, std::string> > SecondaryDefines;
+  SecondaryDefines.resize(SecondaryTargets.size());
+  for (unsigned i = 0, e = SecondaryTargets.size(); i != e; ++i)
+    GetTargetDefineMap(SecondaryTargets[i], SecondaryDefines[i]);
+
+  // Loop over all defines in the primary target, processing them until we run
+  // out.
+  while (!PrimaryDefines.empty()) {
+    std::string DefineName  = PrimaryDefines.begin()->first;
+    std::string DefineValue = PrimaryDefines.begin()->second;
+    PrimaryDefines.erase(PrimaryDefines.begin());
+    
+    // Check to see whether all secondary targets have this #define and whether
+    // it is to the same value.  Remember if not, but remove the #define from
+    // their collection in any case if they have it.
+    bool isPortable = true;
+    
+    for (unsigned i = 0, e = SecondaryDefines.size(); i != e; ++i) {
+      std::map<std::string, std::string>::iterator I = 
+        SecondaryDefines[i].find(DefineName);
+      if (I == SecondaryDefines[i].end()) {
+        // Secondary target doesn't have this #define.
+        isPortable = false;
+      } else {
+        // Secondary target has this define, remember if it disagrees.
+        if (isPortable)
+          isPortable = I->second == DefineValue;
+        // Remove it from the secondary target unconditionally.
+        SecondaryDefines[i].erase(I);
+      }
+    }
+    
+    // If this define is non-portable, turn it into #define_target, otherwise
+    // just use #define.
+    const char *Command = isPortable ? "#define " : "#define_target ";
+    Buffer.insert(Buffer.end(), Command, Command+strlen(Command));
+
+    // Insert "defname defvalue\n".
+    Buffer.insert(Buffer.end(), DefineName.begin(), DefineName.end());
+    Buffer.push_back(' ');
+    Buffer.insert(Buffer.end(), DefineValue.begin(), DefineValue.end());
+    Buffer.push_back('\n');
+  }
+  
+  // Now that all of the primary target's defines have been handled and removed
+  // from the secondary target's define sets, go through the remaining secondary
+  // target's #defines and taint them.
+  for (unsigned i = 0, e = SecondaryDefines.size(); i != e; ++i) {
+    std::map<std::string, std::string> &Defs = SecondaryDefines[i];
+    while (!Defs.empty()) {
+      const std::string &DefName = Defs.begin()->first;
+      
+      // Insert "#define_other_target defname".
+      const char *Command = "#define_other_target ";
+      Buffer.insert(Buffer.end(), Command, Command+strlen(Command));
+      Buffer.insert(Buffer.end(), DefName.begin(), DefName.end());
+      Buffer.push_back('\n');
+      
+      // If any other secondary targets have this same define, remove it from
+      // them to avoid duplicate #define_other_target directives.
+      for (unsigned j = i+1; j != e; ++j)
+        SecondaryDefines[j].erase(DefName);
+      
+      Defs.erase(Defs.begin());
+    }
+  }
+}
+
+/// ComputeWCharWidth - Record the primary target's wchar_t width in
+/// WCharWidth, and report diag::port_wchar_t at Loc through
+/// DiagnoseNonPortability if any secondary target uses a different width.
+void TargetInfo::ComputeWCharWidth(SourceLocation Loc) {
+  WCharWidth = PrimaryTarget->getWCharWidth();
+  // One disagreeing secondary target is enough: report once, then stop.
+  for (unsigned Idx = 0, Count = SecondaryTargets.size(); Idx != Count; ++Idx) {
+    if (SecondaryTargets[Idx]->getWCharWidth() == WCharWidth) continue;
+    DiagnoseNonPortability(Loc, diag::port_wchar_t);
+    return;
+  }
+}
+
+
+/// getTargetBuiltins - Return information about target-specific builtins for
+/// the current primary target, and info about which builtins are non-portable
+/// across the current set of primary and secondary targets.  Records and
+/// NumRecords are handed to the primary target to fill in; NPortable is only
+/// appended to (never cleared) and may receive the same name more than once.
+void TargetInfo::getTargetBuiltins(const Builtin::Info *&Records,
+                                   unsigned &NumRecords,
+                                   std::vector<const char *> &NPortable) const {
+  // Get info about what actual builtins we will expose.
+  PrimaryTarget->getTargetBuiltins(Records, NumRecords);
+  if (SecondaryTargets.empty()) return;
+
+  // Compute the set of non-portable builtins.  Start by computing a mapping
+  // from the primary target's builtins to their info records for lookup.
+  typedef std::map<std::string, const Builtin::Info*> RecMapTy;
+  RecMapTy PrimaryRecs;
+  for (unsigned i = 0, e = NumRecords; i != e; ++i)
+    PrimaryRecs[Records[i].Name] = Records+i;
+
+  for (unsigned i = 0, e = SecondaryTargets.size(); i != e; ++i) {
+    // Get the builtins for this secondary target.
+    const Builtin::Info *Records2nd = 0;
+    unsigned NumRecords2nd = 0;
+    SecondaryTargets[i]->getTargetBuiltins(Records2nd, NumRecords2nd);
+
+    // Remember all of the secondary builtin names.
+    std::set<std::string> BuiltinNames2nd;
+    for (unsigned j = 0; j != NumRecords2nd; ++j) {
+      BuiltinNames2nd.insert(Records2nd[j].Name);
+
+      // Check to see if the primary target has this builtin.  Use find()
+      // rather than operator[] so that a miss does not insert a null entry.
+      RecMapTy::const_iterator PrimI = PrimaryRecs.find(Records2nd[j].Name);
+      if (PrimI != PrimaryRecs.end()) {
+        // It does.  Mark the builtin non-portable if the records differ.
+        if (Records2nd[j] != *PrimI->second)
+          NPortable.push_back(PrimI->second->Name);
+      } else {
+        // The primary target doesn't have this, it is non-portable.
+        NPortable.push_back(Records2nd[j].Name);
+      }
+    }
+
+    // Conversely, any primary builtin that this secondary target does not
+    // provide is non-portable as well.
+    for (unsigned j = 0; j != NumRecords; ++j) {
+      if (!BuiltinNames2nd.count(Records[j].Name))
+        NPortable.push_back(Records[j].Name);
+    }
+  }
+}
+
+
diff --git a/Basic/TokenKinds.cpp b/Basic/TokenKinds.cpp
new file mode 100644
index 0000000..772925b
--- /dev/null
+++ b/Basic/TokenKinds.cpp
@@ -0,0 +1,28 @@
+//===--- TokenKinds.cpp - Token Kinds Support -----------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file implements the TokenKind enum and support functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Basic/TokenKinds.h"
+#include <cassert>
+using namespace clang;
+
+static const char * const TokNames[] = {  // Indexed by tok::TokenKind.
+#define TOK(X) #X,
+#define KEYWORD(X,Y) #X,
+#include "clang/Basic/TokenKinds.def"
+  0  // Null entry following whatever the .def file expands to.
+};
+
+const char *tok::getTokenName(enum TokenKind Kind) {
+  assert(Kind < tok::NUM_TOKENS && "Token kind out of range!");
+  return TokNames[Kind];  // Name produced by #X stringization in TokNames.
+}