Check in LLVM r95781.
diff --git a/lib/Lex/CMakeLists.txt b/lib/Lex/CMakeLists.txt
new file mode 100644
index 0000000..81a1e01
--- /dev/null
+++ b/lib/Lex/CMakeLists.txt
@@ -0,0 +1,26 @@
+set(LLVM_NO_RTTI 1)
+
+# TODO: Add -maltivec when ARCH is PowerPC.
+
+add_clang_library(clangLex
+  HeaderMap.cpp
+  HeaderSearch.cpp
+  Lexer.cpp
+  LiteralSupport.cpp
+  MacroArgs.cpp
+  MacroInfo.cpp
+  PPCaching.cpp
+  PPDirectives.cpp
+  PPExpressions.cpp
+  PPLexerChange.cpp
+  PPMacroExpansion.cpp
+  PTHLexer.cpp
+  Pragma.cpp
+  Preprocessor.cpp
+  PreprocessorLexer.cpp
+  ScratchBuffer.cpp
+  TokenConcatenation.cpp
+  TokenLexer.cpp
+  )
+
+add_dependencies(clangLex ClangDiagnosticLex)
diff --git a/lib/Lex/HeaderMap.cpp b/lib/Lex/HeaderMap.cpp
new file mode 100644
index 0000000..4010d61
--- /dev/null
+++ b/lib/Lex/HeaderMap.cpp
@@ -0,0 +1,228 @@
+//===--- HeaderMap.cpp - A file that acts like dir of symlinks ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the HeaderMap interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/HeaderMap.h"
+#include "clang/Basic/FileManager.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/System/DataTypes.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <cstdio>
+using namespace clang;
+
+//===----------------------------------------------------------------------===//
+// Data Structures and Manifest Constants
+//===----------------------------------------------------------------------===//
+
+enum {
+  HMAP_HeaderMagicNumber = ('h' << 24) | ('m' << 16) | ('a' << 8) | 'p',
+  HMAP_HeaderVersion = 1,
+
+  HMAP_EmptyBucketKey = 0
+};
+
+namespace clang {
+struct HMapBucket {
+  uint32_t Key;          // Offset (into strings) of key.
+
+  uint32_t Prefix;     // Offset (into strings) of value prefix.
+  uint32_t Suffix;     // Offset (into strings) of value suffix.
+};
+
+struct HMapHeader {
+  uint32_t Magic;           // Magic word, also indicates byte order.
+  uint16_t Version;         // Version number -- currently 1.
+  uint16_t Reserved;        // Reserved for future use - zero for now.
+  uint32_t StringsOffset;   // Offset to start of string pool.
+  uint32_t NumEntries;      // Number of entries in the string table.
+  uint32_t NumBuckets;      // Number of buckets (always a power of 2).
+  uint32_t MaxValueLength;  // Length of longest result path (excluding nul).
+  // An array of 'NumBuckets' HMapBucket objects follows this header.
+  // Strings follow the buckets, at StringsOffset.
+};
+} // end namespace clang.
+
+/// HashHMapKey - This is the 'well known' hash function required by the file
+/// format, used to look up keys in the hash table.  The hash table uses simple
+/// linear probing based on this function.
+static inline unsigned HashHMapKey(llvm::StringRef Str) {
+  unsigned Result = 0;
+  const char *S = Str.begin(), *End = Str.end();
+
+  for (; S != End; S++)
+    Result += tolower(*S) * 13;
+  return Result;
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Verification and Construction
+//===----------------------------------------------------------------------===//
+
+/// HeaderMap::Create - This attempts to load the specified file as a header
+/// map.  If it doesn't look like a HeaderMap, it gives up and returns null.
+/// If it looks like a HeaderMap but is obviously corrupted, it puts a reason
+/// into the string error argument and returns null.
+const HeaderMap *HeaderMap::Create(const FileEntry *FE) {
+  // If the file is too small to be a header map, ignore it.
+  unsigned FileSize = FE->getSize();
+  if (FileSize <= sizeof(HMapHeader)) return 0;
+
+  llvm::OwningPtr<const llvm::MemoryBuffer> FileBuffer(
+    llvm::MemoryBuffer::getFile(FE->getName(), 0, FE->getSize()));
+  if (FileBuffer == 0) return 0;  // Unreadable file?
+  const char *FileStart = FileBuffer->getBufferStart();
+
+  // We know the file is at least as big as the header, check it now.
+  const HMapHeader *Header = reinterpret_cast<const HMapHeader*>(FileStart);
+
+  // Sniff it to see if it's a headermap by checking the magic number and
+  // version.
+  bool NeedsByteSwap;
+  if (Header->Magic == HMAP_HeaderMagicNumber &&
+      Header->Version == HMAP_HeaderVersion)
+    NeedsByteSwap = false;
+  else if (Header->Magic == llvm::ByteSwap_32(HMAP_HeaderMagicNumber) &&
+           Header->Version == llvm::ByteSwap_16(HMAP_HeaderVersion))
+    NeedsByteSwap = true;  // Mixed endianness headermap.
+  else
+    return 0;  // Not a header map.
+
+  if (Header->Reserved != 0) return 0;
+
+  // Okay, everything looks good, create the header map.
+  return new HeaderMap(FileBuffer.take(), NeedsByteSwap);
+}
+
+HeaderMap::~HeaderMap() {
+  delete FileBuffer;
+}
+
+//===----------------------------------------------------------------------===//
+//  Utility Methods
+//===----------------------------------------------------------------------===//
+
+
+/// getFileName - Return the filename of the headermap.
+const char *HeaderMap::getFileName() const {
+  return FileBuffer->getBufferIdentifier();
+}
+
+unsigned HeaderMap::getEndianAdjustedWord(unsigned X) const {
+  if (!NeedsBSwap) return X;
+  return llvm::ByteSwap_32(X);
+}
+
+/// getHeader - Return a reference to the file header, in unbyte-swapped form.
+/// This method cannot fail.
+const HMapHeader &HeaderMap::getHeader() const {
+  // We know the file is at least as big as the header.  Return it.
+  return *reinterpret_cast<const HMapHeader*>(FileBuffer->getBufferStart());
+}
+
+/// getBucket - Return the specified hash table bucket from the header map,
+/// bswap'ing its fields as appropriate.  If the bucket number is not valid,
+/// this returns a bucket with an empty key (0).
+HMapBucket HeaderMap::getBucket(unsigned BucketNo) const {
+  HMapBucket Result;
+  Result.Key = HMAP_EmptyBucketKey;
+
+  const HMapBucket *BucketArray =
+    reinterpret_cast<const HMapBucket*>(FileBuffer->getBufferStart() +
+                                        sizeof(HMapHeader));
+
+  const HMapBucket *BucketPtr = BucketArray+BucketNo;
+  if ((char*)(BucketPtr+1) > FileBuffer->getBufferEnd()) {
+    Result.Prefix = 0;
+    Result.Suffix = 0;
+    return Result;  // Invalid buffer, corrupt hmap.
+  }
+
+  // Otherwise, the bucket is valid.  Load the values, bswapping as needed.
+  Result.Key    = getEndianAdjustedWord(BucketPtr->Key);
+  Result.Prefix = getEndianAdjustedWord(BucketPtr->Prefix);
+  Result.Suffix = getEndianAdjustedWord(BucketPtr->Suffix);
+  return Result;
+}
+
+/// getString - Look up the specified string in the string table.  If the string
+/// index is not valid, it returns an empty string.
+const char *HeaderMap::getString(unsigned StrTabIdx) const {
+  // Add the start of the string table to the idx.
+  StrTabIdx += getEndianAdjustedWord(getHeader().StringsOffset);
+
+  // Check for invalid index.
+  if (StrTabIdx >= FileBuffer->getBufferSize())
+    return 0;
+
+  // Otherwise, we have a valid pointer into the file.  Just return it.  We know
+  // that the "string" can not overrun the end of the file, because the buffer
+  // is nul terminated by virtue of being a MemoryBuffer.
+  return FileBuffer->getBufferStart()+StrTabIdx;
+}
+
+//===----------------------------------------------------------------------===//
+// The Main Drivers
+//===----------------------------------------------------------------------===//
+
+/// dump - Print the contents of this headermap to stderr.
+void HeaderMap::dump() const {
+  const HMapHeader &Hdr = getHeader();
+  unsigned NumBuckets = getEndianAdjustedWord(Hdr.NumBuckets);
+
+  fprintf(stderr, "Header Map %s:\n  %d buckets, %d entries\n",
+          getFileName(), NumBuckets,
+          getEndianAdjustedWord(Hdr.NumEntries));
+
+  for (unsigned i = 0; i != NumBuckets; ++i) {
+    HMapBucket B = getBucket(i);
+    if (B.Key == HMAP_EmptyBucketKey) continue;
+
+    const char *Key    = getString(B.Key);
+    const char *Prefix = getString(B.Prefix);
+    const char *Suffix = getString(B.Suffix);
+    fprintf(stderr, "  %d. %s -> '%s' '%s'\n", i, Key, Prefix, Suffix);
+  }
+}
+
+/// LookupFile - Check to see if the specified relative filename is located in
+/// this HeaderMap.  If so, open it and return its FileEntry.
+const FileEntry *HeaderMap::LookupFile(llvm::StringRef Filename,
+                                       FileManager &FM) const {
+  const HMapHeader &Hdr = getHeader();
+  unsigned NumBuckets = getEndianAdjustedWord(Hdr.NumBuckets);
+
+  // If the number of buckets is not a power of two, the headermap is corrupt.
+  // Don't probe infinitely.
+  if (NumBuckets & (NumBuckets-1))
+    return 0;
+
+  // Linearly probe the hash table.
+  for (unsigned Bucket = HashHMapKey(Filename);; ++Bucket) {
+    HMapBucket B = getBucket(Bucket & (NumBuckets-1));
+    if (B.Key == HMAP_EmptyBucketKey) return 0; // Hash miss.
+
+    // See if the key matches.  If not, probe on.
+    if (!Filename.equals_lower(getString(B.Key)))
+      continue;
+
+    // If so, we have a match in the hash table.  Construct the destination
+    // path.
+    llvm::SmallString<1024> DestPath;
+    DestPath += getString(B.Prefix);
+    DestPath += getString(B.Suffix);
+    return FM.getFile(DestPath.begin(), DestPath.end());
+  }
+}
diff --git a/lib/Lex/HeaderSearch.cpp b/lib/Lex/HeaderSearch.cpp
new file mode 100644
index 0000000..4554aba
--- /dev/null
+++ b/lib/Lex/HeaderSearch.cpp
@@ -0,0 +1,440 @@
+//===--- HeaderSearch.cpp - Resolve Header File Locations ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file implements the DirectoryLookup and HeaderSearch interfaces.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/HeaderSearch.h"
+#include "clang/Lex/HeaderMap.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/IdentifierTable.h"
+#include "llvm/System/Path.h"
+#include "llvm/ADT/SmallString.h"
+#include <cstdio>
+using namespace clang;
+
+const IdentifierInfo *
+HeaderFileInfo::getControllingMacro(ExternalIdentifierLookup *External) {
+  if (ControllingMacro)
+    return ControllingMacro;
+
+  if (!ControllingMacroID || !External)
+    return 0;
+
+  ControllingMacro = External->GetIdentifier(ControllingMacroID);
+  return ControllingMacro;
+}
+
+HeaderSearch::HeaderSearch(FileManager &FM) : FileMgr(FM), FrameworkMap(64) {
+  SystemDirIdx = 0;
+  NoCurDirSearch = false;
+
+  ExternalLookup = 0;
+  NumIncluded = 0;
+  NumMultiIncludeFileOptzn = 0;
+  NumFrameworkLookups = NumSubFrameworkLookups = 0;
+}
+
+HeaderSearch::~HeaderSearch() {
+  // Delete headermaps.
+  for (unsigned i = 0, e = HeaderMaps.size(); i != e; ++i)
+    delete HeaderMaps[i].second;
+}
+
+void HeaderSearch::PrintStats() {
+  fprintf(stderr, "\n*** HeaderSearch Stats:\n");
+  fprintf(stderr, "%d files tracked.\n", (int)FileInfo.size());
+  unsigned NumOnceOnlyFiles = 0, MaxNumIncludes = 0, NumSingleIncludedFiles = 0;
+  for (unsigned i = 0, e = FileInfo.size(); i != e; ++i) {
+    NumOnceOnlyFiles += FileInfo[i].isImport;
+    if (MaxNumIncludes < FileInfo[i].NumIncludes)
+      MaxNumIncludes = FileInfo[i].NumIncludes;
+    NumSingleIncludedFiles += FileInfo[i].NumIncludes == 1;
+  }
+  fprintf(stderr, "  %d #import/#pragma once files.\n", NumOnceOnlyFiles);
+  fprintf(stderr, "  %d included exactly once.\n", NumSingleIncludedFiles);
+  fprintf(stderr, "  %d max times a file is included.\n", MaxNumIncludes);
+
+  fprintf(stderr, "  %d #include/#include_next/#import.\n", NumIncluded);
+  fprintf(stderr, "    %d #includes skipped due to"
+          " the multi-include optimization.\n", NumMultiIncludeFileOptzn);
+
+  fprintf(stderr, "%d framework lookups.\n", NumFrameworkLookups);
+  fprintf(stderr, "%d subframework lookups.\n", NumSubFrameworkLookups);
+}
+
+/// CreateHeaderMap - This method returns a HeaderMap for the specified
+/// FileEntry, uniquing them through the 'HeaderMaps' datastructure.
+const HeaderMap *HeaderSearch::CreateHeaderMap(const FileEntry *FE) {
+  // We expect the number of headermaps to be small, and almost always empty.
+  // If it ever grows, use of a linear search should be re-evaluated.
+  if (!HeaderMaps.empty()) {
+    for (unsigned i = 0, e = HeaderMaps.size(); i != e; ++i)
+      // Pointer equality comparison of FileEntries works because they are
+      // already uniqued by inode.
+      if (HeaderMaps[i].first == FE)
+        return HeaderMaps[i].second;
+  }
+
+  if (const HeaderMap *HM = HeaderMap::Create(FE)) {
+    HeaderMaps.push_back(std::make_pair(FE, HM));
+    return HM;
+  }
+
+  return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// File lookup within a DirectoryLookup scope
+//===----------------------------------------------------------------------===//
+
+/// getName - Return the directory or filename corresponding to this lookup
+/// object.
+const char *DirectoryLookup::getName() const {
+  if (isNormalDir())
+    return getDir()->getName();
+  if (isFramework())
+    return getFrameworkDir()->getName();
+  assert(isHeaderMap() && "Unknown DirectoryLookup");
+  return getHeaderMap()->getFileName();
+}
+
+
+/// LookupFile - Lookup the specified file in this search path, returning it
+/// if it exists or returning null if not.
+const FileEntry *DirectoryLookup::LookupFile(llvm::StringRef Filename,
+                                             HeaderSearch &HS) const {
+  llvm::SmallString<1024> TmpDir;
+  if (isNormalDir()) {
+    // Concatenate the requested file onto the directory.
+    // FIXME: Portability.  Filename concatenation should be in sys::Path.
+    TmpDir += getDir()->getName();
+    TmpDir.push_back('/');
+    TmpDir.append(Filename.begin(), Filename.end());
+    return HS.getFileMgr().getFile(TmpDir.begin(), TmpDir.end());
+  }
+
+  if (isFramework())
+    return DoFrameworkLookup(Filename, HS);
+
+  assert(isHeaderMap() && "Unknown directory lookup");
+  return getHeaderMap()->LookupFile(Filename, HS.getFileMgr());
+}
+
+
+/// DoFrameworkLookup - Do a lookup of the specified file in the current
+/// DirectoryLookup, which is a framework directory.
+const FileEntry *DirectoryLookup::DoFrameworkLookup(llvm::StringRef Filename,
+                                                    HeaderSearch &HS) const {
+  FileManager &FileMgr = HS.getFileMgr();
+
+  // Framework names must have a '/' in the filename.
+  size_t SlashPos = Filename.find('/');
+  if (SlashPos == llvm::StringRef::npos) return 0;
+
+  // Find out if this is the home for the specified framework, by checking
+  // HeaderSearch.  Possible answers are yes/no and unknown.
+  const DirectoryEntry *&FrameworkDirCache =
+    HS.LookupFrameworkCache(Filename.substr(0, SlashPos));
+
+  // If it is known and in some other directory, fail.
+  if (FrameworkDirCache && FrameworkDirCache != getFrameworkDir())
+    return 0;
+
+  // Otherwise, construct the path to this framework dir.
+
+  // FrameworkName = "/System/Library/Frameworks/"
+  llvm::SmallString<1024> FrameworkName;
+  FrameworkName += getFrameworkDir()->getName();
+  if (FrameworkName.empty() || FrameworkName.back() != '/')
+    FrameworkName.push_back('/');
+
+  // FrameworkName = "/System/Library/Frameworks/Cocoa"
+  FrameworkName.append(Filename.begin(), Filename.begin()+SlashPos);
+
+  // FrameworkName = "/System/Library/Frameworks/Cocoa.framework/"
+  FrameworkName += ".framework/";
+
+  // If the cache entry is still unresolved, we have not yet verified that the
+  // framework directory exists on disk.  Check its existence now.
+  if (FrameworkDirCache == 0) {
+    HS.IncrementFrameworkLookupCount();
+
+    // If the framework dir doesn't exist, we fail.
+    // FIXME: It's probably more efficient to query this with FileMgr.getDir.
+    if (!llvm::sys::Path(std::string(FrameworkName.begin(),
+                                     FrameworkName.end())).exists())
+      return 0;
+
+    // Otherwise, if it does, remember that this is the right direntry for this
+    // framework.
+    FrameworkDirCache = getFrameworkDir();
+  }
+
+  // Check "/System/Library/Frameworks/Cocoa.framework/Headers/file.h"
+  unsigned OrigSize = FrameworkName.size();
+
+  FrameworkName += "Headers/";
+  FrameworkName.append(Filename.begin()+SlashPos+1, Filename.end());
+  if (const FileEntry *FE = FileMgr.getFile(FrameworkName.begin(),
+                                            FrameworkName.end())) {
+    return FE;
+  }
+
+  // Check "/System/Library/Frameworks/Cocoa.framework/PrivateHeaders/file.h"
+  const char *Private = "Private";
+  FrameworkName.insert(FrameworkName.begin()+OrigSize, Private,
+                       Private+strlen(Private));
+  return FileMgr.getFile(FrameworkName.begin(), FrameworkName.end());
+}
+
+
+//===----------------------------------------------------------------------===//
+// Header File Location.
+//===----------------------------------------------------------------------===//
+
+
+/// LookupFile - Given a "foo" or <foo> reference, look up the indicated file,
+/// return null on failure.  isAngled indicates whether the file reference is
+/// for system #include's or not (i.e. using <> instead of "").  CurFileEnt, if
+/// non-null, indicates where the #including file is, in case a relative search
+/// is needed.
+const FileEntry *HeaderSearch::LookupFile(llvm::StringRef Filename,
+                                          bool isAngled,
+                                          const DirectoryLookup *FromDir,
+                                          const DirectoryLookup *&CurDir,
+                                          const FileEntry *CurFileEnt) {
+  // If 'Filename' is absolute, check to see if it exists and no searching.
+  if (llvm::sys::Path::isAbsolute(Filename.begin(), Filename.size())) {
+    CurDir = 0;
+
+    // If this was an #include_next "/absolute/file", fail.
+    if (FromDir) return 0;
+
+    // Otherwise, just return the file.
+    return FileMgr.getFile(Filename);
+  }
+
+  // Step #0, unless disabled, check to see if the file is in the #includer's
+  // directory.  This has to be based on CurFileEnt, not CurDir, because
+  // CurFileEnt could be a #include of a subdirectory (#include "foo/bar.h") and
+  // a subsequent include of "baz.h" should resolve to "whatever/foo/baz.h".
+  // This search is not done for <> headers.
+  if (CurFileEnt && !isAngled && !NoCurDirSearch) {
+    llvm::SmallString<1024> TmpDir;
+    // Concatenate the requested file onto the directory.
+    // FIXME: Portability.  Filename concatenation should be in sys::Path.
+    TmpDir += CurFileEnt->getDir()->getName();
+    TmpDir.push_back('/');
+    TmpDir.append(Filename.begin(), Filename.end());
+    if (const FileEntry *FE = FileMgr.getFile(TmpDir.str())) {
+      // Leave CurDir unset.
+      // This file is a system header or C++ unfriendly if the old file is.
+      //
+      // Note that the temporary 'DirInfo' is required here, as either call to
+      // getFileInfo could resize the vector and we don't want to rely on order
+      // of evaluation.
+      unsigned DirInfo = getFileInfo(CurFileEnt).DirInfo;
+      getFileInfo(FE).DirInfo = DirInfo;
+      return FE;
+    }
+  }
+
+  CurDir = 0;
+
+  // If this is a system #include, ignore the user #include locs.
+  unsigned i = isAngled ? SystemDirIdx : 0;
+
+  // If this is a #include_next request, start searching after the directory the
+  // file was found in.
+  if (FromDir)
+    i = FromDir-&SearchDirs[0];
+
+  // Cache all of the lookups performed by this method.  Many headers are
+  // multiply included, and the "pragma once" optimization prevents them from
+  // being relex/pp'd, but they would still have to search through a
+  // (potentially huge) series of SearchDirs to find it.
+  std::pair<unsigned, unsigned> &CacheLookup =
+    LookupFileCache.GetOrCreateValue(Filename).getValue();
+
+  // If the entry has been previously looked up, the first value will be
+  // non-zero.  If the value is equal to i (the start point of our search), then
+  // this is a matching hit.
+  if (CacheLookup.first == i+1) {
+    // Skip querying potentially lots of directories for this lookup.
+    i = CacheLookup.second;
+  } else {
+    // Otherwise, this is the first query, or the previous query didn't match
+    // our search start.  We will fill in our found location below, so prime the
+    // start point value.
+    CacheLookup.first = i+1;
+  }
+
+  // Check each directory in sequence to see if it contains this file.
+  for (; i != SearchDirs.size(); ++i) {
+    const FileEntry *FE =
+      SearchDirs[i].LookupFile(Filename, *this);
+    if (!FE) continue;
+
+    CurDir = &SearchDirs[i];
+
+    // This file is a system header or C++ unfriendly if the dir is.
+    getFileInfo(FE).DirInfo = CurDir->getDirCharacteristic();
+
+    // Remember this location for the next lookup we do.
+    CacheLookup.second = i;
+    return FE;
+  }
+
+  // Otherwise, didn't find it. Remember we didn't find this.
+  CacheLookup.second = SearchDirs.size();
+  return 0;
+}
+
+/// LookupSubframeworkHeader - Look up a subframework for the specified
+/// #include file.  For example, if #include'ing <HIToolbox/HIToolbox.h> from
+/// within ".../Carbon.framework/Headers/Carbon.h", check to see if HIToolbox
+/// is a subframework within Carbon.framework.  If so, return the FileEntry
+/// for the designated file, otherwise return null.
+const FileEntry *HeaderSearch::
+LookupSubframeworkHeader(llvm::StringRef Filename,
+                         const FileEntry *ContextFileEnt) {
+  assert(ContextFileEnt && "No context file?");
+
+  // Framework names must have a '/' in the filename.  Find it.
+  size_t SlashPos = Filename.find('/');
+  if (SlashPos == llvm::StringRef::npos) return 0;
+
+  // Look up the base framework name of the ContextFileEnt.
+  const char *ContextName = ContextFileEnt->getName();
+
+  // If the context info wasn't a framework, couldn't be a subframework.
+  const char *FrameworkPos = strstr(ContextName, ".framework/");
+  if (FrameworkPos == 0)
+    return 0;
+
+  llvm::SmallString<1024> FrameworkName(ContextName,
+                                        FrameworkPos+strlen(".framework/"));
+
+  // Append Frameworks/HIToolbox.framework/
+  FrameworkName += "Frameworks/";
+  FrameworkName.append(Filename.begin(), Filename.begin()+SlashPos);
+  FrameworkName += ".framework/";
+
+  llvm::StringMapEntry<const DirectoryEntry *> &CacheLookup =
+    FrameworkMap.GetOrCreateValue(Filename.begin(), Filename.begin()+SlashPos);
+
+  // Some other location?
+  if (CacheLookup.getValue() &&
+      CacheLookup.getKeyLength() == FrameworkName.size() &&
+      memcmp(CacheLookup.getKeyData(), &FrameworkName[0],
+             CacheLookup.getKeyLength()) != 0)
+    return 0;
+
+  // Cache subframework.
+  if (CacheLookup.getValue() == 0) {
+    ++NumSubFrameworkLookups;
+
+    // If the framework dir doesn't exist, we fail.
+    const DirectoryEntry *Dir = FileMgr.getDirectory(FrameworkName.begin(),
+                                                     FrameworkName.end());
+    if (Dir == 0) return 0;
+
+    // Otherwise, if it does, remember that this is the right direntry for this
+    // framework.
+    CacheLookup.setValue(Dir);
+  }
+
+  const FileEntry *FE = 0;
+
+  // Check ".../Frameworks/HIToolbox.framework/Headers/HIToolbox.h"
+  llvm::SmallString<1024> HeadersFilename(FrameworkName);
+  HeadersFilename += "Headers/";
+  HeadersFilename.append(Filename.begin()+SlashPos+1, Filename.end());
+  if (!(FE = FileMgr.getFile(HeadersFilename.begin(),
+                             HeadersFilename.end()))) {
+
+    // Check ".../Frameworks/HIToolbox.framework/PrivateHeaders/HIToolbox.h"
+    HeadersFilename = FrameworkName;
+    HeadersFilename += "PrivateHeaders/";
+    HeadersFilename.append(Filename.begin()+SlashPos+1, Filename.end());
+    if (!(FE = FileMgr.getFile(HeadersFilename.begin(), HeadersFilename.end())))
+      return 0;
+  }
+
+  // This file is a system header or C++ unfriendly if the old file is.
+  //
+  // Note that the temporary 'DirInfo' is required here, as either call to
+  // getFileInfo could resize the vector and we don't want to rely on order
+  // of evaluation.
+  unsigned DirInfo = getFileInfo(ContextFileEnt).DirInfo;
+  getFileInfo(FE).DirInfo = DirInfo;
+  return FE;
+}
+
+//===----------------------------------------------------------------------===//
+// File Info Management.
+//===----------------------------------------------------------------------===//
+
+
+/// getFileInfo - Return the HeaderFileInfo structure for the specified
+/// FileEntry.
+HeaderFileInfo &HeaderSearch::getFileInfo(const FileEntry *FE) {
+  if (FE->getUID() >= FileInfo.size())
+    FileInfo.resize(FE->getUID()+1);
+  return FileInfo[FE->getUID()];
+}
+
+void HeaderSearch::setHeaderFileInfoForUID(HeaderFileInfo HFI, unsigned UID) {
+  if (UID >= FileInfo.size())
+    FileInfo.resize(UID+1);
+  FileInfo[UID] = HFI;
+}
+
+/// ShouldEnterIncludeFile - Mark the specified file as a target of a
+/// #include, #include_next, or #import directive.  Return false if #including
+/// the file will have no effect or true if we should include it.
+bool HeaderSearch::ShouldEnterIncludeFile(const FileEntry *File, bool isImport){
+  ++NumIncluded; // Count # of attempted #includes.
+
+  // Get information about this file.
+  HeaderFileInfo &FileInfo = getFileInfo(File);
+
+  // If this is a #import directive, check that we have not already imported
+  // this header.
+  if (isImport) {
+    // If this has already been imported, don't import it again.
+    FileInfo.isImport = true;
+
+    // Has this already been #import'ed or #include'd?
+    if (FileInfo.NumIncludes) return false;
+  } else {
+    // Otherwise, if this is a #include of a file that was previously #import'd
+    // or if this is the second #include of a #pragma once file, ignore it.
+    if (FileInfo.isImport)
+      return false;
+  }
+
+  // Next, check to see if the file is wrapped with #ifndef guards.  If so, and
+  // if the macro that guards it is defined, we know the #include has no effect.
+  if (const IdentifierInfo *ControllingMacro
+      = FileInfo.getControllingMacro(ExternalLookup))
+    if (ControllingMacro->hasMacroDefinition()) {
+      ++NumMultiIncludeFileOptzn;
+      return false;
+    }
+
+  // Increment the number of times this file has been included.
+  ++FileInfo.NumIncludes;
+
+  return true;
+}
+
+
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
new file mode 100644
index 0000000..3207062
--- /dev/null
+++ b/lib/Lex/Lexer.cpp
@@ -0,0 +1,2055 @@
+//===--- Lexer.cpp - C Language Family Lexer ------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file implements the Lexer and Token interfaces.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: GCC Diagnostics emitted by the lexer:
+// PEDWARN: (form feed|vertical tab) in preprocessing directive
+//
+// Universal characters, unicode, char mapping:
+// WARNING: `%.*s' is not in NFKC
+// WARNING: `%.*s' is not in NFC
+//
+// Other:
+// TODO: Options to support:
+//    -fexec-charset,-fwide-exec-charset
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/Lexer.h"
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/LexDiagnostic.h"
+#include "clang/Basic/SourceManager.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <cctype>
+using namespace clang;
+
+static void InitCharacterInfo();
+
+//===----------------------------------------------------------------------===//
+// Token Class Implementation
+//===----------------------------------------------------------------------===//
+
+/// isObjCAtKeyword - Return true if we have an ObjC keyword identifier.
+/// Returns false for tokens that carry no IdentifierInfo at all.
+bool Token::isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const {
+  if (IdentifierInfo *II = getIdentifierInfo())
+    return II->getObjCKeywordID() == objcKey;
+  return false;
+}
+
+/// getObjCKeywordID - Return the ObjC keyword kind, or objc_not_keyword for
+/// tokens that have no identifier info.
+tok::ObjCKeywordKind Token::getObjCKeywordID() const {
+  IdentifierInfo *specId = getIdentifierInfo();
+  return specId ? specId->getObjCKeywordID() : tok::objc_not_keyword;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Lexer Class Implementation
+//===----------------------------------------------------------------------===//
+
+/// InitLexer - Common initialization shared by all Lexer constructors: record
+/// the buffer bounds and reset every per-lexer mode flag to its default.
+/// BufPtr is the initial lexing position and need not equal BufStart.
+void Lexer::InitLexer(const char *BufStart, const char *BufPtr,
+                      const char *BufEnd) {
+  InitCharacterInfo();
+
+  BufferStart = BufStart;
+  BufferPtr = BufPtr;
+  BufferEnd = BufEnd;
+
+  assert(BufEnd[0] == 0 &&
+         "We assume that the input buffer has a null character at the end"
+         " to simplify lexing!");
+
+  Is_PragmaLexer = false;
+  IsInConflictMarker = false;
+  
+  // Start of the file is a start of line.
+  IsAtStartOfLine = true;
+
+  // We are not after parsing a #.
+  ParsingPreprocessorDirective = false;
+
+  // We are not after parsing #include.
+  ParsingFilename = false;
+
+  // We are not in raw mode.  Raw mode disables diagnostics and interpretation
+  // of tokens (e.g. identifiers, thus disabling macro expansion).  It is used
+  // to quickly lex the tokens of the buffer, e.g. when handling a "#if 0" block
+  // or otherwise skipping over tokens.
+  LexingRawMode = false;
+
+  // Default to not keeping comments.
+  ExtendedTokenMode = 0;
+}
+
+/// Lexer constructor - Create a new lexer object for the specified buffer
+/// with the specified preprocessor managing the lexing process.  This lexer
+/// assumes that the associated file buffer and Preprocessor objects will
+/// outlive it, so it doesn't take ownership of either of them.
+Lexer::Lexer(FileID FID, const llvm::MemoryBuffer *InputFile, Preprocessor &PP)
+  : PreprocessorLexer(&PP, FID),
+    FileLoc(PP.getSourceManager().getLocForStartOfFile(FID)),
+    Features(PP.getLangOptions()) {
+
+  // Begin lexing at the very start of the file's buffer.
+  InitLexer(InputFile->getBufferStart(), InputFile->getBufferStart(),
+            InputFile->getBufferEnd());
+
+  // Default to keeping comments if the preprocessor wants them.
+  SetCommentRetentionState(PP.getCommentRetentionState());
+}
+
+/// Lexer constructor - Create a new raw lexer object.  This object is only
+/// suitable for calls to 'LexRawToken'.  This lexer assumes that the text
+/// range will outlive it, so it doesn't take ownership of it.
+/// No Preprocessor is associated, so diagnostics cannot be emitted;
+/// LexingRawMode is set accordingly.
+Lexer::Lexer(SourceLocation fileloc, const LangOptions &features,
+             const char *BufStart, const char *BufPtr, const char *BufEnd)
+  : FileLoc(fileloc), Features(features) {
+
+  InitLexer(BufStart, BufPtr, BufEnd);
+
+  // We *are* in raw mode.
+  LexingRawMode = true;
+}
+
+/// Lexer constructor - Create a new raw lexer object.  This object is only
+/// suitable for calls to 'LexRawToken'.  This lexer assumes that the text
+/// range will outlive it, so it doesn't take ownership of it.
+/// Convenience form of the raw-lexer constructor above that lexes an entire
+/// memory buffer identified by FileID.
+Lexer::Lexer(FileID FID, const llvm::MemoryBuffer *FromFile,
+             const SourceManager &SM, const LangOptions &features)
+  : FileLoc(SM.getLocForStartOfFile(FID)), Features(features) {
+
+  InitLexer(FromFile->getBufferStart(), FromFile->getBufferStart(),
+            FromFile->getBufferEnd());
+
+  // We *are* in raw mode.
+  LexingRawMode = true;
+}
+
+/// Create_PragmaLexer: Lexer constructor - Create a new lexer object for
+/// _Pragma expansion.  This has a variety of magic semantics that this method
+/// sets up.  It returns a new'd Lexer that must be delete'd when done.
+///
+/// On entrance to this routine, TokStartLoc is a macro location which has a
+/// spelling loc that indicates the bytes to be lexed for the token and an
+/// instantiation location that indicates where all lexed tokens should be
+/// "expanded from".
+///
+/// FIXME: It would really be nice to make _Pragma just be a wrapper around a
+/// normal lexer that remaps tokens as they fly by.  This would require making
+/// Preprocessor::Lex virtual.  Given that, we could just dump in a magic lexer
+/// interface that could handle this stuff.  This would pull GetMappedTokenLoc
+/// out of the critical path of the lexer!
+///
+Lexer *Lexer::Create_PragmaLexer(SourceLocation SpellingLoc,
+                                 SourceLocation InstantiationLocStart,
+                                 SourceLocation InstantiationLocEnd,
+                                 unsigned TokLen, Preprocessor &PP) {
+  SourceManager &SM = PP.getSourceManager();
+
+  // Create the lexer as if we were going to lex the file normally.
+  FileID SpellingFID = SM.getFileID(SpellingLoc);
+  const llvm::MemoryBuffer *InputFile = SM.getBuffer(SpellingFID);
+  Lexer *L = new Lexer(SpellingFID, InputFile, PP);
+
+  // Now that the lexer is created, change the start/end locations so that we
+  // just lex the subsection of the file that we want.  This is lexing from a
+  // scratch buffer.
+  const char *StrData = SM.getCharacterData(SpellingLoc);
+
+  L->BufferPtr = StrData;
+  L->BufferEnd = StrData+TokLen;
+  // NOTE(review): this relies on the spelling text ending exactly at a nul --
+  // true for scratch buffers, which is the expected source here.
+  assert(L->BufferEnd[0] == 0 && "Buffer is not nul terminated!");
+
+  // Set the SourceLocation with the remapping information.  This ensures that
+  // GetMappedTokenLoc will remap the tokens as they are lexed.
+  L->FileLoc = SM.createInstantiationLoc(SM.getLocForStartOfFile(SpellingFID),
+                                         InstantiationLocStart,
+                                         InstantiationLocEnd, TokLen);
+
+  // Ensure that the lexer thinks it is inside a directive, so that end \n will
+  // return an EOM token.
+  L->ParsingPreprocessorDirective = true;
+
+  // This lexer really is for _Pragma.
+  L->Is_PragmaLexer = true;
+  return L;
+}
+
+
+/// Stringify - Convert the specified string into a C string, with surrounding
+/// ""'s, and with escaped \ and " characters.  If Charify is set, escape for a
+/// character literal instead (escaping \ and ').
+std::string Lexer::Stringify(const std::string &Str, bool Charify) {
+  std::string Result = Str;
+  char Quote = Charify ? '\'' : '"';
+  for (unsigned i = 0, e = Result.size(); i != e; ++i) {
+    if (Result[i] == '\\' || Result[i] == Quote) {
+      // Insert the escape in place; bump i past the escaped character and e to
+      // account for the string growing by one.
+      Result.insert(Result.begin()+i, '\\');
+      ++i; ++e;
+    }
+  }
+  return Result;
+}
+
+/// Stringify - Convert the specified string into a C string by escaping '\'
+/// and " characters.  This does not add surrounding ""'s to the string.
+/// Operates in place on the SmallVector.
+void Lexer::Stringify(llvm::SmallVectorImpl<char> &Str) {
+  for (unsigned i = 0, e = Str.size(); i != e; ++i) {
+    if (Str[i] == '\\' || Str[i] == '"') {
+      // Skip the inserted backslash and track the grown size.
+      Str.insert(Str.begin()+i, '\\');
+      ++i; ++e;
+    }
+  }
+}
+
+static bool isWhitespace(unsigned char c);
+
+/// MeasureTokenLength - Relex the token at the specified location and return
+/// its length in bytes in the input file.  If the token needs cleaning (e.g.
+/// includes a trigraph or an escaped newline) then this count includes bytes
+/// that are part of that.  Returns 0 if Loc points at whitespace (no token
+/// starts there).
+unsigned Lexer::MeasureTokenLength(SourceLocation Loc,
+                                   const SourceManager &SM,
+                                   const LangOptions &LangOpts) {
+  // TODO: this could be special cased for common tokens like identifiers, ')',
+  // etc to make this faster, if it mattered.  Just look at StrData[0] to handle
+  // all obviously single-char tokens.  This could use
+  // Lexer::isObviouslySimpleCharacter for example to handle identifiers or
+  // something.
+
+  // If this comes from a macro expansion, we really do want the macro name, not
+  // the token this macro expanded to.
+  Loc = SM.getInstantiationLoc(Loc);
+  std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc);
+  std::pair<const char *,const char *> Buffer = SM.getBufferData(LocInfo.first);
+  const char *StrData = Buffer.first+LocInfo.second;
+
+  if (isWhitespace(StrData[0]))
+    return 0;
+
+  // Create a raw lexer starting at the beginning of this token.  Comments are
+  // retained so a comment at Loc measures as itself rather than being skipped.
+  Lexer TheLexer(Loc, LangOpts, Buffer.first, StrData, Buffer.second);
+  TheLexer.SetCommentRetentionState(true);
+  Token TheTok;
+  TheLexer.LexFromRawLexer(TheTok);
+  return TheTok.getLength();
+}
+
+//===----------------------------------------------------------------------===//
+// Character information.
+//===----------------------------------------------------------------------===//
+
+// Bit flags describing character classes; each entry of the CharInfo table
+// below holds exactly one of these, and the is*() predicates test masks of
+// them.
+enum {
+  CHAR_HORZ_WS  = 0x01,  // ' ', '\t', '\f', '\v'.  Note, no '\0'
+  CHAR_VERT_WS  = 0x02,  // '\r', '\n'
+  CHAR_LETTER   = 0x04,  // a-z,A-Z
+  CHAR_NUMBER   = 0x08,  // 0-9
+  CHAR_UNDER    = 0x10,  // _
+  CHAR_PERIOD   = 0x20   // .
+};
+
+// Statically initialize CharInfo table based on ASCII character set
+// Reference: FreeBSD 7.2 /usr/share/misc/ascii
+// Only the first 128 entries are spelled out; entries 128-255 are implicitly
+// zero-initialized, so non-ASCII bytes carry no class flags.
+static const unsigned char CharInfo[256] =
+{
+// 0 NUL         1 SOH         2 STX         3 ETX
+// 4 EOT         5 ENQ         6 ACK         7 BEL
+   0           , 0           , 0           , 0           ,
+   0           , 0           , 0           , 0           ,
+// 8 BS          9 HT         10 NL         11 VT
+//12 NP         13 CR         14 SO         15 SI
+   0           , CHAR_HORZ_WS, CHAR_VERT_WS, CHAR_HORZ_WS,
+   CHAR_HORZ_WS, CHAR_VERT_WS, 0           , 0           ,
+//16 DLE        17 DC1        18 DC2        19 DC3
+//20 DC4        21 NAK        22 SYN        23 ETB
+   0           , 0           , 0           , 0           ,
+   0           , 0           , 0           , 0           ,
+//24 CAN        25 EM         26 SUB        27 ESC
+//28 FS         29 GS         30 RS         31 US
+   0           , 0           , 0           , 0           ,
+   0           , 0           , 0           , 0           ,
+//32 SP         33  !         34  "         35  #
+//36  $         37  %         38  &         39  '
+   CHAR_HORZ_WS, 0           , 0           , 0           ,
+   0           , 0           , 0           , 0           ,
+//40  (         41  )         42  *         43  +
+//44  ,         45  -         46  .         47  /
+   0           , 0           , 0           , 0           ,
+   0           , 0           , CHAR_PERIOD , 0           ,
+//48  0         49  1         50  2         51  3
+//52  4         53  5         54  6         55  7
+   CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER ,
+   CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER ,
+//56  8         57  9         58  :         59  ;
+//60  <         61  =         62  >         63  ?
+   CHAR_NUMBER , CHAR_NUMBER , 0           , 0           ,
+   0           , 0           , 0           , 0           ,
+//64  @         65  A         66  B         67  C
+//68  D         69  E         70  F         71  G
+   0           , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
+   CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
+//72  H         73  I         74  J         75  K
+//76  L         77  M         78  N         79  O
+   CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
+   CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
+//80  P         81  Q         82  R         83  S
+//84  T         85  U         86  V         87  W
+   CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
+   CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
+//88  X         89  Y         90  Z         91  [
+//92  \         93  ]         94  ^         95  _
+   CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , 0           ,
+   0           , 0           , 0           , CHAR_UNDER  ,
+//96  `         97  a         98  b         99  c
+//100  d       101  e        102  f        103  g
+   0           , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
+   CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
+//104  h       105  i        106  j        107  k
+//108  l       109  m        110  n        111  o
+   CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
+   CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
+//112  p       113  q        114  r        115  s
+//116  t       117  u        118  v        119  w
+   CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
+   CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
+//120  x       121  y        122  z        123  {
+//124  |        125  }        126  ~        127 DEL
+   CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , 0           ,
+   0           , 0           , 0           , 0
+};
+
+/// InitCharacterInfo - Sanity-check the statically-initialized CharInfo table
+/// (asserts only; a no-op in release builds).  Runs at most once per process,
+/// guarded by an unsynchronized static flag.
+static void InitCharacterInfo() {
+  static bool isInited = false;
+  if (isInited) return;
+  // check the statically-initialized CharInfo table
+  assert(CHAR_HORZ_WS == CharInfo[(int)' ']);
+  assert(CHAR_HORZ_WS == CharInfo[(int)'\t']);
+  assert(CHAR_HORZ_WS == CharInfo[(int)'\f']);
+  assert(CHAR_HORZ_WS == CharInfo[(int)'\v']);
+  assert(CHAR_VERT_WS == CharInfo[(int)'\n']);
+  assert(CHAR_VERT_WS == CharInfo[(int)'\r']);
+  assert(CHAR_UNDER   == CharInfo[(int)'_']);
+  assert(CHAR_PERIOD  == CharInfo[(int)'.']);
+  for (unsigned i = 'a'; i <= 'z'; ++i) {
+    assert(CHAR_LETTER == CharInfo[i]);
+    // Check the corresponding uppercase letter too.
+    assert(CHAR_LETTER == CharInfo[i+'A'-'a']);
+  }
+  for (unsigned i = '0'; i <= '9'; ++i)
+    assert(CHAR_NUMBER == CharInfo[i]);
+    
+  isInited = true;
+}
+
+
+/// isIdentifierBody - Return true if this is the body character of an
+/// identifier, which is [a-zA-Z0-9_].  Note '$' is intentionally excluded;
+/// dollar-identifiers are handled separately in LexIdentifier.
+static inline bool isIdentifierBody(unsigned char c) {
+  return (CharInfo[c] & (CHAR_LETTER|CHAR_NUMBER|CHAR_UNDER)) ? true : false;
+}
+
+/// isHorizontalWhitespace - Return true if this character is horizontal
+/// whitespace: ' ', '\t', '\f', '\v'.  Note that this returns false for '\0',
+/// which has no flags in CharInfo (important at the buffer-end sentinel).
+static inline bool isHorizontalWhitespace(unsigned char c) {
+  return (CharInfo[c] & CHAR_HORZ_WS) ? true : false;
+}
+
+/// isWhitespace - Return true if this character is horizontal or vertical
+/// whitespace: ' ', '\t', '\f', '\v', '\n', '\r'.  Note that this returns false
+/// for '\0', which has no flags in CharInfo.
+static inline bool isWhitespace(unsigned char c) {
+  return (CharInfo[c] & (CHAR_HORZ_WS|CHAR_VERT_WS)) ? true : false;
+}
+
+/// isNumberBody - Return true if this is the body character of a
+/// preprocessing number, which is [a-zA-Z0-9_.].
+static inline bool isNumberBody(unsigned char c) {
+  return (CharInfo[c] & (CHAR_LETTER|CHAR_NUMBER|CHAR_UNDER|CHAR_PERIOD)) ?
+    true : false;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Diagnostics forwarding code.
+//===----------------------------------------------------------------------===//
+
+/// GetMappedTokenLoc - If lexing out of a 'mapped buffer', where we pretend the
+/// lexer buffer was all instantiated at a single point, perform the mapping.
+/// This is currently only used for _Pragma implementation, so it is the slow
+/// path of the hot getSourceLocation method.  Do not allow it to be inlined.
+// (Declared separately so the DISABLE_INLINE attribute applies to the
+// definition below.)
+static DISABLE_INLINE SourceLocation GetMappedTokenLoc(Preprocessor &PP,
+                                                       SourceLocation FileLoc,
+                                                       unsigned CharNo,
+                                                       unsigned TokLen);
+static SourceLocation GetMappedTokenLoc(Preprocessor &PP,
+                                        SourceLocation FileLoc,
+                                        unsigned CharNo, unsigned TokLen) {
+  assert(FileLoc.isMacroID() && "Must be an instantiation");
+
+  // Otherwise, we're lexing "mapped tokens".  This is used for things like
+  // _Pragma handling.  Combine the instantiation location of FileLoc with the
+  // spelling location.
+  SourceManager &SM = PP.getSourceManager();
+
+  // Create a new SLoc which is expanded from Instantiation(FileLoc) but whose
+  // characters come from spelling(FileLoc)+Offset.
+  SourceLocation SpellingLoc = SM.getSpellingLoc(FileLoc);
+  SpellingLoc = SpellingLoc.getFileLocWithOffset(CharNo);
+
+  // Figure out the expansion loc range, which is the range covered by the
+  // original _Pragma(...) sequence.
+  std::pair<SourceLocation,SourceLocation> II =
+    SM.getImmediateInstantiationRange(FileLoc);
+
+  return SM.createInstantiationLoc(SpellingLoc, II.first, II.second, TokLen);
+}
+
+/// getSourceLocation - Return a source location identifier for the specified
+/// offset in the current file.  TokLen is only consulted on the slow _Pragma
+/// path, where it sizes the instantiation range.
+SourceLocation Lexer::getSourceLocation(const char *Loc,
+                                        unsigned TokLen) const {
+  assert(Loc >= BufferStart && Loc <= BufferEnd &&
+         "Location out of range for this buffer!");
+
+  // In the normal case, we're just lexing from a simple file buffer, return
+  // the file id from FileLoc with the offset specified.
+  unsigned CharNo = Loc-BufferStart;
+  if (FileLoc.isFileID())
+    return FileLoc.getFileLocWithOffset(CharNo);
+
+  // Otherwise, this is the _Pragma lexer case, which pretends that all of the
+  // tokens are lexed from where the _Pragma was defined.
+  assert(PP && "This doesn't work on raw lexers");
+  return GetMappedTokenLoc(*PP, FileLoc, CharNo, TokLen);
+}
+
+/// Diag - Forwarding function for diagnostics.  This translate a source
+/// position in the current buffer into a SourceLocation object for rendering.
+/// Requires a Preprocessor (PP); callers in raw mode must not reach this.
+DiagnosticBuilder Lexer::Diag(const char *Loc, unsigned DiagID) const {
+  return PP->Diag(getSourceLocation(Loc), DiagID);
+}
+
+//===----------------------------------------------------------------------===//
+// Trigraph and Escaped Newline Handling Code.
+//===----------------------------------------------------------------------===//
+
+/// GetTrigraphCharForLetter - Given a character that occurs after a ?? pair,
+/// return the decoded trigraph letter it corresponds to, or '\0' if nothing.
+/// e.g. '=' -> '#' because "??=" is the trigraph for '#'.
+static char GetTrigraphCharForLetter(char Letter) {
+  switch (Letter) {
+  default:   return 0;
+  case '=':  return '#';
+  case ')':  return ']';
+  case '(':  return '[';
+  case '!':  return '|';
+  case '\'': return '^';
+  case '>':  return '}';
+  case '/':  return '\\';
+  case '<':  return '{';
+  case '-':  return '~';
+  }
+}
+
+/// DecodeTrigraphChar - If the specified character is a legal trigraph when
+/// prefixed with ??, emit a trigraph warning.  If trigraphs are enabled,
+/// return the result character.  Finally, emit a warning about trigraph use
+/// whether trigraphs are enabled or not.
+/// NOTE(review): if L is null (no Token/lexer context), the decoded character
+/// is returned without consulting the Trigraphs language option or emitting
+/// any diagnostic -- confirm this asymmetry is intended.
+static char DecodeTrigraphChar(const char *CP, Lexer *L) {
+  char Res = GetTrigraphCharForLetter(*CP);
+  if (!Res || !L) return Res;
+
+  if (!L->getFeatures().Trigraphs) {
+    if (!L->isLexingRawMode())
+      L->Diag(CP-2, diag::trigraph_ignored);
+    return 0;
+  }
+
+  if (!L->isLexingRawMode())
+    L->Diag(CP-2, diag::trigraph_converted) << std::string()+Res;
+  return Res;
+}
+
+/// getEscapedNewLineSize - Return the size of the specified escaped newline,
+/// or 0 if it is not an escaped newline. P[-1] is known to be a "\" or a
+/// trigraph equivalent on entry to this function.  Whitespace between the
+/// backslash and the newline is tolerated and counted; for \r\n or \n\r
+/// pairs, both characters are included in the returned size.
+unsigned Lexer::getEscapedNewLineSize(const char *Ptr) {
+  unsigned Size = 0;
+  while (isWhitespace(Ptr[Size])) {
+    ++Size;
+
+    if (Ptr[Size-1] != '\n' && Ptr[Size-1] != '\r')
+      continue;
+
+    // If this is a \r\n or \n\r, skip the other half.
+    if ((Ptr[Size] == '\r' || Ptr[Size] == '\n') &&
+        Ptr[Size-1] != Ptr[Size])
+      ++Size;
+
+    return Size;
+  }
+
+  // Not an escaped newline, must be a \t or something else.
+  return 0;
+}
+
+/// SkipEscapedNewLines - If P points to an escaped newline (or a series of
+/// them), skip over them and return the first non-escaped-newline found,
+/// otherwise return P.  Handles both a plain '\' and its trigraph spelling
+/// "??/".
+const char *Lexer::SkipEscapedNewLines(const char *P) {
+  while (1) {
+    const char *AfterEscape;
+    if (*P == '\\') {
+      AfterEscape = P+1;
+    } else if (*P == '?') {
+      // If not a trigraph for escape, bail out.
+      if (P[1] != '?' || P[2] != '/')
+        return P;
+      AfterEscape = P+3;
+    } else {
+      return P;
+    }
+
+    unsigned NewLineSize = Lexer::getEscapedNewLineSize(AfterEscape);
+    if (NewLineSize == 0) return P;
+    P = AfterEscape+NewLineSize;
+  }
+}
+
+
+/// getCharAndSizeSlow - Peek a single 'character' from the specified buffer,
+/// get its size, and return it.  This is tricky in several cases:
+///   1. If currently at the start of a trigraph, we warn about the trigraph,
+///      then either return the trigraph (skipping 3 chars) or the '?',
+///      depending on whether trigraphs are enabled or not.
+///   2. If this is an escaped newline (potentially with whitespace between
+///      the backslash and newline), implicitly skip the newline and return
+///      the char after it.
+///   3. If this is a UCN, return it.  FIXME: C++ UCN's?
+///
+/// This handles the slow/uncommon case of the getCharAndSize method.  Here we
+/// know that we can accumulate into Size, and that we have already incremented
+/// Ptr by Size bytes.
+///
+/// NOTE: When this method is updated, getCharAndSizeSlowNoWarn (below) should
+/// be updated to match.
+///
+char Lexer::getCharAndSizeSlow(const char *Ptr, unsigned &Size,
+                               Token *Tok) {
+  // If we have a slash, look for an escaped newline.
+  if (Ptr[0] == '\\') {
+    ++Size;
+    ++Ptr;
+// Also reached from below when a trigraph decodes to '\', so that a
+// trigraph-spelled backslash can still introduce an escaped newline.
+Slash:
+    // Common case, backslash-char where the char is not whitespace.
+    if (!isWhitespace(Ptr[0])) return '\\';
+
+    // See if we have optional whitespace characters between the slash and
+    // newline.
+    if (unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) {
+      // Remember that this token needs to be cleaned.
+      if (Tok) Tok->setFlag(Token::NeedsCleaning);
+
+      // Warn if there was whitespace between the backslash and newline.
+      if (Ptr[0] != '\n' && Ptr[0] != '\r' && Tok && !isLexingRawMode())
+        Diag(Ptr, diag::backslash_newline_space);
+
+      // Found backslash<whitespace><newline>.  Parse the char after it.
+      Size += EscapedNewLineSize;
+      Ptr  += EscapedNewLineSize;
+      // Use slow version to accumulate a correct size field.
+      return getCharAndSizeSlow(Ptr, Size, Tok);
+    }
+
+    // Otherwise, this is not an escaped newline, just return the slash.
+    return '\\';
+  }
+
+  // If this is a trigraph, process it.
+  if (Ptr[0] == '?' && Ptr[1] == '?') {
+    // If this is actually a legal trigraph (not something like "??x"), emit
+    // a trigraph warning.  If so, and if trigraphs are enabled, return it.
+    if (char C = DecodeTrigraphChar(Ptr+2, Tok ? this : 0)) {
+      // Remember that this token needs to be cleaned.
+      if (Tok) Tok->setFlag(Token::NeedsCleaning);
+
+      Ptr += 3;
+      Size += 3;
+      if (C == '\\') goto Slash;
+      return C;
+    }
+  }
+
+  // If this is neither, return a single character.
+  ++Size;
+  return *Ptr;
+}
+
+
+/// getCharAndSizeSlowNoWarn - Handle the slow/uncommon case of the
+/// getCharAndSizeNoWarn method.  Here we know that we can accumulate into Size,
+/// and that we have already incremented Ptr by Size bytes.
+/// Unlike getCharAndSizeSlow, this emits no diagnostics and sets no token
+/// flags, and it only decodes trigraphs when the Trigraphs feature is on.
+///
+/// NOTE: When this method is updated, getCharAndSizeSlow (above) should
+/// be updated to match.
+char Lexer::getCharAndSizeSlowNoWarn(const char *Ptr, unsigned &Size,
+                                     const LangOptions &Features) {
+  // If we have a slash, look for an escaped newline.
+  if (Ptr[0] == '\\') {
+    ++Size;
+    ++Ptr;
+// Also reached when a trigraph decodes to '\' below.
+Slash:
+    // Common case, backslash-char where the char is not whitespace.
+    if (!isWhitespace(Ptr[0])) return '\\';
+
+    // See if we have optional whitespace characters followed by a newline.
+    if (unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) {
+      // Found backslash<whitespace><newline>.  Parse the char after it.
+      Size += EscapedNewLineSize;
+      Ptr  += EscapedNewLineSize;
+
+      // Use slow version to accumulate a correct size field.
+      return getCharAndSizeSlowNoWarn(Ptr, Size, Features);
+    }
+
+    // Otherwise, this is not an escaped newline, just return the slash.
+    return '\\';
+  }
+
+  // If this is a trigraph, process it.
+  if (Features.Trigraphs && Ptr[0] == '?' && Ptr[1] == '?') {
+    // If this is actually a legal trigraph (not something like "??x"), return
+    // it.
+    if (char C = GetTrigraphCharForLetter(Ptr[2])) {
+      Ptr += 3;
+      Size += 3;
+      if (C == '\\') goto Slash;
+      return C;
+    }
+  }
+
+  // If this is neither, return a single character.
+  ++Size;
+  return *Ptr;
+}
+
+//===----------------------------------------------------------------------===//
+// Helper methods for lexing.
+//===----------------------------------------------------------------------===//
+
+/// LexIdentifier - Lex the remainder of an identifier; CurPtr points just past
+/// the first (already matched) character.  In non-raw mode the identifier is
+/// looked up, keyword-classified, and possibly handed to the preprocessor for
+/// macro expansion.
+void Lexer::LexIdentifier(Token &Result, const char *CurPtr) {
+  // Match [_A-Za-z0-9]*, we have already matched [_A-Za-z$]
+  unsigned Size;
+  unsigned char C = *CurPtr++;
+  while (isIdentifierBody(C))
+    C = *CurPtr++;
+
+  --CurPtr;   // Back up over the skipped character.
+
+  // Fast path, no $,\,? in identifier found.  '\' might be an escaped newline
+  // or UCN, and ? might be a trigraph for '\', an escaped newline or UCN.
+  // FIXME: UCNs.
+  //
+  // TODO: Could merge these checks into a CharInfo flag to make the comparison
+  // cheaper
+  if (C != '\\' && C != '?' && (C != '$' || !Features.DollarIdents)) {
+FinishIdentifier:
+    const char *IdStart = BufferPtr;
+    FormTokenWithChars(Result, CurPtr, tok::identifier);
+
+    // If we are in raw mode, return this identifier raw.  There is no need to
+    // look up identifier information or attempt to macro expand it.
+    if (LexingRawMode) return;
+
+    // Fill in Result.IdentifierInfo, looking up the identifier in the
+    // identifier table.
+    IdentifierInfo *II = PP->LookUpIdentifierInfo(Result, IdStart);
+
+    // Change the kind of this identifier to the appropriate token kind, e.g.
+    // turning "for" into a keyword.
+    Result.setKind(II->getTokenID());
+
+    // Finally, now that we know we have an identifier, pass this off to the
+    // preprocessor, which may macro expand it or something.
+    if (II->isHandleIdentifierCase())
+      PP->HandleIdentifier(Result);
+    return;
+  }
+
+  // Otherwise, $,\,? in identifier found.  Enter slower path that uses
+  // getCharAndSize/ConsumeChar so trigraphs and escaped newlines are handled.
+
+  C = getCharAndSize(CurPtr, Size);
+  while (1) {
+    if (C == '$') {
+      // If we hit a $ and they are not supported in identifiers, we are done.
+      if (!Features.DollarIdents) goto FinishIdentifier;
+
+      // Otherwise, emit a diagnostic and continue.
+      if (!isLexingRawMode())
+        Diag(CurPtr, diag::ext_dollar_in_identifier);
+      CurPtr = ConsumeChar(CurPtr, Size, Result);
+      C = getCharAndSize(CurPtr, Size);
+      continue;
+    } else if (!isIdentifierBody(C)) { // FIXME: UCNs.
+      // Found end of identifier.
+      goto FinishIdentifier;
+    }
+
+    // Otherwise, this character is good, consume it.
+    CurPtr = ConsumeChar(CurPtr, Size, Result);
+
+    C = getCharAndSize(CurPtr, Size);
+    while (isIdentifierBody(C)) { // FIXME: UCNs.
+      CurPtr = ConsumeChar(CurPtr, Size, Result);
+      C = getCharAndSize(CurPtr, Size);
+    }
+  }
+}
+
+
+/// LexNumericConstant - Lex the remainder of an integer or floating point
+/// constant. From[-1] is the first character lexed.  Return the end of the
+/// constant.
+void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {
+  unsigned Size;
+  char C = getCharAndSize(CurPtr, Size);
+  char PrevCh = 0;
+  while (isNumberBody(C)) { // FIXME: UCNs?
+    CurPtr = ConsumeChar(CurPtr, Size, Result);
+    PrevCh = C;
+    C = getCharAndSize(CurPtr, Size);
+  }
+
+  // If we fell out, check for a sign, due to 1e+12.  If we have one, continue.
+  if ((C == '-' || C == '+') && (PrevCh == 'E' || PrevCh == 'e'))
+    return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));
+
+  // If we have a hex FP constant (p-exponent), continue.
+  // NOTE(review): the p+/p- sign is deliberately not consumed in C++0x mode
+  // (and when no PP is available the non-C++0x behavior applies) -- confirm
+  // the intended interaction with C++0x literals.
+  if ((C == '-' || C == '+') && (PrevCh == 'P' || PrevCh == 'p') &&
+      (!PP || !PP->getLangOptions().CPlusPlus0x))
+    return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));
+
+  // Update the location of token as well as BufferPtr.
+  const char *TokStart = BufferPtr;
+  FormTokenWithChars(Result, CurPtr, tok::numeric_constant);
+  Result.setLiteralData(TokStart);
+}
+
+/// LexStringLiteral - Lex the remainder of a string literal, after having lexed
+/// either " or L".  An unterminated literal (newline or end of file before the
+/// closing quote) produces a tok::unknown token and, outside of assembler
+/// preprocessing, an error diagnostic.
+void Lexer::LexStringLiteral(Token &Result, const char *CurPtr, bool Wide) {
+  const char *NulCharacter = 0; // Does this string contain the \0 character?
+
+  char C = getAndAdvanceChar(CurPtr, Result);
+  while (C != '"') {
+    // Skip escaped characters.
+    if (C == '\\') {
+      // Skip the escaped character.
+      C = getAndAdvanceChar(CurPtr, Result);
+    } else if (C == '\n' || C == '\r' ||             // Newline.
+               (C == 0 && CurPtr-1 == BufferEnd)) {  // End of file.
+      if (!isLexingRawMode() && !Features.AsmPreprocessor)
+        Diag(BufferPtr, diag::err_unterminated_string);
+      FormTokenWithChars(Result, CurPtr-1, tok::unknown);
+      return;
+    } else if (C == 0) {
+      // Embedded nul that is not the end-of-buffer sentinel; remember it so we
+      // can warn below.
+      NulCharacter = CurPtr-1;
+    }
+    C = getAndAdvanceChar(CurPtr, Result);
+  }
+
+  // If a nul character existed in the string, warn about it.
+  if (NulCharacter && !isLexingRawMode())
+    Diag(NulCharacter, diag::null_in_string);
+
+  // Update the location of the token as well as the BufferPtr instance var.
+  const char *TokStart = BufferPtr;
+  FormTokenWithChars(Result, CurPtr,
+                     Wide ? tok::wide_string_literal : tok::string_literal);
+  Result.setLiteralData(TokStart);
+}
+
+/// LexAngledStringLiteral - Lex the remainder of an angled string literal,
+/// after having lexed the '<' character.  This is used for #include filenames.
+/// If the literal is unterminated, the '<' is returned as a plain tok::less.
+void Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) {
+  const char *NulCharacter = 0; // Does this string contain the \0 character?
+  const char *AfterLessPos = CurPtr;
+  char C = getAndAdvanceChar(CurPtr, Result);
+  while (C != '>') {
+    // Skip escaped characters.
+    if (C == '\\') {
+      // Skip the escaped character.
+      C = getAndAdvanceChar(CurPtr, Result);
+    } else if (C == '\n' || C == '\r' ||             // Newline.
+               (C == 0 && CurPtr-1 == BufferEnd)) {  // End of file.
+      // If the filename is unterminated, then it must just be a lone <
+      // character.  Return this as such.
+      FormTokenWithChars(Result, AfterLessPos, tok::less);
+      return;
+    } else if (C == 0) {
+      // Embedded nul that is not the end-of-buffer sentinel.
+      NulCharacter = CurPtr-1;
+    }
+    C = getAndAdvanceChar(CurPtr, Result);
+  }
+
+  // If a nul character existed in the string, warn about it.
+  if (NulCharacter && !isLexingRawMode())
+    Diag(NulCharacter, diag::null_in_string);
+
+  // Update the location of token as well as BufferPtr.
+  const char *TokStart = BufferPtr;
+  FormTokenWithChars(Result, CurPtr, tok::angle_string_literal);
+  Result.setLiteralData(TokStart);
+}
+
+
/// LexCharConstant - Lex the remainder of a character constant, after having
/// lexed either ' or L'.
void Lexer::LexCharConstant(Token &Result, const char *CurPtr) {
  const char *NulCharacter = 0; // Does this character contain the \0 character?

  // Handle the common case of 'x' and '\y' efficiently.
  char C = getAndAdvanceChar(CurPtr, Result);
  if (C == '\'') {
    // Empty character constant ('').  Diagnose unless in raw mode or in
    // assembler-with-cpp mode, and return it as an unknown token.
    if (!isLexingRawMode() && !Features.AsmPreprocessor)
      Diag(BufferPtr, diag::err_empty_character);
    FormTokenWithChars(Result, CurPtr, tok::unknown);
    return;
  } else if (C == '\\') {
    // Skip the escaped character.
    // FIXME: UCN's.
    C = getAndAdvanceChar(CurPtr, Result);
  }

  // Fast path: a single (possibly escaped) character followed immediately by
  // the closing quote.
  if (C && C != '\n' && C != '\r' && CurPtr[0] == '\'') {
    ++CurPtr;
  } else {
    // Fall back on generic code for embedded nulls, newlines, wide chars.
    do {
      // Skip escaped characters.
      if (C == '\\') {
        // Skip the escaped character.
        C = getAndAdvanceChar(CurPtr, Result);
      } else if (C == '\n' || C == '\r' ||               // Newline.
                 (C == 0 && CurPtr-1 == BufferEnd)) {    // End of file.
        if (!isLexingRawMode() && !Features.AsmPreprocessor)
          Diag(BufferPtr, diag::err_unterminated_char);
        FormTokenWithChars(Result, CurPtr-1, tok::unknown);
        return;
      } else if (C == 0) {
        // Embedded nul; remember it so we can warn below.
        NulCharacter = CurPtr-1;
      }
      C = getAndAdvanceChar(CurPtr, Result);
    } while (C != '\'');
  }

  // If a nul character existed in the constant, warn about it.
  if (NulCharacter && !isLexingRawMode())
    Diag(NulCharacter, diag::null_in_char);

  // Update the location of token as well as BufferPtr.
  const char *TokStart = BufferPtr;
  FormTokenWithChars(Result, CurPtr, tok::char_constant);
  Result.setLiteralData(TokStart);
}
+
/// SkipWhitespace - Efficiently skip over a series of whitespace characters.
/// Update BufferPtr to point to the next non-whitespace character and return.
///
/// This method forms a token and returns true if KeepWhitespaceMode is enabled.
///
bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr) {
  // Whitespace - Skip it, then return the token after the whitespace.
  unsigned char Char = *CurPtr;  // Skip consecutive spaces efficiently.
  while (1) {
    // Skip horizontal whitespace very aggressively.
    while (isHorizontalWhitespace(Char))
      Char = *++CurPtr;

    // Otherwise if we have something other than whitespace, we're done.
    if (Char != '\n' && Char != '\r')
      break;

    if (ParsingPreprocessorDirective) {
      // End of preprocessor directive line, let LexTokenInternal handle this.
      BufferPtr = CurPtr;
      return false;
    }

    // ok, but handle newline.
    // The returned token is at the start of the line.
    Result.setFlag(Token::StartOfLine);
    // No leading whitespace seen so far.
    Result.clearFlag(Token::LeadingSpace);
    Char = *++CurPtr;
  }

  // If this isn't immediately after a newline, there is leading space.
  char PrevChar = CurPtr[-1];
  if (PrevChar != '\n' && PrevChar != '\r')
    Result.setFlag(Token::LeadingSpace);

  // If the client wants us to return whitespace, return it now.
  if (isKeepWhitespaceMode()) {
    FormTokenWithChars(Result, CurPtr, tok::unknown);
    return true;
  }

  BufferPtr = CurPtr;
  return false;
}
+
/// SkipBCPLComment - We have just read the // characters from input.  Skip
/// until we find the newline character that terminates the comment.  Then
/// update BufferPtr and return.
///
/// If we're in KeepCommentMode or any CommentHandler has inserted
/// some tokens, this will store the first token and return true.
bool Lexer::SkipBCPLComment(Token &Result, const char *CurPtr) {
  // If BCPL comments aren't explicitly enabled for this language, emit an
  // extension warning.
  if (!Features.BCPLComment && !isLexingRawMode()) {
    Diag(BufferPtr, diag::ext_bcpl_comment);

    // Mark them enabled so we only emit one warning for this translation
    // unit.
    Features.BCPLComment = true;
  }

  // Scan over the body of the comment.  The common case, when scanning, is that
  // the comment contains normal ascii characters with nothing interesting in
  // them.  As such, optimize for this case with the inner loop.
  char C;
  do {
    C = *CurPtr;
    // FIXME: Speedup BCPL comment lexing.  Just scan for a \n or \r character.
    // If we find a \n character, scan backwards, checking to see if it's an
    // escaped newline, like we do for block comments.

    // Skip over characters in the fast loop.
    while (C != 0 &&                // Potentially EOF.
           C != '\\' &&             // Potentially escaped newline.
           C != '?' &&              // Potentially trigraph.
           C != '\n' && C != '\r')  // Newline or DOS-style newline.
      C = *++CurPtr;

    // If this is a newline, we're done.
    if (C == '\n' || C == '\r')
      break;  // Found the newline? Break out!

    // Otherwise, this is a hard case.  Fall back on getAndAdvanceChar to
    // properly decode the character.  Read it in raw mode to avoid emitting
    // diagnostics about things like trigraphs.  If we see an escaped newline,
    // we'll handle it below.
    const char *OldPtr = CurPtr;
    bool OldRawMode = isLexingRawMode();
    LexingRawMode = true;
    C = getAndAdvanceChar(CurPtr, Result);
    LexingRawMode = OldRawMode;

    // If the char that we finally got was a \n, then we must have had something
    // like \<newline><newline>.  We don't want to have consumed the second
    // newline, we want CurPtr, to end up pointing to it down below.
    if (C == '\n' || C == '\r') {
      --CurPtr;
      C = 'x'; // doesn't matter what this is.
    }

    // If we read multiple characters, and one of those characters was a \r or
    // \n, then we had an escaped newline within the comment.  Emit diagnostic
    // unless the next line is also a // comment.
    if (CurPtr != OldPtr+1 && C != '/' && CurPtr[0] != '/') {
      for (; OldPtr != CurPtr; ++OldPtr)
        if (OldPtr[0] == '\n' || OldPtr[0] == '\r') {
          // Okay, we found a // comment that ends in a newline, if the next
          // line is also a // comment, but has spaces, don't emit a diagnostic.
          if (isspace(C)) {
            const char *ForwardPtr = CurPtr;
            while (isspace(*ForwardPtr))  // Skip whitespace.
              ++ForwardPtr;
            if (ForwardPtr[0] == '/' && ForwardPtr[1] == '/')
              break;
          }

          if (!isLexingRawMode())
            Diag(OldPtr-1, diag::ext_multi_line_bcpl_comment);
          break;
        }
    }

    // getAndAdvanceChar advances one past the terminating nul at EOF; back up
    // onto it and stop scanning.
    if (CurPtr == BufferEnd+1) { --CurPtr; break; }
  } while (C != '\n' && C != '\r');

  // Found but did not consume the newline.  Notify comment handlers about the
  // comment unless we're in a #if 0 block.
  if (PP && !isLexingRawMode() &&
      PP->HandleComment(Result, SourceRange(getSourceLocation(BufferPtr),
                                            getSourceLocation(CurPtr)))) {
    BufferPtr = CurPtr;
    return true; // A token has to be returned.
  }

  // If we are returning comments as tokens, return this comment as a token.
  if (inKeepCommentMode())
    return SaveBCPLComment(Result, CurPtr);

  // If we are inside a preprocessor directive and we see the end of line,
  // return immediately, so that the lexer can return this as an EOM token.
  if (ParsingPreprocessorDirective || CurPtr == BufferEnd) {
    BufferPtr = CurPtr;
    return false;
  }

  // Otherwise, eat the \n character.  We don't care if this is a \n\r or
  // \r\n sequence.  This is an efficiency hack (because we know the \n can't
  // contribute to another token), it isn't needed for correctness.  Note that
  // this is ok even in KeepWhitespaceMode, because we would have returned the
  // comment above in that mode.
  ++CurPtr;

  // The next returned token is at the start of the line.
  Result.setFlag(Token::StartOfLine);
  // No leading whitespace seen so far.
  Result.clearFlag(Token::LeadingSpace);
  BufferPtr = CurPtr;
  return false;
}
+
+/// SaveBCPLComment - If in save-comment mode, package up this BCPL comment in
+/// an appropriate way and return it.
+bool Lexer::SaveBCPLComment(Token &Result, const char *CurPtr) {
+  // If we're not in a preprocessor directive, just return the // comment
+  // directly.
+  FormTokenWithChars(Result, CurPtr, tok::comment);
+
+  if (!ParsingPreprocessorDirective)
+    return true;
+
+  // If this BCPL-style comment is in a macro definition, transmogrify it into
+  // a C-style block comment.
+  std::string Spelling = PP->getSpelling(Result);
+  assert(Spelling[0] == '/' && Spelling[1] == '/' && "Not bcpl comment?");
+  Spelling[1] = '*';   // Change prefix to "/*".
+  Spelling += "*/";    // add suffix.
+
+  Result.setKind(tok::comment);
+  PP->CreateString(&Spelling[0], Spelling.size(), Result,
+                   Result.getLocation());
+  return true;
+}
+
/// isEndOfBlockCommentWithEscapedNewLine - Return true if the specified newline
/// character (either \n or \r) is part of an escaped newline sequence.  Issue a
/// diagnostic if so.  We know that the newline is inside of a block comment.
static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr,
                                                  Lexer *L) {
  assert(CurPtr[0] == '\n' || CurPtr[0] == '\r');

  // Back up off the newline.
  --CurPtr;

  // If this is a two-character newline sequence, skip the other character.
  if (CurPtr[0] == '\n' || CurPtr[0] == '\r') {
    // \n\n or \r\r -> not escaped newline.
    if (CurPtr[0] == CurPtr[1])
      return false;
    // \n\r or \r\n -> skip the newline.
    --CurPtr;
  }

  // If we have horizontal whitespace, skip over it.  We allow whitespace
  // between the backslash and newline.
  bool HasSpace = false;
  while (isHorizontalWhitespace(*CurPtr) || *CurPtr == 0) {
    --CurPtr;
    HasSpace = true;
  }

  // If we have a backslash, this is an escaped newline; it only ends the
  // comment if the backslash is immediately preceded by a '*'.
  if (*CurPtr == '\\') {
    if (CurPtr[-1] != '*') return false;
  } else {
    // It isn't a backslash; is it the "??/" trigraph spelling of a backslash,
    // preceded by a '*'?
    if (CurPtr[0] != '/' || CurPtr[-1] != '?' || CurPtr[-2] != '?' ||
        CurPtr[-3] != '*')
      return false;

    // This is the trigraph ending the comment.  Emit a stern warning!
    CurPtr -= 2;

    // If no trigraphs are enabled, warn that we ignored this trigraph and
    // ignore this * character.
    if (!L->getFeatures().Trigraphs) {
      if (!L->isLexingRawMode())
        L->Diag(CurPtr, diag::trigraph_ignored_block_comment);
      return false;
    }
    if (!L->isLexingRawMode())
      L->Diag(CurPtr, diag::trigraph_ends_block_comment);
  }

  // Warn about having an escaped newline between the */ characters.
  if (!L->isLexingRawMode())
    L->Diag(CurPtr, diag::escaped_newline_block_comment_end);

  // If there was space between the backslash and newline, warn about it.
  if (HasSpace && !L->isLexingRawMode())
    L->Diag(CurPtr, diag::backslash_newline_space);

  return true;
}
+
+#ifdef __SSE2__
+#include <emmintrin.h>
+#elif __ALTIVEC__
+#include <altivec.h>
+#undef bool
+#endif
+
/// SkipBlockComment - We have just read the /* characters from input.  Read
/// until we find the */ characters that terminate the comment.  Note that we
/// don't bother decoding trigraphs or escaped newlines in block comments,
/// because they cannot cause the comment to end.  The only thing that can
/// happen is the comment could end with an escaped newline between the */ end
/// of comment.
///
/// If we're in KeepCommentMode or any CommentHandler has inserted
/// some tokens, this will store the first token and return true.
bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) {
  // Scan one character past where we should, looking for a '/' character.  Once
  // we find it, check to see if it was preceded by a *.  This common
  // optimization helps people who like to put a lot of * characters in their
  // comments.

  // The first character we get with newlines and trigraphs skipped to handle
  // the degenerate /*/ case below correctly if the * has an escaped newline
  // after it.
  unsigned CharSize;
  unsigned char C = getCharAndSize(CurPtr, CharSize);
  CurPtr += CharSize;
  if (C == 0 && CurPtr == BufferEnd+1) {
    // Hit the end of file right after "/*": unterminated comment.
    if (!isLexingRawMode())
      Diag(BufferPtr, diag::err_unterminated_block_comment);
    --CurPtr;

    // KeepWhitespaceMode should return this broken comment as a token.  Since
    // it isn't a well formed comment, just return it as an 'unknown' token.
    if (isKeepWhitespaceMode()) {
      FormTokenWithChars(Result, CurPtr, tok::unknown);
      return true;
    }

    BufferPtr = CurPtr;
    return false;
  }

  // Check to see if the first character after the '/*' is another /.  If so,
  // then this slash does not end the block comment, it is part of it.
  if (C == '/')
    C = *CurPtr++;

  while (1) {
    // Skip over all non-interesting characters until we find end of buffer or a
    // (probably ending) '/' character.
    if (CurPtr + 24 < BufferEnd) {
      // While not aligned to a 16-byte boundary.
      while (C != '/' && ((intptr_t)CurPtr & 0x0F) != 0)
        C = *CurPtr++;

      if (C == '/') goto FoundSlash;

#ifdef __SSE2__
      // Compare 16 bytes at a time against '/' using SSE2.
      __m128i Slashes = _mm_set_epi8('/', '/', '/', '/', '/', '/', '/', '/',
                                     '/', '/', '/', '/', '/', '/', '/', '/');
      while (CurPtr+16 <= BufferEnd &&
             _mm_movemask_epi8(_mm_cmpeq_epi8(*(__m128i*)CurPtr, Slashes)) == 0)
        CurPtr += 16;
#elif __ALTIVEC__
      // Compare 16 bytes at a time against '/' using Altivec.
      __vector unsigned char Slashes = {
        '/', '/', '/', '/',  '/', '/', '/', '/',
        '/', '/', '/', '/',  '/', '/', '/', '/'
      };
      while (CurPtr+16 <= BufferEnd &&
             !vec_any_eq(*(vector unsigned char*)CurPtr, Slashes))
        CurPtr += 16;
#else
      // Scan for '/' quickly.  Many block comments are very large.
      while (CurPtr[0] != '/' &&
             CurPtr[1] != '/' &&
             CurPtr[2] != '/' &&
             CurPtr[3] != '/' &&
             CurPtr+4 < BufferEnd) {
        CurPtr += 4;
      }
#endif

      // It has to be one of the bytes scanned, increment to it and read one.
      C = *CurPtr++;
    }

    // Loop to scan the remainder.
    while (C != '/' && C != '\0')
      C = *CurPtr++;

  FoundSlash:
    if (C == '/') {
      if (CurPtr[-2] == '*')  // We found the final */.  We're done!
        break;

      if ((CurPtr[-2] == '\n' || CurPtr[-2] == '\r')) {
        // The '/' follows a newline; check whether that newline is part of an
        // escaped-newline "*\<newline>/" comment terminator.
        if (isEndOfBlockCommentWithEscapedNewLine(CurPtr-2, this)) {
          // We found the final */, though it had an escaped newline between the
          // * and /.  We're done!
          break;
        }
      }
      if (CurPtr[0] == '*' && CurPtr[1] != '/') {
        // If this is a /* inside of the comment, emit a warning.  Don't do this
        // if this is a /*/, which will end the comment.  This misses cases with
        // embedded escaped newlines, but oh well.
        if (!isLexingRawMode())
          Diag(CurPtr-1, diag::warn_nested_block_comment);
      }
    } else if (C == 0 && CurPtr == BufferEnd+1) {
      if (!isLexingRawMode())
        Diag(BufferPtr, diag::err_unterminated_block_comment);
      // Note: the user probably forgot a */.  We could continue immediately
      // after the /*, but this would involve lexing a lot of what really is the
      // comment, which surely would confuse the parser.
      --CurPtr;

      // KeepWhitespaceMode should return this broken comment as a token.  Since
      // it isn't a well formed comment, just return it as an 'unknown' token.
      if (isKeepWhitespaceMode()) {
        FormTokenWithChars(Result, CurPtr, tok::unknown);
        return true;
      }

      BufferPtr = CurPtr;
      return false;
    }
    C = *CurPtr++;
  }

  // Notify comment handlers about the comment unless we're in a #if 0 block.
  if (PP && !isLexingRawMode() &&
      PP->HandleComment(Result, SourceRange(getSourceLocation(BufferPtr),
                                            getSourceLocation(CurPtr)))) {
    BufferPtr = CurPtr;
    return true; // A token has to be returned.
  }

  // If we are returning comments as tokens, return this comment as a token.
  if (inKeepCommentMode()) {
    FormTokenWithChars(Result, CurPtr, tok::comment);
    return true;
  }

  // It is common for the tokens immediately after a /**/ comment to be
  // whitespace.  Instead of going through the big switch, handle it
  // efficiently now.  This is safe even in KeepWhitespaceMode because we would
  // have already returned above with the comment as a token.
  if (isHorizontalWhitespace(*CurPtr)) {
    Result.setFlag(Token::LeadingSpace);
    SkipWhitespace(Result, CurPtr+1);
    return false;
  }

  // Otherwise, just return so that the next character will be lexed as a token.
  BufferPtr = CurPtr;
  Result.setFlag(Token::LeadingSpace);
  return false;
}
+
+//===----------------------------------------------------------------------===//
+// Primary Lexing Entry Points
+//===----------------------------------------------------------------------===//
+
/// ReadToEndOfLine - Read the rest of the current preprocessor line as an
/// uninterpreted string.  This switches the lexer out of directive mode.
/// The returned string does not include the terminating newline.
std::string Lexer::ReadToEndOfLine() {
  assert(ParsingPreprocessorDirective && ParsingFilename == false &&
         "Must be in a preprocessing directive!");
  std::string Result;
  Token Tmp;

  // CurPtr - Cache BufferPtr in an automatic variable.
  const char *CurPtr = BufferPtr;
  while (1) {
    char Char = getAndAdvanceChar(CurPtr, Tmp);
    switch (Char) {
    default:
      Result += Char;
      break;
    case 0:  // Null.
      // Found end of file?
      if (CurPtr-1 != BufferEnd) {
        // Nope, normal character, continue.
        Result += Char;
        break;
      }
      // FALL THROUGH.
    case '\r':
    case '\n':
      // Okay, we found the end of the line. First, back up past the \0, \r, \n.
      assert(CurPtr[-1] == Char && "Trigraphs for newline?");
      BufferPtr = CurPtr-1;

      // Next, lex the character, which should handle the EOM transition;
      // this takes the lexer out of directive mode.
      Lex(Tmp);
      assert(Tmp.is(tok::eom) && "Unexpected token!");

      // Finally, we're done, return the string we found.
      return Result;
    }
  }
}
+
/// LexEndOfFile - CurPtr points to the end of this file.  Handle this
/// condition, reporting diagnostics and handling other edge cases as required.
/// This returns true if Result contains a token, false if PP.Lex should be
/// called again.
bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) {
  // If we hit the end of the file while parsing a preprocessor directive,
  // end the preprocessor directive first.  The next token returned will
  // then be the end of file.
  if (ParsingPreprocessorDirective) {
    // Done parsing the "line".
    ParsingPreprocessorDirective = false;
    // Update the location of token as well as BufferPtr.
    FormTokenWithChars(Result, CurPtr, tok::eom);

    // Restore comment saving mode, in case it was disabled for directive.
    SetCommentRetentionState(PP->getCommentRetentionState());
    return true;  // Have a token.
  }

  // If we are in raw mode, return this event as an EOF token.  Let the caller
  // that put us in raw mode handle the event.
  if (isLexingRawMode()) {
    Result.startToken();
    BufferPtr = BufferEnd;
    FormTokenWithChars(Result, BufferEnd, tok::eof);
    return true;
  }

  // Otherwise, check if we are code-completing, then issue diagnostics for
  // unterminated #if and missing newline.

  if (PP && PP->isCodeCompletionFile(FileLoc)) {
    // We're at the end of the file, but we've been asked to consider the
    // end of the file to be a code-completion token. Return the
    // code-completion token.
    Result.startToken();
    FormTokenWithChars(Result, CurPtr, tok::code_completion);

    // Only do the eof -> code_completion translation once.
    PP->SetCodeCompletionPoint(0, 0, 0);
    return true;
  }

  // If we are in a #if directive, emit an error for every conditional still
  // open on the stack.
  while (!ConditionalStack.empty()) {
    PP->Diag(ConditionalStack.back().IfLoc,
             diag::err_pp_unterminated_conditional);
    ConditionalStack.pop_back();
  }

  // C99 5.1.1.2p2: If the file is non-empty and didn't end in a newline, issue
  // a pedwarn, with a fixit hint suggesting insertion of the newline.
  if (CurPtr != BufferStart && (CurPtr[-1] != '\n' && CurPtr[-1] != '\r'))
    Diag(BufferEnd, diag::ext_no_newline_eof)
      << CodeModificationHint::CreateInsertion(getSourceLocation(BufferEnd),
                                               "\n");

  BufferPtr = CurPtr;

  // Finally, let the preprocessor handle this.
  return PP->HandleEndOfFile(Result);
}
+
+/// isNextPPTokenLParen - Return 1 if the next unexpanded token lexed from
+/// the specified lexer will return a tok::l_paren token, 0 if it is something
+/// else and 2 if there are no more tokens in the buffer controlled by the
+/// lexer.
+unsigned Lexer::isNextPPTokenLParen() {
+  assert(!LexingRawMode && "How can we expand a macro from a skipping buffer?");
+
+  // Switch to 'skipping' mode.  This will ensure that we can lex a token
+  // without emitting diagnostics, disables macro expansion, and will cause EOF
+  // to return an EOF token instead of popping the include stack.
+  LexingRawMode = true;
+
+  // Save state that can be changed while lexing so that we can restore it.
+  const char *TmpBufferPtr = BufferPtr;
+  bool inPPDirectiveMode = ParsingPreprocessorDirective;
+
+  Token Tok;
+  Tok.startToken();
+  LexTokenInternal(Tok);
+
+  // Restore state that may have changed.
+  BufferPtr = TmpBufferPtr;
+  ParsingPreprocessorDirective = inPPDirectiveMode;
+
+  // Restore the lexer back to non-skipping mode.
+  LexingRawMode = false;
+
+  if (Tok.is(tok::eof))
+    return 2;
+  return Tok.is(tok::l_paren);
+}
+
/// FindConflictEnd - Find the end of a version control conflict marker.
///
/// CurPtr points at a 7-character marker (e.g. "<<<<<<<"); the search begins
/// just past it.  Returns a pointer to the start of the first ">>>>>>>" that
/// occurs at the beginning of a line, or null if none exists before BufferEnd.
///
/// Note: the previous StringRef-based implementation had two bugs: after
/// skipping a mid-line ">>>>>>>" it shrank the buffer without recomputing the
/// match position (so the stale offset indexed the wrong bytes, possibly past
/// the end), and a match at offset 0 read one character before the buffer.
static const char *FindConflictEnd(const char *CurPtr, const char *BufferEnd) {
  // Start searching after the 7-character marker at CurPtr.
  const char *Pos = CurPtr + 7;
  while (BufferEnd - Pos >= 7) {
    // Check for ">>>>>>>" at Pos.
    bool IsMarker = true;
    for (unsigned i = 0; i != 7; ++i)
      if (Pos[i] != '>') {
        IsMarker = false;
        break;
      }

    if (!IsMarker) {
      ++Pos;
      continue;
    }

    // Must occur at start of line.  Pos > CurPtr, so Pos[-1] is always a
    // valid read (at worst the last character of the opening marker).
    if (Pos[-1] == '\r' || Pos[-1] == '\n')
      return Pos;

    // A mid-line ">>>>>>>" doesn't terminate the conflict; skip past it and
    // keep looking.
    Pos += 7;
  }
  return 0;
}
+
/// IsStartOfConflictMarker - If the specified pointer is the start of a version
/// control conflict marker like '<<<<<<<', recognize it as such, emit an error
/// and recover nicely.  This returns true if it is a conflict marker and false
/// if not.
bool Lexer::IsStartOfConflictMarker(const char *CurPtr) {
  // Only a conflict marker if it starts at the beginning of a line.
  if (CurPtr != BufferStart &&
      CurPtr[-1] != '\n' && CurPtr[-1] != '\r')
    return false;
  
  // Check to see if we have <<<<<<<.  Require at least 8 bytes so there is
  // room for the marker plus at least one following character.
  if (BufferEnd-CurPtr < 8 ||
      llvm::StringRef(CurPtr, 7) != "<<<<<<<")
    return false;

  // If we have a situation where we don't care about conflict markers, ignore
  // it.
  if (IsInConflictMarker || isLexingRawMode())
    return false;
  
  // Check to see if there is a >>>>>>> somewhere in the buffer at the start of
  // a line to terminate this conflict marker.
  // NOTE(review): FindConflictEnd already skips 7 bytes of its argument, so
  // passing CurPtr+7 starts the search at CurPtr+14 — looks unintended but is
  // benign for realistic markers; confirm.
  if (FindConflictEnd(CurPtr+7, BufferEnd)) {
    // We found a match.  We are really in a conflict marker.
    // Diagnose this, and ignore to the end of line.
    Diag(CurPtr, diag::err_conflict_marker);
    IsInConflictMarker = true;
    
    // Skip ahead to the end of line.  We know this exists because the
    // end-of-conflict marker starts with \r or \n.
    while (*CurPtr != '\r' && *CurPtr != '\n') {
      assert(CurPtr != BufferEnd && "Didn't find end of line");
      ++CurPtr;
    }
    BufferPtr = CurPtr;
    return true;
  }
  
  // No end of conflict marker found.
  return false;
}
+
+
+/// HandleEndOfConflictMarker - If this is a '=======' or '|||||||' or '>>>>>>>'
+/// marker, then it is the end of a conflict marker.  Handle it by ignoring up
+/// until the end of the line.  This returns true if it is a conflict marker and
+/// false if not.
+bool Lexer::HandleEndOfConflictMarker(const char *CurPtr) {
+  // Only a conflict marker if it starts at the beginning of a line.
+  if (CurPtr != BufferStart &&
+      CurPtr[-1] != '\n' && CurPtr[-1] != '\r')
+    return false;
+  
+  // If we have a situation where we don't care about conflict markers, ignore
+  // it.
+  if (!IsInConflictMarker || isLexingRawMode())
+    return false;
+  
+  // Check to see if we have the marker (7 characters in a row).
+  for (unsigned i = 1; i != 7; ++i)
+    if (CurPtr[i] != CurPtr[0])
+      return false;
+  
+  // If we do have it, search for the end of the conflict marker.  This could
+  // fail if it got skipped with a '#if 0' or something.  Note that CurPtr might
+  // be the end of conflict marker.
+  if (const char *End = FindConflictEnd(CurPtr, BufferEnd)) {
+    CurPtr = End;
+    
+    // Skip ahead to the end of line.
+    while (CurPtr != BufferEnd && *CurPtr != '\r' && *CurPtr != '\n')
+      ++CurPtr;
+    
+    BufferPtr = CurPtr;
+    
+    // No longer in the conflict marker.
+    IsInConflictMarker = false;
+    return true;
+  }
+  
+  return false;
+}
+
+
+/// LexTokenInternal - This implements a simple C family lexer.  It is an
+/// extremely performance critical piece of code.  This assumes that the buffer
+/// has a null character at the end of the file.  This returns a preprocessing
+/// token, not a normal token, as such, it is an internal interface.  It assumes
+/// that the Flags of result have been cleared before calling this.
+void Lexer::LexTokenInternal(Token &Result) {
+LexNextToken:
+  // New token, can't need cleaning yet.
+  Result.clearFlag(Token::NeedsCleaning);
+  Result.setIdentifierInfo(0);
+
+  // CurPtr - Cache BufferPtr in an automatic variable.
+  const char *CurPtr = BufferPtr;
+
+  // Small amounts of horizontal whitespace is very common between tokens.
+  if ((*CurPtr == ' ') || (*CurPtr == '\t')) {
+    ++CurPtr;
+    while ((*CurPtr == ' ') || (*CurPtr == '\t'))
+      ++CurPtr;
+
+    // If we are keeping whitespace and other tokens, just return what we just
+    // skipped.  The next lexer invocation will return the token after the
+    // whitespace.
+    if (isKeepWhitespaceMode()) {
+      FormTokenWithChars(Result, CurPtr, tok::unknown);
+      return;
+    }
+
+    BufferPtr = CurPtr;
+    Result.setFlag(Token::LeadingSpace);
+  }
+
+  unsigned SizeTmp, SizeTmp2;   // Temporaries for use in cases below.
+
+  // Read a character, advancing over it.
+  char Char = getAndAdvanceChar(CurPtr, Result);
+  tok::TokenKind Kind;
+
+  switch (Char) {
+  case 0:  // Null.
+    // Found end of file?
+    if (CurPtr-1 == BufferEnd) {
+      // Read the PP instance variable into an automatic variable, because
+      // LexEndOfFile will often delete 'this'.
+      Preprocessor *PPCache = PP;
+      if (LexEndOfFile(Result, CurPtr-1))  // Retreat back into the file.
+        return;   // Got a token to return.
+      assert(PPCache && "Raw buffer::LexEndOfFile should return a token");
+      return PPCache->Lex(Result);
+    }
+
+    if (!isLexingRawMode())
+      Diag(CurPtr-1, diag::null_in_file);
+    Result.setFlag(Token::LeadingSpace);
+    if (SkipWhitespace(Result, CurPtr))
+      return; // KeepWhitespaceMode
+
+    goto LexNextToken;   // GCC isn't tail call eliminating.
+      
+  case 26:  // DOS & CP/M EOF: "^Z".
+    // If we're in Microsoft extensions mode, treat this as end of file.
+    if (Features.Microsoft) {
+      // Read the PP instance variable into an automatic variable, because
+      // LexEndOfFile will often delete 'this'.
+      Preprocessor *PPCache = PP;
+      if (LexEndOfFile(Result, CurPtr-1))  // Retreat back into the file.
+        return;   // Got a token to return.
+      assert(PPCache && "Raw buffer::LexEndOfFile should return a token");
+      return PPCache->Lex(Result);
+    }
+    // If Microsoft extensions are disabled, this is just random garbage.
+    Kind = tok::unknown;
+    break;
+      
+  case '\n':
+  case '\r':
+    // If we are inside a preprocessor directive and we see the end of line,
+    // we know we are done with the directive, so return an EOM token.
+    if (ParsingPreprocessorDirective) {
+      // Done parsing the "line".
+      ParsingPreprocessorDirective = false;
+
+      // Restore comment saving mode, in case it was disabled for directive.
+      SetCommentRetentionState(PP->getCommentRetentionState());
+
+      // Since we consumed a newline, we are back at the start of a line.
+      IsAtStartOfLine = true;
+
+      Kind = tok::eom;
+      break;
+    }
+    // The returned token is at the start of the line.
+    Result.setFlag(Token::StartOfLine);
+    // No leading whitespace seen so far.
+    Result.clearFlag(Token::LeadingSpace);
+
+    if (SkipWhitespace(Result, CurPtr))
+      return; // KeepWhitespaceMode
+    goto LexNextToken;   // GCC isn't tail call eliminating.
+  case ' ':
+  case '\t':
+  case '\f':
+  case '\v':
+  SkipHorizontalWhitespace:
+    Result.setFlag(Token::LeadingSpace);
+    if (SkipWhitespace(Result, CurPtr))
+      return; // KeepWhitespaceMode
+
+  SkipIgnoredUnits:
+    CurPtr = BufferPtr;
+
+    // If the next token is obviously a // or /* */ comment, skip it efficiently
+    // too (without going through the big switch stmt).
+    if (CurPtr[0] == '/' && CurPtr[1] == '/' && !inKeepCommentMode() &&
+        Features.BCPLComment) {
+      if (SkipBCPLComment(Result, CurPtr+2))
+        return; // There is a token to return.
+      goto SkipIgnoredUnits;
+    } else if (CurPtr[0] == '/' && CurPtr[1] == '*' && !inKeepCommentMode()) {
+      if (SkipBlockComment(Result, CurPtr+2))
+        return; // There is a token to return.
+      goto SkipIgnoredUnits;
+    } else if (isHorizontalWhitespace(*CurPtr)) {
+      goto SkipHorizontalWhitespace;
+    }
+    goto LexNextToken;   // GCC isn't tail call eliminating.
+      
+  // C99 6.4.4.1: Integer Constants.
+  // C99 6.4.4.2: Floating Constants.
+  case '0': case '1': case '2': case '3': case '4':
+  case '5': case '6': case '7': case '8': case '9':
+    // Notify MIOpt that we read a non-whitespace/non-comment token.
+    MIOpt.ReadToken();
+    return LexNumericConstant(Result, CurPtr);
+
+  case 'L':   // Identifier (Loony) or wide literal (L'x' or L"xyz").
+    // Notify MIOpt that we read a non-whitespace/non-comment token.
+    MIOpt.ReadToken();
+    Char = getCharAndSize(CurPtr, SizeTmp);
+
+    // Wide string literal.
+    if (Char == '"')
+      return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
+                              true);
+
+    // Wide character constant.
+    if (Char == '\'')
+      return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result));
+    // FALL THROUGH, treating L like the start of an identifier.
+
+  // C99 6.4.2: Identifiers.
+  case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
+  case 'H': case 'I': case 'J': case 'K':    /*'L'*/case 'M': case 'N':
+  case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
+  case 'V': case 'W': case 'X': case 'Y': case 'Z':
+  case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
+  case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
+  case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
+  case 'v': case 'w': case 'x': case 'y': case 'z':
+  case '_':
+    // Notify MIOpt that we read a non-whitespace/non-comment token.
+    MIOpt.ReadToken();
+    return LexIdentifier(Result, CurPtr);
+
+  case '$':   // $ in identifiers.
+    if (Features.DollarIdents) {
+      if (!isLexingRawMode())
+        Diag(CurPtr-1, diag::ext_dollar_in_identifier);
+      // Notify MIOpt that we read a non-whitespace/non-comment token.
+      MIOpt.ReadToken();
+      return LexIdentifier(Result, CurPtr);
+    }
+
+    Kind = tok::unknown;
+    break;
+
+  // C99 6.4.4: Character Constants.
+  case '\'':
+    // Notify MIOpt that we read a non-whitespace/non-comment token.
+    MIOpt.ReadToken();
+    return LexCharConstant(Result, CurPtr);
+
+  // C99 6.4.5: String Literals.
+  case '"':
+    // Notify MIOpt that we read a non-whitespace/non-comment token.
+    MIOpt.ReadToken();
+    return LexStringLiteral(Result, CurPtr, false);
+
+  // C99 6.4.6: Punctuators.
+  case '?':
+    Kind = tok::question;
+    break;
+  case '[':
+    Kind = tok::l_square;
+    break;
+  case ']':
+    Kind = tok::r_square;
+    break;
+  case '(':
+    Kind = tok::l_paren;
+    break;
+  case ')':
+    Kind = tok::r_paren;
+    break;
+  case '{':
+    Kind = tok::l_brace;
+    break;
+  case '}':
+    Kind = tok::r_brace;
+    break;
+  case '.':
+    Char = getCharAndSize(CurPtr, SizeTmp);
+    if (Char >= '0' && Char <= '9') {
+      // Notify MIOpt that we read a non-whitespace/non-comment token.
+      MIOpt.ReadToken();
+
+      return LexNumericConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result));
+    } else if (Features.CPlusPlus && Char == '*') {
+      Kind = tok::periodstar;
+      CurPtr += SizeTmp;
+    } else if (Char == '.' &&
+               getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '.') {
+      Kind = tok::ellipsis;
+      CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+                           SizeTmp2, Result);
+    } else {
+      Kind = tok::period;
+    }
+    break;
+  case '&':
+    Char = getCharAndSize(CurPtr, SizeTmp);
+    if (Char == '&') {
+      Kind = tok::ampamp;
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+    } else if (Char == '=') {
+      Kind = tok::ampequal;
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+    } else {
+      Kind = tok::amp;
+    }
+    break;
+  case '*':
+    if (getCharAndSize(CurPtr, SizeTmp) == '=') {
+      Kind = tok::starequal;
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+    } else {
+      Kind = tok::star;
+    }
+    break;
+  case '+':
+    Char = getCharAndSize(CurPtr, SizeTmp);
+    if (Char == '+') {
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+      Kind = tok::plusplus;
+    } else if (Char == '=') {
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+      Kind = tok::plusequal;
+    } else {
+      Kind = tok::plus;
+    }
+    break;
+  case '-':
+    Char = getCharAndSize(CurPtr, SizeTmp);
+    if (Char == '-') {      // --
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+      Kind = tok::minusminus;
+    } else if (Char == '>' && Features.CPlusPlus &&
+               getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '*') {  // C++ ->*
+      CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+                           SizeTmp2, Result);
+      Kind = tok::arrowstar;
+    } else if (Char == '>') {   // ->
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+      Kind = tok::arrow;
+    } else if (Char == '=') {   // -=
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+      Kind = tok::minusequal;
+    } else {
+      Kind = tok::minus;
+    }
+    break;
+  case '~':
+    Kind = tok::tilde;
+    break;
+  case '!':
+    if (getCharAndSize(CurPtr, SizeTmp) == '=') {
+      Kind = tok::exclaimequal;
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+    } else {
+      Kind = tok::exclaim;
+    }
+    break;
+  case '/':
+    // 6.4.9: Comments
+    Char = getCharAndSize(CurPtr, SizeTmp);
+    if (Char == '/') {         // BCPL comment.
+      // Even if BCPL comments are disabled (e.g. in C89 mode), we generally
+      // want to lex this as a comment.  There is one problem with this though,
+      // that in one particular corner case, this can change the behavior of the
+      // resultant program.  For example, In  "foo //**/ bar", C89 would lex
+      // this as "foo / bar" and langauges with BCPL comments would lex it as
+      // "foo".  Check to see if the character after the second slash is a '*'.
+      // If so, we will lex that as a "/" instead of the start of a comment.
+      if (Features.BCPLComment ||
+          getCharAndSize(CurPtr+SizeTmp, SizeTmp2) != '*') {
+        if (SkipBCPLComment(Result, ConsumeChar(CurPtr, SizeTmp, Result)))
+          return; // There is a token to return.
+
+        // It is common for the tokens immediately after a // comment to be
+        // whitespace (indentation for the next line).  Instead of going through
+        // the big switch, handle it efficiently now.
+        goto SkipIgnoredUnits;
+      }
+    }
+
+    if (Char == '*') {  // /**/ comment.
+      if (SkipBlockComment(Result, ConsumeChar(CurPtr, SizeTmp, Result)))
+        return; // There is a token to return.
+      goto LexNextToken;   // GCC isn't tail call eliminating.
+    }
+
+    if (Char == '=') {
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+      Kind = tok::slashequal;
+    } else {
+      Kind = tok::slash;
+    }
+    break;
+  case '%':
+    Char = getCharAndSize(CurPtr, SizeTmp);
+    if (Char == '=') {
+      Kind = tok::percentequal;
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+    } else if (Features.Digraphs && Char == '>') {
+      Kind = tok::r_brace;                             // '%>' -> '}'
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+    } else if (Features.Digraphs && Char == ':') {
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+      Char = getCharAndSize(CurPtr, SizeTmp);
+      if (Char == '%' && getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == ':') {
+        Kind = tok::hashhash;                          // '%:%:' -> '##'
+        CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+                             SizeTmp2, Result);
+      } else if (Char == '@' && Features.Microsoft) {  // %:@ -> #@ -> Charize
+        CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+        if (!isLexingRawMode())
+          Diag(BufferPtr, diag::charize_microsoft_ext);
+        Kind = tok::hashat;
+      } else {                                         // '%:' -> '#'
+        // We parsed a # character.  If this occurs at the start of the line,
+        // it's actually the start of a preprocessing directive.  Callback to
+        // the preprocessor to handle it.
+        // FIXME: -fpreprocessed mode??
+        if (Result.isAtStartOfLine() && !LexingRawMode && !Is_PragmaLexer) {
+          FormTokenWithChars(Result, CurPtr, tok::hash);
+          PP->HandleDirective(Result);
+
+          // As an optimization, if the preprocessor didn't switch lexers, tail
+          // recurse.
+          if (PP->isCurrentLexer(this)) {
+            // Start a new token. If this is a #include or something, the PP may
+            // want us starting at the beginning of the line again.  If so, set
+            // the StartOfLine flag.
+            if (IsAtStartOfLine) {
+              Result.setFlag(Token::StartOfLine);
+              IsAtStartOfLine = false;
+            }
+            goto LexNextToken;   // GCC isn't tail call eliminating.
+          }
+
+          return PP->Lex(Result);
+        }
+
+        Kind = tok::hash;
+      }
+    } else {
+      Kind = tok::percent;
+    }
+    break;
+  case '<':
+    Char = getCharAndSize(CurPtr, SizeTmp);
+    if (ParsingFilename) {
+      return LexAngledStringLiteral(Result, CurPtr);
+    } else if (Char == '<') {
+      char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
+      if (After == '=') {
+        Kind = tok::lesslessequal;
+        CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+                             SizeTmp2, Result);
+      } else if (After == '<' && IsStartOfConflictMarker(CurPtr-1)) {
+        // If this is actually a '<<<<<<<' version control conflict marker,
+        // recognize it as such and recover nicely.
+        goto LexNextToken;
+      } else {
+        CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+        Kind = tok::lessless;
+      }
+    } else if (Char == '=') {
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+      Kind = tok::lessequal;
+    } else if (Features.Digraphs && Char == ':') {     // '<:' -> '['
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+      Kind = tok::l_square;
+    } else if (Features.Digraphs && Char == '%') {     // '<%' -> '{'
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+      Kind = tok::l_brace;
+    } else {
+      Kind = tok::less;
+    }
+    break;
+  case '>':
+    Char = getCharAndSize(CurPtr, SizeTmp);
+    if (Char == '=') {
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+      Kind = tok::greaterequal;
+    } else if (Char == '>') {
+      char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
+      if (After == '=') {
+        CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+                             SizeTmp2, Result);
+        Kind = tok::greatergreaterequal;
+      } else if (After == '>' && HandleEndOfConflictMarker(CurPtr-1)) {
+        // If this is '>>>>>>>' and we're in a conflict marker, ignore it.
+        goto LexNextToken;
+      } else {
+        CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+        Kind = tok::greatergreater;
+      }
+      
+    } else {
+      Kind = tok::greater;
+    }
+    break;
+  case '^':
+    Char = getCharAndSize(CurPtr, SizeTmp);
+    if (Char == '=') {
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+      Kind = tok::caretequal;
+    } else {
+      Kind = tok::caret;
+    }
+    break;
+  case '|':
+    Char = getCharAndSize(CurPtr, SizeTmp);
+    if (Char == '=') {
+      Kind = tok::pipeequal;
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+    } else if (Char == '|') {
+      // If this is '|||||||' and we're in a conflict marker, ignore it.
+      if (CurPtr[1] == '|' && HandleEndOfConflictMarker(CurPtr-1))
+        goto LexNextToken;
+      Kind = tok::pipepipe;
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+    } else {
+      Kind = tok::pipe;
+    }
+    break;
+  case ':':
+    Char = getCharAndSize(CurPtr, SizeTmp);
+    if (Features.Digraphs && Char == '>') {
+      Kind = tok::r_square; // ':>' -> ']'
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+    } else if (Features.CPlusPlus && Char == ':') {
+      Kind = tok::coloncolon;
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+    } else {
+      Kind = tok::colon;
+    }
+    break;
+  case ';':
+    Kind = tok::semi;
+    break;
+  case '=':
+    Char = getCharAndSize(CurPtr, SizeTmp);
+    if (Char == '=') {
+      // If this is '=======' and we're in a conflict marker, ignore it.
+      if (CurPtr[1] == '=' && HandleEndOfConflictMarker(CurPtr-1))
+        goto LexNextToken;
+      
+      Kind = tok::equalequal;
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+    } else {
+      Kind = tok::equal;
+    }
+    break;
+  case ',':
+    Kind = tok::comma;
+    break;
+  case '#':
+    Char = getCharAndSize(CurPtr, SizeTmp);
+    if (Char == '#') {
+      Kind = tok::hashhash;
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+    } else if (Char == '@' && Features.Microsoft) {  // #@ -> Charize
+      Kind = tok::hashat;
+      if (!isLexingRawMode())
+        Diag(BufferPtr, diag::charize_microsoft_ext);
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+    } else {
+      // We parsed a # character.  If this occurs at the start of the line,
+      // it's actually the start of a preprocessing directive.  Callback to
+      // the preprocessor to handle it.
+      // FIXME: -fpreprocessed mode??
+      if (Result.isAtStartOfLine() && !LexingRawMode && !Is_PragmaLexer) {
+        FormTokenWithChars(Result, CurPtr, tok::hash);
+        PP->HandleDirective(Result);
+
+        // As an optimization, if the preprocessor didn't switch lexers, tail
+        // recurse.
+        if (PP->isCurrentLexer(this)) {
+          // Start a new token.  If this is a #include or something, the PP may
+          // want us starting at the beginning of the line again.  If so, set
+          // the StartOfLine flag.
+          if (IsAtStartOfLine) {
+            Result.setFlag(Token::StartOfLine);
+            IsAtStartOfLine = false;
+          }
+          goto LexNextToken;   // GCC isn't tail call eliminating.
+        }
+        return PP->Lex(Result);
+      }
+
+      Kind = tok::hash;
+    }
+    break;
+
+  case '@':
+    // Objective C support.
+    if (CurPtr[-1] == '@' && Features.ObjC1)
+      Kind = tok::at;
+    else
+      Kind = tok::unknown;
+    break;
+
+  case '\\':
+    // FIXME: UCN's.
+    // FALL THROUGH.
+  default:
+    Kind = tok::unknown;
+    break;
+  }
+
+  // Notify MIOpt that we read a non-whitespace/non-comment token.
+  MIOpt.ReadToken();
+
+  // Update the location of token as well as BufferPtr.
+  FormTokenWithChars(Result, CurPtr, Kind);
+}
diff --git a/lib/Lex/LiteralSupport.cpp b/lib/Lex/LiteralSupport.cpp
new file mode 100644
index 0000000..004e675
--- /dev/null
+++ b/lib/Lex/LiteralSupport.cpp
@@ -0,0 +1,940 @@
+//===--- LiteralSupport.cpp - Code to parse and process literals ----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the NumericLiteralParser, CharLiteralParser, and
+// StringLiteralParser interfaces.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/LiteralSupport.h"
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/LexDiagnostic.h"
+#include "clang/Basic/TargetInfo.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringExtras.h"
+using namespace clang;
+
/// HexDigitValue - Return the numeric value (0-15) of the hexadecimal digit C,
/// or -1 when C is not a valid hex digit.
static int HexDigitValue(char C) {
  switch (C) {
  case '0': case '1': case '2': case '3': case '4':
  case '5': case '6': case '7': case '8': case '9':
    return C - '0';
  case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
    return C - 'a' + 10;
  case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
    return C - 'A' + 10;
  default:
    return -1;
  }
}
+
+/// ProcessCharEscape - Parse a standard C escape sequence, which can occur in
+/// either a character or a string literal.
+static unsigned ProcessCharEscape(const char *&ThisTokBuf,
+                                  const char *ThisTokEnd, bool &HadError,
+                                  SourceLocation Loc, bool IsWide,
+                                  Preprocessor &PP) {
+  // Skip the '\' char.
+  ++ThisTokBuf;
+
+  // We know that this character can't be off the end of the buffer, because
+  // that would have been \", which would not have been the end of string.
+  unsigned ResultChar = *ThisTokBuf++;
+  switch (ResultChar) {
+  // These map to themselves.
+  case '\\': case '\'': case '"': case '?': break;
+
+    // These have fixed mappings.
+  case 'a':
+    // TODO: K&R: the meaning of '\\a' is different in traditional C
+    ResultChar = 7;
+    break;
+  case 'b':
+    ResultChar = 8;
+    break;
+  case 'e':
+    PP.Diag(Loc, diag::ext_nonstandard_escape) << "e";
+    ResultChar = 27;
+    break;
+  case 'E':
+    PP.Diag(Loc, diag::ext_nonstandard_escape) << "E";
+    ResultChar = 27;
+    break;
+  case 'f':
+    ResultChar = 12;
+    break;
+  case 'n':
+    ResultChar = 10;
+    break;
+  case 'r':
+    ResultChar = 13;
+    break;
+  case 't':
+    ResultChar = 9;
+    break;
+  case 'v':
+    ResultChar = 11;
+    break;
+  case 'x': { // Hex escape.
+    ResultChar = 0;
+    if (ThisTokBuf == ThisTokEnd || !isxdigit(*ThisTokBuf)) {
+      PP.Diag(Loc, diag::err_hex_escape_no_digits);
+      HadError = 1;
+      break;
+    }
+
+    // Hex escapes are a maximal series of hex digits.
+    bool Overflow = false;
+    for (; ThisTokBuf != ThisTokEnd; ++ThisTokBuf) {
+      int CharVal = HexDigitValue(ThisTokBuf[0]);
+      if (CharVal == -1) break;
+      // About to shift out a digit?
+      Overflow |= (ResultChar & 0xF0000000) ? true : false;
+      ResultChar <<= 4;
+      ResultChar |= CharVal;
+    }
+
+    // See if any bits will be truncated when evaluated as a character.
+    unsigned CharWidth = IsWide
+                       ? PP.getTargetInfo().getWCharWidth()
+                       : PP.getTargetInfo().getCharWidth();
+
+    if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
+      Overflow = true;
+      ResultChar &= ~0U >> (32-CharWidth);
+    }
+
+    // Check for overflow.
+    if (Overflow)   // Too many digits to fit in
+      PP.Diag(Loc, diag::warn_hex_escape_too_large);
+    break;
+  }
+  case '0': case '1': case '2': case '3':
+  case '4': case '5': case '6': case '7': {
+    // Octal escapes.
+    --ThisTokBuf;
+    ResultChar = 0;
+
+    // Octal escapes are a series of octal digits with maximum length 3.
+    // "\0123" is a two digit sequence equal to "\012" "3".
+    unsigned NumDigits = 0;
+    do {
+      ResultChar <<= 3;
+      ResultChar |= *ThisTokBuf++ - '0';
+      ++NumDigits;
+    } while (ThisTokBuf != ThisTokEnd && NumDigits < 3 &&
+             ThisTokBuf[0] >= '0' && ThisTokBuf[0] <= '7');
+
+    // Check for overflow.  Reject '\777', but not L'\777'.
+    unsigned CharWidth = IsWide
+                       ? PP.getTargetInfo().getWCharWidth()
+                       : PP.getTargetInfo().getCharWidth();
+
+    if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
+      PP.Diag(Loc, diag::warn_octal_escape_too_large);
+      ResultChar &= ~0U >> (32-CharWidth);
+    }
+    break;
+  }
+
+    // Otherwise, these are not valid escapes.
+  case '(': case '{': case '[': case '%':
+    // GCC accepts these as extensions.  We warn about them as such though.
+    PP.Diag(Loc, diag::ext_nonstandard_escape)
+      << std::string()+(char)ResultChar;
+    break;
+  default:
+    if (isgraph(ThisTokBuf[0]))
+      PP.Diag(Loc, diag::ext_unknown_escape) << std::string()+(char)ResultChar;
+    else
+      PP.Diag(Loc, diag::ext_unknown_escape) << "x"+llvm::utohexstr(ResultChar);
+    break;
+  }
+
+  return ResultChar;
+}
+
/// ProcessUCNEscape - Read the Universal Character Name, check constraints and
/// convert the UTF32 to UTF8. This is a subroutine of StringLiteralParser.
/// When we decide to implement UCN's for character constants and identifiers,
/// we will likely rework our support for UCN's.
///
/// On entry ThisTokBuf points at the '\' of a \uXXXX or \UXXXXXXXX escape; on
/// exit it points past the consumed digits.  The UTF-8 encoding of the code
/// point is appended at ResultBuf, which is advanced past the bytes written.
/// HadError is set (and nothing is written) on a malformed or invalid UCN.
static void ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd,
                             char *&ResultBuf, bool &HadError,
                             SourceLocation Loc, bool IsWide, Preprocessor &PP)
{
  // FIXME: Add a warning - UCN's are only valid in C++ & C99.
  // FIXME: Handle wide strings.

  // Save the beginning of the string (for error diagnostics).
  const char *ThisTokBegin = ThisTokBuf;

  // Skip the '\u' char's.
  ThisTokBuf += 2;

  // A UCN must be followed by at least one hex digit.
  if (ThisTokBuf == ThisTokEnd || !isxdigit(*ThisTokBuf)) {
    PP.Diag(Loc, diag::err_ucn_escape_no_digits);
    HadError = 1;
    return;
  }
  typedef uint32_t UTF32;

  // Accumulate exactly 4 hex digits for \u or 8 for \U.  ThisTokBuf[-1] is
  // the 'u'/'U' that introduced the escape.
  UTF32 UcnVal = 0;
  unsigned short UcnLen = (ThisTokBuf[-1] == 'u' ? 4 : 8);
  for (; ThisTokBuf != ThisTokEnd && UcnLen; ++ThisTokBuf, UcnLen--) {
    int CharVal = HexDigitValue(ThisTokBuf[0]);
    if (CharVal == -1) break;
    UcnVal <<= 4;
    UcnVal |= CharVal;
  }
  // If we didn't consume the proper number of digits, there is a problem.
  if (UcnLen) {
    PP.Diag(PP.AdvanceToTokenCharacter(Loc, ThisTokBuf-ThisTokBegin),
            diag::err_ucn_escape_incomplete);
    HadError = 1;
    return;
  }
  // Check UCN constraints (C99 6.4.3p2): reject values below 0xA0 (except
  // $, @, `), UTF-16 surrogates, and anything past the Unicode maximum.
  if ((UcnVal < 0xa0 &&
      (UcnVal != 0x24 && UcnVal != 0x40 && UcnVal != 0x60 )) // $, @, `
      || (UcnVal >= 0xD800 && UcnVal <= 0xDFFF)
      || (UcnVal > 0x10FFFF)) /* the maximum legal UTF32 value */ {
    PP.Diag(Loc, diag::err_ucn_escape_invalid);
    HadError = 1;
    return;
  }
  // Now that we've parsed/checked the UCN, we convert from UTF32->UTF8.
  // The conversion below was inspired by:
  //   http://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c
  // First, we determine how many bytes the result will require.
  typedef uint8_t UTF8;

  unsigned short bytesToWrite = 0;
  if (UcnVal < (UTF32)0x80)
    bytesToWrite = 1;
  else if (UcnVal < (UTF32)0x800)
    bytesToWrite = 2;
  else if (UcnVal < (UTF32)0x10000)
    bytesToWrite = 3;
  else
    bytesToWrite = 4;

  // Continuation bytes are (bits & byteMask) | byteMark, i.e. 10xxxxxx.
  const unsigned byteMask = 0xBF;
  const unsigned byteMark = 0x80;

  // Once the bits are split out into bytes of UTF8, this is a mask OR-ed
  // into the first byte, depending on how many bytes follow.
  static const UTF8 firstByteMark[5] = {
    0x00, 0x00, 0xC0, 0xE0, 0xF0
  };
  // Finally, we write the bytes into ResultBuf: advance to the end of the
  // encoded sequence, then write back-to-front, shifting six more bits out
  // of UcnVal for each continuation byte.
  ResultBuf += bytesToWrite;
  switch (bytesToWrite) { // note: everything falls through.
    case 4: *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6;
    case 3: *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6;
    case 2: *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6;
    case 1: *--ResultBuf = (UTF8) (UcnVal | firstByteMark[bytesToWrite]);
  }
  // Update the buffer.  The switch above left ResultBuf back at the start of
  // the sequence; advance it past what we wrote.
  ResultBuf += bytesToWrite;
}
+
+
///       integer-constant: [C99 6.4.4.1]
///         decimal-constant integer-suffix
///         octal-constant integer-suffix
///         hexadecimal-constant integer-suffix
///       decimal-constant:
///         nonzero-digit
///         decimal-constant digit
///       octal-constant:
///         0
///         octal-constant octal-digit
///       hexadecimal-constant:
///         hexadecimal-prefix hexadecimal-digit
///         hexadecimal-constant hexadecimal-digit
///       hexadecimal-prefix: one of
///         0x 0X
///       integer-suffix:
///         unsigned-suffix [long-suffix]
///         unsigned-suffix [long-long-suffix]
///         long-suffix [unsigned-suffix]
///         long-long-suffix [unsigned-sufix]
///       nonzero-digit:
///         1 2 3 4 5 6 7 8 9
///       octal-digit:
///         0 1 2 3 4 5 6 7
///       hexadecimal-digit:
///         0 1 2 3 4 5 6 7 8 9
///         a b c d e f
///         A B C D E F
///       unsigned-suffix: one of
///         u U
///       long-suffix: one of
///         l L
///       long-long-suffix: one of
///         ll LL
///
///       floating-constant: [C99 6.4.4.2]
///         TODO: add rules...
///
/// Parses the token spelling [begin, end), recording the radix, digit range,
/// and suffix flags (isLong, isUnsigned, isFloat, ...) as members.  Errors
/// are diagnosed through pp and recorded by setting hadError.
NumericLiteralParser::
NumericLiteralParser(const char *begin, const char *end,
                     SourceLocation TokLoc, Preprocessor &pp)
  : PP(pp), ThisTokBegin(begin), ThisTokEnd(end) {

  // This routine assumes that the range begin/end matches the regex for integer
  // and FP constants (specifically, the 'pp-number' regex), and assumes that
  // the byte at "*end" is both valid and not part of the regex.  Because of
  // this, it doesn't have to check for 'overscan' in various places.
  assert(!isalnum(*end) && *end != '.' && *end != '_' &&
         "Lexer didn't maximally munch?");

  // Initialize all classification state to its defaults before parsing.
  s = DigitsBegin = begin;
  saw_exponent = false;
  saw_period = false;
  isLong = false;
  isUnsigned = false;
  isLongLong = false;
  isFloat = false;
  isImaginary = false;
  isMicrosoftInteger = false;
  hadError = false;

  if (*s == '0') { // parse radix
    // Leading zero: octal, hex, binary, or a decimal FP like 0.5 / 09e1.
    ParseNumberStartingWithZero(TokLoc);
    if (hadError)
      return;
  } else { // the first digit is non-zero
    radix = 10;
    s = SkipDigits(s);
    if (s == ThisTokEnd) {
      // Done.
    } else if (isxdigit(*s) && !(*s == 'e' || *s == 'E')) {
      // A hex digit (other than an exponent 'e'/'E') inside a decimal
      // constant is an error, e.g. 123f4.
      PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin),
              diag::err_invalid_decimal_digit) << std::string(s, s+1);
      hadError = true;
      return;
    } else if (*s == '.') {
      s++;
      saw_period = true;
      s = SkipDigits(s);
    }
    if ((*s == 'e' || *s == 'E')) { // exponent
      const char *Exponent = s;
      s++;
      saw_exponent = true;
      if (*s == '+' || *s == '-')  s++; // sign
      const char *first_non_digit = SkipDigits(s);
      if (first_non_digit != s) {
        s = first_non_digit;
      } else {
        // 'e' with no digits after it (e.g. 1e+) is an error.
        PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-begin),
                diag::err_exponent_has_no_digits);
        hadError = true;
        return;
      }
    }
  }

  SuffixBegin = s;

  // Parse the suffix.  At this point we can classify whether we have an FP or
  // integer constant.
  bool isFPConstant = isFloatingLiteral();

  // Loop over all of the characters of the suffix.  If we see something bad,
  // we break out of the loop.  Within the switch, 'break' means "invalid
  // suffix character" (falls to the loop-exit break below) and 'continue'
  // means the character was accepted.
  for (; s != ThisTokEnd; ++s) {
    switch (*s) {
    case 'f':      // FP Suffix for "float"
    case 'F':
      if (!isFPConstant) break;  // Error for integer constant.
      if (isFloat || isLong) break; // FF, LF invalid.
      isFloat = true;
      continue;  // Success.
    case 'u':
    case 'U':
      if (isFPConstant) break;  // Error for floating constant.
      if (isUnsigned) break;    // Cannot be repeated.
      isUnsigned = true;
      continue;  // Success.
    case 'l':
    case 'L':
      if (isLong || isLongLong) break;  // Cannot be repeated.
      if (isFloat) break;               // LF invalid.

      // Check for long long.  The L's need to be adjacent and the same case.
      if (s+1 != ThisTokEnd && s[1] == s[0]) {
        if (isFPConstant) break;        // long long invalid for floats.
        isLongLong = true;
        ++s;  // Eat both of them.
      } else {
        isLong = true;
      }
      continue;  // Success.
    case 'i':
      // Microsoft-mode 'i8'/'i16'/'i32'/'i64'/'i128' sized-integer suffixes;
      // otherwise 'i' falls through to the imaginary-constant cases below.
      if (PP.getLangOptions().Microsoft) {
        if (isFPConstant || isLong || isLongLong) break;

        // Allow i8, i16, i32, i64, and i128.
        if (s + 1 != ThisTokEnd) {
          switch (s[1]) {
            case '8':
              s += 2; // i8 suffix
              isMicrosoftInteger = true;
              break;
            case '1':
              if (s + 2 == ThisTokEnd) break;
              if (s[2] == '6') s += 3; // i16 suffix
              else if (s[2] == '2') {
                if (s + 3 == ThisTokEnd) break;
                if (s[3] == '8') s += 4; // i128 suffix
              }
              isMicrosoftInteger = true;
              break;
            case '3':
              if (s + 2 == ThisTokEnd) break;
              if (s[2] == '2') s += 3; // i32 suffix
              isMicrosoftInteger = true;
              break;
            case '6':
              if (s + 2 == ThisTokEnd) break;
              if (s[2] == '4') s += 3; // i64 suffix
              isMicrosoftInteger = true;
              break;
            default:
              break;
          }
          // Exit the suffix loop; anything left is diagnosed below.
          break;
        }
      }
      // fall through.
    case 'I':
    case 'j':
    case 'J':
      // GCC imaginary-constant extension.
      if (isImaginary) break;   // Cannot be repeated.
      PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin),
              diag::ext_imaginary_constant);
      isImaginary = true;
      continue;  // Success.
    }
    // If we reached here, there was an error.
    break;
  }

  // Report an error if there are any.
  if (s != ThisTokEnd) {
    PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin),
            isFPConstant ? diag::err_invalid_suffix_float_constant :
                           diag::err_invalid_suffix_integer_constant)
      << std::string(SuffixBegin, ThisTokEnd);
    hadError = true;
    return;
  }
}
+
/// ParseNumberStartingWithZero - This method is called when the first character
/// of the number is found to be a zero.  This means it is either an octal
/// number (like '04') or a hex number ('0x123a') a binary number ('0b1010') or
/// a floating point number (01239.123e4).  Eat the prefix, determining the
/// radix etc.  Sets radix/DigitsBegin and the saw_period/saw_exponent flags;
/// malformed literals are diagnosed through PP and set hadError.
void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
  assert(s[0] == '0' && "Invalid method call");
  s++;

  // Handle a hex number like 0x1234.  The prefix is only taken when at least
  // one hex digit (or a '.') follows it; otherwise we fall through and treat
  // the '0' as the start of an octal constant.
  if ((*s == 'x' || *s == 'X') && (isxdigit(s[1]) || s[1] == '.')) {
    s++;
    radix = 16;
    DigitsBegin = s;
    s = SkipHexDigits(s);
    if (s == ThisTokEnd) {
      // Done.
    } else if (*s == '.') {
      s++;
      saw_period = true;
      s = SkipHexDigits(s);
    }
    // A binary exponent can appear with or without a '.'. If dotted, the
    // binary exponent is required.
    if ((*s == 'p' || *s == 'P') && !PP.getLangOptions().CPlusPlus0x) {
      const char *Exponent = s;
      s++;
      saw_exponent = true;
      if (*s == '+' || *s == '-')  s++; // sign
      // The binary exponent itself is written in decimal digits.
      const char *first_non_digit = SkipDigits(s);
      if (first_non_digit == s) {
        PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-ThisTokBegin),
                diag::err_exponent_has_no_digits);
        hadError = true;
        return;
      }
      s = first_non_digit;

      // In C++0x, we cannot support hexadecimal floating literals because
      // they conflict with user-defined literals, so we warn in previous
      // versions of C++ by default.
      if (PP.getLangOptions().CPlusPlus)
        PP.Diag(TokLoc, diag::ext_hexconstant_cplusplus);
      else if (!PP.getLangOptions().HexFloats)
        PP.Diag(TokLoc, diag::ext_hexconstant_invalid);
    } else if (saw_period) {
      // A dotted hex constant with no binary exponent is ill-formed.
      PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin),
              diag::err_hexconstant_requires_exponent);
      hadError = true;
    }
    return;
  }

  // Handle simple binary numbers 0b01010
  if (*s == 'b' || *s == 'B') {
    // 0b101010 is a GCC extension.
    PP.Diag(TokLoc, diag::ext_binary_literal);
    ++s;
    radix = 2;
    DigitsBegin = s;
    s = SkipBinaryDigits(s);
    if (s == ThisTokEnd) {
      // Done.
    } else if (isxdigit(*s)) {
      PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin),
              diag::err_invalid_binary_digit) << std::string(s, s+1);
      hadError = true;
    }
    // Other suffixes will be diagnosed by the caller.
    return;
  }

  // For now, the radix is set to 8. If we discover that we have a
  // floating point constant, the radix will change to 10. Octal floating
  // point constants are not permitted (only decimal and hexadecimal).
  radix = 8;
  DigitsBegin = s;
  s = SkipOctalDigits(s);
  if (s == ThisTokEnd)
    return; // Done, simple octal number like 01234

  // If we have some other non-octal digit that *is* a decimal digit, see if
  // this is part of a floating point number like 094.123 or 09e1.
  if (isdigit(*s)) {
    const char *EndDecimal = SkipDigits(s);
    if (EndDecimal[0] == '.' || EndDecimal[0] == 'e' || EndDecimal[0] == 'E') {
      s = EndDecimal;
      radix = 10;
    }
  }

  // If we have a hex digit other than 'e' (which denotes a FP exponent) then
  // the code is using an incorrect base.
  if (isxdigit(*s) && *s != 'e' && *s != 'E') {
    PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin),
            diag::err_invalid_octal_digit) << std::string(s, s+1);
    hadError = true;
    return;
  }

  // A '.' or exponent converts this to a decimal floating constant.
  if (*s == '.') {
    s++;
    radix = 10;
    saw_period = true;
    s = SkipDigits(s); // Skip the fractional digits.
  }
  if (*s == 'e' || *s == 'E') { // exponent
    const char *Exponent = s;
    s++;
    radix = 10;
    saw_exponent = true;
    if (*s == '+' || *s == '-')  s++; // sign
    const char *first_non_digit = SkipDigits(s);
    if (first_non_digit != s) {
      s = first_non_digit;
    } else {
      PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-ThisTokBegin),
              diag::err_exponent_has_no_digits);
      hadError = true;
      return;
    }
  }
}
+
+
+/// GetIntegerValue - Convert this numeric literal value to an APInt that
+/// matches Val's input width.  If there is an overflow, set Val to the low bits
+/// of the result and return true.  Otherwise, return false.
+bool NumericLiteralParser::GetIntegerValue(llvm::APInt &Val) {
+  // Fast path: Compute a conservative bound on the maximum number of
+  // bits per digit in this radix. If we can't possibly overflow a
+  // uint64 based on that bound then do the simple conversion to
+  // integer. This avoids the expensive overflow checking below, and
+  // handles the common cases that matter (small decimal integers and
+  // hex/octal values which don't overflow).
+  unsigned MaxBitsPerDigit = 1;
+  while ((1U << MaxBitsPerDigit) < radix)
+    MaxBitsPerDigit += 1;
+  // Note that MaxBitsPerDigit is an over-estimate for non-power-of-2 radixes
+  // (e.g. 4 bits per decimal digit); that is safe here, it only means some
+  // borderline literals take the slow path below unnecessarily.
+  if ((SuffixBegin - DigitsBegin) * MaxBitsPerDigit <= 64) {
+    uint64_t N = 0;
+    for (s = DigitsBegin; s != SuffixBegin; ++s)
+      N = N*radix + HexDigitValue(*s);
+
+    // This will truncate the value to Val's input width. Simply check
+    // for overflow by comparing.
+    Val = N;
+    return Val.getZExtValue() != N;
+  }
+
+  // Slow path: accumulate in an APInt of Val's width, explicitly detecting
+  // overflow on each multiply and add.
+  Val = 0;
+  s = DigitsBegin;
+
+  llvm::APInt RadixVal(Val.getBitWidth(), radix);
+  llvm::APInt CharVal(Val.getBitWidth(), 0);
+  llvm::APInt OldVal = Val;
+
+  bool OverflowOccurred = false;
+  while (s < SuffixBegin) {
+    unsigned C = HexDigitValue(*s++);
+
+    // If this letter is out of bound for this radix, reject it.
+    assert(C < radix && "NumericLiteralParser ctor should have rejected this");
+
+    CharVal = C;
+
+    // Add the digit to the value in the appropriate radix.  If adding in digits
+    // made the value smaller, then this overflowed.
+    OldVal = Val;
+
+    // Multiply by radix, did overflow occur on the multiply?
+    // (Val / radix != OldVal detects bits lost to truncation.)
+    Val *= RadixVal;
+    OverflowOccurred |= Val.udiv(RadixVal) != OldVal;
+
+    // Add value, did overflow occur on the value?
+    //   (a + b) ult b  <=> overflow
+    Val += CharVal;
+    OverflowOccurred |= Val.ult(CharVal);
+  }
+  return OverflowOccurred;
+}
+
+/// GetFloatValue - Convert this numeric literal to a floating value, using
+/// the specified APFloat fltSemantics (specifying float, double, etc) and
+/// rounding to nearest-even.  Returns APFloat's conversion status.
+llvm::APFloat::opStatus
+NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) {
+  using llvm::APFloat;
+  using llvm::StringRef;
+
+  // Only convert the digit portion of the token: stop at the suffix if there
+  // is one (SuffixBegin), otherwise at the end of the token text.
+  unsigned n = std::min(SuffixBegin - ThisTokBegin, ThisTokEnd - ThisTokBegin);
+  return Result.convertFromString(StringRef(ThisTokBegin, n),
+                                  APFloat::rmNearestTiesToEven);
+}
+
+
+/// CharLiteralParser - Parse a character literal ("L?'.*'") that has already
+/// been lexed, computing its value (stored in Value) and diagnosing errors,
+/// overflow, and multi-character constants through the preprocessor.
+CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
+                                     SourceLocation Loc, Preprocessor &PP) {
+  // At this point we know that the character matches the regex "L?'.*'".
+  HadError = false;
+
+  // Determine if this is a wide character.
+  IsWide = begin[0] == 'L';
+  if (IsWide) ++begin;
+
+  // Skip over the entry quote.
+  assert(begin[0] == '\'' && "Invalid token lexed");
+  ++begin;
+
+  // FIXME: The "Value" is an uint64_t so we can handle char literals of
+  // up to 64-bits.
+  // FIXME: This extensively assumes that 'char' is 8-bits.
+  assert(PP.getTargetInfo().getCharWidth() == 8 &&
+         "Assumes char is 8 bits");
+  assert(PP.getTargetInfo().getIntWidth() <= 64 &&
+         (PP.getTargetInfo().getIntWidth() & 7) == 0 &&
+         "Assumes sizeof(int) on target is <= 64 and a multiple of char");
+  assert(PP.getTargetInfo().getWCharWidth() <= 64 &&
+         "Assumes sizeof(wchar) on target is <= 64");
+
+  // This is what we will use for overflow detection
+  llvm::APInt LitVal(PP.getTargetInfo().getIntWidth(), 0);
+
+  unsigned NumCharsSoFar = 0;
+  // Consume characters until we reach the closing quote.
+  while (begin[0] != '\'') {
+    uint64_t ResultChar;
+    if (begin[0] != '\\')     // If this is a normal character, consume it.
+      ResultChar = *begin++;
+    else                      // Otherwise, this is an escape character.
+      ResultChar = ProcessCharEscape(begin, end, HadError, Loc, IsWide, PP);
+
+    // If this is a multi-character constant (e.g. 'abc'), handle it.  These are
+    // implementation defined (C99 6.4.4.4p10).
+    if (NumCharsSoFar) {
+      if (IsWide) {
+        // Emulate GCC's (unintentional?) behavior: L'ab' -> L'b'.
+        // Discard everything accumulated so far; only the last char survives.
+        LitVal = 0;
+      } else {
+        // Narrow character literals act as though their value is concatenated
+        // in this implementation, but warn on overflow.
+        if (LitVal.countLeadingZeros() < 8)
+          PP.Diag(Loc, diag::warn_char_constant_too_large);
+        LitVal <<= 8;
+      }
+    }
+
+    LitVal = LitVal + ResultChar;
+    ++NumCharsSoFar;
+  }
+
+  // If this is the second character being processed, do special handling.
+  if (NumCharsSoFar > 1) {
+    // Warn about discarding the top bits for multi-char wide-character
+    // constants (L'abcd').
+    if (IsWide)
+      PP.Diag(Loc, diag::warn_extraneous_wide_char_constant);
+    else if (NumCharsSoFar != 4)
+      PP.Diag(Loc, diag::ext_multichar_character_literal);
+    else
+      // Four-char constants ('ABCD') are a common extension (e.g. OSTypes),
+      // so they get their own, softer diagnostic.
+      PP.Diag(Loc, diag::ext_four_char_character_literal);
+    IsMultiChar = true;
+  } else
+    IsMultiChar = false;
+
+  // Transfer the value from APInt to uint64_t
+  Value = LitVal.getZExtValue();
+
+  // If this is a single narrow character, sign extend it (e.g. '\xFF' is "-1")
+  // if 'char' is signed for this target (C99 6.4.4.4p10).  Note that multiple
+  // character constants are not sign extended in this implementation:
+  // '\xFF\xFF' = 65536 and '\x0\xFF' = 255, which matches GCC.
+  if (!IsWide && NumCharsSoFar == 1 && (Value & 128) &&
+      PP.getLangOptions().CharIsSigned)
+    Value = (signed char)Value;
+}
+
+
+///       string-literal: [C99 6.4.5]
+///          " [s-char-sequence] "
+///         L" [s-char-sequence] "
+///       s-char-sequence:
+///         s-char
+///         s-char-sequence s-char
+///       s-char:
+///         any source character except the double quote ",
+///           backslash \, or newline character
+///         escape-character
+///         universal-character-name
+///       escape-character: [C99 6.4.4.4]
+///         \ escape-code
+///         universal-character-name
+///       escape-code:
+///         character-escape-code
+///         octal-escape-code
+///         hex-escape-code
+///       character-escape-code: one of
+///         n t b r f v a
+///         \ ' " ?
+///       octal-escape-code:
+///         octal-digit
+///         octal-digit octal-digit
+///         octal-digit octal-digit octal-digit
+///       hex-escape-code:
+///         x hex-digit
+///         hex-escape-code hex-digit
+///       universal-character-name:
+///         \u hex-quad
+///         \U hex-quad hex-quad
+///       hex-quad:
+///         hex-digit hex-digit hex-digit hex-digit
+///
+/// StringLiteralParser - Decode and concatenate the NumStringToks adjacent
+/// string-literal tokens (translation phase #6), expanding escapes and,
+/// if any piece is wide, widening every character to wchar_t width.  The
+/// decoded bytes are left in ResultBuf; errors are reported via hadError.
+StringLiteralParser::
+StringLiteralParser(const Token *StringToks, unsigned NumStringToks,
+                    Preprocessor &pp) : PP(pp) {
+  // Scan all of the string portions, remember the max individual token length,
+  // computing a bound on the concatenated string length, and see whether any
+  // piece is a wide-string.  If any of the string portions is a wide-string
+  // literal, the result is a wide-string literal [C99 6.4.5p4].
+  MaxTokenLength = StringToks[0].getLength();
+  SizeBound = StringToks[0].getLength()-2;  // -2 for "".
+  AnyWide = StringToks[0].is(tok::wide_string_literal);
+
+  hadError = false;
+
+  // Implement Translation Phase #6: concatenation of string literals
+  /// (C99 5.1.1.2p1).  The common case is only one string fragment.
+  for (unsigned i = 1; i != NumStringToks; ++i) {
+    // The string could be shorter than this if it needs cleaning, but this is a
+    // reasonable bound, which is all we need.
+    SizeBound += StringToks[i].getLength()-2;  // -2 for "".
+
+    // Remember maximum string piece length.
+    if (StringToks[i].getLength() > MaxTokenLength)
+      MaxTokenLength = StringToks[i].getLength();
+
+    // Remember if we see any wide strings.
+    AnyWide |= StringToks[i].is(tok::wide_string_literal);
+  }
+
+  // Include space for the null terminator.
+  ++SizeBound;
+
+  // TODO: K&R warning: "traditional C rejects string constant concatenation"
+
+  // Get the width in bytes of wchar_t.  If no wchar_t strings are used, do not
+  // query the target.  As such, wchar_tByteWidth is only valid if AnyWide=true.
+  wchar_tByteWidth = ~0U;
+  if (AnyWide) {
+    wchar_tByteWidth = PP.getTargetInfo().getWCharWidth();
+    assert((wchar_tByteWidth & 7) == 0 && "Assumes wchar_t is byte multiple!");
+    wchar_tByteWidth /= 8;
+  }
+
+  // The output buffer size needs to be large enough to hold wide characters.
+  // This is a worst-case assumption which basically corresponds to L"" "long".
+  if (AnyWide)
+    SizeBound *= wchar_tByteWidth;
+
+  // Size the temporary buffer to hold the result string data.
+  ResultBuf.resize(SizeBound);
+
+  // Likewise, but for each string piece.
+  llvm::SmallString<512> TokenBuf;
+  TokenBuf.resize(MaxTokenLength);
+
+  // Loop over all the strings, getting their spelling, and expanding them to
+  // wide strings as appropriate.
+  ResultPtr = &ResultBuf[0];   // Next byte to fill in.
+
+  Pascal = false;
+
+  for (unsigned i = 0, e = NumStringToks; i != e; ++i) {
+    const char *ThisTokBuf = &TokenBuf[0];
+    // Get the spelling of the token, which eliminates trigraphs, etc.  We know
+    // that ThisTokBuf points to a buffer that is big enough for the whole token
+    // and 'spelled' tokens can only shrink.
+    unsigned ThisTokLen = PP.getSpelling(StringToks[i], ThisTokBuf);
+    const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1;  // Skip end quote.
+
+    // TODO: Input character set mapping support.
+
+    // Skip L marker for wide strings.
+    bool ThisIsWide = false;
+    if (ThisTokBuf[0] == 'L') {
+      ++ThisTokBuf;
+      ThisIsWide = true;
+    }
+
+    assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");
+    ++ThisTokBuf;
+
+    // Check if this is a pascal string
+    if (pp.getLangOptions().PascalStrings && ThisTokBuf + 1 != ThisTokEnd &&
+        ThisTokBuf[0] == '\\' && ThisTokBuf[1] == 'p') {
+
+      // If the \p sequence is found in the first token, we have a pascal string
+      // Otherwise, if we already have a pascal string, ignore the first \p
+      if (i == 0) {
+        // Leave the '\\' in place: ProcessCharEscape turns "\p" into the
+        // length byte placeholder, which is patched below after the loop.
+        ++ThisTokBuf;
+        Pascal = true;
+      } else if (Pascal)
+        ThisTokBuf += 2;
+    }
+
+    while (ThisTokBuf != ThisTokEnd) {
+      // Is this a span of non-escape characters?
+      if (ThisTokBuf[0] != '\\') {
+        const char *InStart = ThisTokBuf;
+        do {
+          ++ThisTokBuf;
+        } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');
+
+        // Copy the character span over.
+        unsigned Len = ThisTokBuf-InStart;
+        if (!AnyWide) {
+          memcpy(ResultPtr, InStart, Len);
+          ResultPtr += Len;
+        } else {
+          // Note: our internal rep of wide char tokens is always little-endian.
+          for (; Len; --Len, ++InStart) {
+            *ResultPtr++ = InStart[0];
+            // Add zeros at the end.
+            for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
+              *ResultPtr++ = 0;
+          }
+        }
+        continue;
+      }
+      // Is this a Universal Character Name escape?
+      if (ThisTokBuf[1] == 'u' || ThisTokBuf[1] == 'U') {
+        ProcessUCNEscape(ThisTokBuf, ThisTokEnd, ResultPtr,
+                         hadError, StringToks[i].getLocation(), ThisIsWide, PP);
+        continue;
+      }
+      // Otherwise, this is a non-UCN escape character.  Process it.
+      unsigned ResultChar = ProcessCharEscape(ThisTokBuf, ThisTokEnd, hadError,
+                                              StringToks[i].getLocation(),
+                                              ThisIsWide, PP);
+
+      // Note: our internal rep of wide char tokens is always little-endian.
+      *ResultPtr++ = ResultChar & 0xFF;
+
+      if (AnyWide) {
+        for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
+          *ResultPtr++ = ResultChar >> i*8;
+      }
+    }
+  }
+
+  if (Pascal) {
+    // Patch the first byte to hold the string length (pascal convention);
+    // -1 excludes the length byte itself.
+    ResultBuf[0] = ResultPtr-&ResultBuf[0]-1;
+
+    // Verify that pascal strings aren't too large.
+    if (GetStringLength() > 256) {
+      PP.Diag(StringToks[0].getLocation(), diag::err_pascal_string_too_long)
+        << SourceRange(StringToks[0].getLocation(),
+                       StringToks[NumStringToks-1].getLocation());
+      hadError = 1;
+      return;
+    }
+  }
+}
+
+
+/// getOffsetOfStringByte - This function returns the offset of the
+/// specified byte of the string data represented by Token.  This handles
+/// advancing over escape sequences in the string.  The offset is relative
+/// to the token's spelling (including the opening quote), so callers can
+/// map a byte of decoded string data back to a source location.
+unsigned StringLiteralParser::getOffsetOfStringByte(const Token &Tok,
+                                                    unsigned ByteNo,
+                                                    Preprocessor &PP) {
+  // Get the spelling of the token.
+  llvm::SmallString<16> SpellingBuffer;
+  SpellingBuffer.resize(Tok.getLength());
+
+  const char *SpellingPtr = &SpellingBuffer[0];
+  unsigned TokLen = PP.getSpelling(Tok, SpellingPtr);
+
+  assert(SpellingPtr[0] != 'L' && "Doesn't handle wide strings yet");
+
+
+  const char *SpellingStart = SpellingPtr;
+  const char *SpellingEnd = SpellingPtr+TokLen;
+
+  // Skip over the leading quote.
+  assert(SpellingPtr[0] == '"' && "Should be a string literal!");
+  ++SpellingPtr;
+
+  // Skip over bytes until we find the offset we're looking for.
+  while (ByteNo) {
+    assert(SpellingPtr < SpellingEnd && "Didn't find byte offset!");
+
+    // Step over non-escapes simply.
+    if (*SpellingPtr != '\\') {
+      ++SpellingPtr;
+      --ByteNo;
+      continue;
+    }
+
+    // Otherwise, this is an escape character.  Advance over it.
+    // ProcessCharEscape moves SpellingPtr past the whole escape sequence;
+    // each escape produces exactly one byte of string data.
+    bool HadError = false;
+    ProcessCharEscape(SpellingPtr, SpellingEnd, HadError,
+                      Tok.getLocation(), false, PP);
+    assert(!HadError && "This method isn't valid on erroneous strings");
+    --ByteNo;
+  }
+
+  return SpellingPtr-SpellingStart;
+}
diff --git a/lib/Lex/MacroArgs.cpp b/lib/Lex/MacroArgs.cpp
new file mode 100644
index 0000000..2f1a34c
--- /dev/null
+++ b/lib/Lex/MacroArgs.cpp
@@ -0,0 +1,286 @@
+//===--- TokenLexer.cpp - Lex from a token stream -------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TokenLexer interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MacroArgs.h"
+#include "clang/Lex/MacroInfo.h"
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/LexDiagnostic.h"
+using namespace clang;
+
+/// MacroArgs ctor function - Create a MacroArgs object for an invocation of
+/// the function-like macro MI, copying the NumToks unexpanded argument tokens
+/// to storage immediately following the object.  Reuses an entry from the
+/// preprocessor's MacroArgs free list when one is large enough.
+MacroArgs *MacroArgs::create(const MacroInfo *MI,
+                             const Token *UnexpArgTokens,
+                             unsigned NumToks, bool VarargsElided,
+                             Preprocessor &PP) {
+  assert(MI->isFunctionLike() &&
+         "Can't have args for an object-like macro!");
+  MacroArgs **ResultEnt = 0;
+  unsigned ClosestMatch = ~0U;
+  
+  // See if we have an entry with a big enough argument list to reuse on the
+  // free list.  If so, reuse it.
+  for (MacroArgs **Entry = &PP.MacroArgCache; *Entry;
+       Entry = &(*Entry)->ArgCache)
+    if ((*Entry)->NumUnexpArgTokens >= NumToks &&
+        (*Entry)->NumUnexpArgTokens < ClosestMatch) {
+      ResultEnt = Entry;
+      
+      // If we have an exact match, use it.
+      if ((*Entry)->NumUnexpArgTokens == NumToks)
+        break;
+      // Otherwise, use the best fit.
+      ClosestMatch = (*Entry)->NumUnexpArgTokens;
+    }
+  
+  MacroArgs *Result;
+  if (ResultEnt == 0) {
+    // Allocate memory for a MacroArgs object with the lexer tokens at the end.
+    Result = (MacroArgs*)malloc(sizeof(MacroArgs) + NumToks*sizeof(Token));
+    // Construct the MacroArgs object.
+    new (Result) MacroArgs(NumToks, VarargsElided);
+  } else {
+    Result = *ResultEnt;
+    // Unlink this node from the preprocessors singly linked list.
+    *ResultEnt = Result->ArgCache;
+    Result->NumUnexpArgTokens = NumToks;
+    Result->VarargsElided = VarargsElided;
+  }
+
+  // Copy the actual unexpanded tokens to immediately after the result ptr.
+  if (NumToks)
+    memcpy(const_cast<Token*>(Result->getUnexpArgument(0)),
+           UnexpArgTokens, NumToks*sizeof(Token));
+
+  return Result;
+}
+
+/// destroy - "Destroy" this object: release per-invocation state and return
+/// the object to the preprocessor's free list so a later create() can reuse
+/// its allocation.  The memory itself is only freed by deallocate().
+void MacroArgs::destroy(Preprocessor &PP) {
+  StringifiedArgs.clear();
+
+  // Don't clear PreExpArgTokens, just clear the entries.  Clearing the entries
+  // would deallocate the element vectors.
+  for (unsigned i = 0, e = PreExpArgTokens.size(); i != e; ++i)
+    PreExpArgTokens[i].clear();
+  
+  // Add this to the preprocessor's free list.
+  ArgCache = PP.MacroArgCache;
+  PP.MacroArgCache = this;
+}
+
+/// deallocate - This should only be called by the Preprocessor when managing
+/// its freelist.  Destroys this entry and returns the next entry in the
+/// free list so the caller can continue walking it.
+MacroArgs *MacroArgs::deallocate() {
+  MacroArgs *Next = ArgCache;
+  
+  // Run the dtor to deallocate the vectors.
+  this->~MacroArgs();
+  // Release the memory for the object.
+  free(this);
+  
+  return Next;
+}
+
+
+/// getArgLength - Given a pointer to an expanded or unexpanded argument,
+/// return the number of tokens, not counting the EOF, that make up the
+/// argument.  Each argument is terminated by an EOF token in the packed
+/// token stream.
+unsigned MacroArgs::getArgLength(const Token *ArgPtr) {
+  unsigned NumArgTokens = 0;
+  for (; ArgPtr->isNot(tok::eof); ++ArgPtr)
+    ++NumArgTokens;
+  return NumArgTokens;
+}
+
+
+/// getUnexpArgument - Return the unexpanded tokens for the specified formal.
+/// The arguments are stored as one contiguous, EOF-separated token array
+/// immediately after this object, so argument #Arg is found by skipping
+/// Arg EOF markers.
+const Token *MacroArgs::getUnexpArgument(unsigned Arg) const {
+  // The unexpanded argument tokens start immediately after the MacroArgs object
+  // in memory.
+  const Token *Start = (const Token *)(this+1);
+  const Token *Result = Start;
+  // Scan to find Arg.
+  for (; Arg; ++Result) {
+    assert(Result < Start+NumUnexpArgTokens && "Invalid arg #");
+    if (Result->is(tok::eof))
+      --Arg;
+  }
+  assert(Result < Start+NumUnexpArgTokens && "Invalid arg #");
+  return Result;
+}
+
+
+/// ArgNeedsPreexpansion - If we can prove that the argument won't be affected
+/// by pre-expansion, return false.  Otherwise, conservatively return true.
+bool MacroArgs::ArgNeedsPreexpansion(const Token *ArgTok,
+                                     Preprocessor &PP) const {
+  // If there are no identifiers in the argument list, or if the identifiers are
+  // known to not be macros, pre-expansion won't modify it.
+  for (; ArgTok->isNot(tok::eof); ++ArgTok)
+    if (IdentifierInfo *II = ArgTok->getIdentifierInfo()) {
+      // Only an enabled macro can expand; a disabled one is currently being
+      // expanded and is therefore not eligible (C99 6.10.3.4p2).
+      if (II->hasMacroDefinition() && PP.getMacroInfo(II)->isEnabled())
+        // Return true even though the macro could be a function-like macro
+        // without a following '(' token.
+        return true;
+    }
+  return false;
+}
+
+/// getPreExpArgument - Return the pre-expanded form of the specified
+/// argument, computing and caching it on first use.  The returned vector
+/// includes the trailing EOF token.
+const std::vector<Token> &
+MacroArgs::getPreExpArgument(unsigned Arg, const MacroInfo *MI, 
+                             Preprocessor &PP) {
+  assert(Arg < MI->getNumArgs() && "Invalid argument number!");
+
+  // Lazily size the cache vector to hold one entry per formal argument.
+  if (PreExpArgTokens.size() < MI->getNumArgs())
+    PreExpArgTokens.resize(MI->getNumArgs());
+  
+  // If we have already computed this argument, return the cached result.
+  std::vector<Token> &Result = PreExpArgTokens[Arg];
+  if (!Result.empty()) return Result;
+
+  const Token *AT = getUnexpArgument(Arg);
+  unsigned NumToks = getArgLength(AT)+1;  // Include the EOF.
+
+  // Otherwise, we have to pre-expand this argument, populating Result.  To do
+  // this, we set up a fake TokenLexer to lex from the unexpanded argument
+  // list.  With this installed, we lex expanded tokens until we hit the EOF
+  // token at the end of the unexp list.
+  PP.EnterTokenStream(AT, NumToks, false /*disable expand*/,
+                      false /*owns tokens*/);
+
+  // Lex all of the macro-expanded tokens into Result.
+  do {
+    Result.push_back(Token());
+    Token &Tok = Result.back();
+    PP.Lex(Tok);
+  } while (Result.back().isNot(tok::eof));
+
+  // Pop the token stream off the top of the stack.  We know that the internal
+  // pointer inside of it is to the "end" of the token stream, but the stack
+  // will not otherwise be popped until the next token is lexed.  The problem is
+  // that the token may be lexed sometime after the vector of tokens itself is
+  // destroyed, which would be badness.
+  PP.RemoveTopOfLexerStack();
+  return Result;
+}
+
+
+/// StringifyArgument - Implement C99 6.10.3.2p2, converting a sequence of
+/// tokens into the literal string token that should be produced by the C #
+/// preprocessor operator.  If Charify is true, then it should be turned into
+/// a character literal for the Microsoft charize (#@) extension.
+///
+Token MacroArgs::StringifyArgument(const Token *ArgToks,
+                                   Preprocessor &PP, bool Charify) {
+  Token Tok;
+  Tok.startToken();
+  Tok.setKind(Charify ? tok::char_constant : tok::string_literal);
+
+  const Token *ArgTokStart = ArgToks;
+
+  // Stringify all the tokens.
+  llvm::SmallString<128> Result;
+  Result += "\"";
+
+  bool isFirst = true;
+  for (; ArgToks->isNot(tok::eof); ++ArgToks) {
+    const Token &Tok = *ArgToks;
+    // Separate tokens with a single space where the source had whitespace;
+    // leading whitespace before the first token is dropped (6.10.3.2p2).
+    if (!isFirst && (Tok.hasLeadingSpace() || Tok.isAtStartOfLine()))
+      Result += ' ';
+    isFirst = false;
+
+    // If this is a string or character constant, escape the token as specified
+    // by 6.10.3.2p2.
+    if (Tok.is(tok::string_literal) ||       // "foo"
+        Tok.is(tok::wide_string_literal) ||  // L"foo"
+        Tok.is(tok::char_constant)) {        // 'x' and L'x'.
+      std::string Str = Lexer::Stringify(PP.getSpelling(Tok));
+      Result.append(Str.begin(), Str.end());
+    } else {
+      // Otherwise, just append the token.  Do some gymnastics to get the token
+      // in place and avoid copies where possible.
+      unsigned CurStrLen = Result.size();
+      Result.resize(CurStrLen+Tok.getLength());
+      const char *BufPtr = &Result[CurStrLen];
+      unsigned ActualTokLen = PP.getSpelling(Tok, BufPtr);
+
+      // If getSpelling returned a pointer to an already uniqued version of the
+      // string instead of filling in BufPtr, memcpy it onto our string.
+      if (BufPtr != &Result[CurStrLen])
+        memcpy(&Result[CurStrLen], BufPtr, ActualTokLen);
+
+      // If the token was dirty, the spelling may be shorter than the token.
+      if (ActualTokLen != Tok.getLength())
+        Result.resize(CurStrLen+ActualTokLen);
+    }
+  }
+
+  // If the last character of the string is a \, and if it isn't escaped, this
+  // is an invalid string literal, diagnose it as specified in C99.
+  if (Result.back() == '\\') {
+    // Count the number of consecutive \ characters.  If even, then they are
+    // just escaped backslashes, otherwise it's an error.
+    unsigned FirstNonSlash = Result.size()-2;
+    // Guaranteed to find the starting " if nothing else.
+    while (Result[FirstNonSlash] == '\\')
+      --FirstNonSlash;
+    // Odd run length => the final backslash is unescaped and would escape
+    // the closing quote we are about to append.
+    if ((Result.size()-1-FirstNonSlash) & 1) {
+      // Diagnose errors for things like: #define F(X) #X   /   F(\)
+      PP.Diag(ArgToks[-1], diag::pp_invalid_string_literal);
+      Result.pop_back();  // remove one of the \'s.
+    }
+  }
+  Result += '"';
+
+  // If this is the charify operation and the result is not a legal character
+  // constant, diagnose it.
+  if (Charify) {
+    // First step, turn double quotes into single quotes:
+    Result[0] = '\'';
+    Result[Result.size()-1] = '\'';
+
+    // Check for bogus character.  Only a single character ('x') or a single
+    // simple escape ('\x') is a legal charize result.
+    bool isBad = false;
+    if (Result.size() == 3)
+      isBad = Result[1] == '\'';   // ''' is not legal. '\' already fixed above.
+    else
+      isBad = (Result.size() != 4 || Result[1] != '\\');  // Not '\x'
+
+    if (isBad) {
+      PP.Diag(ArgTokStart[0], diag::err_invalid_character_to_charify);
+      Result = "' '";  // Use something arbitrary, but legal.
+    }
+  }
+
+  // Build a literal token owning a copy of the characters in Result.
+  PP.CreateString(&Result[0], Result.size(), Tok);
+  return Tok;
+}
+
+/// getStringifiedArgument - Compute, cache, and return the specified argument
+/// that has been 'stringified' as required by the # operator.
+const Token &MacroArgs::getStringifiedArgument(unsigned ArgNo,
+                                               Preprocessor &PP) {
+  assert(ArgNo < NumUnexpArgTokens && "Invalid argument number!");
+  // Lazily allocate the cache; zero-fill so the isNot(string_literal) check
+  // below sees "not computed yet" for every slot.
+  if (StringifiedArgs.empty()) {
+    StringifiedArgs.resize(getNumArguments());
+    memset(&StringifiedArgs[0], 0,
+           sizeof(StringifiedArgs[0])*getNumArguments());
+  }
+  if (StringifiedArgs[ArgNo].isNot(tok::string_literal))
+    StringifiedArgs[ArgNo] = StringifyArgument(getUnexpArgument(ArgNo), PP);
+  return StringifiedArgs[ArgNo];
+}
diff --git a/lib/Lex/MacroArgs.h b/lib/Lex/MacroArgs.h
new file mode 100644
index 0000000..6ff4856
--- /dev/null
+++ b/lib/Lex/MacroArgs.h
@@ -0,0 +1,119 @@
+//===--- MacroArgs.h - Formal argument info for Macros ----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MacroArgs interface.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_MACROARGS_H
+#define LLVM_CLANG_MACROARGS_H
+
+#include <vector>
+
+namespace clang {
+  class MacroInfo;
+  class Preprocessor;
+  class Token;
+
+/// MacroArgs - An instance of this class captures information about
+/// the formal arguments specified to a function-like macro invocation.
+/// Instances are created only via the create() factory (the raw token
+/// storage lives immediately after the object) and recycled through the
+/// Preprocessor's free list via destroy()/deallocate().
+class MacroArgs {
+  /// NumUnexpArgTokens - The number of raw, unexpanded tokens for the
+  /// arguments.  All of the actual argument tokens are allocated immediately
+  /// after the MacroArgs object in memory.  This is all of the arguments
+  /// concatenated together, with 'EOF' markers at the end of each argument.
+  unsigned NumUnexpArgTokens;
+
+  /// VarargsElided - True if this is a C99 style varargs macro invocation and
+  /// there was no argument specified for the "..." argument.  If the argument
+  /// was specified (even empty) or this isn't a C99 style varargs function, or
+  /// if in strict mode and the C99 varargs macro had only a ... argument, this
+  /// is false.
+  bool VarargsElided;
+  
+  /// PreExpArgTokens - Pre-expanded tokens for arguments that need them.  Empty
+  /// if not yet computed.  This includes the EOF marker at the end of the
+  /// stream.
+  std::vector<std::vector<Token> > PreExpArgTokens;
+
+  /// StringifiedArgs - This contains arguments in 'stringified' form.  If the
+  /// stringified form of an argument has not yet been computed, this is empty.
+  std::vector<Token> StringifiedArgs;
+
+  /// ArgCache - This is a linked list of MacroArgs objects that the
+  /// Preprocessor owns which we use to avoid thrashing malloc/free.
+  MacroArgs *ArgCache;
+  
+  // Construction and destruction are private: use create()/destroy() so the
+  // trailing token storage and the free list are managed correctly.
+  MacroArgs(unsigned NumToks, bool varargsElided)
+    : NumUnexpArgTokens(NumToks), VarargsElided(varargsElided), ArgCache(0) {}
+  ~MacroArgs() {}
+public:
+  /// MacroArgs ctor function - Create a new MacroArgs object with the specified
+  /// macro and argument info.
+  static MacroArgs *create(const MacroInfo *MI,
+                           const Token *UnexpArgTokens,
+                           unsigned NumArgTokens, bool VarargsElided,
+                           Preprocessor &PP);
+
+  /// destroy - Destroy and deallocate the memory for this object.
+  ///
+  void destroy(Preprocessor &PP);
+
+  /// ArgNeedsPreexpansion - If we can prove that the argument won't be affected
+  /// by pre-expansion, return false.  Otherwise, conservatively return true.
+  bool ArgNeedsPreexpansion(const Token *ArgTok, Preprocessor &PP) const;
+
+  /// getUnexpArgument - Return a pointer to the first token of the unexpanded
+  /// token list for the specified formal.
+  ///
+  const Token *getUnexpArgument(unsigned Arg) const;
+
+  /// getArgLength - Given a pointer to an expanded or unexpanded argument,
+  /// return the number of tokens, not counting the EOF, that make up the
+  /// argument.
+  static unsigned getArgLength(const Token *ArgPtr);
+
+  /// getPreExpArgument - Return the pre-expanded form of the specified
+  /// argument.
+  const std::vector<Token> &
+    getPreExpArgument(unsigned Arg, const MacroInfo *MI, Preprocessor &PP);
+
+  /// getStringifiedArgument - Compute, cache, and return the specified argument
+  /// that has been 'stringified' as required by the # operator.
+  const Token &getStringifiedArgument(unsigned ArgNo, Preprocessor &PP);
+
+  /// getNumArguments - Return the number of arguments passed into this macro
+  /// invocation.
+  unsigned getNumArguments() const { return NumUnexpArgTokens; }
+
+
+  /// isVarargsElidedUse - Return true if this is a C99 style varargs macro
+  /// invocation and there was no argument specified for the "..." argument.  If
+  /// the argument was specified (even empty) or this isn't a C99 style varargs
+  /// function, or if in strict mode and the C99 varargs macro had only a ...
+  /// argument, this returns false.
+  bool isVarargsElidedUse() const { return VarargsElided; }
+
+  /// StringifyArgument - Implement C99 6.10.3.2p2, converting a sequence of
+  /// tokens into the literal string token that should be produced by the C #
+  /// preprocessor operator.  If Charify is true, then it should be turned into
+  /// a character literal for the Microsoft charize (#@) extension.
+  ///
+  static Token StringifyArgument(const Token *ArgToks,
+                                 Preprocessor &PP, bool Charify = false);
+  
+  
+  /// deallocate - This should only be called by the Preprocessor when managing
+  /// its freelist.
+  MacroArgs *deallocate();
+};
+
+}  // end namespace clang
+
+#endif
diff --git a/lib/Lex/MacroInfo.cpp b/lib/Lex/MacroInfo.cpp
new file mode 100644
index 0000000..fda884c
--- /dev/null
+++ b/lib/Lex/MacroInfo.cpp
@@ -0,0 +1,75 @@
+//===--- MacroInfo.cpp - Information about #defined identifiers -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MacroInfo interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/MacroInfo.h"
+#include "clang/Lex/Preprocessor.h"
+using namespace clang;
+
+MacroInfo::MacroInfo(SourceLocation DefLoc) : Location(DefLoc) {
+  // All macros start out as object-like, non-variadic, user-defined, and
+  // enabled for expansion.
+  IsFunctionLike = false;
+  IsC99Varargs = false;
+  IsGNUVarargs = false;
+  IsBuiltinMacro = false;
+  IsDisabled = false;
+  // NOTE(review): macros start out flagged as "used"; presumably whatever
+  // drives unused-macro warnings resets this elsewhere -- confirm.
+  IsUsed = true;
+
+  // No argument list until setArgumentList (or equivalent) is called.
+  ArgumentList = 0;
+  NumArguments = 0;
+}
+
+/// isIdenticalTo - Return true if the specified macro definition is equal to
+/// this macro in spelling, arguments, and whitespace.  This is used to emit
+/// duplicate definition warnings.  This implements the rules in C99 6.10.3.
+///
+bool MacroInfo::isIdenticalTo(const MacroInfo &Other, Preprocessor &PP) const {
+  // Check # tokens in replacement, number of args, and various flags all match.
+  if (ReplacementTokens.size() != Other.ReplacementTokens.size() ||
+      getNumArgs() != Other.getNumArgs() ||
+      isFunctionLike() != Other.isFunctionLike() ||
+      isC99Varargs() != Other.isC99Varargs() ||
+      isGNUVarargs() != Other.isGNUVarargs())
+    return false;
+
+  // Check arguments.  Both lists have the same length (verified above), so a
+  // single end iterator suffices.
+  for (arg_iterator I = arg_begin(), OI = Other.arg_begin(), E = arg_end();
+       I != E; ++I, ++OI)
+    if (*I != *OI) return false;
+
+  // Check all the tokens.
+  for (unsigned i = 0, e = ReplacementTokens.size(); i != e; ++i) {
+    const Token &A = ReplacementTokens[i];
+    const Token &B = Other.ReplacementTokens[i];
+    if (A.getKind() != B.getKind())
+      return false;
+
+    // If this isn't the first token, check that the whitespace and
+    // start-of-line characteristics match; C99 6.10.3 makes spacing part of
+    // what must be identical for a valid redefinition.
+    if (i != 0 &&
+        (A.isAtStartOfLine() != B.isAtStartOfLine() ||
+         A.hasLeadingSpace() != B.hasLeadingSpace()))
+      return false;
+
+    // If this is an identifier, it is easy: IdentifierInfos are uniqued, so
+    // pointer equality decides spelling equality.
+    if (A.getIdentifierInfo() || B.getIdentifierInfo()) {
+      if (A.getIdentifierInfo() != B.getIdentifierInfo())
+        return false;
+      continue;
+    }
+
+    // Otherwise, check the spelling.
+    if (PP.getSpelling(A) != PP.getSpelling(B))
+      return false;
+  }
+
+  return true;
+}
diff --git a/lib/Lex/Makefile b/lib/Lex/Makefile
new file mode 100644
index 0000000..5090770
--- /dev/null
+++ b/lib/Lex/Makefile
@@ -0,0 +1,27 @@
+##===- clang/lib/Lex/Makefile ------------------------------*- Makefile -*-===##
+# 
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+# 
+##===----------------------------------------------------------------------===##
+#
+#  This implements the Lexer library for the C-Language front-end.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+include $(LEVEL)/Makefile.config
+
+LIBRARYNAME := clangLex
+BUILD_ARCHIVE = 1
+
+# Pass -maltivec when targeting PowerPC; the CMake build records the same
+# requirement as a TODO in lib/Lex/CMakeLists.txt.
+ifeq ($(ARCH),PowerPC)
+CXXFLAGS += -maltivec
+endif
+
+# Make the Clang headers visible: source-tree headers and generated headers
+# in the object tree.
+CPPFLAGS += -I$(PROJ_SRC_DIR)/../../include -I$(PROJ_OBJ_DIR)/../../include
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Lex/PPCaching.cpp b/lib/Lex/PPCaching.cpp
new file mode 100644
index 0000000..6aeb6fa
--- /dev/null
+++ b/lib/Lex/PPCaching.cpp
@@ -0,0 +1,112 @@
+//===--- PPCaching.cpp - Handle caching lexed tokens ----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements pieces of the Preprocessor interface that manage the
+// caching of lexed tokens.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/Preprocessor.h"
+using namespace clang;
+
+/// EnableBacktrackAtThisPos - From the point that this method is called, and
+/// until CommitBacktrackedTokens() or Backtrack() is called, the Preprocessor
+/// keeps track of the lexed tokens so that a subsequent Backtrack() call will
+/// make the Preprocessor re-lex the same tokens.
+///
+/// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can
+/// be called multiple times and CommitBacktrackedTokens/Backtrack calls will
+/// be combined with the EnableBacktrackAtThisPos calls in reverse order.
+void Preprocessor::EnableBacktrackAtThisPos() {
+  // Bookmark the current position in the token cache; Backtrack() rewinds
+  // CachedLexPos to this value.
+  BacktrackPositions.push_back(CachedLexPos);
+  EnterCachingLexMode();
+}
+
+/// CommitBacktrackedTokens - Disable the last EnableBacktrackAtThisPos call.
+/// Only the innermost bookmark is dropped; any enclosing backtrack positions
+/// remain armed, and already-lexed tokens stay consumed.
+void Preprocessor::CommitBacktrackedTokens() {
+  assert(!BacktrackPositions.empty()
+         && "EnableBacktrackAtThisPos was not called!");
+  BacktrackPositions.pop_back();
+}
+
+/// Backtrack - Make Preprocessor re-lex the tokens that were lexed since
+/// EnableBacktrackAtThisPos() was previously called.
+void Preprocessor::Backtrack() {
+  assert(!BacktrackPositions.empty()
+         && "EnableBacktrackAtThisPos was not called!");
+  // Rewind the cache cursor to the innermost bookmark and discard it.  The
+  // cached tokens themselves are kept so they can be replayed by CachingLex.
+  CachedLexPos = BacktrackPositions.back();
+  BacktrackPositions.pop_back();
+}
+
+/// CachingLex - Lex the next token while in caching mode: replay a cached
+/// token if one is available, otherwise lex a fresh token and, if a backtrack
+/// position is still armed, record it in the cache for possible replay.
+void Preprocessor::CachingLex(Token &Result) {
+  // Replay a previously cached token if any remain past the cursor.
+  if (CachedLexPos < CachedTokens.size()) {
+    Result = CachedTokens[CachedLexPos++];
+    return;
+  }
+
+  // Cache exhausted: temporarily leave caching mode so Lex() produces a new
+  // token from the real lexer stack.
+  ExitCachingLexMode();
+  Lex(Result);
+
+  if (!isBacktrackEnabled()) {
+    // All cached tokens were consumed.
+    CachedTokens.clear();
+    CachedLexPos = 0;
+    return;
+  }
+
+  // We should cache the lexed token.
+
+  EnterCachingLexMode();
+  if (Result.isNot(tok::eof)) {
+    CachedTokens.push_back(Result);
+    ++CachedLexPos;
+  }
+}
+
+/// EnterCachingLexMode - Switch the preprocessor into caching-lex mode.
+/// Nested calls are no-ops: the include/macro stack is pushed only on the
+/// first (outermost) entry.
+void Preprocessor::EnterCachingLexMode() {
+  if (!InCachingLexMode())
+    PushIncludeMacroStack();
+}
+
+
+/// PeekAhead - Return the token N tokens past the current cache cursor,
+/// lexing and caching exactly as many new tokens as are needed.  Only called
+/// when the requested lookahead runs past the end of the current cache.
+const Token &Preprocessor::PeekAhead(unsigned N) {
+  assert(CachedLexPos + N > CachedTokens.size() && "Confused caching.");
+  // Drop out of caching mode so Lex() pulls fresh tokens, then append the
+  // shortfall (CachedLexPos + N - size) to the cache.
+  ExitCachingLexMode();
+  for (unsigned C = CachedLexPos + N - CachedTokens.size(); C > 0; --C) {
+    CachedTokens.push_back(Token());
+    Lex(CachedTokens.back());
+  }
+  EnterCachingLexMode();
+  return CachedTokens.back();
+}
+
+/// AnnotatePreviousCachedTokens - Replace the run of cached tokens covered by
+/// the annotation token Tok (whose source range ends at the most recently
+/// consumed cached token) with the single annotation token, so a later
+/// backtrack replays the annotation instead of the raw tokens.
+void Preprocessor::AnnotatePreviousCachedTokens(const Token &Tok) {
+  assert(Tok.isAnnotation() && "Expected annotation token");
+  assert(CachedLexPos != 0 && "Expected to have some cached tokens");
+  assert(CachedTokens[CachedLexPos-1].getLastLoc() == Tok.getAnnotationEndLoc()
+         && "The annotation should be until the most recent cached token");
+
+  // Start from the end of the cached tokens list and look for the token
+  // that is the beginning of the annotation token.
+  for (CachedTokensTy::size_type i = CachedLexPos; i != 0; --i) {
+    CachedTokensTy::iterator AnnotBegin = CachedTokens.begin() + i-1;
+    if (AnnotBegin->getLocation() == Tok.getLocation()) {
+      assert((BacktrackPositions.empty() || BacktrackPositions.back() < i) &&
+             "The backtrack pos points inside the annotated tokens!");
+      // Replace the cached tokens with the single annotation token.
+      if (i < CachedLexPos)
+        CachedTokens.erase(AnnotBegin + 1, CachedTokens.begin() + CachedLexPos);
+      *AnnotBegin = Tok;
+      // The cursor now sits just past the annotation token.
+      CachedLexPos = i;
+      return;
+    }
+  }
+}
diff --git a/lib/Lex/PPDirectives.cpp b/lib/Lex/PPDirectives.cpp
new file mode 100644
index 0000000..b0e784b
--- /dev/null
+++ b/lib/Lex/PPDirectives.cpp
@@ -0,0 +1,1654 @@
+//===--- PPDirectives.cpp - Directive Handling for Preprocessor -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file implements # directive processing for the Preprocessor.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/LiteralSupport.h"
+#include "clang/Lex/HeaderSearch.h"
+#include "clang/Lex/MacroInfo.h"
+#include "clang/Lex/LexDiagnostic.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/SourceManager.h"
+#include "llvm/ADT/APInt.h"
+using namespace clang;
+
+//===----------------------------------------------------------------------===//
+// Utility Methods for Preprocessor Directive Handling.
+//===----------------------------------------------------------------------===//
+
+/// AllocateMacroInfo - Allocate a MacroInfo for a macro defined at L, reusing
+/// a slot from the MICache freelist when one is available and otherwise
+/// carving fresh storage out of the BP bump allocator.  Either way the object
+/// is (re)constructed with placement new.
+MacroInfo *Preprocessor::AllocateMacroInfo(SourceLocation L) {
+  MacroInfo *MI;
+
+  if (!MICache.empty()) {
+    MI = MICache.back();
+    MICache.pop_back();
+  } else
+    MI = (MacroInfo*) BP.Allocate<MacroInfo>();
+  new (MI) MacroInfo(L);
+  return MI;
+}
+
+/// ReleaseMacroInfo - Release the specified MacroInfo.  This memory will
+///  be reused for allocating new MacroInfo objects.
+void Preprocessor::ReleaseMacroInfo(MacroInfo* MI) {
+  // The slot goes onto the MICache freelist (it is never returned to BP);
+  // the argument list storage is released back to the bump allocator.
+  MICache.push_back(MI);
+  MI->FreeArgumentList(BP);
+}
+
+
+/// DiscardUntilEndOfDirective - Read and discard all tokens remaining on the
+/// current line until the tok::eom token is found.
+void Preprocessor::DiscardUntilEndOfDirective() {
+  // Consume tokens without macro expansion until the end-of-macro marker.
+  Token Discarded;
+  while (true) {
+    LexUnexpandedToken(Discarded);
+    if (Discarded.is(tok::eom))
+      break;
+  }
+}
+
+/// ReadMacroName - Lex and validate a macro name, which occurs after a
+/// #define or #undef.  This sets the token kind to eom and discards the rest
+/// of the macro line if the macro name is invalid.  isDefineUndef is 1 if
+/// this is due to a #define, 2 if #undef directive, 0 if it is something
+/// else (e.g. #ifdef).
+void Preprocessor::ReadMacroName(Token &MacroNameTok, char isDefineUndef) {
+  // Read the token, don't allow macro expansion on it.
+  LexUnexpandedToken(MacroNameTok);
+
+  // Missing macro name?
+  if (MacroNameTok.is(tok::eom)) {
+    Diag(MacroNameTok, diag::err_pp_missing_macro_name);
+    return;
+  }
+
+  IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
+  if (II == 0) {
+    // Not an identifier at all.  Look the spelling up to distinguish C++
+    // alternative tokens (and, or, ...) from arbitrary non-identifiers so the
+    // diagnostic can be more specific.
+    std::string Spelling = getSpelling(MacroNameTok);
+    const IdentifierInfo &Info = Identifiers.get(Spelling);
+    if (Info.isCPlusPlusOperatorKeyword())
+      // C++ 2.5p2: Alternative tokens behave the same as its primary token
+      // except for their spellings.
+      Diag(MacroNameTok, diag::err_pp_operator_used_as_macro_name) << Spelling;
+    else
+      Diag(MacroNameTok, diag::err_pp_macro_not_identifier);
+    // Fall through on error.
+  } else if (isDefineUndef && II->getPPKeywordID() == tok::pp_defined) {
+    // Error if defining "defined": C99 6.10.8.4.
+    Diag(MacroNameTok, diag::err_defined_macro_name);
+  } else if (isDefineUndef && II->hasMacroDefinition() &&
+             getMacroInfo(II)->isBuiltinMacro()) {
+    // Error if defining "__LINE__" and other builtins: C99 6.10.8.4.
+    if (isDefineUndef == 1)
+      Diag(MacroNameTok, diag::pp_redef_builtin_macro);
+    else
+      Diag(MacroNameTok, diag::pp_undef_builtin_macro);
+  } else {
+    // Okay, we got a good identifier node.  Return it.
+    return;
+  }
+
+  // Invalid macro name, read and discard the rest of the line.  Then set the
+  // token kind to tok::eom so the caller sees the directive as finished.
+  MacroNameTok.setKind(tok::eom);
+  return DiscardUntilEndOfDirective();
+}
+
+/// CheckEndOfDirective - Ensure that the next token is a tok::eom token.  If
+/// not, emit a diagnostic and consume up until the eom.  If EnableMacros is
+/// true, then we consider macros that expand to zero tokens as being ok.
+void Preprocessor::CheckEndOfDirective(const char *DirType, bool EnableMacros) {
+  Token Tmp;
+  // Lex unexpanded tokens for most directives: macros might expand to zero
+  // tokens, causing us to miss diagnosing invalid lines.  Some directives (like
+  // #line) allow empty macros.
+  if (EnableMacros)
+    Lex(Tmp);
+  else
+    LexUnexpandedToken(Tmp);
+
+  // There should be no tokens after the directive, but we allow them as an
+  // extension.
+  while (Tmp.is(tok::comment))  // Skip comments in -C mode.
+    LexUnexpandedToken(Tmp);
+
+  if (Tmp.isNot(tok::eom)) {
+    // Add a fixit in GNU/C99/C++ mode.  Don't offer a fixit for strict-C89,
+    // because it is more trouble than it is worth to insert /**/ and check that
+    // there is no /**/ in the range also.
+    CodeModificationHint FixItHint;
+    if (Features.GNUMode || Features.C99 || Features.CPlusPlus)
+      FixItHint = CodeModificationHint::CreateInsertion(Tmp.getLocation(),"//");
+    Diag(Tmp, diag::ext_pp_extra_tokens_at_eol) << DirType << FixItHint;
+    DiscardUntilEndOfDirective();
+  }
+}
+
+
+
+/// SkipExcludedConditionalBlock - We just read a #if or related directive and
+/// decided that the subsequent tokens are in the #if'd out portion of the
+/// file.  Lex the rest of the file, until we see an #endif.  If
+/// FoundNonSkipPortion is true, then we have already emitted code for part of
+/// this #if directive, so #else/#elif blocks should never be entered. If ElseOk
+/// is true, then #else directives are ok, if not, then we have already seen one
+/// so a #else directive is a duplicate.  When this returns, the caller can lex
+/// the first valid token.
+/// (NOTE(review): "ElseOk" above appears to describe the inverse of the
+/// FoundElse parameter actually declared below -- confirm and reword.)
+void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
+                                                bool FoundNonSkipPortion,
+                                                bool FoundElse) {
+  ++NumSkipped;
+  assert(CurTokenLexer == 0 && CurPPLexer && "Lexing a macro, not a file?");
+
+  CurPPLexer->pushConditionalLevel(IfTokenLoc, /*isSkipping*/false,
+                                 FoundNonSkipPortion, FoundElse);
+
+  // Pretokenized-header lexers have a dedicated fast skipper.
+  if (CurPTHLexer) {
+    PTHSkipExcludedConditionalBlock();
+    return;
+  }
+
+  // Enter raw mode to disable identifier lookup (and thus macro expansion),
+  // disabling warnings, etc.
+  CurPPLexer->LexingRawMode = true;
+  Token Tok;
+  while (1) {
+    CurLexer->Lex(Tok);
+
+    // If this is the end of the buffer, we have an error.
+    if (Tok.is(tok::eof)) {
+      // Emit errors for each unterminated conditional on the stack, including
+      // the current one.
+      while (!CurPPLexer->ConditionalStack.empty()) {
+        Diag(CurPPLexer->ConditionalStack.back().IfLoc,
+             diag::err_pp_unterminated_conditional);
+        CurPPLexer->ConditionalStack.pop_back();
+      }
+
+      // Just return and let the caller lex after this #include.
+      break;
+    }
+
+    // If this token is not a preprocessor directive, just skip it.
+    if (Tok.isNot(tok::hash) || !Tok.isAtStartOfLine())
+      continue;
+
+    // We just parsed a # character at the start of a line, so we're in
+    // directive mode.  Tell the lexer this so any newlines we see will be
+    // converted into an EOM token (this terminates the macro).
+    CurPPLexer->ParsingPreprocessorDirective = true;
+    if (CurLexer) CurLexer->SetCommentRetentionState(false);
+
+
+    // Read the next token, the directive flavor.
+    LexUnexpandedToken(Tok);
+
+    // If this isn't an identifier directive (e.g. is "# 1\n" or "#\n", or
+    // something bogus), skip it.
+    if (Tok.isNot(tok::identifier)) {
+      CurPPLexer->ParsingPreprocessorDirective = false;
+      // Restore comment saving mode.
+      if (CurLexer) CurLexer->SetCommentRetentionState(KeepComments);
+      continue;
+    }
+
+    // If the first letter isn't i or e, it isn't interesting to us.  We know
+    // that this is safe in the face of spelling differences, because there is
+    // no way to spell an i/e in a strange way that is another letter.  Skipping
+    // this allows us to avoid looking up the identifier info for #define/#undef
+    // and other common directives.
+    const char *RawCharData = SourceMgr.getCharacterData(Tok.getLocation());
+    char FirstChar = RawCharData[0];
+    if (FirstChar >= 'a' && FirstChar <= 'z' &&
+        FirstChar != 'i' && FirstChar != 'e') {
+      CurPPLexer->ParsingPreprocessorDirective = false;
+      // Restore comment saving mode.
+      if (CurLexer) CurLexer->SetCommentRetentionState(KeepComments);
+      continue;
+    }
+
+    // Get the identifier name without trigraphs or embedded newlines.  Note
+    // that we can't use Tok.getIdentifierInfo() because its lookup is disabled
+    // when skipping.
+    char DirectiveBuf[20];
+    llvm::StringRef Directive;
+    if (!Tok.needsCleaning() && Tok.getLength() < 20) {
+      // Fast path: the spelling is exactly the raw source characters.
+      Directive = llvm::StringRef(RawCharData, Tok.getLength());
+    } else {
+      std::string DirectiveStr = getSpelling(Tok);
+      unsigned IdLen = DirectiveStr.size();
+      if (IdLen >= 20) {
+        // No directive of interest is this long; treat as uninteresting.
+        CurPPLexer->ParsingPreprocessorDirective = false;
+        // Restore comment saving mode.
+        if (CurLexer) CurLexer->SetCommentRetentionState(KeepComments);
+        continue;
+      }
+      memcpy(DirectiveBuf, &DirectiveStr[0], IdLen);
+      Directive = llvm::StringRef(DirectiveBuf, IdLen);
+    }
+
+    if (Directive.startswith("if")) {
+      llvm::StringRef Sub = Directive.substr(2);
+      if (Sub.empty() ||   // "if"
+          Sub == "def" ||   // "ifdef"
+          Sub == "ndef") {  // "ifndef"
+        // We know the entire #if/#ifdef/#ifndef block will be skipped, don't
+        // bother parsing the condition.
+        DiscardUntilEndOfDirective();
+        CurPPLexer->pushConditionalLevel(Tok.getLocation(), /*wasskipping*/true,
+                                       /*foundnonskip*/false,
+                                       /*fnddelse*/false);
+      }
+    } else if (Directive[0] == 'e') {
+      llvm::StringRef Sub = Directive.substr(1);
+      if (Sub == "ndif") {  // "endif"
+        CheckEndOfDirective("endif");
+        PPConditionalInfo CondInfo;
+        CondInfo.WasSkipping = true; // Silence bogus warning.
+        bool InCond = CurPPLexer->popConditionalLevel(CondInfo);
+        InCond = InCond;  // Silence warning in no-asserts mode.
+        assert(!InCond && "Can't be skipping if not in a conditional!");
+
+        // If we popped the outermost skipping block, we're done skipping!
+        if (!CondInfo.WasSkipping)
+          break;
+      } else if (Sub == "lse") { // "else".
+        // #else directive in a skipping conditional.  If not in some other
+        // skipping conditional, and if #else hasn't already been seen, enter it
+        // as a non-skipping conditional.
+        DiscardUntilEndOfDirective();  // C99 6.10p4.
+        PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();
+
+        // If this is a #else with a #else before it, report the error.
+        if (CondInfo.FoundElse) Diag(Tok, diag::pp_err_else_after_else);
+
+        // Note that we've seen a #else in this conditional.
+        CondInfo.FoundElse = true;
+
+        // If the conditional is at the top level, and the #if block wasn't
+        // entered, enter the #else block now.
+        if (!CondInfo.WasSkipping && !CondInfo.FoundNonSkip) {
+          CondInfo.FoundNonSkip = true;
+          break;
+        }
+      } else if (Sub == "lif") {  // "elif".
+        PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();
+
+        bool ShouldEnter;
+        // If this is in a skipping block or if we're already handled this #if
+        // block, don't bother parsing the condition.
+        if (CondInfo.WasSkipping || CondInfo.FoundNonSkip) {
+          DiscardUntilEndOfDirective();
+          ShouldEnter = false;
+        } else {
+          // Restore the value of LexingRawMode so that identifiers are
+          // looked up, etc, inside the #elif expression.
+          assert(CurPPLexer->LexingRawMode && "We have to be skipping here!");
+          CurPPLexer->LexingRawMode = false;
+          IdentifierInfo *IfNDefMacro = 0;
+          ShouldEnter = EvaluateDirectiveExpression(IfNDefMacro);
+          CurPPLexer->LexingRawMode = true;
+        }
+
+        // If this is a #elif with a #else before it, report the error.
+        if (CondInfo.FoundElse) Diag(Tok, diag::pp_err_elif_after_else);
+
+        // If this condition is true, enter it!
+        if (ShouldEnter) {
+          CondInfo.FoundNonSkip = true;
+          break;
+        }
+      }
+    }
+
+    CurPPLexer->ParsingPreprocessorDirective = false;
+    // Restore comment saving mode.
+    if (CurLexer) CurLexer->SetCommentRetentionState(KeepComments);
+  }
+
+  // Finally, if we are out of the conditional (saw an #endif or ran off the end
+  // of the file, just stop skipping and return to lexing whatever came after
+  // the #if block.
+  CurPPLexer->LexingRawMode = false;
+}
+
+/// PTHSkipExcludedConditionalBlock - Skip an excluded conditional region when
+/// lexing from a pretokenized header: the PTHLexer can jump directly to the
+/// next #else/#elif/#endif via SkipBlock instead of lexing every token.
+void Preprocessor::PTHSkipExcludedConditionalBlock() {
+
+  while (1) {
+    assert(CurPTHLexer);
+    assert(CurPTHLexer->LexingRawMode == false);
+
+    // Skip to the next '#else', '#elif', or #endif.
+    if (CurPTHLexer->SkipBlock()) {
+      // We have reached an #endif.  Both the '#' and 'endif' tokens
+      // have been consumed by the PTHLexer.  Just pop off the condition level.
+      PPConditionalInfo CondInfo;
+      bool InCond = CurPTHLexer->popConditionalLevel(CondInfo);
+      InCond = InCond;  // Silence warning in no-asserts mode.
+      assert(!InCond && "Can't be skipping if not in a conditional!");
+      break;
+    }
+
+    // We have reached a '#else' or '#elif'.  Lex the next token to get
+    // the directive flavor.
+    Token Tok;
+    LexUnexpandedToken(Tok);
+
+    // We can actually look up the IdentifierInfo here since we aren't in
+    // raw mode.
+    tok::PPKeywordKind K = Tok.getIdentifierInfo()->getPPKeywordID();
+
+    if (K == tok::pp_else) {
+      // #else: Enter the else condition.  We aren't in a nested condition
+      //  since we skip those. We're always in the one matching the last
+      //  block we skipped.
+      PPConditionalInfo &CondInfo = CurPTHLexer->peekConditionalLevel();
+      // Note that we've seen a #else in this conditional.
+      CondInfo.FoundElse = true;
+
+      // If the #if block wasn't entered then enter the #else block now.
+      if (!CondInfo.FoundNonSkip) {
+        CondInfo.FoundNonSkip = true;
+
+        // Scan until the eom token.
+        CurPTHLexer->ParsingPreprocessorDirective = true;
+        DiscardUntilEndOfDirective();
+        CurPTHLexer->ParsingPreprocessorDirective = false;
+
+        break;
+      }
+
+      // Otherwise skip this block.
+      continue;
+    }
+
+    assert(K == tok::pp_elif);
+    PPConditionalInfo &CondInfo = CurPTHLexer->peekConditionalLevel();
+
+    // If this is a #elif with a #else before it, report the error.
+    if (CondInfo.FoundElse)
+      Diag(Tok, diag::pp_err_elif_after_else);
+
+    // If this is in a skipping block or if we're already handled this #if
+    // block, don't bother parsing the condition.  We just skip this block.
+    if (CondInfo.FoundNonSkip)
+      continue;
+
+    // Evaluate the condition of the #elif.
+    IdentifierInfo *IfNDefMacro = 0;
+    CurPTHLexer->ParsingPreprocessorDirective = true;
+    bool ShouldEnter = EvaluateDirectiveExpression(IfNDefMacro);
+    CurPTHLexer->ParsingPreprocessorDirective = false;
+
+    // If this condition is true, enter it!
+    if (ShouldEnter) {
+      CondInfo.FoundNonSkip = true;
+      break;
+    }
+
+    // Otherwise, skip this block and go to the next one.
+    continue;
+  }
+}
+
+/// LookupFile - Given a "foo" or <foo> reference, look up the indicated file,
+/// return null on failure.  isAngled indicates whether the file reference is
+/// for system #include's or not (i.e. using <> instead of "").  On success
+/// CurDir is set to the directory the file was found in.
+const FileEntry *Preprocessor::LookupFile(llvm::StringRef Filename,
+                                          bool isAngled,
+                                          const DirectoryLookup *FromDir,
+                                          const DirectoryLookup *&CurDir) {
+  // If the header lookup mechanism may be relative to the current file, pass in
+  // info about where the current file is.
+  const FileEntry *CurFileEnt = 0;
+  if (!FromDir) {
+    FileID FID = getCurrentFileLexer()->getFileID();
+    CurFileEnt = SourceMgr.getFileEntryForID(FID);
+
+    // If there is no file entry associated with this file, it must be the
+    // predefines buffer.  Any other file is not lexed with a normal lexer, so
+    // it won't be scanned for preprocessor directives.   If we have the
+    // predefines buffer, resolve #include references (which come from the
+    // -include command line argument) as if they came from the main file, this
+    // affects file lookup etc.
+    if (CurFileEnt == 0) {
+      FID = SourceMgr.getMainFileID();
+      CurFileEnt = SourceMgr.getFileEntryForID(FID);
+    }
+  }
+
+  // Do a standard file entry lookup.
+  CurDir = CurDirLookup;
+  const FileEntry *FE =
+    HeaderInfo.LookupFile(Filename, isAngled, FromDir, CurDir, CurFileEnt);
+  if (FE) return FE;
+
+  // Otherwise, see if this is a subframework header.  If so, this is relative
+  // to one of the headers on the #include stack.  Walk the list of the current
+  // headers on the #include stack and pass them to HeaderInfo.
+  if (IsFileLexer()) {
+    if ((CurFileEnt = SourceMgr.getFileEntryForID(CurPPLexer->getFileID())))
+      if ((FE = HeaderInfo.LookupSubframeworkHeader(Filename, CurFileEnt)))
+        return FE;
+  }
+
+  // Walk the include stack from innermost to outermost entry.
+  for (unsigned i = 0, e = IncludeMacroStack.size(); i != e; ++i) {
+    IncludeStackInfo &ISEntry = IncludeMacroStack[e-i-1];
+    if (IsFileLexer(ISEntry)) {
+      if ((CurFileEnt =
+           SourceMgr.getFileEntryForID(ISEntry.ThePPLexer->getFileID())))
+        if ((FE = HeaderInfo.LookupSubframeworkHeader(Filename, CurFileEnt)))
+          return FE;
+    }
+  }
+
+  // Otherwise, we really couldn't find the file.
+  return 0;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Preprocessor Directive Handling.
+//===----------------------------------------------------------------------===//
+
+/// HandleDirective - This callback is invoked when the lexer sees a # token
+/// at the start of a line.  This consumes the directive, modifies the
+/// lexer/preprocessor state, and advances the lexer(s) so that the next token
+/// read is the correct one.
+void Preprocessor::HandleDirective(Token &Result) {
+  // FIXME: Traditional: # with whitespace before it not recognized by K&R?
+
+  // We just parsed a # character at the start of a line, so we're in directive
+  // mode.  Tell the lexer this so any newlines we see will be converted into an
+  // EOM token (which terminates the directive).
+  CurPPLexer->ParsingPreprocessorDirective = true;
+
+  ++NumDirectives;
+  
+  // We are about to read a token.  For the multiple-include optimization FA to
+  // work, we have to remember if we had read any tokens *before* this
+  // pp-directive.
+  bool ReadAnyTokensBeforeDirective =CurPPLexer->MIOpt.getHasReadAnyTokensVal();
+
+  // Save the '#' token in case we need to return it later.
+  Token SavedHash = Result;
+
+  // Read the next token, the directive flavor.  This isn't expanded due to
+  // C99 6.10.3p8.
+  LexUnexpandedToken(Result);
+
+  // C99 6.10.3p11: Is this preprocessor directive in macro invocation?  e.g.:
+  //   #define A(x) #x
+  //   A(abc
+  //     #warning blah
+  //   def)
+  // If so, the user is relying on non-portable behavior, emit a diagnostic.
+  if (InMacroArgs)
+    Diag(Result, diag::ext_embedded_directive);
+
+  // Dispatch on the directive flavor; each Handle* method consumes the rest
+  // of the directive line.
+TryAgain:
+  switch (Result.getKind()) {
+  case tok::eom:
+    return;   // null directive.
+  case tok::comment:
+    // Handle stuff like "# /*foo*/ define X" in -E -C mode.
+    LexUnexpandedToken(Result);
+    goto TryAgain;
+
+  case tok::numeric_constant:  // # 7  GNU line marker directive.
+    if (getLangOptions().AsmPreprocessor)
+      break;  // # 4 is not a preprocessor directive in .S files.
+    return HandleDigitDirective(Result);
+  default:
+    IdentifierInfo *II = Result.getIdentifierInfo();
+    if (II == 0) break;  // Not an identifier.
+
+    // Ask what the preprocessor keyword ID is.
+    switch (II->getPPKeywordID()) {
+    default: break;
+    // C99 6.10.1 - Conditional Inclusion.
+    case tok::pp_if:
+      return HandleIfDirective(Result, ReadAnyTokensBeforeDirective);
+    case tok::pp_ifdef:
+      return HandleIfdefDirective(Result, false, true/*not valid for miopt*/);
+    case tok::pp_ifndef:
+      return HandleIfdefDirective(Result, true, ReadAnyTokensBeforeDirective);
+    case tok::pp_elif:
+      return HandleElifDirective(Result);
+    case tok::pp_else:
+      return HandleElseDirective(Result);
+    case tok::pp_endif:
+      return HandleEndifDirective(Result);
+
+    // C99 6.10.2 - Source File Inclusion.
+    case tok::pp_include:
+      return HandleIncludeDirective(Result);       // Handle #include.
+    case tok::pp___include_macros:
+      return HandleIncludeMacrosDirective(Result); // Handle -imacros.
+
+    // C99 6.10.3 - Macro Replacement.
+    case tok::pp_define:
+      return HandleDefineDirective(Result);
+    case tok::pp_undef:
+      return HandleUndefDirective(Result);
+
+    // C99 6.10.4 - Line Control.
+    case tok::pp_line:
+      return HandleLineDirective(Result);
+
+    // C99 6.10.5 - Error Directive.
+    case tok::pp_error:
+      return HandleUserDiagnosticDirective(Result, false);
+
+    // C99 6.10.6 - Pragma Directive.
+    case tok::pp_pragma:
+      return HandlePragmaDirective();
+
+    // GNU Extensions.
+    case tok::pp_import:
+      return HandleImportDirective(Result);
+    case tok::pp_include_next:
+      return HandleIncludeNextDirective(Result);
+
+    case tok::pp_warning:
+      Diag(Result, diag::ext_pp_warning_directive);
+      return HandleUserDiagnosticDirective(Result, true);
+    case tok::pp_ident:
+      return HandleIdentSCCSDirective(Result);
+    case tok::pp_sccs:
+      return HandleIdentSCCSDirective(Result);
+    case tok::pp_assert:
+      //isExtension = true;  // FIXME: implement #assert
+      break;
+    case tok::pp_unassert:
+      //isExtension = true;  // FIXME: implement #unassert
+      break;
+    }
+    break;
+  }
+
+  // If this is a .S file, treat unknown # directives as non-preprocessor
+  // directives.  This is important because # may be a comment or introduce
+  // various pseudo-ops.  Just return the # token and push back the following
+  // token to be lexed next time.
+  if (getLangOptions().AsmPreprocessor) {
+    // The token stream takes ownership of this array (last EnterTokenStream
+    // argument) and deletes it when done.
+    Token *Toks = new Token[2];
+    // Return the # and the token after it.
+    Toks[0] = SavedHash;
+    Toks[1] = Result;
+    // Enter this token stream so that we re-lex the tokens.  Make sure to
+    // enable macro expansion, in case the token after the # is an identifier
+    // that is expanded.
+    EnterTokenStream(Toks, 2, false, true);
+    return;
+  }
+
+  // If we reached here, the preprocessing token is not valid!
+  Diag(Result, diag::err_pp_invalid_directive);
+
+  // Read the rest of the PP line.
+  DiscardUntilEndOfDirective();
+
+  // Okay, we're done parsing the directive.
+}
+
+/// GetLineValue - Convert a numeric token into an unsigned value, emitting
+/// Diagnostic DiagID if it is invalid, and returning the value in Val.
+/// Returns true on error (after discarding the rest of the directive line),
+/// false on success.
+static bool GetLineValue(Token &DigitTok, unsigned &Val,
+                         unsigned DiagID, Preprocessor &PP) {
+  if (DigitTok.isNot(tok::numeric_constant)) {
+    PP.Diag(DigitTok, DiagID);
+
+    if (DigitTok.isNot(tok::eom))
+      PP.DiscardUntilEndOfDirective();
+    return true;
+  }
+
+  llvm::SmallString<64> IntegerBuffer;
+  IntegerBuffer.resize(DigitTok.getLength());
+  const char *DigitTokBegin = &IntegerBuffer[0];
+  unsigned ActualLength = PP.getSpelling(DigitTok, DigitTokBegin);
+
+  // Verify that we have a simple digit-sequence, and compute the value.  This
+  // is always a simple digit string computed in decimal, so we do this manually
+  // here.
+  Val = 0;
+  for (unsigned i = 0; i != ActualLength; ++i) {
+    // Cast to unsigned char: passing a plain (possibly negative) char to
+    // isdigit is undefined behavior.
+    if (!isdigit(static_cast<unsigned char>(DigitTokBegin[i]))) {
+      PP.Diag(PP.AdvanceToTokenCharacter(DigitTok.getLocation(), i),
+              diag::err_pp_line_digit_sequence);
+      PP.DiscardUntilEndOfDirective();
+      return true;
+    }
+
+    // Detect overflow *before* multiplying.  The previous "NextVal < Val"
+    // post-check missed wraps where Val*10 modulo 2^32 is still >= Val
+    // (e.g. Val == 500000000 with 32-bit unsigned).
+    unsigned Digit = DigitTokBegin[i]-'0';
+    if (Val > (~0U - Digit) / 10) { // Val*10 + Digit would overflow.
+      PP.Diag(DigitTok, DiagID);
+      PP.DiscardUntilEndOfDirective();
+      return true;
+    }
+    Val = Val*10 + Digit;
+  }
+
+  // Reject 0, this is needed both by #line numbers and flags.
+  if (Val == 0) {
+    PP.Diag(DigitTok, DiagID);
+    PP.DiscardUntilEndOfDirective();
+    return true;
+  }
+
+  // A leading zero makes the sequence look octal; warn that it is read as
+  // decimal.
+  if (DigitTokBegin[0] == '0')
+    PP.Diag(DigitTok.getLocation(), diag::warn_pp_line_decimal);
+
+  return false;
+}
+
+/// HandleLineDirective - Handle #line directive: C99 6.10.4.  The two
+/// acceptable forms are:
+///   # line digit-sequence
+///   # line digit-sequence "s-char-sequence"
+void Preprocessor::HandleLineDirective(Token &Tok) {
+  // Read the line # and string argument.  Per C99 6.10.4p5, these tokens are
+  // expanded.
+  Token DigitTok;
+  Lex(DigitTok);
+
+  // Validate the number and convert it to an unsigned.  On failure the
+  // directive has already been diagnosed and discarded.
+  unsigned LineNo;
+  if (GetLineValue(DigitTok, LineNo, diag::err_pp_line_requires_integer,*this))
+    return;
+
+  // Enforce C99 6.10.4p3: "The digit sequence shall not specify ... a
+  // number greater than 2147483647".  C90 requires that the line # be <= 32767.
+  // The limits are one past the maximum so '>=' rejects exactly the
+  // out-of-range values; exceeding them is only an extension warning.
+  unsigned LineLimit = Features.C99 ? 2147483648U : 32768U;
+  if (LineNo >= LineLimit)
+    Diag(DigitTok, diag::ext_pp_line_too_big) << LineLimit;
+
+  // -1 means "no filename given": the presumed file name stays unchanged.
+  int FilenameID = -1;
+  Token StrTok;
+  Lex(StrTok);
+
+  // If the StrTok is "eom", then it wasn't present.  Otherwise, it must be a
+  // string followed by eom.
+  if (StrTok.is(tok::eom))
+    ; // ok
+  else if (StrTok.isNot(tok::string_literal)) {
+    Diag(StrTok, diag::err_pp_line_invalid_filename);
+    DiscardUntilEndOfDirective();
+    return;
+  } else {
+    // Parse and validate the string, converting it into a unique ID.
+    StringLiteralParser Literal(&StrTok, 1, *this);
+    assert(!Literal.AnyWide && "Didn't allow wide strings in");
+    if (Literal.hadError)
+      return DiscardUntilEndOfDirective();
+    if (Literal.Pascal) {
+      Diag(StrTok, diag::err_pp_linemarker_invalid_filename);
+      return DiscardUntilEndOfDirective();
+    }
+    FilenameID = SourceMgr.getLineTableFilenameID(Literal.GetString(),
+                                                  Literal.GetStringLength());
+
+    // Verify that there is nothing after the string, other than EOM.  Because
+    // of C99 6.10.4p5, macros that expand to empty tokens are ok.
+    CheckEndOfDirective("line", true);
+  }
+
+  // Record the note so later locations report the new presumed line (and
+  // file, if one was given).
+  SourceMgr.AddLineNote(DigitTok.getLocation(), LineNo, FilenameID);
+
+  // Notify clients (e.g. -E output) that the presumed file/line changed.
+  if (Callbacks)
+    Callbacks->FileChanged(DigitTok.getLocation(), PPCallbacks::RenameFile,
+                           SrcMgr::C_User);
+}
+
+/// ReadLineMarkerFlags - Parse and validate any flags at the end of a GNU line
+/// marker directive.
+///
+/// The flags are numeric tokens with fixed meanings: '1' = entering a file,
+/// '2' = returning to a file, '3' = system header, '4' = extern "C" header.
+/// '1'/'2' are mutually exclusive and must come first; '3' may follow either;
+/// '4' may only follow '3'.  Returns false on success (including "no flags at
+/// all"), true if a diagnostic was emitted and the directive discarded.
+static bool ReadLineMarkerFlags(bool &IsFileEntry, bool &IsFileExit,
+                                bool &IsSystemHeader, bool &IsExternCHeader,
+                                Preprocessor &PP) {
+  unsigned FlagVal;
+  Token FlagTok;
+  PP.Lex(FlagTok);
+  // Having no flags at all is valid.
+  if (FlagTok.is(tok::eom)) return false;
+  if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag, PP))
+    return true;
+
+  if (FlagVal == 1) {
+    IsFileEntry = true;
+
+    PP.Lex(FlagTok);
+    if (FlagTok.is(tok::eom)) return false;
+    if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag,PP))
+      return true;
+  } else if (FlagVal == 2) {
+    IsFileExit = true;
+
+    SourceManager &SM = PP.getSourceManager();
+    // If we are leaving the current presumed file, check to make sure the
+    // presumed include stack isn't empty!
+    FileID CurFileID =
+      SM.getDecomposedInstantiationLoc(FlagTok.getLocation()).first;
+    PresumedLoc PLoc = SM.getPresumedLoc(FlagTok.getLocation());
+
+    // If there is no include loc (main file) or if the include loc is in a
+    // different physical file, then we aren't in a "1" line marker flag region.
+    SourceLocation IncLoc = PLoc.getIncludeLoc();
+    if (IncLoc.isInvalid() ||
+        SM.getDecomposedInstantiationLoc(IncLoc).first != CurFileID) {
+      PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_pop);
+      PP.DiscardUntilEndOfDirective();
+      return true;
+    }
+
+    PP.Lex(FlagTok);
+    if (FlagTok.is(tok::eom)) return false;
+    if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag,PP))
+      return true;
+  }
+
+  // We must have 3 if there are still flags.
+  if (FlagVal != 3) {
+    PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_flag);
+    PP.DiscardUntilEndOfDirective();
+    return true;
+  }
+
+  IsSystemHeader = true;
+
+  PP.Lex(FlagTok);
+  if (FlagTok.is(tok::eom)) return false;
+  if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag, PP))
+    return true;
+
+  // We must have 4 if there is yet another flag.
+  if (FlagVal != 4) {
+    PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_flag);
+    PP.DiscardUntilEndOfDirective();
+    return true;
+  }
+
+  IsExternCHeader = true;
+
+  PP.Lex(FlagTok);
+  if (FlagTok.is(tok::eom)) return false;
+
+  // There are no more valid flags here.
+  PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_flag);
+  PP.DiscardUntilEndOfDirective();
+  return true;
+}
+
+/// HandleDigitDirective - Handle a GNU line marker directive, whose syntax is
+/// one of the following forms:
+///
+///     # 42
+///     # 42 "file" ('1' | '2')?
+///     # 42 "file" ('1' | '2')? '3' '4'?
+///
+void Preprocessor::HandleDigitDirective(Token &DigitTok) {
+  // Validate the number and convert it to an unsigned.  GNU does not have a
+  // line # limit other than it fit in 32-bits.
+  unsigned LineNo;
+  if (GetLineValue(DigitTok, LineNo, diag::err_pp_linemarker_requires_integer,
+                   *this))
+    return;
+
+  Token StrTok;
+  Lex(StrTok);
+
+  // Flags parsed from the optional trailing flag tokens; see
+  // ReadLineMarkerFlags for their meanings.
+  bool IsFileEntry = false, IsFileExit = false;
+  bool IsSystemHeader = false, IsExternCHeader = false;
+  // -1 means "no filename given": the presumed file name stays unchanged.
+  int FilenameID = -1;
+
+  // If the StrTok is "eom", then it wasn't present.  Otherwise, it must be a
+  // string followed by eom.
+  if (StrTok.is(tok::eom))
+    ; // ok
+  else if (StrTok.isNot(tok::string_literal)) {
+    Diag(StrTok, diag::err_pp_linemarker_invalid_filename);
+    return DiscardUntilEndOfDirective();
+  } else {
+    // Parse and validate the string, converting it into a unique ID.
+    StringLiteralParser Literal(&StrTok, 1, *this);
+    assert(!Literal.AnyWide && "Didn't allow wide strings in");
+    if (Literal.hadError)
+      return DiscardUntilEndOfDirective();
+    if (Literal.Pascal) {
+      Diag(StrTok, diag::err_pp_linemarker_invalid_filename);
+      return DiscardUntilEndOfDirective();
+    }
+    FilenameID = SourceMgr.getLineTableFilenameID(Literal.GetString(),
+                                                  Literal.GetStringLength());
+
+    // If a filename was present, read any flags that are present.
+    if (ReadLineMarkerFlags(IsFileEntry, IsFileExit,
+                            IsSystemHeader, IsExternCHeader, *this))
+      return;
+  }
+
+  // Create a line note with this information.
+  SourceMgr.AddLineNote(DigitTok.getLocation(), LineNo, FilenameID,
+                        IsFileEntry, IsFileExit,
+                        IsSystemHeader, IsExternCHeader);
+
+  // If the preprocessor has callbacks installed, notify them of the #line
+  // change.  This is used so that the line marker comes out in -E mode for
+  // example.
+  if (Callbacks) {
+    PPCallbacks::FileChangeReason Reason = PPCallbacks::RenameFile;
+    if (IsFileEntry)
+      Reason = PPCallbacks::EnterFile;
+    else if (IsFileExit)
+      Reason = PPCallbacks::ExitFile;
+    SrcMgr::CharacteristicKind FileKind = SrcMgr::C_User;
+    if (IsExternCHeader)
+      FileKind = SrcMgr::C_ExternCSystem;
+    else if (IsSystemHeader)
+      FileKind = SrcMgr::C_System;
+
+    Callbacks->FileChanged(DigitTok.getLocation(), Reason, FileKind);
+  }
+}
+
+
+/// HandleUserDiagnosticDirective - Handle a #warning or #error directive.
+///
+/// The message is the raw text of the rest of the directive line, emitted as
+/// a warning or an error depending on 'isWarning'.
+void Preprocessor::HandleUserDiagnosticDirective(Token &Tok,
+                                                 bool isWarning) {
+  // PTH doesn't emit #warning or #error directives.
+  if (CurPTHLexer)
+    return CurPTHLexer->DiscardToEndOfLine();
+
+  // Read the rest of the line raw.  We do this because we don't want macros
+  // to be expanded and we don't require that the tokens be valid preprocessing
+  // tokens.  For example, this is allowed: "#warning `   'foo".  GCC does
+  // collapse multiple consecutive white space between tokens, but this isn't
+  // specified by the standard.
+  std::string Message = CurLexer->ReadToEndOfLine();
+  if (isWarning)
+    Diag(Tok, diag::pp_hash_warning) << Message;
+  else
+    Diag(Tok, diag::err_pp_hash_error) << Message;
+}
+
+/// HandleIdentSCCSDirective - Handle a #ident/#sccs directive.
+///
+/// Both directives take a single (possibly wide) string literal argument; the
+/// only observable effect here is the Ident() callback with its spelling.
+void Preprocessor::HandleIdentSCCSDirective(Token &Tok) {
+  // Yes, this directive is an extension.
+  Diag(Tok, diag::ext_pp_ident_directive);
+
+  // Read the string argument.
+  Token StrTok;
+  Lex(StrTok);
+
+  // If the token kind isn't a string, it's a malformed directive.
+  if (StrTok.isNot(tok::string_literal) &&
+      StrTok.isNot(tok::wide_string_literal)) {
+    Diag(StrTok, diag::err_pp_malformed_ident);
+    // If we already consumed the end of the directive there is nothing to
+    // discard.
+    if (StrTok.isNot(tok::eom))
+      DiscardUntilEndOfDirective();
+    return;
+  }
+
+  // Verify that there is nothing after the string, other than EOM.
+  CheckEndOfDirective("ident");
+
+  if (Callbacks)
+    Callbacks->Ident(Tok.getLocation(), getSpelling(StrTok));
+}
+
+//===----------------------------------------------------------------------===//
+// Preprocessor Include Directive Handling.
+//===----------------------------------------------------------------------===//
+
+/// GetIncludeFilenameSpelling - Turn the specified lexer token into a fully
+/// checked and spelled filename, e.g. as an operand of #include. This returns
+/// true if the input filename was in <>'s or false if it were in ""'s.  The
+/// caller is expected to provide a buffer that is large enough to hold the
+/// spelling of the filename, but is also expected to handle the case when
+/// this method decides to use a different buffer.
+bool Preprocessor::GetIncludeFilenameSpelling(SourceLocation Loc,
+                                              llvm::StringRef &Buffer) {
+  // Get the text form of the filename.
+  assert(!Buffer.empty() && "Can't have tokens with empty spellings!");
+
+  // Make sure the filename is <x> or "x".  On any error, Buffer is reset to
+  // the empty string so callers can detect failure (the bool return alone is
+  // ambiguous: true means both "angled" and "error").
+  bool isAngled;
+  if (Buffer[0] == '<') {
+    if (Buffer.back() != '>') {
+      Diag(Loc, diag::err_pp_expects_filename);
+      Buffer = llvm::StringRef();
+      return true;
+    }
+    isAngled = true;
+  } else if (Buffer[0] == '"') {
+    if (Buffer.back() != '"') {
+      Diag(Loc, diag::err_pp_expects_filename);
+      Buffer = llvm::StringRef();
+      return true;
+    }
+    isAngled = false;
+  } else {
+    Diag(Loc, diag::err_pp_expects_filename);
+    Buffer = llvm::StringRef();
+    return true;
+  }
+
+  // Diagnose #include "" as invalid.  Size <= 2 means only the delimiters
+  // are present.
+  if (Buffer.size() <= 2) {
+    Diag(Loc, diag::err_pp_empty_filename);
+    Buffer = llvm::StringRef();
+    return true;
+  }
+
+  // Skip the brackets, leaving just the filename text.
+  Buffer = Buffer.substr(1, Buffer.size()-2);
+  return isAngled;
+}
+
+/// ConcatenateIncludeName - Handle cases where the #include name is expanded
+/// from a macro as multiple tokens, which need to be glued together.  This
+/// occurs for code like:
+///    #define FOO <a/b.h>
+///    #include FOO
+/// because in this case, "<a/b.h>" is returned as 7 tokens, not one.
+///
+/// This code concatenates and consumes tokens up to the '>' token.  It returns
+/// false if the > was found, otherwise it returns true if it finds and consumes
+/// the EOM marker.
+bool Preprocessor::ConcatenateIncludeName(
+  llvm::SmallVector<char, 128> &FilenameBuffer) {
+  Token CurTok;
+
+  Lex(CurTok);
+  while (CurTok.isNot(tok::eom)) {
+    // Append the spelling of this token to the buffer. If there was a space
+    // before it, add it now.
+    if (CurTok.hasLeadingSpace())
+      FilenameBuffer.push_back(' ');
+
+    // Get the spelling of the token, directly into FilenameBuffer if possible.
+    unsigned PreAppendSize = FilenameBuffer.size();
+    FilenameBuffer.resize(PreAppendSize+CurTok.getLength());
+
+    // getSpelling either fills the buffer we pass or repoints BufPtr at an
+    // existing spelling; handle both cases below.
+    const char *BufPtr = &FilenameBuffer[PreAppendSize];
+    unsigned ActualLen = getSpelling(CurTok, BufPtr);
+
+    // If the token was spelled somewhere else, copy it into FilenameBuffer.
+    if (BufPtr != &FilenameBuffer[PreAppendSize])
+      memcpy(&FilenameBuffer[PreAppendSize], BufPtr, ActualLen);
+
+    // Resize FilenameBuffer to the correct size.
+    if (CurTok.getLength() != ActualLen)
+      FilenameBuffer.resize(PreAppendSize+ActualLen);
+
+    // If we found the '>' marker, return success.
+    if (CurTok.is(tok::greater))
+      return false;
+
+    Lex(CurTok);
+  }
+
+  // If we hit the eom marker, emit an error and return true so that the caller
+  // knows the EOM has been read.
+  Diag(CurTok.getLocation(), diag::err_pp_expects_filename);
+  return true;
+}
+
+/// HandleIncludeDirective - The "#include" tokens have just been read, read the
+/// file to be included from the lexer, then include it!  This is a common
+/// routine with functionality shared between #include, #include_next and
+/// #import.  LookupFrom is set when this is a #include_next directive, it
+/// specifies the file to start searching from.
+void Preprocessor::HandleIncludeDirective(Token &IncludeTok,
+                                          const DirectoryLookup *LookupFrom,
+                                          bool isImport) {
+
+  Token FilenameTok;
+  CurPPLexer->LexIncludeFilename(FilenameTok);
+
+  // Reserve a buffer to get the spelling.
+  llvm::SmallString<128> FilenameBuffer;
+  llvm::StringRef Filename;
+
+  switch (FilenameTok.getKind()) {
+  case tok::eom:
+    // If the token kind is EOM, the error has already been diagnosed.
+    return;
+
+  case tok::angle_string_literal:
+  case tok::string_literal: {
+    // Normal single-token filename: <foo> or "foo".  Spell it into the local
+    // buffer (getSpelling may instead return a pointer to an existing
+    // spelling, hence the Len/FilenameStart dance).
+    FilenameBuffer.resize(FilenameTok.getLength());
+    const char *FilenameStart = &FilenameBuffer[0];
+    unsigned Len = getSpelling(FilenameTok, FilenameStart);
+    Filename = llvm::StringRef(FilenameStart, Len);
+    break;
+  }
+
+  case tok::less:
+    // This could be a <foo/bar.h> file coming from a macro expansion.  In this
+    // case, glue the tokens together into FilenameBuffer and interpret those.
+    FilenameBuffer.push_back('<');
+    if (ConcatenateIncludeName(FilenameBuffer))
+      return;   // Found <eom> but no ">"?  Diagnostic already emitted.
+    Filename = FilenameBuffer.str();
+    break;
+  default:
+    Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename);
+    DiscardUntilEndOfDirective();
+    return;
+  }
+
+  // Strip the <> or "" delimiters and remember which form was used.
+  bool isAngled = 
+    GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename);
+  // If GetIncludeFilenameSpelling set the start ptr to null, there was an
+  // error.
+  if (Filename.empty()) {
+    DiscardUntilEndOfDirective();
+    return;
+  }
+
+  // Verify that there is nothing after the filename, other than EOM.  Note that
+  // we allow macros that expand to nothing after the filename, because this
+  // falls into the category of "#include pp-tokens new-line" specified in
+  // C99 6.10.2p4.
+  CheckEndOfDirective(IncludeTok.getIdentifierInfo()->getNameStart(), true);
+
+  // Check that we don't have infinite #include recursion.
+  if (IncludeMacroStack.size() == MaxAllowedIncludeStackDepth-1) {
+    Diag(FilenameTok, diag::err_pp_include_too_deep);
+    return;
+  }
+
+  // Search include directories.
+  const DirectoryLookup *CurDir;
+  const FileEntry *File = LookupFile(Filename, isAngled, LookupFrom, CurDir);
+  if (File == 0) {
+    Diag(FilenameTok, diag::err_pp_file_not_found) << Filename;
+    return;
+  }
+  
+  // Ask HeaderInfo if we should enter this #include file.  If not, #including
+  // this file will have no effect.
+  if (!HeaderInfo.ShouldEnterIncludeFile(File, isImport))
+    return;
+
+  // The #included file will be considered to be a system header if either it is
+  // in a system include directory, or if the #includer is a system include
+  // header.
+  SrcMgr::CharacteristicKind FileCharacter =
+    std::max(HeaderInfo.getFileDirFlavor(File),
+             SourceMgr.getFileCharacteristic(FilenameTok.getLocation()));
+
+  // Look up the file, create a File ID for it.
+  FileID FID = SourceMgr.createFileID(File, FilenameTok.getLocation(),
+                                      FileCharacter);
+  if (FID.isInvalid()) {
+    Diag(FilenameTok, diag::err_pp_file_not_found) << Filename;
+    return;
+  }
+
+  // Finally, if all is good, enter the new file!
+  std::string ErrorStr;
+  if (EnterSourceFile(FID, CurDir, ErrorStr))
+    Diag(FilenameTok, diag::err_pp_error_opening_file)
+      << std::string(SourceMgr.getFileEntryForID(FID)->getName()) << ErrorStr;
+}
+
+/// HandleIncludeNextDirective - Implements #include_next.
+///
+/// #include_next behaves like #include, except the header search resumes in
+/// the directory after the one where the current file was found.
+void Preprocessor::HandleIncludeNextDirective(Token &IncludeNextTok) {
+  Diag(IncludeNextTok, diag::ext_pp_include_next_directive);
+
+  // Decide where the header search should resume.
+  const DirectoryLookup *NextLookup = CurDirLookup;
+  if (isInPrimaryFile()) {
+    // In the top-level file there is no "current found directory";
+    // degenerate to a plain #include.
+    Diag(IncludeNextTok, diag::pp_include_next_in_primary);
+    NextLookup = 0;
+  } else if (NextLookup == 0) {
+    // No lookup directory recorded (e.g. absolute path); warn and search from
+    // the beginning.
+    Diag(IncludeNextTok, diag::pp_include_next_absolute_path);
+  } else {
+    // Start looking up in the directory after the one that matched.
+    ++NextLookup;
+  }
+
+  return HandleIncludeDirective(IncludeNextTok, NextLookup);
+}
+
+/// HandleImportDirective - Implements #import.
+///
+/// Emits an extension diagnostic outside Objective-C, then delegates to the
+/// shared include logic with isImport set.
+void Preprocessor::HandleImportDirective(Token &ImportTok) {
+  // #import is standard in Objective-C; everywhere else it is an extension.
+  if (!Features.ObjC1)
+    Diag(ImportTok, diag::ext_pp_import_directive);
+
+  HandleIncludeDirective(ImportTok, 0, true);
+}
+
+/// HandleIncludeMacrosDirective - The -imacros command line option turns into a
+/// pseudo directive in the predefines buffer.  This handles it by sucking all
+/// tokens through the preprocessor and discarding them (only keeping the side
+/// effects on the preprocessor).
+void Preprocessor::HandleIncludeMacrosDirective(Token &IncludeMacrosTok) {
+  // This directive should only occur in the predefines buffer.  If not, emit an
+  // error and reject it.
+  SourceLocation Loc = IncludeMacrosTok.getLocation();
+  if (strcmp(SourceMgr.getBufferName(Loc), "<built-in>") != 0) {
+    Diag(IncludeMacrosTok.getLocation(),
+         diag::pp_include_macros_out_of_predefines);
+    DiscardUntilEndOfDirective();
+    return;
+  }
+
+  // Treat this as a normal #include for checking purposes.  If this is
+  // successful, it will push a new lexer onto the include stack.
+  HandleIncludeDirective(IncludeMacrosTok, 0, false);
+
+  // Drain the included file: lex and discard every token, keeping only the
+  // macro-definition side effects.  The loop stops at a '##' token, which
+  // presumably marks the end of this pseudo directive in the predefines
+  // buffer (an eof before then would mean the end marker is missing) --
+  // confirm against the predefines-buffer writer.
+  Token TmpTok;
+  do {
+    Lex(TmpTok);
+    assert(TmpTok.isNot(tok::eof) && "Didn't find end of -imacros!");
+  } while (TmpTok.isNot(tok::hashhash));
+}
+
+//===----------------------------------------------------------------------===//
+// Preprocessor Macro Directive Handling.
+//===----------------------------------------------------------------------===//
+
+/// ReadMacroDefinitionArgList - The ( starting an argument list of a macro
+/// definition has just been read.  Lex the rest of the arguments and the
+/// closing ), updating MI with what we learn.  Return true if an error occurs
+/// parsing the arg list.
+bool Preprocessor::ReadMacroDefinitionArgList(MacroInfo *MI) {
+  llvm::SmallVector<IdentifierInfo*, 32> Arguments;
+
+  Token Tok;
+  while (1) {
+    LexUnexpandedToken(Tok);
+    switch (Tok.getKind()) {
+    case tok::r_paren:
+      // Found the end of the argument list.
+      if (Arguments.empty())  // #define FOO()
+        return false;
+      // Otherwise we have #define FOO(A,)
+      Diag(Tok, diag::err_pp_expected_ident_in_arg_list);
+      return true;
+    case tok::ellipsis:  // #define X(... -> C99 varargs
+      // Warn if use of C99 feature in non-C99 mode.
+      if (!Features.C99) Diag(Tok, diag::ext_variadic_macro);
+
+      // Lex the token after the identifier.
+      LexUnexpandedToken(Tok);
+      if (Tok.isNot(tok::r_paren)) {
+        Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
+        return true;
+      }
+      // Add the __VA_ARGS__ identifier as an argument.
+      Arguments.push_back(Ident__VA_ARGS__);
+      MI->setIsC99Varargs();
+      MI->setArgumentList(&Arguments[0], Arguments.size(), BP);
+      return false;
+    case tok::eom:  // #define X(
+      Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
+      return true;
+    default:
+      // Handle keywords and identifiers here to accept things like
+      // #define Foo(for) for.
+      IdentifierInfo *II = Tok.getIdentifierInfo();
+      if (II == 0) {
+        // #define X(1
+        Diag(Tok, diag::err_pp_invalid_tok_in_arg_list);
+        return true;
+      }
+
+      // If this is already used as an argument, it is used multiple times (e.g.
+      // #define X(A,A.
+      if (std::find(Arguments.begin(), Arguments.end(), II) !=
+          Arguments.end()) {  // C99 6.10.3p6
+        Diag(Tok, diag::err_pp_duplicate_name_in_arg_list) << II;
+        return true;
+      }
+
+      // Add the argument to the macro info.
+      Arguments.push_back(II);
+
+      // Lex the token after the identifier.
+      LexUnexpandedToken(Tok);
+
+      // The identifier must be followed by ',' (more args), ')' (done), or
+      // '...' (GNU named varargs); anything else is an error.
+      switch (Tok.getKind()) {
+      default:          // #define X(A B
+        Diag(Tok, diag::err_pp_expected_comma_in_arg_list);
+        return true;
+      case tok::r_paren: // #define X(A)
+        MI->setArgumentList(&Arguments[0], Arguments.size(), BP);
+        return false;
+      case tok::comma:  // #define X(A,
+        break;
+      case tok::ellipsis:  // #define X(A... -> GCC extension
+        // Diagnose extension.
+        Diag(Tok, diag::ext_named_variadic_macro);
+
+        // Lex the token after the identifier.
+        LexUnexpandedToken(Tok);
+        if (Tok.isNot(tok::r_paren)) {
+          Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
+          return true;
+        }
+
+        MI->setIsGNUVarargs();
+        MI->setArgumentList(&Arguments[0], Arguments.size(), BP);
+        return false;
+      }
+    }
+  }
+}
+
+/// HandleDefineDirective - Implements #define.  This consumes the entire macro
+/// line then lets the caller lex the next real token.
+void Preprocessor::HandleDefineDirective(Token &DefineTok) {
+  ++NumDefined;
+
+  Token MacroNameTok;
+  ReadMacroName(MacroNameTok, 1);
+
+  // Error reading macro name?  If so, diagnostic already issued.
+  if (MacroNameTok.is(tok::eom))
+    return;
+
+  // LastTok tracks the last token of the definition, used at the end to set
+  // the definition end location.
+  Token LastTok = MacroNameTok;
+
+  // If we are supposed to keep comments in #defines, reenable comment saving
+  // mode.
+  if (CurLexer) CurLexer->SetCommentRetentionState(KeepMacroComments);
+
+  // Create the new macro.
+  MacroInfo *MI = AllocateMacroInfo(MacroNameTok.getLocation());
+
+  Token Tok;
+  LexUnexpandedToken(Tok);
+
+  // If this is a function-like macro definition, parse the argument list,
+  // marking each of the identifiers as being used as macro arguments.  Also,
+  // check other constraints on the first token of the macro body.
+  if (Tok.is(tok::eom)) {
+    // If there is no body to this macro, we have no special handling here.
+  } else if (Tok.hasLeadingSpace()) {
+    // This is a normal token with leading space.  Clear the leading space
+    // marker on the first token to get proper expansion.
+    Tok.clearFlag(Token::LeadingSpace);
+  } else if (Tok.is(tok::l_paren)) {
+    // This is a function-like macro definition.  Read the argument list.
+    MI->setIsFunctionLike();
+    if (ReadMacroDefinitionArgList(MI)) {
+      // Forget about MI.
+      ReleaseMacroInfo(MI);
+      // Throw away the rest of the line.
+      if (CurPPLexer->ParsingPreprocessorDirective)
+        DiscardUntilEndOfDirective();
+      return;
+    }
+
+    // If this is a definition of a variadic C99 function-like macro, not using
+    // the GNU named varargs extension, enabled __VA_ARGS__.
+
+    // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro.
+    // This gets unpoisoned where it is allowed.
+    assert(Ident__VA_ARGS__->isPoisoned() && "__VA_ARGS__ should be poisoned!");
+    if (MI->isC99Varargs())
+      Ident__VA_ARGS__->setIsPoisoned(false);
+
+    // Read the first token after the arg list for down below.
+    LexUnexpandedToken(Tok);
+  } else if (Features.C99) {
+    // C99 requires whitespace between the macro definition and the body.  Emit
+    // a diagnostic for something like "#define X+".
+    Diag(Tok, diag::ext_c99_whitespace_required_after_macro_name);
+  } else {
+    // C90 6.8 TC1 says: "In the definition of an object-like macro, if the
+    // first character of a replacement list is not a character required by
+    // subclause 5.2.1, then there shall be white-space separation between the
+    // identifier and the replacement list.".  5.2.1 lists this set:
+    //   "A-Za-z0-9!"#%&'()*+,_./:;<=>?[\]^_{|}~" as well as whitespace, which
+    // is irrelevant here.
+    bool isInvalid = false;
+    if (Tok.is(tok::at)) // @ is not in the list above.
+      isInvalid = true;
+    else if (Tok.is(tok::unknown)) {
+      // If we have an unknown token, it is something strange like "`".  Since
+      // all of valid characters would have lexed into a single character
+      // token of some sort, we know this is not a valid case.
+      isInvalid = true;
+    }
+    if (isInvalid)
+      Diag(Tok, diag::ext_missing_whitespace_after_macro_name);
+    else
+      Diag(Tok, diag::warn_missing_whitespace_after_macro_name);
+  }
+
+  if (!Tok.is(tok::eom))
+    LastTok = Tok;
+
+  // Read the rest of the macro body.
+  if (MI->isObjectLike()) {
+    // Object-like macros are very simple, just read their body.
+    while (Tok.isNot(tok::eom)) {
+      LastTok = Tok;
+      MI->AddTokenToBody(Tok);
+      // Get the next token of the macro.
+      LexUnexpandedToken(Tok);
+    }
+
+  } else {
+    // Otherwise, read the body of a function-like macro.  While we are at it,
+    // check C99 6.10.3.2p1: ensure that # operators are followed by macro
+    // parameters in function-like macro expansions.
+    while (Tok.isNot(tok::eom)) {
+      LastTok = Tok;
+
+      if (Tok.isNot(tok::hash)) {
+        MI->AddTokenToBody(Tok);
+
+        // Get the next token of the macro.
+        LexUnexpandedToken(Tok);
+        continue;
+      }
+
+      // We just read a '#'; the next token decides whether it is a valid
+      // stringize operator.  Get the next token of the macro.
+      LexUnexpandedToken(Tok);
+
+      // Check for a valid macro arg identifier.
+      if (Tok.getIdentifierInfo() == 0 ||
+          MI->getArgumentNum(Tok.getIdentifierInfo()) == -1) {
+
+        // If this is assembler-with-cpp mode, we accept random gibberish after
+        // the '#' because '#' is often a comment character.  However, change
+        // the kind of the token to tok::unknown so that the preprocessor isn't
+        // confused.
+        if (getLangOptions().AsmPreprocessor && Tok.isNot(tok::eom)) {
+          LastTok.setKind(tok::unknown);
+        } else {
+          Diag(Tok, diag::err_pp_stringize_not_parameter);
+          ReleaseMacroInfo(MI);
+
+          // Disable __VA_ARGS__ again.
+          Ident__VA_ARGS__->setIsPoisoned(true);
+          return;
+        }
+      }
+
+      // Things look ok, add the '#' and param name tokens to the macro.
+      MI->AddTokenToBody(LastTok);
+      MI->AddTokenToBody(Tok);
+      LastTok = Tok;
+
+      // Get the next token of the macro.
+      LexUnexpandedToken(Tok);
+    }
+  }
+
+
+  // Disable __VA_ARGS__ again.
+  Ident__VA_ARGS__->setIsPoisoned(true);
+
+  // Check that there is no paste (##) operator at the begining or end of the
+  // replacement list.
+  unsigned NumTokens = MI->getNumTokens();
+  if (NumTokens != 0) {
+    if (MI->getReplacementToken(0).is(tok::hashhash)) {
+      Diag(MI->getReplacementToken(0), diag::err_paste_at_start);
+      ReleaseMacroInfo(MI);
+      return;
+    }
+    if (MI->getReplacementToken(NumTokens-1).is(tok::hashhash)) {
+      Diag(MI->getReplacementToken(NumTokens-1), diag::err_paste_at_end);
+      ReleaseMacroInfo(MI);
+      return;
+    }
+  }
+
+  // If this is the primary source file, remember that this macro hasn't been
+  // used yet.
+  if (isInPrimaryFile())
+    MI->setIsUsed(false);
+
+  MI->setDefinitionEndLoc(LastTok.getLocation());
+
+  // Finally, if this identifier already had a macro defined for it, verify that
+  // the macro bodies are identical and free the old definition.
+  if (MacroInfo *OtherMI = getMacroInfo(MacroNameTok.getIdentifierInfo())) {
+    // It is very common for system headers to have tons of macro redefinitions
+    // and for warnings to be disabled in system headers.  If this is the case,
+    // then don't bother calling MacroInfo::isIdenticalTo.
+    if (!getDiagnostics().getSuppressSystemWarnings() ||
+        !SourceMgr.isInSystemHeader(DefineTok.getLocation())) {
+      if (!OtherMI->isUsed())
+        Diag(OtherMI->getDefinitionLoc(), diag::pp_macro_not_used);
+
+      // Macros must be identical.  This means all tokes and whitespace
+      // separation must be the same.  C99 6.10.3.2.
+      if (!MI->isIdenticalTo(*OtherMI, *this)) {
+        Diag(MI->getDefinitionLoc(), diag::ext_pp_macro_redef)
+          << MacroNameTok.getIdentifierInfo();
+        Diag(OtherMI->getDefinitionLoc(), diag::note_previous_definition);
+      }
+    }
+
+    ReleaseMacroInfo(OtherMI);
+  }
+
+  setMacroInfo(MacroNameTok.getIdentifierInfo(), MI);
+
+  // If the callbacks want to know, tell them about the macro definition.
+  if (Callbacks)
+    Callbacks->MacroDefined(MacroNameTok.getIdentifierInfo(), MI);
+}
+
+/// HandleUndefDirective - Implements #undef.
+///
+/// Reads the macro name, validates the rest of the line, and removes any
+/// existing definition (a missing definition is a no-op, per C99 6.10.3.5p2).
+void Preprocessor::HandleUndefDirective(Token &UndefTok) {
+  ++NumUndefined;
+
+  Token MacroNameTok;
+  ReadMacroName(MacroNameTok, 2);
+
+  // Error reading macro name?  If so, diagnostic already issued.
+  if (MacroNameTok.is(tok::eom))
+    return;
+
+  // Check to see if this is the last token on the #undef line.
+  CheckEndOfDirective("undef");
+
+  // Okay, we finally have a valid identifier to undef.
+  MacroInfo *MI = getMacroInfo(MacroNameTok.getIdentifierInfo());
+
+  // If the macro is not defined, this is a noop undef, just return.
+  if (MI == 0) return;
+
+  if (!MI->isUsed())
+    Diag(MI->getDefinitionLoc(), diag::pp_macro_not_used);
+
+  // If the callbacks want to know, tell them about the macro #undef.
+  if (Callbacks)
+    Callbacks->MacroUndefined(MacroNameTok.getIdentifierInfo(), MI);
+
+  // Free macro definition.
+  ReleaseMacroInfo(MI);
+  setMacroInfo(MacroNameTok.getIdentifierInfo(), 0);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Preprocessor Conditional Directive Handling.
+//===----------------------------------------------------------------------===//
+
+/// HandleIfdefDirective - Implements the #ifdef/#ifndef directive.  isIfndef is
+/// true when this is a #ifndef directive.  ReadAnyTokensBeforeDirective is true
+/// if any tokens have been returned or pp-directives activated before this
+/// #ifndef has been lexed.
+///
+void Preprocessor::HandleIfdefDirective(Token &Result, bool isIfndef,
+                                        bool ReadAnyTokensBeforeDirective) {
+  ++NumIf;
+  Token DirectiveTok = Result;
+
+  Token MacroNameTok;
+  ReadMacroName(MacroNameTok);
+
+  // Error reading macro name?  If so, diagnostic already issued.
+  if (MacroNameTok.is(tok::eom)) {
+    // Skip code until we get to #endif.  This helps with recovery by not
+    // emitting an error when the #endif is reached.
+    SkipExcludedConditionalBlock(DirectiveTok.getLocation(),
+                                 /*Foundnonskip*/false, /*FoundElse*/false);
+    return;
+  }
+
+  // Check to see if this is the last token on the #if[n]def line.
+  CheckEndOfDirective(isIfndef ? "ifndef" : "ifdef");
+
+  if (CurPPLexer->getConditionalStackDepth() == 0) {
+    // If the start of a top-level #ifdef, inform MIOpt.  Only a #ifndef seen
+    // before any other token can be the include-guard pattern MIOpt tracks.
+    if (!ReadAnyTokensBeforeDirective) {
+      assert(isIfndef && "#ifdef shouldn't reach here");
+      CurPPLexer->MIOpt.EnterTopLevelIFNDEF(MacroNameTok.getIdentifierInfo());
+    } else
+      CurPPLexer->MIOpt.EnterTopLevelConditional();
+  }
+
+  IdentifierInfo *MII = MacroNameTok.getIdentifierInfo();
+  MacroInfo *MI = getMacroInfo(MII);
+
+  // If there is a macro, process it.
+  if (MI)  // Mark it used.
+    MI->setIsUsed(true);
+
+  // Should we include the stuff contained by this directive?
+  // '!MI' is true when the macro is undefined, so this condition holds when
+  // the macro's definedness matches the sense of the directive (#ifdef wants
+  // defined, #ifndef wants undefined).
+  if (!MI == isIfndef) {
+    // Yes, remember that we are inside a conditional, then lex the next token.
+    CurPPLexer->pushConditionalLevel(DirectiveTok.getLocation(),
+                                     /*wasskip*/false, /*foundnonskip*/true,
+                                     /*foundelse*/false);
+  } else {
+    // No, skip the contents of this block and return the first token after it.
+    SkipExcludedConditionalBlock(DirectiveTok.getLocation(),
+                                 /*Foundnonskip*/false,
+                                 /*FoundElse*/false);
+  }
+}
+
+/// HandleIfDirective - Implements the #if directive.
+///
+/// HandleIfDirective - Implements the #if directive.
+///
+void Preprocessor::HandleIfDirective(Token &IfToken,
+                                     bool ReadAnyTokensBeforeDirective) {
+  ++NumIf;
+
+  // Parse and evaluate the controlling expression.  If it has the form
+  // !defined(X), the macro X is reported back through IfNDefMacro.
+  IdentifierInfo *IfNDefMacro = 0;
+  bool ConditionalTrue = EvaluateDirectiveExpression(IfNDefMacro);
+
+  // A top-level "#if !defined(X)" seen before any other token acts like a
+  // #ifndef guard for the multiple-include optimization.
+  if (CurPPLexer->getConditionalStackDepth() == 0) {
+    if (IfNDefMacro && !ReadAnyTokensBeforeDirective)
+      CurPPLexer->MIOpt.EnterTopLevelIFNDEF(IfNDefMacro);
+    else
+      CurPPLexer->MIOpt.EnterTopLevelConditional();
+  }
+
+  if (ConditionalTrue) {
+    // The block is live: push a conditional level and keep lexing normally.
+    CurPPLexer->pushConditionalLevel(IfToken.getLocation(), /*wasskip*/false,
+                                     /*foundnonskip*/true, /*foundelse*/false);
+    return;
+  }
+
+  // The block is dead: skip to the matching #elif/#else/#endif and resume
+  // with the first token after it.
+  SkipExcludedConditionalBlock(IfToken.getLocation(), /*Foundnonskip*/false,
+                               /*FoundElse*/false);
+}
+
+/// HandleEndifDirective - Implements the #endif directive.
+///
+/// HandleEndifDirective - Implements the #endif directive.
+///
+void Preprocessor::HandleEndifDirective(Token &EndifToken) {
+  ++NumEndif;
+
+  // Nothing else may appear on the #endif line.
+  CheckEndOfDirective("endif");
+
+  // Pop the conditional level; failure means there was no matching #if.
+  PPConditionalInfo CondInfo;
+  if (CurPPLexer->popConditionalLevel(CondInfo)) {
+    Diag(EndifToken, diag::err_pp_endif_without_if);
+    return;
+  }
+
+  // Closing the outermost conditional ends the region tracked by the
+  // multiple-include optimization.
+  if (CurPPLexer->getConditionalStackDepth() == 0)
+    CurPPLexer->MIOpt.ExitTopLevelConditional();
+
+  assert(!CondInfo.WasSkipping && !CurPPLexer->LexingRawMode &&
+         "This code should only be reachable in the non-skipping case!");
+}
+
+
+/// HandleElseDirective - Implements the #else directive.  This is only reached
+/// while lexing a live block, so the #else region itself must be skipped.
+void Preprocessor::HandleElseDirective(Token &Result) {
+  ++NumElse;
+
+  // Nothing else may appear on the #else line.
+  CheckEndOfDirective("else");
+
+  // Pop the conditional level; failure means there was no matching #if.
+  PPConditionalInfo CI;
+  if (CurPPLexer->popConditionalLevel(CI)) {
+    Diag(Result, diag::pp_err_else_without_if);
+    return;
+  }
+
+  // A top-level #else keeps us inside a conditional region for MIOpt.
+  if (CurPPLexer->getConditionalStackDepth() == 0)
+    CurPPLexer->MIOpt.EnterTopLevelConditional();
+
+  // Two #else directives for a single #if is an error.
+  if (CI.FoundElse)
+    Diag(Result, diag::pp_err_else_after_else);
+
+  // Skip everything up to the matching #endif and return the token after it.
+  return SkipExcludedConditionalBlock(CI.IfLoc, /*Foundnonskip*/true,
+                                      /*FoundElse*/true);
+}
+
+/// HandleElifDirective - Implements the #elif directive when reached from a
+/// live (non-skipping) block: the #elif region is unconditionally skipped.
+void Preprocessor::HandleElifDirective(Token &ElifToken) {
+  // NOTE(review): #elif is counted in NumElse; presumably there is no separate
+  // #elif counter -- confirm against the statistics reporting code.
+  ++NumElse;
+
+  // #elif directive in a non-skipping conditional... start skipping.
+  // We don't care what the condition is, because we will always skip it (since
+  // the block immediately before it was included).
+  DiscardUntilEndOfDirective();
+
+  // Pop the conditional level; failure means there was no matching #if.
+  PPConditionalInfo CI;
+  if (CurPPLexer->popConditionalLevel(CI)) {
+    Diag(ElifToken, diag::pp_err_elif_without_if);
+    return;
+  }
+
+  // If this is a top-level #elif, inform the MIOpt.
+  if (CurPPLexer->getConditionalStackDepth() == 0)
+    CurPPLexer->MIOpt.EnterTopLevelConditional();
+
+  // If this is a #elif with a #else before it, report the error.
+  if (CI.FoundElse) Diag(ElifToken, diag::pp_err_elif_after_else);
+
+  // Finally, skip the rest of the contents of this block and return the first
+  // token after it.
+  return SkipExcludedConditionalBlock(CI.IfLoc, /*Foundnonskip*/true,
+                                      /*FoundElse*/CI.FoundElse);
+}
+
diff --git a/lib/Lex/PPExpressions.cpp b/lib/Lex/PPExpressions.cpp
new file mode 100644
index 0000000..2a6b2a7
--- /dev/null
+++ b/lib/Lex/PPExpressions.cpp
@@ -0,0 +1,745 @@
+//===--- PPExpressions.cpp - Preprocessor Expression Evaluation -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Preprocessor::EvaluateDirectiveExpression method,
+// which parses and evaluates integer constant expressions for #if directives.
+//
+//===----------------------------------------------------------------------===//
+//
+// FIXME: implement testing for #assert's.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/MacroInfo.h"
+#include "clang/Lex/LiteralSupport.h"
+#include "clang/Basic/TargetInfo.h"
+#include "clang/Lex/LexDiagnostic.h"
+#include "llvm/ADT/APSInt.h"
+using namespace clang;
+
+/// PPValue - Represents the value of a subexpression of a preprocessor
+/// conditional and the source range covered by it.
+class PPValue {
+  SourceRange Range;
+public:
+  llvm::APSInt Val;
+
+  // Default ctor - Construct an 'invalid' PPValue.
+  PPValue(unsigned BitWidth) : Val(BitWidth) {}
+
+  unsigned getBitWidth() const { return Val.getBitWidth(); }
+  bool isUnsigned() const { return Val.isUnsigned(); }
+
+  const SourceRange &getRange() const { return Range; }
+
+  void setRange(SourceLocation L) { Range.setBegin(L); Range.setEnd(L); }
+  void setRange(SourceLocation B, SourceLocation E) {
+    Range.setBegin(B); Range.setEnd(E);
+  }
+  void setBegin(SourceLocation L) { Range.setBegin(L); }
+  void setEnd(SourceLocation L) { Range.setEnd(L); }
+};
+
+static bool EvaluateDirectiveSubExpr(PPValue &LHS, unsigned MinPrec,
+                                     Token &PeekTok, bool ValueLive,
+                                     Preprocessor &PP);
+
+/// DefinedTracker - This struct is used while parsing expressions to keep track
+/// of whether !defined(X) has been seen.
+///
+/// With this simple scheme, we handle the basic forms:
+///    !defined(X)   and !defined X
+/// but we also trivially handle (silly) stuff like:
+///    !!!defined(X) and +!defined(X) and !+!+!defined(X) and !(defined(X)).
+struct DefinedTracker {
+  /// Each time a Value is evaluated, it returns information about whether the
+  /// parsed value is of the form defined(X), !defined(X) or is something else.
+  enum TrackerState {
+    DefinedMacro,        // defined(X)
+    NotDefinedMacro,     // !defined(X)
+    Unknown              // Something else.
+  } State;
+  /// TheMacro - When the state is DefinedMacro or NotDefinedMacro, this
+  /// indicates the macro that was checked.  Otherwise its value is not
+  /// meaningful.
+  IdentifierInfo *TheMacro;
+};
+
+/// EvaluateDefined - Process a 'defined(sym)' expression.
+static bool EvaluateDefined(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
+                            bool ValueLive, Preprocessor &PP) {
+  IdentifierInfo *II;
+  Result.setBegin(PeekTok.getLocation());
+
+  // Get the next token, don't expand it.
+  PP.LexUnexpandedToken(PeekTok);
+
+  // Two options, it can either be a pp-identifier or a (.
+  SourceLocation LParenLoc;
+  if (PeekTok.is(tok::l_paren)) {
+    // Found a paren, remember we saw it and skip it.
+    LParenLoc = PeekTok.getLocation();
+    PP.LexUnexpandedToken(PeekTok);
+  }
+
+  // If we don't have a pp-identifier now, this is an error.
+  if ((II = PeekTok.getIdentifierInfo()) == 0) {
+    PP.Diag(PeekTok, diag::err_pp_defined_requires_identifier);
+    return true;
+  }
+
+  // Otherwise, we got an identifier, is it defined to something?
+  Result.Val = II->hasMacroDefinition();
+  Result.Val.setIsUnsigned(false);  // Result is signed intmax_t.
+
+  // If there is a macro, mark it used.
+  if (Result.Val != 0 && ValueLive) {
+    MacroInfo *Macro = PP.getMacroInfo(II);
+    Macro->setIsUsed(true);
+  }
+
+  // Consume identifier.
+  Result.setEnd(PeekTok.getLocation());
+  PP.LexNonComment(PeekTok);
+
+  // If we are in parens, ensure we have a trailing ).
+  if (LParenLoc.isValid()) {
+    if (PeekTok.isNot(tok::r_paren)) {
+      PP.Diag(PeekTok.getLocation(), diag::err_pp_missing_rparen) << "defined";
+      PP.Diag(LParenLoc, diag::note_matching) << "(";
+      return true;
+    }
+    // Consume the ).
+    Result.setEnd(PeekTok.getLocation());
+    PP.LexNonComment(PeekTok);
+  }
+
+  // Success, remember that we saw defined(X).
+  DT.State = DefinedTracker::DefinedMacro;
+  DT.TheMacro = II;
+  return false;
+}
+
+/// EvaluateValue - Evaluate the token PeekTok (and any others needed) and
+/// return the computed value in Result.  Return true if there was an error
+/// parsing.  This function also returns information about the form of the
+/// expression in DT.  See above for information on what DT means.
+///
+/// If ValueLive is false, then this value is being evaluated in a context where
+/// the result is not used.  As such, avoid diagnostics that relate to
+/// evaluation.
+/// EvaluateValue - Evaluate the token PeekTok (and any others needed) and
+/// return the computed value in Result.  Return true if there was an error
+/// parsing.  This function also returns information about the form of the
+/// expression in DT.  See above for information on what DT means.
+///
+/// If ValueLive is false, then this value is being evaluated in a context where
+/// the result is not used.  As such, avoid diagnostics that relate to
+/// evaluation.
+static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
+                          bool ValueLive, Preprocessor &PP) {
+  DT.State = DefinedTracker::Unknown;
+
+  // If this token's spelling is a pp-identifier, check to see if it is
+  // 'defined' or if it is a macro.  Note that we check here because many
+  // keywords are pp-identifiers, so we can't check the kind.
+  if (IdentifierInfo *II = PeekTok.getIdentifierInfo()) {
+    // Handle "defined X" and "defined(X)".
+    if (II->isStr("defined"))
+      return(EvaluateDefined(Result, PeekTok, DT, ValueLive, PP));
+
+    // If this identifier isn't 'defined' or one of the special
+    // preprocessor keywords and it wasn't macro expanded, it turns
+    // into a simple 0, unless it is the C++ keyword "true", in which case it
+    // turns into "1".
+    if (ValueLive)
+      PP.Diag(PeekTok, diag::warn_pp_undef_identifier) << II;
+    Result.Val = II->getTokenID() == tok::kw_true;
+    Result.Val.setIsUnsigned(false);  // "0" is signed intmax_t 0.
+    Result.setRange(PeekTok.getLocation());
+    PP.LexNonComment(PeekTok);
+    return false;
+  }
+
+  switch (PeekTok.getKind()) {
+  default:  // Non-value token.
+    PP.Diag(PeekTok, diag::err_pp_expr_bad_token_start_expr);
+    return true;
+  case tok::eom:
+  case tok::r_paren:
+    // If there is no expression, report and exit.
+    PP.Diag(PeekTok, diag::err_pp_expected_value_in_expr);
+    return true;
+  case tok::numeric_constant: {
+    // Re-spell the token into a local buffer so the literal parser can see
+    // the characters with any trigraphs/escaped newlines resolved.
+    llvm::SmallString<64> IntegerBuffer;
+    IntegerBuffer.resize(PeekTok.getLength());
+    const char *ThisTokBegin = &IntegerBuffer[0];
+    unsigned ActualLength = PP.getSpelling(PeekTok, ThisTokBegin);
+    NumericLiteralParser Literal(ThisTokBegin, ThisTokBegin+ActualLength,
+                                 PeekTok.getLocation(), PP);
+    if (Literal.hadError)
+      return true; // a diagnostic was already reported.
+
+    if (Literal.isFloatingLiteral() || Literal.isImaginary) {
+      PP.Diag(PeekTok, diag::err_pp_illegal_floating_literal);
+      return true;
+    }
+    assert(Literal.isIntegerLiteral() && "Unknown ppnumber");
+
+    // long long is a C99 feature.
+    if (!PP.getLangOptions().C99 && !PP.getLangOptions().CPlusPlus0x
+        && Literal.isLongLong)
+      PP.Diag(PeekTok, diag::ext_longlong);
+
+    // Parse the integer literal into Result.
+    if (Literal.GetIntegerValue(Result.Val)) {
+      // Overflow parsing integer literal.
+      if (ValueLive) PP.Diag(PeekTok, diag::warn_integer_too_large);
+      Result.Val.setIsUnsigned(true);
+    } else {
+      // Set the signedness of the result to match whether there was a U suffix
+      // or not.
+      Result.Val.setIsUnsigned(Literal.isUnsigned);
+
+      // Detect overflow based on whether the value is signed.  If signed
+      // and if the value is too large, emit a warning "integer constant is so
+      // large that it is unsigned" e.g. on 12345678901234567890 where intmax_t
+      // is 64-bits.
+      if (!Literal.isUnsigned && Result.Val.isNegative()) {
+        // Don't warn for a hex literal: 0x8000..0 shouldn't warn.
+        if (ValueLive && Literal.getRadix() != 16)
+          PP.Diag(PeekTok, diag::warn_integer_too_large_for_signed);
+        Result.Val.setIsUnsigned(true);
+      }
+    }
+
+    // Consume the token.
+    Result.setRange(PeekTok.getLocation());
+    PP.LexNonComment(PeekTok);
+    return false;
+  }
+  case tok::char_constant: {   // 'x'
+    llvm::SmallString<32> CharBuffer;
+    CharBuffer.resize(PeekTok.getLength());
+    const char *ThisTokBegin = &CharBuffer[0];
+    unsigned ActualLength = PP.getSpelling(PeekTok, ThisTokBegin);
+    CharLiteralParser Literal(ThisTokBegin, ThisTokBegin+ActualLength,
+                              PeekTok.getLocation(), PP);
+    if (Literal.hadError())
+      return true;  // A diagnostic was already emitted.
+
+    // Character literals are always int or wchar_t, expand to intmax_t.
+    const TargetInfo &TI = PP.getTargetInfo();
+    unsigned NumBits;
+    if (Literal.isMultiChar())
+      NumBits = TI.getIntWidth();       // Multi-char constants have type int.
+    else if (Literal.isWide())
+      NumBits = TI.getWCharWidth();
+    else
+      NumBits = TI.getCharWidth();
+
+    // Set the width.
+    llvm::APSInt Val(NumBits);
+    // Set the value.
+    Val = Literal.getValue();
+    // Set the signedness.
+    Val.setIsUnsigned(!PP.getLangOptions().CharIsSigned);
+
+    // Widen the char value up to the intmax_t width used for pp expressions.
+    if (Result.Val.getBitWidth() > Val.getBitWidth()) {
+      Result.Val = Val.extend(Result.Val.getBitWidth());
+    } else {
+      assert(Result.Val.getBitWidth() == Val.getBitWidth() &&
+             "intmax_t smaller than char/wchar_t?");
+      Result.Val = Val;
+    }
+
+    // Consume the token.
+    Result.setRange(PeekTok.getLocation());
+    PP.LexNonComment(PeekTok);
+    return false;
+  }
+  case tok::l_paren: {
+    SourceLocation Start = PeekTok.getLocation();
+    PP.LexNonComment(PeekTok);  // Eat the (.
+    // Parse the value and if there are any binary operators involved, parse
+    // them.
+    if (EvaluateValue(Result, PeekTok, DT, ValueLive, PP)) return true;
+
+    // If this is a silly value like (X), which doesn't need parens, check for
+    // !(defined X).
+    if (PeekTok.is(tok::r_paren)) {
+      // Just use DT unmodified as our result.
+    } else {
+      // Otherwise, we have something like (x+y), and we consumed '(x'.
+      if (EvaluateDirectiveSubExpr(Result, 1, PeekTok, ValueLive, PP))
+        return true;
+
+      if (PeekTok.isNot(tok::r_paren)) {
+        PP.Diag(PeekTok.getLocation(), diag::err_pp_expected_rparen)
+          << Result.getRange();
+        PP.Diag(Start, diag::note_matching) << "(";
+        return true;
+      }
+      // A parenthesized binary expression is never a bare defined(X).
+      DT.State = DefinedTracker::Unknown;
+    }
+    Result.setRange(Start, PeekTok.getLocation());
+    PP.LexNonComment(PeekTok);  // Eat the ).
+    return false;
+  }
+  case tok::plus: {
+    SourceLocation Start = PeekTok.getLocation();
+    // Unary plus doesn't modify the value.
+    PP.LexNonComment(PeekTok);
+    if (EvaluateValue(Result, PeekTok, DT, ValueLive, PP)) return true;
+    Result.setBegin(Start);
+    return false;
+  }
+  case tok::minus: {
+    SourceLocation Loc = PeekTok.getLocation();
+    PP.LexNonComment(PeekTok);
+    if (EvaluateValue(Result, PeekTok, DT, ValueLive, PP)) return true;
+    Result.setBegin(Loc);
+
+    // C99 6.5.3.3p3: The sign of the result matches the sign of the operand.
+    Result.Val = -Result.Val;
+
+    // -MININT is the only thing that overflows.  Unsigned never overflows.
+    bool Overflow = !Result.isUnsigned() && Result.Val.isMinSignedValue();
+
+    // If this operator is live and overflowed, report the issue.
+    if (Overflow && ValueLive)
+      PP.Diag(Loc, diag::warn_pp_expr_overflow) << Result.getRange();
+
+    DT.State = DefinedTracker::Unknown;
+    return false;
+  }
+
+  case tok::tilde: {
+    SourceLocation Start = PeekTok.getLocation();
+    PP.LexNonComment(PeekTok);
+    if (EvaluateValue(Result, PeekTok, DT, ValueLive, PP)) return true;
+    Result.setBegin(Start);
+
+    // C99 6.5.3.3p4: The sign of the result matches the sign of the operand.
+    Result.Val = ~Result.Val;
+    DT.State = DefinedTracker::Unknown;
+    return false;
+  }
+
+  case tok::exclaim: {
+    SourceLocation Start = PeekTok.getLocation();
+    PP.LexNonComment(PeekTok);
+    if (EvaluateValue(Result, PeekTok, DT, ValueLive, PP)) return true;
+    Result.setBegin(Start);
+    Result.Val = !Result.Val;
+    // C99 6.5.3.3p5: The sign of the result is 'int', aka it is signed.
+    Result.Val.setIsUnsigned(false);
+
+    // '!' toggles the defined-tracker state so !defined(X) and !!defined(X)
+    // are recognized for the multiple-include optimization.
+    if (DT.State == DefinedTracker::DefinedMacro)
+      DT.State = DefinedTracker::NotDefinedMacro;
+    else if (DT.State == DefinedTracker::NotDefinedMacro)
+      DT.State = DefinedTracker::DefinedMacro;
+    return false;
+  }
+
+  // FIXME: Handle #assert
+  }
+}
+
+
+
+/// getPrecedence - Return the precedence of the specified binary operator
+/// token.  This returns:
+///   ~0 - Invalid token.
+///   14 -> 3 - various operators.
+///    0 - 'eom' or ')'
+/// getPrecedence - Map a binary-operator token onto its preprocessor
+/// expression precedence.  Returns ~0U for tokens that are not binary
+/// operators, 0 for the expression terminators ')' and end-of-macro, and
+/// 2 through 14 for the real operators (higher binds tighter).
+static unsigned getPrecedence(tok::TokenKind Kind) {
+  switch (Kind) {
+  case tok::eom:                  return 0;   // Lowest priority, end of macro.
+  case tok::r_paren:              return 0;   // Lowest priority, end of expr.
+  case tok::colon:                return 2;
+  case tok::comma:                return 3;
+  case tok::question:             return 4;
+  case tok::pipepipe:             return 5;
+  case tok::ampamp:               return 6;
+  case tok::pipe:                 return 7;
+  case tok::caret:                return 8;
+  case tok::amp:                  return 9;
+  case tok::equalequal:
+  case tok::exclaimequal:         return 10;
+  case tok::greater:
+  case tok::greaterequal:
+  case tok::less:
+  case tok::lessequal:            return 11;
+  case tok::greatergreater:
+  case tok::lessless:             return 12;
+  case tok::minus:
+  case tok::plus:                 return 13;
+  case tok::percent:
+  case tok::slash:
+  case tok::star:                 return 14;
+  default:                        return ~0U; // Not a binary operator.
+  }
+}
+
+
+/// EvaluateDirectiveSubExpr - Evaluate the subexpression whose first token is
+/// PeekTok, and whose precedence is PeekPrec.  This returns the result in LHS.
+///
+/// If ValueLive is false, then this value is being evaluated in a context where
+/// the result is not used.  As such, avoid diagnostics that relate to
+/// evaluation, such as division by zero warnings.
+/// EvaluateDirectiveSubExpr - Evaluate the subexpression whose first token is
+/// PeekTok, and whose precedence is PeekPrec.  This returns the result in LHS.
+///
+/// If ValueLive is false, then this value is being evaluated in a context where
+/// the result is not used.  As such, avoid diagnostics that relate to
+/// evaluation, such as division by zero warnings.
+static bool EvaluateDirectiveSubExpr(PPValue &LHS, unsigned MinPrec,
+                                     Token &PeekTok, bool ValueLive,
+                                     Preprocessor &PP) {
+  unsigned PeekPrec = getPrecedence(PeekTok.getKind());
+  // If this token isn't valid, report the error.
+  if (PeekPrec == ~0U) {
+    PP.Diag(PeekTok.getLocation(), diag::err_pp_expr_bad_token_binop)
+      << LHS.getRange();
+    return true;
+  }
+
+  while (1) {
+    // If this token has a lower precedence than we are allowed to parse, return
+    // it so that higher levels of the recursion can parse it.
+    if (PeekPrec < MinPrec)
+      return false;
+
+    tok::TokenKind Operator = PeekTok.getKind();
+
+    // If this is a short-circuiting operator, see if the RHS of the operator is
+    // dead.  Note that this cannot just clobber ValueLive.  Consider
+    // "0 && 1 ? 4 : 1 / 0", which is parsed as "(0 && 1) ? 4 : (1 / 0)".  In
+    // this example, the RHS of the && being dead does not make the rest of the
+    // expr dead.
+    bool RHSIsLive;
+    if (Operator == tok::ampamp && LHS.Val == 0)
+      RHSIsLive = false;   // RHS of "0 && x" is dead.
+    else if (Operator == tok::pipepipe && LHS.Val != 0)
+      RHSIsLive = false;   // RHS of "1 || x" is dead.
+    else if (Operator == tok::question && LHS.Val == 0)
+      RHSIsLive = false;   // RHS (x) of "0 ? x : y" is dead.
+    else
+      RHSIsLive = ValueLive;
+
+    // Consume the operator, remembering the operator's location for reporting.
+    SourceLocation OpLoc = PeekTok.getLocation();
+    PP.LexNonComment(PeekTok);
+
+    PPValue RHS(LHS.getBitWidth());
+    // Parse the RHS of the operator.
+    DefinedTracker DT;
+    if (EvaluateValue(RHS, PeekTok, DT, RHSIsLive, PP)) return true;
+
+    // Remember the precedence of this operator and get the precedence of the
+    // operator immediately to the right of the RHS.
+    unsigned ThisPrec = PeekPrec;
+    PeekPrec = getPrecedence(PeekTok.getKind());
+
+    // If this token isn't valid, report the error.
+    if (PeekPrec == ~0U) {
+      PP.Diag(PeekTok.getLocation(), diag::err_pp_expr_bad_token_binop)
+        << RHS.getRange();
+      return true;
+    }
+
+    // Decide whether to include the next binop in this subexpression.  For
+    // example, when parsing x+y*z and looking at '*', we want to recursively
+    // handle y*z as a single subexpression.  We do this because the precedence
+    // of * is higher than that of +.  The only strange case we have to handle
+    // here is for the ?: operator, where the precedence is actually lower than
+    // the LHS of the '?'.  The grammar rule is:
+    //
+    // conditional-expression ::=
+    //    logical-OR-expression ? expression : conditional-expression
+    // where 'expression' is actually comma-expression.
+    unsigned RHSPrec;
+    if (Operator == tok::question)
+      // The RHS of "?" should be maximally consumed as an expression.
+      RHSPrec = getPrecedence(tok::comma);
+    else  // All others should munch while higher precedence.
+      RHSPrec = ThisPrec+1;
+
+    if (PeekPrec >= RHSPrec) {
+      if (EvaluateDirectiveSubExpr(RHS, RHSPrec, PeekTok, RHSIsLive, PP))
+        return true;
+      PeekPrec = getPrecedence(PeekTok.getKind());
+    }
+    assert(PeekPrec <= ThisPrec && "Recursion didn't work!");
+
+    // Usual arithmetic conversions (C99 6.3.1.8p1): result is unsigned if
+    // either operand is unsigned.
+    llvm::APSInt Res(LHS.getBitWidth());
+    switch (Operator) {
+    case tok::question:       // No UAC for x and y in "x ? y : z".
+    case tok::lessless:       // Shift amount doesn't UAC with shift value.
+    case tok::greatergreater: // Shift amount doesn't UAC with shift value.
+    case tok::comma:          // Comma operands are not subject to UACs.
+    case tok::pipepipe:       // Logical || does not do UACs.
+    case tok::ampamp:         // Logical && does not do UACs.
+      break;                  // No UAC
+    default:
+      Res.setIsUnsigned(LHS.isUnsigned()|RHS.isUnsigned());
+      // If this just promoted something from signed to unsigned, and if the
+      // value was negative, warn about it.
+      if (ValueLive && Res.isUnsigned()) {
+        if (!LHS.isUnsigned() && LHS.Val.isNegative())
+          PP.Diag(OpLoc, diag::warn_pp_convert_lhs_to_positive)
+            << LHS.Val.toString(10, true) + " to " +
+               LHS.Val.toString(10, false)
+            << LHS.getRange() << RHS.getRange();
+        if (!RHS.isUnsigned() && RHS.Val.isNegative())
+          PP.Diag(OpLoc, diag::warn_pp_convert_rhs_to_positive)
+            << RHS.Val.toString(10, true) + " to " +
+               RHS.Val.toString(10, false)
+            << LHS.getRange() << RHS.getRange();
+      }
+      LHS.Val.setIsUnsigned(Res.isUnsigned());
+      RHS.Val.setIsUnsigned(Res.isUnsigned());
+    }
+
+    // FIXME: All of these should detect and report overflow??
+    bool Overflow = false;
+    switch (Operator) {
+    default: assert(0 && "Unknown operator token!");
+    case tok::percent:
+      if (RHS.Val != 0)
+        Res = LHS.Val % RHS.Val;
+      else if (ValueLive) {
+        PP.Diag(OpLoc, diag::err_pp_remainder_by_zero)
+          << LHS.getRange() << RHS.getRange();
+        return true;
+      }
+      break;
+    case tok::slash:
+      if (RHS.Val != 0) {
+        Res = LHS.Val / RHS.Val;
+        if (LHS.Val.isSigned())   // MININT/-1  -->  overflow.
+          Overflow = LHS.Val.isMinSignedValue() && RHS.Val.isAllOnesValue();
+      } else if (ValueLive) {
+        PP.Diag(OpLoc, diag::err_pp_division_by_zero)
+          << LHS.getRange() << RHS.getRange();
+        return true;
+      }
+      break;
+
+    case tok::star:
+      Res = LHS.Val * RHS.Val;
+      if (Res.isSigned() && LHS.Val != 0 && RHS.Val != 0)
+        Overflow = Res/RHS.Val != LHS.Val || Res/LHS.Val != RHS.Val;
+      break;
+    case tok::lessless: {
+      // Determine whether overflow is about to happen.
+      unsigned ShAmt = static_cast<unsigned>(RHS.Val.getLimitedValue());
+      if (ShAmt >= LHS.Val.getBitWidth())
+        Overflow = true, ShAmt = LHS.Val.getBitWidth()-1;
+      else if (LHS.isUnsigned())
+        Overflow = false;
+      else if (LHS.Val.isNonNegative()) // Don't allow sign change.
+        Overflow = ShAmt >= LHS.Val.countLeadingZeros();
+      else
+        Overflow = ShAmt >= LHS.Val.countLeadingOnes();
+
+      Res = LHS.Val << ShAmt;
+      break;
+    }
+    case tok::greatergreater: {
+      // Determine whether overflow is about to happen.
+      unsigned ShAmt = static_cast<unsigned>(RHS.Val.getLimitedValue());
+      if (ShAmt >= LHS.getBitWidth())
+        Overflow = true, ShAmt = LHS.getBitWidth()-1;
+      Res = LHS.Val >> ShAmt;
+      break;
+    }
+    case tok::plus:
+      Res = LHS.Val + RHS.Val;
+      if (LHS.isUnsigned())
+        Overflow = false;
+      else if (LHS.Val.isNonNegative() == RHS.Val.isNonNegative() &&
+               Res.isNonNegative() != LHS.Val.isNonNegative())
+        Overflow = true;  // Overflow for signed addition.
+      break;
+    case tok::minus:
+      Res = LHS.Val - RHS.Val;
+      if (LHS.isUnsigned())
+        Overflow = false;
+      else if (LHS.Val.isNonNegative() != RHS.Val.isNonNegative() &&
+               Res.isNonNegative() != LHS.Val.isNonNegative())
+        Overflow = true;  // Overflow for signed subtraction.
+      break;
+    case tok::lessequal:
+      Res = LHS.Val <= RHS.Val;
+      Res.setIsUnsigned(false);  // C99 6.5.8p6, result is always int (signed)
+      break;
+    case tok::less:
+      Res = LHS.Val < RHS.Val;
+      Res.setIsUnsigned(false);  // C99 6.5.8p6, result is always int (signed)
+      break;
+    case tok::greaterequal:
+      Res = LHS.Val >= RHS.Val;
+      Res.setIsUnsigned(false);  // C99 6.5.8p6, result is always int (signed)
+      break;
+    case tok::greater:
+      Res = LHS.Val > RHS.Val;
+      Res.setIsUnsigned(false);  // C99 6.5.8p6, result is always int (signed)
+      break;
+    case tok::exclaimequal:
+      Res = LHS.Val != RHS.Val;
+      Res.setIsUnsigned(false);  // C99 6.5.9p3, result is always int (signed)
+      break;
+    case tok::equalequal:
+      Res = LHS.Val == RHS.Val;
+      Res.setIsUnsigned(false);  // C99 6.5.9p3, result is always int (signed)
+      break;
+    case tok::amp:
+      Res = LHS.Val & RHS.Val;
+      break;
+    case tok::caret:
+      Res = LHS.Val ^ RHS.Val;
+      break;
+    case tok::pipe:
+      Res = LHS.Val | RHS.Val;
+      break;
+    case tok::ampamp:
+      Res = (LHS.Val != 0 && RHS.Val != 0);
+      Res.setIsUnsigned(false);  // C99 6.5.13p3, result is always int (signed)
+      break;
+    case tok::pipepipe:
+      Res = (LHS.Val != 0 || RHS.Val != 0);
+      Res.setIsUnsigned(false);  // C99 6.5.14p3, result is always int (signed)
+      break;
+    case tok::comma:
+      // Comma is invalid in pp expressions in c89/c++ mode, but is valid in C99
+      // if not being evaluated.
+      if (!PP.getLangOptions().C99 || ValueLive)
+        PP.Diag(OpLoc, diag::ext_pp_comma_expr)
+          << LHS.getRange() << RHS.getRange();
+      Res = RHS.Val; // LHS = LHS,RHS -> RHS.
+      break;
+    case tok::question: {
+      // Parse the : part of the expression.
+      if (PeekTok.isNot(tok::colon)) {
+        // Stream both subexpression ranges into the diagnostic.  This
+        // previously used a comma operator ("<< LHS.getRange(), RHS..."),
+        // which evaluated and discarded the RHS range instead of attaching
+        // it to the diagnostic.
+        PP.Diag(PeekTok.getLocation(), diag::err_expected_colon)
+          << LHS.getRange() << RHS.getRange();
+        PP.Diag(OpLoc, diag::note_matching) << "?";
+        return true;
+      }
+      // Consume the :.
+      PP.LexNonComment(PeekTok);
+
+      // Evaluate the value after the :.
+      bool AfterColonLive = ValueLive && LHS.Val == 0;
+      PPValue AfterColonVal(LHS.getBitWidth());
+      DefinedTracker DT;
+      if (EvaluateValue(AfterColonVal, PeekTok, DT, AfterColonLive, PP))
+        return true;
+
+      // Parse anything after the : with the same precedence as ?.  We allow
+      // things of equal precedence because ?: is right associative.
+      if (EvaluateDirectiveSubExpr(AfterColonVal, ThisPrec,
+                                   PeekTok, AfterColonLive, PP))
+        return true;
+
+      // Now that we have the condition, the LHS and the RHS of the :, evaluate.
+      Res = LHS.Val != 0 ? RHS.Val : AfterColonVal.Val;
+      RHS.setEnd(AfterColonVal.getRange().getEnd());
+
+      // Usual arithmetic conversions (C99 6.3.1.8p1): result is unsigned if
+      // either operand is unsigned.
+      Res.setIsUnsigned(RHS.isUnsigned() | AfterColonVal.isUnsigned());
+
+      // Figure out the precedence of the token after the : part.
+      PeekPrec = getPrecedence(PeekTok.getKind());
+      break;
+    }
+    case tok::colon:
+      // Don't allow :'s to float around without being part of ?: exprs.
+      PP.Diag(OpLoc, diag::err_pp_colon_without_question)
+        << LHS.getRange() << RHS.getRange();
+      return true;
+    }
+
+    // If this operator is live and overflowed, report the issue.
+    if (Overflow && ValueLive)
+      PP.Diag(OpLoc, diag::warn_pp_expr_overflow)
+        << LHS.getRange() << RHS.getRange();
+
+    // Put the result back into 'LHS' for our next iteration.
+    LHS.Val = Res;
+    LHS.setEnd(RHS.getRange().getEnd());
+  }
+
+  return false;
+}
+
+/// EvaluateDirectiveExpression - Evaluate an integer constant expression that
+/// may occur after a #if or #elif directive.  If the expression is equivalent
+/// to "!defined(X)" return X in IfNDefMacro.
+/// EvaluateDirectiveExpression - Evaluate an integer constant expression that
+/// may occur after a #if or #elif directive.  If the expression is equivalent
+/// to "!defined(X)" return X in IfNDefMacro.  Returns true if the condition
+/// is true; on a parse error the rest of the directive is discarded and
+/// false (condition not taken) is returned.
+bool Preprocessor::
+EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) {
+  // Save the current state of 'DisableMacroExpansion' and reset it to false. If
+  // 'DisableMacroExpansion' is true, then we must be in a macro argument list
+  // in which case a directive is undefined behavior.  We want macros to be able
+  // to recursively expand in order to get more gcc-like behavior, so we force
+  // DisableMacroExpansion to false and restore it when we're done parsing the
+  // expression.
+  bool DisableMacroExpansionAtStartOfDirective = DisableMacroExpansion;
+  DisableMacroExpansion = false;
+
+  // Peek ahead one token.
+  Token Tok;
+  Lex(Tok);
+
+  // C99 6.10.1p3 - All expressions are evaluated as intmax_t or uintmax_t.
+  unsigned BitWidth = getTargetInfo().getIntMaxTWidth();
+
+  // Parse the first value (possibly a parenthesized or unary expression).
+  PPValue ResVal(BitWidth);
+  DefinedTracker DT;
+  if (EvaluateValue(ResVal, Tok, DT, true, *this)) {
+    // Parse error, skip the rest of the macro line.
+    if (Tok.isNot(tok::eom))
+      DiscardUntilEndOfDirective();
+
+    // Restore 'DisableMacroExpansion'.
+    DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective;
+    return false;
+  }
+
+  // If we are at the end of the expression after just parsing a value, there
+  // must be no (unparenthesized) binary operators involved, so we can exit
+  // directly.
+  if (Tok.is(tok::eom)) {
+    // If the expression we parsed was of the form !defined(macro), return the
+    // macro in IfNDefMacro.
+    if (DT.State == DefinedTracker::NotDefinedMacro)
+      IfNDefMacro = DT.TheMacro;
+
+    // Restore 'DisableMacroExpansion'.
+    DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective;
+    return ResVal.Val != 0;
+  }
+
+  // Otherwise, we must have a binary operator (e.g. "#if 1 < 2"), so parse the
+  // operator and the stuff after it.
+  if (EvaluateDirectiveSubExpr(ResVal, getPrecedence(tok::question),
+                               Tok, true, *this)) {
+    // Parse error, skip the rest of the macro line.
+    if (Tok.isNot(tok::eom))
+      DiscardUntilEndOfDirective();
+
+    // Restore 'DisableMacroExpansion'.
+    DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective;
+    return false;
+  }
+
+  // If we aren't at the tok::eom token, something bad happened, like an extra
+  // ')' token.
+  if (Tok.isNot(tok::eom)) {
+    Diag(Tok, diag::err_pp_expected_eol);
+    DiscardUntilEndOfDirective();
+  }
+
+  // Restore 'DisableMacroExpansion'.
+  DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective;
+  return ResVal.Val != 0;
+}
+
diff --git a/lib/Lex/PPLexerChange.cpp b/lib/Lex/PPLexerChange.cpp
new file mode 100644
index 0000000..0b26ccb
--- /dev/null
+++ b/lib/Lex/PPLexerChange.cpp
@@ -0,0 +1,356 @@
+//===--- PPLexerChange.cpp - Handle changing lexers in the preprocessor ---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements pieces of the Preprocessor interface that manage the
+// current lexer stack.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/HeaderSearch.h"
+#include "clang/Lex/MacroInfo.h"
+#include "clang/Lex/LexDiagnostic.h"
+#include "clang/Basic/SourceManager.h"
+#include "llvm/Support/MemoryBuffer.h"
+using namespace clang;
+
+PPCallbacks::~PPCallbacks() {}
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Methods.
+//===----------------------------------------------------------------------===//
+
+/// isInPrimaryFile - Return true if we're in the top-level file, not in a
+/// #include.  This looks through macro expansions and active _Pragma lexers.
+bool Preprocessor::isInPrimaryFile() const {
+  if (IsFileLexer())
+    return IncludeMacroStack.empty();
+
+  // If there are any stacked lexers, we're in a #include.
+  assert(IsFileLexer(IncludeMacroStack[0]) &&
+         "Top level include stack isn't our primary lexer?");
+  for (unsigned i = 1, e = IncludeMacroStack.size(); i != e; ++i)
+    if (IsFileLexer(IncludeMacroStack[i]))
+      return false;
+  return true;
+}
+
+/// getCurrentFileLexer - Return the current file lexer being lexed from.  Note
+/// that this ignores any potentially active macro expansions and _Pragma
+/// expansions going on at the time.
+PreprocessorLexer *Preprocessor::getCurrentFileLexer() const {
+  if (IsFileLexer())
+    return CurPPLexer;
+
+  // Look for a stacked lexer.
+  for (unsigned i = IncludeMacroStack.size(); i != 0; --i) {
+    const IncludeStackInfo& ISI = IncludeMacroStack[i-1];
+    if (IsFileLexer(ISI))
+      return ISI.ThePPLexer;
+  }
+  return 0;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Methods for Entering and Callbacks for leaving various contexts
+//===----------------------------------------------------------------------===//
+
+/// EnterSourceFile - Add a source file to the top of the include stack and
+/// start lexing tokens from it instead of the current buffer.
+bool Preprocessor::EnterSourceFile(FileID FID, const DirectoryLookup *CurDir,
+                                   std::string &ErrorStr) {
+  assert(CurTokenLexer == 0 && "Cannot #include a file inside a macro!");
+  ++NumEnteredSourceFiles;
+
+  if (MaxIncludeStackDepth < IncludeMacroStack.size())
+    MaxIncludeStackDepth = IncludeMacroStack.size();
+
+  if (PTH) {
+    if (PTHLexer *PL = PTH->CreateLexer(FID)) {
+      EnterSourceFileWithPTH(PL, CurDir);
+      return false;
+    }
+  }
+  
+  // Get the MemoryBuffer for this FID, if it fails, we fail.
+  const llvm::MemoryBuffer *InputFile =
+    getSourceManager().getBuffer(FID, &ErrorStr);
+  if (!ErrorStr.empty())
+    return true;
+  
+  EnterSourceFileWithLexer(new Lexer(FID, InputFile, *this), CurDir);
+  return false;
+}
+
+/// EnterSourceFileWithLexer - Add a source file to the top of the include stack
+///  and start lexing tokens from it instead of the current buffer.
+void Preprocessor::EnterSourceFileWithLexer(Lexer *TheLexer,
+                                            const DirectoryLookup *CurDir) {
+
+  // Add the current lexer to the include stack.
+  if (CurPPLexer || CurTokenLexer)
+    PushIncludeMacroStack();
+
+  CurLexer.reset(TheLexer);
+  CurPPLexer = TheLexer;
+  CurDirLookup = CurDir;
+
+  // Notify the client, if desired, that we are in a new source file.
+  if (Callbacks && !CurLexer->Is_PragmaLexer) {
+    SrcMgr::CharacteristicKind FileType =
+       SourceMgr.getFileCharacteristic(CurLexer->getFileLoc());
+
+    Callbacks->FileChanged(CurLexer->getFileLoc(),
+                           PPCallbacks::EnterFile, FileType);
+  }
+}
+
+/// EnterSourceFileWithPTH - Add a source file to the top of the include stack
+/// and start getting tokens from it using the PTH cache.
+void Preprocessor::EnterSourceFileWithPTH(PTHLexer *PL,
+                                          const DirectoryLookup *CurDir) {
+
+  if (CurPPLexer || CurTokenLexer)
+    PushIncludeMacroStack();
+
+  CurDirLookup = CurDir;
+  CurPTHLexer.reset(PL);
+  CurPPLexer = CurPTHLexer.get();
+
+  // Notify the client, if desired, that we are in a new source file.
+  if (Callbacks) {
+    FileID FID = CurPPLexer->getFileID();
+    SourceLocation EnterLoc = SourceMgr.getLocForStartOfFile(FID);
+    SrcMgr::CharacteristicKind FileType =
+      SourceMgr.getFileCharacteristic(EnterLoc);
+    Callbacks->FileChanged(EnterLoc, PPCallbacks::EnterFile, FileType);
+  }
+}
+
+/// EnterMacro - Add a Macro to the top of the include stack and start lexing
+/// tokens from it instead of the current buffer.
+void Preprocessor::EnterMacro(Token &Tok, SourceLocation ILEnd,
+                              MacroArgs *Args) {
+  PushIncludeMacroStack();
+  CurDirLookup = 0;
+
+  if (NumCachedTokenLexers == 0) {
+    CurTokenLexer.reset(new TokenLexer(Tok, ILEnd, Args, *this));
+  } else {
+    CurTokenLexer.reset(TokenLexerCache[--NumCachedTokenLexers]);
+    CurTokenLexer->Init(Tok, ILEnd, Args);
+  }
+}
+
+/// EnterTokenStream - Add a "macro" context to the top of the include stack,
+/// which will cause the lexer to start returning the specified tokens.
+///
+/// If DisableMacroExpansion is true, tokens lexed from the token stream will
+/// not be subject to further macro expansion.  Otherwise, these tokens will
+/// be re-macro-expanded when/if expansion is enabled.
+///
+/// If OwnsTokens is false, this method assumes that the specified stream of
+/// tokens has a permanent owner somewhere, so they do not need to be copied.
+/// If it is true, it assumes the array of tokens is allocated with new[] and
+/// must be freed.
+///
+void Preprocessor::EnterTokenStream(const Token *Toks, unsigned NumToks,
+                                    bool DisableMacroExpansion,
+                                    bool OwnsTokens) {
+  // Save our current state.
+  PushIncludeMacroStack();
+  CurDirLookup = 0;
+
+  // Create a macro expander to expand from the specified token stream.
+  if (NumCachedTokenLexers == 0) {
+    CurTokenLexer.reset(new TokenLexer(Toks, NumToks, DisableMacroExpansion,
+                                       OwnsTokens, *this));
+  } else {
+    CurTokenLexer.reset(TokenLexerCache[--NumCachedTokenLexers]);
+    CurTokenLexer->Init(Toks, NumToks, DisableMacroExpansion, OwnsTokens);
+  }
+}
+
+/// HandleEndOfFile - This callback is invoked when the lexer hits the end of
+/// the current file.  This either returns the EOF token or pops a level off
+/// the include stack and keeps going.
+bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) {
+  assert(!CurTokenLexer &&
+         "Ending a file when currently in a macro!");
+
+  // See if this file had a controlling macro.
+  if (CurPPLexer) {  // Not ending a macro, ignore it.
+    if (const IdentifierInfo *ControllingMacro =
+          CurPPLexer->MIOpt.GetControllingMacroAtEndOfFile()) {
+      // Okay, this has a controlling macro, remember in HeaderFileInfo.
+      if (const FileEntry *FE =
+            SourceMgr.getFileEntryForID(CurPPLexer->getFileID()))
+        HeaderInfo.SetFileControllingMacro(FE, ControllingMacro);
+    }
+  }
+
+  // If this is a #include'd file, pop it off the include stack and continue
+  // lexing the #includer file.
+  if (!IncludeMacroStack.empty()) {
+    // We're done with the #included file.
+    RemoveTopOfLexerStack();
+
+    // Notify the client, if desired, that we are in a new source file.
+    if (Callbacks && !isEndOfMacro && CurPPLexer) {
+      SrcMgr::CharacteristicKind FileType =
+        SourceMgr.getFileCharacteristic(CurPPLexer->getSourceLocation());
+      Callbacks->FileChanged(CurPPLexer->getSourceLocation(),
+                             PPCallbacks::ExitFile, FileType);
+    }
+
+    // Client should lex another token.
+    return false;
+  }
+
+  // If the file ends with a newline, form the EOF token on the newline itself,
+  // rather than "on the line following it", which doesn't exist.  This makes
+  // diagnostics relating to the end of file include the last file that the user
+  // actually typed, which is goodness.
+  if (CurLexer) {
+    const char *EndPos = CurLexer->BufferEnd;
+    if (EndPos != CurLexer->BufferStart &&
+        (EndPos[-1] == '\n' || EndPos[-1] == '\r')) {
+      --EndPos;
+
+      // Handle \n\r and \r\n:
+      if (EndPos != CurLexer->BufferStart &&
+          (EndPos[-1] == '\n' || EndPos[-1] == '\r') &&
+          EndPos[-1] != EndPos[0])
+        --EndPos;
+    }
+
+    Result.startToken();
+    CurLexer->BufferPtr = EndPos;
+    CurLexer->FormTokenWithChars(Result, EndPos, tok::eof);
+
+    // We're done with the #included file.
+    CurLexer.reset();
+  } else {
+    assert(CurPTHLexer && "Got EOF but no current lexer set!");
+    CurPTHLexer->getEOF(Result);
+    CurPTHLexer.reset();
+  }
+
+  CurPPLexer = 0;
+
+  // This is the end of the top-level file.  If the diag::pp_macro_not_used
+  // diagnostic is enabled, look for macros that have not been used.
+  if (getDiagnostics().getDiagnosticLevel(diag::pp_macro_not_used) !=
+        Diagnostic::Ignored) {
+    for (macro_iterator I = macro_begin(false), E = macro_end(false); 
+         I != E; ++I)
+      if (!I->second->isUsed())
+        Diag(I->second->getDefinitionLoc(), diag::pp_macro_not_used);
+  }
+  return true;
+}
+
+/// HandleEndOfTokenLexer - This callback is invoked when the current TokenLexer
+/// hits the end of its token stream.
+bool Preprocessor::HandleEndOfTokenLexer(Token &Result) {
+  assert(CurTokenLexer && !CurPPLexer &&
+         "Ending a macro when currently in a #include file!");
+
+  // Delete or cache the now-dead macro expander.
+  if (NumCachedTokenLexers == TokenLexerCacheSize)
+    CurTokenLexer.reset();
+  else
+    TokenLexerCache[NumCachedTokenLexers++] = CurTokenLexer.take();
+
+  // Handle this like a #include file being popped off the stack.
+  return HandleEndOfFile(Result, true);
+}
+
+/// RemoveTopOfLexerStack - Pop the current lexer/macro exp off the top of the
+/// lexer stack.  This should only be used in situations where the current
+/// state of the top-of-stack lexer is unknown.
+void Preprocessor::RemoveTopOfLexerStack() {
+  assert(!IncludeMacroStack.empty() && "Ran out of stack entries to load");
+
+  if (CurTokenLexer) {
+    // Delete or cache the now-dead macro expander.
+    if (NumCachedTokenLexers == TokenLexerCacheSize)
+      CurTokenLexer.reset();
+    else
+      TokenLexerCache[NumCachedTokenLexers++] = CurTokenLexer.take();
+  }
+
+  PopIncludeMacroStack();
+}
+
+/// HandleMicrosoftCommentPaste - When the macro expander pastes together a
+/// comment (/##/) in microsoft mode, this method handles updating the current
+/// state, returning the token on the next source line.
+void Preprocessor::HandleMicrosoftCommentPaste(Token &Tok) {
+  assert(CurTokenLexer && !CurPPLexer &&
+         "Pasted comment can only be formed from macro");
+
+  // We handle this by scanning for the closest real lexer, switching it to
+  // raw mode and preprocessor mode.  This will cause it to return \n as an
+  // explicit EOM token.
+  PreprocessorLexer *FoundLexer = 0;
+  bool LexerWasInPPMode = false;
+  for (unsigned i = 0, e = IncludeMacroStack.size(); i != e; ++i) {
+    IncludeStackInfo &ISI = *(IncludeMacroStack.end()-i-1);
+    if (ISI.ThePPLexer == 0) continue;  // Scan for a real lexer.
+
+    // Once we find a real lexer, mark it as raw mode (disabling macro
+    // expansions) and preprocessor mode (return EOM).  We know that the lexer
+    // was *not* in raw mode before, because the macro that the comment came
+    // from was expanded.  However, it could have already been in preprocessor
+    // mode (#if COMMENT) in which case we have to return it to that mode and
+    // return EOM.
+    FoundLexer = ISI.ThePPLexer;
+    FoundLexer->LexingRawMode = true;
+    LexerWasInPPMode = FoundLexer->ParsingPreprocessorDirective;
+    FoundLexer->ParsingPreprocessorDirective = true;
+    break;
+  }
+
+  // Okay, we either found and switched over the lexer, or we didn't find a
+  // lexer.  In either case, finish off the macro the comment came from, getting
+  // the next token.
+  if (!HandleEndOfTokenLexer(Tok)) Lex(Tok);
+
+  // Discard comments as long as we don't have EOF or EOM.  This 'comments
+  // out' the rest of the line, including any tokens that came from other macros
+  // that were active, as in:
+  //  #define submacro a COMMENT b
+  //    submacro c
+  // which should lex to 'a' only: 'b' and 'c' should be removed.
+  while (Tok.isNot(tok::eom) && Tok.isNot(tok::eof))
+    Lex(Tok);
+
+  // If we got an eom token, then we successfully found the end of the line.
+  if (Tok.is(tok::eom)) {
+    assert(FoundLexer && "Can't get end of line without an active lexer");
+    // Restore the lexer back to normal mode instead of raw mode.
+    FoundLexer->LexingRawMode = false;
+
+    // If the lexer was already in preprocessor mode, just return the EOM token
+    // to finish the preprocessor line.
+    if (LexerWasInPPMode) return;
+
+    // Otherwise, switch out of PP mode and return the next lexed token.
+    FoundLexer->ParsingPreprocessorDirective = false;
+    return Lex(Tok);
+  }
+
+  // If we got an EOF token, then we reached the end of the token stream but
+  // didn't find an explicit \n.  This can only happen if there was no lexer
+  // active (an active lexer would return EOM at EOF if there was no \n in
+  // preprocessor directive mode), so just return EOF as our token.
+  assert(!FoundLexer && "Lexer should return EOM before EOF in PP mode");
+}
diff --git a/lib/Lex/PPMacroExpansion.cpp b/lib/Lex/PPMacroExpansion.cpp
new file mode 100644
index 0000000..b97ab24
--- /dev/null
+++ b/lib/Lex/PPMacroExpansion.cpp
@@ -0,0 +1,791 @@
+//===--- PPMacroExpansion.cpp - Top level Macro Expansion -----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the top level handling of macro expansion for the
+// preprocessor.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/Preprocessor.h"
+#include "MacroArgs.h"
+#include "clang/Lex/MacroInfo.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Lex/LexDiagnostic.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstdio>
+#include <ctime>
+using namespace clang;
+
+/// setMacroInfo - Specify a macro for this identifier.
+///
+void Preprocessor::setMacroInfo(IdentifierInfo *II, MacroInfo *MI) {
+  if (MI) {
+    Macros[II] = MI;
+    II->setHasMacroDefinition(true);
+  } else if (II->hasMacroDefinition()) {
+    Macros.erase(II);
+    II->setHasMacroDefinition(false);
+  }
+}
+
+/// RegisterBuiltinMacro - Register the specified identifier in the identifier
+/// table and mark it as a builtin macro to be expanded.
+static IdentifierInfo *RegisterBuiltinMacro(Preprocessor &PP, const char *Name){
+  // Get the identifier.
+  IdentifierInfo *Id = PP.getIdentifierInfo(Name);
+
+  // Mark it as being a macro that is builtin.
+  MacroInfo *MI = PP.AllocateMacroInfo(SourceLocation());
+  MI->setIsBuiltinMacro();
+  PP.setMacroInfo(Id, MI);
+  return Id;
+}
+
+
+/// RegisterBuiltinMacros - Register builtin macros, such as __LINE__ with the
+/// identifier table.
+void Preprocessor::RegisterBuiltinMacros() {
+  Ident__LINE__ = RegisterBuiltinMacro(*this, "__LINE__");
+  Ident__FILE__ = RegisterBuiltinMacro(*this, "__FILE__");
+  Ident__DATE__ = RegisterBuiltinMacro(*this, "__DATE__");
+  Ident__TIME__ = RegisterBuiltinMacro(*this, "__TIME__");
+  Ident__COUNTER__ = RegisterBuiltinMacro(*this, "__COUNTER__");
+  Ident_Pragma  = RegisterBuiltinMacro(*this, "_Pragma");
+
+  // GCC Extensions.
+  Ident__BASE_FILE__     = RegisterBuiltinMacro(*this, "__BASE_FILE__");
+  Ident__INCLUDE_LEVEL__ = RegisterBuiltinMacro(*this, "__INCLUDE_LEVEL__");
+  Ident__TIMESTAMP__     = RegisterBuiltinMacro(*this, "__TIMESTAMP__");
+
+  // Clang Extensions.
+  Ident__has_feature      = RegisterBuiltinMacro(*this, "__has_feature");
+  Ident__has_builtin      = RegisterBuiltinMacro(*this, "__has_builtin");
+  Ident__has_include      = RegisterBuiltinMacro(*this, "__has_include");
+  Ident__has_include_next = RegisterBuiltinMacro(*this, "__has_include_next");
+}
+
+/// isTrivialSingleTokenExpansion - Return true if MI, which has a single token
+/// in its expansion, currently expands to that token literally.
+static bool isTrivialSingleTokenExpansion(const MacroInfo *MI,
+                                          const IdentifierInfo *MacroIdent,
+                                          Preprocessor &PP) {
+  IdentifierInfo *II = MI->getReplacementToken(0).getIdentifierInfo();
+
+  // If the token isn't an identifier, it's always literally expanded.
+  if (II == 0) return true;
+
+  // If the identifier is a macro, and if that macro is enabled, it may be
+  // expanded so it's not a trivial expansion.
+  if (II->hasMacroDefinition() && PP.getMacroInfo(II)->isEnabled() &&
+      // Fast expanding "#define X X" is ok, because X would be disabled.
+      II != MacroIdent)
+    return false;
+
+  // If this is an object-like macro invocation, it is safe to trivially expand
+  // it.
+  if (MI->isObjectLike()) return true;
+
+  // If this is a function-like macro invocation, it's safe to trivially expand
+  // as long as the identifier is not a macro argument.
+  for (MacroInfo::arg_iterator I = MI->arg_begin(), E = MI->arg_end();
+       I != E; ++I)
+    if (*I == II)
+      return false;   // Identifier is a macro argument.
+
+  return true;
+}
+
+
+/// isNextPPTokenLParen - Determine whether the next preprocessor token to be
+/// lexed is a '('.  If so, consume the token and return true, if not, this
+/// method should have no observable side-effect on the lexed tokens.
+bool Preprocessor::isNextPPTokenLParen() {
+  // Do some quick tests for rejection cases.
+  unsigned Val;
+  if (CurLexer)
+    Val = CurLexer->isNextPPTokenLParen();
+  else if (CurPTHLexer)
+    Val = CurPTHLexer->isNextPPTokenLParen();
+  else
+    Val = CurTokenLexer->isNextTokenLParen();
+
+  if (Val == 2) {
+    // We have run off the end.  If it's a source file we don't
+    // examine enclosing ones (C99 5.1.1.2p4).  Otherwise walk up the
+    // macro stack.
+    if (CurPPLexer)
+      return false;
+    for (unsigned i = IncludeMacroStack.size(); i != 0; --i) {
+      IncludeStackInfo &Entry = IncludeMacroStack[i-1];
+      if (Entry.TheLexer)
+        Val = Entry.TheLexer->isNextPPTokenLParen();
+      else if (Entry.ThePTHLexer)
+        Val = Entry.ThePTHLexer->isNextPPTokenLParen();
+      else
+        Val = Entry.TheTokenLexer->isNextTokenLParen();
+
+      if (Val != 2)
+        break;
+
+      // Ran off the end of a source file?
+      if (Entry.ThePPLexer)
+        return false;
+    }
+  }
+
+  // Okay, if we know that the token is a '(', lex it and return.  Otherwise we
+  // have found something that isn't a '(' or we found the end of the
+  // translation unit.  In either case, return false.
+  return Val == 1;
+}
+
+/// HandleMacroExpandedIdentifier - If an identifier token is read that is to be
+/// expanded as a macro, handle it and return the next token as 'Identifier'.
+bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier,
+                                                 MacroInfo *MI) {
+  if (Callbacks) Callbacks->MacroExpands(Identifier, MI);
+
+  // If this is a macro expansion in the "#if !defined(x)" line for the file,
+  // then the macro could expand to different things in other contexts, we need
+  // to disable the optimization in this case.
+  if (CurPPLexer) CurPPLexer->MIOpt.ExpandedMacro();
+
+  // If this is a builtin macro, like __LINE__ or _Pragma, handle it specially.
+  if (MI->isBuiltinMacro()) {
+    ExpandBuiltinMacro(Identifier);
+    return false;
+  }
+
+  /// Args - If this is a function-like macro expansion, this contains,
+  /// for each macro argument, the list of tokens that were provided to the
+  /// invocation.
+  MacroArgs *Args = 0;
+
+  // Remember where the end of the instantiation occurred.  For an object-like
+  // macro, this is the identifier.  For a function-like macro, this is the ')'.
+  SourceLocation InstantiationEnd = Identifier.getLocation();
+
+  // If this is a function-like macro, read the arguments.
+  if (MI->isFunctionLike()) {
+    // C99 6.10.3p10: If the preprocessing token immediately after the macro
+    // name isn't a '(', this macro should not be expanded.
+    if (!isNextPPTokenLParen())
+      return true;
+
+    // Remember that we are now parsing the arguments to a macro invocation.
+    // Preprocessor directives used inside macro arguments are not portable, and
+    // this enables the warning.
+    InMacroArgs = true;
+    Args = ReadFunctionLikeMacroArgs(Identifier, MI, InstantiationEnd);
+
+    // Finished parsing args.
+    InMacroArgs = false;
+
+    // If there was an error parsing the arguments, bail out.
+    if (Args == 0) return false;
+
+    ++NumFnMacroExpanded;
+  } else {
+    ++NumMacroExpanded;
+  }
+
+  // Notice that this macro has been used.
+  MI->setIsUsed(true);
+
+  // If we started lexing a macro, enter the macro expansion body.
+
+  // If this macro expands to no tokens, don't bother to push it onto the
+  // expansion stack, only to take it right back off.
+  if (MI->getNumTokens() == 0) {
+    // No need for arg info.
+    if (Args) Args->destroy(*this);
+
+    // Ignore this macro use, just return the next token in the current
+    // buffer.
+    bool HadLeadingSpace = Identifier.hasLeadingSpace();
+    bool IsAtStartOfLine = Identifier.isAtStartOfLine();
+
+    Lex(Identifier);
+
+    // If the identifier isn't on some OTHER line, inherit the leading
+    // whitespace/first-on-a-line property of this token.  This handles
+    // stuff like "! XX," -> "! ," and "   XX," -> "    ,", when XX is
+    // empty.
+    if (!Identifier.isAtStartOfLine()) {
+      if (IsAtStartOfLine) Identifier.setFlag(Token::StartOfLine);
+      if (HadLeadingSpace) Identifier.setFlag(Token::LeadingSpace);
+    }
+    ++NumFastMacroExpanded;
+    return false;
+
+  } else if (MI->getNumTokens() == 1 &&
+             isTrivialSingleTokenExpansion(MI, Identifier.getIdentifierInfo(),
+                                           *this)) {
+    // Otherwise, if this macro expands into a single trivially-expanded
+    // token: expand it now.  This handles common cases like
+    // "#define VAL 42".
+
+    // No need for arg info.
+    if (Args) Args->destroy(*this);
+
+    // Propagate the isAtStartOfLine/hasLeadingSpace markers of the macro
+    // identifier to the expanded token.
+    bool isAtStartOfLine = Identifier.isAtStartOfLine();
+    bool hasLeadingSpace = Identifier.hasLeadingSpace();
+
+    // Remember where the token is instantiated.
+    SourceLocation InstantiateLoc = Identifier.getLocation();
+
+    // Replace the result token.
+    Identifier = MI->getReplacementToken(0);
+
+    // Restore the StartOfLine/LeadingSpace markers.
+    Identifier.setFlagValue(Token::StartOfLine , isAtStartOfLine);
+    Identifier.setFlagValue(Token::LeadingSpace, hasLeadingSpace);
+
+    // Update the tokens location to include both its instantiation and physical
+    // locations.
+    SourceLocation Loc =
+      SourceMgr.createInstantiationLoc(Identifier.getLocation(), InstantiateLoc,
+                                       InstantiationEnd,Identifier.getLength());
+    Identifier.setLocation(Loc);
+
+    // If this is #define X X, we must mark the result as unexpandible.
+    if (IdentifierInfo *NewII = Identifier.getIdentifierInfo())
+      if (getMacroInfo(NewII) == MI)
+        Identifier.setFlag(Token::DisableExpand);
+
+    // Since this is not an identifier token, it can't be macro expanded, so
+    // we're done.
+    ++NumFastMacroExpanded;
+    return false;
+  }
+
+  // Start expanding the macro.
+  EnterMacro(Identifier, InstantiationEnd, Args);
+
+  // Now that the macro is at the top of the include stack, ask the
+  // preprocessor to read the next token from it.
+  Lex(Identifier);
+  return false;
+}
+
+/// ReadFunctionLikeMacroArgs - After reading "MACRO" and knowing that the next
+/// token is the '(' of the macro, this method is invoked to read all of the
+/// actual arguments specified for the macro invocation.  This returns null on
+/// error.
+MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName,
+                                                   MacroInfo *MI,
+                                                   SourceLocation &MacroEnd) {
+  // The number of fixed arguments to parse.
+  unsigned NumFixedArgsLeft = MI->getNumArgs();
+  bool isVariadic = MI->isVariadic();
+
+  // Outer loop, while there are more arguments, keep reading them.
+  Token Tok;
+
+  // Read arguments as unexpanded tokens.  This avoids issues, e.g., where
+  // an argument value in a macro could expand to ',' or '(' or ')'.
+  LexUnexpandedToken(Tok);
+  assert(Tok.is(tok::l_paren) && "Error computing l-paren-ness?");
+
+  // ArgTokens - Build up a list of tokens that make up each argument.  Each
+  // argument is separated by an EOF token.  Use a SmallVector so we can avoid
+  // heap allocations in the common case.
+  llvm::SmallVector<Token, 64> ArgTokens;
+
+  unsigned NumActuals = 0;
+  while (Tok.isNot(tok::r_paren)) {
+    assert((Tok.is(tok::l_paren) || Tok.is(tok::comma)) &&
+           "only expect argument separators here");
+
+    unsigned ArgTokenStart = ArgTokens.size();
+    SourceLocation ArgStartLoc = Tok.getLocation();
+
+    // C99 6.10.3p11: Keep track of the number of l_parens we have seen.  Note
+    // that we already consumed the first one.
+    unsigned NumParens = 0;
+
+    while (1) {
+      // Read arguments as unexpanded tokens.  This avoids issues, e.g., where
+      // an argument value in a macro could expand to ',' or '(' or ')'.
+      LexUnexpandedToken(Tok);
+
+      if (Tok.is(tok::eof) || Tok.is(tok::eom)) { // "#if f(<eof>" & "#if f(\n"
+        Diag(MacroName, diag::err_unterm_macro_invoc);
+        // Do not lose the EOF/EOM.  Return it to the client.
+        MacroName = Tok;
+        return 0;
+      } else if (Tok.is(tok::r_paren)) {
+        // If we found the ) token, the macro arg list is done.
+        if (NumParens-- == 0) {
+          MacroEnd = Tok.getLocation();
+          break;
+        }
+      } else if (Tok.is(tok::l_paren)) {
+        ++NumParens;
+      } else if (Tok.is(tok::comma) && NumParens == 0) {
+        // Comma ends this argument if there are more fixed arguments expected.
+        // However, if this is a variadic macro, and this is part of the
+        // variadic part, then the comma is just an argument token.
+        if (!isVariadic) break;
+        if (NumFixedArgsLeft > 1)
+          break;
+      } else if (Tok.is(tok::comment) && !KeepMacroComments) {
+        // If this is a comment token in the argument list and we're just in
+        // -C mode (not -CC mode), discard the comment.
+        continue;
+      } else if (Tok.getIdentifierInfo() != 0) {
+        // Reading macro arguments can cause macros that we are currently
+        // expanding from to be popped off the expansion stack.  Doing so causes
+        // them to be reenabled for expansion.  Here we record whether any
+        // identifiers we lex as macro arguments correspond to disabled macros.
+        // If so, we mark the token as noexpand.  This is a subtle aspect of
+        // C99 6.10.3.4p2.
+        if (MacroInfo *MI = getMacroInfo(Tok.getIdentifierInfo()))
+          if (!MI->isEnabled())
+            Tok.setFlag(Token::DisableExpand);
+      }
+      ArgTokens.push_back(Tok);
+    }
+
+    // If this was an empty argument list foo(), don't add this as an empty
+    // argument.
+    if (ArgTokens.empty() && Tok.getKind() == tok::r_paren)
+      break;
+
+    // If this is not a variadic macro, and too many args were specified, emit
+    // an error.
+    if (!isVariadic && NumFixedArgsLeft == 0) {
+      if (ArgTokens.size() != ArgTokenStart)
+        ArgStartLoc = ArgTokens[ArgTokenStart].getLocation();
+
+      // Emit the diagnostic at the macro name in case there is a missing ).
+      // Emitting it at the , could be far away from the macro name.
+      Diag(ArgStartLoc, diag::err_too_many_args_in_macro_invoc);
+      return 0;
+    }
+
+    // Empty arguments are standard in C99 and supported as an extension in
+    // other modes.
+    if (ArgTokens.size() == ArgTokenStart && !Features.C99)
+      Diag(Tok, diag::ext_empty_fnmacro_arg);
+
+    // Add a marker EOF token to the end of the token list for this argument.
+    Token EOFTok;
+    EOFTok.startToken();
+    EOFTok.setKind(tok::eof);
+    EOFTok.setLocation(Tok.getLocation());
+    EOFTok.setLength(0);
+    ArgTokens.push_back(EOFTok);
+    ++NumActuals;
+    assert(NumFixedArgsLeft != 0 && "Too many arguments parsed");
+    --NumFixedArgsLeft;
+  }
+
+  // Okay, we found the r_paren.  Check to see if we parsed too few
+  // arguments.
+  unsigned MinArgsExpected = MI->getNumArgs();
+
+  // See MacroArgs instance var for description of this.
+  bool isVarargsElided = false;
+
+  if (NumActuals < MinArgsExpected) {
+    // There are several cases where too few arguments is ok, handle them now.
+    if (NumActuals == 0 && MinArgsExpected == 1) {
+      // #define A(X)  or  #define A(...)   ---> A()
+
+      // If there is exactly one argument, and that argument is missing,
+      // then we have an empty "()" argument list.  This is fine, even if
+      // the macro expects one argument (the argument is just empty).
+      isVarargsElided = MI->isVariadic();
+    } else if (MI->isVariadic() &&
+               (NumActuals+1 == MinArgsExpected ||  // A(x, ...) -> A(X)
+                (NumActuals == 0 && MinArgsExpected == 2))) {// A(x,...) -> A()
+      // Varargs where the named vararg parameter is missing: ok as extension.
+      // #define A(x, ...)
+      // A("blah")
+      Diag(Tok, diag::ext_missing_varargs_arg);
+
+      // Remember this occurred, allowing us to elide the comma when used for
+      // cases like:
+      //   #define A(x, foo...) blah(a, ## foo)
+      //   #define B(x, ...) blah(a, ## __VA_ARGS__)
+      //   #define C(...) blah(a, ## __VA_ARGS__)
+      //  A(x) B(x) C()
+      isVarargsElided = true;
+    } else {
+      // Otherwise, emit the error.
+      Diag(Tok, diag::err_too_few_args_in_macro_invoc);
+      return 0;
+    }
+
+    // Add a marker EOF token to the end of the token list for this argument.
+    SourceLocation EndLoc = Tok.getLocation();
+    Tok.startToken();
+    Tok.setKind(tok::eof);
+    Tok.setLocation(EndLoc);
+    Tok.setLength(0);
+    ArgTokens.push_back(Tok);
+
+    // If we expect two arguments, add both as empty.
+    if (NumActuals == 0 && MinArgsExpected == 2)
+      ArgTokens.push_back(Tok);
+
+  } else if (NumActuals > MinArgsExpected && !MI->isVariadic()) {
+    // Emit the diagnostic at the macro name in case there is a missing ).
+    // Emitting it at the , could be far away from the macro name.
+    Diag(MacroName, diag::err_too_many_args_in_macro_invoc);
+    return 0;
+  }
+
+  return MacroArgs::create(MI, ArgTokens.data(), ArgTokens.size(),
+                           isVarargsElided, *this);
+}
+
+/// ComputeDATE_TIME - Compute the current time, enter it into the specified
+/// scratch buffer, then return DATELoc/TIMELoc locations with the position of
+/// the identifier tokens inserted.
+static void ComputeDATE_TIME(SourceLocation &DATELoc, SourceLocation &TIMELoc,
+                             Preprocessor &PP) {
+  // Sample the clock exactly once so __DATE__ and __TIME__ agree.
+  time_t Now = time(0);
+  struct tm *LT = localtime(&Now);
+
+  static const char * const MonthNames[] = {
+    "Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"
+  };
+
+  // Format the quoted "Mmm dd yyyy" date string and copy it into a scratch
+  // buffer, remembering where the resulting token landed.
+  char Buffer[100];
+  sprintf(Buffer, "\"%s %2d %4d\"", MonthNames[LT->tm_mon], LT->tm_mday,
+          LT->tm_year+1900);
+
+  Token ScratchTok;
+  ScratchTok.startToken();
+  PP.CreateString(Buffer, strlen(Buffer), ScratchTok);
+  DATELoc = ScratchTok.getLocation();
+
+  // Likewise for the quoted "hh:mm:ss" time string.
+  sprintf(Buffer, "\"%02d:%02d:%02d\"", LT->tm_hour, LT->tm_min, LT->tm_sec);
+  PP.CreateString(Buffer, strlen(Buffer), ScratchTok);
+  TIMELoc = ScratchTok.getLocation();
+}
+
+
+/// HasFeature - Return true if we recognize and implement the feature
+/// specified by the identifier (the argument to __has_feature).
+static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) {
+  const LangOptions &LangOpts = PP.getLangOptions();
+  llvm::StringRef Feature = II->getName();
+
+  // Features tied to a specific language option.
+  if (Feature == "blocks")              return LangOpts.Blocks;
+  if (Feature == "cxx_rtti")            return LangOpts.RTTI;
+  if (Feature == "cxx_exceptions")      return LangOpts.Exceptions;
+  if (Feature == "objc_nonfragile_abi") return LangOpts.ObjCNonFragileABI;
+
+  // C++0x features, advertised only in C++0x mode.
+  if (Feature == "cxx_decltype" ||
+      Feature == "cxx_auto_type" ||
+      Feature == "cxx_attributes" ||
+      Feature == "cxx_static_assert" ||
+      Feature == "cxx_deleted_functions")
+    return LangOpts.CPlusPlus0x;
+  // Not implemented yet: cxx_lambdas, cxx_nullptr, cxx_concepts,
+  // cxx_rvalue_references, cxx_variadic_templates.
+
+  // Attribute-related features that are always available.
+  if (Feature == "attribute_overloadable" ||
+      Feature == "attribute_ext_vector_type" ||
+      Feature == "attribute_analyzer_noreturn" ||
+      Feature == "attribute_ns_returns_retained" ||
+      Feature == "attribute_cf_returns_retained")
+    return true;
+
+  // Anything unrecognized is unsupported.
+  return false;
+}
+
+/// EvaluateHasIncludeCommon - Process a '__has_include("path")'
+/// or '__has_include_next("path")' expression.
+/// Returns true if successful.
+///
+/// \arg Result [out] set to whether the named file was found on the search
+///   path.
+/// \arg Tok the builtin's identifier token on entry; left on the closing
+///   ')' on success.
+/// \arg II the builtin's identifier, used only for diagnostics.
+/// \arg LookupFrom directory to start the search from, or null to search
+///   the entire include path (plain __has_include behavior).
+static bool EvaluateHasIncludeCommon(bool &Result, Token &Tok,
+        IdentifierInfo *II, Preprocessor &PP,
+        const DirectoryLookup *LookupFrom) {
+  SourceLocation LParenLoc;
+
+  // Get '('.
+  PP.LexNonComment(Tok);
+
+  // Ensure we have a '('.
+  if (Tok.isNot(tok::l_paren)) {
+    PP.Diag(Tok.getLocation(), diag::err_pp_missing_lparen) << II->getName();
+    return false;
+  }
+
+  // Save '(' location for possible missing ')' message.
+  LParenLoc = Tok.getLocation();
+
+  // Get the file name.
+  PP.getCurrentLexer()->LexIncludeFilename(Tok);
+
+  // Reserve a buffer to get the spelling.
+  llvm::SmallString<128> FilenameBuffer;
+  llvm::StringRef Filename;
+
+  switch (Tok.getKind()) {
+  case tok::eom:
+    // If the token kind is EOM, the error has already been diagnosed.
+    return false;
+
+  case tok::angle_string_literal:
+  case tok::string_literal: {
+    FilenameBuffer.resize(Tok.getLength());
+    const char *FilenameStart = &FilenameBuffer[0];
+    unsigned Len = PP.getSpelling(Tok, FilenameStart);
+    Filename = llvm::StringRef(FilenameStart, Len);
+    break;
+  }
+
+  case tok::less:
+    // This could be a <foo/bar.h> file coming from a macro expansion.  In this
+    // case, glue the tokens together into FilenameBuffer and interpret those.
+    FilenameBuffer.push_back('<');
+    if (PP.ConcatenateIncludeName(FilenameBuffer))
+      return false;   // Found <eom> but no ">"?  Diagnostic already emitted.
+    Filename = FilenameBuffer.str();
+    break;
+  default:
+    PP.Diag(Tok.getLocation(), diag::err_pp_expects_filename);
+    return false;
+  }
+
+  bool isAngled = PP.GetIncludeFilenameSpelling(Tok.getLocation(), Filename);
+  // If GetIncludeFilenameSpelling set the start ptr to null, there was an
+  // error.
+  if (Filename.empty())
+    return false;
+
+  // Search include directories.
+  const DirectoryLookup *CurDir;
+  const FileEntry *File = PP.LookupFile(Filename, isAngled, LookupFrom, CurDir);
+
+  // Get the result value.  Result = true means the file exists.
+  Result = File != 0;
+
+  // Get ')'.
+  PP.LexNonComment(Tok);
+
+  // Ensure we have a trailing ).
+  if (Tok.isNot(tok::r_paren)) {
+    PP.Diag(Tok.getLocation(), diag::err_pp_missing_rparen) << II->getName();
+    PP.Diag(LParenLoc, diag::note_matching) << "(";
+    return false;
+  }
+
+  return true;
+}
+
+/// EvaluateHasInclude - Process a '__has_include("path")' expression.
+/// Returns true if successful.
+static bool EvaluateHasInclude(bool &Result, Token &Tok, IdentifierInfo *II,
+                               Preprocessor &PP) {
+  // Plain __has_include searches from the top of the include path, so no
+  // starting directory is supplied.
+  return EvaluateHasIncludeCommon(Result, Tok, II, PP, NULL);
+}
+
+/// EvaluateHasIncludeNext - Process '__has_include_next("path")' expression.
+/// Returns true if successful.
+static bool EvaluateHasIncludeNext(bool &Result, Token &Tok,
+                                   IdentifierInfo *II, Preprocessor &PP) {
+  // Like __has_include, except the search starts *after* the directory the
+  // current file was found in.  When that is impossible, warn and fall back
+  // to a full search.
+  const DirectoryLookup *StartDir = PP.GetCurDirLookup();
+
+  if (PP.isInPrimaryFile()) {
+    // The main source file was not reached via the include path at all.
+    StartDir = 0;
+    PP.Diag(Tok, diag::pp_include_next_in_primary);
+  } else if (StartDir == 0) {
+    // The current file was named by absolute path; there is no "next" dir.
+    PP.Diag(Tok, diag::pp_include_next_absolute_path);
+  } else {
+    // Start looking up in the next directory.
+    ++StartDir;
+  }
+
+  return EvaluateHasIncludeCommon(Result, Tok, II, PP, StartDir);
+}
+
+/// ExpandBuiltinMacro - If an identifier token is read that is to be expanded
+/// as a builtin macro, handle it and return the next token as 'Tok'.
+void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
+  // Figure out which token this is.
+  IdentifierInfo *II = Tok.getIdentifierInfo();
+  assert(II && "Can't be a macro without id info!");
+
+  // If this is an _Pragma directive, expand it, invoke the pragma handler, then
+  // lex the token after it.
+  if (II == Ident_Pragma)
+    return Handle_Pragma(Tok);
+
+  ++NumBuiltinMacroExpanded;
+
+  // Most branches stream the expansion text into OS; it becomes a single
+  // token via the CreateString call at the bottom.  (__DATE__ and __TIME__
+  // return early with cached strings instead.)
+  llvm::SmallString<128> TmpBuffer;
+  llvm::raw_svector_ostream OS(TmpBuffer);
+
+  // Set up the return result.
+  Tok.setIdentifierInfo(0);
+  Tok.clearFlag(Token::NeedsCleaning);
+
+  if (II == Ident__LINE__) {
+    // C99 6.10.8: "__LINE__: The presumed line number (within the current
+    // source file) of the current source line (an integer constant)".  This can
+    // be affected by #line.
+    SourceLocation Loc = Tok.getLocation();
+
+    // Advance to the location of the first _, this might not be the first byte
+    // of the token if it starts with an escaped newline.
+    Loc = AdvanceToTokenCharacter(Loc, 0);
+
+    // One wrinkle here is that GCC expands __LINE__ to location of the *end* of
+    // a macro instantiation.  This doesn't matter for object-like macros, but
+    // can matter for a function-like macro that expands to contain __LINE__.
+    // Skip down through instantiation points until we find a file loc for the
+    // end of the instantiation history.
+    Loc = SourceMgr.getInstantiationRange(Loc).second;
+    PresumedLoc PLoc = SourceMgr.getPresumedLoc(Loc);
+
+    // __LINE__ expands to a simple numeric value.
+    OS << PLoc.getLine();
+    Tok.setKind(tok::numeric_constant);
+  } else if (II == Ident__FILE__ || II == Ident__BASE_FILE__) {
+    // C99 6.10.8: "__FILE__: The presumed name of the current source file (a
+    // character string literal)". This can be affected by #line.
+    PresumedLoc PLoc = SourceMgr.getPresumedLoc(Tok.getLocation());
+
+    // __BASE_FILE__ is a GNU extension that returns the top of the presumed
+    // #include stack instead of the current file.
+    if (II == Ident__BASE_FILE__) {
+      // Walk up the presumed include stack until there is nowhere to go.
+      SourceLocation NextLoc = PLoc.getIncludeLoc();
+      while (NextLoc.isValid()) {
+        PLoc = SourceMgr.getPresumedLoc(NextLoc);
+        NextLoc = PLoc.getIncludeLoc();
+      }
+    }
+
+    // Escape this filename.  Turn '\' -> '\\' '"' -> '\"'
+    llvm::SmallString<128> FN;
+    FN += PLoc.getFilename();
+    Lexer::Stringify(FN);
+    OS << '"' << FN.str() << '"';
+    Tok.setKind(tok::string_literal);
+  } else if (II == Ident__DATE__) {
+    // Every __DATE__ shares the one cached string: the token is an
+    // instantiation of the cached location, so no new text is created.
+    if (!DATELoc.isValid())
+      ComputeDATE_TIME(DATELoc, TIMELoc, *this);
+    Tok.setKind(tok::string_literal);
+    Tok.setLength(strlen("\"Mmm dd yyyy\""));
+    Tok.setLocation(SourceMgr.createInstantiationLoc(DATELoc, Tok.getLocation(),
+                                                     Tok.getLocation(),
+                                                     Tok.getLength()));
+    return;
+  } else if (II == Ident__TIME__) {
+    // Same caching scheme as __DATE__ above.
+    if (!TIMELoc.isValid())
+      ComputeDATE_TIME(DATELoc, TIMELoc, *this);
+    Tok.setKind(tok::string_literal);
+    Tok.setLength(strlen("\"hh:mm:ss\""));
+    Tok.setLocation(SourceMgr.createInstantiationLoc(TIMELoc, Tok.getLocation(),
+                                                     Tok.getLocation(),
+                                                     Tok.getLength()));
+    return;
+  } else if (II == Ident__INCLUDE_LEVEL__) {
+    // Compute the presumed include depth of this token.  This can be affected
+    // by GNU line markers.
+    unsigned Depth = 0;
+
+    PresumedLoc PLoc = SourceMgr.getPresumedLoc(Tok.getLocation());
+    PLoc = SourceMgr.getPresumedLoc(PLoc.getIncludeLoc());
+    for (; PLoc.isValid(); ++Depth)
+      PLoc = SourceMgr.getPresumedLoc(PLoc.getIncludeLoc());
+
+    // __INCLUDE_LEVEL__ expands to a simple numeric value.
+    OS << Depth;
+    Tok.setKind(tok::numeric_constant);
+  } else if (II == Ident__TIMESTAMP__) {
+    // MSVC, ICC, GCC, VisualAge C++ extension.  The generated string should be
+    // of the form "Ddd Mmm dd hh::mm::ss yyyy", which is returned by asctime.
+
+    // Get the file that we are lexing out of.  If we're currently lexing from
+    // a macro, dig into the include stack.
+    const FileEntry *CurFile = 0;
+    PreprocessorLexer *TheLexer = getCurrentFileLexer();
+
+    if (TheLexer)
+      CurFile = SourceMgr.getFileEntryForID(TheLexer->getFileID());
+
+    const char *Result;
+    if (CurFile) {
+      time_t TT = CurFile->getModificationTime();
+      struct tm *TM = localtime(&TT);
+      Result = asctime(TM);
+    } else {
+      // No file entry: emit a placeholder with the same shape as asctime's
+      // output (including the trailing newline stripped below).
+      Result = "??? ??? ?? ??:??:?? ????\n";
+    }
+    // Surround the string with " and strip the trailing newline.
+    OS << '"' << llvm::StringRef(Result, strlen(Result)-1) << '"';
+    Tok.setKind(tok::string_literal);
+  } else if (II == Ident__COUNTER__) {
+    // __COUNTER__ expands to a simple numeric value.
+    OS << CounterValue++;
+    Tok.setKind(tok::numeric_constant);
+  } else if (II == Ident__has_feature ||
+             II == Ident__has_builtin) {
+    // The argument to these two builtins should be a parenthesized identifier.
+    SourceLocation StartLoc = Tok.getLocation();
+
+    bool IsValid = false;
+    IdentifierInfo *FeatureII = 0;
+
+    // Read the '('.
+    Lex(Tok);
+    if (Tok.is(tok::l_paren)) {
+      // Read the identifier
+      Lex(Tok);
+      if (Tok.is(tok::identifier)) {
+        FeatureII = Tok.getIdentifierInfo();
+
+        // Read the ')'.
+        Lex(Tok);
+        if (Tok.is(tok::r_paren))
+          IsValid = true;
+      }
+    }
+
+    // A malformed argument still expands (to 0) after diagnosing.
+    bool Value = false;
+    if (!IsValid)
+      Diag(StartLoc, diag::err_feature_check_malformed);
+    else if (II == Ident__has_builtin) {
+      // Check for a builtin is trivial.
+      Value = FeatureII->getBuiltinID() != 0;
+    } else {
+      assert(II == Ident__has_feature && "Must be feature check");
+      Value = HasFeature(*this, FeatureII);
+    }
+
+    OS << (int)Value;
+    Tok.setKind(tok::numeric_constant);
+  } else if (II == Ident__has_include ||
+             II == Ident__has_include_next) {
+    // The argument to these two builtins should be a parenthesized
+    // file name string literal using angle brackets (<>) or
+    // double-quotes ("").
+    bool Value = false;
+    bool IsValid;
+    if (II == Ident__has_include)
+      IsValid = EvaluateHasInclude(Value, Tok, II, *this);
+    else
+      IsValid = EvaluateHasIncludeNext(Value, Tok, II, *this);
+    OS << (int)Value;
+    Tok.setKind(tok::numeric_constant);
+  } else {
+    assert(0 && "Unknown identifier!");
+  }
+  // Turn the accumulated text into the expansion token, mapped into a
+  // scratch buffer at the original token's location.
+  CreateString(OS.str().data(), OS.str().size(), Tok, Tok.getLocation());
+}
diff --git a/lib/Lex/PTHLexer.cpp b/lib/Lex/PTHLexer.cpp
new file mode 100644
index 0000000..a64008a
--- /dev/null
+++ b/lib/Lex/PTHLexer.cpp
@@ -0,0 +1,685 @@
+//===--- PTHLexer.cpp - Lex from a token stream ---------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PTHLexer interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Basic/TokenKinds.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/OnDiskHashTable.h"
+#include "clang/Lex/LexDiagnostic.h"
+#include "clang/Lex/PTHLexer.h"
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/PTHManager.h"
+#include "clang/Lex/Token.h"
+#include "clang/Lex/Preprocessor.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <sys/stat.h>
+using namespace clang;
+using namespace clang::io;
+
+#define DISK_TOKEN_SIZE (1+1+2+4+4)
+
+//===----------------------------------------------------------------------===//
+// PTHLexer methods.
+//===----------------------------------------------------------------------===//
+
+PTHLexer::PTHLexer(Preprocessor &PP, FileID FID, const unsigned char *D,
+                   const unsigned char *ppcond, PTHManager &PM)
+  : PreprocessorLexer(&PP, FID), TokBuf(D), CurPtr(D), LastHashTokPtr(0),
+    PPCond(ppcond), CurPPCondPtr(ppcond), PTHMgr(PM) {
+
+  // Cache the start-of-file location; token offsets read from the PTH data
+  // are applied relative to it (see Lex and getSourceLocation).
+  FileStartLoc = PP.getSourceManager().getLocForStartOfFile(FID);
+}
+
+void PTHLexer::Lex(Token& Tok) {
+LexNextToken:
+
+  //===--------------------------------------==//
+  // Read the raw token data.
+  //===--------------------------------------==//
+  // On-disk record layout (DISK_TOKEN_SIZE bytes): one 32-bit word packing
+  // kind (low byte), flags (second byte) and length (high 16 bits), then a
+  // 32-bit identifier/spelling ID and a 32-bit offset within the file.
+
+  // Shadow CurPtr into an automatic variable.
+  const unsigned char *CurPtrShadow = CurPtr;
+
+  // Read in the data for the token.
+  unsigned Word0 = ReadLE32(CurPtrShadow);
+  uint32_t IdentifierID = ReadLE32(CurPtrShadow);
+  uint32_t FileOffset = ReadLE32(CurPtrShadow);
+
+  tok::TokenKind TKind = (tok::TokenKind) (Word0 & 0xFF);
+  Token::TokenFlags TFlags = (Token::TokenFlags) ((Word0 >> 8) & 0xFF);
+  uint32_t Len = Word0 >> 16;
+
+  CurPtr = CurPtrShadow;
+
+  //===--------------------------------------==//
+  // Construct the token itself.
+  //===--------------------------------------==//
+
+  Tok.startToken();
+  Tok.setKind(TKind);
+  Tok.setFlag(TFlags);
+  assert(!LexingRawMode);
+  Tok.setLocation(FileStartLoc.getFileLocWithOffset(FileOffset));
+  Tok.setLength(Len);
+
+  // Handle identifiers.
+  if (Tok.isLiteral()) {
+    // For literals, IdentifierID is an offset into the spelling cache.
+    Tok.setLiteralData((const char*) (PTHMgr.SpellingBase + IdentifierID));
+  }
+  else if (IdentifierID) {
+    // Identifier IDs are stored 1-based; 0 means "no identifier info".
+    MIOpt.ReadToken();
+    IdentifierInfo *II = PTHMgr.GetIdentifierInfo(IdentifierID-1);
+
+    Tok.setIdentifierInfo(II);
+
+    // Change the kind of this identifier to the appropriate token kind, e.g.
+    // turning "for" into a keyword.
+    Tok.setKind(II->getTokenID());
+
+    if (II->isHandleIdentifierCase())
+      PP->HandleIdentifier(Tok);
+    return;
+  }
+
+  //===--------------------------------------==//
+  // Process the token.
+  //===--------------------------------------==//
+  if (TKind == tok::eof) {
+    // Save the end-of-file token.
+    EofToken = Tok;
+
+    // Cache the PP pointer before HandleEndOfFile, which may pop this lexer
+    // off the include stack (presumably destroying it) — TODO confirm.
+    Preprocessor *PPCache = PP;
+
+    assert(!ParsingPreprocessorDirective);
+    assert(!LexingRawMode);
+
+    // FIXME: Issue diagnostics similar to Lexer.
+    if (PP->HandleEndOfFile(Tok, false))
+      return;
+
+    assert(PPCache && "Raw buffer::LexEndOfFile should return a token");
+    return PPCache->Lex(Tok);
+  }
+
+  if (TKind == tok::hash && Tok.isAtStartOfLine()) {
+    // Remember where this '#' lives; SkipBlock() keys its side-table walk
+    // off LastHashTokPtr.
+    LastHashTokPtr = CurPtr - DISK_TOKEN_SIZE;
+    assert(!LexingRawMode);
+    PP->HandleDirective(Tok);
+
+    // If this lexer is still the current one, keep lexing from it;
+    // otherwise ask the preprocessor for the next token.
+    if (PP->isCurrentLexer(this))
+      goto LexNextToken;
+
+    return PP->Lex(Tok);
+  }
+
+  if (TKind == tok::eom) {
+    assert(ParsingPreprocessorDirective);
+    ParsingPreprocessorDirective = false;
+    return;
+  }
+
+  MIOpt.ReadToken();
+}
+
+// FIXME: We can just grab the last token instead of storing a copy
+// into EofToken.
+void PTHLexer::getEOF(Token& Tok) {
+  // Lex() stashed the eof token when it was read; hand back that copy.
+  assert(EofToken.is(tok::eof));
+  Tok = EofToken;
+}
+
+void PTHLexer::DiscardToEndOfLine() {
+  assert(ParsingPreprocessorDirective && ParsingFilename == false &&
+         "Must be in a preprocessing directive!");
+
+  // Discarding the rest of the line also terminates the current directive.
+  ParsingPreprocessorDirective = false;
+
+  // Scan forward over the raw on-disk token records, looking only at the
+  // kind byte and the flags byte of each record.  Skipping the full token
+  // reconstruction avoids copies and IdentifierInfo* lookups.
+  const unsigned char *Cursor = CurPtr;
+  for (;;) {
+    // Stop if the next record is the end-of-file token...
+    if ((tok::TokenKind) (uint8_t) Cursor[0] == tok::eof)
+      break;
+
+    // ...or if it begins a new line.
+    if (((Token::TokenFlags) (uint8_t) Cursor[1]) & Token::StartOfLine)
+      break;
+
+    // Otherwise step over this record.
+    Cursor += DISK_TOKEN_SIZE;
+  }
+
+  CurPtr = Cursor;
+}
+
+/// SkipBlock - Used by Preprocessor to skip the current conditional block.
+/// Each side-table record is a pair of 32-bit values: the offset of a '#'
+/// token within the token stream, and the side-table index of the matching
+/// #else/#elif/#endif record (0 for a #endif).  Returns true if the skip
+/// ended on a #endif (which is consumed), false otherwise.
+bool PTHLexer::SkipBlock() {
+  assert(CurPPCondPtr && "No cached PP conditional information.");
+  assert(LastHashTokPtr && "No known '#' token.");
+
+  const unsigned char* HashEntryI = 0;
+  uint32_t Offset;
+  uint32_t TableIdx;
+
+  do {
+    // Read the token offset from the side-table.
+    Offset = ReadLE32(CurPPCondPtr);
+
+    // Read the target table index from the side-table.
+    TableIdx = ReadLE32(CurPPCondPtr);
+
+    // Compute the actual memory address of the '#' token data for this entry.
+    HashEntryI = TokBuf + Offset;
+
+    // Optimization: "Sibling jumping".  #if...#else...#endif blocks can
+    //  contain nested blocks.  In the side-table we can jump over these
+    //  nested blocks instead of doing a linear search if the next "sibling"
+    //  entry is not at a location greater than LastHashTokPtr.
+    if (HashEntryI < LastHashTokPtr && TableIdx) {
+      // In the side-table we are still at an entry for a '#' token that
+      // is earlier than the last one we saw.  Check if the location we would
+      // stride gets us closer.
+      const unsigned char* NextPPCondPtr =
+        PPCond + TableIdx*(sizeof(uint32_t)*2);
+      assert(NextPPCondPtr >= CurPPCondPtr);
+      // Read where we should jump to.
+      uint32_t TmpOffset = ReadLE32(NextPPCondPtr);
+      const unsigned char* HashEntryJ = TokBuf + TmpOffset;
+
+      if (HashEntryJ <= LastHashTokPtr) {
+        // Jump directly to the next entry in the side table.
+        HashEntryI = HashEntryJ;
+        Offset = TmpOffset;
+        TableIdx = ReadLE32(NextPPCondPtr);
+        CurPPCondPtr = NextPPCondPtr;
+      }
+    }
+  }
+  while (HashEntryI < LastHashTokPtr);
+  assert(HashEntryI == LastHashTokPtr && "No PP-cond entry found for '#'");
+  assert(TableIdx && "No jumping from #endifs.");
+
+  // Update our side-table iterator.
+  const unsigned char* NextPPCondPtr = PPCond + TableIdx*(sizeof(uint32_t)*2);
+  assert(NextPPCondPtr >= CurPPCondPtr);
+  CurPPCondPtr = NextPPCondPtr;
+
+  // Read where we should jump to.
+  HashEntryI = TokBuf + ReadLE32(NextPPCondPtr);
+  uint32_t NextIdx = ReadLE32(NextPPCondPtr);
+
+  // By construction NextIdx will be zero if this is a #endif.  This is useful
+  // to know to obviate lexing another token.
+  bool isEndif = NextIdx == 0;
+
+  // This case can occur when we see something like this:
+  //
+  //  #if ...
+  //   /* a comment or nothing */
+  //  #elif
+  //
+  // If we are skipping the first #if block it will be the case that CurPtr
+  // already points 'elif'.  Just return.
+
+  if (CurPtr > HashEntryI) {
+    assert(CurPtr == HashEntryI + DISK_TOKEN_SIZE);
+    // Did we reach a #endif?  If so, go ahead and consume that token as well.
+    if (isEndif)
+      CurPtr += DISK_TOKEN_SIZE*2;
+    else
+      LastHashTokPtr = HashEntryI;
+
+    return isEndif;
+  }
+
+  // Otherwise, we need to advance.  Update CurPtr to point to the '#' token.
+  CurPtr = HashEntryI;
+
+  // Update the location of the last observed '#'.  This is useful if we
+  // are skipping multiple blocks.
+  LastHashTokPtr = CurPtr;
+
+  // Skip the '#' token.
+  assert(((tok::TokenKind)*CurPtr) == tok::hash);
+  CurPtr += DISK_TOKEN_SIZE;
+
+  // Did we reach a #endif?  If so, go ahead and consume that token as well.
+  if (isEndif) { CurPtr += DISK_TOKEN_SIZE*2; }
+
+  return isEndif;
+}
+
+SourceLocation PTHLexer::getSourceLocation() {
+  // Not on the hot path: this is used to recover the location of the next
+  // token when returning to this lexer after a #included file has been
+  // handled.  Instead of decoding a full token, read only the 32-bit file
+  // offset, which occupies the last 4 bytes of the on-disk token record.
+  // NOTE: This is a virtual function; hence it is defined out-of-line.
+  const unsigned char *Ptr = CurPtr + (DISK_TOKEN_SIZE - 4);
+  return FileStartLoc.getFileLocWithOffset(ReadLE32(Ptr));
+}
+
+//===----------------------------------------------------------------------===//
+// PTH file lookup: map from strings to file data.
+//===----------------------------------------------------------------------===//
+
+/// PTHFileLookup - This internal data structure is used by the PTHManager
+///  to map from FileEntry objects managed by FileManager to offsets within
+///  the PTH file.
+namespace {
+/// PTHFileData - Payload of a PTH file-table entry: the offsets within the
+/// PTH buffer of a file's cached token stream and of its preprocessor
+/// conditional (#if/#endif) side-table.
+class PTHFileData {
+  const uint32_t TokenOff;
+  const uint32_t PPCondOff;
+public:
+  PTHFileData(uint32_t tokenOff, uint32_t ppCondOff)
+    : TokenOff(tokenOff), PPCondOff(ppCondOff) {}
+
+  uint32_t getTokenOffset() const { return TokenOff; }
+  uint32_t getPPCondOffset() const { return PPCondOff; }
+};
+
+
+/// PTHFileLookupCommonTrait - Shared OnDiskChainedHashTable machinery for
+/// the PTH file table; keys are (entry-kind byte, string) pairs.
+class PTHFileLookupCommonTrait {
+public:
+  typedef std::pair<unsigned char, const char*> internal_key_type;
+
+  // Hash only the string part; the kind byte is checked in EqualKey.
+  static unsigned ComputeHash(internal_key_type x) {
+    return llvm::HashString(x.second);
+  }
+
+  // On-disk layout: 16-bit key length followed by an 8-bit data length.
+  static std::pair<unsigned, unsigned>
+  ReadKeyDataLength(const unsigned char*& d) {
+    unsigned keyLen = (unsigned) ReadUnalignedLE16(d);
+    unsigned dataLen = (unsigned) *(d++);
+    return std::make_pair(keyLen, dataLen);
+  }
+
+  static internal_key_type ReadKey(const unsigned char* d, unsigned) {
+    unsigned char k = *(d++); // Read the entry kind.
+    return std::make_pair(k, (const char*) d);
+  }
+};
+
+/// PTHFileLookupTrait - Lookup trait for the file table: maps a FileEntry*
+/// (keyed by its file name) to that file's PTHFileData offsets.
+class PTHFileLookupTrait : public PTHFileLookupCommonTrait {
+public:
+  typedef const FileEntry* external_key_type;
+  typedef PTHFileData      data_type;
+
+  static internal_key_type GetInternalKey(const FileEntry* FE) {
+    // 0x1 tags this key as a file-lookup entry (cf. the assert in ReadData).
+    return std::make_pair((unsigned char) 0x1, FE->getName());
+  }
+
+  static bool EqualKey(internal_key_type a, internal_key_type b) {
+    return a.first == b.first && strcmp(a.second, b.second) == 0;
+  }
+
+  static PTHFileData ReadData(const internal_key_type& k,
+                              const unsigned char* d, unsigned) {
+    assert(k.first == 0x1 && "Only file lookups can match!");
+    // Payload is two 32-bit offsets: token data, then pp-conditional table.
+    uint32_t x = ::ReadUnalignedLE32(d);
+    uint32_t y = ::ReadUnalignedLE32(d);
+    return PTHFileData(x, y);
+  }
+};
+
+/// PTHStringLookupTrait - OnDiskChainedHashTable trait mapping a
+/// (character data, length) string key to its 32-bit persistent ID.
+class PTHStringLookupTrait {
+public:
+  typedef uint32_t data_type;
+
+  typedef const std::pair<const char*, unsigned> external_key_type;
+
+  typedef external_key_type internal_key_type;
+
+  static bool EqualKey(const internal_key_type &LHS,
+                       const internal_key_type &RHS) {
+    // Keys match only when both the lengths and the bytes agree.
+    if (LHS.second != RHS.second)
+      return false;
+    return memcmp(LHS.first, RHS.first, LHS.second) == 0;
+  }
+
+  static unsigned ComputeHash(const internal_key_type &Key) {
+    return llvm::HashString(llvm::StringRef(Key.first, Key.second));
+  }
+
+  // This hopefully will just get inlined and removed by the optimizer.
+  static const internal_key_type &
+  GetInternalKey(const external_key_type &Key) { return Key; }
+
+  // On-disk layout: a 16-bit key length; the payload is always one uint32_t.
+  static std::pair<unsigned, unsigned>
+  ReadKeyDataLength(const unsigned char *&Data) {
+    unsigned KeyLen = (unsigned) ReadUnalignedLE16(Data);
+    return std::make_pair(KeyLen, (unsigned) sizeof(uint32_t));
+  }
+
+  static std::pair<const char*, unsigned>
+  ReadKey(const unsigned char *Data, unsigned Len) {
+    // The stored key includes a NUL terminator; strip it from the length.
+    assert(Len >= 2 && Data[Len-1] == '\0');
+    return std::make_pair((const char*) Data, Len-1);
+  }
+
+  static uint32_t ReadData(const internal_key_type&, const unsigned char *Data,
+                           unsigned) {
+    return ::ReadUnalignedLE32(Data);
+  }
+};
+
+} // end anonymous namespace
+
+typedef OnDiskChainedHashTable<PTHFileLookupTrait>   PTHFileLookup;
+typedef OnDiskChainedHashTable<PTHStringLookupTrait> PTHStringIdLookup;
+
+//===----------------------------------------------------------------------===//
+// PTHManager methods.
+//===----------------------------------------------------------------------===//
+
+// Takes ownership of buf, fileLookup, stringIdLookup and perIDCache; all
+// four are released in ~PTHManager().  PP starts out null and must be set
+// before lexers are created (CreateLexer dereferences it).
+PTHManager::PTHManager(const llvm::MemoryBuffer* buf, void* fileLookup,
+                       const unsigned char* idDataTable,
+                       IdentifierInfo** perIDCache,
+                       void* stringIdLookup, unsigned numIds,
+                       const unsigned char* spellingBase,
+                       const char* originalSourceFile)
+: Buf(buf), PerIDCache(perIDCache), FileLookup(fileLookup),
+  IdDataTable(idDataTable), StringIdLookup(stringIdLookup),
+  NumIds(numIds), PP(0), SpellingBase(spellingBase),
+  OriginalSourceFile(originalSourceFile) {}
+
+PTHManager::~PTHManager() {
+  delete Buf;
+  // The lookup tables are stored type-erased as void*; cast back to the
+  // concrete table types so the correct destructors run.
+  delete (PTHFileLookup*) FileLookup;
+  delete (PTHStringIdLookup*) StringIdLookup;
+  // PerIDCache was allocated with calloc() in Create(), so free() it.
+  free(PerIDCache);
+}
+
+static void InvalidPTH(Diagnostic &Diags, const char *Msg) {
+  Diags.Report(Diags.getCustomDiagID(Diagnostic::Error, Msg));
+}
+
+/// Create - Memory-map and validate the named PTH file, returning a new
+/// PTHManager on success or null (after diagnosing) on any failure.
+PTHManager* PTHManager::Create(const std::string& file, Diagnostic &Diags) {
+  // Memory map the PTH file.
+  llvm::OwningPtr<llvm::MemoryBuffer>
+  File(llvm::MemoryBuffer::getFile(file.c_str()));
+
+  if (!File) {
+    Diags.Report(diag::err_invalid_pth_file) << file;
+    return 0;
+  }
+
+  // Get the buffer ranges and check if there are at least three 32-bit
+  // words at the end of the file.
+  const unsigned char* BufBeg = (unsigned char*)File->getBufferStart();
+  const unsigned char* BufEnd = (unsigned char*)File->getBufferEnd();
+
+  // Check the prologue of the file.
+  if ((BufEnd - BufBeg) < (signed) (sizeof("cfe-pth") + 3 + 4) ||
+      memcmp(BufBeg, "cfe-pth", sizeof("cfe-pth") - 1) != 0) {
+    Diags.Report(diag::err_invalid_pth_file) << file;
+    return 0;
+  }
+
+  // Read the PTH version.
+  const unsigned char *p = BufBeg + (sizeof("cfe-pth") - 1);
+  unsigned Version = ReadLE32(p);
+
+  // Reject any version mismatch.  This must compare with '!=', not '<':
+  // a file written in a *newer* format must also be rejected, otherwise
+  // its contents would be silently misread.  (With '<' the "newer" arm of
+  // the ternary below was unreachable dead code.)
+  if (Version != PTHManager::Version) {
+    InvalidPTH(Diags,
+        Version < PTHManager::Version
+        ? "PTH file uses an older PTH format that is no longer supported"
+        : "PTH file uses a newer PTH format that cannot be read");
+    return 0;
+  }
+
+  // Compute the address of the index table at the end of the PTH file.
+  const unsigned char *PrologueOffset = p;
+
+  if (PrologueOffset >= BufEnd) {
+    Diags.Report(diag::err_invalid_pth_file) << file;
+    return 0;
+  }
+
+  // The prologue is five consecutive 32-bit slots holding buffer offsets:
+  //   [0] identifier data table    [1] string -> ID hash table
+  //   [2] file lookup table        [3] spelling cache
+  //   [4] original source file name (16-bit length prefixed).
+
+  // Construct the file lookup table.  This will be used for mapping from
+  // FileEntry*'s to cached tokens.
+  const unsigned char* FileTableOffset = PrologueOffset + sizeof(uint32_t)*2;
+  const unsigned char* FileTable = BufBeg + ReadLE32(FileTableOffset);
+
+  if (!(FileTable > BufBeg && FileTable < BufEnd)) {
+    Diags.Report(diag::err_invalid_pth_file) << file;
+    return 0; // FIXME: Proper error diagnostic?
+  }
+
+  llvm::OwningPtr<PTHFileLookup> FL(PTHFileLookup::Create(FileTable, BufBeg));
+
+  // Warn if the PTH file is empty.  We still want to create a PTHManager
+  // as the PTH could be used with -include-pth.
+  if (FL->isEmpty())
+    InvalidPTH(Diags, "PTH file contains no cached source data");
+
+  // Get the location of the table mapping from persistent ids to the
+  // data needed to reconstruct identifiers.
+  const unsigned char* IDTableOffset = PrologueOffset + sizeof(uint32_t)*0;
+  const unsigned char* IData = BufBeg + ReadLE32(IDTableOffset);
+
+  if (!(IData >= BufBeg && IData < BufEnd)) {
+    Diags.Report(diag::err_invalid_pth_file) << file;
+    return 0;
+  }
+
+  // Get the location of the hashtable mapping between strings and
+  // persistent IDs.
+  const unsigned char* StringIdTableOffset = PrologueOffset + sizeof(uint32_t)*1;
+  const unsigned char* StringIdTable = BufBeg + ReadLE32(StringIdTableOffset);
+  if (!(StringIdTable >= BufBeg && StringIdTable < BufEnd)) {
+    Diags.Report(diag::err_invalid_pth_file) << file;
+    return 0;
+  }
+
+  llvm::OwningPtr<PTHStringIdLookup> SL(PTHStringIdLookup::Create(StringIdTable,
+                                                                  BufBeg));
+
+  // Get the location of the spelling cache.
+  const unsigned char* spellingBaseOffset = PrologueOffset + sizeof(uint32_t)*3;
+  const unsigned char* spellingBase = BufBeg + ReadLE32(spellingBaseOffset);
+  if (!(spellingBase >= BufBeg && spellingBase < BufEnd)) {
+    Diags.Report(diag::err_invalid_pth_file) << file;
+    return 0;
+  }
+
+  // Get the number of IdentifierInfos and pre-allocate the identifier cache.
+  uint32_t NumIds = ReadLE32(IData);
+
+  // Pre-allocate the persistent ID -> IdentifierInfo* cache.  We use calloc()
+  // so that we in the best case only zero out memory once when the OS returns
+  // us new pages.
+  IdentifierInfo** PerIDCache = 0;
+
+  if (NumIds) {
+    PerIDCache = (IdentifierInfo**)calloc(NumIds, sizeof(*PerIDCache));
+    if (!PerIDCache) {
+      InvalidPTH(Diags, "Could not allocate memory for processing PTH file");
+      return 0;
+    }
+  }
+
+  // Compute the address of the original source file.
+  const unsigned char* originalSourceBase = PrologueOffset + sizeof(uint32_t)*4;
+  unsigned len = ReadUnalignedLE16(originalSourceBase);
+  if (!len) originalSourceBase = 0;
+
+  // Create the new PTHManager.  The OwningPtrs release ownership to it.
+  return new PTHManager(File.take(), FL.take(), IData, PerIDCache,
+                        SL.take(), NumIds, spellingBase,
+                        (const char*) originalSourceBase);
+}
+
+IdentifierInfo* PTHManager::LazilyCreateIdentifierInfo(unsigned PersistentID) {
+  // Look in the PTH file for the string data for the IdentifierInfo object.
+  const unsigned char* TableEntry = IdDataTable + sizeof(uint32_t)*PersistentID;
+  const unsigned char* IDData =
+    (const unsigned char*)Buf->getBufferStart() + ReadLE32(TableEntry);
+  assert(IDData < (const unsigned char*)Buf->getBufferEnd());
+
+  // Allocate the object.  The IdentifierInfo is placement-new'd into the
+  // first slot of a pair whose second slot points at the string data in the
+  // PTH buffer; the pointer is written *before* construction, which (per
+  // the asserts below) is how the IdentifierInfo locates its name.
+  std::pair<IdentifierInfo,const unsigned char*> *Mem =
+    Alloc.Allocate<std::pair<IdentifierInfo,const unsigned char*> >();
+
+  Mem->second = IDData;
+  assert(IDData[0] != '\0');
+  IdentifierInfo *II = new ((void*) Mem) IdentifierInfo();
+
+  // Store the new IdentifierInfo in the cache.
+  PerIDCache[PersistentID] = II;
+  assert(II->getNameStart() && II->getNameStart()[0] != '\0');
+  return II;
+}
+
+/// get - Look up the identifier named by [NameStart, NameEnd) in the PTH
+/// string table, returning its IdentifierInfo or null if it is not present.
+IdentifierInfo* PTHManager::get(const char *NameStart, const char *NameEnd) {
+  PTHStringIdLookup& SL = *((PTHStringIdLookup*)StringIdLookup);
+  // Double check our assumption that the last character isn't '\0'.
+  assert(NameEnd==NameStart || NameStart[NameEnd-NameStart-1] != '\0');
+  PTHStringIdLookup::iterator I = SL.find(std::make_pair(NameStart,
+                                                         NameEnd - NameStart));
+  if (I == SL.end()) // No identifier found?
+    return 0;
+
+  // Match found.  Return the identifier!  Stored IDs are biased by one so
+  // that 0 never appears as a valid entry (see the assert); undo the bias.
+  assert(*I > 0);
+  return GetIdentifierInfo(*I-1);
+}
+
+/// CreateLexer - Create a PTHLexer that replays the cached token stream for
+/// the given FileID, or return null if no tokens are cached for that file.
+PTHLexer *PTHManager::CreateLexer(FileID FID) {
+  const FileEntry *FE = PP->getSourceManager().getFileEntryForID(FID);
+  if (!FE)
+    return 0;
+
+  // Lookup the FileEntry object in our file lookup data structure.  It will
+  // return a variant that indicates whether or not there is an offset within
+  // the PTH file that contains cached tokens.
+  PTHFileLookup& PFL = *((PTHFileLookup*)FileLookup);
+  PTHFileLookup::iterator I = PFL.find(FE);
+
+  if (I == PFL.end()) // No tokens available?
+    return 0;
+
+  const PTHFileData& FileData = *I;
+
+  const unsigned char *BufStart = (const unsigned char *)Buf->getBufferStart();
+  // Compute the offset of the token data within the buffer.
+  const unsigned char* data = BufStart + FileData.getTokenOffset();
+
+  // Get the location of pp-conditional table.  A leading length word of zero
+  // means the file has no cached pp-conditional data; pass null in that case.
+  const unsigned char* ppcond = BufStart + FileData.getPPCondOffset();
+  uint32_t Len = ReadLE32(ppcond);
+  if (Len == 0) ppcond = 0;
+
+  assert(PP && "No preprocessor set yet!");
+  return new PTHLexer(*PP, FID, data, ppcond, *this);
+}
+
+//===----------------------------------------------------------------------===//
+// 'stat' caching.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// PTHStatData - The result of a cached 'stat' query: either a negative
+/// lookup (hasStat == false) or the subset of stat fields stored in the PTH
+/// file.
+class PTHStatData {
+public:
+  const bool hasStat;
+  const ino_t ino;
+  const dev_t dev;
+  const mode_t mode;
+  const time_t mtime;
+  const off_t size;
+
+  PTHStatData(ino_t i, dev_t d, mode_t mo, time_t m, off_t s)
+  : hasStat(true), ino(i), dev(d), mode(mo), mtime(m), size(s) {}
+
+  PTHStatData()
+    : hasStat(false), ino(0), dev(0), mode(0), mtime(0), size(0) {}
+};
+
+/// PTHStatLookupTrait - Hash-table trait for looking up stat data by path.
+/// Reuses the file-lookup key format but compares paths only, ignoring the
+/// entry-kind byte.
+class PTHStatLookupTrait : public PTHFileLookupCommonTrait {
+public:
+  typedef const char* external_key_type;  // const char*
+  typedef PTHStatData data_type;
+
+  static internal_key_type GetInternalKey(const char *path) {
+    // The key 'kind' doesn't matter here because it is ignored in EqualKey.
+    return std::make_pair((unsigned char) 0x0, path);
+  }
+
+  static bool EqualKey(internal_key_type a, internal_key_type b) {
+    // When doing 'stat' lookups we don't care about the kind of 'a' and 'b',
+    // just the paths.
+    return strcmp(a.second, b.second) == 0;
+  }
+
+  // NOTE(review): this assumes the ReadUnalignedLE* helpers advance 'd' past
+  // each field they read — confirm against their declarations.
+  static data_type ReadData(const internal_key_type& k, const unsigned char* d,
+                            unsigned) {
+
+    if (k.first /* File or Directory */) {
+      if (k.first == 0x1 /* File */) d += 4 * 2; // Skip the first 2 words.
+      ino_t ino = (ino_t) ReadUnalignedLE32(d);
+      dev_t dev = (dev_t) ReadUnalignedLE32(d);
+      mode_t mode = (mode_t) ReadUnalignedLE16(d);
+      time_t mtime = (time_t) ReadUnalignedLE64(d);
+      return data_type(ino, dev, mode, mtime, (off_t) ReadUnalignedLE64(d));
+    }
+
+    // Negative stat.  Don't read anything.
+    return data_type();
+  }
+};
+
+/// PTHStatCache - A StatSysCallCache that serves 'stat' queries from the
+/// on-disk hash table embedded in the PTH file, falling back to the real
+/// system call on a cache miss.
+class PTHStatCache : public StatSysCallCache {
+  typedef OnDiskChainedHashTable<PTHStatLookupTrait> CacheTy;
+  CacheTy Cache;
+
+public:
+  PTHStatCache(PTHFileLookup &FL) :
+    Cache(FL.getNumBuckets(), FL.getNumEntries(), FL.getBuckets(),
+          FL.getBase()) {}
+
+  ~PTHStatCache() {}
+
+  int stat(const char *path, struct stat *buf) {
+    // Do the lookup for the file's data in the PTH file.
+    CacheTy::iterator I = Cache.find(path);
+
+    // If we don't get a hit in the PTH file just forward to 'stat'.
+    if (I == Cache.end()) 
+      return StatSysCallCache::stat(path, buf);
+
+    const PTHStatData& Data = *I;
+
+    // A cached negative stat: report failure without touching the disk.
+    if (!Data.hasStat)
+      return 1;
+
+    // Fill in only the fields the PTH file records; other fields of 'buf'
+    // are left untouched.
+    buf->st_ino = Data.ino;
+    buf->st_dev = Data.dev;
+    buf->st_mtime = Data.mtime;
+    buf->st_mode = Data.mode;
+    buf->st_size = Data.size;
+    return 0;
+  }
+};
+} // end anonymous namespace
+
+/// createStatCache - Create a 'stat' cache backed by this PTH file's
+/// file-lookup table.  The caller owns the returned object.
+StatSysCallCache *PTHManager::createStatCache() {
+  return new PTHStatCache(*((PTHFileLookup*) FileLookup));
+}
diff --git a/lib/Lex/Pragma.cpp b/lib/Lex/Pragma.cpp
new file mode 100644
index 0000000..63b23b6
--- /dev/null
+++ b/lib/Lex/Pragma.cpp
@@ -0,0 +1,740 @@
+//===--- Pragma.cpp - Pragma registration and handling --------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PragmaHandler/PragmaTable interfaces and implements
+// pragma related methods of the Preprocessor class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/Pragma.h"
+#include "clang/Lex/HeaderSearch.h"
+#include "clang/Lex/LiteralSupport.h"
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/LexDiagnostic.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/SourceManager.h"
+#include <algorithm>
+using namespace clang;
+
+// Out-of-line destructor to provide a home for the class (anchors the
+// vtable in this translation unit).
+PragmaHandler::~PragmaHandler() {
+}
+
+//===----------------------------------------------------------------------===//
+// PragmaNamespace Implementation.
+//===----------------------------------------------------------------------===//
+
+
+// PragmaNamespace owns its registered handlers; destroy them all.
+PragmaNamespace::~PragmaNamespace() {
+  for (unsigned i = 0, e = Handlers.size(); i != e; ++i)
+    delete Handlers[i];
+}
+
+/// FindHandler - Check to see if there is already a handler for the
+/// specified name.  If not, return the handler for the null identifier if it
+/// exists, otherwise return null.  If IgnoreNull is true (the default) then
+/// the null handler isn't returned on failure to match.
+PragmaHandler *PragmaNamespace::FindHandler(const IdentifierInfo *Name,
+                                            bool IgnoreNull) const {
+  PragmaHandler *NullHandler = 0;
+  // Linear scan; a null-named handler acts as the namespace's catch-all.
+  for (unsigned i = 0, e = Handlers.size(); i != e; ++i) {
+    if (Handlers[i]->getName() == Name)
+      return Handlers[i];
+
+    if (Handlers[i]->getName() == 0)
+      NullHandler = Handlers[i];
+  }
+  return IgnoreNull ? 0 : NullHandler;
+}
+
+/// RemovePragmaHandler - Unregister (but do not delete) the given handler.
+/// It is an error if the handler is not registered in this namespace.
+void PragmaNamespace::RemovePragmaHandler(PragmaHandler *Handler) {
+  for (unsigned i = 0, e = Handlers.size(); i != e; ++i) {
+    if (Handlers[i] == Handler) {
+      // Order is not significant: swap-with-back for O(1) removal.
+      Handlers[i] = Handlers.back();
+      Handlers.pop_back();
+      return;
+    }
+  }
+  assert(0 && "Handler not registered in this namespace");
+}
+
+/// HandlePragma - Lex the next pragma token and dispatch to the handler
+/// registered under that name, warning and ignoring the pragma if none
+/// matches (and no catch-all handler exists).
+void PragmaNamespace::HandlePragma(Preprocessor &PP, Token &Tok) {
+  // Read the 'namespace' that the directive is in, e.g. STDC.  Do not macro
+  // expand it, the user can have a STDC #define, that should not affect this.
+  PP.LexUnexpandedToken(Tok);
+
+  // Get the handler for this token.  If there is no handler, ignore the pragma.
+  PragmaHandler *Handler = FindHandler(Tok.getIdentifierInfo(), false);
+  if (Handler == 0) {
+    PP.Diag(Tok, diag::warn_pragma_ignored);
+    return;
+  }
+
+  // Otherwise, pass it down.
+  Handler->HandlePragma(PP, Tok);
+}
+
+//===----------------------------------------------------------------------===//
+// Preprocessor Pragma Directive Handling.
+//===----------------------------------------------------------------------===//
+
+/// HandlePragmaDirective - The "#pragma" directive has been parsed.  Lex the
+/// rest of the pragma, passing it to the registered pragma handlers.
+void Preprocessor::HandlePragmaDirective() {
+  ++NumPragma;  // Statistics: count pragmas seen.
+
+  // Invoke the first level of pragma handlers which reads the namespace id.
+  Token Tok;
+  PragmaHandlers->HandlePragma(*this, Tok);
+
+  // If the pragma handler didn't read the rest of the line, consume it now.
+  if (CurPPLexer && CurPPLexer->ParsingPreprocessorDirective)
+    DiscardUntilEndOfDirective();
+}
+
+/// Handle_Pragma - Read a _Pragma directive, slice it up, process it, then
+/// return the first token after the directive.  The _Pragma token has just
+/// been read into 'Tok'.
+void Preprocessor::Handle_Pragma(Token &Tok) {
+  // Remember the pragma token location.
+  SourceLocation PragmaLoc = Tok.getLocation();
+
+  // Read the '('.
+  Lex(Tok);
+  if (Tok.isNot(tok::l_paren)) {
+    Diag(PragmaLoc, diag::err__Pragma_malformed);
+    return;
+  }
+
+  // Read the '"..."'.
+  Lex(Tok);
+  if (Tok.isNot(tok::string_literal) && Tok.isNot(tok::wide_string_literal)) {
+    Diag(PragmaLoc, diag::err__Pragma_malformed);
+    return;
+  }
+
+  // Remember the string.
+  std::string StrVal = getSpelling(Tok);
+
+  // Read the ')'.
+  Lex(Tok);
+  if (Tok.isNot(tok::r_paren)) {
+    Diag(PragmaLoc, diag::err__Pragma_malformed);
+    return;
+  }
+
+  SourceLocation RParenLoc = Tok.getLocation();
+
+  // The _Pragma is lexically sound.  Destringize according to C99 6.10.9.1:
+  // "The string literal is destringized by deleting the L prefix, if present,
+  // deleting the leading and trailing double-quotes, replacing each escape
+  // sequence \" by a double-quote, and replacing each escape sequence \\ by a
+  // single backslash."
+  if (StrVal[0] == 'L')  // Remove L prefix.
+    StrVal.erase(StrVal.begin());
+  assert(StrVal[0] == '"' && StrVal[StrVal.size()-1] == '"' &&
+         "Invalid string token!");
+
+  // Remove the front quote, replacing it with a space, so that the pragma
+  // contents appear to have a space before them.
+  StrVal[0] = ' ';
+
+  // Replace the terminating quote with a \n.
+  StrVal[StrVal.size()-1] = '\n';
+
+  // Remove escaped quotes and escapes.
+  for (unsigned i = 0, e = StrVal.size(); i != e-1; ++i) {
+    if (StrVal[i] == '\\' &&
+        (StrVal[i+1] == '\\' || StrVal[i+1] == '"')) {
+      // \\ -> '\' and \" -> '"'.
+      StrVal.erase(StrVal.begin()+i);
+      --e;
+    }
+  }
+
+  // Plop the string (including the newline and trailing null) into a buffer
+  // where we can lex it.
+  Token TmpTok;
+  TmpTok.startToken();
+  CreateString(&StrVal[0], StrVal.size(), TmpTok);
+  SourceLocation TokLoc = TmpTok.getLocation();
+
+  // Make and enter a lexer object so that we lex and expand the tokens just
+  // like any others.  The pragma/paren locations let diagnostics in the
+  // destringized text point back at the original _Pragma.
+  Lexer *TL = Lexer::Create_PragmaLexer(TokLoc, PragmaLoc, RParenLoc,
+                                        StrVal.size(), *this);
+
+  EnterSourceFileWithLexer(TL, 0);
+
+  // With everything set up, lex this as a #pragma directive.
+  HandlePragmaDirective();
+
+  // Finally, return whatever came after the pragma directive.
+  return Lex(Tok);
+}
+
+
+
+/// HandlePragmaOnce - Handle #pragma once.  OnceTok is the 'once'.
+/// Diagnosed (and ignored) when it appears in the primary source file.
+void Preprocessor::HandlePragmaOnce(Token &OnceTok) {
+  if (isInPrimaryFile()) {
+    Diag(OnceTok, diag::pp_pragma_once_in_main_file);
+    return;
+  }
+
+  // Get the current file lexer we're looking at.  Ignore _Pragma 'files' etc.
+  // Mark the file as a once-only file now.
+  HeaderInfo.MarkFileIncludeOnce(getCurrentFileLexer()->getFileEntry());
+}
+
+/// HandlePragmaMark - Handle #pragma mark: the rest of the line is discarded
+/// without being lexed as tokens, via whichever lexer is active.
+void Preprocessor::HandlePragmaMark() {
+  assert(CurPPLexer && "No current lexer?");
+  if (CurLexer)
+    CurLexer->ReadToEndOfLine();
+  else
+    CurPTHLexer->DiscardToEndOfLine();
+}
+
+
+/// HandlePragmaPoison - Handle #pragma GCC poison.  PoisonTok is the 'poison'.
+/// Each identifier on the rest of the line is marked poisoned so later uses
+/// are diagnosed.
+void Preprocessor::HandlePragmaPoison(Token &PoisonTok) {
+  Token Tok;
+
+  while (1) {
+    // Read the next token to poison.  While doing this, pretend that we are
+    // skipping while reading the identifier to poison.
+    // This avoids errors on code like:
+    //   #pragma GCC poison X
+    //   #pragma GCC poison X
+    if (CurPPLexer) CurPPLexer->LexingRawMode = true;
+    LexUnexpandedToken(Tok);
+    if (CurPPLexer) CurPPLexer->LexingRawMode = false;
+
+    // If we reached the end of line, we're done.
+    if (Tok.is(tok::eom)) return;
+
+    // Can only poison identifiers.
+    if (Tok.isNot(tok::identifier)) {
+      Diag(Tok, diag::err_pp_invalid_poison);
+      return;
+    }
+
+    // Look up the identifier info for the token.  We disabled identifier lookup
+    // by saying we're skipping contents, so we need to do this manually.
+    IdentifierInfo *II = LookUpIdentifierInfo(Tok);
+
+    // Already poisoned.
+    if (II->isPoisoned()) continue;
+
+    // If this is a macro identifier, emit a warning.
+    if (II->hasMacroDefinition())
+      Diag(Tok, diag::pp_poisoning_existing_macro);
+
+    // Finally, poison it!
+    II->setIsPoisoned();
+  }
+}
+
+/// HandlePragmaSystemHeader - Implement #pragma GCC system_header.  We know
+/// that the whole directive has been parsed.  Marks the current file as a
+/// system header and emits a line note so subsequent locations inherit the
+/// system-header flag.
+void Preprocessor::HandlePragmaSystemHeader(Token &SysHeaderTok) {
+  if (isInPrimaryFile()) {
+    Diag(SysHeaderTok, diag::pp_pragma_sysheader_in_main_file);
+    return;
+  }
+
+  // Get the current file lexer we're looking at.  Ignore _Pragma 'files' etc.
+  PreprocessorLexer *TheLexer = getCurrentFileLexer();
+
+  // Mark the file as a system header.
+  HeaderInfo.MarkFileSystemHeader(TheLexer->getFileEntry());
+
+
+  PresumedLoc PLoc = SourceMgr.getPresumedLoc(SysHeaderTok.getLocation());
+  unsigned FilenameLen = strlen(PLoc.getFilename());
+  unsigned FilenameID = SourceMgr.getLineTableFilenameID(PLoc.getFilename(),
+                                                         FilenameLen);
+
+  // Emit a line marker.  This will change any source locations from this point
+  // forward to realize they are in a system header.
+  // Create a line note with this information.
+  SourceMgr.AddLineNote(SysHeaderTok.getLocation(), PLoc.getLine(), FilenameID,
+                        false, false, true, false);
+
+  // Notify the client, if desired, that we are in a new source file.
+  if (Callbacks)
+    Callbacks->FileChanged(SysHeaderTok.getLocation(),
+                           PPCallbacks::SystemHeaderPragma, SrcMgr::C_System);
+}
+
+/// HandlePragmaDependency - Handle #pragma GCC dependency "foo" blah.
+/// Warns if the named file is newer than the current file, including any
+/// trailing message tokens in the diagnostic.
+void Preprocessor::HandlePragmaDependency(Token &DependencyTok) {
+  Token FilenameTok;
+  CurPPLexer->LexIncludeFilename(FilenameTok);
+
+  // If the token kind is EOM, the error has already been diagnosed.
+  if (FilenameTok.is(tok::eom))
+    return;
+
+  // Reserve a buffer to get the spelling.  Note: FilenameBuffer must stay
+  // alive as long as the Filename StringRef below, which points into it.
+  llvm::SmallString<128> FilenameBuffer;
+  FilenameBuffer.resize(FilenameTok.getLength());
+
+  const char *FilenameStart = &FilenameBuffer[0];
+  unsigned Len = getSpelling(FilenameTok, FilenameStart);
+  llvm::StringRef Filename(FilenameStart, Len);
+  bool isAngled =
+    GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename);
+  // If GetIncludeFilenameSpelling set the start ptr to null, there was an
+  // error.
+  if (Filename.empty())
+    return;
+
+  // Search include directories for this file.
+  const DirectoryLookup *CurDir;
+  const FileEntry *File = LookupFile(Filename, isAngled, 0, CurDir);
+  if (File == 0) {
+    Diag(FilenameTok, diag::err_pp_file_not_found) << Filename;
+    return;
+  }
+
+  const FileEntry *CurFile = getCurrentFileLexer()->getFileEntry();
+
+  // If this file is older than the file it depends on, emit a diagnostic.
+  if (CurFile && CurFile->getModificationTime() < File->getModificationTime()) {
+    // Lex tokens at the end of the message and include them in the message.
+    std::string Message;
+    Lex(DependencyTok);
+    while (DependencyTok.isNot(tok::eom)) {
+      Message += getSpelling(DependencyTok) + " ";
+      Lex(DependencyTok);
+    }
+
+    // Drop the trailing space added by the loop above.
+    Message.erase(Message.end()-1);
+    Diag(FilenameTok, diag::pp_out_of_date_dependency) << Message;
+  }
+}
+
+/// HandlePragmaComment - Handle the microsoft #pragma comment extension.  The
+/// syntax is:
+///   #pragma comment(linker, "foo")
+/// 'linker' is one of five identifiers: compiler, exestr, lib, linker, user.
+/// "foo" is a string, which is fully macro expanded, and permits string
+/// concatenation, embedded escape characters etc.  See MSDN for more details.
+/// On success, PPCallbacks::PragmaComment is invoked with the parsed pieces.
+void Preprocessor::HandlePragmaComment(Token &Tok) {
+  SourceLocation CommentLoc = Tok.getLocation();
+  Lex(Tok);
+  if (Tok.isNot(tok::l_paren)) {
+    Diag(CommentLoc, diag::err_pragma_comment_malformed);
+    return;
+  }
+
+  // Read the identifier.
+  Lex(Tok);
+  if (Tok.isNot(tok::identifier)) {
+    Diag(CommentLoc, diag::err_pragma_comment_malformed);
+    return;
+  }
+
+  // Verify that this is one of the 5 whitelisted options.
+  // FIXME: warn that 'exestr' is deprecated.
+  const IdentifierInfo *II = Tok.getIdentifierInfo();
+  if (!II->isStr("compiler") && !II->isStr("exestr") && !II->isStr("lib") &&
+      !II->isStr("linker") && !II->isStr("user")) {
+    Diag(Tok.getLocation(), diag::err_pragma_comment_unknown_kind);
+    return;
+  }
+
+  // Read the optional string if present.
+  Lex(Tok);
+  std::string ArgumentString;
+  if (Tok.is(tok::comma)) {
+    Lex(Tok); // eat the comma.
+
+    // We need at least one string.
+    if (Tok.isNot(tok::string_literal)) {
+      Diag(Tok.getLocation(), diag::err_pragma_comment_malformed);
+      return;
+    }
+
+    // String concatenation allows multiple strings, which can even come from
+    // macro expansion.
+    // "foo " "bar" "Baz"
+    llvm::SmallVector<Token, 4> StrToks;
+    while (Tok.is(tok::string_literal)) {
+      StrToks.push_back(Tok);
+      Lex(Tok);
+    }
+
+    // Concatenate and parse the strings.
+    StringLiteralParser Literal(&StrToks[0], StrToks.size(), *this);
+    assert(!Literal.AnyWide && "Didn't allow wide strings in");
+    if (Literal.hadError)
+      return;
+    if (Literal.Pascal) {
+      Diag(StrToks[0].getLocation(), diag::err_pragma_comment_malformed);
+      return;
+    }
+
+    ArgumentString = std::string(Literal.GetString(),
+                                 Literal.GetString()+Literal.GetStringLength());
+  }
+
+  // FIXME: If the kind is "compiler" warn if the string is present (it is
+  // ignored).
+  // FIXME: 'lib' requires a comment string.
+  // FIXME: 'linker' requires a comment string, and has a specific list of
+  // things that are allowable.
+
+  if (Tok.isNot(tok::r_paren)) {
+    Diag(Tok.getLocation(), diag::err_pragma_comment_malformed);
+    return;
+  }
+  Lex(Tok);  // eat the r_paren.
+
+  if (Tok.isNot(tok::eom)) {
+    Diag(Tok.getLocation(), diag::err_pragma_comment_malformed);
+    return;
+  }
+
+  // If the pragma is lexically sound, notify any interested PPCallbacks.
+  if (Callbacks)
+    Callbacks->PragmaComment(CommentLoc, II, ArgumentString);
+}
+
+
+
+
+/// AddPragmaHandler - Add the specified pragma handler to the preprocessor.
+/// If 'Namespace' is non-null, then it is a token required to exist on the
+/// pragma line before the pragma string starts, e.g. "STDC" or "GCC".
+/// The namespace sub-table is created on demand the first time it is used.
+void Preprocessor::AddPragmaHandler(const char *Namespace,
+                                    PragmaHandler *Handler) {
+  PragmaNamespace *InsertNS = PragmaHandlers;
+
+  // If this is specified to be in a namespace, step down into it.
+  if (Namespace) {
+    IdentifierInfo *NSID = getIdentifierInfo(Namespace);
+
+    // If there is already a pragma handler with the name of this namespace,
+    // we either have an error (directive with the same name as a namespace) or
+    // we already have the namespace to insert into.
+    if (PragmaHandler *Existing = PragmaHandlers->FindHandler(NSID)) {
+      InsertNS = Existing->getIfNamespace();
+      assert(InsertNS != 0 && "Cannot have a pragma namespace and pragma"
+             " handler with the same name!");
+    } else {
+      // Otherwise, this namespace doesn't exist yet, create and insert the
+      // handler for it.
+      InsertNS = new PragmaNamespace(NSID);
+      PragmaHandlers->AddPragma(InsertNS);
+    }
+  }
+
+  // Check to make sure we don't already have a pragma for this identifier.
+  assert(!InsertNS->FindHandler(Handler->getName()) &&
+         "Pragma handler already exists for this identifier!");
+  InsertNS->AddPragma(Handler);
+}
+
+/// RemovePragmaHandler - Remove the specific pragma handler from the
+/// preprocessor. If \arg Namespace is non-null, then it should be the
+/// namespace that \arg Handler was added to. It is an error to remove
+/// a handler that has not been registered.  Empty non-default namespaces
+/// are removed as a side effect.
+void Preprocessor::RemovePragmaHandler(const char *Namespace,
+                                       PragmaHandler *Handler) {
+  PragmaNamespace *NS = PragmaHandlers;
+
+  // If this is specified to be in a namespace, step down into it.
+  if (Namespace) {
+    IdentifierInfo *NSID = getIdentifierInfo(Namespace);
+    PragmaHandler *Existing = PragmaHandlers->FindHandler(NSID);
+    assert(Existing && "Namespace containing handler does not exist!");
+
+    NS = Existing->getIfNamespace();
+    assert(NS && "Invalid namespace, registered as a regular pragma handler!");
+  }
+
+  NS->RemovePragmaHandler(Handler);
+
+  // If this is a non-default namespace and it is now empty, remove
+  // it.
+  if (NS != PragmaHandlers && NS->IsEmpty())
+    PragmaHandlers->RemovePragmaHandler(NS);
+}
+
+namespace {
+/// PragmaOnceHandler - "#pragma once" marks the file as atomically included.
+struct PragmaOnceHandler : public PragmaHandler {
+  PragmaOnceHandler(const IdentifierInfo *OnceID) : PragmaHandler(OnceID) {}
+  virtual void HandlePragma(Preprocessor &PP, Token &OnceTok) {
+    PP.CheckEndOfDirective("pragma once");
+    PP.HandlePragmaOnce(OnceTok);
+  }
+};
+
+/// PragmaMarkHandler - "#pragma mark ..." is ignored by the compiler, and the
+/// rest of the line is not lexed.
+struct PragmaMarkHandler : public PragmaHandler {
+  PragmaMarkHandler(const IdentifierInfo *MarkID) : PragmaHandler(MarkID) {}
+  virtual void HandlePragma(Preprocessor &PP, Token &MarkTok) {
+    PP.HandlePragmaMark();
+  }
+};
+
+/// PragmaPoisonHandler - "#pragma poison x" marks x as not usable.
+struct PragmaPoisonHandler : public PragmaHandler {
+  PragmaPoisonHandler(const IdentifierInfo *ID) : PragmaHandler(ID) {}
+  virtual void HandlePragma(Preprocessor &PP, Token &PoisonTok) {
+    PP.HandlePragmaPoison(PoisonTok);
+  }
+};
+
+/// PragmaSystemHeaderHandler - "#pragma system_header" marks the current file
+/// as a system header, which silences warnings in it.
+struct PragmaSystemHeaderHandler : public PragmaHandler {
+  PragmaSystemHeaderHandler(const IdentifierInfo *ID) : PragmaHandler(ID) {}
+  virtual void HandlePragma(Preprocessor &PP, Token &SHToken) {
+    PP.HandlePragmaSystemHeader(SHToken);
+    PP.CheckEndOfDirective("pragma");
+  }
+};
+
+/// PragmaDependencyHandler - "#pragma dependency" warns if the named file is
+/// newer than the current one.
+struct PragmaDependencyHandler : public PragmaHandler {
+  PragmaDependencyHandler(const IdentifierInfo *ID) : PragmaHandler(ID) {}
+  virtual void HandlePragma(Preprocessor &PP, Token &DepToken) {
+    PP.HandlePragmaDependency(DepToken);
+  }
+};
+
+/// PragmaDiagnosticHandler - e.g. '#pragma GCC diagnostic ignored "-Wformat"'
+/// Since clang's diagnostic supports extended functionality beyond GCC's
+/// the constructor takes a clangMode flag to tell it whether or not to allow
+/// clang's extended functionality, or whether to reject it.
+struct PragmaDiagnosticHandler : public PragmaHandler {
+private:
+  const bool ClangMode;
+public:
+  PragmaDiagnosticHandler(const IdentifierInfo *ID,
+                          const bool clangMode) : PragmaHandler(ID),
+                                                  ClangMode(clangMode) {}
+  virtual void HandlePragma(Preprocessor &PP, Token &DiagToken) {
+    Token Tok;
+    PP.LexUnexpandedToken(Tok);
+    if (Tok.isNot(tok::identifier)) {
+      unsigned Diag = ClangMode ? diag::warn_pragma_diagnostic_clang_invalid
+                                 : diag::warn_pragma_diagnostic_gcc_invalid;
+      PP.Diag(Tok, Diag);
+      return;
+    }
+    IdentifierInfo *II = Tok.getIdentifierInfo();
+
+    // Map the action keyword to a diagnostic mapping.  'push'/'pop' are
+    // clang extensions handled immediately below.
+    diag::Mapping Map;
+    if (II->isStr("warning"))
+      Map = diag::MAP_WARNING;
+    else if (II->isStr("error"))
+      Map = diag::MAP_ERROR;
+    else if (II->isStr("ignored"))
+      Map = diag::MAP_IGNORE;
+    else if (II->isStr("fatal"))
+      Map = diag::MAP_FATAL;
+    else if (ClangMode) {
+      if (II->isStr("pop")) {
+        if (!PP.getDiagnostics().popMappings())
+          PP.Diag(Tok, diag::warn_pragma_diagnostic_clang_cannot_ppp);
+        return;
+      }
+
+      if (II->isStr("push")) {
+        PP.getDiagnostics().pushMappings();
+        return;
+      }
+
+      PP.Diag(Tok, diag::warn_pragma_diagnostic_clang_invalid);
+      return;
+    } else {
+      PP.Diag(Tok, diag::warn_pragma_diagnostic_gcc_invalid);
+      return;
+    }
+
+    PP.LexUnexpandedToken(Tok);
+
+    // We need at least one string.
+    if (Tok.isNot(tok::string_literal)) {
+      PP.Diag(Tok.getLocation(), diag::warn_pragma_diagnostic_invalid_token);
+      return;
+    }
+
+    // String concatenation allows multiple strings, which can even come from
+    // macro expansion.
+    // "foo " "bar" "Baz"
+    llvm::SmallVector<Token, 4> StrToks;
+    while (Tok.is(tok::string_literal)) {
+      StrToks.push_back(Tok);
+      PP.LexUnexpandedToken(Tok);
+    }
+
+    if (Tok.isNot(tok::eom)) {
+      PP.Diag(Tok.getLocation(), diag::warn_pragma_diagnostic_invalid_token);
+      return;
+    }
+
+    // Concatenate and parse the strings.
+    StringLiteralParser Literal(&StrToks[0], StrToks.size(), PP);
+    assert(!Literal.AnyWide && "Didn't allow wide strings in");
+    if (Literal.hadError)
+      return;
+    if (Literal.Pascal) {
+      unsigned Diag = ClangMode ? diag::warn_pragma_diagnostic_clang_invalid
+                                 : diag::warn_pragma_diagnostic_gcc_invalid;
+      PP.Diag(Tok, Diag);
+      return;
+    }
+
+    std::string WarningName(Literal.GetString(),
+                            Literal.GetString()+Literal.GetStringLength());
+
+    // The option must have the form "-W<group>".
+    if (WarningName.size() < 3 || WarningName[0] != '-' ||
+        WarningName[1] != 'W') {
+      PP.Diag(StrToks[0].getLocation(),
+              diag::warn_pragma_diagnostic_invalid_option);
+      return;
+    }
+
+    // Skip the "-W" prefix when resolving the group name.
+    if (PP.getDiagnostics().setDiagnosticGroupMapping(WarningName.c_str()+2,
+                                                      Map))
+      PP.Diag(StrToks[0].getLocation(),
+              diag::warn_pragma_diagnostic_unknown_warning) << WarningName;
+  }
+};
+
+/// PragmaCommentHandler - "#pragma comment ...".
+struct PragmaCommentHandler : public PragmaHandler {
+  PragmaCommentHandler(const IdentifierInfo *ID) : PragmaHandler(ID) {}
+  virtual void HandlePragma(Preprocessor &PP, Token &CommentTok) {
+    PP.HandlePragmaComment(CommentTok);
+  }
+};
+
+// Pragma STDC implementations.
+
+/// STDCSetting - The ON/OFF/DEFAULT argument of a #pragma STDC directive.
+enum STDCSetting {
+  STDC_ON, STDC_OFF, STDC_DEFAULT, STDC_INVALID
+};
+
+/// LexOnOffSwitch - Lex the ON/OFF/DEFAULT keyword (plus trailing EOM) of a
+/// #pragma STDC directive, diagnosing malformed input.
+static STDCSetting LexOnOffSwitch(Preprocessor &PP) {
+  Token Tok;
+  PP.LexUnexpandedToken(Tok);
+
+  if (Tok.isNot(tok::identifier)) {
+    PP.Diag(Tok, diag::ext_stdc_pragma_syntax);
+    return STDC_INVALID;
+  }
+  IdentifierInfo *II = Tok.getIdentifierInfo();
+  STDCSetting Result;
+  if (II->isStr("ON"))
+    Result = STDC_ON;
+  else if (II->isStr("OFF"))
+    Result = STDC_OFF;
+  else if (II->isStr("DEFAULT"))
+    Result = STDC_DEFAULT;
+  else {
+    PP.Diag(Tok, diag::ext_stdc_pragma_syntax);
+    return STDC_INVALID;
+  }
+
+  // Verify that this is followed by EOM.
+  PP.LexUnexpandedToken(Tok);
+  if (Tok.isNot(tok::eom))
+    PP.Diag(Tok, diag::ext_stdc_pragma_syntax_eom);
+  return Result;
+}
+
+/// PragmaSTDC_FP_CONTRACTHandler - "#pragma STDC FP_CONTRACT ...".
+struct PragmaSTDC_FP_CONTRACTHandler : public PragmaHandler {
+  PragmaSTDC_FP_CONTRACTHandler(const IdentifierInfo *ID) : PragmaHandler(ID) {}
+  virtual void HandlePragma(Preprocessor &PP, Token &Tok) {
+    // We just ignore the setting of FP_CONTRACT. Since we don't do contractions
+    // at all, our default is OFF and setting it to ON is an optimization hint
+    // we can safely ignore.  When we support -ffma or something, we would need
+    // to diagnose that we are ignoring FMA.
+    LexOnOffSwitch(PP);
+  }
+};
+
+/// PragmaSTDC_FENV_ACCESSHandler - "#pragma STDC FENV_ACCESS ...".
+struct PragmaSTDC_FENV_ACCESSHandler : public PragmaHandler {
+  PragmaSTDC_FENV_ACCESSHandler(const IdentifierInfo *ID) : PragmaHandler(ID) {}
+  virtual void HandlePragma(Preprocessor &PP, Token &Tok) {
+    // Turning FENV_ACCESS on is not supported; warn but continue.
+    if (LexOnOffSwitch(PP) == STDC_ON)
+      PP.Diag(Tok, diag::warn_stdc_fenv_access_not_supported);
+  }
+};
+
+/// PragmaSTDC_CX_LIMITED_RANGEHandler - "#pragma STDC CX_LIMITED_RANGE ...".
+struct PragmaSTDC_CX_LIMITED_RANGEHandler : public PragmaHandler {
+  PragmaSTDC_CX_LIMITED_RANGEHandler(const IdentifierInfo *ID)
+    : PragmaHandler(ID) {}
+  virtual void HandlePragma(Preprocessor &PP, Token &Tok) {
+    // Setting is validated for syntax but otherwise ignored.
+    LexOnOffSwitch(PP);
+  }
+};
+
+/// PragmaSTDC_UnknownHandler - "#pragma STDC ...".  Registered as the
+/// null-named catch-all for the STDC namespace.
+struct PragmaSTDC_UnknownHandler : public PragmaHandler {
+  PragmaSTDC_UnknownHandler() : PragmaHandler(0) {}
+  virtual void HandlePragma(Preprocessor &PP, Token &UnknownTok) {
+    // C99 6.10.6p2, unknown forms are not allowed.
+    PP.Diag(UnknownTok, diag::ext_stdc_pragma_ignored);
+  }
+};
+
+}  // end anonymous namespace
+
+
+/// RegisterBuiltinPragmas - Install the standard preprocessor pragmas:
+/// #pragma GCC poison/system_header/dependency and #pragma once.  The GCC
+/// handlers are mirrored under the "clang" namespace, with the diagnostic
+/// handler enabling clang-only extensions (push/pop) there.
+void Preprocessor::RegisterBuiltinPragmas() {
+  AddPragmaHandler(0, new PragmaOnceHandler(getIdentifierInfo("once")));
+  AddPragmaHandler(0, new PragmaMarkHandler(getIdentifierInfo("mark")));
+
+  // #pragma GCC ...
+  AddPragmaHandler("GCC", new PragmaPoisonHandler(getIdentifierInfo("poison")));
+  AddPragmaHandler("GCC", new PragmaSystemHeaderHandler(
+                                          getIdentifierInfo("system_header")));
+  AddPragmaHandler("GCC", new PragmaDependencyHandler(
+                                          getIdentifierInfo("dependency")));
+  AddPragmaHandler("GCC", new PragmaDiagnosticHandler(
+                                              getIdentifierInfo("diagnostic"),
+                                              false));
+  // #pragma clang ...
+  AddPragmaHandler("clang", new PragmaPoisonHandler(
+                                          getIdentifierInfo("poison")));
+  AddPragmaHandler("clang", new PragmaSystemHeaderHandler(
+                                          getIdentifierInfo("system_header")));
+  AddPragmaHandler("clang", new PragmaDependencyHandler(
+                                          getIdentifierInfo("dependency")));
+  AddPragmaHandler("clang", new PragmaDiagnosticHandler(
+                                          getIdentifierInfo("diagnostic"),
+                                          true));
+
+  // #pragma STDC ... (C99 6.10.6).
+  AddPragmaHandler("STDC", new PragmaSTDC_FP_CONTRACTHandler(
+                                             getIdentifierInfo("FP_CONTRACT")));
+  AddPragmaHandler("STDC", new PragmaSTDC_FENV_ACCESSHandler(
+                                             getIdentifierInfo("FENV_ACCESS")));
+  AddPragmaHandler("STDC", new PragmaSTDC_CX_LIMITED_RANGEHandler(
+                                        getIdentifierInfo("CX_LIMITED_RANGE")));
+  AddPragmaHandler("STDC", new PragmaSTDC_UnknownHandler());
+
+  // MS extensions.
+  if (Features.Microsoft)
+    AddPragmaHandler(0, new PragmaCommentHandler(getIdentifierInfo("comment")));
+}
diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp
new file mode 100644
index 0000000..df0e702
--- /dev/null
+++ b/lib/Lex/Preprocessor.cpp
@@ -0,0 +1,596 @@
+//===--- Preprocess.cpp - C Language Family Preprocessor Implementation ---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file implements the Preprocessor interface.
+//
+//===----------------------------------------------------------------------===//
+//
+// Options to support:
+//   -H       - Print the name of each header file used.
+//   -d[DNI] - Dump various things.
+//   -fworking-directory - #line's with preprocessor's working dir.
+//   -fpreprocessed
+//   -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
+//   -W*
+//   -w
+//
+// Messages to emit:
+//   "Multiple include guards may be useful for:\n"
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/Preprocessor.h"
+#include "MacroArgs.h"
+#include "clang/Lex/ExternalPreprocessorSource.h"
+#include "clang/Lex/HeaderSearch.h"
+#include "clang/Lex/MacroInfo.h"
+#include "clang/Lex/Pragma.h"
+#include "clang/Lex/ScratchBuffer.h"
+#include "clang/Lex/LexDiagnostic.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/TargetInfo.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstdio>
+using namespace clang;
+
+//===----------------------------------------------------------------------===//
+// Out-of-line definition of the ExternalPreprocessorSource destructor.
+ExternalPreprocessorSource::~ExternalPreprocessorSource() { }
+
+/// Construct a preprocessor over the given diagnostics, language options,
+/// target, source manager, and header-search state.  'Headers' is deleted in
+/// the destructor only when OwnsHeaders is true.  All statistics, comment
+/// retention, and macro-expansion state are reset to their defaults here.
+Preprocessor::Preprocessor(Diagnostic &diags, const LangOptions &opts,
+                           const TargetInfo &target, SourceManager &SM,
+                           HeaderSearch &Headers,
+                           IdentifierInfoLookup* IILookup,
+                           bool OwnsHeaders)
+  : Diags(&diags), Features(opts), Target(target),FileMgr(Headers.getFileMgr()),
+    SourceMgr(SM), HeaderInfo(Headers), ExternalSource(0),
+    Identifiers(opts, IILookup), BuiltinInfo(Target), CodeCompletionFile(0),
+    CurPPLexer(0), CurDirLookup(0), Callbacks(0), MacroArgCache(0) {
+  ScratchBuf = new ScratchBuffer(SourceMgr);
+  CounterValue = 0; // __COUNTER__ starts at 0.
+  OwnsHeaderSearch = OwnsHeaders;
+
+  // Clear stats.
+  NumDirectives = NumDefined = NumUndefined = NumPragma = 0;
+  NumIf = NumElse = NumEndif = 0;
+  NumEnteredSourceFiles = 0;
+  NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0;
+  NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0;
+  MaxIncludeStackDepth = 0;
+  NumSkipped = 0;
+
+  // Default to discarding comments.
+  KeepComments = false;
+  KeepMacroComments = false;
+
+  // Macro expansion is enabled.
+  DisableMacroExpansion = false;
+  InMacroArgs = false;
+  NumCachedTokenLexers = 0;
+
+  CachedLexPos = 0;
+
+  // We haven't read anything from the external source.
+  ReadMacrosFromExternalSource = false;
+
+  // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro.
+  // This gets unpoisoned where it is allowed.
+  (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
+
+  // Initialize the pragma handlers (anonymous root namespace).
+  PragmaHandlers = new PragmaNamespace(0);
+  RegisterBuiltinPragmas();
+
+  // Initialize builtin macros like __LINE__ and friends.
+  RegisterBuiltinMacros();
+}
+
+/// Destructor: tears down the include stack, macro definitions, cached token
+/// lexers, cached macro-argument lists, pragma handlers, the scratch buffer,
+/// callbacks, and (when owned) the HeaderSearch object.
+Preprocessor::~Preprocessor() {
+  assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!");
+
+  while (!IncludeMacroStack.empty()) {
+    delete IncludeMacroStack.back().TheLexer;
+    delete IncludeMacroStack.back().TheTokenLexer;
+    IncludeMacroStack.pop_back();
+  }
+
+  // Free any macro definitions.
+  for (llvm::DenseMap<IdentifierInfo*, MacroInfo*>::iterator I =
+       Macros.begin(), E = Macros.end(); I != E; ++I) {
+    // We don't need to free the MacroInfo objects directly.  These
+    // will be released when the BumpPtrAllocator 'BP' object gets
+    // destroyed.  We still need to run the dtor, however, to free
+    // memory allocated by MacroInfo.
+    I->second->Destroy(BP);
+    I->first->setHasMacroDefinition(false);
+  }
+
+  // Free any cached macro expanders.
+  for (unsigned i = 0, e = NumCachedTokenLexers; i != e; ++i)
+    delete TokenLexerCache[i];
+
+  // Free any cached MacroArgs; deallocate() returns the next entry in the
+  // cache chain, so the loop advances by its return value.
+  for (MacroArgs *ArgList = MacroArgCache; ArgList; )
+    ArgList = ArgList->deallocate();
+
+  // Release pragma information.
+  delete PragmaHandlers;
+
+  // Delete the scratch buffer info.
+  delete ScratchBuf;
+
+  // Delete the header search info, if we own it.
+  if (OwnsHeaderSearch)
+    delete &HeaderInfo;
+
+  delete Callbacks;
+}
+
+/// setPTHManager - Take ownership of the given PTH manager and register its
+/// stat cache with the file manager.
+void Preprocessor::setPTHManager(PTHManager* pm) {
+  PTH.reset(pm);
+  FileMgr.addStatCache(PTH->createStatCache());
+}
+
+/// DumpToken - Print the kind and spelling of 'Tok' to llvm::errs(); when
+/// DumpFlags is set, also print its flag bits and source location.
+void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
+  llvm::errs() << tok::getTokenName(Tok.getKind()) << " '"
+               << getSpelling(Tok) << "'";
+
+  if (!DumpFlags) return;
+
+  llvm::errs() << "\t";
+  if (Tok.isAtStartOfLine())
+    llvm::errs() << " [StartOfLine]";
+  if (Tok.hasLeadingSpace())
+    llvm::errs() << " [LeadingSpace]";
+  if (Tok.isExpandDisabled())
+    llvm::errs() << " [ExpandDisabled]";
+  if (Tok.needsCleaning()) {
+    // Show the raw source characters for tokens containing trigraphs or
+    // escaped newlines.
+    const char *Start = SourceMgr.getCharacterData(Tok.getLocation());
+    llvm::errs() << " [UnClean='" << std::string(Start, Start+Tok.getLength())
+                 << "']";
+  }
+
+  llvm::errs() << "\tLoc=<";
+  DumpLocation(Tok.getLocation());
+  llvm::errs() << ">";
+}
+
+/// DumpLocation - Print the given source location to llvm::errs().
+void Preprocessor::DumpLocation(SourceLocation Loc) const {
+  Loc.dump(SourceMgr);
+}
+
+/// DumpMacro - Print each replacement token of the macro to llvm::errs(),
+/// separated by two spaces, followed by a newline.
+void Preprocessor::DumpMacro(const MacroInfo &MI) const {
+  llvm::errs() << "MACRO: ";
+  unsigned NumToks = MI.getNumTokens();
+  for (unsigned Idx = 0; Idx != NumToks; ++Idx) {
+    DumpToken(MI.getReplacementToken(Idx));
+    llvm::errs() << "  ";
+  }
+  llvm::errs() << "\n";
+}
+
+/// PrintStats - Print summary statistics about the preprocessor's activity
+/// (directive counts, include depth, macro-expansion counts) to llvm::errs().
+void Preprocessor::PrintStats() {
+  llvm::errs() << "\n*** Preprocessor Stats:\n";
+  llvm::errs() << NumDirectives << " directives found:\n";
+  llvm::errs() << "  " << NumDefined << " #define.\n";
+  llvm::errs() << "  " << NumUndefined << " #undef.\n";
+  llvm::errs() << "  #include/#include_next/#import:\n";
+  llvm::errs() << "    " << NumEnteredSourceFiles << " source files entered.\n";
+  llvm::errs() << "    " << MaxIncludeStackDepth << " max include stack depth\n";
+  llvm::errs() << "  " << NumIf << " #if/#ifndef/#ifdef.\n";
+  llvm::errs() << "  " << NumElse << " #else/#elif.\n";
+  llvm::errs() << "  " << NumEndif << " #endif.\n";
+  llvm::errs() << "  " << NumPragma << " #pragma.\n";
+  // Note: directive list spelled to match the #if line above (was missing a
+  // '/' between #ifndef and #ifdef).
+  llvm::errs() << NumSkipped << " #if/#ifndef/#ifdef regions skipped\n";
+
+  llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
+               << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
+               << NumFastMacroExpanded << " on the fast path.\n";
+  llvm::errs() << (NumFastTokenPaste+NumTokenPaste)
+               << " token paste (##) operations performed, "
+               << NumFastTokenPaste << " on the fast path.\n";
+}
+
+/// macro_begin - Begin iterator over the macro table.  When requested and an
+/// external source is attached, its macro definitions are imported first
+/// (at most once per Preprocessor).
+Preprocessor::macro_iterator
+Preprocessor::macro_begin(bool IncludeExternalMacros) const {
+  const bool NeedExternalMacros =
+    IncludeExternalMacros && ExternalSource && !ReadMacrosFromExternalSource;
+  if (NeedExternalMacros) {
+    ReadMacrosFromExternalSource = true;
+    ExternalSource->ReadDefinedMacros();
+  }
+  return Macros.begin();
+}
+
+/// macro_end - End iterator over the macro table.  When requested and an
+/// external source is attached, its macro definitions are imported first
+/// (at most once per Preprocessor).
+Preprocessor::macro_iterator
+Preprocessor::macro_end(bool IncludeExternalMacros) const {
+  const bool NeedExternalMacros =
+    IncludeExternalMacros && ExternalSource && !ReadMacrosFromExternalSource;
+  if (NeedExternalMacros) {
+    ReadMacrosFromExternalSource = true;
+    ExternalSource->ReadDefinedMacros();
+  }
+  return Macros.end();
+}
+
+/// SetCodeCompletionPoint - Record 'File' as the file containing the
+/// code-completion point and truncate its buffer contents just before the
+/// given 1-based line/column.  Passing a null File clears any previous
+/// completion point.  Returns true only when the file's contents could not
+/// be loaded.
+bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File, 
+                                          unsigned TruncateAtLine, 
+                                          unsigned TruncateAtColumn) {
+  using llvm::MemoryBuffer;
+
+  CodeCompletionFile = File;
+
+  // Okay to clear out the code-completion point by passing NULL.
+  if (!CodeCompletionFile)
+    return false;
+
+  // Load the actual file's contents.
+  const MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File);
+  if (!Buffer)
+    return true;
+
+  // Find the byte position of the truncation point.
+  const char *Position = Buffer->getBufferStart();
+  for (unsigned Line = 1; Line < TruncateAtLine; ++Line) {
+    for (; *Position; ++Position) {
+      if (*Position != '\r' && *Position != '\n')
+        continue;
+      
+      // Eat \r\n or \n\r as a single line.
+      if ((Position[1] == '\r' || Position[1] == '\n') &&
+          Position[0] != Position[1])
+        ++Position;
+      ++Position;
+      break;
+    }
+  }
+  
+  // Advance to the requested 1-based column on that line.
+  // NOTE(review): assumes the column exists on the line; only the buffer-end
+  // check below guards against running past the file.
+  Position += TruncateAtColumn - 1;
+  
+  // Truncate the buffer: replace the file's contents with a copy that ends
+  // at the completion point.
+  if (Position < Buffer->getBufferEnd()) {
+    MemoryBuffer *TruncatedBuffer 
+      = MemoryBuffer::getMemBufferCopy(Buffer->getBufferStart(), Position, 
+                                       Buffer->getBufferIdentifier());
+    SourceMgr.overrideFileContents(File, TruncatedBuffer);
+  }
+
+  return false;
+}
+
+/// isCodeCompletionFile - Return true if 'FileLoc' is a file location inside
+/// the file designated as the code-completion file.
+bool Preprocessor::isCodeCompletionFile(SourceLocation FileLoc) const {
+  // Only plain file locations inside the registered completion file qualify.
+  if (!CodeCompletionFile || !FileLoc.isFileID())
+    return false;
+  FileID FID = SourceMgr.getFileID(FileLoc);
+  return SourceMgr.getFileEntryForID(FID) == CodeCompletionFile;
+}
+
+//===----------------------------------------------------------------------===//
+// Token Spelling
+//===----------------------------------------------------------------------===//
+
+/// getSpelling() - Return the 'spelling' of this token.  The spelling of a
+/// token are the characters used to represent the token in the source file
+/// after trigraph expansion and escaped-newline folding.  In particular, this
+/// wants to get the true, uncanonicalized, spelling of things like digraphs
+/// UCNs, etc.
+std::string Preprocessor::getSpelling(const Token &Tok,
+                                      const SourceManager &SourceMgr,
+                                      const LangOptions &Features) {
+  assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
+
+  // If this token contains nothing interesting, return it directly.
+  const char* TokStart = SourceMgr.getCharacterData(Tok.getLocation());
+  if (!Tok.needsCleaning())
+    return std::string(TokStart, TokStart+Tok.getLength());
+
+  std::string Result;
+  Result.reserve(Tok.getLength());
+
+  // Otherwise, hard case, relex the characters into the string.
+  for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength();
+       Ptr != End; ) {
+    unsigned CharSize;
+    Result.push_back(Lexer::getCharAndSizeNoWarn(Ptr, CharSize, Features));
+    Ptr += CharSize;
+  }
+  // A cleaned spelling must be shorter than the raw source range; equal
+  // lengths mean NeedsCleaning was set spuriously.
+  assert(Result.size() != unsigned(Tok.getLength()) &&
+         "NeedsCleaning flag set on something that didn't need cleaning!");
+  return Result;
+}
+
+/// getSpelling() - Return the 'spelling' of this token: the characters used
+/// to represent it in the source file after trigraph expansion and
+/// escaped-newline folding (i.e. the true, uncanonicalized spelling of
+/// digraphs, UCNs, etc.).
+std::string Preprocessor::getSpelling(const Token &Tok) const {
+  // Delegate to the static overload with this preprocessor's own source
+  // manager and language options.
+  return getSpelling(Tok, SourceMgr, Features);
+}
+
+/// getSpelling - This method is used to get the spelling of a token into a
+/// preallocated buffer, instead of as an std::string.  The caller is required
+/// to allocate enough space for the token, which is guaranteed to be at least
+/// Tok.getLength() bytes long.  The actual length of the token is returned.
+///
+/// Note that this method may do two possible things: it may either fill in
+/// the buffer specified with characters, or it may *change the input pointer*
+/// to point to a constant buffer with the data already in it (avoiding a
+/// copy).  The caller is not allowed to modify the returned buffer pointer
+/// if an internal buffer is returned.
+unsigned Preprocessor::getSpelling(const Token &Tok,
+                                   const char *&Buffer) const {
+  assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
+
+  // If this token is an identifier, just return the string from the identifier
+  // table, which is very quick.
+  if (const IdentifierInfo *II = Tok.getIdentifierInfo()) {
+    Buffer = II->getNameStart();
+    return II->getLength();
+  }
+
+  // Otherwise, compute the start of the token in the input lexer buffer.
+  const char *TokStart = 0;
+
+  // Literal tokens may carry a pointer to their cleaned-up data directly.
+  if (Tok.isLiteral())
+    TokStart = Tok.getLiteralData();
+
+  if (TokStart == 0)
+    TokStart = SourceMgr.getCharacterData(Tok.getLocation());
+
+  // If this token contains nothing interesting, return it directly.
+  if (!Tok.needsCleaning()) {
+    Buffer = TokStart;
+    return Tok.getLength();
+  }
+
+  // Otherwise, hard case, relex the characters into the string: write the
+  // cleaned spelling into the caller-provided buffer.
+  char *OutBuf = const_cast<char*>(Buffer);
+  for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength();
+       Ptr != End; ) {
+    unsigned CharSize;
+    *OutBuf++ = Lexer::getCharAndSizeNoWarn(Ptr, CharSize, Features);
+    Ptr += CharSize;
+  }
+  // A cleaned spelling must be shorter than the raw source range; equal
+  // lengths mean NeedsCleaning was set spuriously.
+  assert(unsigned(OutBuf-Buffer) != Tok.getLength() &&
+         "NeedsCleaning flag set on something that didn't need cleaning!");
+
+  return OutBuf-Buffer;
+}
+
+/// CreateString - Plop the specified string into a scratch buffer and return a
+/// location for it.  If specified, the source location provides a source
+/// location for the token.
+void Preprocessor::CreateString(const char *Buf, unsigned Len, Token &Tok,
+                                SourceLocation InstantiationLoc) {
+  Tok.setLength(Len);
+
+  // Copy the string into the scratch buffer; DestPtr receives the address of
+  // the stable copy.
+  const char *DestPtr;
+  SourceLocation Loc = ScratchBuf->getToken(Buf, Len, DestPtr);
+
+  // If a source location was provided, wrap the scratch location in an
+  // instantiation location so diagnostics can point at the original code.
+  if (InstantiationLoc.isValid())
+    Loc = SourceMgr.createInstantiationLoc(Loc, InstantiationLoc,
+                                           InstantiationLoc, Len);
+  Tok.setLocation(Loc);
+
+  // If this is a literal token, set the pointer data.
+  if (Tok.isLiteral())
+    Tok.setLiteralData(DestPtr);
+}
+
+
+/// AdvanceToTokenCharacter - Given a location that specifies the start of a
+/// token, return a new location that specifies a character within the token.
+/// CharNo is the index of the desired character within the token's spelling.
+SourceLocation Preprocessor::AdvanceToTokenCharacter(SourceLocation TokStart,
+                                                     unsigned CharNo) {
+  // Figure out how many physical characters away the specified instantiation
+  // character is.  This needs to take into consideration newlines and
+  // trigraphs.
+  const char *TokPtr = SourceMgr.getCharacterData(TokStart);
+
+  // If they request the first char of the token, we're trivially done.
+  if (CharNo == 0 && Lexer::isObviouslySimpleCharacter(*TokPtr))
+    return TokStart;
+
+  unsigned PhysOffset = 0;
+
+  // The usual case is that tokens don't contain anything interesting.  Skip
+  // over the uninteresting characters.  If a token only consists of simple
+  // chars, this method is extremely fast.
+  while (Lexer::isObviouslySimpleCharacter(*TokPtr)) {
+    if (CharNo == 0)
+      return TokStart.getFileLocWithOffset(PhysOffset);
+    // Advance source pointer, remaining character count, and physical offset
+    // together.
+    ++TokPtr, --CharNo, ++PhysOffset;
+  }
+
+  // If we have a character that may be a trigraph or escaped newline, use a
+  // lexer to parse it correctly.
+  for (; CharNo; --CharNo) {
+    unsigned Size;
+    Lexer::getCharAndSizeNoWarn(TokPtr, Size, Features);
+    TokPtr += Size;
+    PhysOffset += Size;
+  }
+
+  // Final detail: if we end up on an escaped newline, we want to return the
+  // location of the actual byte of the token.  For example foo\<newline>bar
+  // advanced by 3 should return the location of b, not of \\.  One compounding
+  // detail of this is that the escape may be made by a trigraph.
+  if (!Lexer::isObviouslySimpleCharacter(*TokPtr))
+    PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr;
+
+  return TokStart.getFileLocWithOffset(PhysOffset);
+}
+
+/// getLocForEndOfToken - Return a location 'Offset' characters before the end
+/// of the token starting at 'Loc'.  Returns an invalid location for invalid
+/// or macro-instantiation locations, and 'Loc' itself when Offset consumes
+/// the whole token.
+SourceLocation Preprocessor::getLocForEndOfToken(SourceLocation Loc,
+                                                 unsigned Offset) {
+  // Only valid, plain file locations can be measured.
+  if (Loc.isInvalid() || !Loc.isFileID())
+    return SourceLocation();
+
+  unsigned Len = Lexer::MeasureTokenLength(Loc, getSourceManager(), Features);
+  if (Len <= Offset)
+    return Loc;
+
+  return AdvanceToTokenCharacter(Loc, Len - Offset);
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Preprocessor Initialization Methods
+//===----------------------------------------------------------------------===//
+
+
+/// EnterMainSourceFile - Enter the specified FileID as the main source file,
+/// which implicitly adds the builtin defines etc.
+void Preprocessor::EnterMainSourceFile() {
+  // We do not allow the preprocessor to reenter the main file.  Doing so will
+  // cause FileID's to accumulate information from both runs (e.g. #line
+  // information) and predefined macros aren't guaranteed to be set properly.
+  assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!");
+  FileID MainFileID = SourceMgr.getMainFileID();
+
+  // Enter the main file source buffer.
+  std::string ErrorStr;
+  bool Res = EnterSourceFile(MainFileID, 0, ErrorStr);
+  assert(!Res && "Entering main file should not fail!");
+
+  // Tell the header info that the main file was entered.  If the file is later
+  // #imported, it won't be re-entered.
+  if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID))
+    HeaderInfo.IncrementIncludeCount(FE);
+
+  // Preprocess Predefines to populate the initial preprocessor state.
+  // The buffer is created from the (start, end) character range of the
+  // Predefines string and named "<built-in>" for diagnostics.
+  llvm::MemoryBuffer *SB =
+    llvm::MemoryBuffer::getMemBufferCopy(Predefines.data(),
+                                         Predefines.data() + Predefines.size(),
+                                         "<built-in>");
+  assert(SB && "Cannot fail to create predefined source buffer");
+  FileID FID = SourceMgr.createFileIDForMemBuffer(SB);
+  assert(!FID.isInvalid() && "Could not create FileID for predefines?");
+
+  // Start parsing the predefines.
+  Res = EnterSourceFile(FID, 0, ErrorStr);
+  assert(!Res && "Entering predefines should not fail!");
+}
+
+
+//===----------------------------------------------------------------------===//
+// Lexer Event Handling.
+//===----------------------------------------------------------------------===//
+
+/// LookUpIdentifierInfo - Given a tok::identifier token, look up the
+/// identifier information for the token and install it into the token.
+/// BufPtr, if non-null, points at the token's characters in the lexed buffer
+/// and lets the common (no-cleaning) case avoid a spelling copy.
+IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier,
+                                                   const char *BufPtr) const {
+  assert(Identifier.is(tok::identifier) && "Not an identifier!");
+  assert(Identifier.getIdentifierInfo() == 0 && "Identinfo already exists!");
+
+  // Look up this token, see if it is a macro, or if it is a language keyword.
+  IdentifierInfo *II;
+  if (BufPtr && !Identifier.needsCleaning()) {
+    // No cleaning needed, just use the characters from the lexed buffer.
+    II = getIdentifierInfo(llvm::StringRef(BufPtr, Identifier.getLength()));
+  } else {
+    // Cleaning needed: spell the identifier into a small stack buffer, then
+    // look that up.
+    llvm::SmallVector<char, 64> IdentifierBuffer;
+    IdentifierBuffer.resize(Identifier.getLength());
+    const char *TmpBuf = &IdentifierBuffer[0];
+    unsigned Size = getSpelling(Identifier, TmpBuf);
+    II = getIdentifierInfo(llvm::StringRef(TmpBuf, Size));
+  }
+  Identifier.setIdentifierInfo(II);
+  return II;
+}
+
+
+/// HandleIdentifier - This callback is invoked when the lexer reads an
+/// identifier.  This callback looks up the identifier in the map and/or
+/// potentially macro expands it or turns it into a named token (like 'for').
+///
+/// Note that callers of this method are guarded by checking the
+/// IdentifierInfo's 'isHandleIdentifierCase' bit.  If this method changes, the
+/// IdentifierInfo methods that compute these properties will need to change to
+/// match.
+void Preprocessor::HandleIdentifier(Token &Identifier) {
+  assert(Identifier.getIdentifierInfo() &&
+         "Can't handle identifiers without identifier info!");
+
+  IdentifierInfo &II = *Identifier.getIdentifierInfo();
+
+  // If this identifier was poisoned, and if it was not produced from a macro
+  // expansion, emit an error.  (CurPPLexer is non-null only when lexing
+  // directly from a source buffer.)
+  if (II.isPoisoned() && CurPPLexer) {
+    if (&II != Ident__VA_ARGS__)   // We warn about __VA_ARGS__ with poisoning.
+      Diag(Identifier, diag::err_pp_used_poisoned_id);
+    else
+      Diag(Identifier, diag::ext_pp_bad_vaargs_use);
+  }
+
+  // If this is a macro to be expanded, do it.
+  if (MacroInfo *MI = getMacroInfo(&II)) {
+    if (!DisableMacroExpansion && !Identifier.isExpandDisabled()) {
+      if (MI->isEnabled()) {
+        // If expansion consumed the identifier, we're done.
+        if (!HandleMacroExpandedIdentifier(Identifier, MI))
+          return;
+      } else {
+        // C99 6.10.3.4p2 says that a disabled macro may never again be
+        // expanded, even if it's in a context where it could be expanded in the
+        // future.
+        Identifier.setFlag(Token::DisableExpand);
+      }
+    }
+  }
+
+  // C++ 2.11p2: If this is an alternative representation of a C++ operator,
+  // then we act as if it is the actual operator and not the textual
+  // representation of it.
+  if (II.isCPlusPlusOperatorKeyword())
+    Identifier.setIdentifierInfo(0);
+
+  // If this is an extension token, diagnose its use.
+  // We avoid diagnosing tokens that originate from macro definitions.
+  // FIXME: This warning is disabled in cases where it shouldn't be,
+  // like "#define TY typeof", "TY(1) x".
+  if (II.isExtensionToken() && !DisableMacroExpansion)
+    Diag(Identifier, diag::ext_token_used);
+}
+
+/// AddCommentHandler - Register a comment handler; each handler may be
+/// installed at most once.
+void Preprocessor::AddCommentHandler(CommentHandler *Handler) {
+  assert(Handler && "NULL comment handler");
+  assert(std::find(CommentHandlers.begin(), CommentHandlers.end(),
+                   Handler) == CommentHandlers.end() &&
+         "Comment handler already registered");
+  CommentHandlers.push_back(Handler);
+}
+
+/// RemoveCommentHandler - Unregister a previously added comment handler; the
+/// handler must currently be registered.
+void Preprocessor::RemoveCommentHandler(CommentHandler *Handler) {
+  std::vector<CommentHandler *>::iterator Pos =
+    std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler);
+  assert(Pos != CommentHandlers.end() && "Comment handler not registered");
+  CommentHandlers.erase(Pos);
+}
+
+/// HandleComment - Dispatch a lexed comment to all registered handlers.
+/// Returns true when the comment token in 'result' was replaced by the next
+/// token from the stream (because a handler pushed tokens and comments are
+/// not being retained).
+bool Preprocessor::HandleComment(Token &result, SourceRange Comment) {
+  // Give every registered handler a look at the comment, remembering whether
+  // any of them pushed tokens into the stream.
+  bool SawPendingTokens = false;
+  for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(),
+       E = CommentHandlers.end(); H != E; ++H)
+    SawPendingTokens |= (*H)->HandleComment(*this, Comment);
+
+  // If nothing was pushed, or comments are retained as real tokens, leave
+  // 'result' (the comment token) alone.
+  if (!SawPendingTokens || getCommentRetentionState())
+    return false;
+
+  // Otherwise replace the comment with the next token from the stream.
+  Lex(result);
+  return true;
+}
+
+// Out-of-line definition of the CommentHandler destructor.
+CommentHandler::~CommentHandler() { }
diff --git a/lib/Lex/PreprocessorLexer.cpp b/lib/Lex/PreprocessorLexer.cpp
new file mode 100644
index 0000000..e005c49
--- /dev/null
+++ b/lib/Lex/PreprocessorLexer.cpp
@@ -0,0 +1,45 @@
+//===--- PreprocessorLexer.cpp - C Language Family Lexer ------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file implements the PreprocessorLexer and Token interfaces.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/PreprocessorLexer.h"
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/LexDiagnostic.h"
+#include "clang/Basic/SourceManager.h"
+using namespace clang;
+
+/// LexIncludeFilename - After the preprocessor has parsed a #include, lex and
+/// (potentially) macro expand the filename.  On return, FilenameTok holds the
+/// lexed filename token, or tok::eom if the directive ended without one (in
+/// which case a diagnostic is emitted).
+void PreprocessorLexer::LexIncludeFilename(Token &FilenameTok) {
+  assert(ParsingPreprocessorDirective &&
+         ParsingFilename == false &&
+         "Must be in a preprocessing directive!");
+
+  // We are now parsing a filename!
+  ParsingFilename = true;
+
+  // Lex the filename.
+  IndirectLex(FilenameTok);
+
+  // We should have obtained the filename now.
+  ParsingFilename = false;
+
+  // No filename?  (tok::eom marks the end of the directive.)
+  if (FilenameTok.is(tok::eom))
+    PP->Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename);
+}
+
+/// getFileEntry - Return the FileEntry corresponding to this FileID.  Like
+/// getFileID(), this only works for lexers with attached preprocessors.
+const FileEntry *PreprocessorLexer::getFileEntry() const {
+  return PP->getSourceManager().getFileEntryForID(getFileID());
+}
diff --git a/lib/Lex/ScratchBuffer.cpp b/lib/Lex/ScratchBuffer.cpp
new file mode 100644
index 0000000..0e98c17
--- /dev/null
+++ b/lib/Lex/ScratchBuffer.cpp
@@ -0,0 +1,73 @@
+//===--- ScratchBuffer.cpp - Scratch space for forming tokens -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file implements the ScratchBuffer interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/ScratchBuffer.h"
+#include "clang/Basic/SourceManager.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <cstring>
+using namespace clang;
+
+// ScratchBufSize - The size of each chunk of scratch memory.  Slightly less
+//than a page, almost certainly enough for anything. :)
+static const unsigned ScratchBufSize = 4060;
+
+/// Construct a scratch buffer.  No memory is allocated here; BytesUsed is
+/// primed to the chunk size so the first getToken() call forces an
+/// allocation.
+ScratchBuffer::ScratchBuffer(SourceManager &SM) : SourceMgr(SM), CurBuffer(0) {
+  // Set BytesUsed so that the first call to getToken will require an alloc.
+  BytesUsed = ScratchBufSize;
+}
+
+/// getToken - Splat the specified text into a temporary MemoryBuffer and
+/// return a SourceLocation that refers to the token.  This is just like the
+/// method below, but returns a location that indicates the physloc of the
+/// token.  DestPtr receives a pointer to the stable copy of the text.
+SourceLocation ScratchBuffer::getToken(const char *Buf, unsigned Len,
+                                       const char *&DestPtr) {
+  // Len+2 accounts for the leading '\n' and the trailing NUL added below.
+  if (BytesUsed+Len+2 > ScratchBufSize)
+    AllocScratchBuffer(Len+2);
+
+  // Prefix the token with a \n, so that it looks like it is the first thing on
+  // its own virtual line in caret diagnostics.
+  CurBuffer[BytesUsed++] = '\n';
+
+  // Return a pointer to the character data.
+  DestPtr = CurBuffer+BytesUsed;
+
+  // Copy the token data into the buffer.
+  memcpy(CurBuffer+BytesUsed, Buf, Len);
+
+  // Remember that we used these bytes.
+  BytesUsed += Len+1;
+
+  // Add a NUL terminator to the token.  This keeps the tokens separated, in
+  // case they get relexed, and puts them on their own virtual lines in case a
+  // diagnostic points to one.
+  CurBuffer[BytesUsed-1] = '\0';
+
+  return BufferStartLoc.getFileLocWithOffset(BytesUsed-Len-1);
+}
+
+/// AllocScratchBuffer - Allocate a fresh scratch chunk of at least RequestLen
+/// bytes, register it with the SourceManager, and reset the write cursor.
+void ScratchBuffer::AllocScratchBuffer(unsigned RequestLen) {
+  // Only pay attention to the requested length if it is larger than our default
+  // page size.  If it is, we allocate an entire chunk for it.  This is to
+  // support gigantic tokens, which almost certainly won't happen. :)
+  if (RequestLen < ScratchBufSize)
+    RequestLen = ScratchBufSize;
+
+  llvm::MemoryBuffer *Buf =
+    llvm::MemoryBuffer::getNewMemBuffer(RequestLen, "<scratch space>");
+  FileID FID = SourceMgr.createFileIDForMemBuffer(Buf);
+  BufferStartLoc = SourceMgr.getLocForStartOfFile(FID);
+  CurBuffer = const_cast<char*>(Buf->getBufferStart());
+  BytesUsed = 1;
+  // Fixed: this previously wrote the character '0' instead of the NUL byte
+  // the comment (and the token-separation invariant) calls for.
+  CurBuffer[0] = '\0';  // Start out with a \0 for cleanliness.
+}
diff --git a/lib/Lex/TokenConcatenation.cpp b/lib/Lex/TokenConcatenation.cpp
new file mode 100644
index 0000000..0795164
--- /dev/null
+++ b/lib/Lex/TokenConcatenation.cpp
@@ -0,0 +1,221 @@
+//===--- TokenConcatenation.cpp - Token Concatenation Avoidance -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TokenConcatenation class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/TokenConcatenation.h"
+#include "clang/Lex/Preprocessor.h"
+using namespace clang;
+
+
+/// StartsWithL - Return true if the spelling of this token starts with 'L'.
+bool TokenConcatenation::StartsWithL(const Token &Tok) const {
+  if (!Tok.needsCleaning()) {
+    SourceManager &SM = PP.getSourceManager();
+    return *SM.getCharacterData(SM.getSpellingLoc(Tok.getLocation())) == 'L';
+  }
+
+  if (Tok.getLength() < 256) {
+    char Buffer[256];
+    const char *TokPtr = Buffer;
+    PP.getSpelling(Tok, TokPtr);
+    return TokPtr[0] == 'L';
+  }
+
+  return PP.getSpelling(Tok)[0] == 'L';
+}
+
+/// IsIdentifierL - Return true if the spelling of this token is literally
+/// 'L'.
+bool TokenConcatenation::IsIdentifierL(const Token &Tok) const {
+  if (!Tok.needsCleaning()) {
+    if (Tok.getLength() != 1)
+      return false;
+    SourceManager &SM = PP.getSourceManager();
+    return *SM.getCharacterData(SM.getSpellingLoc(Tok.getLocation())) == 'L';
+  }
+
+  if (Tok.getLength() < 256) {
+    char Buffer[256];
+    const char *TokPtr = Buffer;
+    if (PP.getSpelling(Tok, TokPtr) != 1)
+      return false;
+    return TokPtr[0] == 'L';
+  }
+
+  return PP.getSpelling(Tok) == "L";
+}
+
+TokenConcatenation::TokenConcatenation(Preprocessor &pp) : PP(pp) {
+  memset(TokenInfo, 0, sizeof(TokenInfo));
+
+  // These tokens have custom code in AvoidConcat.
+  TokenInfo[tok::identifier      ] |= aci_custom;
+  TokenInfo[tok::numeric_constant] |= aci_custom_firstchar;
+  TokenInfo[tok::period          ] |= aci_custom_firstchar;
+  TokenInfo[tok::amp             ] |= aci_custom_firstchar;
+  TokenInfo[tok::plus            ] |= aci_custom_firstchar;
+  TokenInfo[tok::minus           ] |= aci_custom_firstchar;
+  TokenInfo[tok::slash           ] |= aci_custom_firstchar;
+  TokenInfo[tok::less            ] |= aci_custom_firstchar;
+  TokenInfo[tok::greater         ] |= aci_custom_firstchar;
+  TokenInfo[tok::pipe            ] |= aci_custom_firstchar;
+  TokenInfo[tok::percent         ] |= aci_custom_firstchar;
+  TokenInfo[tok::colon           ] |= aci_custom_firstchar;
+  TokenInfo[tok::hash            ] |= aci_custom_firstchar;
+  TokenInfo[tok::arrow           ] |= aci_custom_firstchar;
+
+  // These tokens change behavior if followed by an '='.
+  TokenInfo[tok::amp         ] |= aci_avoid_equal;           // &=
+  TokenInfo[tok::plus        ] |= aci_avoid_equal;           // +=
+  TokenInfo[tok::minus       ] |= aci_avoid_equal;           // -=
+  TokenInfo[tok::slash       ] |= aci_avoid_equal;           // /=
+  TokenInfo[tok::less        ] |= aci_avoid_equal;           // <=
+  TokenInfo[tok::greater     ] |= aci_avoid_equal;           // >=
+  TokenInfo[tok::pipe        ] |= aci_avoid_equal;           // |=
+  TokenInfo[tok::percent     ] |= aci_avoid_equal;           // %=
+  TokenInfo[tok::star        ] |= aci_avoid_equal;           // *=
+  TokenInfo[tok::exclaim     ] |= aci_avoid_equal;           // !=
+  TokenInfo[tok::lessless    ] |= aci_avoid_equal;           // <<=
+  TokenInfo[tok::greaterequal] |= aci_avoid_equal;           // >>=
+  TokenInfo[tok::caret       ] |= aci_avoid_equal;           // ^=
+  TokenInfo[tok::equal       ] |= aci_avoid_equal;           // ==
+}
+
+/// GetFirstChar - Get the first character of the token \arg Tok,
+/// avoiding calls to getSpelling where possible.
+static char GetFirstChar(Preprocessor &PP, const Token &Tok) {
+  if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
+    // Avoid spelling identifiers, the most common form of token.
+    return II->getNameStart()[0];
+  } else if (!Tok.needsCleaning()) {
+    if (Tok.isLiteral() && Tok.getLiteralData()) {
+      return *Tok.getLiteralData();
+    } else {
+      SourceManager &SM = PP.getSourceManager();
+      return *SM.getCharacterData(SM.getSpellingLoc(Tok.getLocation()));
+    }
+  } else if (Tok.getLength() < 256) {
+    char Buffer[256];
+    const char *TokPtr = Buffer;
+    PP.getSpelling(Tok, TokPtr);
+    return TokPtr[0];
+  } else {
+    return PP.getSpelling(Tok)[0];
+  }
+}
+
/// AvoidConcat - If printing PrevTok immediately followed by Tok would cause
/// the two individual tokens to be lexed as a single token, return true
/// (which causes a space to be printed between them).  This allows the output
/// of -E mode to be lexed to the same token stream as lexing the input
/// directly would.
///
/// This code must conservatively return true if it doesn't want to be 100%
/// accurate.  This will cause the output to include extra space characters,
/// but the resulting output won't have incorrect concatenations going on.
/// Examples include "..", which we print with a space between, because we
/// don't want to track enough to tell "x.." from "...".
bool TokenConcatenation::AvoidConcat(const Token &PrevTok,
                                     const Token &Tok) const {
  // First, check to see if the tokens were directly adjacent in the original
  // source.  If they were, it must be okay to stick them together: if there
  // were an issue, the tokens would have been lexed differently.
  if (PrevTok.getLocation().isFileID() && Tok.getLocation().isFileID() &&
      PrevTok.getLocation().getFileLocWithOffset(PrevTok.getLength()) ==
        Tok.getLocation())
    return false;

  // Treat language keywords and named operators as plain identifiers for
  // table-lookup purposes.
  tok::TokenKind PrevKind = PrevTok.getKind();
  if (PrevTok.getIdentifierInfo())  // Language keyword or named operator.
    PrevKind = tok::identifier;

  // Look up information on when we should avoid concatenation with prevtok.
  unsigned ConcatInfo = TokenInfo[PrevKind];

  // If prevtok never causes a problem for anything after it, return quickly.
  if (ConcatInfo == 0) return false;

  if (ConcatInfo & aci_avoid_equal) {
    // If the next token is '=' or '==', avoid concatenation.
    if (Tok.is(tok::equal) || Tok.is(tok::equalequal))
      return true;
    ConcatInfo &= ~aci_avoid_equal;
  }

  // If the only reason to separate was the '=' case, we are done.
  if (ConcatInfo == 0) return false;

  // Basic algorithm: we look at the first character of the second token, and
  // determine whether it, if appended to the first token, would form (or
  // would contribute) to a larger token if concatenated.
  char FirstChar = 0;
  if (ConcatInfo & aci_custom) {
    // If the token does not need to know the first character, don't get it.
  } else {
    FirstChar = GetFirstChar(PP, Tok);
  }

  switch (PrevKind) {
  default: assert(0 && "InitAvoidConcatTokenInfo built wrong");
  case tok::identifier:   // id+id or id+number or id+L"foo".
    // id+'.'... will not append.
    if (Tok.is(tok::numeric_constant))
      return GetFirstChar(PP, Tok) != '.';

    if (Tok.getIdentifierInfo() || Tok.is(tok::wide_string_literal) /* ||
     Tok.is(tok::wide_char_literal)*/)
      return true;

    // If this isn't identifier + string, we're done.
    if (Tok.isNot(tok::char_constant) && Tok.isNot(tok::string_literal))
      return false;

    // FIXME: need a wide_char_constant!

    // If the string was a wide string L"foo" or wide char L'f', it would
    // concat with the previous identifier into fooL"bar".  Avoid this.
    if (StartsWithL(Tok))
      return true;

    // Otherwise, this is a narrow character or string.  If the *identifier*
    // is a literal 'L', avoid pasting L "foo" -> L"foo".
    return IsIdentifierL(PrevTok);
  case tok::numeric_constant:
    // Numbers absorb trailing alnum chars, further digits, exponent signs,
    // and '.' (e.g. 1e+5, 1.5).
    return isalnum(FirstChar) || Tok.is(tok::numeric_constant) ||
    FirstChar == '+' || FirstChar == '-' || FirstChar == '.';
  case tok::period:          // ..., .*, .1234
    return FirstChar == '.' || isdigit(FirstChar) ||
    (PP.getLangOptions().CPlusPlus && FirstChar == '*');
  case tok::amp:             // &&
    return FirstChar == '&';
  case tok::plus:            // ++
    return FirstChar == '+';
  case tok::minus:           // --, ->, ->*
    return FirstChar == '-' || FirstChar == '>';
  case tok::slash:           // /*, //
    return FirstChar == '*' || FirstChar == '/';
  case tok::less:            // <<, <<=, <:, <%
    return FirstChar == '<' || FirstChar == ':' || FirstChar == '%';
  case tok::greater:         // >>, >>=
    return FirstChar == '>';
  case tok::pipe:            // ||
    return FirstChar == '|';
  case tok::percent:         // %>, %:
    return FirstChar == '>' || FirstChar == ':';
  case tok::colon:           // ::, :>
    return FirstChar == '>' ||
    (PP.getLangOptions().CPlusPlus && FirstChar == ':');
  case tok::hash:            // ##, #@, %:%:
    return FirstChar == '#' || FirstChar == '@' || FirstChar == '%';
  case tok::arrow:           // ->*
    return PP.getLangOptions().CPlusPlus && FirstChar == '*';
  }
}
diff --git a/lib/Lex/TokenLexer.cpp b/lib/Lex/TokenLexer.cpp
new file mode 100644
index 0000000..5d95eb3
--- /dev/null
+++ b/lib/Lex/TokenLexer.cpp
@@ -0,0 +1,541 @@
+//===--- TokenLexer.cpp - Lex from a token stream -------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TokenLexer interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/TokenLexer.h"
+#include "MacroArgs.h"
+#include "clang/Lex/MacroInfo.h"
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Lex/LexDiagnostic.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace clang;
+
+
/// Create a TokenLexer for the specified macro with the specified actual
/// arguments.  Note that this ctor takes ownership of the ActualArgs pointer.
/// Tok is the macro name token; ILEnd is the end of the instantiation range.
void TokenLexer::Init(Token &Tok, SourceLocation ILEnd, MacroArgs *Actuals) {
  // If the client is reusing a TokenLexer, make sure to free any memory
  // associated with it.
  destroy();

  Macro = PP.getMacroInfo(Tok.getIdentifierInfo());
  ActualArgs = Actuals;
  CurToken = 0;

  InstantiateLocStart = Tok.getLocation();
  InstantiateLocEnd = ILEnd;
  // Remember the spacing of the macro name itself so the first expanded
  // token can inherit it in Lex().
  AtStartOfLine = Tok.isAtStartOfLine();
  HasLeadingSpace = Tok.hasLeadingSpace();
  // Point at the macro's stored replacement list; ExpandFunctionArguments
  // below may replace this with a freshly-built token array.
  Tokens = &*Macro->tokens_begin();
  OwnsTokens = false;
  DisableMacroExpansion = false;
  NumTokens = Macro->tokens_end()-Macro->tokens_begin();

  // If this is a function-like macro, expand the arguments and change
  // Tokens to point to the expanded tokens.
  if (Macro->isFunctionLike() && Macro->getNumArgs())
    ExpandFunctionArguments();

  // Mark the macro as currently disabled, so that it is not recursively
  // expanded.  The macro must be disabled only after argument pre-expansion of
  // function-like macro arguments occurs.
  Macro->DisableMacro();
}
+
+
+
+/// Create a TokenLexer for the specified token stream.  This does not
+/// take ownership of the specified token vector.
+void TokenLexer::Init(const Token *TokArray, unsigned NumToks,
+                      bool disableMacroExpansion, bool ownsTokens) {
+  // If the client is reusing a TokenLexer, make sure to free any memory
+  // associated with it.
+  destroy();
+
+  Macro = 0;
+  ActualArgs = 0;
+  Tokens = TokArray;
+  OwnsTokens = ownsTokens;
+  DisableMacroExpansion = disableMacroExpansion;
+  NumTokens = NumToks;
+  CurToken = 0;
+  InstantiateLocStart = InstantiateLocEnd = SourceLocation();
+  AtStartOfLine = false;
+  HasLeadingSpace = false;
+
+  // Set HasLeadingSpace/AtStartOfLine so that the first token will be
+  // returned unmodified.
+  if (NumToks != 0) {
+    AtStartOfLine   = TokArray[0].isAtStartOfLine();
+    HasLeadingSpace = TokArray[0].hasLeadingSpace();
+  }
+}
+
+
+void TokenLexer::destroy() {
+  // If this was a function-like macro that actually uses its arguments, delete
+  // the expanded tokens.
+  if (OwnsTokens) {
+    delete [] Tokens;
+    Tokens = 0;
+    OwnsTokens = false;
+  }
+
+  // TokenLexer owns its formal arguments.
+  if (ActualArgs) ActualArgs->destroy(PP);
+}
+
/// Expand the arguments of a function-like macro so that we can quickly
/// return preexpanded tokens from Tokens.  Implements C99 6.10.3.1 argument
/// substitution plus the #/##/#@ operators and GNU comma-elision extension.
void TokenLexer::ExpandFunctionArguments() {
  llvm::SmallVector<Token, 128> ResultToks;

  // Loop through 'Tokens', expanding them into ResultToks.  Keep
  // track of whether we change anything.  If not, no need to keep them.  If so,
  // we install the newly expanded sequence as the new 'Tokens' list.
  bool MadeChange = false;

  // NextTokGetsSpace - When this is true, the next token appended to the
  // output list will get a leading space, regardless of whether it had one to
  // begin with or not.  This is used for placemarker support.
  bool NextTokGetsSpace = false;

  for (unsigned i = 0, e = NumTokens; i != e; ++i) {
    // If we found the stringify operator, get the argument stringified.  The
    // preprocessor already verified that the following token is a macro name
    // when the #define was parsed.
    const Token &CurTok = Tokens[i];
    if (CurTok.is(tok::hash) || CurTok.is(tok::hashat)) {
      int ArgNo = Macro->getArgumentNum(Tokens[i+1].getIdentifierInfo());
      assert(ArgNo != -1 && "Token following # is not an argument?");

      Token Res;
      if (CurTok.is(tok::hash))  // Stringify
        Res = ActualArgs->getStringifiedArgument(ArgNo, PP);
      else {
        // 'charify': don't bother caching these.
        Res = MacroArgs::StringifyArgument(ActualArgs->getUnexpArgument(ArgNo),
                                           PP, true);
      }

      // The stringified/charified string leading space flag gets set to match
      // the #/#@ operator.
      if (CurTok.hasLeadingSpace() || NextTokGetsSpace)
        Res.setFlag(Token::LeadingSpace);

      ResultToks.push_back(Res);
      MadeChange = true;
      ++i;  // Skip arg name.
      NextTokGetsSpace = false;
      continue;
    }

    // Otherwise, if this is not an argument token, just add the token to the
    // output buffer.
    IdentifierInfo *II = CurTok.getIdentifierInfo();
    int ArgNo = II ? Macro->getArgumentNum(II) : -1;
    if (ArgNo == -1) {
      // This isn't an argument, just add it.
      ResultToks.push_back(CurTok);

      // Honor a pending placemarker-induced space.
      if (NextTokGetsSpace) {
        ResultToks.back().setFlag(Token::LeadingSpace);
        NextTokGetsSpace = false;
      }
      continue;
    }

    // An argument is expanded somehow, the result is different than the
    // input.
    MadeChange = true;

    // Otherwise, this is a use of the argument.  Find out if there is a paste
    // (##) operator before or after the argument.
    bool PasteBefore =
      !ResultToks.empty() && ResultToks.back().is(tok::hashhash);
    bool PasteAfter = i+1 != e && Tokens[i+1].is(tok::hashhash);

    // If it is not the LHS/RHS of a ## operator, we must pre-expand the
    // argument and substitute the expanded tokens into the result.  This is
    // C99 6.10.3.1p1.
    if (!PasteBefore && !PasteAfter) {
      const Token *ResultArgToks;

      // Only preexpand the argument if it could possibly need it.  This
      // avoids some work in common cases.
      const Token *ArgTok = ActualArgs->getUnexpArgument(ArgNo);
      if (ActualArgs->ArgNeedsPreexpansion(ArgTok, PP))
        ResultArgToks = &ActualArgs->getPreExpArgument(ArgNo, Macro, PP)[0];
      else
        ResultArgToks = ArgTok;  // Use non-preexpanded tokens.

      // If the arg token expanded into anything, append it.
      if (ResultArgToks->isNot(tok::eof)) {
        unsigned FirstResult = ResultToks.size();
        unsigned NumToks = MacroArgs::getArgLength(ResultArgToks);
        ResultToks.append(ResultArgToks, ResultArgToks+NumToks);

        // If any tokens were substituted from the argument, the whitespace
        // before the first token should match the whitespace of the arg
        // identifier.
        ResultToks[FirstResult].setFlagValue(Token::LeadingSpace,
                                             CurTok.hasLeadingSpace() ||
                                             NextTokGetsSpace);
        NextTokGetsSpace = false;
      } else {
        // If this is an empty argument, and if there was whitespace before the
        // formal token, make sure the next token gets whitespace before it.
        NextTokGetsSpace = CurTok.hasLeadingSpace();
      }
      continue;
    }

    // Okay, we have a token that is either the LHS or RHS of a paste (##)
    // argument.  It gets substituted as its non-pre-expanded tokens.
    const Token *ArgToks = ActualArgs->getUnexpArgument(ArgNo);
    unsigned NumToks = MacroArgs::getArgLength(ArgToks);
    if (NumToks) {  // Not an empty argument?
      // If this is the GNU ", ## __VA_ARGS__" extension, and we just learned
      // that __VA_ARGS__ expands to multiple tokens, avoid a pasting error when
      // the expander tries to paste ',' with the first token of the __VA_ARGS__
      // expansion.
      if (PasteBefore && ResultToks.size() >= 2 &&
          ResultToks[ResultToks.size()-2].is(tok::comma) &&
          (unsigned)ArgNo == Macro->getNumArgs()-1 &&
          Macro->isVariadic()) {
        // Remove the paste operator, report use of the extension.
        PP.Diag(ResultToks.back().getLocation(), diag::ext_paste_comma);
        ResultToks.pop_back();
      }

      ResultToks.append(ArgToks, ArgToks+NumToks);

      // If this token (the macro argument) was supposed to get leading
      // whitespace, transfer this information onto the first token of the
      // expansion.
      //
      // Do not do this if the paste operator occurs before the macro argument,
      // as in "A ## MACROARG".  In valid code, the first token will get
      // smooshed onto the preceding one anyway (forming AMACROARG).  In
      // assembler-with-cpp mode, invalid pastes are allowed through: in this
      // case, we do not want the extra whitespace to be added.  For example,
      // we want ". ## foo" -> ".foo" not ". foo".
      if ((CurTok.hasLeadingSpace() || NextTokGetsSpace) &&
          !PasteBefore)
        ResultToks[ResultToks.size()-NumToks].setFlag(Token::LeadingSpace);

      NextTokGetsSpace = false;
      continue;
    }

    // If an empty argument is on the LHS or RHS of a paste, the standard (C99
    // 6.10.3.3p2,3) calls for a bunch of placemarker stuff to occur.  We
    // implement this by eating ## operators when a LHS or RHS expands to
    // empty.
    NextTokGetsSpace |= CurTok.hasLeadingSpace();
    if (PasteAfter) {
      // Discard the argument token and skip (don't copy to the expansion
      // buffer) the paste operator after it.
      NextTokGetsSpace |= Tokens[i+1].hasLeadingSpace();
      ++i;
      continue;
    }

    // If this is on the RHS of a paste operator, we've already copied the
    // paste operator to the ResultToks list.  Remove it.
    assert(PasteBefore && ResultToks.back().is(tok::hashhash));
    NextTokGetsSpace |= ResultToks.back().hasLeadingSpace();
    ResultToks.pop_back();

    // If this is the __VA_ARGS__ token, and if the argument wasn't provided,
    // and if the macro had at least one real argument, and if the token before
    // the ## was a comma, remove the comma.
    if ((unsigned)ArgNo == Macro->getNumArgs()-1 && // is __VA_ARGS__
        ActualArgs->isVarargsElidedUse() &&       // Argument elided.
        !ResultToks.empty() && ResultToks.back().is(tok::comma)) {
      // Never add a space, even if the comma, ##, or arg had a space.
      NextTokGetsSpace = false;
      // Remove the paste operator, report use of the extension.
      PP.Diag(ResultToks.back().getLocation(), diag::ext_paste_comma);
      ResultToks.pop_back();
    }
    continue;
  }

  // If anything changed, install this as the new Tokens list.
  if (MadeChange) {
    assert(!OwnsTokens && "This would leak if we already own the token list");
    // Allocate the result from the preprocessor's bump allocator; it is
    // deleted with the allocator, not by this TokenLexer.
    NumTokens = ResultToks.size();
    llvm::BumpPtrAllocator &Alloc = PP.getPreprocessorAllocator();
    Token *Res =
      static_cast<Token *>(Alloc.Allocate(sizeof(Token)*ResultToks.size(),
                                          llvm::alignof<Token>()));
    if (NumTokens)
      memcpy(Res, &ResultToks[0], NumTokens*sizeof(Token));
    Tokens = Res;

    // The preprocessor bump pointer owns these tokens, not us.
    OwnsTokens = false;
  }
}
+
/// Lex - Lex and return a token from this macro stream.
///
void TokenLexer::Lex(Token &Tok) {
  // Lexing off the end of the macro, pop this macro off the expansion stack.
  if (isAtEnd()) {
    // If this is a macro (not a token stream), mark the macro enabled now
    // that it is no longer being expanded.
    if (Macro) Macro->EnableMacro();

    // Pop this context off the preprocessors lexer stack and get the next
    // token.  This will delete "this" so remember the PP instance var.
    Preprocessor &PPCache = PP;
    if (PP.HandleEndOfTokenLexer(Tok))
      return;

    // HandleEndOfTokenLexer may not return a token.  If it doesn't, lex
    // whatever is next.  Note: "this" is already destroyed; only PPCache is
    // safe to touch here.
    return PPCache.Lex(Tok);
  }

  // If this is the first token of the expanded result, we inherit spacing
  // properties later.
  bool isFirstToken = CurToken == 0;

  // Get the next token to return.
  Tok = Tokens[CurToken++];

  bool TokenIsFromPaste = false;

  // If this token is followed by a token paste (##) operator, paste the tokens!
  if (!isAtEnd() && Tokens[CurToken].is(tok::hashhash)) {
    // When handling the microsoft /##/ extension, the final token is
    // returned by PasteTokens, not the pasted token.
    if (PasteTokens(Tok))
      return;

    TokenIsFromPaste = true;
  }

  // The token's current location indicate where the token was lexed from.  We
  // need this information to compute the spelling of the token, but any
  // diagnostics for the expanded token should appear as if they came from
  // InstantiationLoc.  Pull this information together into a new SourceLocation
  // that captures all of this.
  if (InstantiateLocStart.isValid()) {   // Don't do this for token streams.
    SourceManager &SM = PP.getSourceManager();
    Tok.setLocation(SM.createInstantiationLoc(Tok.getLocation(),
                                              InstantiateLocStart,
                                              InstantiateLocEnd,
                                              Tok.getLength()));
  }

  // If this is the first token, set the lexical properties of the token to
  // match the lexical properties of the macro identifier.
  if (isFirstToken) {
    Tok.setFlagValue(Token::StartOfLine , AtStartOfLine);
    Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace);
  }

  // Handle recursive expansion!
  if (!Tok.isAnnotation() && Tok.getIdentifierInfo() != 0) {
    // Change the kind of this identifier to the appropriate token kind, e.g.
    // turning "for" into a keyword.
    IdentifierInfo *II = Tok.getIdentifierInfo();
    Tok.setKind(II->getTokenID());

    // If this identifier was poisoned and from a paste, emit an error.  This
    // won't be handled by Preprocessor::HandleIdentifier because this is coming
    // from a macro expansion.
    if (II->isPoisoned() && TokenIsFromPaste) {
      // We warn about __VA_ARGS__ with poisoning.
      if (II->isStr("__VA_ARGS__"))
        PP.Diag(Tok, diag::ext_pp_bad_vaargs_use);
      else
        PP.Diag(Tok, diag::err_pp_used_poisoned_id);
    }

    if (!DisableMacroExpansion && II->isHandleIdentifierCase())
      PP.HandleIdentifier(Tok);
  }

  // Otherwise, return a normal token.
}
+
/// PasteTokens - Tok is the LHS of a ## operator, and CurToken is the ##
/// operator.  Read the ## and RHS, and paste the LHS/RHS together.  If there
/// are more ## after it, chomp them iteratively.  Return the result as Tok.
/// If this returns true, the caller should immediately return the token.
bool TokenLexer::PasteTokens(Token &Tok) {
  llvm::SmallVector<char, 128> Buffer;
  const char *ResultTokStrPtr = 0;
  do {
    // Consume the ## operator.
    SourceLocation PasteOpLoc = Tokens[CurToken].getLocation();
    ++CurToken;
    assert(!isAtEnd() && "No token on the RHS of a paste operator!");

    // Get the RHS token.
    const Token &RHS = Tokens[CurToken];

    // Allocate space for the result token.  This is guaranteed to be enough for
    // the two tokens.
    Buffer.resize(Tok.getLength() + RHS.getLength());

    // Get the spelling of the LHS token in Buffer.
    const char *BufPtr = &Buffer[0];
    unsigned LHSLen = PP.getSpelling(Tok, BufPtr);
    if (BufPtr != &Buffer[0])   // Really, we want the chars in Buffer!
      memcpy(&Buffer[0], BufPtr, LHSLen);

    // Append the spelling of the RHS immediately after the LHS.
    BufPtr = &Buffer[LHSLen];
    unsigned RHSLen = PP.getSpelling(RHS, BufPtr);
    if (BufPtr != &Buffer[LHSLen])   // Really, we want the chars in Buffer!
      memcpy(&Buffer[LHSLen], BufPtr, RHSLen);

    // Trim excess space.
    Buffer.resize(LHSLen+RHSLen);

    // Plop the pasted result (including the trailing newline and null) into a
    // scratch buffer where we can lex it.
    Token ResultTokTmp;
    ResultTokTmp.startToken();

    // Claim that the tmp token is a string_literal so that we can get the
    // character pointer back from CreateString in getLiteralData().
    ResultTokTmp.setKind(tok::string_literal);
    PP.CreateString(&Buffer[0], Buffer.size(), ResultTokTmp);
    SourceLocation ResultTokLoc = ResultTokTmp.getLocation();
    ResultTokStrPtr = ResultTokTmp.getLiteralData();

    // Lex the resultant pasted token into Result.
    Token Result;

    if (Tok.is(tok::identifier) && RHS.is(tok::identifier)) {
      // Common paste case: identifier+identifier = identifier.  Avoid creating
      // a lexer and other overhead.
      PP.IncrementPasteCounter(true);
      Result.startToken();
      Result.setKind(tok::identifier);
      Result.setLocation(ResultTokLoc);
      Result.setLength(LHSLen+RHSLen);
    } else {
      PP.IncrementPasteCounter(false);

      assert(ResultTokLoc.isFileID() &&
             "Should be a raw location into scratch buffer");
      SourceManager &SourceMgr = PP.getSourceManager();
      FileID LocFileID = SourceMgr.getFileID(ResultTokLoc);

      const char *ScratchBufStart = SourceMgr.getBufferData(LocFileID).first;

      // Make a lexer to lex this string from.  Lex just this one token.
      // Make a lexer object so that we lex and expand the paste result.
      Lexer TL(SourceMgr.getLocForStartOfFile(LocFileID),
               PP.getLangOptions(), ScratchBufStart,
               ResultTokStrPtr, ResultTokStrPtr+LHSLen+RHSLen);

      // Lex a token in raw mode.  This way it won't look up identifiers
      // automatically, lexing off the end will return an eof token, and
      // warnings are disabled.  This returns true if the result token is the
      // entire buffer.
      bool isInvalid = !TL.LexFromRawLexer(Result);

      // If we got an EOF token, we didn't form even ONE token.  For example, we
      // did "/ ## /" to get "//".
      isInvalid |= Result.is(tok::eof);

      // If pasting the two tokens didn't form a full new token, this is an
      // error.  This occurs with "x ## +"  and other stuff.  Return with Tok
      // unmodified and with RHS as the next token to lex.
      if (isInvalid) {
        // Test for the Microsoft extension of /##/ turning into // here on the
        // error path.
        if (PP.getLangOptions().Microsoft && Tok.is(tok::slash) &&
            RHS.is(tok::slash)) {
          HandleMicrosoftCommentPaste(Tok);
          return true;
        }

        // Do not emit the warning when preprocessing assembler code.
        if (!PP.getLangOptions().AsmPreprocessor) {
          // Explicitly convert the token location to have proper instantiation
          // information so that the user knows where it came from.
          SourceManager &SM = PP.getSourceManager();
          SourceLocation Loc =
            SM.createInstantiationLoc(PasteOpLoc, InstantiateLocStart,
                                      InstantiateLocEnd, 2);
          PP.Diag(Loc, diag::err_pp_bad_paste)
            << std::string(Buffer.begin(), Buffer.end());
        }

        // Do not consume the RHS.
        --CurToken;
      }

      // Turn ## into 'unknown' to avoid # ## # from looking like a paste
      // operator.
      if (Result.is(tok::hashhash))
        Result.setKind(tok::unknown);
    }

    // Transfer properties of the LHS over to the Result.
    Result.setFlagValue(Token::StartOfLine , Tok.isAtStartOfLine());
    Result.setFlagValue(Token::LeadingSpace, Tok.hasLeadingSpace());

    // Finally, replace LHS with the result, consume the RHS, and iterate.
    ++CurToken;
    Tok = Result;
  } while (!isAtEnd() && Tokens[CurToken].is(tok::hashhash));

  // Now that we got the result token, it will be subject to expansion.  Since
  // token pasting re-lexes the result token in raw mode, identifier information
  // isn't looked up.  As such, if the result is an identifier, look up id info.
  if (Tok.is(tok::identifier)) {
    // Look up the identifier info for the token.  We disabled identifier lookup
    // by saying we're skipping contents, so we need to do this manually.
    IdentifierInfo *II = PP.LookUpIdentifierInfo(Tok, ResultTokStrPtr);
    Tok.setIdentifierInfo(II);
  }
  return false;
}
+
+/// isNextTokenLParen - If the next token lexed will pop this macro off the
+/// expansion stack, return 2.  If the next unexpanded token is a '(', return
+/// 1, otherwise return 0.
+unsigned TokenLexer::isNextTokenLParen() const {
+  // Out of tokens?
+  if (isAtEnd())
+    return 2;
+  return Tokens[CurToken].is(tok::l_paren);
+}
+
+
+/// HandleMicrosoftCommentPaste - In microsoft compatibility mode, /##/ pastes
+/// together to form a comment that comments out everything in the current
+/// macro, other active macros, and anything left on the current physical
+/// source line of the instantiated buffer.  Handle this by returning the
+/// first token on the next line.
+void TokenLexer::HandleMicrosoftCommentPaste(Token &Tok) {
+  // We 'comment out' the rest of this macro by just ignoring the rest of the
+  // tokens that have not been lexed yet, if any.
+
+  // Since this must be a macro, mark the macro enabled now that it is no longer
+  // being expanded.
+  assert(Macro && "Token streams can't paste comments");
+  Macro->EnableMacro();
+
+  PP.HandleMicrosoftCommentPaste(Tok);
+}