Check in the long-promised SourceLocation rewrite.  This lays the
groundwork for implementing #line, and fixes the "out of macro ID's"
problem.

There is nothing particularly tricky about the code, other than the
very performance-sensitive SourceManager::getFileID() method.



git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@62978 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/include/clang/Basic/SourceLocation.h b/include/clang/Basic/SourceLocation.h
index 8def5c5..6da9a87 100644
--- a/include/clang/Basic/SourceLocation.h
+++ b/include/clang/Basic/SourceLocation.h
@@ -45,12 +45,12 @@
   bool operator>(const FileID &RHS) const { return RHS < *this; }
   bool operator>=(const FileID &RHS) const { return RHS <= *this; }
   
-  static FileID getSentinel() { return Create(~0U); }
+  static FileID getSentinel() { return get(~0U); }
   unsigned getHashValue() const { return ID; }
   
 private:
   friend class SourceManager;
-  static FileID Create(unsigned V) {
+  static FileID get(unsigned V) {
     FileID F;
     F.ID = V;
     return F;
@@ -66,33 +66,14 @@
   unsigned ID;
   friend class SourceManager;
   enum {
-    // FileID Layout:
-    // bit 31: 0 -> FileID, 1 -> MacroID (invalid for FileID)
-    //     30...17 -> ChunkID of location, index into SourceManager table.
-    ChunkIDBits  = 14,
-    //      0...16 -> Index into the chunk of the specified ChunkID.
-    FilePosBits = 32-1-ChunkIDBits,
-    
-    // MacroID Layout:
-    // bit 31: 1 -> MacroID, 0 -> FileID (invalid for MacroID)
-
-    // bit 29,30: unused.
-    
-    // bits 28...9 -> MacroID number.
-    MacroIDBits       = 20,
-    // bits 8...0  -> Macro spelling offset
-    MacroSpellingOffsBits = 9,
-    
-    
-    // Useful constants.
-    ChunkSize = (1 << FilePosBits)
+    MacroIDBit = 1U << 31
   };
 public:
 
   SourceLocation() : ID(0) {}  // 0 is an invalid FileID.
   
-  bool isFileID() const { return (ID >> 31) == 0; }
-  bool isMacroID() const { return (ID >> 31) != 0; }
+  bool isFileID() const  { return (ID & MacroIDBit) == 0; }
+  bool isMacroID() const { return (ID & MacroIDBit) != 0; }
   
   /// isValid - Return true if this is a valid SourceLocation object.  Invalid
   /// SourceLocations are often used when events have no corresponding location
@@ -102,86 +83,34 @@
   bool isInvalid() const { return ID == 0; }
   
 private:
-  /// getChunkID - Return the chunk identifier for this SourceLocation.  This
-  /// ChunkID can be used with the SourceManager object to obtain an entire
-  /// include stack for a file position reference.
-  unsigned getChunkID() const {
-    assert(isFileID() && "can't get the file id of a non-file sloc!");
-    return ID >> FilePosBits;
+  /// getOffset - Return the offset used to look up SourceManager's
+  /// SLocEntryTable; note that this is not an index *into* that table.
+  unsigned getOffset() const {
+    return ID & ~MacroIDBit;
   }
 
-  unsigned getMacroID() const {
-    assert(isMacroID() && "Is not a macro id!");
-    return (ID >> MacroSpellingOffsBits) & ((1 << MacroIDBits)-1);
-  }
-  
-  static SourceLocation getFileLoc(unsigned ChunkID, unsigned FilePos) {
+  static SourceLocation getFileLoc(unsigned ID) {
+    assert((ID & MacroIDBit) == 0 && "Ran out of source locations!");
     SourceLocation L;
-    // If a FilePos is larger than (1<<FilePosBits), the SourceManager makes
-    // enough consequtive ChunkIDs that we have one for each chunk.
-    if (FilePos >= ChunkSize) {
-      ChunkID += FilePos >> FilePosBits;
-      FilePos &= ChunkSize-1;
-    }
-    
-    // FIXME: Find a way to handle out of ChunkID bits!  Maybe MaxFileID is an
-    // escape of some sort?
-    assert(ChunkID < (1 << ChunkIDBits) && "Out of ChunkID's");
-    
-    L.ID = (ChunkID << FilePosBits) | FilePos;
+    L.ID = ID;
     return L;
   }
   
-  static bool isValidMacroSpellingOffs(int Val) {
-    if (Val >= 0)
-      return Val < (1 << (MacroSpellingOffsBits-1));
-    return -Val <= (1 << (MacroSpellingOffsBits-1));
-  }
-  
-  static SourceLocation getMacroLoc(unsigned MacroID, int SpellingOffs) {
-    assert(MacroID < (1 << MacroIDBits) && "Too many macros!");
-    assert(isValidMacroSpellingOffs(SpellingOffs) &&"spelling offs too large!");
-    
-    // Mask off sign bits.
-    SpellingOffs &= (1 << MacroSpellingOffsBits)-1;
-    
+  static SourceLocation getMacroLoc(unsigned ID) {
+    assert((ID & MacroIDBit) == 0 && "Ran out of source locations!");
     SourceLocation L;
-    L.ID = (1 << 31) |
-           (MacroID << MacroSpellingOffsBits) |
-           SpellingOffs;
+    L.ID = MacroIDBit | ID;
     return L;
   }
-
-  /// getRawFilePos - Return the byte offset from the start of the file-chunk
-  /// referred to by ChunkID.  This method should not be used to get the offset
-  /// from the start of the file, instead you should use
-  /// SourceManager::getDecomposedFileLoc.  This method will be 
-  //  incorrect for large files.
-  unsigned getRawFilePos() const { 
-    assert(isFileID() && "can't get the file id of a non-file sloc!");
-    return ID & (ChunkSize-1);
-  }
-
-  int getMacroSpellingOffs() const {
-    assert(isMacroID() && "Is not a macro id!");
-    int Val = ID & ((1 << MacroSpellingOffsBits)-1);
-    // Sign extend it properly.
-    unsigned ShAmt = sizeof(int)*8 - MacroSpellingOffsBits;
-    return (Val << ShAmt) >> ShAmt;
-  }
 public:
   
   /// getFileLocWithOffset - Return a source location with the specified offset
   /// from this file SourceLocation.
   SourceLocation getFileLocWithOffset(int Offset) const {
-    unsigned ChunkID = getChunkID();
-    Offset += getRawFilePos();
-    // Handle negative offsets correctly.
-    while (Offset < 0) {
-      --ChunkID;
-      Offset += ChunkSize;
-    }
-    return getFileLoc(ChunkID, Offset);
+    assert(((getOffset()+Offset) & MacroIDBit) == 0 && "invalid location");
+    SourceLocation L;
+    L.ID = ID+Offset;
+    return L;
   }
   
   /// getRawEncoding - When a SourceLocation itself cannot be used, this returns
diff --git a/include/clang/Basic/SourceManager.h b/include/clang/Basic/SourceManager.h
index c8362d8..17c39f8 100644
--- a/include/clang/Basic/SourceManager.h
+++ b/include/clang/Basic/SourceManager.h
@@ -116,99 +116,113 @@
     ContentCache &operator=(const ContentCache& RHS);    
   };  
 
-  /// FileIDInfo - Information about a FileID, basically just the logical file
-  /// that it represents and include stack information.  A File SourceLocation
-  /// is a byte offset from the start of this.
+  /// FileInfo - Information about a FileID, basically just the logical file
+  /// that it represents and include stack information.
   ///
-  /// FileID's are used to compute the location of a character in memory as well
-  /// as the instantiation source location, which can be differ from the
-  /// spelling location.  It is different when #line's are active or when macros
-  /// have been expanded.
+  /// Each FileInfo has include stack information, indicating where it came
+  /// from.  This information encodes the #include chain that a token was
+  /// instantiated from.  The main include file has an invalid IncludeLoc.
   ///
-  /// Each FileID has include stack information, indicating where it came from.
-  /// For the primary translation unit, it comes from SourceLocation() aka 0.
-  /// This information encodes the #include chain that a token was instantiated
-  /// from.
+  /// FileInfos contain a "ContentCache *", with the contents of the file.
   ///
-  /// FileIDInfos contain a "ContentCache *", describing the source file, 
-  /// and a Chunk number, which allows a SourceLocation to index into very
-  /// large files (those which there are not enough FilePosBits to address).
-  ///
-  struct FileIDInfo {
-  private:
+  class FileInfo {
     /// IncludeLoc - The location of the #include that brought in this file.
-    /// This SourceLocation object has an invalid SLOC for the main file.
-    SourceLocation IncludeLoc;
-    
-    /// ChunkNo - Really large buffers are broken up into chunks that are
-    /// each (1 << SourceLocation::FilePosBits) in size.  This specifies the
-    /// chunk number of this FileID.
-    unsigned ChunkNo : 30;
-    
-    /// FileCharacteristic - This is an instance of CharacteristicKind,
-    /// indicating whether this is a system header dir or not.
-    unsigned FileCharacteristic : 2;
+    /// This is an invalid SLOC for the main file (top of the #include chain).
+    unsigned IncludeLoc;  // Really a SourceLocation
     
     /// Content - Information about the source buffer itself.
     const ContentCache *Content;
 
+    /// FileCharacteristic - This is an instance of CharacteristicKind,
+    /// indicating whether this is a system header dir or not.
+    unsigned FileCharacteristic : 2;
   public:
-    /// get - Return a FileIDInfo object.
-    static FileIDInfo get(SourceLocation IL, unsigned CN, 
-                          const ContentCache *Con,
-                          CharacteristicKind FileCharacter) {
-      FileIDInfo X;
-      X.IncludeLoc = IL;
-      X.ChunkNo = CN;
+    /// get - Return a FileInfo object.
+    static FileInfo get(SourceLocation IL, const ContentCache *Con,
+                        CharacteristicKind FileCharacter) {
+      FileInfo X;
+      X.IncludeLoc = IL.getRawEncoding();
       X.Content = Con;
       X.FileCharacteristic = FileCharacter;
       return X;
     }
     
-    SourceLocation getIncludeLoc() const { return IncludeLoc; }
-    unsigned getChunkNo() const { return ChunkNo; }
+    SourceLocation getIncludeLoc() const {
+      return SourceLocation::getFromRawEncoding(IncludeLoc);
+    }
     const ContentCache* getContentCache() const { return Content; }
-
+    
     /// getCharacteristic - Return whether this is a system header or not.
     CharacteristicKind getFileCharacteristic() const { 
       return (CharacteristicKind)FileCharacteristic;
     }
-    
-    /// Emit - Emit this FileIDInfo to Bitcode.
-    void Emit(llvm::Serializer& S) const;
-    
-    /// ReadVal - Reconstitute a FileIDInfo from Bitcode.
-    static FileIDInfo ReadVal(llvm::Deserializer& S);
   };
   
-  /// MacroIDInfo - Macro SourceLocations refer to these records by their ID.
-  /// Each MacroIDInfo encodes the Instantiation location - where the macro was
-  /// instantiated, and the SpellingLoc - where the actual character data for
-  /// the token came from.  An actual macro SourceLocation stores deltas from
-  /// these positions.
-  class MacroIDInfo {
-    SourceLocation InstantiationLoc, SpellingLoc;
+  /// InstantiationInfo - Each InstantiationInfo encodes the Instantiation
+  /// location - where the token was ultimately instantiated, and the
+  /// SpellingLoc - where the actual character data for the token came from.
+  class InstantiationInfo {
+    unsigned InstantiationLoc, SpellingLoc; // Really these are SourceLocations.
   public:
-    SourceLocation getInstantiationLoc() const { return InstantiationLoc; }
-    SourceLocation getSpellingLoc() const { return SpellingLoc; }
+    SourceLocation getInstantiationLoc() const {
+      return SourceLocation::getFromRawEncoding(InstantiationLoc);
+    }
+    SourceLocation getSpellingLoc() const {
+      return SourceLocation::getFromRawEncoding(SpellingLoc);
+    }
     
-    /// get - Return a MacroID for a macro expansion.  VL specifies
+    /// get - Return an InstantiationInfo for an expansion.  IL specifies
     /// the instantiation location (where the macro is expanded), and SL
     /// specifies the spelling location (where the characters from the token
     /// come from).  Both VL and PL refer to normal File SLocs.
-    static MacroIDInfo get(SourceLocation VL, SourceLocation SL) {
-      MacroIDInfo X;
-      X.InstantiationLoc = VL;
-      X.SpellingLoc = SL;
+    static InstantiationInfo get(SourceLocation IL, SourceLocation SL) {
+      InstantiationInfo X;
+      X.InstantiationLoc = IL.getRawEncoding();
+      X.SpellingLoc = SL.getRawEncoding();
       return X;
     }
-    
-    /// Emit - Emit this MacroIDInfo to Bitcode.
-    void Emit(llvm::Serializer& S) const;
-    
-    /// ReadVal - Reconstitute a MacroIDInfo from Bitcode.
-    static MacroIDInfo ReadVal(llvm::Deserializer& S);
   };
+  
+  /// SLocEntry - This is a discriminated union of FileInfo and
+  /// InstantiationInfo.  SourceManager keeps an array of these objects, and
+  /// they are uniquely identified by the FileID datatype.
+  class SLocEntry {
+    unsigned Offset;   // low bit is set for instantiation info.
+    union {
+      FileInfo File;
+      InstantiationInfo Instantiation;
+    };
+  public:
+    unsigned getOffset() const { return Offset >> 1; }
+    
+    bool isInstantiation() const { return Offset & 1; }
+    bool isFile() const { return !isInstantiation(); }
+    
+    const FileInfo &getFile() const {
+      assert(isFile() && "Not a file SLocEntry!");
+      return File;
+    }
+
+    const InstantiationInfo &getInstantiation() const {
+      assert(isInstantiation() && "Not an instantiation SLocEntry!");
+      return Instantiation;
+    }
+    
+    static SLocEntry get(unsigned Offset, const FileInfo &FI) {
+      SLocEntry E;
+      E.Offset = Offset << 1;
+      E.File = FI;
+      return E;
+    }
+
+    static SLocEntry get(unsigned Offset, const InstantiationInfo &II) {
+      SLocEntry E;
+      E.Offset = (Offset << 1) | 1;
+      E.Instantiation = II;
+      return E;
+    }
+  };
+  
 }  // end SrcMgr namespace.
 } // end clang namespace
 
@@ -247,12 +261,17 @@
   /// stored ContentCache objects are NULL, as they do not refer to a file.
   std::list<SrcMgr::ContentCache> MemBufferInfos;
   
-  /// FileIDs - Information about each FileID.  FileID #0 is not valid, so all
-  /// entries are off by one.
-  std::vector<SrcMgr::FileIDInfo> FileIDs;
+  /// SLocEntryTable - This is an array of SLocEntry's that we have created.
+  /// FileID is an index into this vector.  This array is sorted by the offset.
+  std::vector<SrcMgr::SLocEntry> SLocEntryTable;
+  /// NextOffset - This is the next available offset that a new SLocEntry can
+  /// start at.  It is SLocEntryTable.back().getOffset()+size of back() entry.
+  unsigned NextOffset;
   
-  /// MacroIDs - Information about each MacroID.
-  std::vector<SrcMgr::MacroIDInfo> MacroIDs;
+  /// LastFileIDLookup - This is a one-entry cache to speed up getFileID.
+  /// LastFileIDLookup records the last FileID looked up or created, because it
+  /// is very common to look up many tokens from the same file.
+  mutable FileID LastFileIDLookup;
   
   /// LastLineNo - These ivars serve as a cache used in the getLineNumber
   /// method which is used to speedup getLineNumber calls to nearby locations.
@@ -264,19 +283,28 @@
   /// MainFileID - The file ID for the main source file of the translation unit.
   FileID MainFileID;
 
+  // Statistics for -print-stats.
+  mutable unsigned NumLinearScans, NumBinaryProbes;
+  
   // SourceManager doesn't support copy construction.
   explicit SourceManager(const SourceManager&);
   void operator=(const SourceManager&);  
 public:
-  SourceManager() {}
+  SourceManager() : NumLinearScans(0), NumBinaryProbes(0) {
+    clearIDTables();
+  }
   ~SourceManager() {}
   
   void clearIDTables() {
     MainFileID = FileID();
-    FileIDs.clear();
-    MacroIDs.clear();
+    SLocEntryTable.clear();
     LastLineNoFileIDQuery = FileID();
     LastLineNoContentCache = 0;
+    LastFileIDLookup = FileID();
+    
+    // Use up FileID #0 as an invalid instantiation.
+    NextOffset = 0;
+    createInstantiationLoc(SourceLocation(), SourceLocation(), 1);
   }
 
   //===--------------------------------------------------------------------===//
@@ -295,7 +323,7 @@
   }
   
   //===--------------------------------------------------------------------===//
-  // Methods to create new FileID's.
+  // Methods to create new FileID's and instantiations.
   //===--------------------------------------------------------------------===//
   
   /// createFileID - Create a new FileID that represents the specified file
@@ -303,7 +331,7 @@
   /// error and translates NULL into standard input.
   FileID createFileID(const FileEntry *SourceFile, SourceLocation IncludePos,
                       SrcMgr::CharacteristicKind FileCharacter) {
-    const SrcMgr::ContentCache *IR = getContentCache(SourceFile);
+    const SrcMgr::ContentCache *IR = getOrCreateContentCache(SourceFile);
     if (IR == 0) return FileID();    // Error opening file?
     return createFileID(IR, IncludePos, FileCharacter);
   }
@@ -325,6 +353,13 @@
     return MainFileID;
   }
 
+  /// createInstantiationLoc - Return a new SourceLocation that encodes the fact
+  /// that a token at Loc should actually be referenced from InstantiationLoc.
+  /// TokLength is the length of the token being instantiated.
+  SourceLocation createInstantiationLoc(SourceLocation Loc,
+                                        SourceLocation InstantiationLoc,
+                                        unsigned TokLength);
+  
   //===--------------------------------------------------------------------===//
   // FileID manipulation methods.
   //===--------------------------------------------------------------------===//
@@ -332,12 +367,12 @@
   /// getBuffer - Return the buffer for the specified FileID.
   ///
   const llvm::MemoryBuffer *getBuffer(FileID FID) const {
-    return getContentCache(FID)->getBuffer();
+    return getSLocEntry(FID).getFile().getContentCache()->getBuffer();
   }
   
   /// getFileEntryForID - Returns the FileEntry record for the provided FileID.
   const FileEntry *getFileEntryForID(FileID FID) const {
-    return getContentCache(FID)->Entry;
+    return getSLocEntry(FID).getFile().getContentCache()->Entry;
   }
   
   /// getBufferData - Return a pointer to the start and end of the source buffer
@@ -349,26 +384,112 @@
   // SourceLocation manipulation methods.
   //===--------------------------------------------------------------------===//
   
+  /// getFileID - Return the FileID for a SourceLocation.  This is a very
+  /// hot method that is used for all SourceManager queries that start with a
+  /// SourceLocation object.  It is responsible for finding the entry in
+  /// SLocEntryTable which contains the specified location.
+  ///
+  FileID getFileID(SourceLocation SpellingLoc) const {
+    unsigned SLocOffset = SpellingLoc.getOffset();
+    
+    // If our one-entry cache covers this offset, just return it.
+    if (isOffsetInFileID(LastFileIDLookup, SLocOffset))
+      return LastFileIDLookup;
+
+    return getFileIDSlow(SLocOffset);
+  }
+  
   /// getLocForStartOfFile - Return the source location corresponding to the
   /// first byte of the specified file.
   SourceLocation getLocForStartOfFile(FileID FID) const {
-    return SourceLocation::getFileLoc(FID.ID, 0);
+    assert(FID.ID < SLocEntryTable.size() && SLocEntryTable[FID.ID].isFile());
+    unsigned FileOffset = SLocEntryTable[FID.ID].getOffset();
+    return SourceLocation::getFileLoc(FileOffset);
   }
   
-  /// getInstantiationLoc - Return a new SourceLocation that encodes the fact
-  /// that a token at Loc should actually be referenced from InstantiationLoc.
-  SourceLocation getInstantiationLoc(SourceLocation Loc,
-                                     SourceLocation InstantiationLoc);
-  
-   /// getIncludeLoc - Return the location of the #include for the specified
+  /// getIncludeLoc - Return the location of the #include for the specified
   /// SourceLocation.  If this is a macro expansion, this transparently figures
   /// out which file includes the file being expanded into.
   SourceLocation getIncludeLoc(SourceLocation ID) const {
-    return getFIDInfo(getInstantiationLoc(ID).getChunkID())->getIncludeLoc();
+    return getSLocEntry(getFileID(getInstantiationLoc(ID)))
+                    .getFile().getIncludeLoc();
   }
   
+  /// Given a SourceLocation object, return the instantiation location
+  /// referenced by the ID.
+  SourceLocation getInstantiationLoc(SourceLocation Loc) const {
+    // File locations work!
+    if (Loc.isFileID()) return Loc;
+    
+    std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc);
+    Loc = getSLocEntry(LocInfo.first).getInstantiation().getInstantiationLoc();
+    return Loc.getFileLocWithOffset(LocInfo.second);
+  }
+  
+  /// getSpellingLoc - Given a SourceLocation object, return the spelling
+  /// location referenced by the ID.  This is the place where the characters
+  /// that make up the lexed token can be found.
+  SourceLocation getSpellingLoc(SourceLocation Loc) const {
+    // File locations work!
+    if (Loc.isFileID()) return Loc;
+    
+    std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc);
+    Loc = getSLocEntry(LocInfo.first).getInstantiation().getSpellingLoc();
+    return Loc.getFileLocWithOffset(LocInfo.second);
+  }
+
+  /// getDecomposedLoc - Decompose the specified location into a raw FileID +
+  /// Offset pair.  The first element is the FileID, the second is the
+  /// offset from the start of the buffer of the location.
+  std::pair<FileID, unsigned> getDecomposedLoc(SourceLocation Loc) const {
+    FileID FID = getFileID(Loc);
+    return std::make_pair(FID, Loc.getOffset()-getSLocEntry(FID).getOffset());
+  }
+  
+  /// getDecomposedInstantiationLoc - Decompose the specified location into a
+  /// raw FileID + Offset pair.  If the location is an instantiation record,
+  /// walk through it until we find the final location instantiated.
+  std::pair<FileID, unsigned>
+  getDecomposedInstantiationLoc(SourceLocation Loc) const {
+    FileID FID = getFileID(Loc);
+    const SrcMgr::SLocEntry *E = &getSLocEntry(FID);
+    
+    unsigned Offset = Loc.getOffset()-E->getOffset();
+    if (Loc.isFileID())
+      return std::make_pair(FID, Offset);
+    
+    return getDecomposedInstantiationLocSlowCase(E, Offset);
+  }
+
+  /// getDecomposedSpellingLoc - Decompose the specified location into a raw
+  /// FileID + Offset pair.  If the location is an instantiation record, walk
+  /// through it until we find its spelling record.
+  std::pair<FileID, unsigned>
+  getDecomposedSpellingLoc(SourceLocation Loc) const {
+    FileID FID = getFileID(Loc);
+    const SrcMgr::SLocEntry *E = &getSLocEntry(FID);
+    
+    unsigned Offset = Loc.getOffset()-E->getOffset();
+    if (Loc.isFileID())
+      return std::make_pair(FID, Offset);
+    return getDecomposedSpellingLocSlowCase(E, Offset);
+  }    
+  
+  /// getFullFilePos - This (efficient) method returns the offset from the start
+  /// of the file that the specified spelling SourceLocation represents.  This
+  /// returns the location of the actual character data, not the instantiation
+  /// position.
+  unsigned getFullFilePos(SourceLocation SpellingLoc) const {
+    return getDecomposedLoc(SpellingLoc).second;
+  }
+  
+  
+  //===--------------------------------------------------------------------===//
+  // Queries about the code at a SourceLocation.
+  //===--------------------------------------------------------------------===//
+  
   /// getCharacterData - Return a pointer to the start of the specified location
-  /// in the appropriate MemoryBuffer.
+  /// in the appropriate spelling MemoryBuffer.
   const char *getCharacterData(SourceLocation SL) const;
   
   /// getColumnNumber - Return the column # for the specified file position.
@@ -391,7 +512,7 @@
   /// line offsets for the MemoryBuffer, so this is not cheap: use only when
   /// about to emit a diagnostic.
   unsigned getLineNumber(SourceLocation Loc) const;
-
+  
   unsigned getInstantiationLineNumber(SourceLocation Loc) const {
     return getLineNumber(getInstantiationLoc(Loc));
   }
@@ -399,65 +520,18 @@
     return getLineNumber(getSpellingLoc(Loc));
   }
   
+  // FIXME: This should handle #line.
+  SrcMgr::CharacteristicKind getFileCharacteristic(SourceLocation Loc) const {
+    FileID FID = getFileID(getSpellingLoc(Loc));
+    return getSLocEntry(FID).getFile().getFileCharacteristic();
+  }
+  
   /// getSourceName - This method returns the name of the file or buffer that
   /// the SourceLocation specifies.  This can be modified with #line directives,
   /// etc.
   const char *getSourceName(SourceLocation Loc) const;
-
-  /// Given a SourceLocation object, return the instantiation location
-  /// referenced by the ID.
-  SourceLocation getInstantiationLoc(SourceLocation Loc) const {
-    // File locations work.
-    if (Loc.isFileID()) return Loc;
-    
-    return MacroIDs[Loc.getMacroID()].getInstantiationLoc();
-  }
   
-  /// getSpellingLoc - Given a SourceLocation object, return the spelling
-  /// location referenced by the ID.  This is the place where the characters
-  /// that make up the lexed token can be found.
-  SourceLocation getSpellingLoc(SourceLocation Loc) const {
-    // File locations work!
-    if (Loc.isFileID()) return Loc;
-    
-    // Look up the macro token's spelling location.
-    SourceLocation PLoc = MacroIDs[Loc.getMacroID()].getSpellingLoc();
-    return PLoc.getFileLocWithOffset(Loc.getMacroSpellingOffs());
-  }
-
-  /// getDecomposedFileLoc - Decompose the specified file location into a raw
-  /// FileID + Offset pair.  The first element is the FileID, the second is the
-  /// offset from the start of the buffer of the location.
-  std::pair<FileID, unsigned> getDecomposedFileLoc(SourceLocation Loc) const {
-    assert(Loc.isFileID() && "Isn't a File SourceLocation");
-    
-    // TODO: Add a flag "is first chunk" to SLOC.
-    const SrcMgr::FileIDInfo *FIDInfo = getFIDInfo(Loc.getChunkID());
-      
-    // If this file has been split up into chunks, factor in the chunk number
-    // that the FileID references.
-    unsigned ChunkNo = FIDInfo->getChunkNo();
-    unsigned Offset = Loc.getRawFilePos();
-    Offset += (ChunkNo << SourceLocation::FilePosBits);
-
-    assert(Loc.getChunkID() >= ChunkNo && "Unexpected offset");
-    
-    return std::make_pair(FileID::Create(Loc.getChunkID()-ChunkNo), Offset);
-  }
   
-  /// getFileID - Return the FileID for a SourceLocation.
-  ///
-  FileID getFileID(SourceLocation SpellingLoc) const {
-    return getDecomposedFileLoc(SpellingLoc).first;
-  }    
-  
-  /// getFullFilePos - This (efficient) method returns the offset from the start
-  /// of the file that the specified spelling SourceLocation represents.  This
-  /// returns the location of the actual character data, not the instantiation
-  /// position.
-  unsigned getFullFilePos(SourceLocation SpellingLoc) const {
-    return getDecomposedFileLoc(SpellingLoc).second;
-  }
   
   /// isFromSameFile - Returns true if both SourceLocations correspond to
   ///  the same file.
@@ -470,15 +544,11 @@
   bool isFromMainFile(SourceLocation Loc) const {
     return getFileID(Loc) == getMainFileID();
   } 
-
+  
   /// isInSystemHeader - Returns if a SourceLocation is in a system header.
   bool isInSystemHeader(SourceLocation Loc) const {
     return getFileCharacteristic(Loc) != SrcMgr::C_User;
   }
-  SrcMgr::CharacteristicKind getFileCharacteristic(SourceLocation Loc) const {
-    return getFIDInfo(getSpellingLoc(Loc).getChunkID())
-                  ->getFileCharacteristic();
-  }
   
   //===--------------------------------------------------------------------===//
   // Other miscellaneous methods.
@@ -503,6 +573,19 @@
 private:
   friend struct SrcMgr::ContentCache; // Used for deserialization.
   
+  /// isOffsetInFileID - Return true if the specified FileID contains the
+  /// specified SourceLocation offset.  This is a very hot method.
+  inline bool isOffsetInFileID(FileID FID, unsigned SLocOffset) const {
+    const SrcMgr::SLocEntry &Entry = getSLocEntry(FID);
+    // If the entry is after the offset, it can't contain it.
+    if (SLocOffset < Entry.getOffset()) return false;
+    
+    // If this is the last entry then it does.  Otherwise, the entry after it
+    // has to not include it.
+    if (FID.ID+1 == SLocEntryTable.size()) return true;
+    return SLocOffset < SLocEntryTable[FID.ID+1].getOffset();
+  }
+  
   /// createFileID - Create a new fileID for the specified ContentCache and
   ///  include position.  This works regardless of whether the ContentCache
   ///  corresponds to a file or some other input source.
@@ -510,33 +593,27 @@
                       SourceLocation IncludePos,
                       SrcMgr::CharacteristicKind DirCharacter);
     
-  /// getContentCache - Create or return a cached ContentCache for the specified
-  ///  file.  This returns null on failure.
-  const SrcMgr::ContentCache* getContentCache(const FileEntry *SourceFile);
+  const SrcMgr::ContentCache *
+    getOrCreateContentCache(const FileEntry *SourceFile);
 
   /// createMemBufferContentCache - Create a new ContentCache for the specified
   ///  memory buffer.
   const SrcMgr::ContentCache* 
   createMemBufferContentCache(const llvm::MemoryBuffer *Buf);
 
-  const SrcMgr::FileIDInfo *getFIDInfo(unsigned FID) const {
-    assert(FID-1 < FileIDs.size() && "Invalid FileID!");
-    return &FileIDs[FID-1];
-  }
-  const SrcMgr::FileIDInfo *getFIDInfo(FileID FID) const {
-    return getFIDInfo(FID.ID);
+  const SrcMgr::SLocEntry &getSLocEntry(FileID FID) const {
+    assert(FID.ID < SLocEntryTable.size() && "Invalid id");
+    return SLocEntryTable[FID.ID];
   }
   
-  const SrcMgr::ContentCache *getContentCache(FileID FID) const {
-    return getContentCache(getFIDInfo(FID.ID));
-  }
-  
-  /// Return the ContentCache structure for the specified FileID.  
-  ///  This is always the physical reference for the ID.
-  const SrcMgr::ContentCache*
-  getContentCache(const SrcMgr::FileIDInfo* FIDInfo) const {
-    return FIDInfo->getContentCache();
-  }  
+  FileID getFileIDSlow(unsigned SLocOffset) const;
+
+  std::pair<FileID, unsigned>
+  getDecomposedInstantiationLocSlowCase(const SrcMgr::SLocEntry *E,
+                                        unsigned Offset) const;  
+  std::pair<FileID, unsigned>
+  getDecomposedSpellingLocSlowCase(const SrcMgr::SLocEntry *E,
+                                   unsigned Offset) const;
 };
 
 
diff --git a/include/clang/Lex/Lexer.h b/include/clang/Lex/Lexer.h
index 42cdd91..296fca1 100644
--- a/include/clang/Lex/Lexer.h
+++ b/include/clang/Lex/Lexer.h
@@ -190,7 +190,7 @@
 
   /// getSourceLocation - Return a source location identifier for the specified
   /// offset in the current file.
-  SourceLocation getSourceLocation(const char *Loc) const;
+  SourceLocation getSourceLocation(const char *Loc, unsigned TokLen = 1) const;
     
   /// getSourceLocation - Return a source location for the next character in
   /// the current file.
@@ -228,8 +228,9 @@
   /// TokEnd.
   void FormTokenWithChars(Token &Result, const char *TokEnd, 
                           tok::TokenKind Kind) {
-    Result.setLocation(getSourceLocation(BufferPtr));
-    Result.setLength(TokEnd-BufferPtr);
+    unsigned TokLen = TokEnd-BufferPtr;
+    Result.setLength(TokLen);
+    Result.setLocation(getSourceLocation(BufferPtr, TokLen));
     Result.setKind(Kind);
     BufferPtr = TokEnd;
   }
diff --git a/lib/Basic/SourceManager.cpp b/lib/Basic/SourceManager.cpp
index 35c350e..e30e2a8 100644
--- a/lib/Basic/SourceManager.cpp
+++ b/lib/Basic/SourceManager.cpp
@@ -24,6 +24,10 @@
 using namespace SrcMgr;
 using llvm::MemoryBuffer;
 
+//===--------------------------------------------------------------------===//
+// SourceManager Helper Classes
+//===--------------------------------------------------------------------===//
+
 // This (temporary) directive toggles between lazy and eager creation of
 // MemBuffers.  This directive is not permanent, and is here to test a few
 // potential optimizations in PTH.  Once it is clear whether eager or lazy
@@ -62,12 +66,16 @@
   return Buffer;
 }
 
+//===--------------------------------------------------------------------===//
+// Private 'Create' methods.
+//===--------------------------------------------------------------------===//
 
-/// getFileInfo - Create or return a cached FileInfo for the specified file.
-///
-const ContentCache* SourceManager::getContentCache(const FileEntry *FileEnt) {
-
+/// getOrCreateContentCache - Create or return a cached ContentCache for the
+/// specified file.
+const ContentCache *
+SourceManager::getOrCreateContentCache(const FileEntry *FileEnt) {
   assert(FileEnt && "Didn't specify a file entry to use?");
+  
   // Do we already have information about this file?
   std::set<ContentCache>::iterator I = 
     FileInfos.lower_bound(ContentCache(FileEnt));
@@ -107,47 +115,34 @@
   return &Entry;
 }
 
+//===----------------------------------------------------------------------===//
+// Methods to create new FileID's and instantiations.
+//===----------------------------------------------------------------------===//
 
 /// createFileID - Create a new fileID for the specified ContentCache and
 /// include position.  This works regardless of whether the ContentCache
 /// corresponds to a file or some other input source.
 FileID SourceManager::createFileID(const ContentCache *File,
-                                     SourceLocation IncludePos,
-                                     SrcMgr::CharacteristicKind FileCharacter) {
-  // If FileEnt is really large (e.g. it's a large .i file), we may not be able
-  // to fit an arbitrary position in the file in the FilePos field.  To handle
-  // this, we create one FileID for each chunk of the file that fits in a
-  // FilePos field.
+                                   SourceLocation IncludePos,
+                                   SrcMgr::CharacteristicKind FileCharacter) {
+  SLocEntryTable.push_back(SLocEntry::get(NextOffset, 
+                                          FileInfo::get(IncludePos, File,
+                                                        FileCharacter)));
   unsigned FileSize = File->getSize();
-  if (FileSize+1 < (1 << SourceLocation::FilePosBits)) {
-    FileIDs.push_back(FileIDInfo::get(IncludePos, 0, File, FileCharacter));
-    assert(FileIDs.size() < (1 << SourceLocation::ChunkIDBits) &&
-           "Ran out of file ID's!");
-    return FileID::Create(FileIDs.size());
-  }
+  assert(NextOffset+FileSize+1 > NextOffset && "Ran out of source locations!");
+  NextOffset += FileSize+1;
   
-  // Create one FileID for each chunk of the file.
-  unsigned Result = FileIDs.size()+1;
-
-  unsigned ChunkNo = 0;
-  while (1) {
-    FileIDs.push_back(FileIDInfo::get(IncludePos, ChunkNo++, File,
-                                      FileCharacter));
-
-    if (FileSize+1 < (1 << SourceLocation::FilePosBits)) break;
-    FileSize -= (1 << SourceLocation::FilePosBits);
-  }
-
-  assert(FileIDs.size() < (1 << SourceLocation::ChunkIDBits) &&
-         "Ran out of file ID's!");
-  return FileID::Create(Result);
+  // Set LastFileIDLookup to the newly created file.  The next getFileID call is
+  // almost guaranteed to be from that file.
+  return LastFileIDLookup = FileID::get(SLocEntryTable.size()-1);
 }
 
-/// getInstantiationLoc - Return a new SourceLocation that encodes the fact
+/// createInstantiationLoc - Return a new SourceLocation that encodes the fact
 /// that a token from SpellingLoc should actually be referenced from
 /// InstantiationLoc.
-SourceLocation SourceManager::getInstantiationLoc(SourceLocation SpellingLoc,
-                                                  SourceLocation InstantLoc) {
+SourceLocation SourceManager::createInstantiationLoc(SourceLocation SpellingLoc,
+                                                     SourceLocation InstantLoc,
+                                                     unsigned TokLength) {
   // The specified source location may be a mapped location, due to a macro
   // instantiation or #line directive.  Strip off this information to find out
   // where the characters are actually located.
@@ -155,29 +150,13 @@
   
   // Resolve InstantLoc down to a real instantiation location.
   InstantLoc = getInstantiationLoc(InstantLoc);
-  
-  
-  // If the last macro id is close to the currently requested location, try to
-  // reuse it.  This implements a small cache.
-  for (int i = MacroIDs.size()-1, e = MacroIDs.size()-6; i >= 0 && i != e; --i){
-    MacroIDInfo &LastOne = MacroIDs[i];
-    
-    // The instanitation point and source SpellingLoc have to exactly match to
-    // reuse (for now).  We could allow "nearby" instantiations in the future.
-    if (LastOne.getInstantiationLoc() != InstantLoc ||
-        LastOne.getSpellingLoc().getChunkID() != SpellingLoc.getChunkID())
-      continue;
-  
-    // Check to see if the spellloc of the token came from near enough to reuse.
-    int SpellDelta = SpellingLoc.getRawFilePos() -
-                     LastOne.getSpellingLoc().getRawFilePos();
-    if (SourceLocation::isValidMacroSpellingOffs(SpellDelta))
-      return SourceLocation::getMacroLoc(i, SpellDelta);
-  }
-  
- 
-  MacroIDs.push_back(MacroIDInfo::get(InstantLoc, SpellingLoc));
-  return SourceLocation::getMacroLoc(MacroIDs.size()-1, 0);
+
+  SLocEntryTable.push_back(SLocEntry::get(NextOffset, 
+                                          InstantiationInfo::get(InstantLoc,
+                                                                 SpellingLoc)));
+  assert(NextOffset+TokLength+1 > NextOffset && "Ran out of source locations!");
+  NextOffset += TokLength+1;
+  return SourceLocation::getMacroLoc(NextOffset-(TokLength+1));
 }
 
 /// getBufferData - Return a pointer to the start and end of the source buffer
@@ -189,19 +168,153 @@
 }
 
 
+//===--------------------------------------------------------------------===//
+// SourceLocation manipulation methods.
+//===--------------------------------------------------------------------===//
+
+/// getFileIDSlow - Return the FileID whose SLocEntry contains SLocOffset, which
+/// must be non-zero.  This is a very hot method that is used for all
+/// SourceManager queries that start with a SourceLocation object.  When the
+/// entry found is a file (not an instantiation), LastFileIDLookup is updated.
+///
+FileID SourceManager::getFileIDSlow(unsigned SLocOffset) const {
+  assert(SLocOffset && "Invalid FileID");
+
+  // After the first and second level caches, I see two common sorts of
+  // behavior: 1) a lot of searched FileID's are "near" the cached file location
+  // or are "near" the cached instantiation location.  2) others are just
+  // completely random and may be a very long way away.
+  //
+  // To handle this, we do a linear search for up to 8 steps to catch #1 quickly
+  // then we fall back to a less cache efficient, but more scalable, binary
+  // search to find the location.
+
+  // See if this is near the file point - worst case we start scanning from the
+  // most newly created FileID.
+  std::vector<SrcMgr::SLocEntry>::const_iterator I;
+
+  if (SLocEntryTable[LastFileIDLookup.ID].getOffset() < SLocOffset) {
+    // The cached file starts below the target, so it cannot bound the scan.
+    I = SLocEntryTable.end();
+  } else {
+    // Perhaps it is near the file point.
+    I = SLocEntryTable.begin()+LastFileIDLookup.ID;
+  }
+
+  // Find the FileID that contains this.  "I" is an iterator that points to a
+  // FileID whose offset is known to be larger than SLocOffset.
+  unsigned NumProbes = 0;
+  while (1) {
+    --I;
+    if (I->getOffset() <= SLocOffset) {
+#if 0
+      printf("lin %d -> %d [%s] %d %d\n", SLocOffset,
+             I-SLocEntryTable.begin(),
+             I->isInstantiation() ? "inst" : "file",
+             LastFileIDLookup.ID,  int(SLocEntryTable.end()-I));
+#endif
+      FileID Res = FileID::get(I-SLocEntryTable.begin());
+
+      // If this isn't an instantiation, remember it.  We have good locality
+      // across FileID lookups.
+      if (!I->isInstantiation())
+        LastFileIDLookup = Res;
+      NumLinearScans += NumProbes+1;
+      return Res;
+    }
+    if (++NumProbes == 8)
+      break;
+  }
+
+  // Convert "I" back into an index.  We know that it is an entry whose offset
+  // is larger than the offset we are looking for.
+  unsigned GreaterIndex = I-SLocEntryTable.begin();
+  // LessIndex - This is the lower bound of the range that we're searching.
+  // We know that the offset corresponding to the FileID is less than
+  // SLocOffset.
+  unsigned LessIndex = 0;
+  NumProbes = 0;
+  while (1) {
+    unsigned MiddleIndex = (GreaterIndex-LessIndex)/2+LessIndex;
+    unsigned MidOffset = SLocEntryTable[MiddleIndex].getOffset();
+
+    ++NumProbes;
+
+    // If the offset of the midpoint is too large, chop the high side of the
+    // range to the midpoint.
+    if (MidOffset > SLocOffset) {
+      GreaterIndex = MiddleIndex;
+      continue;
+    }
+
+    // If the middle index contains the value, succeed and return.
+    if (isOffsetInFileID(FileID::get(MiddleIndex), SLocOffset)) {
+#if 0
+      printf("bin %d -> %d [%s] %d %d\n", SLocOffset, MiddleIndex,
+             SLocEntryTable[MiddleIndex].isInstantiation() ? "inst" : "file",
+             LastFileIDLookup.ID,
+             int(SLocEntryTable.size()-MiddleIndex));
+#endif
+      FileID Res = FileID::get(MiddleIndex);
+
+      // If this isn't an instantiation, remember it.  Test the entry we found,
+      // not "I", which still points at the entry where the linear scan stopped.
+      if (!SLocEntryTable[MiddleIndex].isInstantiation())
+        LastFileIDLookup = Res;
+      NumBinaryProbes += NumProbes;
+      return Res;
+    }
+
+    // Otherwise, move the low-side up to the middle index.
+    LessIndex = MiddleIndex;
+  }
+}
+
+std::pair<FileID, unsigned>
+SourceManager::getDecomposedInstantiationLocSlowCase(const SrcMgr::SLocEntry *E,
+                                                     unsigned Offset) const {
+  // E is an instantiation record: walk up through instantiation points until
+  // we reach a file location; E then names a file entry, so the walk must stop.
+  FileID FID;
+  SourceLocation Loc;
+  do {
+    Loc = E->getInstantiation().getInstantiationLoc();
+    // Re-point E at the entry holding Loc and rebase Offset into it.
+    FID = getFileID(Loc);
+    E = &getSLocEntry(FID);
+    Offset += Loc.getOffset()-E->getOffset();
+  } while (!Loc.isFileID());
+
+  return std::make_pair(FID, Offset);
+}
+
+std::pair<FileID, unsigned>
+SourceManager::getDecomposedSpellingLocSlowCase(const SrcMgr::SLocEntry *E,
+                                                unsigned Offset) const {
+  // E is an instantiation record: hop to where its token was spelled.
+  SourceLocation SpellLoc = E->getInstantiation().getSpellingLoc();
+  assert(SpellLoc.isFileID() && "Should only have one spelling link");
+  FileID SpellFID = getFileID(SpellLoc);
+  // Rebase Offset onto the start of the entry that holds the spelling.
+  unsigned Delta = SpellLoc.getOffset()-getSLocEntry(SpellFID).getOffset();
+  return std::make_pair(SpellFID, Offset+Delta);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Queries about the code at a SourceLocation.
+//===----------------------------------------------------------------------===//
 
 /// getCharacterData - Return a pointer to the start of the specified location
 /// in the appropriate MemoryBuffer.
 const char *SourceManager::getCharacterData(SourceLocation SL) const {
   // Note that this is a hot function in the getSpelling() path, which is
   // heavily used by -E mode.
-  SL = getSpellingLoc(SL);
-  
-  std::pair<FileID, unsigned> LocInfo = getDecomposedFileLoc(SL);
+  std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(SL);
   
   // Note that calling 'getBuffer()' may lazily page in a source file.
-  return getContentCache(LocInfo.first)->getBuffer()->getBufferStart() + 
-         LocInfo.second;
+  return getSLocEntry(LocInfo.first).getFile().getContentCache()
+              ->getBuffer()->getBufferStart() + LocInfo.second;
 }
 
 
@@ -209,9 +322,10 @@
 /// this is significantly cheaper to compute than the line number.  This returns
 /// zero if the column number isn't known.
 unsigned SourceManager::getColumnNumber(SourceLocation Loc) const {
-  if (Loc.getChunkID() == 0) return 0;
+  if (Loc.isInvalid()) return 0;
+  assert(Loc.isFileID() && "Don't know what part of instantiation loc to get");
   
-  std::pair<FileID, unsigned> LocInfo = getDecomposedFileLoc(Loc);
+  std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc);
   unsigned FilePos = LocInfo.second;
   
   const char *Buf = getBuffer(LocInfo.first)->getBufferStart();
@@ -222,21 +336,6 @@
   return FilePos-LineStart+1;
 }
 
-/// getSourceName - This method returns the name of the file or buffer that
-/// the SourceLocation specifies.  This can be modified with #line directives,
-/// etc.
-const char *SourceManager::getSourceName(SourceLocation Loc) const {
-  if (Loc.getChunkID() == 0) return "";
-  
-  Loc = getSpellingLoc(Loc);
-  unsigned ChunkID = Loc.getChunkID();
-  const SrcMgr::ContentCache *C = getFIDInfo(ChunkID)->getContentCache();
-  
-  // To get the source name, first consult the FileEntry (if one exists) before
-  // the MemBuffer as this will avoid unnecessarily paging in the MemBuffer.
-  return C->Entry ? C->Entry->getName() : C->getBuffer()->getBufferIdentifier();
-}
-
 static void ComputeLineNumbers(ContentCache* FI) DISABLE_INLINE;
 static void ComputeLineNumbers(ContentCache* FI) {  
   // Note that calling 'getBuffer()' may lazily page in the file.
@@ -287,16 +386,17 @@
 /// line offsets for the MemoryBuffer, so this is not cheap: use only when
 /// about to emit a diagnostic.
 unsigned SourceManager::getLineNumber(SourceLocation Loc) const {
-  if (Loc.getChunkID() == 0) return 0;
+  if (Loc.isInvalid()) return 0;
+  assert(Loc.isFileID() && "Don't know what part of instantiation loc to get");
 
+  std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc);
+  
   ContentCache *Content;
-  
-  std::pair<FileID, unsigned> LocInfo = getDecomposedFileLoc(Loc);
-  
   if (LastLineNoFileIDQuery == LocInfo.first)
     Content = LastLineNoContentCache;
   else
-    Content = const_cast<ContentCache*>(getContentCache(LocInfo.first));
+    Content = const_cast<ContentCache*>(getSLocEntry(LocInfo.first)
+                                        .getFile().getContentCache());
   
   // If this is the first use of line information for this buffer, compute the
   /// SourceLineCache for it on demand.
@@ -375,15 +475,32 @@
   return LineNo;
 }
 
+/// getSourceName - Return the name of the file or memory buffer that holds the
+/// spelling of the specified location, or "" when the location is invalid.
+///
+const char *SourceManager::getSourceName(SourceLocation Loc) const {
+  if (Loc.isInvalid()) return "";
+  FileID SpellingFID = getFileID(getSpellingLoc(Loc));
+  const SrcMgr::ContentCache *Cache =
+    getSLocEntry(SpellingFID).getFile().getContentCache();
+  // Prefer the FileEntry's name: asking the MemBuffer may page the file in.
+  if (Cache->Entry)
+    return Cache->Entry->getName();
+  return Cache->getBuffer()->getBufferIdentifier();
+}
+
+//===----------------------------------------------------------------------===//
+// Other miscellaneous methods.
+//===----------------------------------------------------------------------===//
+
+
 /// PrintStats - Print statistics to stderr.
 ///
 void SourceManager::PrintStats() const {
   llvm::cerr << "\n*** Source Manager Stats:\n";
   llvm::cerr << FileInfos.size() << " files mapped, " << MemBufferInfos.size()
-             << " mem buffers mapped, " << FileIDs.size() 
-             << " file ID's allocated.\n";
-  llvm::cerr << "  " << FileIDs.size() << " normal buffer FileID's, "
-             << MacroIDs.size() << " macro expansion FileID's.\n";
+             << " mem buffers mapped, " << SLocEntryTable.size() 
+             << " SLocEntry's allocated.\n";
     
   unsigned NumLineNumsComputed = 0;
   unsigned NumFileBytesMapped = 0;
@@ -395,6 +512,8 @@
   
   llvm::cerr << NumFileBytesMapped << " bytes of files mapped, "
              << NumLineNumsComputed << " files with line #'s computed.\n";
+  llvm::cerr << "FileID scans: " << NumLinearScans << " linear, "
+             << NumBinaryProbes << " binary.\n";
 }
 
 //===----------------------------------------------------------------------===//
@@ -450,49 +569,23 @@
       D.RegisterPtr(PtrID,NULL);
     else
       // Get the ContextCache object and register it with the deserializer.
-      D.RegisterPtr(PtrID,SMgr.getContentCache(E));
+      D.RegisterPtr(PtrID, SMgr.getOrCreateContentCache(E));
+    return;
   }
-  else {
-    // Register the ContextCache object with the deserializer.
-    SMgr.MemBufferInfos.push_back(ContentCache());
-    ContentCache& Entry = const_cast<ContentCache&>(SMgr.MemBufferInfos.back());
-    D.RegisterPtr(&Entry);
-    
-    // Create the buffer.
-    unsigned Size = D.ReadInt();
-    Entry.Buffer = MemoryBuffer::getNewUninitMemBuffer(Size);
-    
-    // Read the contents of the buffer.
-    char* p = const_cast<char*>(Entry.Buffer->getBufferStart());
-    for (unsigned i = 0; i < Size ; ++i)
-      p[i] = D.ReadInt();    
-  }    
-}
-
-void FileIDInfo::Emit(llvm::Serializer& S) const {
-  S.Emit(IncludeLoc);
-  S.EmitInt(ChunkNo);
-  S.EmitPtr(Content);  
-}
-
-FileIDInfo FileIDInfo::ReadVal(llvm::Deserializer& D) {
-  FileIDInfo I;
-  I.IncludeLoc = SourceLocation::ReadVal(D);
-  I.ChunkNo = D.ReadInt();
-  D.ReadPtr(I.Content,false);
-  return I;
-}
-
-void MacroIDInfo::Emit(llvm::Serializer& S) const {
-  S.Emit(InstantiationLoc);
-  S.Emit(SpellingLoc);
-}
-
-MacroIDInfo MacroIDInfo::ReadVal(llvm::Deserializer& D) {
-  MacroIDInfo I;
-  I.InstantiationLoc = SourceLocation::ReadVal(D);
-  I.SpellingLoc = SourceLocation::ReadVal(D);
-  return I;
+  
+  // Register the ContentCache object with the deserializer.
+  SMgr.MemBufferInfos.push_back(ContentCache());
+  ContentCache& Entry = const_cast<ContentCache&>(SMgr.MemBufferInfos.back());
+  D.RegisterPtr(&Entry);
+  
+  // Create the buffer.
+  unsigned Size = D.ReadInt();
+  Entry.Buffer = MemoryBuffer::getNewUninitMemBuffer(Size);
+  
+  // Read the contents of the buffer.
+  char* p = const_cast<char*>(Entry.Buffer->getBufferStart());
+  for (unsigned i = 0; i < Size ; ++i)
+    p[i] = D.ReadInt();    
 }
 
 void SourceManager::Emit(llvm::Serializer& S) const {
@@ -516,13 +609,7 @@
   
   S.ExitBlock();
   
-  // Emit: FileIDs
-  S.EmitInt(FileIDs.size());  
-  std::for_each(FileIDs.begin(), FileIDs.end(), S.MakeEmitter<FileIDInfo>());
-  
-  // Emit: MacroIDs
-  S.EmitInt(MacroIDs.size());  
-  std::for_each(MacroIDs.begin(), MacroIDs.end(), S.MakeEmitter<MacroIDInfo>());
+  // FIXME: Emit SLocEntryTable.
   
   S.ExitBlock();
 }
@@ -533,7 +620,7 @@
   D.RegisterPtr(M);
   
   // Read: the FileID of the main source file of the translation unit.
-  M->MainFileID = FileID::Create(D.ReadInt());
+  M->MainFileID = FileID::get(D.ReadInt());
   
   std::vector<char> Buf;
     
@@ -549,17 +636,7 @@
     ContentCache::ReadToSourceManager(D,*M,NULL,Buf);
   }
   
-  // Read: FileIDs.
-  unsigned Size = D.ReadInt();
-  M->FileIDs.reserve(Size);
-  for (; Size > 0 ; --Size)
-    M->FileIDs.push_back(FileIDInfo::ReadVal(D));
-  
-  // Read: MacroIDs.
-  Size = D.ReadInt();
-  M->MacroIDs.reserve(Size);
-  for (; Size > 0 ; --Size)
-    M->MacroIDs.push_back(MacroIDInfo::ReadVal(D));
+  // FIXME: Read SLocEntryTable.
   
   return M;
 }
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index 3174a05..9e8d1aa 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -169,8 +169,8 @@
 
   // Set the SourceLocation with the remapping information.  This ensures that
   // GetMappedTokenLoc will remap the tokens as they are lexed.
-  L->FileLoc = SM.getInstantiationLoc(SM.getLocForStartOfFile(SpellingFID),
-                                      InstantiationLoc);
+  L->FileLoc = SM.createInstantiationLoc(SM.getLocForStartOfFile(SpellingFID),
+                                         InstantiationLoc, TokLen);
   
   // Ensure that the lexer thinks it is inside a directive, so that end \n will
   // return an EOM token.
@@ -214,16 +214,15 @@
 /// that are part of that.
 unsigned Lexer::MeasureTokenLength(SourceLocation Loc,
                                    const SourceManager &SM) {
-  // If this comes from a macro expansion, we really do want the macro name, not
-  // the token this macro expanded to.
-  Loc = SM.getInstantiationLoc(Loc);
-  
   // TODO: this could be special cased for common tokens like identifiers, ')',
   // etc to make this faster, if it mattered.  Just look at StrData[0] to handle
   // all obviously single-char tokens.  This could use 
   // Lexer::isObviouslySimpleCharacter for example to handle identifiers or
   // something.
-  std::pair<FileID, unsigned> LocInfo = SM.getDecomposedFileLoc(Loc);
+
+  // If this comes from a macro expansion, we really do want the macro name, not
+  // the token this macro expanded to.
+  std::pair<FileID, unsigned> LocInfo = SM.getDecomposedInstantiationLoc(Loc);
   std::pair<const char *,const char *> Buffer = SM.getBufferData(LocInfo.first);
   const char *StrData = Buffer.first+LocInfo.second;
 
@@ -310,10 +309,11 @@
 /// path of the hot getSourceLocation method.  Do not allow it to be inlined.
 static SourceLocation GetMappedTokenLoc(Preprocessor &PP,
                                         SourceLocation FileLoc,
-                                        unsigned CharNo) DISABLE_INLINE;
+                                        unsigned CharNo,
+                                        unsigned TokLen) DISABLE_INLINE;
 static SourceLocation GetMappedTokenLoc(Preprocessor &PP,
                                         SourceLocation FileLoc,
-                                        unsigned CharNo) {
+                                        unsigned CharNo, unsigned TokLen) {
   // Otherwise, we're lexing "mapped tokens".  This is used for things like
   // _Pragma handling.  Combine the instantiation location of FileLoc with the
   // spelling location.
@@ -324,12 +324,13 @@
   SourceLocation InstLoc = SourceMgr.getInstantiationLoc(FileLoc);
   SourceLocation SpellingLoc = SourceMgr.getSpellingLoc(FileLoc);
   SpellingLoc = SpellingLoc.getFileLocWithOffset(CharNo);
-  return SourceMgr.getInstantiationLoc(SpellingLoc, InstLoc);
+  return SourceMgr.createInstantiationLoc(SpellingLoc, InstLoc, TokLen);
 }
 
 /// getSourceLocation - Return a source location identifier for the specified
 /// offset in the current file.
-SourceLocation Lexer::getSourceLocation(const char *Loc) const {
+SourceLocation Lexer::getSourceLocation(const char *Loc,
+                                        unsigned TokLen) const {
   assert(Loc >= BufferStart && Loc <= BufferEnd &&
          "Location out of range for this buffer!");
 
@@ -342,7 +343,7 @@
   // Otherwise, this is the _Pragma lexer case, which pretends that all of the
   // tokens are lexed from where the _Pragma was defined.
   assert(PP && "This doesn't work on raw lexers");
-  return GetMappedTokenLoc(*PP, FileLoc, CharNo);
+  return GetMappedTokenLoc(*PP, FileLoc, CharNo, TokLen);
 }
 
 /// Diag - Forwarding function for diagnostics.  This translate a source
diff --git a/lib/Lex/PPMacroExpansion.cpp b/lib/Lex/PPMacroExpansion.cpp
index 6c4096d..63caafa 100644
--- a/lib/Lex/PPMacroExpansion.cpp
+++ b/lib/Lex/PPMacroExpansion.cpp
@@ -221,7 +221,7 @@
     
   } else if (MI->getNumTokens() == 1 &&
              isTrivialSingleTokenExpansion(MI, Identifier.getIdentifierInfo(),
-                                           *this)){
+                                           *this)) {
     // Otherwise, if this macro expands into a single trivially-expanded
     // token: expand it now.  This handles common cases like 
     // "#define VAL 42".
@@ -247,7 +247,8 @@
     // Update the tokens location to include both its instantiation and physical
     // locations.
     SourceLocation Loc =
-      SourceMgr.getInstantiationLoc(Identifier.getLocation(), InstantiateLoc);
+      SourceMgr.createInstantiationLoc(Identifier.getLocation(), InstantiateLoc,
+                                       Identifier.getLength());
     Identifier.setLocation(Loc);
     
     // If this is #define X X, we must mark the result as unexpandible.
@@ -480,13 +481,15 @@
       ComputeDATE_TIME(DATELoc, TIMELoc, *this);
     Tok.setKind(tok::string_literal);
     Tok.setLength(strlen("\"Mmm dd yyyy\""));
-    Tok.setLocation(SourceMgr.getInstantiationLoc(DATELoc, Tok.getLocation()));
+    Tok.setLocation(SourceMgr.createInstantiationLoc(DATELoc, Tok.getLocation(),
+                                                     Tok.getLength()));
   } else if (II == Ident__TIME__) {
     if (!TIMELoc.isValid())
       ComputeDATE_TIME(DATELoc, TIMELoc, *this);
     Tok.setKind(tok::string_literal);
     Tok.setLength(strlen("\"hh:mm:ss\""));
-    Tok.setLocation(SourceMgr.getInstantiationLoc(TIMELoc, Tok.getLocation()));
+    Tok.setLocation(SourceMgr.createInstantiationLoc(TIMELoc, Tok.getLocation(),
+                                                     Tok.getLength()));
   } else if (II == Ident__INCLUDE_LEVEL__) {
     Diag(Tok, diag::ext_pp_include_level);
 
diff --git a/lib/Lex/PTHLexer.cpp b/lib/Lex/PTHLexer.cpp
index ec76a29..f6994e0 100644
--- a/lib/Lex/PTHLexer.cpp
+++ b/lib/Lex/PTHLexer.cpp
@@ -321,7 +321,7 @@
 unsigned PTHManager::getSpelling(SourceLocation Loc, const char *&Buffer) {
   SourceManager &SM = PP->getSourceManager();
   Loc = SM.getSpellingLoc(Loc);
-  std::pair<FileID, unsigned> LocInfo = SM.getDecomposedFileLoc(Loc);
+  std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc);
   return getSpelling(LocInfo.first, LocInfo.second, Buffer);
 }
 
@@ -407,8 +407,7 @@
 
 unsigned PTHLexer::getSpelling(SourceLocation Loc, const char *&Buffer) {
   SourceManager &SM = PP->getSourceManager();
-  Loc = SM.getSpellingLoc(Loc);
-  std::pair<FileID, unsigned> LocInfo = SM.getDecomposedFileLoc(Loc);
+  std::pair<FileID, unsigned> LocInfo = SM.getDecomposedSpellingLoc(Loc);
 
   FileID FID = LocInfo.first;
   unsigned FPos = LocInfo.second;
diff --git a/lib/Lex/ScratchBuffer.cpp b/lib/Lex/ScratchBuffer.cpp
index bef81ca..695a536 100644
--- a/lib/Lex/ScratchBuffer.cpp
+++ b/lib/Lex/ScratchBuffer.cpp
@@ -50,7 +50,7 @@
 SourceLocation ScratchBuffer::getToken(const char *Buf, unsigned Len,
                                        SourceLocation SourceLoc) {
   // Map the physloc to the specified sourceloc.
-  return SourceMgr.getInstantiationLoc(getToken(Buf, Len), SourceLoc);
+  return SourceMgr.createInstantiationLoc(getToken(Buf, Len), SourceLoc, Len);
 }
 
 void ScratchBuffer::AllocScratchBuffer(unsigned RequestLen) {
diff --git a/lib/Lex/TokenLexer.cpp b/lib/Lex/TokenLexer.cpp
index dd5352c..ea4ce66 100644
--- a/lib/Lex/TokenLexer.cpp
+++ b/lib/Lex/TokenLexer.cpp
@@ -314,8 +314,9 @@
   // that captures all of this.
   if (InstantiateLoc.isValid()) {   // Don't do this for token streams.
     SourceManager &SrcMgr = PP.getSourceManager();
-    Tok.setLocation(SrcMgr.getInstantiationLoc(Tok.getLocation(), 
-                                               InstantiateLoc));
+    Tok.setLocation(SrcMgr.createInstantiationLoc(Tok.getLocation(), 
+                                                  InstantiateLoc,
+                                                  Tok.getLength()));
   }
   
   // If this is the first token, set the lexical properties of the token to
@@ -398,7 +399,7 @@
              "Should be a raw location into scratch buffer");
       SourceManager &SourceMgr = PP.getSourceManager();
       std::pair<FileID, unsigned> LocInfo =
-        SourceMgr.getDecomposedFileLoc(ResultTokLoc);
+        SourceMgr.getDecomposedLoc(ResultTokLoc);
       
       const char *ScratchBufStart =SourceMgr.getBufferData(LocInfo.first).first;
       
diff --git a/lib/Rewrite/HTMLRewrite.cpp b/lib/Rewrite/HTMLRewrite.cpp
index 0401466..b5f0fc3 100644
--- a/lib/Rewrite/HTMLRewrite.cpp
+++ b/lib/Rewrite/HTMLRewrite.cpp
@@ -441,8 +441,7 @@
     
     // Ignore tokens whose instantiation location was not the main file.
     SourceLocation LLoc = SourceMgr.getInstantiationLoc(Tok.getLocation());
-    std::pair<FileID, unsigned> LLocInfo = 
-      SourceMgr.getDecomposedFileLoc(LLoc);
+    std::pair<FileID, unsigned> LLocInfo = SourceMgr.getDecomposedLoc(LLoc);
     
     if (LLocInfo.first != FID) {
       PP.Lex(Tok);
diff --git a/lib/Rewrite/Rewriter.cpp b/lib/Rewrite/Rewriter.cpp
index e92bd7d..551fa1d 100644
--- a/lib/Rewrite/Rewriter.cpp
+++ b/lib/Rewrite/Rewriter.cpp
@@ -151,7 +151,7 @@
 unsigned Rewriter::getLocationOffsetAndFileID(SourceLocation Loc,
                                               FileID &FID) const {
   assert(Loc.isValid() && "Invalid location");
-  std::pair<FileID,unsigned> V = SourceMgr->getDecomposedFileLoc(Loc);
+  std::pair<FileID,unsigned> V = SourceMgr->getDecomposedLoc(Loc);
   FID = V.first;
   return V.second;
 }