Chris Lattner | 4b00965 | 2007-07-25 00:24:17 +0000 | [diff] [blame] | 1 | //===--- SourceManager.h - Track and cache source files ---------*- C++ -*-===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file was developed by Chris Lattner and is distributed under |
| 6 | // the University of Illinois Open Source License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | // |
| 10 | // This file defines the SourceManager interface. |
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #ifndef LLVM_CLANG_SOURCEMANAGER_H |
| 15 | #define LLVM_CLANG_SOURCEMANAGER_H |
| 16 | |
| 17 | #include "clang/Basic/SourceLocation.h" |
| 18 | #include <vector> |
| 19 | #include <map> |
| 20 | #include <list> |
| 21 | #include <cassert> |
| 22 | |
| 23 | namespace llvm { |
| 24 | class MemoryBuffer; |
| 25 | } |
| 26 | |
| 27 | namespace clang { |
| 28 | |
| 29 | class SourceManager; |
| 30 | class FileEntry; |
| 31 | class IdentifierTokenInfo; |
| 32 | |
| 33 | /// SrcMgr - Private classes that are part of the SourceManager implementation. |
| 34 | /// |
| 35 | namespace SrcMgr { |
| 36 | /// FileInfo - Once instance of this struct is kept for every file loaded or |
| 37 | /// used. This object owns the MemoryBuffer object. |
| 38 | struct FileInfo { |
| 39 | /// Buffer - The actual buffer containing the characters from the input |
| 40 | /// file. |
| 41 | const llvm::MemoryBuffer *Buffer; |
| 42 | |
| 43 | /// SourceLineCache - A new[]'d array of offsets for each source line. This |
| 44 | /// is lazily computed. |
| 45 | /// |
| 46 | unsigned *SourceLineCache; |
| 47 | |
| 48 | /// NumLines - The number of lines in this FileInfo. This is only valid if |
| 49 | /// SourceLineCache is non-null. |
| 50 | unsigned NumLines; |
| 51 | }; |
| 52 | |
| 53 | typedef std::pair<const FileEntry * const, FileInfo> InfoRec; |
| 54 | |
| 55 | /// FileIDInfo - Information about a FileID, basically just the logical file |
| 56 | /// that it represents and include stack information. A File SourceLocation |
| 57 | /// is a byte offset from the start of this. |
| 58 | /// |
| 59 | /// FileID's are used to compute the location of a character in memory as well |
| 60 | /// as the logical source location, which can be differ from the physical |
| 61 | /// location. It is different when #line's are active or when macros have |
| 62 | /// been expanded. |
| 63 | /// |
| 64 | /// Each FileID has include stack information, indicating where it came from. |
| 65 | /// For the primary translation unit, it comes from SourceLocation() aka 0. |
| 66 | /// This information encodes the #include chain that a token was instantiated |
| 67 | /// from. |
| 68 | /// |
| 69 | /// FileIDInfos contain a "InfoRec *", describing the source file, and a Chunk |
| 70 | /// number, which allows a SourceLocation to index into very large files |
| 71 | /// (those which there are not enough FilePosBits to address). |
| 72 | /// |
| 73 | struct FileIDInfo { |
| 74 | private: |
| 75 | /// IncludeLoc - The location of the #include that brought in this file. |
| 76 | /// This SourceLocation object has an invalid SLOC for the main file. |
| 77 | SourceLocation IncludeLoc; |
| 78 | |
| 79 | /// ChunkNo - Really large buffers are broken up into chunks that are |
| 80 | /// each (1 << SourceLocation::FilePosBits) in size. This specifies the |
| 81 | /// chunk number of this FileID. |
| 82 | unsigned ChunkNo; |
| 83 | |
| 84 | /// FileInfo - Information about the source buffer itself. |
| 85 | /// |
| 86 | const InfoRec *Info; |
| 87 | public: |
| 88 | |
| 89 | /// get - Return a FileIDInfo object. |
| 90 | static FileIDInfo get(SourceLocation IL, unsigned CN, const InfoRec *Inf) { |
| 91 | FileIDInfo X; |
| 92 | X.IncludeLoc = IL; |
| 93 | X.ChunkNo = CN; |
| 94 | X.Info = Inf; |
| 95 | return X; |
| 96 | } |
| 97 | |
| 98 | SourceLocation getIncludeLoc() const { return IncludeLoc; } |
| 99 | unsigned getChunkNo() const { return ChunkNo; } |
| 100 | const InfoRec *getInfo() const { return Info; } |
| 101 | }; |
| 102 | |
| 103 | /// MacroIDInfo - Macro SourceLocations refer to these records by their ID. |
| 104 | /// Each MacroIDInfo encodes the Instantiation location - where the macro was |
| 105 | /// instantiated, and the PhysicalLoc - where the actual character data for |
| 106 | /// the token came from. An actual macro SourceLocation stores deltas from |
| 107 | /// these positions. |
| 108 | class MacroIDInfo { |
| 109 | SourceLocation InstantiationLoc, PhysicalLoc; |
| 110 | public: |
| 111 | SourceLocation getInstantiationLoc() const { return InstantiationLoc; } |
| 112 | SourceLocation getPhysicalLoc() const { return PhysicalLoc; } |
| 113 | |
| 114 | /// get - Return a MacroID for a macro expansion. IL specifies |
| 115 | /// the instantiation location, and PL specifies the physical location |
| 116 | /// (where the characters from the token come from). Both IL and PL refer |
| 117 | /// to normal File SLocs. |
| 118 | static MacroIDInfo get(SourceLocation IL, SourceLocation PL) { |
| 119 | MacroIDInfo X; |
| 120 | X.InstantiationLoc = IL; |
| 121 | X.PhysicalLoc = PL; |
| 122 | return X; |
| 123 | } |
| 124 | }; |
| 125 | } // end SrcMgr namespace. |
| 126 | |
| 127 | |
| 128 | /// SourceManager - This file handles loading and caching of source files into |
| 129 | /// memory. This object owns the MemoryBuffer objects for all of the loaded |
| 130 | /// files and assigns unique FileID's for each unique #include chain. |
| 131 | /// |
| 132 | /// The SourceManager can be queried for information about SourceLocation |
| 133 | /// objects, turning them into either physical or logical locations. Physical |
| 134 | /// locations represent where the bytes corresponding to a token came from and |
| 135 | /// logical locations represent where the location is in the user's view. In |
| 136 | /// the case of a macro expansion, for example, the physical location indicates |
| 137 | /// where the expanded token came from and the logical location specifies where |
| 138 | /// it was expanded. Logical locations are also influenced by #line directives, |
| 139 | /// etc. |
| 140 | class SourceManager { |
| 141 | /// FileInfos - Memoized information about all of the files tracked by this |
| 142 | /// SourceManager. |
| 143 | std::map<const FileEntry *, SrcMgr::FileInfo> FileInfos; |
| 144 | |
| 145 | /// MemBufferInfos - Information about various memory buffers that we have |
| 146 | /// read in. This is a list, instead of a vector, because we need pointers to |
| 147 | /// the FileInfo objects to be stable. |
| 148 | std::list<SrcMgr::InfoRec> MemBufferInfos; |
| 149 | |
| 150 | /// FileIDs - Information about each FileID. FileID #0 is not valid, so all |
| 151 | /// entries are off by one. |
| 152 | std::vector<SrcMgr::FileIDInfo> FileIDs; |
| 153 | |
| 154 | /// MacroIDs - Information about each MacroID. |
| 155 | std::vector<SrcMgr::MacroIDInfo> MacroIDs; |
| 156 | |
| 157 | /// LastLineNo - These ivars serve as a cache used in the getLineNumber |
| 158 | /// method which is used to speedup getLineNumber calls to nearby locations. |
| 159 | unsigned LastLineNoFileIDQuery; |
| 160 | SrcMgr::FileInfo *LastLineNoFileInfo; |
| 161 | unsigned LastLineNoFilePos; |
| 162 | unsigned LastLineNoResult; |
| 163 | public: |
| 164 | SourceManager() : LastLineNoFileIDQuery(~0U) {} |
| 165 | ~SourceManager(); |
| 166 | |
| 167 | void clearIDTables() { |
| 168 | FileIDs.clear(); |
| 169 | MacroIDs.clear(); |
| 170 | LastLineNoFileIDQuery = ~0U; |
| 171 | LastLineNoFileInfo = 0; |
| 172 | } |
| 173 | |
| 174 | /// createFileID - Create a new FileID that represents the specified file |
| 175 | /// being #included from the specified IncludePosition. This returns 0 on |
| 176 | /// error and translates NULL into standard input. |
| 177 | unsigned createFileID(const FileEntry *SourceFile, SourceLocation IncludePos){ |
| 178 | const SrcMgr::InfoRec *IR = getInfoRec(SourceFile); |
| 179 | if (IR == 0) return 0; // Error opening file? |
| 180 | return createFileID(IR, IncludePos); |
| 181 | } |
| 182 | |
| 183 | /// createFileIDForMemBuffer - Create a new FileID that represents the |
| 184 | /// specified memory buffer. This does no caching of the buffer and takes |
| 185 | /// ownership of the MemoryBuffer, so only pass a MemoryBuffer to this once. |
| 186 | unsigned createFileIDForMemBuffer(const llvm::MemoryBuffer *Buffer) { |
| 187 | return createFileID(createMemBufferInfoRec(Buffer), SourceLocation()); |
| 188 | } |
| 189 | |
| 190 | /// getInstantiationLoc - Return a new SourceLocation that encodes the fact |
| 191 | /// that a token at Loc should actually be referenced from InstantiationLoc. |
| 192 | SourceLocation getInstantiationLoc(SourceLocation Loc, |
| 193 | SourceLocation InstantiationLoc); |
| 194 | |
| 195 | /// getBuffer - Return the buffer for the specified FileID. |
| 196 | /// |
| 197 | const llvm::MemoryBuffer *getBuffer(unsigned FileID) const { |
| 198 | return getFileInfo(FileID)->Buffer; |
| 199 | } |
| 200 | |
| 201 | /// getIncludeLoc - Return the location of the #include for the specified |
| 202 | /// SourceLocation. If this is a macro expansion, this transparently figures |
| 203 | /// out which file includes the file being expanded into. |
| 204 | SourceLocation getIncludeLoc(SourceLocation ID) const { |
| 205 | return getFIDInfo(getLogicalLoc(ID).getFileID())->getIncludeLoc(); |
| 206 | } |
| 207 | |
| 208 | /// getCharacterData - Return a pointer to the start of the specified location |
| 209 | /// in the appropriate MemoryBuffer. |
| 210 | const char *getCharacterData(SourceLocation SL) const; |
| 211 | |
| 212 | /// getColumnNumber - Return the column # for the specified file position. |
| 213 | /// This is significantly cheaper to compute than the line number. This |
| 214 | /// returns zero if the column number isn't known. This may only be called on |
| 215 | /// a file sloc, so you must choose a physical or logical location before |
| 216 | /// calling this method. |
| 217 | unsigned getColumnNumber(SourceLocation Loc) const; |
| 218 | |
| 219 | unsigned getPhysicalColumnNumber(SourceLocation Loc) const { |
| 220 | return getColumnNumber(getPhysicalLoc(Loc)); |
| 221 | } |
| 222 | unsigned getLogicalColumnNumber(SourceLocation Loc) const { |
| 223 | return getColumnNumber(getLogicalLoc(Loc)); |
| 224 | } |
| 225 | |
| 226 | |
| 227 | /// getLineNumber - Given a SourceLocation, return the physical line number |
| 228 | /// for the position indicated. This requires building and caching a table of |
| 229 | /// line offsets for the MemoryBuffer, so this is not cheap: use only when |
| 230 | /// about to emit a diagnostic. |
| 231 | unsigned getLineNumber(SourceLocation Loc); |
| 232 | |
| 233 | unsigned getLogicalLineNumber(SourceLocation Loc) { |
| 234 | return getLineNumber(getLogicalLoc(Loc)); |
| 235 | } |
| 236 | unsigned getPhysicalLineNumber(SourceLocation Loc) { |
| 237 | return getLineNumber(getPhysicalLoc(Loc)); |
| 238 | } |
| 239 | |
| 240 | /// getSourceName - This method returns the name of the file or buffer that |
| 241 | /// the SourceLocation specifies. This can be modified with #line directives, |
| 242 | /// etc. |
Chris Lattner | 37f04117 | 2007-08-30 05:59:30 +0000 | [diff] [blame^] | 243 | const char *getSourceName(SourceLocation Loc) const; |
Chris Lattner | 4b00965 | 2007-07-25 00:24:17 +0000 | [diff] [blame] | 244 | |
| 245 | /// Given a SourceLocation object, return the logical location referenced by |
| 246 | /// the ID. This logical location is subject to #line directives, etc. |
| 247 | SourceLocation getLogicalLoc(SourceLocation Loc) const { |
| 248 | // File locations are both physical and logical. |
| 249 | if (Loc.isFileID()) return Loc; |
| 250 | |
| 251 | SourceLocation ILoc = MacroIDs[Loc.getMacroID()].getInstantiationLoc(); |
| 252 | return ILoc.getFileLocWithOffset(Loc.getMacroLogOffs()); |
| 253 | } |
| 254 | |
| 255 | /// getPhysicalLoc - Given a SourceLocation object, return the physical |
| 256 | /// location referenced by the ID. |
| 257 | SourceLocation getPhysicalLoc(SourceLocation Loc) const { |
| 258 | // File locations are both physical and logical. |
| 259 | if (Loc.isFileID()) return Loc; |
| 260 | |
| 261 | SourceLocation PLoc = MacroIDs[Loc.getMacroID()].getPhysicalLoc(); |
| 262 | return PLoc.getFileLocWithOffset(Loc.getMacroPhysOffs()); |
| 263 | } |
| 264 | |
| 265 | /// getFileEntryForLoc - Return the FileEntry record for the physloc of the |
| 266 | /// specified SourceLocation, if one exists. |
| 267 | const FileEntry *getFileEntryForLoc(SourceLocation Loc) const { |
| 268 | Loc = getPhysicalLoc(Loc); |
| 269 | unsigned FileID = Loc.getFileID(); |
| 270 | assert(FileID-1 < FileIDs.size() && "Invalid FileID!"); |
| 271 | return FileIDs[FileID-1].getInfo()->first; |
| 272 | } |
| 273 | |
| 274 | /// PrintStats - Print statistics to stderr. |
| 275 | /// |
| 276 | void PrintStats() const; |
| 277 | private: |
| 278 | /// createFileID - Create a new fileID for the specified InfoRec and include |
| 279 | /// position. This works regardless of whether the InfoRec corresponds to a |
| 280 | /// file or some other input source. |
| 281 | unsigned createFileID(const SrcMgr::InfoRec *File, SourceLocation IncludePos); |
| 282 | |
| 283 | /// getInfoRec - Create or return a cached FileInfo for the specified file. |
| 284 | /// This returns null on failure. |
| 285 | const SrcMgr::InfoRec *getInfoRec(const FileEntry *SourceFile); |
| 286 | |
| 287 | /// createMemBufferInfoRec - Create a new info record for the specified memory |
| 288 | /// buffer. This does no caching. |
| 289 | const SrcMgr::InfoRec *createMemBufferInfoRec(const llvm::MemoryBuffer *Buf); |
| 290 | |
| 291 | const SrcMgr::FileIDInfo *getFIDInfo(unsigned FileID) const { |
| 292 | assert(FileID-1 < FileIDs.size() && "Invalid FileID!"); |
| 293 | return &FileIDs[FileID-1]; |
| 294 | } |
| 295 | |
| 296 | const SrcMgr::InfoRec *getInfoRec(unsigned FileID) const { |
| 297 | return getInfoRec(getFIDInfo(FileID)); |
| 298 | } |
| 299 | |
| 300 | SrcMgr::FileInfo *getFileInfo(unsigned FileID) const { |
| 301 | if (const SrcMgr::InfoRec *IR = getInfoRec(FileID)) |
| 302 | return const_cast<SrcMgr::FileInfo *>(&IR->second); |
| 303 | return 0; |
| 304 | } |
| 305 | |
| 306 | /// Return the InfoRec structure for the specified FileID. This is always the |
| 307 | /// physical reference for the ID. |
| 308 | const SrcMgr::InfoRec *getInfoRec(const SrcMgr::FileIDInfo *FIDInfo) const { |
| 309 | return FIDInfo->getInfo(); |
| 310 | } |
| 311 | |
| 312 | |
| 313 | /// getFullFilePos - This (efficient) method returns the offset from the start |
| 314 | /// of the file that the specified physical SourceLocation represents. This |
| 315 | /// returns the location of the physical character data, not the logical file |
| 316 | /// position. |
| 317 | unsigned getFullFilePos(SourceLocation PhysLoc) const { |
| 318 | // TODO: Add a flag "is first chunk" to SLOC. |
| 319 | const SrcMgr::FileIDInfo *FIDInfo = getFIDInfo(PhysLoc.getFileID()); |
| 320 | |
| 321 | // If this file has been split up into chunks, factor in the chunk number |
| 322 | // that the FileID references. |
| 323 | unsigned ChunkNo = FIDInfo->getChunkNo(); |
| 324 | return PhysLoc.getRawFilePos() + (ChunkNo << SourceLocation::FilePosBits); |
| 325 | } |
| 326 | }; |
| 327 | |
| 328 | |
| 329 | } // end namespace clang |
| 330 | |
| 331 | #endif |