blob: 672a2c18b547ada2c006fb203b014f8ba0dad1bc [file] [log] [blame]
Chris Lattner4b009652007-07-25 00:24:17 +00001//===--- SourceManager.h - Track and cache source files ---------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file was developed by Chris Lattner and is distributed under
6// the University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the SourceManager interface.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_CLANG_SOURCEMANAGER_H
15#define LLVM_CLANG_SOURCEMANAGER_H
16
#include "clang/Basic/SourceLocation.h"
#include "llvm/Bitcode/SerializationFwd.h"
#include <cassert>
#include <functional>
#include <list>
#include <set>
#include <vector>
23
24namespace llvm {
25class MemoryBuffer;
26}
27
28namespace clang {
29
30class SourceManager;
31class FileEntry;
32class IdentifierTokenInfo;
33
34/// SrcMgr - Private classes that are part of the SourceManager implementation.
35///
36namespace SrcMgr {
Ted Kremenekdd364ea2007-10-30 21:08:08 +000037 /// ContentCache - Once instance of this struct is kept for every file
38 /// loaded or used. This object owns the MemoryBuffer object.
39 struct ContentCache {
40 /// Reference to the file entry. This reference does not own
41 /// the FileEntry object. It is possible for this to be NULL if
42 /// the ContentCache encapsulates an imaginary text buffer.
43 const FileEntry* Entry;
44
Chris Lattner4b009652007-07-25 00:24:17 +000045 /// Buffer - The actual buffer containing the characters from the input
Ted Kremenekdd364ea2007-10-30 21:08:08 +000046 /// file. This is owned by the FileInfo object.
47 const llvm::MemoryBuffer* Buffer;
Chris Lattner4b009652007-07-25 00:24:17 +000048
49 /// SourceLineCache - A new[]'d array of offsets for each source line. This
Ted Kremenekdd364ea2007-10-30 21:08:08 +000050 /// is lazily computed. This is owned by the FileInfo object.
51 unsigned* SourceLineCache;
Chris Lattner4b009652007-07-25 00:24:17 +000052
53 /// NumLines - The number of lines in this FileInfo. This is only valid if
54 /// SourceLineCache is non-null.
55 unsigned NumLines;
Ted Kremenekdd364ea2007-10-30 21:08:08 +000056
57 ContentCache(const FileEntry* e = NULL)
58 : Entry(e), Buffer(NULL), SourceLineCache(NULL), NumLines(0) {}
59
60 ~ContentCache();
Ted Kremenek7670cca2007-10-30 22:57:35 +000061
62 /// The copy ctor does not allow copies where source object has either
63 /// a non-NULL Buffer or SourceLineCache. Ownership of allocated memory
64 /// is not transfered, so this is a logical error.
65 ContentCache(const ContentCache& RHS) : Buffer(NULL),SourceLineCache(NULL) {
66 Entry = RHS.Entry;
67
68 assert (RHS.Buffer == NULL && RHS.SourceLineCache == NULL
69 && "Passed ContentCache object cannot own a buffer.");
70
71 NumLines = RHS.NumLines;
72 }
73
74 private:
75 // Disable assignments.
76 ContentCache& operator=(const ContentCache& RHS);
Ted Kremenekdd364ea2007-10-30 21:08:08 +000077 };
Chris Lattner4b009652007-07-25 00:24:17 +000078
79 /// FileIDInfo - Information about a FileID, basically just the logical file
80 /// that it represents and include stack information. A File SourceLocation
81 /// is a byte offset from the start of this.
82 ///
83 /// FileID's are used to compute the location of a character in memory as well
84 /// as the logical source location, which can be differ from the physical
85 /// location. It is different when #line's are active or when macros have
86 /// been expanded.
87 ///
88 /// Each FileID has include stack information, indicating where it came from.
89 /// For the primary translation unit, it comes from SourceLocation() aka 0.
90 /// This information encodes the #include chain that a token was instantiated
91 /// from.
92 ///
Ted Kremenek7670cca2007-10-30 22:57:35 +000093 /// FileIDInfos contain a "ContentCache *", describing the source file,
94 /// and a Chunk number, which allows a SourceLocation to index into very
95 /// large files (those which there are not enough FilePosBits to address).
Chris Lattner4b009652007-07-25 00:24:17 +000096 ///
97 struct FileIDInfo {
98 private:
99 /// IncludeLoc - The location of the #include that brought in this file.
100 /// This SourceLocation object has an invalid SLOC for the main file.
101 SourceLocation IncludeLoc;
102
103 /// ChunkNo - Really large buffers are broken up into chunks that are
104 /// each (1 << SourceLocation::FilePosBits) in size. This specifies the
105 /// chunk number of this FileID.
106 unsigned ChunkNo;
107
Ted Kremenekdd364ea2007-10-30 21:08:08 +0000108 /// Content - Information about the source buffer itself.
109 const ContentCache* Content;
Chris Lattner4b009652007-07-25 00:24:17 +0000110
Ted Kremenekdd364ea2007-10-30 21:08:08 +0000111 public:
Chris Lattner4b009652007-07-25 00:24:17 +0000112 /// get - Return a FileIDInfo object.
Ted Kremenekdd364ea2007-10-30 21:08:08 +0000113 static FileIDInfo get(SourceLocation IL, unsigned CN,
114 const ContentCache *Con) {
Chris Lattner4b009652007-07-25 00:24:17 +0000115 FileIDInfo X;
116 X.IncludeLoc = IL;
117 X.ChunkNo = CN;
Ted Kremenekdd364ea2007-10-30 21:08:08 +0000118 X.Content = Con;
Chris Lattner4b009652007-07-25 00:24:17 +0000119 return X;
120 }
121
122 SourceLocation getIncludeLoc() const { return IncludeLoc; }
123 unsigned getChunkNo() const { return ChunkNo; }
Ted Kremenekdd364ea2007-10-30 21:08:08 +0000124 const ContentCache* getContentCache() const { return Content; }
Chris Lattner4b009652007-07-25 00:24:17 +0000125 };
126
127 /// MacroIDInfo - Macro SourceLocations refer to these records by their ID.
128 /// Each MacroIDInfo encodes the Instantiation location - where the macro was
129 /// instantiated, and the PhysicalLoc - where the actual character data for
130 /// the token came from. An actual macro SourceLocation stores deltas from
131 /// these positions.
132 class MacroIDInfo {
Chris Lattnerb8a39d92007-11-09 23:59:17 +0000133 SourceLocation VirtualLoc, PhysicalLoc;
Chris Lattner4b009652007-07-25 00:24:17 +0000134 public:
Chris Lattnerb8a39d92007-11-09 23:59:17 +0000135 SourceLocation getVirtualLoc() const { return VirtualLoc; }
Chris Lattner4b009652007-07-25 00:24:17 +0000136 SourceLocation getPhysicalLoc() const { return PhysicalLoc; }
137
Chris Lattnerb8a39d92007-11-09 23:59:17 +0000138 /// get - Return a MacroID for a macro expansion. VL specifies
139 /// the instantiation location (where the macro is expanded), and PL
140 /// specifies the physical location (where the characters from the token
141 /// come from). Both VL and PL refer to normal File SLocs.
142 static MacroIDInfo get(SourceLocation VL, SourceLocation PL) {
Chris Lattner4b009652007-07-25 00:24:17 +0000143 MacroIDInfo X;
Chris Lattnerb8a39d92007-11-09 23:59:17 +0000144 X.VirtualLoc = VL;
Chris Lattner4b009652007-07-25 00:24:17 +0000145 X.PhysicalLoc = PL;
146 return X;
147 }
148 };
149} // end SrcMgr namespace.
Ted Kremenekdd364ea2007-10-30 21:08:08 +0000150} // end clang namespace
Chris Lattner4b009652007-07-25 00:24:17 +0000151
Ted Kremenekdd364ea2007-10-30 21:08:08 +0000152namespace std {
153template <> struct less<clang::SrcMgr::ContentCache> {
154 inline bool operator()(const clang::SrcMgr::ContentCache& L,
155 const clang::SrcMgr::ContentCache& R) const {
156 return L.Entry < R.Entry;
157 }
158};
159} // end std namespace
Chris Lattner4b009652007-07-25 00:24:17 +0000160
Ted Kremenekdd364ea2007-10-30 21:08:08 +0000161namespace clang {
162
Chris Lattner4b009652007-07-25 00:24:17 +0000163/// SourceManager - This file handles loading and caching of source files into
164/// memory. This object owns the MemoryBuffer objects for all of the loaded
165/// files and assigns unique FileID's for each unique #include chain.
166///
167/// The SourceManager can be queried for information about SourceLocation
168/// objects, turning them into either physical or logical locations. Physical
169/// locations represent where the bytes corresponding to a token came from and
170/// logical locations represent where the location is in the user's view. In
171/// the case of a macro expansion, for example, the physical location indicates
172/// where the expanded token came from and the logical location specifies where
173/// it was expanded. Logical locations are also influenced by #line directives,
174/// etc.
175class SourceManager {
176 /// FileInfos - Memoized information about all of the files tracked by this
Ted Kremenek7670cca2007-10-30 22:57:35 +0000177 /// SourceManager. This set allows us to merge ContentCache entries based
178 /// on their FileEntry*. All ContentCache objects will thus have unique,
179 /// non-null, FileEntry pointers.
Ted Kremenekdd364ea2007-10-30 21:08:08 +0000180 std::set<SrcMgr::ContentCache> FileInfos;
Chris Lattner4b009652007-07-25 00:24:17 +0000181
182 /// MemBufferInfos - Information about various memory buffers that we have
183 /// read in. This is a list, instead of a vector, because we need pointers to
Ted Kremenek7670cca2007-10-30 22:57:35 +0000184 /// the FileInfo objects to be stable. All FileEntry* within the
185 /// stored ContentCache objects are NULL, as they do not refer to a file.
Ted Kremenekdd364ea2007-10-30 21:08:08 +0000186 std::list<SrcMgr::ContentCache> MemBufferInfos;
Chris Lattner4b009652007-07-25 00:24:17 +0000187
188 /// FileIDs - Information about each FileID. FileID #0 is not valid, so all
189 /// entries are off by one.
190 std::vector<SrcMgr::FileIDInfo> FileIDs;
191
192 /// MacroIDs - Information about each MacroID.
193 std::vector<SrcMgr::MacroIDInfo> MacroIDs;
194
195 /// LastLineNo - These ivars serve as a cache used in the getLineNumber
196 /// method which is used to speedup getLineNumber calls to nearby locations.
197 unsigned LastLineNoFileIDQuery;
Ted Kremenekdd364ea2007-10-30 21:08:08 +0000198 SrcMgr::ContentCache *LastLineNoContentCache;
Chris Lattner4b009652007-07-25 00:24:17 +0000199 unsigned LastLineNoFilePos;
200 unsigned LastLineNoResult;
Ted Kremenekdd364ea2007-10-30 21:08:08 +0000201
Chris Lattner4b009652007-07-25 00:24:17 +0000202public:
203 SourceManager() : LastLineNoFileIDQuery(~0U) {}
Ted Kremenekdd364ea2007-10-30 21:08:08 +0000204 ~SourceManager() {}
Chris Lattner4b009652007-07-25 00:24:17 +0000205
206 void clearIDTables() {
207 FileIDs.clear();
208 MacroIDs.clear();
209 LastLineNoFileIDQuery = ~0U;
Ted Kremenekdd364ea2007-10-30 21:08:08 +0000210 LastLineNoContentCache = 0;
Chris Lattner4b009652007-07-25 00:24:17 +0000211 }
212
213 /// createFileID - Create a new FileID that represents the specified file
214 /// being #included from the specified IncludePosition. This returns 0 on
215 /// error and translates NULL into standard input.
216 unsigned createFileID(const FileEntry *SourceFile, SourceLocation IncludePos){
Ted Kremenekdd364ea2007-10-30 21:08:08 +0000217 const SrcMgr::ContentCache *IR = getContentCache(SourceFile);
Chris Lattner4b009652007-07-25 00:24:17 +0000218 if (IR == 0) return 0; // Error opening file?
219 return createFileID(IR, IncludePos);
220 }
221
222 /// createFileIDForMemBuffer - Create a new FileID that represents the
223 /// specified memory buffer. This does no caching of the buffer and takes
224 /// ownership of the MemoryBuffer, so only pass a MemoryBuffer to this once.
225 unsigned createFileIDForMemBuffer(const llvm::MemoryBuffer *Buffer) {
Ted Kremenekdd364ea2007-10-30 21:08:08 +0000226 return createFileID(createMemBufferContentCache(Buffer), SourceLocation());
Chris Lattner4b009652007-07-25 00:24:17 +0000227 }
228
229 /// getInstantiationLoc - Return a new SourceLocation that encodes the fact
230 /// that a token at Loc should actually be referenced from InstantiationLoc.
231 SourceLocation getInstantiationLoc(SourceLocation Loc,
232 SourceLocation InstantiationLoc);
233
234 /// getBuffer - Return the buffer for the specified FileID.
235 ///
236 const llvm::MemoryBuffer *getBuffer(unsigned FileID) const {
Ted Kremenekdd364ea2007-10-30 21:08:08 +0000237 return getContentCache(FileID)->Buffer;
Chris Lattner4b009652007-07-25 00:24:17 +0000238 }
239
Chris Lattner569faa62007-10-11 18:38:32 +0000240 /// getBufferData - Return a pointer to the start and end of the character
241 /// data for the specified FileID.
242 std::pair<const char*, const char*> getBufferData(unsigned FileID) const;
243
Chris Lattner4b009652007-07-25 00:24:17 +0000244 /// getIncludeLoc - Return the location of the #include for the specified
245 /// SourceLocation. If this is a macro expansion, this transparently figures
246 /// out which file includes the file being expanded into.
247 SourceLocation getIncludeLoc(SourceLocation ID) const {
248 return getFIDInfo(getLogicalLoc(ID).getFileID())->getIncludeLoc();
249 }
250
251 /// getCharacterData - Return a pointer to the start of the specified location
252 /// in the appropriate MemoryBuffer.
253 const char *getCharacterData(SourceLocation SL) const;
254
255 /// getColumnNumber - Return the column # for the specified file position.
256 /// This is significantly cheaper to compute than the line number. This
257 /// returns zero if the column number isn't known. This may only be called on
258 /// a file sloc, so you must choose a physical or logical location before
259 /// calling this method.
260 unsigned getColumnNumber(SourceLocation Loc) const;
261
262 unsigned getPhysicalColumnNumber(SourceLocation Loc) const {
263 return getColumnNumber(getPhysicalLoc(Loc));
264 }
265 unsigned getLogicalColumnNumber(SourceLocation Loc) const {
266 return getColumnNumber(getLogicalLoc(Loc));
267 }
268
269
270 /// getLineNumber - Given a SourceLocation, return the physical line number
271 /// for the position indicated. This requires building and caching a table of
272 /// line offsets for the MemoryBuffer, so this is not cheap: use only when
273 /// about to emit a diagnostic.
274 unsigned getLineNumber(SourceLocation Loc);
275
276 unsigned getLogicalLineNumber(SourceLocation Loc) {
277 return getLineNumber(getLogicalLoc(Loc));
278 }
279 unsigned getPhysicalLineNumber(SourceLocation Loc) {
280 return getLineNumber(getPhysicalLoc(Loc));
281 }
282
283 /// getSourceName - This method returns the name of the file or buffer that
284 /// the SourceLocation specifies. This can be modified with #line directives,
285 /// etc.
Chris Lattner37f041172007-08-30 05:59:30 +0000286 const char *getSourceName(SourceLocation Loc) const;
Chris Lattner4b009652007-07-25 00:24:17 +0000287
288 /// Given a SourceLocation object, return the logical location referenced by
289 /// the ID. This logical location is subject to #line directives, etc.
290 SourceLocation getLogicalLoc(SourceLocation Loc) const {
291 // File locations are both physical and logical.
292 if (Loc.isFileID()) return Loc;
293
Chris Lattnerb8a39d92007-11-09 23:59:17 +0000294 return MacroIDs[Loc.getMacroID()].getVirtualLoc();
Chris Lattner4b009652007-07-25 00:24:17 +0000295 }
296
297 /// getPhysicalLoc - Given a SourceLocation object, return the physical
298 /// location referenced by the ID.
299 SourceLocation getPhysicalLoc(SourceLocation Loc) const {
300 // File locations are both physical and logical.
301 if (Loc.isFileID()) return Loc;
302
303 SourceLocation PLoc = MacroIDs[Loc.getMacroID()].getPhysicalLoc();
304 return PLoc.getFileLocWithOffset(Loc.getMacroPhysOffs());
305 }
306
Ted Kremenekdd364ea2007-10-30 21:08:08 +0000307 /// getContentCacheForLoc - Return the ContentCache for the physloc of the
Chris Lattner4b009652007-07-25 00:24:17 +0000308 /// specified SourceLocation, if one exists.
Ted Kremenekdd364ea2007-10-30 21:08:08 +0000309 const SrcMgr::ContentCache* getContentCacheForLoc(SourceLocation Loc) const {
Chris Lattner4b009652007-07-25 00:24:17 +0000310 Loc = getPhysicalLoc(Loc);
311 unsigned FileID = Loc.getFileID();
312 assert(FileID-1 < FileIDs.size() && "Invalid FileID!");
Ted Kremenekdd364ea2007-10-30 21:08:08 +0000313 return FileIDs[FileID-1].getContentCache();
314 }
315
316 /// getFileEntryForLoc - Return the FileEntry record for the physloc of the
317 /// specified SourceLocation, if one exists.
318 const FileEntry* getFileEntryForLoc(SourceLocation Loc) const {
319 return getContentCacheForLoc(Loc)->Entry;
Chris Lattner4b009652007-07-25 00:24:17 +0000320 }
321
Chris Lattner136df562007-10-12 20:24:19 +0000322 /// getDecomposedFileLoc - Decompose the specified file location into a raw
323 /// FileID + Offset pair. The first element is the FileID, the second is the
324 /// offset from the start of the buffer of the location.
325 std::pair<unsigned, unsigned> getDecomposedFileLoc(SourceLocation Loc) const {
326 assert(Loc.isFileID() && "Isn't a File SourceLocation");
327
328 // TODO: Add a flag "is first chunk" to SLOC.
329 const SrcMgr::FileIDInfo *FIDInfo = getFIDInfo(Loc.getFileID());
330
331 // If this file has been split up into chunks, factor in the chunk number
332 // that the FileID references.
333 unsigned ChunkNo = FIDInfo->getChunkNo();
334 unsigned Offset = Loc.getRawFilePos();
335 Offset += (ChunkNo << SourceLocation::FilePosBits);
336
337 return std::pair<unsigned,unsigned>(Loc.getFileID()-ChunkNo, Offset);
338 }
339
Chris Lattner4b009652007-07-25 00:24:17 +0000340 /// PrintStats - Print statistics to stderr.
341 ///
342 void PrintStats() const;
Chris Lattner4b009652007-07-25 00:24:17 +0000343
Ted Kremenekdd364ea2007-10-30 21:08:08 +0000344private:
345 /// createFileID - Create a new fileID for the specified ContentCache and
346 /// include position. This works regardless of whether the ContentCache
347 /// corresponds to a file or some other input source.
348 unsigned createFileID(const SrcMgr::ContentCache* File,
349 SourceLocation IncludePos);
350
351 /// getContentCache - Create or return a cached ContentCache for the specified
352 /// file. This returns null on failure.
353 const SrcMgr::ContentCache* getContentCache(const FileEntry* SourceFile);
354
355 /// createMemBufferContentCache - Create a new ContentCache for the specified
356 /// memory buffer.
357 const SrcMgr::ContentCache*
358 createMemBufferContentCache(const llvm::MemoryBuffer* Buf);
359
360 const SrcMgr::FileIDInfo* getFIDInfo(unsigned FileID) const {
Chris Lattner4b009652007-07-25 00:24:17 +0000361 assert(FileID-1 < FileIDs.size() && "Invalid FileID!");
362 return &FileIDs[FileID-1];
363 }
364
Ted Kremenekdd364ea2007-10-30 21:08:08 +0000365 const SrcMgr::ContentCache *getContentCache(unsigned FileID) const {
366 return getContentCache(getFIDInfo(FileID));
Chris Lattner4b009652007-07-25 00:24:17 +0000367 }
368
Ted Kremenekdd364ea2007-10-30 21:08:08 +0000369 /// Return the ContentCache structure for the specified FileID.
370 /// This is always the physical reference for the ID.
371 const SrcMgr::ContentCache*
372 getContentCache(const SrcMgr::FileIDInfo* FIDInfo) const {
373 return FIDInfo->getContentCache();
374 }
Chris Lattner4b009652007-07-25 00:24:17 +0000375
376 /// getFullFilePos - This (efficient) method returns the offset from the start
377 /// of the file that the specified physical SourceLocation represents. This
378 /// returns the location of the physical character data, not the logical file
379 /// position.
380 unsigned getFullFilePos(SourceLocation PhysLoc) const {
Chris Lattner136df562007-10-12 20:24:19 +0000381 return getDecomposedFileLoc(PhysLoc).second;
Chris Lattner4b009652007-07-25 00:24:17 +0000382 }
383};
384
385
386} // end namespace clang
387
388#endif