blob: f479b94e5d6ecd24a9a0d91e62a67c71ea2362cb [file] [log] [blame]
//===--- SourceManager.h - Track and cache source files ---------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//  This file defines the SourceManager interface.
//
//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_CLANG_SOURCEMANAGER_H
15#define LLVM_CLANG_SOURCEMANAGER_H
16
17#include "clang/Basic/SourceLocation.h"
18#include <vector>
19#include <map>
20#include <list>
21#include <cassert>
22
23namespace llvm {
24class MemoryBuffer;
25}
26
27namespace clang {
28
29class SourceManager;
30class FileEntry;
31class IdentifierTokenInfo;
32
33/// SrcMgr - Private classes that are part of the SourceManager implementation.
34///
35namespace SrcMgr {
36 /// FileInfo - Once instance of this struct is kept for every file loaded or
37 /// used. This object owns the MemoryBuffer object.
38 struct FileInfo {
39 /// Buffer - The actual buffer containing the characters from the input
40 /// file.
41 const llvm::MemoryBuffer *Buffer;
42
43 /// SourceLineCache - A new[]'d array of offsets for each source line. This
44 /// is lazily computed.
45 ///
46 unsigned *SourceLineCache;
47
48 /// NumLines - The number of lines in this FileInfo. This is only valid if
49 /// SourceLineCache is non-null.
50 unsigned NumLines;
51 };
52
53 typedef std::pair<const FileEntry * const, FileInfo> InfoRec;
54
55 /// FileIDInfo - Information about a FileID, basically just the logical file
56 /// that it represents and include stack information. A File SourceLocation
57 /// is a byte offset from the start of this.
58 ///
59 /// FileID's are used to compute the location of a character in memory as well
60 /// as the logical source location, which can be differ from the physical
61 /// location. It is different when #line's are active or when macros have
62 /// been expanded.
63 ///
64 /// Each FileID has include stack information, indicating where it came from.
65 /// For the primary translation unit, it comes from SourceLocation() aka 0.
66 /// This information encodes the #include chain that a token was instantiated
67 /// from.
68 ///
69 /// FileIDInfos contain a "InfoRec *", describing the source file, and a Chunk
70 /// number, which allows a SourceLocation to index into very large files
71 /// (those which there are not enough FilePosBits to address).
72 ///
73 struct FileIDInfo {
74 private:
75 /// IncludeLoc - The location of the #include that brought in this file.
76 /// This SourceLocation object has an invalid SLOC for the main file.
77 SourceLocation IncludeLoc;
78
79 /// ChunkNo - Really large buffers are broken up into chunks that are
80 /// each (1 << SourceLocation::FilePosBits) in size. This specifies the
81 /// chunk number of this FileID.
82 unsigned ChunkNo;
83
84 /// FileInfo - Information about the source buffer itself.
85 ///
86 const InfoRec *Info;
87 public:
88
89 /// get - Return a FileIDInfo object.
90 static FileIDInfo get(SourceLocation IL, unsigned CN, const InfoRec *Inf) {
91 FileIDInfo X;
92 X.IncludeLoc = IL;
93 X.ChunkNo = CN;
94 X.Info = Inf;
95 return X;
96 }
97
98 SourceLocation getIncludeLoc() const { return IncludeLoc; }
99 unsigned getChunkNo() const { return ChunkNo; }
100 const InfoRec *getInfo() const { return Info; }
101 };
102
103 /// MacroIDInfo - Macro SourceLocations refer to these records by their ID.
104 /// Each MacroIDInfo encodes the Instantiation location - where the macro was
105 /// instantiated, and the PhysicalLoc - where the actual character data for
106 /// the token came from. An actual macro SourceLocation stores deltas from
107 /// these positions.
108 class MacroIDInfo {
109 SourceLocation InstantiationLoc, PhysicalLoc;
110 public:
111 SourceLocation getInstantiationLoc() const { return InstantiationLoc; }
112 SourceLocation getPhysicalLoc() const { return PhysicalLoc; }
113
114 /// get - Return a MacroID for a macro expansion. IL specifies
115 /// the instantiation location, and PL specifies the physical location
116 /// (where the characters from the token come from). Both IL and PL refer
117 /// to normal File SLocs.
118 static MacroIDInfo get(SourceLocation IL, SourceLocation PL) {
119 MacroIDInfo X;
120 X.InstantiationLoc = IL;
121 X.PhysicalLoc = PL;
122 return X;
123 }
124 };
125} // end SrcMgr namespace.
126
127
128/// SourceManager - This file handles loading and caching of source files into
129/// memory. This object owns the MemoryBuffer objects for all of the loaded
130/// files and assigns unique FileID's for each unique #include chain.
131///
132/// The SourceManager can be queried for information about SourceLocation
133/// objects, turning them into either physical or logical locations. Physical
134/// locations represent where the bytes corresponding to a token came from and
135/// logical locations represent where the location is in the user's view. In
136/// the case of a macro expansion, for example, the physical location indicates
137/// where the expanded token came from and the logical location specifies where
138/// it was expanded. Logical locations are also influenced by #line directives,
139/// etc.
140class SourceManager {
141 /// FileInfos - Memoized information about all of the files tracked by this
142 /// SourceManager.
143 std::map<const FileEntry *, SrcMgr::FileInfo> FileInfos;
144
145 /// MemBufferInfos - Information about various memory buffers that we have
146 /// read in. This is a list, instead of a vector, because we need pointers to
147 /// the FileInfo objects to be stable.
148 std::list<SrcMgr::InfoRec> MemBufferInfos;
149
150 /// FileIDs - Information about each FileID. FileID #0 is not valid, so all
151 /// entries are off by one.
152 std::vector<SrcMgr::FileIDInfo> FileIDs;
153
154 /// MacroIDs - Information about each MacroID.
155 std::vector<SrcMgr::MacroIDInfo> MacroIDs;
156
157 /// LastLineNo - These ivars serve as a cache used in the getLineNumber
158 /// method which is used to speedup getLineNumber calls to nearby locations.
159 unsigned LastLineNoFileIDQuery;
160 SrcMgr::FileInfo *LastLineNoFileInfo;
161 unsigned LastLineNoFilePos;
162 unsigned LastLineNoResult;
163public:
164 SourceManager() : LastLineNoFileIDQuery(~0U) {}
165 ~SourceManager();
166
167 void clearIDTables() {
168 FileIDs.clear();
169 MacroIDs.clear();
170 LastLineNoFileIDQuery = ~0U;
171 LastLineNoFileInfo = 0;
172 }
173
174 /// createFileID - Create a new FileID that represents the specified file
175 /// being #included from the specified IncludePosition. This returns 0 on
176 /// error and translates NULL into standard input.
177 unsigned createFileID(const FileEntry *SourceFile, SourceLocation IncludePos){
178 const SrcMgr::InfoRec *IR = getInfoRec(SourceFile);
179 if (IR == 0) return 0; // Error opening file?
180 return createFileID(IR, IncludePos);
181 }
182
183 /// createFileIDForMemBuffer - Create a new FileID that represents the
184 /// specified memory buffer. This does no caching of the buffer and takes
185 /// ownership of the MemoryBuffer, so only pass a MemoryBuffer to this once.
186 unsigned createFileIDForMemBuffer(const llvm::MemoryBuffer *Buffer) {
187 return createFileID(createMemBufferInfoRec(Buffer), SourceLocation());
188 }
189
190 /// getInstantiationLoc - Return a new SourceLocation that encodes the fact
191 /// that a token at Loc should actually be referenced from InstantiationLoc.
192 SourceLocation getInstantiationLoc(SourceLocation Loc,
193 SourceLocation InstantiationLoc);
194
195 /// getBuffer - Return the buffer for the specified FileID.
196 ///
197 const llvm::MemoryBuffer *getBuffer(unsigned FileID) const {
198 return getFileInfo(FileID)->Buffer;
199 }
200
201 /// getIncludeLoc - Return the location of the #include for the specified
202 /// SourceLocation. If this is a macro expansion, this transparently figures
203 /// out which file includes the file being expanded into.
204 SourceLocation getIncludeLoc(SourceLocation ID) const {
205 return getFIDInfo(getLogicalLoc(ID).getFileID())->getIncludeLoc();
206 }
207
208 /// getCharacterData - Return a pointer to the start of the specified location
209 /// in the appropriate MemoryBuffer.
210 const char *getCharacterData(SourceLocation SL) const;
211
212 /// getColumnNumber - Return the column # for the specified file position.
213 /// This is significantly cheaper to compute than the line number. This
214 /// returns zero if the column number isn't known. This may only be called on
215 /// a file sloc, so you must choose a physical or logical location before
216 /// calling this method.
217 unsigned getColumnNumber(SourceLocation Loc) const;
218
219 unsigned getPhysicalColumnNumber(SourceLocation Loc) const {
220 return getColumnNumber(getPhysicalLoc(Loc));
221 }
222 unsigned getLogicalColumnNumber(SourceLocation Loc) const {
223 return getColumnNumber(getLogicalLoc(Loc));
224 }
225
226
227 /// getLineNumber - Given a SourceLocation, return the physical line number
228 /// for the position indicated. This requires building and caching a table of
229 /// line offsets for the MemoryBuffer, so this is not cheap: use only when
230 /// about to emit a diagnostic.
231 unsigned getLineNumber(SourceLocation Loc);
232
233 unsigned getLogicalLineNumber(SourceLocation Loc) {
234 return getLineNumber(getLogicalLoc(Loc));
235 }
236 unsigned getPhysicalLineNumber(SourceLocation Loc) {
237 return getLineNumber(getPhysicalLoc(Loc));
238 }
239
240 /// getSourceName - This method returns the name of the file or buffer that
241 /// the SourceLocation specifies. This can be modified with #line directives,
242 /// etc.
Chris Lattner37f041172007-08-30 05:59:30 +0000243 const char *getSourceName(SourceLocation Loc) const;
Chris Lattner4b009652007-07-25 00:24:17 +0000244
245 /// Given a SourceLocation object, return the logical location referenced by
246 /// the ID. This logical location is subject to #line directives, etc.
247 SourceLocation getLogicalLoc(SourceLocation Loc) const {
248 // File locations are both physical and logical.
249 if (Loc.isFileID()) return Loc;
250
251 SourceLocation ILoc = MacroIDs[Loc.getMacroID()].getInstantiationLoc();
252 return ILoc.getFileLocWithOffset(Loc.getMacroLogOffs());
253 }
254
255 /// getPhysicalLoc - Given a SourceLocation object, return the physical
256 /// location referenced by the ID.
257 SourceLocation getPhysicalLoc(SourceLocation Loc) const {
258 // File locations are both physical and logical.
259 if (Loc.isFileID()) return Loc;
260
261 SourceLocation PLoc = MacroIDs[Loc.getMacroID()].getPhysicalLoc();
262 return PLoc.getFileLocWithOffset(Loc.getMacroPhysOffs());
263 }
264
265 /// getFileEntryForLoc - Return the FileEntry record for the physloc of the
266 /// specified SourceLocation, if one exists.
267 const FileEntry *getFileEntryForLoc(SourceLocation Loc) const {
268 Loc = getPhysicalLoc(Loc);
269 unsigned FileID = Loc.getFileID();
270 assert(FileID-1 < FileIDs.size() && "Invalid FileID!");
271 return FileIDs[FileID-1].getInfo()->first;
272 }
273
274 /// PrintStats - Print statistics to stderr.
275 ///
276 void PrintStats() const;
277private:
278 /// createFileID - Create a new fileID for the specified InfoRec and include
279 /// position. This works regardless of whether the InfoRec corresponds to a
280 /// file or some other input source.
281 unsigned createFileID(const SrcMgr::InfoRec *File, SourceLocation IncludePos);
282
283 /// getInfoRec - Create or return a cached FileInfo for the specified file.
284 /// This returns null on failure.
285 const SrcMgr::InfoRec *getInfoRec(const FileEntry *SourceFile);
286
287 /// createMemBufferInfoRec - Create a new info record for the specified memory
288 /// buffer. This does no caching.
289 const SrcMgr::InfoRec *createMemBufferInfoRec(const llvm::MemoryBuffer *Buf);
290
291 const SrcMgr::FileIDInfo *getFIDInfo(unsigned FileID) const {
292 assert(FileID-1 < FileIDs.size() && "Invalid FileID!");
293 return &FileIDs[FileID-1];
294 }
295
296 const SrcMgr::InfoRec *getInfoRec(unsigned FileID) const {
297 return getInfoRec(getFIDInfo(FileID));
298 }
299
300 SrcMgr::FileInfo *getFileInfo(unsigned FileID) const {
301 if (const SrcMgr::InfoRec *IR = getInfoRec(FileID))
302 return const_cast<SrcMgr::FileInfo *>(&IR->second);
303 return 0;
304 }
305
306 /// Return the InfoRec structure for the specified FileID. This is always the
307 /// physical reference for the ID.
308 const SrcMgr::InfoRec *getInfoRec(const SrcMgr::FileIDInfo *FIDInfo) const {
309 return FIDInfo->getInfo();
310 }
311
312
313 /// getFullFilePos - This (efficient) method returns the offset from the start
314 /// of the file that the specified physical SourceLocation represents. This
315 /// returns the location of the physical character data, not the logical file
316 /// position.
317 unsigned getFullFilePos(SourceLocation PhysLoc) const {
318 // TODO: Add a flag "is first chunk" to SLOC.
319 const SrcMgr::FileIDInfo *FIDInfo = getFIDInfo(PhysLoc.getFileID());
320
321 // If this file has been split up into chunks, factor in the chunk number
322 // that the FileID references.
323 unsigned ChunkNo = FIDInfo->getChunkNo();
324 return PhysLoc.getRawFilePos() + (ChunkNo << SourceLocation::FilePosBits);
325 }
326};
327
328
329} // end namespace clang
330
331#endif