blob: f6148c1602b250bf70c245d387472aeeab61f586 [file] [log] [blame]
Reid Spencer5f016e22007-07-11 17:01:13 +00001//===--- SourceManager.cpp - Track and cache source files -----------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file was developed by Chris Lattner and is distributed under
6// the University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the SourceManager interface.
11//
12//===----------------------------------------------------------------------===//
13
14#include "clang/Basic/SourceManager.h"
15#include "clang/Basic/FileManager.h"
16#include "llvm/Support/MemoryBuffer.h"
17#include "llvm/System/Path.h"
18#include <algorithm>
19#include <iostream>
20using namespace clang;
21using namespace SrcMgr;
22using llvm::MemoryBuffer;
23
24SourceManager::~SourceManager() {
25 for (std::map<const FileEntry *, FileInfo>::iterator I = FileInfos.begin(),
26 E = FileInfos.end(); I != E; ++I) {
27 delete I->second.Buffer;
28 delete[] I->second.SourceLineCache;
29 }
30
31 for (std::list<InfoRec>::iterator I = MemBufferInfos.begin(),
32 E = MemBufferInfos.end(); I != E; ++I) {
33 delete I->second.Buffer;
34 delete[] I->second.SourceLineCache;
35 }
36}
37
38
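// FIXME: Remove these system includes once ReadFileFast (below), which needs
// them for open/read/close and errno, has been zapped.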
40#include <unistd.h>
41#include <sys/types.h>
42#include <sys/uio.h>
43#include <sys/fcntl.h>
44#include <cerrno>
45
46static const MemoryBuffer *ReadFileFast(const FileEntry *FileEnt) {
47#if 0
48 // FIXME: Reintroduce this and zap this function once the common llvm stuff
49 // is fast for the small case.
50 return MemoryBuffer::getFile(FileEnt->getName(), strlen(FileEnt->getName()),
51 FileEnt->getSize());
52#endif
53
54 // If the file is larger than some threshold, use 'read', otherwise use mmap.
55 if (FileEnt->getSize() >= 4096*4)
56 return MemoryBuffer::getFile(FileEnt->getName(), strlen(FileEnt->getName()),
57 0, FileEnt->getSize());
58
59 MemoryBuffer *SB = MemoryBuffer::getNewUninitMemBuffer(FileEnt->getSize(),
60 FileEnt->getName());
61 char *BufPtr = const_cast<char*>(SB->getBufferStart());
62
63 int FD = ::open(FileEnt->getName(), O_RDONLY);
64 if (FD == -1) {
65 delete SB;
66 return 0;
67 }
68
69 unsigned BytesLeft = FileEnt->getSize();
70 while (BytesLeft) {
71 ssize_t NumRead = ::read(FD, BufPtr, BytesLeft);
72 if (NumRead != -1) {
73 BytesLeft -= NumRead;
74 BufPtr += NumRead;
75 } else if (errno == EINTR) {
76 // try again
77 } else {
78 // error reading.
79 close(FD);
80 delete SB;
81 return 0;
82 }
83 }
84 close(FD);
85
86 return SB;
87}
88
89
90/// getFileInfo - Create or return a cached FileInfo for the specified file.
91///
92const InfoRec *
93SourceManager::getInfoRec(const FileEntry *FileEnt) {
94 assert(FileEnt && "Didn't specify a file entry to use?");
95 // Do we already have information about this file?
96 std::map<const FileEntry *, FileInfo>::iterator I =
97 FileInfos.lower_bound(FileEnt);
98 if (I != FileInfos.end() && I->first == FileEnt)
99 return &*I;
100
101 // Nope, get information.
102 const MemoryBuffer *File = ReadFileFast(FileEnt);
103 if (File == 0)
104 return 0;
105
106 const InfoRec &Entry =
107 *FileInfos.insert(I, std::make_pair(FileEnt, FileInfo()));
108 FileInfo &Info = const_cast<FileInfo &>(Entry.second);
109
110 Info.Buffer = File;
111 Info.SourceLineCache = 0;
112 Info.NumLines = 0;
113 return &Entry;
114}
115
116
117/// createMemBufferInfoRec - Create a new info record for the specified memory
118/// buffer. This does no caching.
119const InfoRec *
120SourceManager::createMemBufferInfoRec(const MemoryBuffer *Buffer) {
121 // Add a new info record to the MemBufferInfos list and return it.
122 FileInfo FI;
123 FI.Buffer = Buffer;
124 FI.SourceLineCache = 0;
125 FI.NumLines = 0;
126 MemBufferInfos.push_back(InfoRec(0, FI));
127 return &MemBufferInfos.back();
128}
129
130
131/// createFileID - Create a new fileID for the specified InfoRec and include
132/// position. This works regardless of whether the InfoRec corresponds to a
133/// file or some other input source.
134unsigned SourceManager::createFileID(const InfoRec *File,
135 SourceLocation IncludePos) {
136 // If FileEnt is really large (e.g. it's a large .i file), we may not be able
137 // to fit an arbitrary position in the file in the FilePos field. To handle
138 // this, we create one FileID for each chunk of the file that fits in a
139 // FilePos field.
140 unsigned FileSize = File->second.Buffer->getBufferSize();
141 if (FileSize+1 < (1 << SourceLocation::FilePosBits)) {
142 FileIDs.push_back(FileIDInfo::getNormalBuffer(IncludePos, 0, File));
143 assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) &&
144 "Ran out of file ID's!");
145 return FileIDs.size();
146 }
147
148 // Create one FileID for each chunk of the file.
149 unsigned Result = FileIDs.size()+1;
150
151 unsigned ChunkNo = 0;
152 while (1) {
153 FileIDs.push_back(FileIDInfo::getNormalBuffer(IncludePos, ChunkNo++, File));
154
155 if (FileSize+1 < (1 << SourceLocation::FilePosBits)) break;
156 FileSize -= (1 << SourceLocation::FilePosBits);
157 }
158
159 assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) &&
160 "Ran out of file ID's!");
161 return Result;
162}
163
164/// getInstantiationLoc - Return a new SourceLocation that encodes the fact
165/// that a token from physloc PhysLoc should actually be referenced from
166/// InstantiationLoc.
167SourceLocation SourceManager::getInstantiationLoc(SourceLocation PhysLoc,
168 SourceLocation InstantLoc) {
169 assert(getFIDInfo(PhysLoc.getFileID())->IDType !=
170 SrcMgr::FileIDInfo::MacroExpansion &&
171 "Location instantiated in a macro?");
172
173 // Resolve InstantLoc down to a real logical location.
174 InstantLoc = getLogicalLoc(InstantLoc);
175
176 unsigned InstantiationFileID;
177 // If this is the same instantiation as was requested last time, return this
178 // immediately.
179 if (PhysLoc.getFileID() == LastInstantiationLoc_MacroFID &&
180 InstantLoc == LastInstantiationLoc_InstantLoc) {
181 InstantiationFileID = LastInstantiationLoc_Result;
182 } else {
183 // Add a FileID for this. FIXME: should cache these!
184 FileIDs.push_back(FileIDInfo::getMacroExpansion(InstantLoc,
185 PhysLoc.getFileID()));
186 InstantiationFileID = FileIDs.size();
187
188 // Remember this in the single-entry cache for next time.
189 LastInstantiationLoc_MacroFID = PhysLoc.getFileID();
190 LastInstantiationLoc_InstantLoc = InstantLoc;
191 LastInstantiationLoc_Result = InstantiationFileID;
192 }
193 return SourceLocation(InstantiationFileID, PhysLoc.getRawFilePos());
194}
195
196
197
198/// getCharacterData - Return a pointer to the start of the specified location
199/// in the appropriate MemoryBuffer.
200const char *SourceManager::getCharacterData(SourceLocation SL) const {
201 // Note that this is a hot function in the getSpelling() path, which is
202 // heavily used by -E mode.
203 unsigned FileID = SL.getFileID();
204 assert(FileID && "Invalid source location!");
205
206 return getFileInfo(FileID)->Buffer->getBufferStart() + getFilePos(SL);
207}
208
209/// getIncludeLoc - Return the location of the #include for the specified
210/// FileID.
211SourceLocation SourceManager::getIncludeLoc(unsigned FileID) const {
212 const SrcMgr::FileIDInfo *FIDInfo = getFIDInfo(FileID);
213
214 // For Macros, the physical loc is specified by the MacroTokenFileID.
215 if (FIDInfo->IDType == SrcMgr::FileIDInfo::MacroExpansion)
216 FIDInfo = &FileIDs[FIDInfo->u.MacroTokenFileID-1];
217
218 return FIDInfo->IncludeLoc;
219}
220
221
222/// getColumnNumber - Return the column # for the specified include position.
223/// this is significantly cheaper to compute than the line number. This returns
224/// zero if the column number isn't known.
225unsigned SourceManager::getColumnNumber(SourceLocation Loc) const {
226 Loc = getLogicalLoc(Loc);
227 unsigned FileID = Loc.getFileID();
228 if (FileID == 0) return 0;
229
230 unsigned FilePos = getFilePos(Loc);
231 const MemoryBuffer *Buffer = getBuffer(FileID);
232 const char *Buf = Buffer->getBufferStart();
233
234 unsigned LineStart = FilePos;
235 while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r')
236 --LineStart;
237 return FilePos-LineStart+1;
238}
239
240/// getSourceName - This method returns the name of the file or buffer that
241/// the SourceLocation specifies. This can be modified with #line directives,
242/// etc.
243std::string SourceManager::getSourceName(SourceLocation Loc) {
244 Loc = getLogicalLoc(Loc);
245 unsigned FileID = Loc.getFileID();
246 if (FileID == 0) return "";
247 return getFileInfo(FileID)->Buffer->getBufferIdentifier();
248}
249
250
251/// getLineNumber - Given a SourceLocation, return the physical line number
252/// for the position indicated. This requires building and caching a table of
253/// line offsets for the MemoryBuffer, so this is not cheap: use only when
254/// about to emit a diagnostic.
255unsigned SourceManager::getLineNumber(SourceLocation Loc) {
256 Loc = getLogicalLoc(Loc);
257 unsigned FileID = Loc.getFileID();
258 if (FileID == 0) return 0;
259 FileInfo *FileInfo = getFileInfo(FileID);
260
261 // If this is the first use of line information for this buffer, compute the
262 /// SourceLineCache for it on demand.
263 if (FileInfo->SourceLineCache == 0) {
264 const MemoryBuffer *Buffer = FileInfo->Buffer;
265
266 // Find the file offsets of all of the *physical* source lines. This does
267 // not look at trigraphs, escaped newlines, or anything else tricky.
268 std::vector<unsigned> LineOffsets;
269
270 // Line #1 starts at char 0.
271 LineOffsets.push_back(0);
272
273 const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart();
274 const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd();
275 unsigned Offs = 0;
276 while (1) {
277 // Skip over the contents of the line.
278 // TODO: Vectorize this? This is very performance sensitive for programs
279 // with lots of diagnostics and in -E mode.
280 const unsigned char *NextBuf = (const unsigned char *)Buf;
281 while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0')
282 ++NextBuf;
283 Offs += NextBuf-Buf;
284 Buf = NextBuf;
285
286 if (Buf[0] == '\n' || Buf[0] == '\r') {
287 // If this is \n\r or \r\n, skip both characters.
288 if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1])
289 ++Offs, ++Buf;
290 ++Offs, ++Buf;
291 LineOffsets.push_back(Offs);
292 } else {
293 // Otherwise, this is a null. If end of file, exit.
294 if (Buf == End) break;
295 // Otherwise, skip the null.
296 ++Offs, ++Buf;
297 }
298 }
299 LineOffsets.push_back(Offs);
300
301 // Copy the offsets into the FileInfo structure.
302 FileInfo->NumLines = LineOffsets.size();
303 FileInfo->SourceLineCache = new unsigned[LineOffsets.size()];
304 std::copy(LineOffsets.begin(), LineOffsets.end(),
305 FileInfo->SourceLineCache);
306 }
307
308 // Okay, we know we have a line number table. Do a binary search to find the
309 // line number that this character position lands on.
310 unsigned NumLines = FileInfo->NumLines;
311 unsigned *SourceLineCache = FileInfo->SourceLineCache;
312
313 // TODO: If this is performance sensitive, we could try doing simple radix
314 // type approaches to make good (tight?) initial guesses based on the
315 // assumption that all lines are the same average size.
316 unsigned *Pos = std::lower_bound(SourceLineCache, SourceLineCache+NumLines,
317 getFilePos(Loc)+1);
318 return Pos-SourceLineCache;
319}
320
321/// getSourceFilePos - This method returns the *logical* offset from the start
322/// of the file that the specified SourceLocation represents. This returns
323/// the location of the *logical* character data, not the physical file
324/// position. In the case of macros, for example, this returns where the
325/// macro was instantiated, not where the characters for the macro can be
326/// found.
327unsigned SourceManager::getSourceFilePos(SourceLocation Loc) const {
328
329 // If this is a macro, we need to get the instantiation location.
330 const SrcMgr::FileIDInfo *FIDInfo = getFIDInfo(Loc.getFileID());
331 while (FIDInfo->IDType == SrcMgr::FileIDInfo::MacroExpansion) {
332 Loc = FIDInfo->IncludeLoc;
333 FIDInfo = getFIDInfo(Loc.getFileID());
334 }
335
336 return getFilePos(Loc);
337}
338
339
340/// PrintStats - Print statistics to stderr.
341///
342void SourceManager::PrintStats() const {
343 std::cerr << "\n*** Source Manager Stats:\n";
344 std::cerr << FileInfos.size() << " files mapped, " << MemBufferInfos.size()
345 << " mem buffers mapped, " << FileIDs.size()
346 << " file ID's allocated.\n";
347 unsigned NumBuffers = 0, NumMacros = 0;
348 for (unsigned i = 0, e = FileIDs.size(); i != e; ++i) {
349 if (FileIDs[i].IDType == FileIDInfo::NormalBuffer)
350 ++NumBuffers;
351 else if (FileIDs[i].IDType == FileIDInfo::MacroExpansion)
352 ++NumMacros;
353 else
354 assert(0 && "Unknown FileID!");
355 }
356 std::cerr << " " << NumBuffers << " normal buffer FileID's, "
357 << NumMacros << " macro expansion FileID's.\n";
358
359
360
361 unsigned NumLineNumsComputed = 0;
362 unsigned NumFileBytesMapped = 0;
363 for (std::map<const FileEntry *, FileInfo>::const_iterator I =
364 FileInfos.begin(), E = FileInfos.end(); I != E; ++I) {
365 NumLineNumsComputed += I->second.SourceLineCache != 0;
366 NumFileBytesMapped += I->second.Buffer->getBufferSize();
367 }
368 std::cerr << NumFileBytesMapped << " bytes of files mapped, "
369 << NumLineNumsComputed << " files with line #'s computed.\n";
370}