blob: 05d58cf79e64a26833c82d7ab5ef052bc76f5273 [file] [log] [blame]
//===--- SourceManager.cpp - Track and cache source files -----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//  This file implements the SourceManager interface.
//
//===----------------------------------------------------------------------===//
13
14#include "clang/Basic/SourceManager.h"
15#include "clang/Basic/FileManager.h"
Chris Lattner5e36a7a2007-07-24 05:57:19 +000016#include "llvm/Support/Compiler.h"
Reid Spencer5f016e22007-07-11 17:01:13 +000017#include "llvm/Support/MemoryBuffer.h"
18#include "llvm/System/Path.h"
19#include <algorithm>
20#include <iostream>
Gabor Greif15012182007-07-12 16:00:00 +000021#include <fcntl.h>
Reid Spencer5f016e22007-07-11 17:01:13 +000022using namespace clang;
23using namespace SrcMgr;
24using llvm::MemoryBuffer;
25
26SourceManager::~SourceManager() {
27 for (std::map<const FileEntry *, FileInfo>::iterator I = FileInfos.begin(),
28 E = FileInfos.end(); I != E; ++I) {
29 delete I->second.Buffer;
30 delete[] I->second.SourceLineCache;
31 }
32
33 for (std::list<InfoRec>::iterator I = MemBufferInfos.begin(),
34 E = MemBufferInfos.end(); I != E; ++I) {
35 delete I->second.Buffer;
36 delete[] I->second.SourceLineCache;
37 }
38}
39
40
41// FIXME: REMOVE THESE
42#include <unistd.h>
43#include <sys/types.h>
44#include <sys/uio.h>
45#include <sys/fcntl.h>
46#include <cerrno>
47
48static const MemoryBuffer *ReadFileFast(const FileEntry *FileEnt) {
49#if 0
50 // FIXME: Reintroduce this and zap this function once the common llvm stuff
51 // is fast for the small case.
52 return MemoryBuffer::getFile(FileEnt->getName(), strlen(FileEnt->getName()),
53 FileEnt->getSize());
54#endif
55
56 // If the file is larger than some threshold, use 'read', otherwise use mmap.
57 if (FileEnt->getSize() >= 4096*4)
58 return MemoryBuffer::getFile(FileEnt->getName(), strlen(FileEnt->getName()),
59 0, FileEnt->getSize());
60
61 MemoryBuffer *SB = MemoryBuffer::getNewUninitMemBuffer(FileEnt->getSize(),
62 FileEnt->getName());
63 char *BufPtr = const_cast<char*>(SB->getBufferStart());
64
65 int FD = ::open(FileEnt->getName(), O_RDONLY);
66 if (FD == -1) {
67 delete SB;
68 return 0;
69 }
70
71 unsigned BytesLeft = FileEnt->getSize();
72 while (BytesLeft) {
73 ssize_t NumRead = ::read(FD, BufPtr, BytesLeft);
74 if (NumRead != -1) {
75 BytesLeft -= NumRead;
76 BufPtr += NumRead;
77 } else if (errno == EINTR) {
78 // try again
79 } else {
80 // error reading.
81 close(FD);
82 delete SB;
83 return 0;
84 }
85 }
86 close(FD);
87
88 return SB;
89}
90
91
92/// getFileInfo - Create or return a cached FileInfo for the specified file.
93///
94const InfoRec *
95SourceManager::getInfoRec(const FileEntry *FileEnt) {
96 assert(FileEnt && "Didn't specify a file entry to use?");
97 // Do we already have information about this file?
98 std::map<const FileEntry *, FileInfo>::iterator I =
99 FileInfos.lower_bound(FileEnt);
100 if (I != FileInfos.end() && I->first == FileEnt)
101 return &*I;
102
103 // Nope, get information.
104 const MemoryBuffer *File = ReadFileFast(FileEnt);
105 if (File == 0)
106 return 0;
107
108 const InfoRec &Entry =
109 *FileInfos.insert(I, std::make_pair(FileEnt, FileInfo()));
110 FileInfo &Info = const_cast<FileInfo &>(Entry.second);
111
112 Info.Buffer = File;
113 Info.SourceLineCache = 0;
114 Info.NumLines = 0;
115 return &Entry;
116}
117
118
119/// createMemBufferInfoRec - Create a new info record for the specified memory
120/// buffer. This does no caching.
121const InfoRec *
122SourceManager::createMemBufferInfoRec(const MemoryBuffer *Buffer) {
123 // Add a new info record to the MemBufferInfos list and return it.
124 FileInfo FI;
125 FI.Buffer = Buffer;
126 FI.SourceLineCache = 0;
127 FI.NumLines = 0;
128 MemBufferInfos.push_back(InfoRec(0, FI));
129 return &MemBufferInfos.back();
130}
131
132
133/// createFileID - Create a new fileID for the specified InfoRec and include
134/// position. This works regardless of whether the InfoRec corresponds to a
135/// file or some other input source.
136unsigned SourceManager::createFileID(const InfoRec *File,
137 SourceLocation IncludePos) {
138 // If FileEnt is really large (e.g. it's a large .i file), we may not be able
139 // to fit an arbitrary position in the file in the FilePos field. To handle
140 // this, we create one FileID for each chunk of the file that fits in a
141 // FilePos field.
142 unsigned FileSize = File->second.Buffer->getBufferSize();
143 if (FileSize+1 < (1 << SourceLocation::FilePosBits)) {
Chris Lattner9dc1f532007-07-20 16:37:10 +0000144 FileIDs.push_back(FileIDInfo::get(IncludePos, 0, File));
Reid Spencer5f016e22007-07-11 17:01:13 +0000145 assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) &&
146 "Ran out of file ID's!");
147 return FileIDs.size();
148 }
149
150 // Create one FileID for each chunk of the file.
151 unsigned Result = FileIDs.size()+1;
152
153 unsigned ChunkNo = 0;
154 while (1) {
Chris Lattner9dc1f532007-07-20 16:37:10 +0000155 FileIDs.push_back(FileIDInfo::get(IncludePos, ChunkNo++, File));
Reid Spencer5f016e22007-07-11 17:01:13 +0000156
157 if (FileSize+1 < (1 << SourceLocation::FilePosBits)) break;
158 FileSize -= (1 << SourceLocation::FilePosBits);
159 }
160
161 assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) &&
162 "Ran out of file ID's!");
163 return Result;
164}
165
166/// getInstantiationLoc - Return a new SourceLocation that encodes the fact
167/// that a token from physloc PhysLoc should actually be referenced from
168/// InstantiationLoc.
Chris Lattner31bb8be2007-07-20 18:00:12 +0000169SourceLocation SourceManager::getInstantiationLoc(SourceLocation PhysLoc,
Reid Spencer5f016e22007-07-11 17:01:13 +0000170 SourceLocation InstantLoc) {
Chris Lattnerabca2bb2007-07-15 06:35:27 +0000171 // The specified source location may be a mapped location, due to a macro
172 // instantiation or #line directive. Strip off this information to find out
173 // where the characters are actually located.
Chris Lattner31bb8be2007-07-20 18:00:12 +0000174 PhysLoc = getPhysicalLoc(PhysLoc);
Reid Spencer5f016e22007-07-11 17:01:13 +0000175
176 // Resolve InstantLoc down to a real logical location.
177 InstantLoc = getLogicalLoc(InstantLoc);
178
Chris Lattner31bb8be2007-07-20 18:00:12 +0000179
180 // If the last macro id is close to the currently requested location, try to
Chris Lattner991ae512007-08-02 03:55:37 +0000181 // reuse it. This implements a small cache.
182 for (int i = MacroIDs.size()-1, e = MacroIDs.size()-6; i >= 0 && i != e; --i){
183 MacroIDInfo &LastOne = MacroIDs[i];
Chris Lattnerd1623a82007-07-21 06:41:57 +0000184
Chris Lattner991ae512007-08-02 03:55:37 +0000185 // The instanitation point and source physloc have to exactly match to reuse
186 // (for now). We could allow "nearby" instantiations in the future.
187 if (LastOne.getInstantiationLoc() != InstantLoc ||
188 LastOne.getPhysicalLoc().getFileID() != PhysLoc.getFileID())
189 continue;
190
191 // Check to see if the physloc of the token came from near enough to reuse.
192 int PhysDelta = PhysLoc.getRawFilePos() -
193 LastOne.getPhysicalLoc().getRawFilePos();
194 if (SourceLocation::isValidMacroPhysOffs(PhysDelta))
195 return SourceLocation::getMacroLoc(MacroIDs.size()-1, PhysDelta, 0);
Chris Lattner31bb8be2007-07-20 18:00:12 +0000196 }
197
Chris Lattner45011cf2007-07-20 18:26:45 +0000198
Chris Lattner9dc1f532007-07-20 16:37:10 +0000199 MacroIDs.push_back(MacroIDInfo::get(InstantLoc, PhysLoc));
Chris Lattner9dc1f532007-07-20 16:37:10 +0000200 return SourceLocation::getMacroLoc(MacroIDs.size()-1, 0, 0);
Reid Spencer5f016e22007-07-11 17:01:13 +0000201}
202
203
204
205/// getCharacterData - Return a pointer to the start of the specified location
206/// in the appropriate MemoryBuffer.
207const char *SourceManager::getCharacterData(SourceLocation SL) const {
208 // Note that this is a hot function in the getSpelling() path, which is
209 // heavily used by -E mode.
Chris Lattner9dc1f532007-07-20 16:37:10 +0000210 SL = getPhysicalLoc(SL);
Reid Spencer5f016e22007-07-11 17:01:13 +0000211
Chris Lattner9dc1f532007-07-20 16:37:10 +0000212 return getFileInfo(SL.getFileID())->Buffer->getBufferStart() +
213 getFullFilePos(SL);
Reid Spencer5f016e22007-07-11 17:01:13 +0000214}
215
Reid Spencer5f016e22007-07-11 17:01:13 +0000216
Chris Lattner9dc1f532007-07-20 16:37:10 +0000217/// getColumnNumber - Return the column # for the specified file position.
Reid Spencer5f016e22007-07-11 17:01:13 +0000218/// this is significantly cheaper to compute than the line number. This returns
219/// zero if the column number isn't known.
220unsigned SourceManager::getColumnNumber(SourceLocation Loc) const {
Reid Spencer5f016e22007-07-11 17:01:13 +0000221 unsigned FileID = Loc.getFileID();
222 if (FileID == 0) return 0;
223
Chris Lattner9dc1f532007-07-20 16:37:10 +0000224 unsigned FilePos = getFullFilePos(Loc);
Reid Spencer5f016e22007-07-11 17:01:13 +0000225 const MemoryBuffer *Buffer = getBuffer(FileID);
226 const char *Buf = Buffer->getBufferStart();
227
228 unsigned LineStart = FilePos;
229 while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r')
230 --LineStart;
231 return FilePos-LineStart+1;
232}
233
234/// getSourceName - This method returns the name of the file or buffer that
235/// the SourceLocation specifies. This can be modified with #line directives,
236/// etc.
Chris Lattnerd8e30832007-07-24 06:57:14 +0000237const char *SourceManager::getSourceName(SourceLocation Loc) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000238 unsigned FileID = Loc.getFileID();
239 if (FileID == 0) return "";
240 return getFileInfo(FileID)->Buffer->getBufferIdentifier();
241}
242
Chris Lattner5e36a7a2007-07-24 05:57:19 +0000243static void ComputeLineNumbers(FileInfo *FI) DISABLE_INLINE;
244static void ComputeLineNumbers(FileInfo *FI) {
245 const MemoryBuffer *Buffer = FI->Buffer;
246
247 // Find the file offsets of all of the *physical* source lines. This does
248 // not look at trigraphs, escaped newlines, or anything else tricky.
249 std::vector<unsigned> LineOffsets;
250
251 // Line #1 starts at char 0.
252 LineOffsets.push_back(0);
253
254 const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart();
255 const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd();
256 unsigned Offs = 0;
257 while (1) {
258 // Skip over the contents of the line.
259 // TODO: Vectorize this? This is very performance sensitive for programs
260 // with lots of diagnostics and in -E mode.
261 const unsigned char *NextBuf = (const unsigned char *)Buf;
262 while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0')
263 ++NextBuf;
264 Offs += NextBuf-Buf;
265 Buf = NextBuf;
266
267 if (Buf[0] == '\n' || Buf[0] == '\r') {
268 // If this is \n\r or \r\n, skip both characters.
269 if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1])
270 ++Offs, ++Buf;
271 ++Offs, ++Buf;
272 LineOffsets.push_back(Offs);
273 } else {
274 // Otherwise, this is a null. If end of file, exit.
275 if (Buf == End) break;
276 // Otherwise, skip the null.
277 ++Offs, ++Buf;
278 }
279 }
280 LineOffsets.push_back(Offs);
281
282 // Copy the offsets into the FileInfo structure.
283 FI->NumLines = LineOffsets.size();
284 FI->SourceLineCache = new unsigned[LineOffsets.size()];
285 std::copy(LineOffsets.begin(), LineOffsets.end(), FI->SourceLineCache);
286}
Reid Spencer5f016e22007-07-11 17:01:13 +0000287
288/// getLineNumber - Given a SourceLocation, return the physical line number
289/// for the position indicated. This requires building and caching a table of
290/// line offsets for the MemoryBuffer, so this is not cheap: use only when
291/// about to emit a diagnostic.
292unsigned SourceManager::getLineNumber(SourceLocation Loc) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000293 unsigned FileID = Loc.getFileID();
294 if (FileID == 0) return 0;
Chris Lattner5e36a7a2007-07-24 05:57:19 +0000295 FileInfo *FileInfo;
296
297 if (LastLineNoFileIDQuery == FileID)
298 FileInfo = LastLineNoFileInfo;
299 else
300 FileInfo = getFileInfo(FileID);
Reid Spencer5f016e22007-07-11 17:01:13 +0000301
302 // If this is the first use of line information for this buffer, compute the
Chris Lattner5e36a7a2007-07-24 05:57:19 +0000303 /// SourceLineCache for it on demand.
304 if (FileInfo->SourceLineCache == 0)
305 ComputeLineNumbers(FileInfo);
Reid Spencer5f016e22007-07-11 17:01:13 +0000306
307 // Okay, we know we have a line number table. Do a binary search to find the
308 // line number that this character position lands on.
Reid Spencer5f016e22007-07-11 17:01:13 +0000309 unsigned *SourceLineCache = FileInfo->SourceLineCache;
Chris Lattner5e36a7a2007-07-24 05:57:19 +0000310 unsigned *SourceLineCacheStart = SourceLineCache;
311 unsigned *SourceLineCacheEnd = SourceLineCache + FileInfo->NumLines;
312
313 unsigned QueriedFilePos = getFullFilePos(Loc)+1;
314
315 // If the previous query was to the same file, we know both the file pos from
316 // that query and the line number returned. This allows us to narrow the
317 // search space from the entire file to something near the match.
318 if (LastLineNoFileIDQuery == FileID) {
319 if (QueriedFilePos >= LastLineNoFilePos) {
320 SourceLineCache = SourceLineCache+LastLineNoResult-1;
321
322 // The query is likely to be nearby the previous one. Here we check to
323 // see if it is within 5, 10 or 20 lines. It can be far away in cases
324 // where big comment blocks and vertical whitespace eat up lines but
325 // contribute no tokens.
326 if (SourceLineCache+5 < SourceLineCacheEnd) {
327 if (SourceLineCache[5] > QueriedFilePos)
328 SourceLineCacheEnd = SourceLineCache+5;
329 else if (SourceLineCache+10 < SourceLineCacheEnd) {
330 if (SourceLineCache[10] > QueriedFilePos)
331 SourceLineCacheEnd = SourceLineCache+10;
332 else if (SourceLineCache+20 < SourceLineCacheEnd) {
333 if (SourceLineCache[20] > QueriedFilePos)
334 SourceLineCacheEnd = SourceLineCache+20;
335 }
336 }
337 }
338 } else {
339 SourceLineCacheEnd = SourceLineCache+LastLineNoResult+1;
340 }
341 }
342
Chris Lattner1cf12bf2007-07-24 06:43:46 +0000343 // If the spread is large, do a "radix" test as our initial guess, based on
344 // the assumption that lines average to approximately the same length.
345 // NOTE: This is currently disabled, as it does not appear to be profitable in
346 // initial measurements.
347 if (0 && SourceLineCacheEnd-SourceLineCache > 20) {
348 unsigned FileLen = FileInfo->SourceLineCache[FileInfo->NumLines-1];
349
350 // Take a stab at guessing where it is.
351 unsigned ApproxPos = FileInfo->NumLines*QueriedFilePos / FileLen;
352
353 // Check for -10 and +10 lines.
354 unsigned LowerBound = std::max(int(ApproxPos-10), 0);
355 unsigned UpperBound = std::min(ApproxPos+10, FileLen);
356
357 // If the computed lower bound is less than the query location, move it in.
358 if (SourceLineCache < SourceLineCacheStart+LowerBound &&
359 SourceLineCacheStart[LowerBound] < QueriedFilePos)
360 SourceLineCache = SourceLineCacheStart+LowerBound;
361
362 // If the computed upper bound is greater than the query location, move it.
363 if (SourceLineCacheEnd > SourceLineCacheStart+UpperBound &&
364 SourceLineCacheStart[UpperBound] >= QueriedFilePos)
365 SourceLineCacheEnd = SourceLineCacheStart+UpperBound;
366 }
367
368 unsigned *Pos
369 = std::lower_bound(SourceLineCache, SourceLineCacheEnd, QueriedFilePos);
Chris Lattner5e36a7a2007-07-24 05:57:19 +0000370 unsigned LineNo = Pos-SourceLineCacheStart;
371
372 LastLineNoFileIDQuery = FileID;
373 LastLineNoFileInfo = FileInfo;
374 LastLineNoFilePos = QueriedFilePos;
375 LastLineNoResult = LineNo;
376 return LineNo;
Reid Spencer5f016e22007-07-11 17:01:13 +0000377}
378
Reid Spencer5f016e22007-07-11 17:01:13 +0000379/// PrintStats - Print statistics to stderr.
380///
381void SourceManager::PrintStats() const {
382 std::cerr << "\n*** Source Manager Stats:\n";
383 std::cerr << FileInfos.size() << " files mapped, " << MemBufferInfos.size()
384 << " mem buffers mapped, " << FileIDs.size()
385 << " file ID's allocated.\n";
Chris Lattner9dc1f532007-07-20 16:37:10 +0000386 std::cerr << " " << FileIDs.size() << " normal buffer FileID's, "
387 << MacroIDs.size() << " macro expansion FileID's.\n";
Reid Spencer5f016e22007-07-11 17:01:13 +0000388
389
390
391 unsigned NumLineNumsComputed = 0;
392 unsigned NumFileBytesMapped = 0;
393 for (std::map<const FileEntry *, FileInfo>::const_iterator I =
394 FileInfos.begin(), E = FileInfos.end(); I != E; ++I) {
395 NumLineNumsComputed += I->second.SourceLineCache != 0;
396 NumFileBytesMapped += I->second.Buffer->getBufferSize();
397 }
398 std::cerr << NumFileBytesMapped << " bytes of files mapped, "
399 << NumLineNumsComputed << " files with line #'s computed.\n";
400}