blob: 59103caf2985f78a99ef0000966b22972a7a1a27 [file] [log] [blame]
//===--- SourceManager.cpp - Track and cache source files -----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//  This file implements the SourceManager interface.
//
//===----------------------------------------------------------------------===//
13
14#include "clang/Basic/SourceManager.h"
15#include "clang/Basic/FileManager.h"
Hartmut Kaiser34947252007-09-12 15:39:04 +000016#include "llvm/Config/config.h"
Chris Lattner5e36a7a2007-07-24 05:57:19 +000017#include "llvm/Support/Compiler.h"
Reid Spencer5f016e22007-07-11 17:01:13 +000018#include "llvm/Support/MemoryBuffer.h"
19#include "llvm/System/Path.h"
Ted Kremenek78d85f52007-10-30 21:08:08 +000020#include "llvm/Bitcode/Serialize.h"
21#include "llvm/Bitcode/Deserialize.h"
Reid Spencer5f016e22007-07-11 17:01:13 +000022#include <algorithm>
23#include <iostream>
Gabor Greif15012182007-07-12 16:00:00 +000024#include <fcntl.h>
Reid Spencer5f016e22007-07-11 17:01:13 +000025using namespace clang;
26using namespace SrcMgr;
27using llvm::MemoryBuffer;
28
Ted Kremenek78d85f52007-10-30 21:08:08 +000029ContentCache::~ContentCache() {
30 delete Buffer;
31 delete [] SourceLineCache;
Reid Spencer5f016e22007-07-11 17:01:13 +000032}
33
Reid Spencer5f016e22007-07-11 17:01:13 +000034// FIXME: REMOVE THESE
35#include <unistd.h>
36#include <sys/types.h>
Anton Korobeynikovbd0be392007-10-16 09:09:44 +000037#if !defined(_MSC_VER) && !defined(__MINGW32__)
Reid Spencer5f016e22007-07-11 17:01:13 +000038#include <sys/uio.h>
39#include <sys/fcntl.h>
Chris Lattner6a4545e2007-09-03 18:24:56 +000040#else
41#include <io.h>
42#endif
Reid Spencer5f016e22007-07-11 17:01:13 +000043#include <cerrno>
44
45static const MemoryBuffer *ReadFileFast(const FileEntry *FileEnt) {
46#if 0
47 // FIXME: Reintroduce this and zap this function once the common llvm stuff
48 // is fast for the small case.
49 return MemoryBuffer::getFile(FileEnt->getName(), strlen(FileEnt->getName()),
50 FileEnt->getSize());
51#endif
52
53 // If the file is larger than some threshold, use 'read', otherwise use mmap.
54 if (FileEnt->getSize() >= 4096*4)
55 return MemoryBuffer::getFile(FileEnt->getName(), strlen(FileEnt->getName()),
56 0, FileEnt->getSize());
57
58 MemoryBuffer *SB = MemoryBuffer::getNewUninitMemBuffer(FileEnt->getSize(),
59 FileEnt->getName());
60 char *BufPtr = const_cast<char*>(SB->getBufferStart());
Chris Lattner6a4545e2007-09-03 18:24:56 +000061
Hartmut Kaiser34947252007-09-12 15:39:04 +000062#if defined(LLVM_ON_WIN32)
Chris Lattner6a4545e2007-09-03 18:24:56 +000063 int FD = ::open(FileEnt->getName(), O_RDONLY|O_BINARY);
64#else
Reid Spencer5f016e22007-07-11 17:01:13 +000065 int FD = ::open(FileEnt->getName(), O_RDONLY);
Chris Lattner6a4545e2007-09-03 18:24:56 +000066#endif
Reid Spencer5f016e22007-07-11 17:01:13 +000067 if (FD == -1) {
68 delete SB;
69 return 0;
70 }
71
72 unsigned BytesLeft = FileEnt->getSize();
73 while (BytesLeft) {
74 ssize_t NumRead = ::read(FD, BufPtr, BytesLeft);
75 if (NumRead != -1) {
76 BytesLeft -= NumRead;
77 BufPtr += NumRead;
78 } else if (errno == EINTR) {
79 // try again
80 } else {
81 // error reading.
82 close(FD);
83 delete SB;
84 return 0;
85 }
86 }
87 close(FD);
88
89 return SB;
90}
91
92
93/// getFileInfo - Create or return a cached FileInfo for the specified file.
94///
Ted Kremenek78d85f52007-10-30 21:08:08 +000095const ContentCache* SourceManager::getContentCache(const FileEntry *FileEnt) {
96
Reid Spencer5f016e22007-07-11 17:01:13 +000097 assert(FileEnt && "Didn't specify a file entry to use?");
98 // Do we already have information about this file?
Ted Kremenek78d85f52007-10-30 21:08:08 +000099 std::set<ContentCache>::iterator I =
100 FileInfos.lower_bound(ContentCache(FileEnt));
101
102 if (I != FileInfos.end() && I->Entry == FileEnt)
Reid Spencer5f016e22007-07-11 17:01:13 +0000103 return &*I;
104
105 // Nope, get information.
106 const MemoryBuffer *File = ReadFileFast(FileEnt);
107 if (File == 0)
108 return 0;
109
Ted Kremenek78d85f52007-10-30 21:08:08 +0000110 ContentCache& Entry = const_cast<ContentCache&>(*FileInfos.insert(I,FileEnt));
Reid Spencer5f016e22007-07-11 17:01:13 +0000111
Ted Kremenek78d85f52007-10-30 21:08:08 +0000112 Entry.Buffer = File;
113 Entry.SourceLineCache = 0;
114 Entry.NumLines = 0;
Reid Spencer5f016e22007-07-11 17:01:13 +0000115 return &Entry;
116}
117
118
119/// createMemBufferInfoRec - Create a new info record for the specified memory
120/// buffer. This does no caching.
Ted Kremenek78d85f52007-10-30 21:08:08 +0000121const ContentCache*
122SourceManager::createMemBufferContentCache(const MemoryBuffer *Buffer) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000123 // Add a new info record to the MemBufferInfos list and return it.
Ted Kremenek78d85f52007-10-30 21:08:08 +0000124 MemBufferInfos.push_back(ContentCache());
125 ContentCache& Entry = const_cast<ContentCache&>(MemBufferInfos.back());
126 Entry.Buffer = Buffer;
127 return &Entry;
Reid Spencer5f016e22007-07-11 17:01:13 +0000128}
129
130
131/// createFileID - Create a new fileID for the specified InfoRec and include
132/// position. This works regardless of whether the InfoRec corresponds to a
133/// file or some other input source.
Ted Kremenek78d85f52007-10-30 21:08:08 +0000134unsigned SourceManager::createFileID(const ContentCache *File,
Reid Spencer5f016e22007-07-11 17:01:13 +0000135 SourceLocation IncludePos) {
136 // If FileEnt is really large (e.g. it's a large .i file), we may not be able
137 // to fit an arbitrary position in the file in the FilePos field. To handle
138 // this, we create one FileID for each chunk of the file that fits in a
139 // FilePos field.
Ted Kremenek78d85f52007-10-30 21:08:08 +0000140 unsigned FileSize = File->Buffer->getBufferSize();
Reid Spencer5f016e22007-07-11 17:01:13 +0000141 if (FileSize+1 < (1 << SourceLocation::FilePosBits)) {
Chris Lattner9dc1f532007-07-20 16:37:10 +0000142 FileIDs.push_back(FileIDInfo::get(IncludePos, 0, File));
Reid Spencer5f016e22007-07-11 17:01:13 +0000143 assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) &&
144 "Ran out of file ID's!");
145 return FileIDs.size();
146 }
147
148 // Create one FileID for each chunk of the file.
149 unsigned Result = FileIDs.size()+1;
150
151 unsigned ChunkNo = 0;
152 while (1) {
Chris Lattner9dc1f532007-07-20 16:37:10 +0000153 FileIDs.push_back(FileIDInfo::get(IncludePos, ChunkNo++, File));
Reid Spencer5f016e22007-07-11 17:01:13 +0000154
155 if (FileSize+1 < (1 << SourceLocation::FilePosBits)) break;
156 FileSize -= (1 << SourceLocation::FilePosBits);
157 }
158
159 assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) &&
160 "Ran out of file ID's!");
161 return Result;
162}
163
164/// getInstantiationLoc - Return a new SourceLocation that encodes the fact
165/// that a token from physloc PhysLoc should actually be referenced from
166/// InstantiationLoc.
Chris Lattner31bb8be2007-07-20 18:00:12 +0000167SourceLocation SourceManager::getInstantiationLoc(SourceLocation PhysLoc,
Reid Spencer5f016e22007-07-11 17:01:13 +0000168 SourceLocation InstantLoc) {
Chris Lattnerabca2bb2007-07-15 06:35:27 +0000169 // The specified source location may be a mapped location, due to a macro
170 // instantiation or #line directive. Strip off this information to find out
171 // where the characters are actually located.
Chris Lattner31bb8be2007-07-20 18:00:12 +0000172 PhysLoc = getPhysicalLoc(PhysLoc);
Reid Spencer5f016e22007-07-11 17:01:13 +0000173
174 // Resolve InstantLoc down to a real logical location.
175 InstantLoc = getLogicalLoc(InstantLoc);
176
Chris Lattner31bb8be2007-07-20 18:00:12 +0000177
178 // If the last macro id is close to the currently requested location, try to
Chris Lattner991ae512007-08-02 03:55:37 +0000179 // reuse it. This implements a small cache.
180 for (int i = MacroIDs.size()-1, e = MacroIDs.size()-6; i >= 0 && i != e; --i){
181 MacroIDInfo &LastOne = MacroIDs[i];
Chris Lattnerd1623a82007-07-21 06:41:57 +0000182
Chris Lattner991ae512007-08-02 03:55:37 +0000183 // The instanitation point and source physloc have to exactly match to reuse
184 // (for now). We could allow "nearby" instantiations in the future.
185 if (LastOne.getInstantiationLoc() != InstantLoc ||
186 LastOne.getPhysicalLoc().getFileID() != PhysLoc.getFileID())
187 continue;
188
189 // Check to see if the physloc of the token came from near enough to reuse.
190 int PhysDelta = PhysLoc.getRawFilePos() -
191 LastOne.getPhysicalLoc().getRawFilePos();
192 if (SourceLocation::isValidMacroPhysOffs(PhysDelta))
Chris Lattnerc1e50fc2007-08-02 04:22:39 +0000193 return SourceLocation::getMacroLoc(i, PhysDelta, 0);
Chris Lattner31bb8be2007-07-20 18:00:12 +0000194 }
195
Chris Lattner45011cf2007-07-20 18:26:45 +0000196
Chris Lattner9dc1f532007-07-20 16:37:10 +0000197 MacroIDs.push_back(MacroIDInfo::get(InstantLoc, PhysLoc));
Chris Lattner9dc1f532007-07-20 16:37:10 +0000198 return SourceLocation::getMacroLoc(MacroIDs.size()-1, 0, 0);
Reid Spencer5f016e22007-07-11 17:01:13 +0000199}
200
Chris Lattner8a12c272007-10-11 18:38:32 +0000201/// getBufferData - Return a pointer to the start and end of the character
202/// data for the specified FileID.
203std::pair<const char*, const char*>
204SourceManager::getBufferData(unsigned FileID) const {
205 const llvm::MemoryBuffer *Buf = getBuffer(FileID);
206 return std::make_pair(Buf->getBufferStart(), Buf->getBufferEnd());
207}
Reid Spencer5f016e22007-07-11 17:01:13 +0000208
209
210/// getCharacterData - Return a pointer to the start of the specified location
211/// in the appropriate MemoryBuffer.
212const char *SourceManager::getCharacterData(SourceLocation SL) const {
213 // Note that this is a hot function in the getSpelling() path, which is
214 // heavily used by -E mode.
Chris Lattner9dc1f532007-07-20 16:37:10 +0000215 SL = getPhysicalLoc(SL);
Reid Spencer5f016e22007-07-11 17:01:13 +0000216
Ted Kremenek78d85f52007-10-30 21:08:08 +0000217 return getContentCache(SL.getFileID())->Buffer->getBufferStart() +
Chris Lattner9dc1f532007-07-20 16:37:10 +0000218 getFullFilePos(SL);
Reid Spencer5f016e22007-07-11 17:01:13 +0000219}
220
Reid Spencer5f016e22007-07-11 17:01:13 +0000221
Chris Lattner9dc1f532007-07-20 16:37:10 +0000222/// getColumnNumber - Return the column # for the specified file position.
Reid Spencer5f016e22007-07-11 17:01:13 +0000223/// this is significantly cheaper to compute than the line number. This returns
224/// zero if the column number isn't known.
225unsigned SourceManager::getColumnNumber(SourceLocation Loc) const {
Reid Spencer5f016e22007-07-11 17:01:13 +0000226 unsigned FileID = Loc.getFileID();
227 if (FileID == 0) return 0;
228
Chris Lattner9dc1f532007-07-20 16:37:10 +0000229 unsigned FilePos = getFullFilePos(Loc);
Reid Spencer5f016e22007-07-11 17:01:13 +0000230 const MemoryBuffer *Buffer = getBuffer(FileID);
231 const char *Buf = Buffer->getBufferStart();
232
233 unsigned LineStart = FilePos;
234 while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r')
235 --LineStart;
236 return FilePos-LineStart+1;
237}
238
239/// getSourceName - This method returns the name of the file or buffer that
240/// the SourceLocation specifies. This can be modified with #line directives,
241/// etc.
Chris Lattner8b6ca882007-08-30 05:59:30 +0000242const char *SourceManager::getSourceName(SourceLocation Loc) const {
Reid Spencer5f016e22007-07-11 17:01:13 +0000243 unsigned FileID = Loc.getFileID();
244 if (FileID == 0) return "";
Ted Kremenek78d85f52007-10-30 21:08:08 +0000245 return getContentCache(FileID)->Buffer->getBufferIdentifier();
Reid Spencer5f016e22007-07-11 17:01:13 +0000246}
247
Ted Kremenek78d85f52007-10-30 21:08:08 +0000248static void ComputeLineNumbers(ContentCache* FI) DISABLE_INLINE;
249static void ComputeLineNumbers(ContentCache* FI) {
Chris Lattner5e36a7a2007-07-24 05:57:19 +0000250 const MemoryBuffer *Buffer = FI->Buffer;
251
252 // Find the file offsets of all of the *physical* source lines. This does
253 // not look at trigraphs, escaped newlines, or anything else tricky.
254 std::vector<unsigned> LineOffsets;
255
256 // Line #1 starts at char 0.
257 LineOffsets.push_back(0);
258
259 const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart();
260 const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd();
261 unsigned Offs = 0;
262 while (1) {
263 // Skip over the contents of the line.
264 // TODO: Vectorize this? This is very performance sensitive for programs
265 // with lots of diagnostics and in -E mode.
266 const unsigned char *NextBuf = (const unsigned char *)Buf;
267 while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0')
268 ++NextBuf;
269 Offs += NextBuf-Buf;
270 Buf = NextBuf;
271
272 if (Buf[0] == '\n' || Buf[0] == '\r') {
273 // If this is \n\r or \r\n, skip both characters.
274 if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1])
275 ++Offs, ++Buf;
276 ++Offs, ++Buf;
277 LineOffsets.push_back(Offs);
278 } else {
279 // Otherwise, this is a null. If end of file, exit.
280 if (Buf == End) break;
281 // Otherwise, skip the null.
282 ++Offs, ++Buf;
283 }
284 }
285 LineOffsets.push_back(Offs);
286
287 // Copy the offsets into the FileInfo structure.
288 FI->NumLines = LineOffsets.size();
289 FI->SourceLineCache = new unsigned[LineOffsets.size()];
290 std::copy(LineOffsets.begin(), LineOffsets.end(), FI->SourceLineCache);
291}
Reid Spencer5f016e22007-07-11 17:01:13 +0000292
293/// getLineNumber - Given a SourceLocation, return the physical line number
294/// for the position indicated. This requires building and caching a table of
295/// line offsets for the MemoryBuffer, so this is not cheap: use only when
296/// about to emit a diagnostic.
297unsigned SourceManager::getLineNumber(SourceLocation Loc) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000298 unsigned FileID = Loc.getFileID();
299 if (FileID == 0) return 0;
Ted Kremenek78d85f52007-10-30 21:08:08 +0000300
301 ContentCache* Content;
Chris Lattner5e36a7a2007-07-24 05:57:19 +0000302
303 if (LastLineNoFileIDQuery == FileID)
Ted Kremenek78d85f52007-10-30 21:08:08 +0000304 Content = LastLineNoContentCache;
Chris Lattner5e36a7a2007-07-24 05:57:19 +0000305 else
Ted Kremenek78d85f52007-10-30 21:08:08 +0000306 Content = const_cast<ContentCache*>(getContentCache(FileID));
Reid Spencer5f016e22007-07-11 17:01:13 +0000307
308 // If this is the first use of line information for this buffer, compute the
Chris Lattner5e36a7a2007-07-24 05:57:19 +0000309 /// SourceLineCache for it on demand.
Ted Kremenek78d85f52007-10-30 21:08:08 +0000310 if (Content->SourceLineCache == 0)
311 ComputeLineNumbers(Content);
Reid Spencer5f016e22007-07-11 17:01:13 +0000312
313 // Okay, we know we have a line number table. Do a binary search to find the
314 // line number that this character position lands on.
Ted Kremenek78d85f52007-10-30 21:08:08 +0000315 unsigned *SourceLineCache = Content->SourceLineCache;
Chris Lattner5e36a7a2007-07-24 05:57:19 +0000316 unsigned *SourceLineCacheStart = SourceLineCache;
Ted Kremenek78d85f52007-10-30 21:08:08 +0000317 unsigned *SourceLineCacheEnd = SourceLineCache + Content->NumLines;
Chris Lattner5e36a7a2007-07-24 05:57:19 +0000318
319 unsigned QueriedFilePos = getFullFilePos(Loc)+1;
320
321 // If the previous query was to the same file, we know both the file pos from
322 // that query and the line number returned. This allows us to narrow the
323 // search space from the entire file to something near the match.
324 if (LastLineNoFileIDQuery == FileID) {
325 if (QueriedFilePos >= LastLineNoFilePos) {
326 SourceLineCache = SourceLineCache+LastLineNoResult-1;
327
328 // The query is likely to be nearby the previous one. Here we check to
329 // see if it is within 5, 10 or 20 lines. It can be far away in cases
330 // where big comment blocks and vertical whitespace eat up lines but
331 // contribute no tokens.
332 if (SourceLineCache+5 < SourceLineCacheEnd) {
333 if (SourceLineCache[5] > QueriedFilePos)
334 SourceLineCacheEnd = SourceLineCache+5;
335 else if (SourceLineCache+10 < SourceLineCacheEnd) {
336 if (SourceLineCache[10] > QueriedFilePos)
337 SourceLineCacheEnd = SourceLineCache+10;
338 else if (SourceLineCache+20 < SourceLineCacheEnd) {
339 if (SourceLineCache[20] > QueriedFilePos)
340 SourceLineCacheEnd = SourceLineCache+20;
341 }
342 }
343 }
344 } else {
345 SourceLineCacheEnd = SourceLineCache+LastLineNoResult+1;
346 }
347 }
348
Chris Lattner1cf12bf2007-07-24 06:43:46 +0000349 // If the spread is large, do a "radix" test as our initial guess, based on
350 // the assumption that lines average to approximately the same length.
351 // NOTE: This is currently disabled, as it does not appear to be profitable in
352 // initial measurements.
353 if (0 && SourceLineCacheEnd-SourceLineCache > 20) {
Ted Kremenek78d85f52007-10-30 21:08:08 +0000354 unsigned FileLen = Content->SourceLineCache[Content->NumLines-1];
Chris Lattner1cf12bf2007-07-24 06:43:46 +0000355
356 // Take a stab at guessing where it is.
Ted Kremenek78d85f52007-10-30 21:08:08 +0000357 unsigned ApproxPos = Content->NumLines*QueriedFilePos / FileLen;
Chris Lattner1cf12bf2007-07-24 06:43:46 +0000358
359 // Check for -10 and +10 lines.
360 unsigned LowerBound = std::max(int(ApproxPos-10), 0);
361 unsigned UpperBound = std::min(ApproxPos+10, FileLen);
362
363 // If the computed lower bound is less than the query location, move it in.
364 if (SourceLineCache < SourceLineCacheStart+LowerBound &&
365 SourceLineCacheStart[LowerBound] < QueriedFilePos)
366 SourceLineCache = SourceLineCacheStart+LowerBound;
367
368 // If the computed upper bound is greater than the query location, move it.
369 if (SourceLineCacheEnd > SourceLineCacheStart+UpperBound &&
370 SourceLineCacheStart[UpperBound] >= QueriedFilePos)
371 SourceLineCacheEnd = SourceLineCacheStart+UpperBound;
372 }
373
374 unsigned *Pos
375 = std::lower_bound(SourceLineCache, SourceLineCacheEnd, QueriedFilePos);
Chris Lattner5e36a7a2007-07-24 05:57:19 +0000376 unsigned LineNo = Pos-SourceLineCacheStart;
377
378 LastLineNoFileIDQuery = FileID;
Ted Kremenek78d85f52007-10-30 21:08:08 +0000379 LastLineNoContentCache = Content;
Chris Lattner5e36a7a2007-07-24 05:57:19 +0000380 LastLineNoFilePos = QueriedFilePos;
381 LastLineNoResult = LineNo;
382 return LineNo;
Reid Spencer5f016e22007-07-11 17:01:13 +0000383}
384
Reid Spencer5f016e22007-07-11 17:01:13 +0000385/// PrintStats - Print statistics to stderr.
386///
387void SourceManager::PrintStats() const {
388 std::cerr << "\n*** Source Manager Stats:\n";
389 std::cerr << FileInfos.size() << " files mapped, " << MemBufferInfos.size()
390 << " mem buffers mapped, " << FileIDs.size()
391 << " file ID's allocated.\n";
Chris Lattner9dc1f532007-07-20 16:37:10 +0000392 std::cerr << " " << FileIDs.size() << " normal buffer FileID's, "
393 << MacroIDs.size() << " macro expansion FileID's.\n";
Reid Spencer5f016e22007-07-11 17:01:13 +0000394
Reid Spencer5f016e22007-07-11 17:01:13 +0000395 unsigned NumLineNumsComputed = 0;
396 unsigned NumFileBytesMapped = 0;
Ted Kremenek78d85f52007-10-30 21:08:08 +0000397 for (std::set<ContentCache>::const_iterator I =
Reid Spencer5f016e22007-07-11 17:01:13 +0000398 FileInfos.begin(), E = FileInfos.end(); I != E; ++I) {
Ted Kremenek78d85f52007-10-30 21:08:08 +0000399 NumLineNumsComputed += I->SourceLineCache != 0;
400 NumFileBytesMapped += I->Buffer->getBufferSize();
Reid Spencer5f016e22007-07-11 17:01:13 +0000401 }
Ted Kremenek78d85f52007-10-30 21:08:08 +0000402
Reid Spencer5f016e22007-07-11 17:01:13 +0000403 std::cerr << NumFileBytesMapped << " bytes of files mapped, "
404 << NumLineNumsComputed << " files with line #'s computed.\n";
405}