blob: 0482d4c705bbd6175f7814650e7da2f82fc82706 [file] [log] [blame]
Reid Spencer5f016e22007-07-11 17:01:13 +00001//===--- SourceManager.cpp - Track and cache source files -----------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file was developed by Chris Lattner and is distributed under
6// the University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the SourceManager interface.
11//
12//===----------------------------------------------------------------------===//
13
14#include "clang/Basic/SourceManager.h"
15#include "clang/Basic/FileManager.h"
Hartmut Kaiser34947252007-09-12 15:39:04 +000016#include "llvm/Config/config.h"
Chris Lattner5e36a7a2007-07-24 05:57:19 +000017#include "llvm/Support/Compiler.h"
Reid Spencer5f016e22007-07-11 17:01:13 +000018#include "llvm/Support/MemoryBuffer.h"
19#include "llvm/System/Path.h"
Ted Kremenek78d85f52007-10-30 21:08:08 +000020#include "llvm/Bitcode/Serialize.h"
21#include "llvm/Bitcode/Deserialize.h"
Reid Spencer5f016e22007-07-11 17:01:13 +000022#include <algorithm>
23#include <iostream>
Gabor Greif15012182007-07-12 16:00:00 +000024#include <fcntl.h>
Reid Spencer5f016e22007-07-11 17:01:13 +000025using namespace clang;
26using namespace SrcMgr;
27using llvm::MemoryBuffer;
28
Ted Kremenek78d85f52007-10-30 21:08:08 +000029ContentCache::~ContentCache() {
30 delete Buffer;
31 delete [] SourceLineCache;
Reid Spencer5f016e22007-07-11 17:01:13 +000032}
33
Reid Spencer5f016e22007-07-11 17:01:13 +000034// FIXME: REMOVE THESE
35#include <unistd.h>
36#include <sys/types.h>
Anton Korobeynikovbd0be392007-10-16 09:09:44 +000037#if !defined(_MSC_VER) && !defined(__MINGW32__)
Reid Spencer5f016e22007-07-11 17:01:13 +000038#include <sys/uio.h>
39#include <sys/fcntl.h>
Chris Lattner6a4545e2007-09-03 18:24:56 +000040#else
41#include <io.h>
42#endif
Reid Spencer5f016e22007-07-11 17:01:13 +000043#include <cerrno>
44
45static const MemoryBuffer *ReadFileFast(const FileEntry *FileEnt) {
46#if 0
47 // FIXME: Reintroduce this and zap this function once the common llvm stuff
48 // is fast for the small case.
49 return MemoryBuffer::getFile(FileEnt->getName(), strlen(FileEnt->getName()),
50 FileEnt->getSize());
51#endif
52
53 // If the file is larger than some threshold, use 'read', otherwise use mmap.
54 if (FileEnt->getSize() >= 4096*4)
55 return MemoryBuffer::getFile(FileEnt->getName(), strlen(FileEnt->getName()),
56 0, FileEnt->getSize());
57
58 MemoryBuffer *SB = MemoryBuffer::getNewUninitMemBuffer(FileEnt->getSize(),
59 FileEnt->getName());
60 char *BufPtr = const_cast<char*>(SB->getBufferStart());
Chris Lattner6a4545e2007-09-03 18:24:56 +000061
Hartmut Kaiser34947252007-09-12 15:39:04 +000062#if defined(LLVM_ON_WIN32)
Chris Lattner6a4545e2007-09-03 18:24:56 +000063 int FD = ::open(FileEnt->getName(), O_RDONLY|O_BINARY);
64#else
Reid Spencer5f016e22007-07-11 17:01:13 +000065 int FD = ::open(FileEnt->getName(), O_RDONLY);
Chris Lattner6a4545e2007-09-03 18:24:56 +000066#endif
Reid Spencer5f016e22007-07-11 17:01:13 +000067 if (FD == -1) {
68 delete SB;
69 return 0;
70 }
71
72 unsigned BytesLeft = FileEnt->getSize();
73 while (BytesLeft) {
74 ssize_t NumRead = ::read(FD, BufPtr, BytesLeft);
75 if (NumRead != -1) {
76 BytesLeft -= NumRead;
77 BufPtr += NumRead;
78 } else if (errno == EINTR) {
79 // try again
80 } else {
81 // error reading.
82 close(FD);
83 delete SB;
84 return 0;
85 }
86 }
87 close(FD);
88
89 return SB;
90}
91
92
93/// getFileInfo - Create or return a cached FileInfo for the specified file.
94///
Ted Kremenek78d85f52007-10-30 21:08:08 +000095const ContentCache* SourceManager::getContentCache(const FileEntry *FileEnt) {
96
Reid Spencer5f016e22007-07-11 17:01:13 +000097 assert(FileEnt && "Didn't specify a file entry to use?");
98 // Do we already have information about this file?
Ted Kremenek78d85f52007-10-30 21:08:08 +000099 std::set<ContentCache>::iterator I =
100 FileInfos.lower_bound(ContentCache(FileEnt));
101
102 if (I != FileInfos.end() && I->Entry == FileEnt)
Reid Spencer5f016e22007-07-11 17:01:13 +0000103 return &*I;
104
105 // Nope, get information.
106 const MemoryBuffer *File = ReadFileFast(FileEnt);
107 if (File == 0)
108 return 0;
109
Ted Kremenek78d85f52007-10-30 21:08:08 +0000110 ContentCache& Entry = const_cast<ContentCache&>(*FileInfos.insert(I,FileEnt));
Reid Spencer5f016e22007-07-11 17:01:13 +0000111
Ted Kremenek78d85f52007-10-30 21:08:08 +0000112 Entry.Buffer = File;
113 Entry.SourceLineCache = 0;
114 Entry.NumLines = 0;
Reid Spencer5f016e22007-07-11 17:01:13 +0000115 return &Entry;
116}
117
118
Ted Kremenekd1c0eee2007-10-31 17:53:38 +0000119/// createMemBufferContentCache - Create a new ContentCache for the specified
120/// memory buffer. This does no caching.
Ted Kremenek78d85f52007-10-30 21:08:08 +0000121const ContentCache*
122SourceManager::createMemBufferContentCache(const MemoryBuffer *Buffer) {
Ted Kremenek0d892d82007-10-30 22:57:35 +0000123 // Add a new ContentCache to the MemBufferInfos list and return it. We
124 // must default construct the object first that the instance actually
125 // stored within MemBufferInfos actually owns the Buffer, and not any
126 // temporary we would use in the call to "push_back".
Ted Kremenek78d85f52007-10-30 21:08:08 +0000127 MemBufferInfos.push_back(ContentCache());
128 ContentCache& Entry = const_cast<ContentCache&>(MemBufferInfos.back());
129 Entry.Buffer = Buffer;
130 return &Entry;
Reid Spencer5f016e22007-07-11 17:01:13 +0000131}
132
133
Ted Kremenek0d892d82007-10-30 22:57:35 +0000134/// createFileID - Create a new fileID for the specified ContentCache and
135/// include position. This works regardless of whether the ContentCache
136/// corresponds to a file or some other input source.
Ted Kremenek78d85f52007-10-30 21:08:08 +0000137unsigned SourceManager::createFileID(const ContentCache *File,
Reid Spencer5f016e22007-07-11 17:01:13 +0000138 SourceLocation IncludePos) {
139 // If FileEnt is really large (e.g. it's a large .i file), we may not be able
140 // to fit an arbitrary position in the file in the FilePos field. To handle
141 // this, we create one FileID for each chunk of the file that fits in a
142 // FilePos field.
Ted Kremenek78d85f52007-10-30 21:08:08 +0000143 unsigned FileSize = File->Buffer->getBufferSize();
Reid Spencer5f016e22007-07-11 17:01:13 +0000144 if (FileSize+1 < (1 << SourceLocation::FilePosBits)) {
Chris Lattner9dc1f532007-07-20 16:37:10 +0000145 FileIDs.push_back(FileIDInfo::get(IncludePos, 0, File));
Reid Spencer5f016e22007-07-11 17:01:13 +0000146 assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) &&
147 "Ran out of file ID's!");
148 return FileIDs.size();
149 }
150
151 // Create one FileID for each chunk of the file.
152 unsigned Result = FileIDs.size()+1;
153
154 unsigned ChunkNo = 0;
155 while (1) {
Chris Lattner9dc1f532007-07-20 16:37:10 +0000156 FileIDs.push_back(FileIDInfo::get(IncludePos, ChunkNo++, File));
Reid Spencer5f016e22007-07-11 17:01:13 +0000157
158 if (FileSize+1 < (1 << SourceLocation::FilePosBits)) break;
159 FileSize -= (1 << SourceLocation::FilePosBits);
160 }
161
162 assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) &&
163 "Ran out of file ID's!");
164 return Result;
165}
166
167/// getInstantiationLoc - Return a new SourceLocation that encodes the fact
168/// that a token from physloc PhysLoc should actually be referenced from
169/// InstantiationLoc.
Chris Lattner31bb8be2007-07-20 18:00:12 +0000170SourceLocation SourceManager::getInstantiationLoc(SourceLocation PhysLoc,
Reid Spencer5f016e22007-07-11 17:01:13 +0000171 SourceLocation InstantLoc) {
Chris Lattnerabca2bb2007-07-15 06:35:27 +0000172 // The specified source location may be a mapped location, due to a macro
173 // instantiation or #line directive. Strip off this information to find out
174 // where the characters are actually located.
Chris Lattner31bb8be2007-07-20 18:00:12 +0000175 PhysLoc = getPhysicalLoc(PhysLoc);
Reid Spencer5f016e22007-07-11 17:01:13 +0000176
177 // Resolve InstantLoc down to a real logical location.
178 InstantLoc = getLogicalLoc(InstantLoc);
179
Chris Lattner31bb8be2007-07-20 18:00:12 +0000180
181 // If the last macro id is close to the currently requested location, try to
Chris Lattner991ae512007-08-02 03:55:37 +0000182 // reuse it. This implements a small cache.
183 for (int i = MacroIDs.size()-1, e = MacroIDs.size()-6; i >= 0 && i != e; --i){
184 MacroIDInfo &LastOne = MacroIDs[i];
Chris Lattnerd1623a82007-07-21 06:41:57 +0000185
Chris Lattner991ae512007-08-02 03:55:37 +0000186 // The instanitation point and source physloc have to exactly match to reuse
187 // (for now). We could allow "nearby" instantiations in the future.
188 if (LastOne.getInstantiationLoc() != InstantLoc ||
189 LastOne.getPhysicalLoc().getFileID() != PhysLoc.getFileID())
190 continue;
191
192 // Check to see if the physloc of the token came from near enough to reuse.
193 int PhysDelta = PhysLoc.getRawFilePos() -
194 LastOne.getPhysicalLoc().getRawFilePos();
195 if (SourceLocation::isValidMacroPhysOffs(PhysDelta))
Chris Lattnerc1e50fc2007-08-02 04:22:39 +0000196 return SourceLocation::getMacroLoc(i, PhysDelta, 0);
Chris Lattner31bb8be2007-07-20 18:00:12 +0000197 }
198
Chris Lattner45011cf2007-07-20 18:26:45 +0000199
Chris Lattner9dc1f532007-07-20 16:37:10 +0000200 MacroIDs.push_back(MacroIDInfo::get(InstantLoc, PhysLoc));
Chris Lattner9dc1f532007-07-20 16:37:10 +0000201 return SourceLocation::getMacroLoc(MacroIDs.size()-1, 0, 0);
Reid Spencer5f016e22007-07-11 17:01:13 +0000202}
203
Chris Lattner8a12c272007-10-11 18:38:32 +0000204/// getBufferData - Return a pointer to the start and end of the character
205/// data for the specified FileID.
206std::pair<const char*, const char*>
207SourceManager::getBufferData(unsigned FileID) const {
208 const llvm::MemoryBuffer *Buf = getBuffer(FileID);
209 return std::make_pair(Buf->getBufferStart(), Buf->getBufferEnd());
210}
Reid Spencer5f016e22007-07-11 17:01:13 +0000211
212
213/// getCharacterData - Return a pointer to the start of the specified location
214/// in the appropriate MemoryBuffer.
215const char *SourceManager::getCharacterData(SourceLocation SL) const {
216 // Note that this is a hot function in the getSpelling() path, which is
217 // heavily used by -E mode.
Chris Lattner9dc1f532007-07-20 16:37:10 +0000218 SL = getPhysicalLoc(SL);
Reid Spencer5f016e22007-07-11 17:01:13 +0000219
Ted Kremenek78d85f52007-10-30 21:08:08 +0000220 return getContentCache(SL.getFileID())->Buffer->getBufferStart() +
Chris Lattner9dc1f532007-07-20 16:37:10 +0000221 getFullFilePos(SL);
Reid Spencer5f016e22007-07-11 17:01:13 +0000222}
223
Reid Spencer5f016e22007-07-11 17:01:13 +0000224
Chris Lattner9dc1f532007-07-20 16:37:10 +0000225/// getColumnNumber - Return the column # for the specified file position.
Reid Spencer5f016e22007-07-11 17:01:13 +0000226/// this is significantly cheaper to compute than the line number. This returns
227/// zero if the column number isn't known.
228unsigned SourceManager::getColumnNumber(SourceLocation Loc) const {
Reid Spencer5f016e22007-07-11 17:01:13 +0000229 unsigned FileID = Loc.getFileID();
230 if (FileID == 0) return 0;
231
Chris Lattner9dc1f532007-07-20 16:37:10 +0000232 unsigned FilePos = getFullFilePos(Loc);
Reid Spencer5f016e22007-07-11 17:01:13 +0000233 const MemoryBuffer *Buffer = getBuffer(FileID);
234 const char *Buf = Buffer->getBufferStart();
235
236 unsigned LineStart = FilePos;
237 while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r')
238 --LineStart;
239 return FilePos-LineStart+1;
240}
241
242/// getSourceName - This method returns the name of the file or buffer that
243/// the SourceLocation specifies. This can be modified with #line directives,
244/// etc.
Chris Lattner8b6ca882007-08-30 05:59:30 +0000245const char *SourceManager::getSourceName(SourceLocation Loc) const {
Reid Spencer5f016e22007-07-11 17:01:13 +0000246 unsigned FileID = Loc.getFileID();
247 if (FileID == 0) return "";
Ted Kremenek78d85f52007-10-30 21:08:08 +0000248 return getContentCache(FileID)->Buffer->getBufferIdentifier();
Reid Spencer5f016e22007-07-11 17:01:13 +0000249}
250
Ted Kremenek78d85f52007-10-30 21:08:08 +0000251static void ComputeLineNumbers(ContentCache* FI) DISABLE_INLINE;
252static void ComputeLineNumbers(ContentCache* FI) {
Chris Lattner5e36a7a2007-07-24 05:57:19 +0000253 const MemoryBuffer *Buffer = FI->Buffer;
254
255 // Find the file offsets of all of the *physical* source lines. This does
256 // not look at trigraphs, escaped newlines, or anything else tricky.
257 std::vector<unsigned> LineOffsets;
258
259 // Line #1 starts at char 0.
260 LineOffsets.push_back(0);
261
262 const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart();
263 const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd();
264 unsigned Offs = 0;
265 while (1) {
266 // Skip over the contents of the line.
267 // TODO: Vectorize this? This is very performance sensitive for programs
268 // with lots of diagnostics and in -E mode.
269 const unsigned char *NextBuf = (const unsigned char *)Buf;
270 while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0')
271 ++NextBuf;
272 Offs += NextBuf-Buf;
273 Buf = NextBuf;
274
275 if (Buf[0] == '\n' || Buf[0] == '\r') {
276 // If this is \n\r or \r\n, skip both characters.
277 if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1])
278 ++Offs, ++Buf;
279 ++Offs, ++Buf;
280 LineOffsets.push_back(Offs);
281 } else {
282 // Otherwise, this is a null. If end of file, exit.
283 if (Buf == End) break;
284 // Otherwise, skip the null.
285 ++Offs, ++Buf;
286 }
287 }
288 LineOffsets.push_back(Offs);
289
290 // Copy the offsets into the FileInfo structure.
291 FI->NumLines = LineOffsets.size();
292 FI->SourceLineCache = new unsigned[LineOffsets.size()];
293 std::copy(LineOffsets.begin(), LineOffsets.end(), FI->SourceLineCache);
294}
Reid Spencer5f016e22007-07-11 17:01:13 +0000295
296/// getLineNumber - Given a SourceLocation, return the physical line number
297/// for the position indicated. This requires building and caching a table of
298/// line offsets for the MemoryBuffer, so this is not cheap: use only when
299/// about to emit a diagnostic.
300unsigned SourceManager::getLineNumber(SourceLocation Loc) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000301 unsigned FileID = Loc.getFileID();
302 if (FileID == 0) return 0;
Ted Kremenek78d85f52007-10-30 21:08:08 +0000303
304 ContentCache* Content;
Chris Lattner5e36a7a2007-07-24 05:57:19 +0000305
306 if (LastLineNoFileIDQuery == FileID)
Ted Kremenek78d85f52007-10-30 21:08:08 +0000307 Content = LastLineNoContentCache;
Chris Lattner5e36a7a2007-07-24 05:57:19 +0000308 else
Ted Kremenek78d85f52007-10-30 21:08:08 +0000309 Content = const_cast<ContentCache*>(getContentCache(FileID));
Reid Spencer5f016e22007-07-11 17:01:13 +0000310
311 // If this is the first use of line information for this buffer, compute the
Chris Lattner5e36a7a2007-07-24 05:57:19 +0000312 /// SourceLineCache for it on demand.
Ted Kremenek78d85f52007-10-30 21:08:08 +0000313 if (Content->SourceLineCache == 0)
314 ComputeLineNumbers(Content);
Reid Spencer5f016e22007-07-11 17:01:13 +0000315
316 // Okay, we know we have a line number table. Do a binary search to find the
317 // line number that this character position lands on.
Ted Kremenek78d85f52007-10-30 21:08:08 +0000318 unsigned *SourceLineCache = Content->SourceLineCache;
Chris Lattner5e36a7a2007-07-24 05:57:19 +0000319 unsigned *SourceLineCacheStart = SourceLineCache;
Ted Kremenek78d85f52007-10-30 21:08:08 +0000320 unsigned *SourceLineCacheEnd = SourceLineCache + Content->NumLines;
Chris Lattner5e36a7a2007-07-24 05:57:19 +0000321
322 unsigned QueriedFilePos = getFullFilePos(Loc)+1;
323
324 // If the previous query was to the same file, we know both the file pos from
325 // that query and the line number returned. This allows us to narrow the
326 // search space from the entire file to something near the match.
327 if (LastLineNoFileIDQuery == FileID) {
328 if (QueriedFilePos >= LastLineNoFilePos) {
329 SourceLineCache = SourceLineCache+LastLineNoResult-1;
330
331 // The query is likely to be nearby the previous one. Here we check to
332 // see if it is within 5, 10 or 20 lines. It can be far away in cases
333 // where big comment blocks and vertical whitespace eat up lines but
334 // contribute no tokens.
335 if (SourceLineCache+5 < SourceLineCacheEnd) {
336 if (SourceLineCache[5] > QueriedFilePos)
337 SourceLineCacheEnd = SourceLineCache+5;
338 else if (SourceLineCache+10 < SourceLineCacheEnd) {
339 if (SourceLineCache[10] > QueriedFilePos)
340 SourceLineCacheEnd = SourceLineCache+10;
341 else if (SourceLineCache+20 < SourceLineCacheEnd) {
342 if (SourceLineCache[20] > QueriedFilePos)
343 SourceLineCacheEnd = SourceLineCache+20;
344 }
345 }
346 }
347 } else {
348 SourceLineCacheEnd = SourceLineCache+LastLineNoResult+1;
349 }
350 }
351
Chris Lattner1cf12bf2007-07-24 06:43:46 +0000352 // If the spread is large, do a "radix" test as our initial guess, based on
353 // the assumption that lines average to approximately the same length.
354 // NOTE: This is currently disabled, as it does not appear to be profitable in
355 // initial measurements.
356 if (0 && SourceLineCacheEnd-SourceLineCache > 20) {
Ted Kremenek78d85f52007-10-30 21:08:08 +0000357 unsigned FileLen = Content->SourceLineCache[Content->NumLines-1];
Chris Lattner1cf12bf2007-07-24 06:43:46 +0000358
359 // Take a stab at guessing where it is.
Ted Kremenek78d85f52007-10-30 21:08:08 +0000360 unsigned ApproxPos = Content->NumLines*QueriedFilePos / FileLen;
Chris Lattner1cf12bf2007-07-24 06:43:46 +0000361
362 // Check for -10 and +10 lines.
363 unsigned LowerBound = std::max(int(ApproxPos-10), 0);
364 unsigned UpperBound = std::min(ApproxPos+10, FileLen);
365
366 // If the computed lower bound is less than the query location, move it in.
367 if (SourceLineCache < SourceLineCacheStart+LowerBound &&
368 SourceLineCacheStart[LowerBound] < QueriedFilePos)
369 SourceLineCache = SourceLineCacheStart+LowerBound;
370
371 // If the computed upper bound is greater than the query location, move it.
372 if (SourceLineCacheEnd > SourceLineCacheStart+UpperBound &&
373 SourceLineCacheStart[UpperBound] >= QueriedFilePos)
374 SourceLineCacheEnd = SourceLineCacheStart+UpperBound;
375 }
376
377 unsigned *Pos
378 = std::lower_bound(SourceLineCache, SourceLineCacheEnd, QueriedFilePos);
Chris Lattner5e36a7a2007-07-24 05:57:19 +0000379 unsigned LineNo = Pos-SourceLineCacheStart;
380
381 LastLineNoFileIDQuery = FileID;
Ted Kremenek78d85f52007-10-30 21:08:08 +0000382 LastLineNoContentCache = Content;
Chris Lattner5e36a7a2007-07-24 05:57:19 +0000383 LastLineNoFilePos = QueriedFilePos;
384 LastLineNoResult = LineNo;
385 return LineNo;
Reid Spencer5f016e22007-07-11 17:01:13 +0000386}
387
Reid Spencer5f016e22007-07-11 17:01:13 +0000388/// PrintStats - Print statistics to stderr.
389///
390void SourceManager::PrintStats() const {
391 std::cerr << "\n*** Source Manager Stats:\n";
392 std::cerr << FileInfos.size() << " files mapped, " << MemBufferInfos.size()
393 << " mem buffers mapped, " << FileIDs.size()
394 << " file ID's allocated.\n";
Chris Lattner9dc1f532007-07-20 16:37:10 +0000395 std::cerr << " " << FileIDs.size() << " normal buffer FileID's, "
396 << MacroIDs.size() << " macro expansion FileID's.\n";
Reid Spencer5f016e22007-07-11 17:01:13 +0000397
Reid Spencer5f016e22007-07-11 17:01:13 +0000398 unsigned NumLineNumsComputed = 0;
399 unsigned NumFileBytesMapped = 0;
Ted Kremenek78d85f52007-10-30 21:08:08 +0000400 for (std::set<ContentCache>::const_iterator I =
Reid Spencer5f016e22007-07-11 17:01:13 +0000401 FileInfos.begin(), E = FileInfos.end(); I != E; ++I) {
Ted Kremenek78d85f52007-10-30 21:08:08 +0000402 NumLineNumsComputed += I->SourceLineCache != 0;
403 NumFileBytesMapped += I->Buffer->getBufferSize();
Reid Spencer5f016e22007-07-11 17:01:13 +0000404 }
Ted Kremenek78d85f52007-10-30 21:08:08 +0000405
Reid Spencer5f016e22007-07-11 17:01:13 +0000406 std::cerr << NumFileBytesMapped << " bytes of files mapped, "
407 << NumLineNumsComputed << " files with line #'s computed.\n";
408}