blob: 569c864dde7fd5314f73e65c00ea50a757daa002 [file] [log] [blame]
Chris Lattner22eb9722006-06-18 05:43:12 +00001//===--- SourceManager.cpp - Track and cache source files -----------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file was developed by Chris Lattner and is distributed under
6// the University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the SourceManager interface.
11//
12//===----------------------------------------------------------------------===//
13
14#include "clang/Basic/SourceManager.h"
15#include "clang/Basic/FileManager.h"
Chris Lattner8996fff2007-07-24 05:57:19 +000016#include "llvm/Support/Compiler.h"
Chris Lattner739e7392007-04-29 07:12:06 +000017#include "llvm/Support/MemoryBuffer.h"
Chris Lattner22eb9722006-06-18 05:43:12 +000018#include "llvm/System/Path.h"
19#include <algorithm>
20#include <iostream>
Gabor Greifffc337b2007-07-12 16:00:00 +000021#include <fcntl.h>
Chris Lattner22eb9722006-06-18 05:43:12 +000022using namespace clang;
Chris Lattner5f4b1ff2006-06-20 05:02:40 +000023using namespace SrcMgr;
Chris Lattner23b7eb62007-06-15 23:05:46 +000024using llvm::MemoryBuffer;
Chris Lattner22eb9722006-06-18 05:43:12 +000025
26SourceManager::~SourceManager() {
27 for (std::map<const FileEntry *, FileInfo>::iterator I = FileInfos.begin(),
28 E = FileInfos.end(); I != E; ++I) {
29 delete I->second.Buffer;
30 delete[] I->second.SourceLineCache;
31 }
32
33 for (std::list<InfoRec>::iterator I = MemBufferInfos.begin(),
34 E = MemBufferInfos.end(); I != E; ++I) {
35 delete I->second.Buffer;
36 delete[] I->second.SourceLineCache;
37 }
38}
39
Chris Lattnere92976d2007-04-29 06:44:41 +000040
41// FIXME: REMOVE THESE
42#include <unistd.h>
43#include <sys/types.h>
Chris Lattnercd1e8d22007-09-03 18:24:56 +000044#if !defined(_MSC_VER)
Chris Lattnere92976d2007-04-29 06:44:41 +000045#include <sys/uio.h>
46#include <sys/fcntl.h>
Chris Lattnercd1e8d22007-09-03 18:24:56 +000047#else
48#include <io.h>
49#endif
Chris Lattnere92976d2007-04-29 06:44:41 +000050#include <cerrno>
51
Chris Lattner739e7392007-04-29 07:12:06 +000052static const MemoryBuffer *ReadFileFast(const FileEntry *FileEnt) {
Chris Lattnere92976d2007-04-29 06:44:41 +000053#if 0
54 // FIXME: Reintroduce this and zap this function once the common llvm stuff
55 // is fast for the small case.
Chris Lattner739e7392007-04-29 07:12:06 +000056 return MemoryBuffer::getFile(FileEnt->getName(), strlen(FileEnt->getName()),
Chris Lattnere92976d2007-04-29 06:44:41 +000057 FileEnt->getSize());
58#endif
59
60 // If the file is larger than some threshold, use 'read', otherwise use mmap.
61 if (FileEnt->getSize() >= 4096*4)
Chris Lattner739e7392007-04-29 07:12:06 +000062 return MemoryBuffer::getFile(FileEnt->getName(), strlen(FileEnt->getName()),
Chris Lattner776050e2007-05-06 23:34:12 +000063 0, FileEnt->getSize());
Chris Lattnere92976d2007-04-29 06:44:41 +000064
Chris Lattner739e7392007-04-29 07:12:06 +000065 MemoryBuffer *SB = MemoryBuffer::getNewUninitMemBuffer(FileEnt->getSize(),
Chris Lattnere92976d2007-04-29 06:44:41 +000066 FileEnt->getName());
67 char *BufPtr = const_cast<char*>(SB->getBufferStart());
Chris Lattnercd1e8d22007-09-03 18:24:56 +000068
69#if defined(_WIN32) || defined(_WIN64)
70 int FD = ::open(FileEnt->getName(), O_RDONLY|O_BINARY);
71#else
Chris Lattnere92976d2007-04-29 06:44:41 +000072 int FD = ::open(FileEnt->getName(), O_RDONLY);
Chris Lattnercd1e8d22007-09-03 18:24:56 +000073#endif
Chris Lattnere92976d2007-04-29 06:44:41 +000074 if (FD == -1) {
75 delete SB;
76 return 0;
77 }
78
79 unsigned BytesLeft = FileEnt->getSize();
80 while (BytesLeft) {
81 ssize_t NumRead = ::read(FD, BufPtr, BytesLeft);
82 if (NumRead != -1) {
83 BytesLeft -= NumRead;
84 BufPtr += NumRead;
85 } else if (errno == EINTR) {
86 // try again
87 } else {
88 // error reading.
89 close(FD);
90 delete SB;
91 return 0;
92 }
93 }
94 close(FD);
95
96 return SB;
97}
98
99
Chris Lattner22eb9722006-06-18 05:43:12 +0000100/// getFileInfo - Create or return a cached FileInfo for the specified file.
101///
Chris Lattner5f4b1ff2006-06-20 05:02:40 +0000102const InfoRec *
Chris Lattner22eb9722006-06-18 05:43:12 +0000103SourceManager::getInfoRec(const FileEntry *FileEnt) {
104 assert(FileEnt && "Didn't specify a file entry to use?");
105 // Do we already have information about this file?
106 std::map<const FileEntry *, FileInfo>::iterator I =
107 FileInfos.lower_bound(FileEnt);
108 if (I != FileInfos.end() && I->first == FileEnt)
109 return &*I;
110
111 // Nope, get information.
Chris Lattner739e7392007-04-29 07:12:06 +0000112 const MemoryBuffer *File = ReadFileFast(FileEnt);
Chris Lattner35f99852007-04-29 06:08:57 +0000113 if (File == 0)
Chris Lattner22eb9722006-06-18 05:43:12 +0000114 return 0;
Chris Lattner22eb9722006-06-18 05:43:12 +0000115
116 const InfoRec &Entry =
117 *FileInfos.insert(I, std::make_pair(FileEnt, FileInfo()));
118 FileInfo &Info = const_cast<FileInfo &>(Entry.second);
119
120 Info.Buffer = File;
121 Info.SourceLineCache = 0;
122 Info.NumLines = 0;
123 return &Entry;
124}
125
126
127/// createMemBufferInfoRec - Create a new info record for the specified memory
128/// buffer. This does no caching.
Chris Lattner5f4b1ff2006-06-20 05:02:40 +0000129const InfoRec *
Chris Lattner739e7392007-04-29 07:12:06 +0000130SourceManager::createMemBufferInfoRec(const MemoryBuffer *Buffer) {
Chris Lattner22eb9722006-06-18 05:43:12 +0000131 // Add a new info record to the MemBufferInfos list and return it.
132 FileInfo FI;
133 FI.Buffer = Buffer;
134 FI.SourceLineCache = 0;
135 FI.NumLines = 0;
136 MemBufferInfos.push_back(InfoRec(0, FI));
137 return &MemBufferInfos.back();
138}
139
140
141/// createFileID - Create a new fileID for the specified InfoRec and include
142/// position. This works regardless of whether the InfoRec corresponds to a
143/// file or some other input source.
144unsigned SourceManager::createFileID(const InfoRec *File,
145 SourceLocation IncludePos) {
146 // If FileEnt is really large (e.g. it's a large .i file), we may not be able
147 // to fit an arbitrary position in the file in the FilePos field. To handle
148 // this, we create one FileID for each chunk of the file that fits in a
149 // FilePos field.
150 unsigned FileSize = File->second.Buffer->getBufferSize();
151 if (FileSize+1 < (1 << SourceLocation::FilePosBits)) {
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000152 FileIDs.push_back(FileIDInfo::get(IncludePos, 0, File));
Chris Lattner2a904d02006-10-22 06:33:42 +0000153 assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) &&
154 "Ran out of file ID's!");
Chris Lattner22eb9722006-06-18 05:43:12 +0000155 return FileIDs.size();
156 }
157
158 // Create one FileID for each chunk of the file.
159 unsigned Result = FileIDs.size()+1;
160
161 unsigned ChunkNo = 0;
162 while (1) {
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000163 FileIDs.push_back(FileIDInfo::get(IncludePos, ChunkNo++, File));
Chris Lattner22eb9722006-06-18 05:43:12 +0000164
165 if (FileSize+1 < (1 << SourceLocation::FilePosBits)) break;
166 FileSize -= (1 << SourceLocation::FilePosBits);
167 }
168
Chris Lattner2a904d02006-10-22 06:33:42 +0000169 assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) &&
170 "Ran out of file ID's!");
Chris Lattner22eb9722006-06-18 05:43:12 +0000171 return Result;
172}
173
Chris Lattner7d6a4f62006-06-30 06:10:08 +0000174/// getInstantiationLoc - Return a new SourceLocation that encodes the fact
175/// that a token from physloc PhysLoc should actually be referenced from
176/// InstantiationLoc.
Chris Lattnerca8ebc02007-07-20 18:00:12 +0000177SourceLocation SourceManager::getInstantiationLoc(SourceLocation PhysLoc,
Chris Lattner7d6a4f62006-06-30 06:10:08 +0000178 SourceLocation InstantLoc) {
Chris Lattner3fc74e22007-07-15 06:35:27 +0000179 // The specified source location may be a mapped location, due to a macro
180 // instantiation or #line directive. Strip off this information to find out
181 // where the characters are actually located.
Chris Lattnerca8ebc02007-07-20 18:00:12 +0000182 PhysLoc = getPhysicalLoc(PhysLoc);
Chris Lattner351050b2006-07-16 18:05:08 +0000183
Chris Lattner4c37a8c2006-06-30 06:15:08 +0000184 // Resolve InstantLoc down to a real logical location.
185 InstantLoc = getLogicalLoc(InstantLoc);
Chris Lattner7fa8c882006-07-20 06:48:52 +0000186
Chris Lattnerca8ebc02007-07-20 18:00:12 +0000187
188 // If the last macro id is close to the currently requested location, try to
Chris Lattner04e3d202007-08-02 03:55:37 +0000189 // reuse it. This implements a small cache.
190 for (int i = MacroIDs.size()-1, e = MacroIDs.size()-6; i >= 0 && i != e; --i){
191 MacroIDInfo &LastOne = MacroIDs[i];
Chris Lattner2e380892007-07-21 06:41:57 +0000192
Chris Lattner04e3d202007-08-02 03:55:37 +0000193 // The instanitation point and source physloc have to exactly match to reuse
194 // (for now). We could allow "nearby" instantiations in the future.
195 if (LastOne.getInstantiationLoc() != InstantLoc ||
196 LastOne.getPhysicalLoc().getFileID() != PhysLoc.getFileID())
197 continue;
198
199 // Check to see if the physloc of the token came from near enough to reuse.
200 int PhysDelta = PhysLoc.getRawFilePos() -
201 LastOne.getPhysicalLoc().getRawFilePos();
202 if (SourceLocation::isValidMacroPhysOffs(PhysDelta))
Chris Lattner6a340b42007-08-02 04:22:39 +0000203 return SourceLocation::getMacroLoc(i, PhysDelta, 0);
Chris Lattnerca8ebc02007-07-20 18:00:12 +0000204 }
205
Chris Lattnere60b21c2007-07-20 18:26:45 +0000206
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000207 MacroIDs.push_back(MacroIDInfo::get(InstantLoc, PhysLoc));
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000208 return SourceLocation::getMacroLoc(MacroIDs.size()-1, 0, 0);
Chris Lattner7d6a4f62006-06-30 06:10:08 +0000209}
210
211
Chris Lattner30709b032006-06-21 03:01:55 +0000212
Chris Lattnerd01e2912006-06-18 16:22:51 +0000213/// getCharacterData - Return a pointer to the start of the specified location
Chris Lattner739e7392007-04-29 07:12:06 +0000214/// in the appropriate MemoryBuffer.
Chris Lattnerd01e2912006-06-18 16:22:51 +0000215const char *SourceManager::getCharacterData(SourceLocation SL) const {
Chris Lattnerd3a15f72006-07-04 23:01:03 +0000216 // Note that this is a hot function in the getSpelling() path, which is
217 // heavily used by -E mode.
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000218 SL = getPhysicalLoc(SL);
Chris Lattnerd3a15f72006-07-04 23:01:03 +0000219
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000220 return getFileInfo(SL.getFileID())->Buffer->getBufferStart() +
221 getFullFilePos(SL);
Chris Lattnerd01e2912006-06-18 16:22:51 +0000222}
223
Chris Lattner685730f2006-06-26 01:36:22 +0000224
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000225/// getColumnNumber - Return the column # for the specified file position.
Chris Lattner22eb9722006-06-18 05:43:12 +0000226/// this is significantly cheaper to compute than the line number. This returns
227/// zero if the column number isn't known.
Chris Lattner9a13bde2006-06-21 04:57:09 +0000228unsigned SourceManager::getColumnNumber(SourceLocation Loc) const {
229 unsigned FileID = Loc.getFileID();
Chris Lattner22eb9722006-06-18 05:43:12 +0000230 if (FileID == 0) return 0;
Chris Lattner30709b032006-06-21 03:01:55 +0000231
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000232 unsigned FilePos = getFullFilePos(Loc);
Chris Lattner739e7392007-04-29 07:12:06 +0000233 const MemoryBuffer *Buffer = getBuffer(FileID);
Chris Lattner22eb9722006-06-18 05:43:12 +0000234 const char *Buf = Buffer->getBufferStart();
235
236 unsigned LineStart = FilePos;
237 while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r')
238 --LineStart;
239 return FilePos-LineStart+1;
240}
241
Chris Lattner9a13bde2006-06-21 04:57:09 +0000242/// getSourceName - This method returns the name of the file or buffer that
243/// the SourceLocation specifies. This can be modified with #line directives,
244/// etc.
Chris Lattner257fa2a2007-08-30 05:59:30 +0000245const char *SourceManager::getSourceName(SourceLocation Loc) const {
Chris Lattner9a13bde2006-06-21 04:57:09 +0000246 unsigned FileID = Loc.getFileID();
247 if (FileID == 0) return "";
Chris Lattner2dffd2b2006-06-29 16:44:08 +0000248 return getFileInfo(FileID)->Buffer->getBufferIdentifier();
Chris Lattner9a13bde2006-06-21 04:57:09 +0000249}
250
Chris Lattner8996fff2007-07-24 05:57:19 +0000251static void ComputeLineNumbers(FileInfo *FI) DISABLE_INLINE;
252static void ComputeLineNumbers(FileInfo *FI) {
253 const MemoryBuffer *Buffer = FI->Buffer;
254
255 // Find the file offsets of all of the *physical* source lines. This does
256 // not look at trigraphs, escaped newlines, or anything else tricky.
257 std::vector<unsigned> LineOffsets;
258
259 // Line #1 starts at char 0.
260 LineOffsets.push_back(0);
261
262 const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart();
263 const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd();
264 unsigned Offs = 0;
265 while (1) {
266 // Skip over the contents of the line.
267 // TODO: Vectorize this? This is very performance sensitive for programs
268 // with lots of diagnostics and in -E mode.
269 const unsigned char *NextBuf = (const unsigned char *)Buf;
270 while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0')
271 ++NextBuf;
272 Offs += NextBuf-Buf;
273 Buf = NextBuf;
274
275 if (Buf[0] == '\n' || Buf[0] == '\r') {
276 // If this is \n\r or \r\n, skip both characters.
277 if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1])
278 ++Offs, ++Buf;
279 ++Offs, ++Buf;
280 LineOffsets.push_back(Offs);
281 } else {
282 // Otherwise, this is a null. If end of file, exit.
283 if (Buf == End) break;
284 // Otherwise, skip the null.
285 ++Offs, ++Buf;
286 }
287 }
288 LineOffsets.push_back(Offs);
289
290 // Copy the offsets into the FileInfo structure.
291 FI->NumLines = LineOffsets.size();
292 FI->SourceLineCache = new unsigned[LineOffsets.size()];
293 std::copy(LineOffsets.begin(), LineOffsets.end(), FI->SourceLineCache);
294}
Chris Lattner9a13bde2006-06-21 04:57:09 +0000295
Chris Lattner22eb9722006-06-18 05:43:12 +0000296/// getLineNumber - Given a SourceLocation, return the physical line number
297/// for the position indicated. This requires building and caching a table of
Chris Lattner739e7392007-04-29 07:12:06 +0000298/// line offsets for the MemoryBuffer, so this is not cheap: use only when
Chris Lattner22eb9722006-06-18 05:43:12 +0000299/// about to emit a diagnostic.
Chris Lattner9a13bde2006-06-21 04:57:09 +0000300unsigned SourceManager::getLineNumber(SourceLocation Loc) {
Chris Lattnera85a9d22006-07-02 20:07:52 +0000301 unsigned FileID = Loc.getFileID();
302 if (FileID == 0) return 0;
Chris Lattner8996fff2007-07-24 05:57:19 +0000303 FileInfo *FileInfo;
304
305 if (LastLineNoFileIDQuery == FileID)
306 FileInfo = LastLineNoFileInfo;
307 else
308 FileInfo = getFileInfo(FileID);
Chris Lattner22eb9722006-06-18 05:43:12 +0000309
310 // If this is the first use of line information for this buffer, compute the
Chris Lattner8996fff2007-07-24 05:57:19 +0000311 /// SourceLineCache for it on demand.
312 if (FileInfo->SourceLineCache == 0)
313 ComputeLineNumbers(FileInfo);
Chris Lattner22eb9722006-06-18 05:43:12 +0000314
315 // Okay, we know we have a line number table. Do a binary search to find the
316 // line number that this character position lands on.
Chris Lattner22eb9722006-06-18 05:43:12 +0000317 unsigned *SourceLineCache = FileInfo->SourceLineCache;
Chris Lattner8996fff2007-07-24 05:57:19 +0000318 unsigned *SourceLineCacheStart = SourceLineCache;
319 unsigned *SourceLineCacheEnd = SourceLineCache + FileInfo->NumLines;
320
321 unsigned QueriedFilePos = getFullFilePos(Loc)+1;
322
323 // If the previous query was to the same file, we know both the file pos from
324 // that query and the line number returned. This allows us to narrow the
325 // search space from the entire file to something near the match.
326 if (LastLineNoFileIDQuery == FileID) {
327 if (QueriedFilePos >= LastLineNoFilePos) {
328 SourceLineCache = SourceLineCache+LastLineNoResult-1;
329
330 // The query is likely to be nearby the previous one. Here we check to
331 // see if it is within 5, 10 or 20 lines. It can be far away in cases
332 // where big comment blocks and vertical whitespace eat up lines but
333 // contribute no tokens.
334 if (SourceLineCache+5 < SourceLineCacheEnd) {
335 if (SourceLineCache[5] > QueriedFilePos)
336 SourceLineCacheEnd = SourceLineCache+5;
337 else if (SourceLineCache+10 < SourceLineCacheEnd) {
338 if (SourceLineCache[10] > QueriedFilePos)
339 SourceLineCacheEnd = SourceLineCache+10;
340 else if (SourceLineCache+20 < SourceLineCacheEnd) {
341 if (SourceLineCache[20] > QueriedFilePos)
342 SourceLineCacheEnd = SourceLineCache+20;
343 }
344 }
345 }
346 } else {
347 SourceLineCacheEnd = SourceLineCache+LastLineNoResult+1;
348 }
349 }
350
Chris Lattner830a77f2007-07-24 06:43:46 +0000351 // If the spread is large, do a "radix" test as our initial guess, based on
352 // the assumption that lines average to approximately the same length.
353 // NOTE: This is currently disabled, as it does not appear to be profitable in
354 // initial measurements.
355 if (0 && SourceLineCacheEnd-SourceLineCache > 20) {
356 unsigned FileLen = FileInfo->SourceLineCache[FileInfo->NumLines-1];
357
358 // Take a stab at guessing where it is.
359 unsigned ApproxPos = FileInfo->NumLines*QueriedFilePos / FileLen;
360
361 // Check for -10 and +10 lines.
362 unsigned LowerBound = std::max(int(ApproxPos-10), 0);
363 unsigned UpperBound = std::min(ApproxPos+10, FileLen);
364
365 // If the computed lower bound is less than the query location, move it in.
366 if (SourceLineCache < SourceLineCacheStart+LowerBound &&
367 SourceLineCacheStart[LowerBound] < QueriedFilePos)
368 SourceLineCache = SourceLineCacheStart+LowerBound;
369
370 // If the computed upper bound is greater than the query location, move it.
371 if (SourceLineCacheEnd > SourceLineCacheStart+UpperBound &&
372 SourceLineCacheStart[UpperBound] >= QueriedFilePos)
373 SourceLineCacheEnd = SourceLineCacheStart+UpperBound;
374 }
375
376 unsigned *Pos
377 = std::lower_bound(SourceLineCache, SourceLineCacheEnd, QueriedFilePos);
Chris Lattner8996fff2007-07-24 05:57:19 +0000378 unsigned LineNo = Pos-SourceLineCacheStart;
379
380 LastLineNoFileIDQuery = FileID;
381 LastLineNoFileInfo = FileInfo;
382 LastLineNoFilePos = QueriedFilePos;
383 LastLineNoResult = LineNo;
384 return LineNo;
Chris Lattner22eb9722006-06-18 05:43:12 +0000385}
386
387/// PrintStats - Print statistics to stderr.
388///
389void SourceManager::PrintStats() const {
390 std::cerr << "\n*** Source Manager Stats:\n";
391 std::cerr << FileInfos.size() << " files mapped, " << MemBufferInfos.size()
392 << " mem buffers mapped, " << FileIDs.size()
393 << " file ID's allocated.\n";
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000394 std::cerr << " " << FileIDs.size() << " normal buffer FileID's, "
395 << MacroIDs.size() << " macro expansion FileID's.\n";
Chris Lattner30709b032006-06-21 03:01:55 +0000396
397
Chris Lattner22eb9722006-06-18 05:43:12 +0000398
399 unsigned NumLineNumsComputed = 0;
400 unsigned NumFileBytesMapped = 0;
401 for (std::map<const FileEntry *, FileInfo>::const_iterator I =
402 FileInfos.begin(), E = FileInfos.end(); I != E; ++I) {
403 NumLineNumsComputed += I->second.SourceLineCache != 0;
404 NumFileBytesMapped += I->second.Buffer->getBufferSize();
405 }
406 std::cerr << NumFileBytesMapped << " bytes of files mapped, "
407 << NumLineNumsComputed << " files with line #'s computed.\n";
408}