blob: cc1312f049951294b357fa901b62f6b880f238f3 [file] [log] [blame]
Chris Lattner22eb9722006-06-18 05:43:12 +00001//===--- SourceManager.cpp - Track and cache source files -----------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file was developed by Chris Lattner and is distributed under
6// the University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the SourceManager interface.
11//
12//===----------------------------------------------------------------------===//
13
14#include "clang/Basic/SourceManager.h"
15#include "clang/Basic/FileManager.h"
Hartmut Kaiser02491042007-09-12 15:39:04 +000016#include "llvm/Config/config.h"
Chris Lattner8996fff2007-07-24 05:57:19 +000017#include "llvm/Support/Compiler.h"
Chris Lattner739e7392007-04-29 07:12:06 +000018#include "llvm/Support/MemoryBuffer.h"
Chris Lattner22eb9722006-06-18 05:43:12 +000019#include "llvm/System/Path.h"
20#include <algorithm>
21#include <iostream>
Gabor Greifffc337b2007-07-12 16:00:00 +000022#include <fcntl.h>
Chris Lattner22eb9722006-06-18 05:43:12 +000023using namespace clang;
Chris Lattner5f4b1ff2006-06-20 05:02:40 +000024using namespace SrcMgr;
Chris Lattner23b7eb62007-06-15 23:05:46 +000025using llvm::MemoryBuffer;
Chris Lattner22eb9722006-06-18 05:43:12 +000026
27SourceManager::~SourceManager() {
28 for (std::map<const FileEntry *, FileInfo>::iterator I = FileInfos.begin(),
29 E = FileInfos.end(); I != E; ++I) {
30 delete I->second.Buffer;
31 delete[] I->second.SourceLineCache;
32 }
33
34 for (std::list<InfoRec>::iterator I = MemBufferInfos.begin(),
35 E = MemBufferInfos.end(); I != E; ++I) {
36 delete I->second.Buffer;
37 delete[] I->second.SourceLineCache;
38 }
39}
40
Chris Lattnere92976d2007-04-29 06:44:41 +000041
42// FIXME: REMOVE THESE
43#include <unistd.h>
44#include <sys/types.h>
Chris Lattnercd1e8d22007-09-03 18:24:56 +000045#if !defined(_MSC_VER)
Chris Lattnere92976d2007-04-29 06:44:41 +000046#include <sys/uio.h>
47#include <sys/fcntl.h>
Chris Lattnercd1e8d22007-09-03 18:24:56 +000048#else
49#include <io.h>
50#endif
Chris Lattnere92976d2007-04-29 06:44:41 +000051#include <cerrno>
52
Chris Lattner739e7392007-04-29 07:12:06 +000053static const MemoryBuffer *ReadFileFast(const FileEntry *FileEnt) {
Chris Lattnere92976d2007-04-29 06:44:41 +000054#if 0
55 // FIXME: Reintroduce this and zap this function once the common llvm stuff
56 // is fast for the small case.
Chris Lattner739e7392007-04-29 07:12:06 +000057 return MemoryBuffer::getFile(FileEnt->getName(), strlen(FileEnt->getName()),
Chris Lattnere92976d2007-04-29 06:44:41 +000058 FileEnt->getSize());
59#endif
60
61 // If the file is larger than some threshold, use 'read', otherwise use mmap.
62 if (FileEnt->getSize() >= 4096*4)
Chris Lattner739e7392007-04-29 07:12:06 +000063 return MemoryBuffer::getFile(FileEnt->getName(), strlen(FileEnt->getName()),
Chris Lattner776050e2007-05-06 23:34:12 +000064 0, FileEnt->getSize());
Chris Lattnere92976d2007-04-29 06:44:41 +000065
Chris Lattner739e7392007-04-29 07:12:06 +000066 MemoryBuffer *SB = MemoryBuffer::getNewUninitMemBuffer(FileEnt->getSize(),
Chris Lattnere92976d2007-04-29 06:44:41 +000067 FileEnt->getName());
68 char *BufPtr = const_cast<char*>(SB->getBufferStart());
Chris Lattnercd1e8d22007-09-03 18:24:56 +000069
Hartmut Kaiser02491042007-09-12 15:39:04 +000070#if defined(LLVM_ON_WIN32)
Chris Lattnercd1e8d22007-09-03 18:24:56 +000071 int FD = ::open(FileEnt->getName(), O_RDONLY|O_BINARY);
72#else
Chris Lattnere92976d2007-04-29 06:44:41 +000073 int FD = ::open(FileEnt->getName(), O_RDONLY);
Chris Lattnercd1e8d22007-09-03 18:24:56 +000074#endif
Chris Lattnere92976d2007-04-29 06:44:41 +000075 if (FD == -1) {
76 delete SB;
77 return 0;
78 }
79
80 unsigned BytesLeft = FileEnt->getSize();
81 while (BytesLeft) {
82 ssize_t NumRead = ::read(FD, BufPtr, BytesLeft);
83 if (NumRead != -1) {
84 BytesLeft -= NumRead;
85 BufPtr += NumRead;
86 } else if (errno == EINTR) {
87 // try again
88 } else {
89 // error reading.
90 close(FD);
91 delete SB;
92 return 0;
93 }
94 }
95 close(FD);
96
97 return SB;
98}
99
100
Chris Lattner22eb9722006-06-18 05:43:12 +0000101/// getFileInfo - Create or return a cached FileInfo for the specified file.
102///
Chris Lattner5f4b1ff2006-06-20 05:02:40 +0000103const InfoRec *
Chris Lattner22eb9722006-06-18 05:43:12 +0000104SourceManager::getInfoRec(const FileEntry *FileEnt) {
105 assert(FileEnt && "Didn't specify a file entry to use?");
106 // Do we already have information about this file?
107 std::map<const FileEntry *, FileInfo>::iterator I =
108 FileInfos.lower_bound(FileEnt);
109 if (I != FileInfos.end() && I->first == FileEnt)
110 return &*I;
111
112 // Nope, get information.
Chris Lattner739e7392007-04-29 07:12:06 +0000113 const MemoryBuffer *File = ReadFileFast(FileEnt);
Chris Lattner35f99852007-04-29 06:08:57 +0000114 if (File == 0)
Chris Lattner22eb9722006-06-18 05:43:12 +0000115 return 0;
Chris Lattner22eb9722006-06-18 05:43:12 +0000116
117 const InfoRec &Entry =
118 *FileInfos.insert(I, std::make_pair(FileEnt, FileInfo()));
119 FileInfo &Info = const_cast<FileInfo &>(Entry.second);
120
121 Info.Buffer = File;
122 Info.SourceLineCache = 0;
123 Info.NumLines = 0;
124 return &Entry;
125}
126
127
128/// createMemBufferInfoRec - Create a new info record for the specified memory
129/// buffer. This does no caching.
Chris Lattner5f4b1ff2006-06-20 05:02:40 +0000130const InfoRec *
Chris Lattner739e7392007-04-29 07:12:06 +0000131SourceManager::createMemBufferInfoRec(const MemoryBuffer *Buffer) {
Chris Lattner22eb9722006-06-18 05:43:12 +0000132 // Add a new info record to the MemBufferInfos list and return it.
133 FileInfo FI;
134 FI.Buffer = Buffer;
135 FI.SourceLineCache = 0;
136 FI.NumLines = 0;
137 MemBufferInfos.push_back(InfoRec(0, FI));
138 return &MemBufferInfos.back();
139}
140
141
142/// createFileID - Create a new fileID for the specified InfoRec and include
143/// position. This works regardless of whether the InfoRec corresponds to a
144/// file or some other input source.
145unsigned SourceManager::createFileID(const InfoRec *File,
146 SourceLocation IncludePos) {
147 // If FileEnt is really large (e.g. it's a large .i file), we may not be able
148 // to fit an arbitrary position in the file in the FilePos field. To handle
149 // this, we create one FileID for each chunk of the file that fits in a
150 // FilePos field.
151 unsigned FileSize = File->second.Buffer->getBufferSize();
152 if (FileSize+1 < (1 << SourceLocation::FilePosBits)) {
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000153 FileIDs.push_back(FileIDInfo::get(IncludePos, 0, File));
Chris Lattner2a904d02006-10-22 06:33:42 +0000154 assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) &&
155 "Ran out of file ID's!");
Chris Lattner22eb9722006-06-18 05:43:12 +0000156 return FileIDs.size();
157 }
158
159 // Create one FileID for each chunk of the file.
160 unsigned Result = FileIDs.size()+1;
161
162 unsigned ChunkNo = 0;
163 while (1) {
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000164 FileIDs.push_back(FileIDInfo::get(IncludePos, ChunkNo++, File));
Chris Lattner22eb9722006-06-18 05:43:12 +0000165
166 if (FileSize+1 < (1 << SourceLocation::FilePosBits)) break;
167 FileSize -= (1 << SourceLocation::FilePosBits);
168 }
169
Chris Lattner2a904d02006-10-22 06:33:42 +0000170 assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) &&
171 "Ran out of file ID's!");
Chris Lattner22eb9722006-06-18 05:43:12 +0000172 return Result;
173}
174
Chris Lattner7d6a4f62006-06-30 06:10:08 +0000175/// getInstantiationLoc - Return a new SourceLocation that encodes the fact
176/// that a token from physloc PhysLoc should actually be referenced from
177/// InstantiationLoc.
Chris Lattnerca8ebc02007-07-20 18:00:12 +0000178SourceLocation SourceManager::getInstantiationLoc(SourceLocation PhysLoc,
Chris Lattner7d6a4f62006-06-30 06:10:08 +0000179 SourceLocation InstantLoc) {
Chris Lattner3fc74e22007-07-15 06:35:27 +0000180 // The specified source location may be a mapped location, due to a macro
181 // instantiation or #line directive. Strip off this information to find out
182 // where the characters are actually located.
Chris Lattnerca8ebc02007-07-20 18:00:12 +0000183 PhysLoc = getPhysicalLoc(PhysLoc);
Chris Lattner351050b2006-07-16 18:05:08 +0000184
Chris Lattner4c37a8c2006-06-30 06:15:08 +0000185 // Resolve InstantLoc down to a real logical location.
186 InstantLoc = getLogicalLoc(InstantLoc);
Chris Lattner7fa8c882006-07-20 06:48:52 +0000187
Chris Lattnerca8ebc02007-07-20 18:00:12 +0000188
189 // If the last macro id is close to the currently requested location, try to
Chris Lattner04e3d202007-08-02 03:55:37 +0000190 // reuse it. This implements a small cache.
191 for (int i = MacroIDs.size()-1, e = MacroIDs.size()-6; i >= 0 && i != e; --i){
192 MacroIDInfo &LastOne = MacroIDs[i];
Chris Lattner2e380892007-07-21 06:41:57 +0000193
Chris Lattner04e3d202007-08-02 03:55:37 +0000194 // The instanitation point and source physloc have to exactly match to reuse
195 // (for now). We could allow "nearby" instantiations in the future.
196 if (LastOne.getInstantiationLoc() != InstantLoc ||
197 LastOne.getPhysicalLoc().getFileID() != PhysLoc.getFileID())
198 continue;
199
200 // Check to see if the physloc of the token came from near enough to reuse.
201 int PhysDelta = PhysLoc.getRawFilePos() -
202 LastOne.getPhysicalLoc().getRawFilePos();
203 if (SourceLocation::isValidMacroPhysOffs(PhysDelta))
Chris Lattner6a340b42007-08-02 04:22:39 +0000204 return SourceLocation::getMacroLoc(i, PhysDelta, 0);
Chris Lattnerca8ebc02007-07-20 18:00:12 +0000205 }
206
Chris Lattnere60b21c2007-07-20 18:26:45 +0000207
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000208 MacroIDs.push_back(MacroIDInfo::get(InstantLoc, PhysLoc));
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000209 return SourceLocation::getMacroLoc(MacroIDs.size()-1, 0, 0);
Chris Lattner7d6a4f62006-06-30 06:10:08 +0000210}
211
Chris Lattner16a0de42007-10-11 18:38:32 +0000212/// getBufferData - Return a pointer to the start and end of the character
213/// data for the specified FileID.
214std::pair<const char*, const char*>
215SourceManager::getBufferData(unsigned FileID) const {
216 const llvm::MemoryBuffer *Buf = getBuffer(FileID);
217 return std::make_pair(Buf->getBufferStart(), Buf->getBufferEnd());
218}
Chris Lattner7d6a4f62006-06-30 06:10:08 +0000219
Chris Lattner30709b032006-06-21 03:01:55 +0000220
Chris Lattnerd01e2912006-06-18 16:22:51 +0000221/// getCharacterData - Return a pointer to the start of the specified location
Chris Lattner739e7392007-04-29 07:12:06 +0000222/// in the appropriate MemoryBuffer.
Chris Lattnerd01e2912006-06-18 16:22:51 +0000223const char *SourceManager::getCharacterData(SourceLocation SL) const {
Chris Lattnerd3a15f72006-07-04 23:01:03 +0000224 // Note that this is a hot function in the getSpelling() path, which is
225 // heavily used by -E mode.
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000226 SL = getPhysicalLoc(SL);
Chris Lattnerd3a15f72006-07-04 23:01:03 +0000227
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000228 return getFileInfo(SL.getFileID())->Buffer->getBufferStart() +
229 getFullFilePos(SL);
Chris Lattnerd01e2912006-06-18 16:22:51 +0000230}
231
Chris Lattner685730f2006-06-26 01:36:22 +0000232
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000233/// getColumnNumber - Return the column # for the specified file position.
Chris Lattner22eb9722006-06-18 05:43:12 +0000234/// this is significantly cheaper to compute than the line number. This returns
235/// zero if the column number isn't known.
Chris Lattner9a13bde2006-06-21 04:57:09 +0000236unsigned SourceManager::getColumnNumber(SourceLocation Loc) const {
237 unsigned FileID = Loc.getFileID();
Chris Lattner22eb9722006-06-18 05:43:12 +0000238 if (FileID == 0) return 0;
Chris Lattner30709b032006-06-21 03:01:55 +0000239
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000240 unsigned FilePos = getFullFilePos(Loc);
Chris Lattner739e7392007-04-29 07:12:06 +0000241 const MemoryBuffer *Buffer = getBuffer(FileID);
Chris Lattner22eb9722006-06-18 05:43:12 +0000242 const char *Buf = Buffer->getBufferStart();
243
244 unsigned LineStart = FilePos;
245 while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r')
246 --LineStart;
247 return FilePos-LineStart+1;
248}
249
Chris Lattner9a13bde2006-06-21 04:57:09 +0000250/// getSourceName - This method returns the name of the file or buffer that
251/// the SourceLocation specifies. This can be modified with #line directives,
252/// etc.
Chris Lattner257fa2a2007-08-30 05:59:30 +0000253const char *SourceManager::getSourceName(SourceLocation Loc) const {
Chris Lattner9a13bde2006-06-21 04:57:09 +0000254 unsigned FileID = Loc.getFileID();
255 if (FileID == 0) return "";
Chris Lattner2dffd2b2006-06-29 16:44:08 +0000256 return getFileInfo(FileID)->Buffer->getBufferIdentifier();
Chris Lattner9a13bde2006-06-21 04:57:09 +0000257}
258
Chris Lattner8996fff2007-07-24 05:57:19 +0000259static void ComputeLineNumbers(FileInfo *FI) DISABLE_INLINE;
260static void ComputeLineNumbers(FileInfo *FI) {
261 const MemoryBuffer *Buffer = FI->Buffer;
262
263 // Find the file offsets of all of the *physical* source lines. This does
264 // not look at trigraphs, escaped newlines, or anything else tricky.
265 std::vector<unsigned> LineOffsets;
266
267 // Line #1 starts at char 0.
268 LineOffsets.push_back(0);
269
270 const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart();
271 const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd();
272 unsigned Offs = 0;
273 while (1) {
274 // Skip over the contents of the line.
275 // TODO: Vectorize this? This is very performance sensitive for programs
276 // with lots of diagnostics and in -E mode.
277 const unsigned char *NextBuf = (const unsigned char *)Buf;
278 while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0')
279 ++NextBuf;
280 Offs += NextBuf-Buf;
281 Buf = NextBuf;
282
283 if (Buf[0] == '\n' || Buf[0] == '\r') {
284 // If this is \n\r or \r\n, skip both characters.
285 if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1])
286 ++Offs, ++Buf;
287 ++Offs, ++Buf;
288 LineOffsets.push_back(Offs);
289 } else {
290 // Otherwise, this is a null. If end of file, exit.
291 if (Buf == End) break;
292 // Otherwise, skip the null.
293 ++Offs, ++Buf;
294 }
295 }
296 LineOffsets.push_back(Offs);
297
298 // Copy the offsets into the FileInfo structure.
299 FI->NumLines = LineOffsets.size();
300 FI->SourceLineCache = new unsigned[LineOffsets.size()];
301 std::copy(LineOffsets.begin(), LineOffsets.end(), FI->SourceLineCache);
302}
Chris Lattner9a13bde2006-06-21 04:57:09 +0000303
Chris Lattner22eb9722006-06-18 05:43:12 +0000304/// getLineNumber - Given a SourceLocation, return the physical line number
305/// for the position indicated. This requires building and caching a table of
Chris Lattner739e7392007-04-29 07:12:06 +0000306/// line offsets for the MemoryBuffer, so this is not cheap: use only when
Chris Lattner22eb9722006-06-18 05:43:12 +0000307/// about to emit a diagnostic.
Chris Lattner9a13bde2006-06-21 04:57:09 +0000308unsigned SourceManager::getLineNumber(SourceLocation Loc) {
Chris Lattnera85a9d22006-07-02 20:07:52 +0000309 unsigned FileID = Loc.getFileID();
310 if (FileID == 0) return 0;
Chris Lattner8996fff2007-07-24 05:57:19 +0000311 FileInfo *FileInfo;
312
313 if (LastLineNoFileIDQuery == FileID)
314 FileInfo = LastLineNoFileInfo;
315 else
316 FileInfo = getFileInfo(FileID);
Chris Lattner22eb9722006-06-18 05:43:12 +0000317
318 // If this is the first use of line information for this buffer, compute the
Chris Lattner8996fff2007-07-24 05:57:19 +0000319 /// SourceLineCache for it on demand.
320 if (FileInfo->SourceLineCache == 0)
321 ComputeLineNumbers(FileInfo);
Chris Lattner22eb9722006-06-18 05:43:12 +0000322
323 // Okay, we know we have a line number table. Do a binary search to find the
324 // line number that this character position lands on.
Chris Lattner22eb9722006-06-18 05:43:12 +0000325 unsigned *SourceLineCache = FileInfo->SourceLineCache;
Chris Lattner8996fff2007-07-24 05:57:19 +0000326 unsigned *SourceLineCacheStart = SourceLineCache;
327 unsigned *SourceLineCacheEnd = SourceLineCache + FileInfo->NumLines;
328
329 unsigned QueriedFilePos = getFullFilePos(Loc)+1;
330
331 // If the previous query was to the same file, we know both the file pos from
332 // that query and the line number returned. This allows us to narrow the
333 // search space from the entire file to something near the match.
334 if (LastLineNoFileIDQuery == FileID) {
335 if (QueriedFilePos >= LastLineNoFilePos) {
336 SourceLineCache = SourceLineCache+LastLineNoResult-1;
337
338 // The query is likely to be nearby the previous one. Here we check to
339 // see if it is within 5, 10 or 20 lines. It can be far away in cases
340 // where big comment blocks and vertical whitespace eat up lines but
341 // contribute no tokens.
342 if (SourceLineCache+5 < SourceLineCacheEnd) {
343 if (SourceLineCache[5] > QueriedFilePos)
344 SourceLineCacheEnd = SourceLineCache+5;
345 else if (SourceLineCache+10 < SourceLineCacheEnd) {
346 if (SourceLineCache[10] > QueriedFilePos)
347 SourceLineCacheEnd = SourceLineCache+10;
348 else if (SourceLineCache+20 < SourceLineCacheEnd) {
349 if (SourceLineCache[20] > QueriedFilePos)
350 SourceLineCacheEnd = SourceLineCache+20;
351 }
352 }
353 }
354 } else {
355 SourceLineCacheEnd = SourceLineCache+LastLineNoResult+1;
356 }
357 }
358
Chris Lattner830a77f2007-07-24 06:43:46 +0000359 // If the spread is large, do a "radix" test as our initial guess, based on
360 // the assumption that lines average to approximately the same length.
361 // NOTE: This is currently disabled, as it does not appear to be profitable in
362 // initial measurements.
363 if (0 && SourceLineCacheEnd-SourceLineCache > 20) {
364 unsigned FileLen = FileInfo->SourceLineCache[FileInfo->NumLines-1];
365
366 // Take a stab at guessing where it is.
367 unsigned ApproxPos = FileInfo->NumLines*QueriedFilePos / FileLen;
368
369 // Check for -10 and +10 lines.
370 unsigned LowerBound = std::max(int(ApproxPos-10), 0);
371 unsigned UpperBound = std::min(ApproxPos+10, FileLen);
372
373 // If the computed lower bound is less than the query location, move it in.
374 if (SourceLineCache < SourceLineCacheStart+LowerBound &&
375 SourceLineCacheStart[LowerBound] < QueriedFilePos)
376 SourceLineCache = SourceLineCacheStart+LowerBound;
377
378 // If the computed upper bound is greater than the query location, move it.
379 if (SourceLineCacheEnd > SourceLineCacheStart+UpperBound &&
380 SourceLineCacheStart[UpperBound] >= QueriedFilePos)
381 SourceLineCacheEnd = SourceLineCacheStart+UpperBound;
382 }
383
384 unsigned *Pos
385 = std::lower_bound(SourceLineCache, SourceLineCacheEnd, QueriedFilePos);
Chris Lattner8996fff2007-07-24 05:57:19 +0000386 unsigned LineNo = Pos-SourceLineCacheStart;
387
388 LastLineNoFileIDQuery = FileID;
389 LastLineNoFileInfo = FileInfo;
390 LastLineNoFilePos = QueriedFilePos;
391 LastLineNoResult = LineNo;
392 return LineNo;
Chris Lattner22eb9722006-06-18 05:43:12 +0000393}
394
395/// PrintStats - Print statistics to stderr.
396///
397void SourceManager::PrintStats() const {
398 std::cerr << "\n*** Source Manager Stats:\n";
399 std::cerr << FileInfos.size() << " files mapped, " << MemBufferInfos.size()
400 << " mem buffers mapped, " << FileIDs.size()
401 << " file ID's allocated.\n";
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000402 std::cerr << " " << FileIDs.size() << " normal buffer FileID's, "
403 << MacroIDs.size() << " macro expansion FileID's.\n";
Chris Lattner30709b032006-06-21 03:01:55 +0000404
405
Chris Lattner22eb9722006-06-18 05:43:12 +0000406
407 unsigned NumLineNumsComputed = 0;
408 unsigned NumFileBytesMapped = 0;
409 for (std::map<const FileEntry *, FileInfo>::const_iterator I =
410 FileInfos.begin(), E = FileInfos.end(); I != E; ++I) {
411 NumLineNumsComputed += I->second.SourceLineCache != 0;
412 NumFileBytesMapped += I->second.Buffer->getBufferSize();
413 }
414 std::cerr << NumFileBytesMapped << " bytes of files mapped, "
415 << NumLineNumsComputed << " files with line #'s computed.\n";
416}