blob: 380edc0f9317bb71a8d8f2f4329e92d719d225bf [file] [log] [blame]
Chris Lattner22eb9722006-06-18 05:43:12 +00001//===--- SourceManager.cpp - Track and cache source files -----------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file was developed by Chris Lattner and is distributed under
6// the University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the SourceManager interface.
11//
12//===----------------------------------------------------------------------===//
13
14#include "clang/Basic/SourceManager.h"
15#include "clang/Basic/FileManager.h"
Hartmut Kaiser02491042007-09-12 15:39:04 +000016#include "llvm/Config/config.h"
Chris Lattner8996fff2007-07-24 05:57:19 +000017#include "llvm/Support/Compiler.h"
Chris Lattner739e7392007-04-29 07:12:06 +000018#include "llvm/Support/MemoryBuffer.h"
Chris Lattner22eb9722006-06-18 05:43:12 +000019#include "llvm/System/Path.h"
20#include <algorithm>
21#include <iostream>
Gabor Greifffc337b2007-07-12 16:00:00 +000022#include <fcntl.h>
Chris Lattner22eb9722006-06-18 05:43:12 +000023using namespace clang;
Chris Lattner5f4b1ff2006-06-20 05:02:40 +000024using namespace SrcMgr;
Chris Lattner23b7eb62007-06-15 23:05:46 +000025using llvm::MemoryBuffer;
Chris Lattner22eb9722006-06-18 05:43:12 +000026
27SourceManager::~SourceManager() {
28 for (std::map<const FileEntry *, FileInfo>::iterator I = FileInfos.begin(),
29 E = FileInfos.end(); I != E; ++I) {
30 delete I->second.Buffer;
31 delete[] I->second.SourceLineCache;
32 }
33
34 for (std::list<InfoRec>::iterator I = MemBufferInfos.begin(),
35 E = MemBufferInfos.end(); I != E; ++I) {
36 delete I->second.Buffer;
37 delete[] I->second.SourceLineCache;
38 }
39}
40
Chris Lattnere92976d2007-04-29 06:44:41 +000041
42// FIXME: REMOVE THESE
43#include <unistd.h>
44#include <sys/types.h>
Chris Lattnercd1e8d22007-09-03 18:24:56 +000045#if !defined(_MSC_VER)
Chris Lattnere92976d2007-04-29 06:44:41 +000046#include <sys/uio.h>
47#include <sys/fcntl.h>
Chris Lattnercd1e8d22007-09-03 18:24:56 +000048#else
49#include <io.h>
50#endif
Chris Lattnere92976d2007-04-29 06:44:41 +000051#include <cerrno>
52
Chris Lattner739e7392007-04-29 07:12:06 +000053static const MemoryBuffer *ReadFileFast(const FileEntry *FileEnt) {
Chris Lattnere92976d2007-04-29 06:44:41 +000054#if 0
55 // FIXME: Reintroduce this and zap this function once the common llvm stuff
56 // is fast for the small case.
Chris Lattner739e7392007-04-29 07:12:06 +000057 return MemoryBuffer::getFile(FileEnt->getName(), strlen(FileEnt->getName()),
Chris Lattnere92976d2007-04-29 06:44:41 +000058 FileEnt->getSize());
59#endif
60
61 // If the file is larger than some threshold, use 'read', otherwise use mmap.
62 if (FileEnt->getSize() >= 4096*4)
Chris Lattner739e7392007-04-29 07:12:06 +000063 return MemoryBuffer::getFile(FileEnt->getName(), strlen(FileEnt->getName()),
Chris Lattner776050e2007-05-06 23:34:12 +000064 0, FileEnt->getSize());
Chris Lattnere92976d2007-04-29 06:44:41 +000065
Chris Lattner739e7392007-04-29 07:12:06 +000066 MemoryBuffer *SB = MemoryBuffer::getNewUninitMemBuffer(FileEnt->getSize(),
Chris Lattnere92976d2007-04-29 06:44:41 +000067 FileEnt->getName());
68 char *BufPtr = const_cast<char*>(SB->getBufferStart());
Chris Lattnercd1e8d22007-09-03 18:24:56 +000069
Hartmut Kaiser02491042007-09-12 15:39:04 +000070#if defined(LLVM_ON_WIN32)
Chris Lattnercd1e8d22007-09-03 18:24:56 +000071 int FD = ::open(FileEnt->getName(), O_RDONLY|O_BINARY);
72#else
Chris Lattnere92976d2007-04-29 06:44:41 +000073 int FD = ::open(FileEnt->getName(), O_RDONLY);
Chris Lattnercd1e8d22007-09-03 18:24:56 +000074#endif
Chris Lattnere92976d2007-04-29 06:44:41 +000075 if (FD == -1) {
76 delete SB;
77 return 0;
78 }
79
80 unsigned BytesLeft = FileEnt->getSize();
81 while (BytesLeft) {
82 ssize_t NumRead = ::read(FD, BufPtr, BytesLeft);
83 if (NumRead != -1) {
84 BytesLeft -= NumRead;
85 BufPtr += NumRead;
86 } else if (errno == EINTR) {
87 // try again
88 } else {
89 // error reading.
90 close(FD);
91 delete SB;
92 return 0;
93 }
94 }
95 close(FD);
96
97 return SB;
98}
99
100
Chris Lattner22eb9722006-06-18 05:43:12 +0000101/// getFileInfo - Create or return a cached FileInfo for the specified file.
102///
Chris Lattner5f4b1ff2006-06-20 05:02:40 +0000103const InfoRec *
Chris Lattner22eb9722006-06-18 05:43:12 +0000104SourceManager::getInfoRec(const FileEntry *FileEnt) {
105 assert(FileEnt && "Didn't specify a file entry to use?");
106 // Do we already have information about this file?
107 std::map<const FileEntry *, FileInfo>::iterator I =
108 FileInfos.lower_bound(FileEnt);
109 if (I != FileInfos.end() && I->first == FileEnt)
110 return &*I;
111
112 // Nope, get information.
Chris Lattner739e7392007-04-29 07:12:06 +0000113 const MemoryBuffer *File = ReadFileFast(FileEnt);
Chris Lattner35f99852007-04-29 06:08:57 +0000114 if (File == 0)
Chris Lattner22eb9722006-06-18 05:43:12 +0000115 return 0;
Chris Lattner22eb9722006-06-18 05:43:12 +0000116
117 const InfoRec &Entry =
118 *FileInfos.insert(I, std::make_pair(FileEnt, FileInfo()));
119 FileInfo &Info = const_cast<FileInfo &>(Entry.second);
120
121 Info.Buffer = File;
122 Info.SourceLineCache = 0;
123 Info.NumLines = 0;
124 return &Entry;
125}
126
127
128/// createMemBufferInfoRec - Create a new info record for the specified memory
129/// buffer. This does no caching.
Chris Lattner5f4b1ff2006-06-20 05:02:40 +0000130const InfoRec *
Chris Lattner739e7392007-04-29 07:12:06 +0000131SourceManager::createMemBufferInfoRec(const MemoryBuffer *Buffer) {
Chris Lattner22eb9722006-06-18 05:43:12 +0000132 // Add a new info record to the MemBufferInfos list and return it.
133 FileInfo FI;
134 FI.Buffer = Buffer;
135 FI.SourceLineCache = 0;
136 FI.NumLines = 0;
137 MemBufferInfos.push_back(InfoRec(0, FI));
138 return &MemBufferInfos.back();
139}
140
141
142/// createFileID - Create a new fileID for the specified InfoRec and include
143/// position. This works regardless of whether the InfoRec corresponds to a
144/// file or some other input source.
145unsigned SourceManager::createFileID(const InfoRec *File,
146 SourceLocation IncludePos) {
147 // If FileEnt is really large (e.g. it's a large .i file), we may not be able
148 // to fit an arbitrary position in the file in the FilePos field. To handle
149 // this, we create one FileID for each chunk of the file that fits in a
150 // FilePos field.
151 unsigned FileSize = File->second.Buffer->getBufferSize();
152 if (FileSize+1 < (1 << SourceLocation::FilePosBits)) {
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000153 FileIDs.push_back(FileIDInfo::get(IncludePos, 0, File));
Chris Lattner2a904d02006-10-22 06:33:42 +0000154 assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) &&
155 "Ran out of file ID's!");
Chris Lattner22eb9722006-06-18 05:43:12 +0000156 return FileIDs.size();
157 }
158
159 // Create one FileID for each chunk of the file.
160 unsigned Result = FileIDs.size()+1;
161
162 unsigned ChunkNo = 0;
163 while (1) {
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000164 FileIDs.push_back(FileIDInfo::get(IncludePos, ChunkNo++, File));
Chris Lattner22eb9722006-06-18 05:43:12 +0000165
166 if (FileSize+1 < (1 << SourceLocation::FilePosBits)) break;
167 FileSize -= (1 << SourceLocation::FilePosBits);
168 }
169
Chris Lattner2a904d02006-10-22 06:33:42 +0000170 assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) &&
171 "Ran out of file ID's!");
Chris Lattner22eb9722006-06-18 05:43:12 +0000172 return Result;
173}
174
Chris Lattner7d6a4f62006-06-30 06:10:08 +0000175/// getInstantiationLoc - Return a new SourceLocation that encodes the fact
176/// that a token from physloc PhysLoc should actually be referenced from
177/// InstantiationLoc.
Chris Lattnerca8ebc02007-07-20 18:00:12 +0000178SourceLocation SourceManager::getInstantiationLoc(SourceLocation PhysLoc,
Chris Lattner7d6a4f62006-06-30 06:10:08 +0000179 SourceLocation InstantLoc) {
Chris Lattner3fc74e22007-07-15 06:35:27 +0000180 // The specified source location may be a mapped location, due to a macro
181 // instantiation or #line directive. Strip off this information to find out
182 // where the characters are actually located.
Chris Lattnerca8ebc02007-07-20 18:00:12 +0000183 PhysLoc = getPhysicalLoc(PhysLoc);
Chris Lattner351050b2006-07-16 18:05:08 +0000184
Chris Lattner4c37a8c2006-06-30 06:15:08 +0000185 // Resolve InstantLoc down to a real logical location.
186 InstantLoc = getLogicalLoc(InstantLoc);
Chris Lattner7fa8c882006-07-20 06:48:52 +0000187
Chris Lattnerca8ebc02007-07-20 18:00:12 +0000188
189 // If the last macro id is close to the currently requested location, try to
Chris Lattner04e3d202007-08-02 03:55:37 +0000190 // reuse it. This implements a small cache.
191 for (int i = MacroIDs.size()-1, e = MacroIDs.size()-6; i >= 0 && i != e; --i){
192 MacroIDInfo &LastOne = MacroIDs[i];
Chris Lattner2e380892007-07-21 06:41:57 +0000193
Chris Lattner04e3d202007-08-02 03:55:37 +0000194 // The instanitation point and source physloc have to exactly match to reuse
195 // (for now). We could allow "nearby" instantiations in the future.
196 if (LastOne.getInstantiationLoc() != InstantLoc ||
197 LastOne.getPhysicalLoc().getFileID() != PhysLoc.getFileID())
198 continue;
199
200 // Check to see if the physloc of the token came from near enough to reuse.
201 int PhysDelta = PhysLoc.getRawFilePos() -
202 LastOne.getPhysicalLoc().getRawFilePos();
203 if (SourceLocation::isValidMacroPhysOffs(PhysDelta))
Chris Lattner6a340b42007-08-02 04:22:39 +0000204 return SourceLocation::getMacroLoc(i, PhysDelta, 0);
Chris Lattnerca8ebc02007-07-20 18:00:12 +0000205 }
206
Chris Lattnere60b21c2007-07-20 18:26:45 +0000207
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000208 MacroIDs.push_back(MacroIDInfo::get(InstantLoc, PhysLoc));
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000209 return SourceLocation::getMacroLoc(MacroIDs.size()-1, 0, 0);
Chris Lattner7d6a4f62006-06-30 06:10:08 +0000210}
211
212
Chris Lattner30709b032006-06-21 03:01:55 +0000213
Chris Lattnerd01e2912006-06-18 16:22:51 +0000214/// getCharacterData - Return a pointer to the start of the specified location
Chris Lattner739e7392007-04-29 07:12:06 +0000215/// in the appropriate MemoryBuffer.
Chris Lattnerd01e2912006-06-18 16:22:51 +0000216const char *SourceManager::getCharacterData(SourceLocation SL) const {
Chris Lattnerd3a15f72006-07-04 23:01:03 +0000217 // Note that this is a hot function in the getSpelling() path, which is
218 // heavily used by -E mode.
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000219 SL = getPhysicalLoc(SL);
Chris Lattnerd3a15f72006-07-04 23:01:03 +0000220
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000221 return getFileInfo(SL.getFileID())->Buffer->getBufferStart() +
222 getFullFilePos(SL);
Chris Lattnerd01e2912006-06-18 16:22:51 +0000223}
224
Chris Lattner685730f2006-06-26 01:36:22 +0000225
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000226/// getColumnNumber - Return the column # for the specified file position.
Chris Lattner22eb9722006-06-18 05:43:12 +0000227/// this is significantly cheaper to compute than the line number. This returns
228/// zero if the column number isn't known.
Chris Lattner9a13bde2006-06-21 04:57:09 +0000229unsigned SourceManager::getColumnNumber(SourceLocation Loc) const {
230 unsigned FileID = Loc.getFileID();
Chris Lattner22eb9722006-06-18 05:43:12 +0000231 if (FileID == 0) return 0;
Chris Lattner30709b032006-06-21 03:01:55 +0000232
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000233 unsigned FilePos = getFullFilePos(Loc);
Chris Lattner739e7392007-04-29 07:12:06 +0000234 const MemoryBuffer *Buffer = getBuffer(FileID);
Chris Lattner22eb9722006-06-18 05:43:12 +0000235 const char *Buf = Buffer->getBufferStart();
236
237 unsigned LineStart = FilePos;
238 while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r')
239 --LineStart;
240 return FilePos-LineStart+1;
241}
242
Chris Lattner9a13bde2006-06-21 04:57:09 +0000243/// getSourceName - This method returns the name of the file or buffer that
244/// the SourceLocation specifies. This can be modified with #line directives,
245/// etc.
Chris Lattner257fa2a2007-08-30 05:59:30 +0000246const char *SourceManager::getSourceName(SourceLocation Loc) const {
Chris Lattner9a13bde2006-06-21 04:57:09 +0000247 unsigned FileID = Loc.getFileID();
248 if (FileID == 0) return "";
Chris Lattner2dffd2b2006-06-29 16:44:08 +0000249 return getFileInfo(FileID)->Buffer->getBufferIdentifier();
Chris Lattner9a13bde2006-06-21 04:57:09 +0000250}
251
Chris Lattner8996fff2007-07-24 05:57:19 +0000252static void ComputeLineNumbers(FileInfo *FI) DISABLE_INLINE;
253static void ComputeLineNumbers(FileInfo *FI) {
254 const MemoryBuffer *Buffer = FI->Buffer;
255
256 // Find the file offsets of all of the *physical* source lines. This does
257 // not look at trigraphs, escaped newlines, or anything else tricky.
258 std::vector<unsigned> LineOffsets;
259
260 // Line #1 starts at char 0.
261 LineOffsets.push_back(0);
262
263 const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart();
264 const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd();
265 unsigned Offs = 0;
266 while (1) {
267 // Skip over the contents of the line.
268 // TODO: Vectorize this? This is very performance sensitive for programs
269 // with lots of diagnostics and in -E mode.
270 const unsigned char *NextBuf = (const unsigned char *)Buf;
271 while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0')
272 ++NextBuf;
273 Offs += NextBuf-Buf;
274 Buf = NextBuf;
275
276 if (Buf[0] == '\n' || Buf[0] == '\r') {
277 // If this is \n\r or \r\n, skip both characters.
278 if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1])
279 ++Offs, ++Buf;
280 ++Offs, ++Buf;
281 LineOffsets.push_back(Offs);
282 } else {
283 // Otherwise, this is a null. If end of file, exit.
284 if (Buf == End) break;
285 // Otherwise, skip the null.
286 ++Offs, ++Buf;
287 }
288 }
289 LineOffsets.push_back(Offs);
290
291 // Copy the offsets into the FileInfo structure.
292 FI->NumLines = LineOffsets.size();
293 FI->SourceLineCache = new unsigned[LineOffsets.size()];
294 std::copy(LineOffsets.begin(), LineOffsets.end(), FI->SourceLineCache);
295}
Chris Lattner9a13bde2006-06-21 04:57:09 +0000296
Chris Lattner22eb9722006-06-18 05:43:12 +0000297/// getLineNumber - Given a SourceLocation, return the physical line number
298/// for the position indicated. This requires building and caching a table of
Chris Lattner739e7392007-04-29 07:12:06 +0000299/// line offsets for the MemoryBuffer, so this is not cheap: use only when
Chris Lattner22eb9722006-06-18 05:43:12 +0000300/// about to emit a diagnostic.
Chris Lattner9a13bde2006-06-21 04:57:09 +0000301unsigned SourceManager::getLineNumber(SourceLocation Loc) {
Chris Lattnera85a9d22006-07-02 20:07:52 +0000302 unsigned FileID = Loc.getFileID();
303 if (FileID == 0) return 0;
Chris Lattner8996fff2007-07-24 05:57:19 +0000304 FileInfo *FileInfo;
305
306 if (LastLineNoFileIDQuery == FileID)
307 FileInfo = LastLineNoFileInfo;
308 else
309 FileInfo = getFileInfo(FileID);
Chris Lattner22eb9722006-06-18 05:43:12 +0000310
311 // If this is the first use of line information for this buffer, compute the
Chris Lattner8996fff2007-07-24 05:57:19 +0000312 /// SourceLineCache for it on demand.
313 if (FileInfo->SourceLineCache == 0)
314 ComputeLineNumbers(FileInfo);
Chris Lattner22eb9722006-06-18 05:43:12 +0000315
316 // Okay, we know we have a line number table. Do a binary search to find the
317 // line number that this character position lands on.
Chris Lattner22eb9722006-06-18 05:43:12 +0000318 unsigned *SourceLineCache = FileInfo->SourceLineCache;
Chris Lattner8996fff2007-07-24 05:57:19 +0000319 unsigned *SourceLineCacheStart = SourceLineCache;
320 unsigned *SourceLineCacheEnd = SourceLineCache + FileInfo->NumLines;
321
322 unsigned QueriedFilePos = getFullFilePos(Loc)+1;
323
324 // If the previous query was to the same file, we know both the file pos from
325 // that query and the line number returned. This allows us to narrow the
326 // search space from the entire file to something near the match.
327 if (LastLineNoFileIDQuery == FileID) {
328 if (QueriedFilePos >= LastLineNoFilePos) {
329 SourceLineCache = SourceLineCache+LastLineNoResult-1;
330
331 // The query is likely to be nearby the previous one. Here we check to
332 // see if it is within 5, 10 or 20 lines. It can be far away in cases
333 // where big comment blocks and vertical whitespace eat up lines but
334 // contribute no tokens.
335 if (SourceLineCache+5 < SourceLineCacheEnd) {
336 if (SourceLineCache[5] > QueriedFilePos)
337 SourceLineCacheEnd = SourceLineCache+5;
338 else if (SourceLineCache+10 < SourceLineCacheEnd) {
339 if (SourceLineCache[10] > QueriedFilePos)
340 SourceLineCacheEnd = SourceLineCache+10;
341 else if (SourceLineCache+20 < SourceLineCacheEnd) {
342 if (SourceLineCache[20] > QueriedFilePos)
343 SourceLineCacheEnd = SourceLineCache+20;
344 }
345 }
346 }
347 } else {
348 SourceLineCacheEnd = SourceLineCache+LastLineNoResult+1;
349 }
350 }
351
Chris Lattner830a77f2007-07-24 06:43:46 +0000352 // If the spread is large, do a "radix" test as our initial guess, based on
353 // the assumption that lines average to approximately the same length.
354 // NOTE: This is currently disabled, as it does not appear to be profitable in
355 // initial measurements.
356 if (0 && SourceLineCacheEnd-SourceLineCache > 20) {
357 unsigned FileLen = FileInfo->SourceLineCache[FileInfo->NumLines-1];
358
359 // Take a stab at guessing where it is.
360 unsigned ApproxPos = FileInfo->NumLines*QueriedFilePos / FileLen;
361
362 // Check for -10 and +10 lines.
363 unsigned LowerBound = std::max(int(ApproxPos-10), 0);
364 unsigned UpperBound = std::min(ApproxPos+10, FileLen);
365
366 // If the computed lower bound is less than the query location, move it in.
367 if (SourceLineCache < SourceLineCacheStart+LowerBound &&
368 SourceLineCacheStart[LowerBound] < QueriedFilePos)
369 SourceLineCache = SourceLineCacheStart+LowerBound;
370
371 // If the computed upper bound is greater than the query location, move it.
372 if (SourceLineCacheEnd > SourceLineCacheStart+UpperBound &&
373 SourceLineCacheStart[UpperBound] >= QueriedFilePos)
374 SourceLineCacheEnd = SourceLineCacheStart+UpperBound;
375 }
376
377 unsigned *Pos
378 = std::lower_bound(SourceLineCache, SourceLineCacheEnd, QueriedFilePos);
Chris Lattner8996fff2007-07-24 05:57:19 +0000379 unsigned LineNo = Pos-SourceLineCacheStart;
380
381 LastLineNoFileIDQuery = FileID;
382 LastLineNoFileInfo = FileInfo;
383 LastLineNoFilePos = QueriedFilePos;
384 LastLineNoResult = LineNo;
385 return LineNo;
Chris Lattner22eb9722006-06-18 05:43:12 +0000386}
387
388/// PrintStats - Print statistics to stderr.
389///
390void SourceManager::PrintStats() const {
391 std::cerr << "\n*** Source Manager Stats:\n";
392 std::cerr << FileInfos.size() << " files mapped, " << MemBufferInfos.size()
393 << " mem buffers mapped, " << FileIDs.size()
394 << " file ID's allocated.\n";
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000395 std::cerr << " " << FileIDs.size() << " normal buffer FileID's, "
396 << MacroIDs.size() << " macro expansion FileID's.\n";
Chris Lattner30709b032006-06-21 03:01:55 +0000397
398
Chris Lattner22eb9722006-06-18 05:43:12 +0000399
400 unsigned NumLineNumsComputed = 0;
401 unsigned NumFileBytesMapped = 0;
402 for (std::map<const FileEntry *, FileInfo>::const_iterator I =
403 FileInfos.begin(), E = FileInfos.end(); I != E; ++I) {
404 NumLineNumsComputed += I->second.SourceLineCache != 0;
405 NumFileBytesMapped += I->second.Buffer->getBufferSize();
406 }
407 std::cerr << NumFileBytesMapped << " bytes of files mapped, "
408 << NumLineNumsComputed << " files with line #'s computed.\n";
409}