blob: 380edc0f9317bb71a8d8f2f4329e92d719d225bf [file] [log] [blame]
//===--- SourceManager.cpp - Track and cache source files -----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//  This file implements the SourceManager interface.
//
//===----------------------------------------------------------------------===//
13
14#include "clang/Basic/SourceManager.h"
15#include "clang/Basic/FileManager.h"
Hartmut Kaiser34947252007-09-12 15:39:04 +000016#include "llvm/Config/config.h"
Chris Lattner5e36a7a2007-07-24 05:57:19 +000017#include "llvm/Support/Compiler.h"
Reid Spencer5f016e22007-07-11 17:01:13 +000018#include "llvm/Support/MemoryBuffer.h"
19#include "llvm/System/Path.h"
20#include <algorithm>
21#include <iostream>
Gabor Greif15012182007-07-12 16:00:00 +000022#include <fcntl.h>
Reid Spencer5f016e22007-07-11 17:01:13 +000023using namespace clang;
24using namespace SrcMgr;
25using llvm::MemoryBuffer;
26
27SourceManager::~SourceManager() {
28 for (std::map<const FileEntry *, FileInfo>::iterator I = FileInfos.begin(),
29 E = FileInfos.end(); I != E; ++I) {
30 delete I->second.Buffer;
31 delete[] I->second.SourceLineCache;
32 }
33
34 for (std::list<InfoRec>::iterator I = MemBufferInfos.begin(),
35 E = MemBufferInfos.end(); I != E; ++I) {
36 delete I->second.Buffer;
37 delete[] I->second.SourceLineCache;
38 }
39}
40
41
42// FIXME: REMOVE THESE
43#include <unistd.h>
44#include <sys/types.h>
Chris Lattner6a4545e2007-09-03 18:24:56 +000045#if !defined(_MSC_VER)
Reid Spencer5f016e22007-07-11 17:01:13 +000046#include <sys/uio.h>
47#include <sys/fcntl.h>
Chris Lattner6a4545e2007-09-03 18:24:56 +000048#else
49#include <io.h>
50#endif
Reid Spencer5f016e22007-07-11 17:01:13 +000051#include <cerrno>
52
53static const MemoryBuffer *ReadFileFast(const FileEntry *FileEnt) {
54#if 0
55 // FIXME: Reintroduce this and zap this function once the common llvm stuff
56 // is fast for the small case.
57 return MemoryBuffer::getFile(FileEnt->getName(), strlen(FileEnt->getName()),
58 FileEnt->getSize());
59#endif
60
61 // If the file is larger than some threshold, use 'read', otherwise use mmap.
62 if (FileEnt->getSize() >= 4096*4)
63 return MemoryBuffer::getFile(FileEnt->getName(), strlen(FileEnt->getName()),
64 0, FileEnt->getSize());
65
66 MemoryBuffer *SB = MemoryBuffer::getNewUninitMemBuffer(FileEnt->getSize(),
67 FileEnt->getName());
68 char *BufPtr = const_cast<char*>(SB->getBufferStart());
Chris Lattner6a4545e2007-09-03 18:24:56 +000069
Hartmut Kaiser34947252007-09-12 15:39:04 +000070#if defined(LLVM_ON_WIN32)
Chris Lattner6a4545e2007-09-03 18:24:56 +000071 int FD = ::open(FileEnt->getName(), O_RDONLY|O_BINARY);
72#else
Reid Spencer5f016e22007-07-11 17:01:13 +000073 int FD = ::open(FileEnt->getName(), O_RDONLY);
Chris Lattner6a4545e2007-09-03 18:24:56 +000074#endif
Reid Spencer5f016e22007-07-11 17:01:13 +000075 if (FD == -1) {
76 delete SB;
77 return 0;
78 }
79
80 unsigned BytesLeft = FileEnt->getSize();
81 while (BytesLeft) {
82 ssize_t NumRead = ::read(FD, BufPtr, BytesLeft);
83 if (NumRead != -1) {
84 BytesLeft -= NumRead;
85 BufPtr += NumRead;
86 } else if (errno == EINTR) {
87 // try again
88 } else {
89 // error reading.
90 close(FD);
91 delete SB;
92 return 0;
93 }
94 }
95 close(FD);
96
97 return SB;
98}
99
100
101/// getFileInfo - Create or return a cached FileInfo for the specified file.
102///
103const InfoRec *
104SourceManager::getInfoRec(const FileEntry *FileEnt) {
105 assert(FileEnt && "Didn't specify a file entry to use?");
106 // Do we already have information about this file?
107 std::map<const FileEntry *, FileInfo>::iterator I =
108 FileInfos.lower_bound(FileEnt);
109 if (I != FileInfos.end() && I->first == FileEnt)
110 return &*I;
111
112 // Nope, get information.
113 const MemoryBuffer *File = ReadFileFast(FileEnt);
114 if (File == 0)
115 return 0;
116
117 const InfoRec &Entry =
118 *FileInfos.insert(I, std::make_pair(FileEnt, FileInfo()));
119 FileInfo &Info = const_cast<FileInfo &>(Entry.second);
120
121 Info.Buffer = File;
122 Info.SourceLineCache = 0;
123 Info.NumLines = 0;
124 return &Entry;
125}
126
127
128/// createMemBufferInfoRec - Create a new info record for the specified memory
129/// buffer. This does no caching.
130const InfoRec *
131SourceManager::createMemBufferInfoRec(const MemoryBuffer *Buffer) {
132 // Add a new info record to the MemBufferInfos list and return it.
133 FileInfo FI;
134 FI.Buffer = Buffer;
135 FI.SourceLineCache = 0;
136 FI.NumLines = 0;
137 MemBufferInfos.push_back(InfoRec(0, FI));
138 return &MemBufferInfos.back();
139}
140
141
142/// createFileID - Create a new fileID for the specified InfoRec and include
143/// position. This works regardless of whether the InfoRec corresponds to a
144/// file or some other input source.
145unsigned SourceManager::createFileID(const InfoRec *File,
146 SourceLocation IncludePos) {
147 // If FileEnt is really large (e.g. it's a large .i file), we may not be able
148 // to fit an arbitrary position in the file in the FilePos field. To handle
149 // this, we create one FileID for each chunk of the file that fits in a
150 // FilePos field.
151 unsigned FileSize = File->second.Buffer->getBufferSize();
152 if (FileSize+1 < (1 << SourceLocation::FilePosBits)) {
Chris Lattner9dc1f532007-07-20 16:37:10 +0000153 FileIDs.push_back(FileIDInfo::get(IncludePos, 0, File));
Reid Spencer5f016e22007-07-11 17:01:13 +0000154 assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) &&
155 "Ran out of file ID's!");
156 return FileIDs.size();
157 }
158
159 // Create one FileID for each chunk of the file.
160 unsigned Result = FileIDs.size()+1;
161
162 unsigned ChunkNo = 0;
163 while (1) {
Chris Lattner9dc1f532007-07-20 16:37:10 +0000164 FileIDs.push_back(FileIDInfo::get(IncludePos, ChunkNo++, File));
Reid Spencer5f016e22007-07-11 17:01:13 +0000165
166 if (FileSize+1 < (1 << SourceLocation::FilePosBits)) break;
167 FileSize -= (1 << SourceLocation::FilePosBits);
168 }
169
170 assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) &&
171 "Ran out of file ID's!");
172 return Result;
173}
174
175/// getInstantiationLoc - Return a new SourceLocation that encodes the fact
176/// that a token from physloc PhysLoc should actually be referenced from
177/// InstantiationLoc.
Chris Lattner31bb8be2007-07-20 18:00:12 +0000178SourceLocation SourceManager::getInstantiationLoc(SourceLocation PhysLoc,
Reid Spencer5f016e22007-07-11 17:01:13 +0000179 SourceLocation InstantLoc) {
Chris Lattnerabca2bb2007-07-15 06:35:27 +0000180 // The specified source location may be a mapped location, due to a macro
181 // instantiation or #line directive. Strip off this information to find out
182 // where the characters are actually located.
Chris Lattner31bb8be2007-07-20 18:00:12 +0000183 PhysLoc = getPhysicalLoc(PhysLoc);
Reid Spencer5f016e22007-07-11 17:01:13 +0000184
185 // Resolve InstantLoc down to a real logical location.
186 InstantLoc = getLogicalLoc(InstantLoc);
187
Chris Lattner31bb8be2007-07-20 18:00:12 +0000188
189 // If the last macro id is close to the currently requested location, try to
Chris Lattner991ae512007-08-02 03:55:37 +0000190 // reuse it. This implements a small cache.
191 for (int i = MacroIDs.size()-1, e = MacroIDs.size()-6; i >= 0 && i != e; --i){
192 MacroIDInfo &LastOne = MacroIDs[i];
Chris Lattnerd1623a82007-07-21 06:41:57 +0000193
Chris Lattner991ae512007-08-02 03:55:37 +0000194 // The instanitation point and source physloc have to exactly match to reuse
195 // (for now). We could allow "nearby" instantiations in the future.
196 if (LastOne.getInstantiationLoc() != InstantLoc ||
197 LastOne.getPhysicalLoc().getFileID() != PhysLoc.getFileID())
198 continue;
199
200 // Check to see if the physloc of the token came from near enough to reuse.
201 int PhysDelta = PhysLoc.getRawFilePos() -
202 LastOne.getPhysicalLoc().getRawFilePos();
203 if (SourceLocation::isValidMacroPhysOffs(PhysDelta))
Chris Lattnerc1e50fc2007-08-02 04:22:39 +0000204 return SourceLocation::getMacroLoc(i, PhysDelta, 0);
Chris Lattner31bb8be2007-07-20 18:00:12 +0000205 }
206
Chris Lattner45011cf2007-07-20 18:26:45 +0000207
Chris Lattner9dc1f532007-07-20 16:37:10 +0000208 MacroIDs.push_back(MacroIDInfo::get(InstantLoc, PhysLoc));
Chris Lattner9dc1f532007-07-20 16:37:10 +0000209 return SourceLocation::getMacroLoc(MacroIDs.size()-1, 0, 0);
Reid Spencer5f016e22007-07-11 17:01:13 +0000210}
211
212
213
214/// getCharacterData - Return a pointer to the start of the specified location
215/// in the appropriate MemoryBuffer.
216const char *SourceManager::getCharacterData(SourceLocation SL) const {
217 // Note that this is a hot function in the getSpelling() path, which is
218 // heavily used by -E mode.
Chris Lattner9dc1f532007-07-20 16:37:10 +0000219 SL = getPhysicalLoc(SL);
Reid Spencer5f016e22007-07-11 17:01:13 +0000220
Chris Lattner9dc1f532007-07-20 16:37:10 +0000221 return getFileInfo(SL.getFileID())->Buffer->getBufferStart() +
222 getFullFilePos(SL);
Reid Spencer5f016e22007-07-11 17:01:13 +0000223}
224
Reid Spencer5f016e22007-07-11 17:01:13 +0000225
Chris Lattner9dc1f532007-07-20 16:37:10 +0000226/// getColumnNumber - Return the column # for the specified file position.
Reid Spencer5f016e22007-07-11 17:01:13 +0000227/// this is significantly cheaper to compute than the line number. This returns
228/// zero if the column number isn't known.
229unsigned SourceManager::getColumnNumber(SourceLocation Loc) const {
Reid Spencer5f016e22007-07-11 17:01:13 +0000230 unsigned FileID = Loc.getFileID();
231 if (FileID == 0) return 0;
232
Chris Lattner9dc1f532007-07-20 16:37:10 +0000233 unsigned FilePos = getFullFilePos(Loc);
Reid Spencer5f016e22007-07-11 17:01:13 +0000234 const MemoryBuffer *Buffer = getBuffer(FileID);
235 const char *Buf = Buffer->getBufferStart();
236
237 unsigned LineStart = FilePos;
238 while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r')
239 --LineStart;
240 return FilePos-LineStart+1;
241}
242
243/// getSourceName - This method returns the name of the file or buffer that
244/// the SourceLocation specifies. This can be modified with #line directives,
245/// etc.
Chris Lattner8b6ca882007-08-30 05:59:30 +0000246const char *SourceManager::getSourceName(SourceLocation Loc) const {
Reid Spencer5f016e22007-07-11 17:01:13 +0000247 unsigned FileID = Loc.getFileID();
248 if (FileID == 0) return "";
249 return getFileInfo(FileID)->Buffer->getBufferIdentifier();
250}
251
Chris Lattner5e36a7a2007-07-24 05:57:19 +0000252static void ComputeLineNumbers(FileInfo *FI) DISABLE_INLINE;
253static void ComputeLineNumbers(FileInfo *FI) {
254 const MemoryBuffer *Buffer = FI->Buffer;
255
256 // Find the file offsets of all of the *physical* source lines. This does
257 // not look at trigraphs, escaped newlines, or anything else tricky.
258 std::vector<unsigned> LineOffsets;
259
260 // Line #1 starts at char 0.
261 LineOffsets.push_back(0);
262
263 const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart();
264 const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd();
265 unsigned Offs = 0;
266 while (1) {
267 // Skip over the contents of the line.
268 // TODO: Vectorize this? This is very performance sensitive for programs
269 // with lots of diagnostics and in -E mode.
270 const unsigned char *NextBuf = (const unsigned char *)Buf;
271 while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0')
272 ++NextBuf;
273 Offs += NextBuf-Buf;
274 Buf = NextBuf;
275
276 if (Buf[0] == '\n' || Buf[0] == '\r') {
277 // If this is \n\r or \r\n, skip both characters.
278 if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1])
279 ++Offs, ++Buf;
280 ++Offs, ++Buf;
281 LineOffsets.push_back(Offs);
282 } else {
283 // Otherwise, this is a null. If end of file, exit.
284 if (Buf == End) break;
285 // Otherwise, skip the null.
286 ++Offs, ++Buf;
287 }
288 }
289 LineOffsets.push_back(Offs);
290
291 // Copy the offsets into the FileInfo structure.
292 FI->NumLines = LineOffsets.size();
293 FI->SourceLineCache = new unsigned[LineOffsets.size()];
294 std::copy(LineOffsets.begin(), LineOffsets.end(), FI->SourceLineCache);
295}
Reid Spencer5f016e22007-07-11 17:01:13 +0000296
297/// getLineNumber - Given a SourceLocation, return the physical line number
298/// for the position indicated. This requires building and caching a table of
299/// line offsets for the MemoryBuffer, so this is not cheap: use only when
300/// about to emit a diagnostic.
301unsigned SourceManager::getLineNumber(SourceLocation Loc) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000302 unsigned FileID = Loc.getFileID();
303 if (FileID == 0) return 0;
Chris Lattner5e36a7a2007-07-24 05:57:19 +0000304 FileInfo *FileInfo;
305
306 if (LastLineNoFileIDQuery == FileID)
307 FileInfo = LastLineNoFileInfo;
308 else
309 FileInfo = getFileInfo(FileID);
Reid Spencer5f016e22007-07-11 17:01:13 +0000310
311 // If this is the first use of line information for this buffer, compute the
Chris Lattner5e36a7a2007-07-24 05:57:19 +0000312 /// SourceLineCache for it on demand.
313 if (FileInfo->SourceLineCache == 0)
314 ComputeLineNumbers(FileInfo);
Reid Spencer5f016e22007-07-11 17:01:13 +0000315
316 // Okay, we know we have a line number table. Do a binary search to find the
317 // line number that this character position lands on.
Reid Spencer5f016e22007-07-11 17:01:13 +0000318 unsigned *SourceLineCache = FileInfo->SourceLineCache;
Chris Lattner5e36a7a2007-07-24 05:57:19 +0000319 unsigned *SourceLineCacheStart = SourceLineCache;
320 unsigned *SourceLineCacheEnd = SourceLineCache + FileInfo->NumLines;
321
322 unsigned QueriedFilePos = getFullFilePos(Loc)+1;
323
324 // If the previous query was to the same file, we know both the file pos from
325 // that query and the line number returned. This allows us to narrow the
326 // search space from the entire file to something near the match.
327 if (LastLineNoFileIDQuery == FileID) {
328 if (QueriedFilePos >= LastLineNoFilePos) {
329 SourceLineCache = SourceLineCache+LastLineNoResult-1;
330
331 // The query is likely to be nearby the previous one. Here we check to
332 // see if it is within 5, 10 or 20 lines. It can be far away in cases
333 // where big comment blocks and vertical whitespace eat up lines but
334 // contribute no tokens.
335 if (SourceLineCache+5 < SourceLineCacheEnd) {
336 if (SourceLineCache[5] > QueriedFilePos)
337 SourceLineCacheEnd = SourceLineCache+5;
338 else if (SourceLineCache+10 < SourceLineCacheEnd) {
339 if (SourceLineCache[10] > QueriedFilePos)
340 SourceLineCacheEnd = SourceLineCache+10;
341 else if (SourceLineCache+20 < SourceLineCacheEnd) {
342 if (SourceLineCache[20] > QueriedFilePos)
343 SourceLineCacheEnd = SourceLineCache+20;
344 }
345 }
346 }
347 } else {
348 SourceLineCacheEnd = SourceLineCache+LastLineNoResult+1;
349 }
350 }
351
Chris Lattner1cf12bf2007-07-24 06:43:46 +0000352 // If the spread is large, do a "radix" test as our initial guess, based on
353 // the assumption that lines average to approximately the same length.
354 // NOTE: This is currently disabled, as it does not appear to be profitable in
355 // initial measurements.
356 if (0 && SourceLineCacheEnd-SourceLineCache > 20) {
357 unsigned FileLen = FileInfo->SourceLineCache[FileInfo->NumLines-1];
358
359 // Take a stab at guessing where it is.
360 unsigned ApproxPos = FileInfo->NumLines*QueriedFilePos / FileLen;
361
362 // Check for -10 and +10 lines.
363 unsigned LowerBound = std::max(int(ApproxPos-10), 0);
364 unsigned UpperBound = std::min(ApproxPos+10, FileLen);
365
366 // If the computed lower bound is less than the query location, move it in.
367 if (SourceLineCache < SourceLineCacheStart+LowerBound &&
368 SourceLineCacheStart[LowerBound] < QueriedFilePos)
369 SourceLineCache = SourceLineCacheStart+LowerBound;
370
371 // If the computed upper bound is greater than the query location, move it.
372 if (SourceLineCacheEnd > SourceLineCacheStart+UpperBound &&
373 SourceLineCacheStart[UpperBound] >= QueriedFilePos)
374 SourceLineCacheEnd = SourceLineCacheStart+UpperBound;
375 }
376
377 unsigned *Pos
378 = std::lower_bound(SourceLineCache, SourceLineCacheEnd, QueriedFilePos);
Chris Lattner5e36a7a2007-07-24 05:57:19 +0000379 unsigned LineNo = Pos-SourceLineCacheStart;
380
381 LastLineNoFileIDQuery = FileID;
382 LastLineNoFileInfo = FileInfo;
383 LastLineNoFilePos = QueriedFilePos;
384 LastLineNoResult = LineNo;
385 return LineNo;
Reid Spencer5f016e22007-07-11 17:01:13 +0000386}
387
Reid Spencer5f016e22007-07-11 17:01:13 +0000388/// PrintStats - Print statistics to stderr.
389///
390void SourceManager::PrintStats() const {
391 std::cerr << "\n*** Source Manager Stats:\n";
392 std::cerr << FileInfos.size() << " files mapped, " << MemBufferInfos.size()
393 << " mem buffers mapped, " << FileIDs.size()
394 << " file ID's allocated.\n";
Chris Lattner9dc1f532007-07-20 16:37:10 +0000395 std::cerr << " " << FileIDs.size() << " normal buffer FileID's, "
396 << MacroIDs.size() << " macro expansion FileID's.\n";
Reid Spencer5f016e22007-07-11 17:01:13 +0000397
398
399
400 unsigned NumLineNumsComputed = 0;
401 unsigned NumFileBytesMapped = 0;
402 for (std::map<const FileEntry *, FileInfo>::const_iterator I =
403 FileInfos.begin(), E = FileInfos.end(); I != E; ++I) {
404 NumLineNumsComputed += I->second.SourceLineCache != 0;
405 NumFileBytesMapped += I->second.Buffer->getBufferSize();
406 }
407 std::cerr << NumFileBytesMapped << " bytes of files mapped, "
408 << NumLineNumsComputed << " files with line #'s computed.\n";
409}