blob: 569c864dde7fd5314f73e65c00ea50a757daa002 [file] [log] [blame]
Chris Lattner4b009652007-07-25 00:24:17 +00001//===--- SourceManager.cpp - Track and cache source files -----------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file was developed by Chris Lattner and is distributed under
6// the University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the SourceManager interface.
11//
12//===----------------------------------------------------------------------===//
13
14#include "clang/Basic/SourceManager.h"
15#include "clang/Basic/FileManager.h"
16#include "llvm/Support/Compiler.h"
17#include "llvm/Support/MemoryBuffer.h"
18#include "llvm/System/Path.h"
19#include <algorithm>
20#include <iostream>
21#include <fcntl.h>
22using namespace clang;
23using namespace SrcMgr;
24using llvm::MemoryBuffer;
25
26SourceManager::~SourceManager() {
27 for (std::map<const FileEntry *, FileInfo>::iterator I = FileInfos.begin(),
28 E = FileInfos.end(); I != E; ++I) {
29 delete I->second.Buffer;
30 delete[] I->second.SourceLineCache;
31 }
32
33 for (std::list<InfoRec>::iterator I = MemBufferInfos.begin(),
34 E = MemBufferInfos.end(); I != E; ++I) {
35 delete I->second.Buffer;
36 delete[] I->second.SourceLineCache;
37 }
38}
39
40
41// FIXME: REMOVE THESE
42#include <unistd.h>
43#include <sys/types.h>
Chris Lattnera09a2c02007-09-03 18:24:56 +000044#if !defined(_MSC_VER)
Chris Lattner4b009652007-07-25 00:24:17 +000045#include <sys/uio.h>
46#include <sys/fcntl.h>
Chris Lattnera09a2c02007-09-03 18:24:56 +000047#else
48#include <io.h>
49#endif
Chris Lattner4b009652007-07-25 00:24:17 +000050#include <cerrno>
51
52static const MemoryBuffer *ReadFileFast(const FileEntry *FileEnt) {
53#if 0
54 // FIXME: Reintroduce this and zap this function once the common llvm stuff
55 // is fast for the small case.
56 return MemoryBuffer::getFile(FileEnt->getName(), strlen(FileEnt->getName()),
57 FileEnt->getSize());
58#endif
59
60 // If the file is larger than some threshold, use 'read', otherwise use mmap.
61 if (FileEnt->getSize() >= 4096*4)
62 return MemoryBuffer::getFile(FileEnt->getName(), strlen(FileEnt->getName()),
63 0, FileEnt->getSize());
64
65 MemoryBuffer *SB = MemoryBuffer::getNewUninitMemBuffer(FileEnt->getSize(),
66 FileEnt->getName());
67 char *BufPtr = const_cast<char*>(SB->getBufferStart());
Chris Lattnera09a2c02007-09-03 18:24:56 +000068
69#if defined(_WIN32) || defined(_WIN64)
70 int FD = ::open(FileEnt->getName(), O_RDONLY|O_BINARY);
71#else
Chris Lattner4b009652007-07-25 00:24:17 +000072 int FD = ::open(FileEnt->getName(), O_RDONLY);
Chris Lattnera09a2c02007-09-03 18:24:56 +000073#endif
Chris Lattner4b009652007-07-25 00:24:17 +000074 if (FD == -1) {
75 delete SB;
76 return 0;
77 }
78
79 unsigned BytesLeft = FileEnt->getSize();
80 while (BytesLeft) {
81 ssize_t NumRead = ::read(FD, BufPtr, BytesLeft);
82 if (NumRead != -1) {
83 BytesLeft -= NumRead;
84 BufPtr += NumRead;
85 } else if (errno == EINTR) {
86 // try again
87 } else {
88 // error reading.
89 close(FD);
90 delete SB;
91 return 0;
92 }
93 }
94 close(FD);
95
96 return SB;
97}
98
99
100/// getFileInfo - Create or return a cached FileInfo for the specified file.
101///
102const InfoRec *
103SourceManager::getInfoRec(const FileEntry *FileEnt) {
104 assert(FileEnt && "Didn't specify a file entry to use?");
105 // Do we already have information about this file?
106 std::map<const FileEntry *, FileInfo>::iterator I =
107 FileInfos.lower_bound(FileEnt);
108 if (I != FileInfos.end() && I->first == FileEnt)
109 return &*I;
110
111 // Nope, get information.
112 const MemoryBuffer *File = ReadFileFast(FileEnt);
113 if (File == 0)
114 return 0;
115
116 const InfoRec &Entry =
117 *FileInfos.insert(I, std::make_pair(FileEnt, FileInfo()));
118 FileInfo &Info = const_cast<FileInfo &>(Entry.second);
119
120 Info.Buffer = File;
121 Info.SourceLineCache = 0;
122 Info.NumLines = 0;
123 return &Entry;
124}
125
126
127/// createMemBufferInfoRec - Create a new info record for the specified memory
128/// buffer. This does no caching.
129const InfoRec *
130SourceManager::createMemBufferInfoRec(const MemoryBuffer *Buffer) {
131 // Add a new info record to the MemBufferInfos list and return it.
132 FileInfo FI;
133 FI.Buffer = Buffer;
134 FI.SourceLineCache = 0;
135 FI.NumLines = 0;
136 MemBufferInfos.push_back(InfoRec(0, FI));
137 return &MemBufferInfos.back();
138}
139
140
141/// createFileID - Create a new fileID for the specified InfoRec and include
142/// position. This works regardless of whether the InfoRec corresponds to a
143/// file or some other input source.
144unsigned SourceManager::createFileID(const InfoRec *File,
145 SourceLocation IncludePos) {
146 // If FileEnt is really large (e.g. it's a large .i file), we may not be able
147 // to fit an arbitrary position in the file in the FilePos field. To handle
148 // this, we create one FileID for each chunk of the file that fits in a
149 // FilePos field.
150 unsigned FileSize = File->second.Buffer->getBufferSize();
151 if (FileSize+1 < (1 << SourceLocation::FilePosBits)) {
152 FileIDs.push_back(FileIDInfo::get(IncludePos, 0, File));
153 assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) &&
154 "Ran out of file ID's!");
155 return FileIDs.size();
156 }
157
158 // Create one FileID for each chunk of the file.
159 unsigned Result = FileIDs.size()+1;
160
161 unsigned ChunkNo = 0;
162 while (1) {
163 FileIDs.push_back(FileIDInfo::get(IncludePos, ChunkNo++, File));
164
165 if (FileSize+1 < (1 << SourceLocation::FilePosBits)) break;
166 FileSize -= (1 << SourceLocation::FilePosBits);
167 }
168
169 assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) &&
170 "Ran out of file ID's!");
171 return Result;
172}
173
174/// getInstantiationLoc - Return a new SourceLocation that encodes the fact
175/// that a token from physloc PhysLoc should actually be referenced from
176/// InstantiationLoc.
177SourceLocation SourceManager::getInstantiationLoc(SourceLocation PhysLoc,
178 SourceLocation InstantLoc) {
179 // The specified source location may be a mapped location, due to a macro
180 // instantiation or #line directive. Strip off this information to find out
181 // where the characters are actually located.
182 PhysLoc = getPhysicalLoc(PhysLoc);
183
184 // Resolve InstantLoc down to a real logical location.
185 InstantLoc = getLogicalLoc(InstantLoc);
186
187
188 // If the last macro id is close to the currently requested location, try to
Chris Lattnerc5c08972007-08-02 03:55:37 +0000189 // reuse it. This implements a small cache.
190 for (int i = MacroIDs.size()-1, e = MacroIDs.size()-6; i >= 0 && i != e; --i){
191 MacroIDInfo &LastOne = MacroIDs[i];
Chris Lattner4b009652007-07-25 00:24:17 +0000192
Chris Lattnerc5c08972007-08-02 03:55:37 +0000193 // The instanitation point and source physloc have to exactly match to reuse
194 // (for now). We could allow "nearby" instantiations in the future.
195 if (LastOne.getInstantiationLoc() != InstantLoc ||
196 LastOne.getPhysicalLoc().getFileID() != PhysLoc.getFileID())
197 continue;
198
199 // Check to see if the physloc of the token came from near enough to reuse.
200 int PhysDelta = PhysLoc.getRawFilePos() -
201 LastOne.getPhysicalLoc().getRawFilePos();
202 if (SourceLocation::isValidMacroPhysOffs(PhysDelta))
Chris Lattnera835e042007-08-02 04:22:39 +0000203 return SourceLocation::getMacroLoc(i, PhysDelta, 0);
Chris Lattner4b009652007-07-25 00:24:17 +0000204 }
205
206
207 MacroIDs.push_back(MacroIDInfo::get(InstantLoc, PhysLoc));
208 return SourceLocation::getMacroLoc(MacroIDs.size()-1, 0, 0);
209}
210
211
212
213/// getCharacterData - Return a pointer to the start of the specified location
214/// in the appropriate MemoryBuffer.
215const char *SourceManager::getCharacterData(SourceLocation SL) const {
216 // Note that this is a hot function in the getSpelling() path, which is
217 // heavily used by -E mode.
218 SL = getPhysicalLoc(SL);
219
220 return getFileInfo(SL.getFileID())->Buffer->getBufferStart() +
221 getFullFilePos(SL);
222}
223
224
225/// getColumnNumber - Return the column # for the specified file position.
226/// this is significantly cheaper to compute than the line number. This returns
227/// zero if the column number isn't known.
228unsigned SourceManager::getColumnNumber(SourceLocation Loc) const {
229 unsigned FileID = Loc.getFileID();
230 if (FileID == 0) return 0;
231
232 unsigned FilePos = getFullFilePos(Loc);
233 const MemoryBuffer *Buffer = getBuffer(FileID);
234 const char *Buf = Buffer->getBufferStart();
235
236 unsigned LineStart = FilePos;
237 while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r')
238 --LineStart;
239 return FilePos-LineStart+1;
240}
241
242/// getSourceName - This method returns the name of the file or buffer that
243/// the SourceLocation specifies. This can be modified with #line directives,
244/// etc.
Chris Lattner37f041172007-08-30 05:59:30 +0000245const char *SourceManager::getSourceName(SourceLocation Loc) const {
Chris Lattner4b009652007-07-25 00:24:17 +0000246 unsigned FileID = Loc.getFileID();
247 if (FileID == 0) return "";
248 return getFileInfo(FileID)->Buffer->getBufferIdentifier();
249}
250
251static void ComputeLineNumbers(FileInfo *FI) DISABLE_INLINE;
252static void ComputeLineNumbers(FileInfo *FI) {
253 const MemoryBuffer *Buffer = FI->Buffer;
254
255 // Find the file offsets of all of the *physical* source lines. This does
256 // not look at trigraphs, escaped newlines, or anything else tricky.
257 std::vector<unsigned> LineOffsets;
258
259 // Line #1 starts at char 0.
260 LineOffsets.push_back(0);
261
262 const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart();
263 const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd();
264 unsigned Offs = 0;
265 while (1) {
266 // Skip over the contents of the line.
267 // TODO: Vectorize this? This is very performance sensitive for programs
268 // with lots of diagnostics and in -E mode.
269 const unsigned char *NextBuf = (const unsigned char *)Buf;
270 while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0')
271 ++NextBuf;
272 Offs += NextBuf-Buf;
273 Buf = NextBuf;
274
275 if (Buf[0] == '\n' || Buf[0] == '\r') {
276 // If this is \n\r or \r\n, skip both characters.
277 if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1])
278 ++Offs, ++Buf;
279 ++Offs, ++Buf;
280 LineOffsets.push_back(Offs);
281 } else {
282 // Otherwise, this is a null. If end of file, exit.
283 if (Buf == End) break;
284 // Otherwise, skip the null.
285 ++Offs, ++Buf;
286 }
287 }
288 LineOffsets.push_back(Offs);
289
290 // Copy the offsets into the FileInfo structure.
291 FI->NumLines = LineOffsets.size();
292 FI->SourceLineCache = new unsigned[LineOffsets.size()];
293 std::copy(LineOffsets.begin(), LineOffsets.end(), FI->SourceLineCache);
294}
295
296/// getLineNumber - Given a SourceLocation, return the physical line number
297/// for the position indicated. This requires building and caching a table of
298/// line offsets for the MemoryBuffer, so this is not cheap: use only when
299/// about to emit a diagnostic.
300unsigned SourceManager::getLineNumber(SourceLocation Loc) {
301 unsigned FileID = Loc.getFileID();
302 if (FileID == 0) return 0;
303 FileInfo *FileInfo;
304
305 if (LastLineNoFileIDQuery == FileID)
306 FileInfo = LastLineNoFileInfo;
307 else
308 FileInfo = getFileInfo(FileID);
309
310 // If this is the first use of line information for this buffer, compute the
311 /// SourceLineCache for it on demand.
312 if (FileInfo->SourceLineCache == 0)
313 ComputeLineNumbers(FileInfo);
314
315 // Okay, we know we have a line number table. Do a binary search to find the
316 // line number that this character position lands on.
317 unsigned *SourceLineCache = FileInfo->SourceLineCache;
318 unsigned *SourceLineCacheStart = SourceLineCache;
319 unsigned *SourceLineCacheEnd = SourceLineCache + FileInfo->NumLines;
320
321 unsigned QueriedFilePos = getFullFilePos(Loc)+1;
322
323 // If the previous query was to the same file, we know both the file pos from
324 // that query and the line number returned. This allows us to narrow the
325 // search space from the entire file to something near the match.
326 if (LastLineNoFileIDQuery == FileID) {
327 if (QueriedFilePos >= LastLineNoFilePos) {
328 SourceLineCache = SourceLineCache+LastLineNoResult-1;
329
330 // The query is likely to be nearby the previous one. Here we check to
331 // see if it is within 5, 10 or 20 lines. It can be far away in cases
332 // where big comment blocks and vertical whitespace eat up lines but
333 // contribute no tokens.
334 if (SourceLineCache+5 < SourceLineCacheEnd) {
335 if (SourceLineCache[5] > QueriedFilePos)
336 SourceLineCacheEnd = SourceLineCache+5;
337 else if (SourceLineCache+10 < SourceLineCacheEnd) {
338 if (SourceLineCache[10] > QueriedFilePos)
339 SourceLineCacheEnd = SourceLineCache+10;
340 else if (SourceLineCache+20 < SourceLineCacheEnd) {
341 if (SourceLineCache[20] > QueriedFilePos)
342 SourceLineCacheEnd = SourceLineCache+20;
343 }
344 }
345 }
346 } else {
347 SourceLineCacheEnd = SourceLineCache+LastLineNoResult+1;
348 }
349 }
350
351 // If the spread is large, do a "radix" test as our initial guess, based on
352 // the assumption that lines average to approximately the same length.
353 // NOTE: This is currently disabled, as it does not appear to be profitable in
354 // initial measurements.
355 if (0 && SourceLineCacheEnd-SourceLineCache > 20) {
356 unsigned FileLen = FileInfo->SourceLineCache[FileInfo->NumLines-1];
357
358 // Take a stab at guessing where it is.
359 unsigned ApproxPos = FileInfo->NumLines*QueriedFilePos / FileLen;
360
361 // Check for -10 and +10 lines.
362 unsigned LowerBound = std::max(int(ApproxPos-10), 0);
363 unsigned UpperBound = std::min(ApproxPos+10, FileLen);
364
365 // If the computed lower bound is less than the query location, move it in.
366 if (SourceLineCache < SourceLineCacheStart+LowerBound &&
367 SourceLineCacheStart[LowerBound] < QueriedFilePos)
368 SourceLineCache = SourceLineCacheStart+LowerBound;
369
370 // If the computed upper bound is greater than the query location, move it.
371 if (SourceLineCacheEnd > SourceLineCacheStart+UpperBound &&
372 SourceLineCacheStart[UpperBound] >= QueriedFilePos)
373 SourceLineCacheEnd = SourceLineCacheStart+UpperBound;
374 }
375
376 unsigned *Pos
377 = std::lower_bound(SourceLineCache, SourceLineCacheEnd, QueriedFilePos);
378 unsigned LineNo = Pos-SourceLineCacheStart;
379
380 LastLineNoFileIDQuery = FileID;
381 LastLineNoFileInfo = FileInfo;
382 LastLineNoFilePos = QueriedFilePos;
383 LastLineNoResult = LineNo;
384 return LineNo;
385}
386
387/// PrintStats - Print statistics to stderr.
388///
389void SourceManager::PrintStats() const {
390 std::cerr << "\n*** Source Manager Stats:\n";
391 std::cerr << FileInfos.size() << " files mapped, " << MemBufferInfos.size()
392 << " mem buffers mapped, " << FileIDs.size()
393 << " file ID's allocated.\n";
394 std::cerr << " " << FileIDs.size() << " normal buffer FileID's, "
395 << MacroIDs.size() << " macro expansion FileID's.\n";
396
397
398
399 unsigned NumLineNumsComputed = 0;
400 unsigned NumFileBytesMapped = 0;
401 for (std::map<const FileEntry *, FileInfo>::const_iterator I =
402 FileInfos.begin(), E = FileInfos.end(); I != E; ++I) {
403 NumLineNumsComputed += I->second.SourceLineCache != 0;
404 NumFileBytesMapped += I->second.Buffer->getBufferSize();
405 }
406 std::cerr << NumFileBytesMapped << " bytes of files mapped, "
407 << NumLineNumsComputed << " files with line #'s computed.\n";
408}