blob: 24aba4549b6cf0a7276bccb859e3de0cffa9eab7 [file] [log] [blame]
//===--- SourceManager.cpp - Track and cache source files -----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//  This file implements the SourceManager interface.
//
//===----------------------------------------------------------------------===//
13
14#include "clang/Basic/SourceManager.h"
15#include "clang/Basic/FileManager.h"
Hartmut Kaiser61adb662007-09-12 15:39:04 +000016#include "llvm/Config/config.h"
Chris Lattner4b009652007-07-25 00:24:17 +000017#include "llvm/Support/Compiler.h"
18#include "llvm/Support/MemoryBuffer.h"
19#include "llvm/System/Path.h"
20#include <algorithm>
21#include <iostream>
22#include <fcntl.h>
23using namespace clang;
24using namespace SrcMgr;
25using llvm::MemoryBuffer;
26
27SourceManager::~SourceManager() {
28 for (std::map<const FileEntry *, FileInfo>::iterator I = FileInfos.begin(),
29 E = FileInfos.end(); I != E; ++I) {
30 delete I->second.Buffer;
31 delete[] I->second.SourceLineCache;
32 }
33
34 for (std::list<InfoRec>::iterator I = MemBufferInfos.begin(),
35 E = MemBufferInfos.end(); I != E; ++I) {
36 delete I->second.Buffer;
37 delete[] I->second.SourceLineCache;
38 }
39}
40
41
42// FIXME: REMOVE THESE
43#include <unistd.h>
44#include <sys/types.h>
Anton Korobeynikovd1cedd32007-10-16 09:09:44 +000045#if !defined(_MSC_VER) && !defined(__MINGW32__)
Chris Lattner4b009652007-07-25 00:24:17 +000046#include <sys/uio.h>
47#include <sys/fcntl.h>
Chris Lattnera09a2c02007-09-03 18:24:56 +000048#else
49#include <io.h>
50#endif
Chris Lattner4b009652007-07-25 00:24:17 +000051#include <cerrno>
52
53static const MemoryBuffer *ReadFileFast(const FileEntry *FileEnt) {
54#if 0
55 // FIXME: Reintroduce this and zap this function once the common llvm stuff
56 // is fast for the small case.
57 return MemoryBuffer::getFile(FileEnt->getName(), strlen(FileEnt->getName()),
58 FileEnt->getSize());
59#endif
60
61 // If the file is larger than some threshold, use 'read', otherwise use mmap.
62 if (FileEnt->getSize() >= 4096*4)
63 return MemoryBuffer::getFile(FileEnt->getName(), strlen(FileEnt->getName()),
64 0, FileEnt->getSize());
65
66 MemoryBuffer *SB = MemoryBuffer::getNewUninitMemBuffer(FileEnt->getSize(),
67 FileEnt->getName());
68 char *BufPtr = const_cast<char*>(SB->getBufferStart());
Chris Lattnera09a2c02007-09-03 18:24:56 +000069
Hartmut Kaiser61adb662007-09-12 15:39:04 +000070#if defined(LLVM_ON_WIN32)
Chris Lattnera09a2c02007-09-03 18:24:56 +000071 int FD = ::open(FileEnt->getName(), O_RDONLY|O_BINARY);
72#else
Chris Lattner4b009652007-07-25 00:24:17 +000073 int FD = ::open(FileEnt->getName(), O_RDONLY);
Chris Lattnera09a2c02007-09-03 18:24:56 +000074#endif
Chris Lattner4b009652007-07-25 00:24:17 +000075 if (FD == -1) {
76 delete SB;
77 return 0;
78 }
79
80 unsigned BytesLeft = FileEnt->getSize();
81 while (BytesLeft) {
82 ssize_t NumRead = ::read(FD, BufPtr, BytesLeft);
83 if (NumRead != -1) {
84 BytesLeft -= NumRead;
85 BufPtr += NumRead;
86 } else if (errno == EINTR) {
87 // try again
88 } else {
89 // error reading.
90 close(FD);
91 delete SB;
92 return 0;
93 }
94 }
95 close(FD);
96
97 return SB;
98}
99
100
101/// getFileInfo - Create or return a cached FileInfo for the specified file.
102///
103const InfoRec *
104SourceManager::getInfoRec(const FileEntry *FileEnt) {
105 assert(FileEnt && "Didn't specify a file entry to use?");
106 // Do we already have information about this file?
107 std::map<const FileEntry *, FileInfo>::iterator I =
108 FileInfos.lower_bound(FileEnt);
109 if (I != FileInfos.end() && I->first == FileEnt)
110 return &*I;
111
112 // Nope, get information.
113 const MemoryBuffer *File = ReadFileFast(FileEnt);
114 if (File == 0)
115 return 0;
116
117 const InfoRec &Entry =
118 *FileInfos.insert(I, std::make_pair(FileEnt, FileInfo()));
119 FileInfo &Info = const_cast<FileInfo &>(Entry.second);
120
121 Info.Buffer = File;
122 Info.SourceLineCache = 0;
123 Info.NumLines = 0;
124 return &Entry;
125}
126
127
128/// createMemBufferInfoRec - Create a new info record for the specified memory
129/// buffer. This does no caching.
130const InfoRec *
131SourceManager::createMemBufferInfoRec(const MemoryBuffer *Buffer) {
132 // Add a new info record to the MemBufferInfos list and return it.
133 FileInfo FI;
134 FI.Buffer = Buffer;
135 FI.SourceLineCache = 0;
136 FI.NumLines = 0;
137 MemBufferInfos.push_back(InfoRec(0, FI));
138 return &MemBufferInfos.back();
139}
140
141
142/// createFileID - Create a new fileID for the specified InfoRec and include
143/// position. This works regardless of whether the InfoRec corresponds to a
144/// file or some other input source.
145unsigned SourceManager::createFileID(const InfoRec *File,
146 SourceLocation IncludePos) {
147 // If FileEnt is really large (e.g. it's a large .i file), we may not be able
148 // to fit an arbitrary position in the file in the FilePos field. To handle
149 // this, we create one FileID for each chunk of the file that fits in a
150 // FilePos field.
151 unsigned FileSize = File->second.Buffer->getBufferSize();
152 if (FileSize+1 < (1 << SourceLocation::FilePosBits)) {
153 FileIDs.push_back(FileIDInfo::get(IncludePos, 0, File));
154 assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) &&
155 "Ran out of file ID's!");
156 return FileIDs.size();
157 }
158
159 // Create one FileID for each chunk of the file.
160 unsigned Result = FileIDs.size()+1;
161
162 unsigned ChunkNo = 0;
163 while (1) {
164 FileIDs.push_back(FileIDInfo::get(IncludePos, ChunkNo++, File));
165
166 if (FileSize+1 < (1 << SourceLocation::FilePosBits)) break;
167 FileSize -= (1 << SourceLocation::FilePosBits);
168 }
169
170 assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) &&
171 "Ran out of file ID's!");
172 return Result;
173}
174
175/// getInstantiationLoc - Return a new SourceLocation that encodes the fact
176/// that a token from physloc PhysLoc should actually be referenced from
177/// InstantiationLoc.
178SourceLocation SourceManager::getInstantiationLoc(SourceLocation PhysLoc,
179 SourceLocation InstantLoc) {
180 // The specified source location may be a mapped location, due to a macro
181 // instantiation or #line directive. Strip off this information to find out
182 // where the characters are actually located.
183 PhysLoc = getPhysicalLoc(PhysLoc);
184
185 // Resolve InstantLoc down to a real logical location.
186 InstantLoc = getLogicalLoc(InstantLoc);
187
188
189 // If the last macro id is close to the currently requested location, try to
Chris Lattnerc5c08972007-08-02 03:55:37 +0000190 // reuse it. This implements a small cache.
191 for (int i = MacroIDs.size()-1, e = MacroIDs.size()-6; i >= 0 && i != e; --i){
192 MacroIDInfo &LastOne = MacroIDs[i];
Chris Lattner4b009652007-07-25 00:24:17 +0000193
Chris Lattnerc5c08972007-08-02 03:55:37 +0000194 // The instanitation point and source physloc have to exactly match to reuse
195 // (for now). We could allow "nearby" instantiations in the future.
196 if (LastOne.getInstantiationLoc() != InstantLoc ||
197 LastOne.getPhysicalLoc().getFileID() != PhysLoc.getFileID())
198 continue;
199
200 // Check to see if the physloc of the token came from near enough to reuse.
201 int PhysDelta = PhysLoc.getRawFilePos() -
202 LastOne.getPhysicalLoc().getRawFilePos();
203 if (SourceLocation::isValidMacroPhysOffs(PhysDelta))
Chris Lattnera835e042007-08-02 04:22:39 +0000204 return SourceLocation::getMacroLoc(i, PhysDelta, 0);
Chris Lattner4b009652007-07-25 00:24:17 +0000205 }
206
207
208 MacroIDs.push_back(MacroIDInfo::get(InstantLoc, PhysLoc));
209 return SourceLocation::getMacroLoc(MacroIDs.size()-1, 0, 0);
210}
211
Chris Lattner569faa62007-10-11 18:38:32 +0000212/// getBufferData - Return a pointer to the start and end of the character
213/// data for the specified FileID.
214std::pair<const char*, const char*>
215SourceManager::getBufferData(unsigned FileID) const {
216 const llvm::MemoryBuffer *Buf = getBuffer(FileID);
217 return std::make_pair(Buf->getBufferStart(), Buf->getBufferEnd());
218}
Chris Lattner4b009652007-07-25 00:24:17 +0000219
220
221/// getCharacterData - Return a pointer to the start of the specified location
222/// in the appropriate MemoryBuffer.
223const char *SourceManager::getCharacterData(SourceLocation SL) const {
224 // Note that this is a hot function in the getSpelling() path, which is
225 // heavily used by -E mode.
226 SL = getPhysicalLoc(SL);
227
228 return getFileInfo(SL.getFileID())->Buffer->getBufferStart() +
229 getFullFilePos(SL);
230}
231
232
233/// getColumnNumber - Return the column # for the specified file position.
234/// this is significantly cheaper to compute than the line number. This returns
235/// zero if the column number isn't known.
236unsigned SourceManager::getColumnNumber(SourceLocation Loc) const {
237 unsigned FileID = Loc.getFileID();
238 if (FileID == 0) return 0;
239
240 unsigned FilePos = getFullFilePos(Loc);
241 const MemoryBuffer *Buffer = getBuffer(FileID);
242 const char *Buf = Buffer->getBufferStart();
243
244 unsigned LineStart = FilePos;
245 while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r')
246 --LineStart;
247 return FilePos-LineStart+1;
248}
249
250/// getSourceName - This method returns the name of the file or buffer that
251/// the SourceLocation specifies. This can be modified with #line directives,
252/// etc.
Chris Lattner37f041172007-08-30 05:59:30 +0000253const char *SourceManager::getSourceName(SourceLocation Loc) const {
Chris Lattner4b009652007-07-25 00:24:17 +0000254 unsigned FileID = Loc.getFileID();
255 if (FileID == 0) return "";
256 return getFileInfo(FileID)->Buffer->getBufferIdentifier();
257}
258
259static void ComputeLineNumbers(FileInfo *FI) DISABLE_INLINE;
260static void ComputeLineNumbers(FileInfo *FI) {
261 const MemoryBuffer *Buffer = FI->Buffer;
262
263 // Find the file offsets of all of the *physical* source lines. This does
264 // not look at trigraphs, escaped newlines, or anything else tricky.
265 std::vector<unsigned> LineOffsets;
266
267 // Line #1 starts at char 0.
268 LineOffsets.push_back(0);
269
270 const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart();
271 const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd();
272 unsigned Offs = 0;
273 while (1) {
274 // Skip over the contents of the line.
275 // TODO: Vectorize this? This is very performance sensitive for programs
276 // with lots of diagnostics and in -E mode.
277 const unsigned char *NextBuf = (const unsigned char *)Buf;
278 while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0')
279 ++NextBuf;
280 Offs += NextBuf-Buf;
281 Buf = NextBuf;
282
283 if (Buf[0] == '\n' || Buf[0] == '\r') {
284 // If this is \n\r or \r\n, skip both characters.
285 if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1])
286 ++Offs, ++Buf;
287 ++Offs, ++Buf;
288 LineOffsets.push_back(Offs);
289 } else {
290 // Otherwise, this is a null. If end of file, exit.
291 if (Buf == End) break;
292 // Otherwise, skip the null.
293 ++Offs, ++Buf;
294 }
295 }
296 LineOffsets.push_back(Offs);
297
298 // Copy the offsets into the FileInfo structure.
299 FI->NumLines = LineOffsets.size();
300 FI->SourceLineCache = new unsigned[LineOffsets.size()];
301 std::copy(LineOffsets.begin(), LineOffsets.end(), FI->SourceLineCache);
302}
303
304/// getLineNumber - Given a SourceLocation, return the physical line number
305/// for the position indicated. This requires building and caching a table of
306/// line offsets for the MemoryBuffer, so this is not cheap: use only when
307/// about to emit a diagnostic.
308unsigned SourceManager::getLineNumber(SourceLocation Loc) {
309 unsigned FileID = Loc.getFileID();
310 if (FileID == 0) return 0;
311 FileInfo *FileInfo;
312
313 if (LastLineNoFileIDQuery == FileID)
314 FileInfo = LastLineNoFileInfo;
315 else
316 FileInfo = getFileInfo(FileID);
317
318 // If this is the first use of line information for this buffer, compute the
319 /// SourceLineCache for it on demand.
320 if (FileInfo->SourceLineCache == 0)
321 ComputeLineNumbers(FileInfo);
322
323 // Okay, we know we have a line number table. Do a binary search to find the
324 // line number that this character position lands on.
325 unsigned *SourceLineCache = FileInfo->SourceLineCache;
326 unsigned *SourceLineCacheStart = SourceLineCache;
327 unsigned *SourceLineCacheEnd = SourceLineCache + FileInfo->NumLines;
328
329 unsigned QueriedFilePos = getFullFilePos(Loc)+1;
330
331 // If the previous query was to the same file, we know both the file pos from
332 // that query and the line number returned. This allows us to narrow the
333 // search space from the entire file to something near the match.
334 if (LastLineNoFileIDQuery == FileID) {
335 if (QueriedFilePos >= LastLineNoFilePos) {
336 SourceLineCache = SourceLineCache+LastLineNoResult-1;
337
338 // The query is likely to be nearby the previous one. Here we check to
339 // see if it is within 5, 10 or 20 lines. It can be far away in cases
340 // where big comment blocks and vertical whitespace eat up lines but
341 // contribute no tokens.
342 if (SourceLineCache+5 < SourceLineCacheEnd) {
343 if (SourceLineCache[5] > QueriedFilePos)
344 SourceLineCacheEnd = SourceLineCache+5;
345 else if (SourceLineCache+10 < SourceLineCacheEnd) {
346 if (SourceLineCache[10] > QueriedFilePos)
347 SourceLineCacheEnd = SourceLineCache+10;
348 else if (SourceLineCache+20 < SourceLineCacheEnd) {
349 if (SourceLineCache[20] > QueriedFilePos)
350 SourceLineCacheEnd = SourceLineCache+20;
351 }
352 }
353 }
354 } else {
355 SourceLineCacheEnd = SourceLineCache+LastLineNoResult+1;
356 }
357 }
358
359 // If the spread is large, do a "radix" test as our initial guess, based on
360 // the assumption that lines average to approximately the same length.
361 // NOTE: This is currently disabled, as it does not appear to be profitable in
362 // initial measurements.
363 if (0 && SourceLineCacheEnd-SourceLineCache > 20) {
364 unsigned FileLen = FileInfo->SourceLineCache[FileInfo->NumLines-1];
365
366 // Take a stab at guessing where it is.
367 unsigned ApproxPos = FileInfo->NumLines*QueriedFilePos / FileLen;
368
369 // Check for -10 and +10 lines.
370 unsigned LowerBound = std::max(int(ApproxPos-10), 0);
371 unsigned UpperBound = std::min(ApproxPos+10, FileLen);
372
373 // If the computed lower bound is less than the query location, move it in.
374 if (SourceLineCache < SourceLineCacheStart+LowerBound &&
375 SourceLineCacheStart[LowerBound] < QueriedFilePos)
376 SourceLineCache = SourceLineCacheStart+LowerBound;
377
378 // If the computed upper bound is greater than the query location, move it.
379 if (SourceLineCacheEnd > SourceLineCacheStart+UpperBound &&
380 SourceLineCacheStart[UpperBound] >= QueriedFilePos)
381 SourceLineCacheEnd = SourceLineCacheStart+UpperBound;
382 }
383
384 unsigned *Pos
385 = std::lower_bound(SourceLineCache, SourceLineCacheEnd, QueriedFilePos);
386 unsigned LineNo = Pos-SourceLineCacheStart;
387
388 LastLineNoFileIDQuery = FileID;
389 LastLineNoFileInfo = FileInfo;
390 LastLineNoFilePos = QueriedFilePos;
391 LastLineNoResult = LineNo;
392 return LineNo;
393}
394
395/// PrintStats - Print statistics to stderr.
396///
397void SourceManager::PrintStats() const {
398 std::cerr << "\n*** Source Manager Stats:\n";
399 std::cerr << FileInfos.size() << " files mapped, " << MemBufferInfos.size()
400 << " mem buffers mapped, " << FileIDs.size()
401 << " file ID's allocated.\n";
402 std::cerr << " " << FileIDs.size() << " normal buffer FileID's, "
403 << MacroIDs.size() << " macro expansion FileID's.\n";
404
405
406
407 unsigned NumLineNumsComputed = 0;
408 unsigned NumFileBytesMapped = 0;
409 for (std::map<const FileEntry *, FileInfo>::const_iterator I =
410 FileInfos.begin(), E = FileInfos.end(); I != E; ++I) {
411 NumLineNumsComputed += I->second.SourceLineCache != 0;
412 NumFileBytesMapped += I->second.Buffer->getBufferSize();
413 }
414 std::cerr << NumFileBytesMapped << " bytes of files mapped, "
415 << NumLineNumsComputed << " files with line #'s computed.\n";
416}