//===--- SourceManager.cpp - Track and cache source files -----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//  This file implements the SourceManager interface.
//
//===----------------------------------------------------------------------===//
13
14#include "clang/Basic/SourceManager.h"
15#include "clang/Basic/FileManager.h"
Chris Lattner739e7392007-04-29 07:12:06 +000016#include "llvm/Support/MemoryBuffer.h"
Chris Lattner22eb9722006-06-18 05:43:12 +000017#include "llvm/System/Path.h"
18#include <algorithm>
19#include <iostream>
Gabor Greifffc337b2007-07-12 16:00:00 +000020#include <fcntl.h>
Chris Lattner22eb9722006-06-18 05:43:12 +000021using namespace clang;
Chris Lattner5f4b1ff2006-06-20 05:02:40 +000022using namespace SrcMgr;
Chris Lattner23b7eb62007-06-15 23:05:46 +000023using llvm::MemoryBuffer;
Chris Lattner22eb9722006-06-18 05:43:12 +000024
25SourceManager::~SourceManager() {
26 for (std::map<const FileEntry *, FileInfo>::iterator I = FileInfos.begin(),
27 E = FileInfos.end(); I != E; ++I) {
28 delete I->second.Buffer;
29 delete[] I->second.SourceLineCache;
30 }
31
32 for (std::list<InfoRec>::iterator I = MemBufferInfos.begin(),
33 E = MemBufferInfos.end(); I != E; ++I) {
34 delete I->second.Buffer;
35 delete[] I->second.SourceLineCache;
36 }
37}
38
Chris Lattnere92976d2007-04-29 06:44:41 +000039
40// FIXME: REMOVE THESE
41#include <unistd.h>
42#include <sys/types.h>
43#include <sys/uio.h>
44#include <sys/fcntl.h>
45#include <cerrno>
46
Chris Lattner739e7392007-04-29 07:12:06 +000047static const MemoryBuffer *ReadFileFast(const FileEntry *FileEnt) {
Chris Lattnere92976d2007-04-29 06:44:41 +000048#if 0
49 // FIXME: Reintroduce this and zap this function once the common llvm stuff
50 // is fast for the small case.
Chris Lattner739e7392007-04-29 07:12:06 +000051 return MemoryBuffer::getFile(FileEnt->getName(), strlen(FileEnt->getName()),
Chris Lattnere92976d2007-04-29 06:44:41 +000052 FileEnt->getSize());
53#endif
54
55 // If the file is larger than some threshold, use 'read', otherwise use mmap.
56 if (FileEnt->getSize() >= 4096*4)
Chris Lattner739e7392007-04-29 07:12:06 +000057 return MemoryBuffer::getFile(FileEnt->getName(), strlen(FileEnt->getName()),
Chris Lattner776050e2007-05-06 23:34:12 +000058 0, FileEnt->getSize());
Chris Lattnere92976d2007-04-29 06:44:41 +000059
Chris Lattner739e7392007-04-29 07:12:06 +000060 MemoryBuffer *SB = MemoryBuffer::getNewUninitMemBuffer(FileEnt->getSize(),
Chris Lattnere92976d2007-04-29 06:44:41 +000061 FileEnt->getName());
62 char *BufPtr = const_cast<char*>(SB->getBufferStart());
63
64 int FD = ::open(FileEnt->getName(), O_RDONLY);
65 if (FD == -1) {
66 delete SB;
67 return 0;
68 }
69
70 unsigned BytesLeft = FileEnt->getSize();
71 while (BytesLeft) {
72 ssize_t NumRead = ::read(FD, BufPtr, BytesLeft);
73 if (NumRead != -1) {
74 BytesLeft -= NumRead;
75 BufPtr += NumRead;
76 } else if (errno == EINTR) {
77 // try again
78 } else {
79 // error reading.
80 close(FD);
81 delete SB;
82 return 0;
83 }
84 }
85 close(FD);
86
87 return SB;
88}
89
90
Chris Lattner22eb9722006-06-18 05:43:12 +000091/// getFileInfo - Create or return a cached FileInfo for the specified file.
92///
Chris Lattner5f4b1ff2006-06-20 05:02:40 +000093const InfoRec *
Chris Lattner22eb9722006-06-18 05:43:12 +000094SourceManager::getInfoRec(const FileEntry *FileEnt) {
95 assert(FileEnt && "Didn't specify a file entry to use?");
96 // Do we already have information about this file?
97 std::map<const FileEntry *, FileInfo>::iterator I =
98 FileInfos.lower_bound(FileEnt);
99 if (I != FileInfos.end() && I->first == FileEnt)
100 return &*I;
101
102 // Nope, get information.
Chris Lattner739e7392007-04-29 07:12:06 +0000103 const MemoryBuffer *File = ReadFileFast(FileEnt);
Chris Lattner35f99852007-04-29 06:08:57 +0000104 if (File == 0)
Chris Lattner22eb9722006-06-18 05:43:12 +0000105 return 0;
Chris Lattner22eb9722006-06-18 05:43:12 +0000106
107 const InfoRec &Entry =
108 *FileInfos.insert(I, std::make_pair(FileEnt, FileInfo()));
109 FileInfo &Info = const_cast<FileInfo &>(Entry.second);
110
111 Info.Buffer = File;
112 Info.SourceLineCache = 0;
113 Info.NumLines = 0;
114 return &Entry;
115}
116
117
118/// createMemBufferInfoRec - Create a new info record for the specified memory
119/// buffer. This does no caching.
Chris Lattner5f4b1ff2006-06-20 05:02:40 +0000120const InfoRec *
Chris Lattner739e7392007-04-29 07:12:06 +0000121SourceManager::createMemBufferInfoRec(const MemoryBuffer *Buffer) {
Chris Lattner22eb9722006-06-18 05:43:12 +0000122 // Add a new info record to the MemBufferInfos list and return it.
123 FileInfo FI;
124 FI.Buffer = Buffer;
125 FI.SourceLineCache = 0;
126 FI.NumLines = 0;
127 MemBufferInfos.push_back(InfoRec(0, FI));
128 return &MemBufferInfos.back();
129}
130
131
132/// createFileID - Create a new fileID for the specified InfoRec and include
133/// position. This works regardless of whether the InfoRec corresponds to a
134/// file or some other input source.
135unsigned SourceManager::createFileID(const InfoRec *File,
136 SourceLocation IncludePos) {
137 // If FileEnt is really large (e.g. it's a large .i file), we may not be able
138 // to fit an arbitrary position in the file in the FilePos field. To handle
139 // this, we create one FileID for each chunk of the file that fits in a
140 // FilePos field.
141 unsigned FileSize = File->second.Buffer->getBufferSize();
142 if (FileSize+1 < (1 << SourceLocation::FilePosBits)) {
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000143 FileIDs.push_back(FileIDInfo::get(IncludePos, 0, File));
Chris Lattner2a904d02006-10-22 06:33:42 +0000144 assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) &&
145 "Ran out of file ID's!");
Chris Lattner22eb9722006-06-18 05:43:12 +0000146 return FileIDs.size();
147 }
148
149 // Create one FileID for each chunk of the file.
150 unsigned Result = FileIDs.size()+1;
151
152 unsigned ChunkNo = 0;
153 while (1) {
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000154 FileIDs.push_back(FileIDInfo::get(IncludePos, ChunkNo++, File));
Chris Lattner22eb9722006-06-18 05:43:12 +0000155
156 if (FileSize+1 < (1 << SourceLocation::FilePosBits)) break;
157 FileSize -= (1 << SourceLocation::FilePosBits);
158 }
159
Chris Lattner2a904d02006-10-22 06:33:42 +0000160 assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) &&
161 "Ran out of file ID's!");
Chris Lattner22eb9722006-06-18 05:43:12 +0000162 return Result;
163}
164
Chris Lattner7d6a4f62006-06-30 06:10:08 +0000165/// getInstantiationLoc - Return a new SourceLocation that encodes the fact
166/// that a token from physloc PhysLoc should actually be referenced from
167/// InstantiationLoc.
Chris Lattnerca8ebc02007-07-20 18:00:12 +0000168SourceLocation SourceManager::getInstantiationLoc(SourceLocation PhysLoc,
Chris Lattner7d6a4f62006-06-30 06:10:08 +0000169 SourceLocation InstantLoc) {
Chris Lattner3fc74e22007-07-15 06:35:27 +0000170 // The specified source location may be a mapped location, due to a macro
171 // instantiation or #line directive. Strip off this information to find out
172 // where the characters are actually located.
Chris Lattnerca8ebc02007-07-20 18:00:12 +0000173 PhysLoc = getPhysicalLoc(PhysLoc);
Chris Lattner351050b2006-07-16 18:05:08 +0000174
Chris Lattner4c37a8c2006-06-30 06:15:08 +0000175 // Resolve InstantLoc down to a real logical location.
176 InstantLoc = getLogicalLoc(InstantLoc);
Chris Lattner7fa8c882006-07-20 06:48:52 +0000177
Chris Lattnerca8ebc02007-07-20 18:00:12 +0000178
179 // If the last macro id is close to the currently requested location, try to
180 // reuse it. This implements a single-entry cache.
181 if (!MacroIDs.empty()) {
182 MacroIDInfo &LastOne = MacroIDs.back();
183 if (LastOne.getInstantiationLoc() == InstantLoc &&
184 LastOne.getPhysicalLoc().getFileID() == PhysLoc.getFileID()) {
185
186 int PhysDelta = PhysLoc.getRawFilePos() -
187 LastOne.getPhysicalLoc().getRawFilePos();
188 if (unsigned(PhysDelta) < (1 << SourceLocation::MacroPhysOffsBits))
189 return SourceLocation::getMacroLoc(MacroIDs.size()-1,
190 (unsigned)PhysDelta, 0);
191
192 }
193 }
194
195
196
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000197 // FIXME: intelligently cache macroid's.
198 MacroIDs.push_back(MacroIDInfo::get(InstantLoc, PhysLoc));
199
200 return SourceLocation::getMacroLoc(MacroIDs.size()-1, 0, 0);
201
202#if 0
Chris Lattner7fa8c882006-07-20 06:48:52 +0000203 unsigned InstantiationFileID;
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000204
Chris Lattner7fa8c882006-07-20 06:48:52 +0000205 // If this is the same instantiation as was requested last time, return this
206 // immediately.
207 if (PhysLoc.getFileID() == LastInstantiationLoc_MacroFID &&
208 InstantLoc == LastInstantiationLoc_InstantLoc) {
209 InstantiationFileID = LastInstantiationLoc_Result;
210 } else {
211 // Add a FileID for this. FIXME: should cache these!
212 FileIDs.push_back(FileIDInfo::getMacroExpansion(InstantLoc,
213 PhysLoc.getFileID()));
214 InstantiationFileID = FileIDs.size();
215
216 // Remember this in the single-entry cache for next time.
217 LastInstantiationLoc_MacroFID = PhysLoc.getFileID();
218 LastInstantiationLoc_InstantLoc = InstantLoc;
219 LastInstantiationLoc_Result = InstantiationFileID;
220 }
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000221 return SourceLocation::getMacroLoc(InstantiationFileID,
222 PhysLoc.getRawFilePos());
223#endif
Chris Lattner7d6a4f62006-06-30 06:10:08 +0000224}
225
226
Chris Lattner30709b032006-06-21 03:01:55 +0000227
Chris Lattnerd01e2912006-06-18 16:22:51 +0000228/// getCharacterData - Return a pointer to the start of the specified location
Chris Lattner739e7392007-04-29 07:12:06 +0000229/// in the appropriate MemoryBuffer.
Chris Lattnerd01e2912006-06-18 16:22:51 +0000230const char *SourceManager::getCharacterData(SourceLocation SL) const {
Chris Lattnerd3a15f72006-07-04 23:01:03 +0000231 // Note that this is a hot function in the getSpelling() path, which is
232 // heavily used by -E mode.
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000233 SL = getPhysicalLoc(SL);
Chris Lattnerd3a15f72006-07-04 23:01:03 +0000234
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000235 return getFileInfo(SL.getFileID())->Buffer->getBufferStart() +
236 getFullFilePos(SL);
Chris Lattnerd01e2912006-06-18 16:22:51 +0000237}
238
Chris Lattner685730f2006-06-26 01:36:22 +0000239
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000240/// getColumnNumber - Return the column # for the specified file position.
Chris Lattner22eb9722006-06-18 05:43:12 +0000241/// this is significantly cheaper to compute than the line number. This returns
242/// zero if the column number isn't known.
Chris Lattner9a13bde2006-06-21 04:57:09 +0000243unsigned SourceManager::getColumnNumber(SourceLocation Loc) const {
244 unsigned FileID = Loc.getFileID();
Chris Lattner22eb9722006-06-18 05:43:12 +0000245 if (FileID == 0) return 0;
Chris Lattner30709b032006-06-21 03:01:55 +0000246
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000247 unsigned FilePos = getFullFilePos(Loc);
Chris Lattner739e7392007-04-29 07:12:06 +0000248 const MemoryBuffer *Buffer = getBuffer(FileID);
Chris Lattner22eb9722006-06-18 05:43:12 +0000249 const char *Buf = Buffer->getBufferStart();
250
251 unsigned LineStart = FilePos;
252 while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r')
253 --LineStart;
254 return FilePos-LineStart+1;
255}
256
Chris Lattner9a13bde2006-06-21 04:57:09 +0000257/// getSourceName - This method returns the name of the file or buffer that
258/// the SourceLocation specifies. This can be modified with #line directives,
259/// etc.
260std::string SourceManager::getSourceName(SourceLocation Loc) {
261 unsigned FileID = Loc.getFileID();
262 if (FileID == 0) return "";
Chris Lattner2dffd2b2006-06-29 16:44:08 +0000263 return getFileInfo(FileID)->Buffer->getBufferIdentifier();
Chris Lattner9a13bde2006-06-21 04:57:09 +0000264}
265
266
Chris Lattner22eb9722006-06-18 05:43:12 +0000267/// getLineNumber - Given a SourceLocation, return the physical line number
268/// for the position indicated. This requires building and caching a table of
Chris Lattner739e7392007-04-29 07:12:06 +0000269/// line offsets for the MemoryBuffer, so this is not cheap: use only when
Chris Lattner22eb9722006-06-18 05:43:12 +0000270/// about to emit a diagnostic.
Chris Lattner9a13bde2006-06-21 04:57:09 +0000271unsigned SourceManager::getLineNumber(SourceLocation Loc) {
Chris Lattnera85a9d22006-07-02 20:07:52 +0000272 unsigned FileID = Loc.getFileID();
273 if (FileID == 0) return 0;
274 FileInfo *FileInfo = getFileInfo(FileID);
Chris Lattner22eb9722006-06-18 05:43:12 +0000275
276 // If this is the first use of line information for this buffer, compute the
277 /// SourceLineCache for it on demand.
278 if (FileInfo->SourceLineCache == 0) {
Chris Lattner739e7392007-04-29 07:12:06 +0000279 const MemoryBuffer *Buffer = FileInfo->Buffer;
Chris Lattner22eb9722006-06-18 05:43:12 +0000280
281 // Find the file offsets of all of the *physical* source lines. This does
282 // not look at trigraphs, escaped newlines, or anything else tricky.
283 std::vector<unsigned> LineOffsets;
284
285 // Line #1 starts at char 0.
286 LineOffsets.push_back(0);
287
288 const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart();
289 const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd();
290 unsigned Offs = 0;
291 while (1) {
292 // Skip over the contents of the line.
293 // TODO: Vectorize this? This is very performance sensitive for programs
Chris Lattnerd5da3ea2006-07-04 21:11:41 +0000294 // with lots of diagnostics and in -E mode.
Chris Lattner22eb9722006-06-18 05:43:12 +0000295 const unsigned char *NextBuf = (const unsigned char *)Buf;
296 while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0')
297 ++NextBuf;
298 Offs += NextBuf-Buf;
299 Buf = NextBuf;
300
301 if (Buf[0] == '\n' || Buf[0] == '\r') {
302 // If this is \n\r or \r\n, skip both characters.
303 if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1])
304 ++Offs, ++Buf;
305 ++Offs, ++Buf;
306 LineOffsets.push_back(Offs);
307 } else {
308 // Otherwise, this is a null. If end of file, exit.
309 if (Buf == End) break;
310 // Otherwise, skip the null.
311 ++Offs, ++Buf;
312 }
313 }
314 LineOffsets.push_back(Offs);
315
316 // Copy the offsets into the FileInfo structure.
317 FileInfo->NumLines = LineOffsets.size();
318 FileInfo->SourceLineCache = new unsigned[LineOffsets.size()];
319 std::copy(LineOffsets.begin(), LineOffsets.end(),
320 FileInfo->SourceLineCache);
321 }
322
323 // Okay, we know we have a line number table. Do a binary search to find the
324 // line number that this character position lands on.
325 unsigned NumLines = FileInfo->NumLines;
326 unsigned *SourceLineCache = FileInfo->SourceLineCache;
327
328 // TODO: If this is performance sensitive, we could try doing simple radix
329 // type approaches to make good (tight?) initial guesses based on the
330 // assumption that all lines are the same average size.
331 unsigned *Pos = std::lower_bound(SourceLineCache, SourceLineCache+NumLines,
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000332 getFullFilePos(Loc)+1);
Chris Lattner22eb9722006-06-18 05:43:12 +0000333 return Pos-SourceLineCache;
334}
335
336/// PrintStats - Print statistics to stderr.
337///
338void SourceManager::PrintStats() const {
339 std::cerr << "\n*** Source Manager Stats:\n";
340 std::cerr << FileInfos.size() << " files mapped, " << MemBufferInfos.size()
341 << " mem buffers mapped, " << FileIDs.size()
342 << " file ID's allocated.\n";
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000343 std::cerr << " " << FileIDs.size() << " normal buffer FileID's, "
344 << MacroIDs.size() << " macro expansion FileID's.\n";
Chris Lattner30709b032006-06-21 03:01:55 +0000345
346
Chris Lattner22eb9722006-06-18 05:43:12 +0000347
348 unsigned NumLineNumsComputed = 0;
349 unsigned NumFileBytesMapped = 0;
350 for (std::map<const FileEntry *, FileInfo>::const_iterator I =
351 FileInfos.begin(), E = FileInfos.end(); I != E; ++I) {
352 NumLineNumsComputed += I->second.SourceLineCache != 0;
353 NumFileBytesMapped += I->second.Buffer->getBufferSize();
354 }
355 std::cerr << NumFileBytesMapped << " bytes of files mapped, "
356 << NumLineNumsComputed << " files with line #'s computed.\n";
357}