blob: be8eeee4086d13572915417c31795fab867d9145 [file] [log] [blame]
Chris Lattner22eb9722006-06-18 05:43:12 +00001//===--- SourceManager.cpp - Track and cache source files -----------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file was developed by Chris Lattner and is distributed under
6// the University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the SourceManager interface.
11//
12//===----------------------------------------------------------------------===//
13
14#include "clang/Basic/SourceManager.h"
15#include "clang/Basic/FileManager.h"
Chris Lattner739e7392007-04-29 07:12:06 +000016#include "llvm/Support/MemoryBuffer.h"
Chris Lattner22eb9722006-06-18 05:43:12 +000017#include "llvm/System/Path.h"
18#include <algorithm>
19#include <iostream>
Gabor Greifffc337b2007-07-12 16:00:00 +000020#include <fcntl.h>
Chris Lattner22eb9722006-06-18 05:43:12 +000021using namespace clang;
Chris Lattner5f4b1ff2006-06-20 05:02:40 +000022using namespace SrcMgr;
Chris Lattner23b7eb62007-06-15 23:05:46 +000023using llvm::MemoryBuffer;
Chris Lattner22eb9722006-06-18 05:43:12 +000024
25SourceManager::~SourceManager() {
26 for (std::map<const FileEntry *, FileInfo>::iterator I = FileInfos.begin(),
27 E = FileInfos.end(); I != E; ++I) {
28 delete I->second.Buffer;
29 delete[] I->second.SourceLineCache;
30 }
31
32 for (std::list<InfoRec>::iterator I = MemBufferInfos.begin(),
33 E = MemBufferInfos.end(); I != E; ++I) {
34 delete I->second.Buffer;
35 delete[] I->second.SourceLineCache;
36 }
37}
38
Chris Lattnere92976d2007-04-29 06:44:41 +000039
40// FIXME: REMOVE THESE
41#include <unistd.h>
42#include <sys/types.h>
43#include <sys/uio.h>
44#include <sys/fcntl.h>
45#include <cerrno>
46
Chris Lattner739e7392007-04-29 07:12:06 +000047static const MemoryBuffer *ReadFileFast(const FileEntry *FileEnt) {
Chris Lattnere92976d2007-04-29 06:44:41 +000048#if 0
49 // FIXME: Reintroduce this and zap this function once the common llvm stuff
50 // is fast for the small case.
Chris Lattner739e7392007-04-29 07:12:06 +000051 return MemoryBuffer::getFile(FileEnt->getName(), strlen(FileEnt->getName()),
Chris Lattnere92976d2007-04-29 06:44:41 +000052 FileEnt->getSize());
53#endif
54
55 // If the file is larger than some threshold, use 'read', otherwise use mmap.
56 if (FileEnt->getSize() >= 4096*4)
Chris Lattner739e7392007-04-29 07:12:06 +000057 return MemoryBuffer::getFile(FileEnt->getName(), strlen(FileEnt->getName()),
Chris Lattner776050e2007-05-06 23:34:12 +000058 0, FileEnt->getSize());
Chris Lattnere92976d2007-04-29 06:44:41 +000059
Chris Lattner739e7392007-04-29 07:12:06 +000060 MemoryBuffer *SB = MemoryBuffer::getNewUninitMemBuffer(FileEnt->getSize(),
Chris Lattnere92976d2007-04-29 06:44:41 +000061 FileEnt->getName());
62 char *BufPtr = const_cast<char*>(SB->getBufferStart());
63
64 int FD = ::open(FileEnt->getName(), O_RDONLY);
65 if (FD == -1) {
66 delete SB;
67 return 0;
68 }
69
70 unsigned BytesLeft = FileEnt->getSize();
71 while (BytesLeft) {
72 ssize_t NumRead = ::read(FD, BufPtr, BytesLeft);
73 if (NumRead != -1) {
74 BytesLeft -= NumRead;
75 BufPtr += NumRead;
76 } else if (errno == EINTR) {
77 // try again
78 } else {
79 // error reading.
80 close(FD);
81 delete SB;
82 return 0;
83 }
84 }
85 close(FD);
86
87 return SB;
88}
89
90
Chris Lattner22eb9722006-06-18 05:43:12 +000091/// getFileInfo - Create or return a cached FileInfo for the specified file.
92///
Chris Lattner5f4b1ff2006-06-20 05:02:40 +000093const InfoRec *
Chris Lattner22eb9722006-06-18 05:43:12 +000094SourceManager::getInfoRec(const FileEntry *FileEnt) {
95 assert(FileEnt && "Didn't specify a file entry to use?");
96 // Do we already have information about this file?
97 std::map<const FileEntry *, FileInfo>::iterator I =
98 FileInfos.lower_bound(FileEnt);
99 if (I != FileInfos.end() && I->first == FileEnt)
100 return &*I;
101
102 // Nope, get information.
Chris Lattner739e7392007-04-29 07:12:06 +0000103 const MemoryBuffer *File = ReadFileFast(FileEnt);
Chris Lattner35f99852007-04-29 06:08:57 +0000104 if (File == 0)
Chris Lattner22eb9722006-06-18 05:43:12 +0000105 return 0;
Chris Lattner22eb9722006-06-18 05:43:12 +0000106
107 const InfoRec &Entry =
108 *FileInfos.insert(I, std::make_pair(FileEnt, FileInfo()));
109 FileInfo &Info = const_cast<FileInfo &>(Entry.second);
110
111 Info.Buffer = File;
112 Info.SourceLineCache = 0;
113 Info.NumLines = 0;
114 return &Entry;
115}
116
117
118/// createMemBufferInfoRec - Create a new info record for the specified memory
119/// buffer. This does no caching.
Chris Lattner5f4b1ff2006-06-20 05:02:40 +0000120const InfoRec *
Chris Lattner739e7392007-04-29 07:12:06 +0000121SourceManager::createMemBufferInfoRec(const MemoryBuffer *Buffer) {
Chris Lattner22eb9722006-06-18 05:43:12 +0000122 // Add a new info record to the MemBufferInfos list and return it.
123 FileInfo FI;
124 FI.Buffer = Buffer;
125 FI.SourceLineCache = 0;
126 FI.NumLines = 0;
127 MemBufferInfos.push_back(InfoRec(0, FI));
128 return &MemBufferInfos.back();
129}
130
131
132/// createFileID - Create a new fileID for the specified InfoRec and include
133/// position. This works regardless of whether the InfoRec corresponds to a
134/// file or some other input source.
135unsigned SourceManager::createFileID(const InfoRec *File,
136 SourceLocation IncludePos) {
137 // If FileEnt is really large (e.g. it's a large .i file), we may not be able
138 // to fit an arbitrary position in the file in the FilePos field. To handle
139 // this, we create one FileID for each chunk of the file that fits in a
140 // FilePos field.
141 unsigned FileSize = File->second.Buffer->getBufferSize();
142 if (FileSize+1 < (1 << SourceLocation::FilePosBits)) {
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000143 FileIDs.push_back(FileIDInfo::get(IncludePos, 0, File));
Chris Lattner2a904d02006-10-22 06:33:42 +0000144 assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) &&
145 "Ran out of file ID's!");
Chris Lattner22eb9722006-06-18 05:43:12 +0000146 return FileIDs.size();
147 }
148
149 // Create one FileID for each chunk of the file.
150 unsigned Result = FileIDs.size()+1;
151
152 unsigned ChunkNo = 0;
153 while (1) {
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000154 FileIDs.push_back(FileIDInfo::get(IncludePos, ChunkNo++, File));
Chris Lattner22eb9722006-06-18 05:43:12 +0000155
156 if (FileSize+1 < (1 << SourceLocation::FilePosBits)) break;
157 FileSize -= (1 << SourceLocation::FilePosBits);
158 }
159
Chris Lattner2a904d02006-10-22 06:33:42 +0000160 assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) &&
161 "Ran out of file ID's!");
Chris Lattner22eb9722006-06-18 05:43:12 +0000162 return Result;
163}
164
Chris Lattner7d6a4f62006-06-30 06:10:08 +0000165/// getInstantiationLoc - Return a new SourceLocation that encodes the fact
166/// that a token from physloc PhysLoc should actually be referenced from
167/// InstantiationLoc.
Chris Lattnerca8ebc02007-07-20 18:00:12 +0000168SourceLocation SourceManager::getInstantiationLoc(SourceLocation PhysLoc,
Chris Lattner7d6a4f62006-06-30 06:10:08 +0000169 SourceLocation InstantLoc) {
Chris Lattner3fc74e22007-07-15 06:35:27 +0000170 // The specified source location may be a mapped location, due to a macro
171 // instantiation or #line directive. Strip off this information to find out
172 // where the characters are actually located.
Chris Lattnerca8ebc02007-07-20 18:00:12 +0000173 PhysLoc = getPhysicalLoc(PhysLoc);
Chris Lattner351050b2006-07-16 18:05:08 +0000174
Chris Lattner4c37a8c2006-06-30 06:15:08 +0000175 // Resolve InstantLoc down to a real logical location.
176 InstantLoc = getLogicalLoc(InstantLoc);
Chris Lattner7fa8c882006-07-20 06:48:52 +0000177
Chris Lattnerca8ebc02007-07-20 18:00:12 +0000178
179 // If the last macro id is close to the currently requested location, try to
180 // reuse it. This implements a single-entry cache.
181 if (!MacroIDs.empty()) {
182 MacroIDInfo &LastOne = MacroIDs.back();
Chris Lattner2e380892007-07-21 06:41:57 +0000183
Chris Lattnerca8ebc02007-07-20 18:00:12 +0000184 if (LastOne.getInstantiationLoc() == InstantLoc &&
185 LastOne.getPhysicalLoc().getFileID() == PhysLoc.getFileID()) {
186
187 int PhysDelta = PhysLoc.getRawFilePos() -
188 LastOne.getPhysicalLoc().getRawFilePos();
Chris Lattner2e380892007-07-21 06:41:57 +0000189 if (SourceLocation::isValidMacroPhysOffs(PhysDelta))
190 return SourceLocation::getMacroLoc(MacroIDs.size()-1, PhysDelta, 0);
Chris Lattnerca8ebc02007-07-20 18:00:12 +0000191 }
192 }
193
Chris Lattnere60b21c2007-07-20 18:26:45 +0000194
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000195 MacroIDs.push_back(MacroIDInfo::get(InstantLoc, PhysLoc));
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000196 return SourceLocation::getMacroLoc(MacroIDs.size()-1, 0, 0);
Chris Lattner7d6a4f62006-06-30 06:10:08 +0000197}
198
199
Chris Lattner30709b032006-06-21 03:01:55 +0000200
Chris Lattnerd01e2912006-06-18 16:22:51 +0000201/// getCharacterData - Return a pointer to the start of the specified location
Chris Lattner739e7392007-04-29 07:12:06 +0000202/// in the appropriate MemoryBuffer.
Chris Lattnerd01e2912006-06-18 16:22:51 +0000203const char *SourceManager::getCharacterData(SourceLocation SL) const {
Chris Lattnerd3a15f72006-07-04 23:01:03 +0000204 // Note that this is a hot function in the getSpelling() path, which is
205 // heavily used by -E mode.
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000206 SL = getPhysicalLoc(SL);
Chris Lattnerd3a15f72006-07-04 23:01:03 +0000207
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000208 return getFileInfo(SL.getFileID())->Buffer->getBufferStart() +
209 getFullFilePos(SL);
Chris Lattnerd01e2912006-06-18 16:22:51 +0000210}
211
Chris Lattner685730f2006-06-26 01:36:22 +0000212
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000213/// getColumnNumber - Return the column # for the specified file position.
Chris Lattner22eb9722006-06-18 05:43:12 +0000214/// this is significantly cheaper to compute than the line number. This returns
215/// zero if the column number isn't known.
Chris Lattner9a13bde2006-06-21 04:57:09 +0000216unsigned SourceManager::getColumnNumber(SourceLocation Loc) const {
217 unsigned FileID = Loc.getFileID();
Chris Lattner22eb9722006-06-18 05:43:12 +0000218 if (FileID == 0) return 0;
Chris Lattner30709b032006-06-21 03:01:55 +0000219
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000220 unsigned FilePos = getFullFilePos(Loc);
Chris Lattner739e7392007-04-29 07:12:06 +0000221 const MemoryBuffer *Buffer = getBuffer(FileID);
Chris Lattner22eb9722006-06-18 05:43:12 +0000222 const char *Buf = Buffer->getBufferStart();
223
224 unsigned LineStart = FilePos;
225 while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r')
226 --LineStart;
227 return FilePos-LineStart+1;
228}
229
Chris Lattner9a13bde2006-06-21 04:57:09 +0000230/// getSourceName - This method returns the name of the file or buffer that
231/// the SourceLocation specifies. This can be modified with #line directives,
232/// etc.
233std::string SourceManager::getSourceName(SourceLocation Loc) {
234 unsigned FileID = Loc.getFileID();
235 if (FileID == 0) return "";
Chris Lattner2dffd2b2006-06-29 16:44:08 +0000236 return getFileInfo(FileID)->Buffer->getBufferIdentifier();
Chris Lattner9a13bde2006-06-21 04:57:09 +0000237}
238
239
Chris Lattner22eb9722006-06-18 05:43:12 +0000240/// getLineNumber - Given a SourceLocation, return the physical line number
241/// for the position indicated. This requires building and caching a table of
Chris Lattner739e7392007-04-29 07:12:06 +0000242/// line offsets for the MemoryBuffer, so this is not cheap: use only when
Chris Lattner22eb9722006-06-18 05:43:12 +0000243/// about to emit a diagnostic.
Chris Lattner9a13bde2006-06-21 04:57:09 +0000244unsigned SourceManager::getLineNumber(SourceLocation Loc) {
Chris Lattnera85a9d22006-07-02 20:07:52 +0000245 unsigned FileID = Loc.getFileID();
246 if (FileID == 0) return 0;
247 FileInfo *FileInfo = getFileInfo(FileID);
Chris Lattner22eb9722006-06-18 05:43:12 +0000248
249 // If this is the first use of line information for this buffer, compute the
250 /// SourceLineCache for it on demand.
251 if (FileInfo->SourceLineCache == 0) {
Chris Lattner739e7392007-04-29 07:12:06 +0000252 const MemoryBuffer *Buffer = FileInfo->Buffer;
Chris Lattner22eb9722006-06-18 05:43:12 +0000253
254 // Find the file offsets of all of the *physical* source lines. This does
255 // not look at trigraphs, escaped newlines, or anything else tricky.
256 std::vector<unsigned> LineOffsets;
257
258 // Line #1 starts at char 0.
259 LineOffsets.push_back(0);
260
261 const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart();
262 const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd();
263 unsigned Offs = 0;
264 while (1) {
265 // Skip over the contents of the line.
266 // TODO: Vectorize this? This is very performance sensitive for programs
Chris Lattnerd5da3ea2006-07-04 21:11:41 +0000267 // with lots of diagnostics and in -E mode.
Chris Lattner22eb9722006-06-18 05:43:12 +0000268 const unsigned char *NextBuf = (const unsigned char *)Buf;
269 while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0')
270 ++NextBuf;
271 Offs += NextBuf-Buf;
272 Buf = NextBuf;
273
274 if (Buf[0] == '\n' || Buf[0] == '\r') {
275 // If this is \n\r or \r\n, skip both characters.
276 if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1])
277 ++Offs, ++Buf;
278 ++Offs, ++Buf;
279 LineOffsets.push_back(Offs);
280 } else {
281 // Otherwise, this is a null. If end of file, exit.
282 if (Buf == End) break;
283 // Otherwise, skip the null.
284 ++Offs, ++Buf;
285 }
286 }
287 LineOffsets.push_back(Offs);
288
289 // Copy the offsets into the FileInfo structure.
290 FileInfo->NumLines = LineOffsets.size();
291 FileInfo->SourceLineCache = new unsigned[LineOffsets.size()];
292 std::copy(LineOffsets.begin(), LineOffsets.end(),
293 FileInfo->SourceLineCache);
294 }
295
296 // Okay, we know we have a line number table. Do a binary search to find the
297 // line number that this character position lands on.
298 unsigned NumLines = FileInfo->NumLines;
299 unsigned *SourceLineCache = FileInfo->SourceLineCache;
300
301 // TODO: If this is performance sensitive, we could try doing simple radix
302 // type approaches to make good (tight?) initial guesses based on the
303 // assumption that all lines are the same average size.
304 unsigned *Pos = std::lower_bound(SourceLineCache, SourceLineCache+NumLines,
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000305 getFullFilePos(Loc)+1);
Chris Lattner22eb9722006-06-18 05:43:12 +0000306 return Pos-SourceLineCache;
307}
308
309/// PrintStats - Print statistics to stderr.
310///
311void SourceManager::PrintStats() const {
312 std::cerr << "\n*** Source Manager Stats:\n";
313 std::cerr << FileInfos.size() << " files mapped, " << MemBufferInfos.size()
314 << " mem buffers mapped, " << FileIDs.size()
315 << " file ID's allocated.\n";
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000316 std::cerr << " " << FileIDs.size() << " normal buffer FileID's, "
317 << MacroIDs.size() << " macro expansion FileID's.\n";
Chris Lattner30709b032006-06-21 03:01:55 +0000318
319
Chris Lattner22eb9722006-06-18 05:43:12 +0000320
321 unsigned NumLineNumsComputed = 0;
322 unsigned NumFileBytesMapped = 0;
323 for (std::map<const FileEntry *, FileInfo>::const_iterator I =
324 FileInfos.begin(), E = FileInfos.end(); I != E; ++I) {
325 NumLineNumsComputed += I->second.SourceLineCache != 0;
326 NumFileBytesMapped += I->second.Buffer->getBufferSize();
327 }
328 std::cerr << NumFileBytesMapped << " bytes of files mapped, "
329 << NumLineNumsComputed << " files with line #'s computed.\n";
330}