blob: 2177c982d453261ff16c38c51389773bf5577802 [file] [log] [blame]
//===--- SourceManager.cpp - Track and cache source files -----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//  This file implements the SourceManager interface.
//
//===----------------------------------------------------------------------===//
13
14#include "clang/Basic/SourceManager.h"
15#include "clang/Basic/FileManager.h"
Chris Lattner739e7392007-04-29 07:12:06 +000016#include "llvm/Support/MemoryBuffer.h"
Chris Lattner22eb9722006-06-18 05:43:12 +000017#include "llvm/System/Path.h"
18#include <algorithm>
19#include <iostream>
Gabor Greifffc337b2007-07-12 16:00:00 +000020#include <fcntl.h>
21
Chris Lattner22eb9722006-06-18 05:43:12 +000022using namespace clang;
Chris Lattner5f4b1ff2006-06-20 05:02:40 +000023using namespace SrcMgr;
Chris Lattner23b7eb62007-06-15 23:05:46 +000024using llvm::MemoryBuffer;
Chris Lattner22eb9722006-06-18 05:43:12 +000025
26SourceManager::~SourceManager() {
27 for (std::map<const FileEntry *, FileInfo>::iterator I = FileInfos.begin(),
28 E = FileInfos.end(); I != E; ++I) {
29 delete I->second.Buffer;
30 delete[] I->second.SourceLineCache;
31 }
32
33 for (std::list<InfoRec>::iterator I = MemBufferInfos.begin(),
34 E = MemBufferInfos.end(); I != E; ++I) {
35 delete I->second.Buffer;
36 delete[] I->second.SourceLineCache;
37 }
38}
39
Chris Lattnere92976d2007-04-29 06:44:41 +000040
41// FIXME: REMOVE THESE
42#include <unistd.h>
43#include <sys/types.h>
44#include <sys/uio.h>
45#include <sys/fcntl.h>
46#include <cerrno>
47
Chris Lattner739e7392007-04-29 07:12:06 +000048static const MemoryBuffer *ReadFileFast(const FileEntry *FileEnt) {
Chris Lattnere92976d2007-04-29 06:44:41 +000049#if 0
50 // FIXME: Reintroduce this and zap this function once the common llvm stuff
51 // is fast for the small case.
Chris Lattner739e7392007-04-29 07:12:06 +000052 return MemoryBuffer::getFile(FileEnt->getName(), strlen(FileEnt->getName()),
Chris Lattnere92976d2007-04-29 06:44:41 +000053 FileEnt->getSize());
54#endif
55
56 // If the file is larger than some threshold, use 'read', otherwise use mmap.
57 if (FileEnt->getSize() >= 4096*4)
Chris Lattner739e7392007-04-29 07:12:06 +000058 return MemoryBuffer::getFile(FileEnt->getName(), strlen(FileEnt->getName()),
Chris Lattner776050e2007-05-06 23:34:12 +000059 0, FileEnt->getSize());
Chris Lattnere92976d2007-04-29 06:44:41 +000060
Chris Lattner739e7392007-04-29 07:12:06 +000061 MemoryBuffer *SB = MemoryBuffer::getNewUninitMemBuffer(FileEnt->getSize(),
Chris Lattnere92976d2007-04-29 06:44:41 +000062 FileEnt->getName());
63 char *BufPtr = const_cast<char*>(SB->getBufferStart());
64
65 int FD = ::open(FileEnt->getName(), O_RDONLY);
66 if (FD == -1) {
67 delete SB;
68 return 0;
69 }
70
71 unsigned BytesLeft = FileEnt->getSize();
72 while (BytesLeft) {
73 ssize_t NumRead = ::read(FD, BufPtr, BytesLeft);
74 if (NumRead != -1) {
75 BytesLeft -= NumRead;
76 BufPtr += NumRead;
77 } else if (errno == EINTR) {
78 // try again
79 } else {
80 // error reading.
81 close(FD);
82 delete SB;
83 return 0;
84 }
85 }
86 close(FD);
87
88 return SB;
89}
90
91
Chris Lattner22eb9722006-06-18 05:43:12 +000092/// getFileInfo - Create or return a cached FileInfo for the specified file.
93///
Chris Lattner5f4b1ff2006-06-20 05:02:40 +000094const InfoRec *
Chris Lattner22eb9722006-06-18 05:43:12 +000095SourceManager::getInfoRec(const FileEntry *FileEnt) {
96 assert(FileEnt && "Didn't specify a file entry to use?");
97 // Do we already have information about this file?
98 std::map<const FileEntry *, FileInfo>::iterator I =
99 FileInfos.lower_bound(FileEnt);
100 if (I != FileInfos.end() && I->first == FileEnt)
101 return &*I;
102
103 // Nope, get information.
Chris Lattner739e7392007-04-29 07:12:06 +0000104 const MemoryBuffer *File = ReadFileFast(FileEnt);
Chris Lattner35f99852007-04-29 06:08:57 +0000105 if (File == 0)
Chris Lattner22eb9722006-06-18 05:43:12 +0000106 return 0;
Chris Lattner22eb9722006-06-18 05:43:12 +0000107
108 const InfoRec &Entry =
109 *FileInfos.insert(I, std::make_pair(FileEnt, FileInfo()));
110 FileInfo &Info = const_cast<FileInfo &>(Entry.second);
111
112 Info.Buffer = File;
113 Info.SourceLineCache = 0;
114 Info.NumLines = 0;
115 return &Entry;
116}
117
118
119/// createMemBufferInfoRec - Create a new info record for the specified memory
120/// buffer. This does no caching.
Chris Lattner5f4b1ff2006-06-20 05:02:40 +0000121const InfoRec *
Chris Lattner739e7392007-04-29 07:12:06 +0000122SourceManager::createMemBufferInfoRec(const MemoryBuffer *Buffer) {
Chris Lattner22eb9722006-06-18 05:43:12 +0000123 // Add a new info record to the MemBufferInfos list and return it.
124 FileInfo FI;
125 FI.Buffer = Buffer;
126 FI.SourceLineCache = 0;
127 FI.NumLines = 0;
128 MemBufferInfos.push_back(InfoRec(0, FI));
129 return &MemBufferInfos.back();
130}
131
132
133/// createFileID - Create a new fileID for the specified InfoRec and include
134/// position. This works regardless of whether the InfoRec corresponds to a
135/// file or some other input source.
136unsigned SourceManager::createFileID(const InfoRec *File,
137 SourceLocation IncludePos) {
138 // If FileEnt is really large (e.g. it's a large .i file), we may not be able
139 // to fit an arbitrary position in the file in the FilePos field. To handle
140 // this, we create one FileID for each chunk of the file that fits in a
141 // FilePos field.
142 unsigned FileSize = File->second.Buffer->getBufferSize();
143 if (FileSize+1 < (1 << SourceLocation::FilePosBits)) {
Chris Lattner5f4b1ff2006-06-20 05:02:40 +0000144 FileIDs.push_back(FileIDInfo::getNormalBuffer(IncludePos, 0, File));
Chris Lattner2a904d02006-10-22 06:33:42 +0000145 assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) &&
146 "Ran out of file ID's!");
Chris Lattner22eb9722006-06-18 05:43:12 +0000147 return FileIDs.size();
148 }
149
150 // Create one FileID for each chunk of the file.
151 unsigned Result = FileIDs.size()+1;
152
153 unsigned ChunkNo = 0;
154 while (1) {
Chris Lattner5f4b1ff2006-06-20 05:02:40 +0000155 FileIDs.push_back(FileIDInfo::getNormalBuffer(IncludePos, ChunkNo++, File));
Chris Lattner22eb9722006-06-18 05:43:12 +0000156
157 if (FileSize+1 < (1 << SourceLocation::FilePosBits)) break;
158 FileSize -= (1 << SourceLocation::FilePosBits);
159 }
160
Chris Lattner2a904d02006-10-22 06:33:42 +0000161 assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) &&
162 "Ran out of file ID's!");
Chris Lattner22eb9722006-06-18 05:43:12 +0000163 return Result;
164}
165
Chris Lattner7d6a4f62006-06-30 06:10:08 +0000166/// getInstantiationLoc - Return a new SourceLocation that encodes the fact
167/// that a token from physloc PhysLoc should actually be referenced from
168/// InstantiationLoc.
Chris Lattner3fc74e22007-07-15 06:35:27 +0000169SourceLocation SourceManager::getInstantiationLoc(SourceLocation VirtLoc,
Chris Lattner7d6a4f62006-06-30 06:10:08 +0000170 SourceLocation InstantLoc) {
Chris Lattner3fc74e22007-07-15 06:35:27 +0000171 // The specified source location may be a mapped location, due to a macro
172 // instantiation or #line directive. Strip off this information to find out
173 // where the characters are actually located.
174 SourceLocation PhysLoc = getPhysicalLoc(VirtLoc);
Chris Lattner351050b2006-07-16 18:05:08 +0000175
Chris Lattner4c37a8c2006-06-30 06:15:08 +0000176 // Resolve InstantLoc down to a real logical location.
177 InstantLoc = getLogicalLoc(InstantLoc);
Chris Lattner7fa8c882006-07-20 06:48:52 +0000178
179 unsigned InstantiationFileID;
180 // If this is the same instantiation as was requested last time, return this
181 // immediately.
182 if (PhysLoc.getFileID() == LastInstantiationLoc_MacroFID &&
183 InstantLoc == LastInstantiationLoc_InstantLoc) {
184 InstantiationFileID = LastInstantiationLoc_Result;
185 } else {
186 // Add a FileID for this. FIXME: should cache these!
187 FileIDs.push_back(FileIDInfo::getMacroExpansion(InstantLoc,
188 PhysLoc.getFileID()));
189 InstantiationFileID = FileIDs.size();
190
191 // Remember this in the single-entry cache for next time.
192 LastInstantiationLoc_MacroFID = PhysLoc.getFileID();
193 LastInstantiationLoc_InstantLoc = InstantLoc;
194 LastInstantiationLoc_Result = InstantiationFileID;
195 }
Chris Lattner4c37a8c2006-06-30 06:15:08 +0000196 return SourceLocation(InstantiationFileID, PhysLoc.getRawFilePos());
Chris Lattner7d6a4f62006-06-30 06:10:08 +0000197}
198
199
Chris Lattner30709b032006-06-21 03:01:55 +0000200
Chris Lattnerd01e2912006-06-18 16:22:51 +0000201/// getCharacterData - Return a pointer to the start of the specified location
Chris Lattner739e7392007-04-29 07:12:06 +0000202/// in the appropriate MemoryBuffer.
Chris Lattnerd01e2912006-06-18 16:22:51 +0000203const char *SourceManager::getCharacterData(SourceLocation SL) const {
Chris Lattnerd3a15f72006-07-04 23:01:03 +0000204 // Note that this is a hot function in the getSpelling() path, which is
205 // heavily used by -E mode.
206 unsigned FileID = SL.getFileID();
207 assert(FileID && "Invalid source location!");
208
209 return getFileInfo(FileID)->Buffer->getBufferStart() + getFilePos(SL);
Chris Lattnerd01e2912006-06-18 16:22:51 +0000210}
211
Chris Lattner685730f2006-06-26 01:36:22 +0000212/// getIncludeLoc - Return the location of the #include for the specified
213/// FileID.
214SourceLocation SourceManager::getIncludeLoc(unsigned FileID) const {
215 const SrcMgr::FileIDInfo *FIDInfo = getFIDInfo(FileID);
216
217 // For Macros, the physical loc is specified by the MacroTokenFileID.
Chris Lattner2dffd2b2006-06-29 16:44:08 +0000218 if (FIDInfo->IDType == SrcMgr::FileIDInfo::MacroExpansion)
Chris Lattner685730f2006-06-26 01:36:22 +0000219 FIDInfo = &FileIDs[FIDInfo->u.MacroTokenFileID-1];
220
221 return FIDInfo->IncludeLoc;
222}
223
Chris Lattnerd01e2912006-06-18 16:22:51 +0000224
Chris Lattner22eb9722006-06-18 05:43:12 +0000225/// getColumnNumber - Return the column # for the specified include position.
226/// this is significantly cheaper to compute than the line number. This returns
227/// zero if the column number isn't known.
Chris Lattner9a13bde2006-06-21 04:57:09 +0000228unsigned SourceManager::getColumnNumber(SourceLocation Loc) const {
Chris Lattner2dffd2b2006-06-29 16:44:08 +0000229 Loc = getLogicalLoc(Loc);
Chris Lattner9a13bde2006-06-21 04:57:09 +0000230 unsigned FileID = Loc.getFileID();
Chris Lattner22eb9722006-06-18 05:43:12 +0000231 if (FileID == 0) return 0;
Chris Lattner30709b032006-06-21 03:01:55 +0000232
Chris Lattner9a13bde2006-06-21 04:57:09 +0000233 unsigned FilePos = getFilePos(Loc);
Chris Lattner739e7392007-04-29 07:12:06 +0000234 const MemoryBuffer *Buffer = getBuffer(FileID);
Chris Lattner22eb9722006-06-18 05:43:12 +0000235 const char *Buf = Buffer->getBufferStart();
236
237 unsigned LineStart = FilePos;
238 while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r')
239 --LineStart;
240 return FilePos-LineStart+1;
241}
242
Chris Lattner9a13bde2006-06-21 04:57:09 +0000243/// getSourceName - This method returns the name of the file or buffer that
244/// the SourceLocation specifies. This can be modified with #line directives,
245/// etc.
246std::string SourceManager::getSourceName(SourceLocation Loc) {
Chris Lattner2dffd2b2006-06-29 16:44:08 +0000247 Loc = getLogicalLoc(Loc);
Chris Lattner9a13bde2006-06-21 04:57:09 +0000248 unsigned FileID = Loc.getFileID();
249 if (FileID == 0) return "";
Chris Lattner2dffd2b2006-06-29 16:44:08 +0000250 return getFileInfo(FileID)->Buffer->getBufferIdentifier();
Chris Lattner9a13bde2006-06-21 04:57:09 +0000251}
252
253
Chris Lattner22eb9722006-06-18 05:43:12 +0000254/// getLineNumber - Given a SourceLocation, return the physical line number
255/// for the position indicated. This requires building and caching a table of
Chris Lattner739e7392007-04-29 07:12:06 +0000256/// line offsets for the MemoryBuffer, so this is not cheap: use only when
Chris Lattner22eb9722006-06-18 05:43:12 +0000257/// about to emit a diagnostic.
Chris Lattner9a13bde2006-06-21 04:57:09 +0000258unsigned SourceManager::getLineNumber(SourceLocation Loc) {
Chris Lattner2dffd2b2006-06-29 16:44:08 +0000259 Loc = getLogicalLoc(Loc);
Chris Lattnera85a9d22006-07-02 20:07:52 +0000260 unsigned FileID = Loc.getFileID();
261 if (FileID == 0) return 0;
262 FileInfo *FileInfo = getFileInfo(FileID);
Chris Lattner22eb9722006-06-18 05:43:12 +0000263
264 // If this is the first use of line information for this buffer, compute the
265 /// SourceLineCache for it on demand.
266 if (FileInfo->SourceLineCache == 0) {
Chris Lattner739e7392007-04-29 07:12:06 +0000267 const MemoryBuffer *Buffer = FileInfo->Buffer;
Chris Lattner22eb9722006-06-18 05:43:12 +0000268
269 // Find the file offsets of all of the *physical* source lines. This does
270 // not look at trigraphs, escaped newlines, or anything else tricky.
271 std::vector<unsigned> LineOffsets;
272
273 // Line #1 starts at char 0.
274 LineOffsets.push_back(0);
275
276 const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart();
277 const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd();
278 unsigned Offs = 0;
279 while (1) {
280 // Skip over the contents of the line.
281 // TODO: Vectorize this? This is very performance sensitive for programs
Chris Lattnerd5da3ea2006-07-04 21:11:41 +0000282 // with lots of diagnostics and in -E mode.
Chris Lattner22eb9722006-06-18 05:43:12 +0000283 const unsigned char *NextBuf = (const unsigned char *)Buf;
284 while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0')
285 ++NextBuf;
286 Offs += NextBuf-Buf;
287 Buf = NextBuf;
288
289 if (Buf[0] == '\n' || Buf[0] == '\r') {
290 // If this is \n\r or \r\n, skip both characters.
291 if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1])
292 ++Offs, ++Buf;
293 ++Offs, ++Buf;
294 LineOffsets.push_back(Offs);
295 } else {
296 // Otherwise, this is a null. If end of file, exit.
297 if (Buf == End) break;
298 // Otherwise, skip the null.
299 ++Offs, ++Buf;
300 }
301 }
302 LineOffsets.push_back(Offs);
303
304 // Copy the offsets into the FileInfo structure.
305 FileInfo->NumLines = LineOffsets.size();
306 FileInfo->SourceLineCache = new unsigned[LineOffsets.size()];
307 std::copy(LineOffsets.begin(), LineOffsets.end(),
308 FileInfo->SourceLineCache);
309 }
310
311 // Okay, we know we have a line number table. Do a binary search to find the
312 // line number that this character position lands on.
313 unsigned NumLines = FileInfo->NumLines;
314 unsigned *SourceLineCache = FileInfo->SourceLineCache;
315
316 // TODO: If this is performance sensitive, we could try doing simple radix
317 // type approaches to make good (tight?) initial guesses based on the
318 // assumption that all lines are the same average size.
319 unsigned *Pos = std::lower_bound(SourceLineCache, SourceLineCache+NumLines,
Chris Lattner9a13bde2006-06-21 04:57:09 +0000320 getFilePos(Loc)+1);
Chris Lattner22eb9722006-06-18 05:43:12 +0000321 return Pos-SourceLineCache;
322}
323
Chris Lattnerf6fd68a2006-06-26 01:48:23 +0000324/// getSourceFilePos - This method returns the *logical* offset from the start
325/// of the file that the specified SourceLocation represents. This returns
326/// the location of the *logical* character data, not the physical file
327/// position. In the case of macros, for example, this returns where the
328/// macro was instantiated, not where the characters for the macro can be
329/// found.
330unsigned SourceManager::getSourceFilePos(SourceLocation Loc) const {
331
332 // If this is a macro, we need to get the instantiation location.
333 const SrcMgr::FileIDInfo *FIDInfo = getFIDInfo(Loc.getFileID());
Chris Lattnerdc9f9bf2006-06-29 06:33:42 +0000334 while (FIDInfo->IDType == SrcMgr::FileIDInfo::MacroExpansion) {
335 Loc = FIDInfo->IncludeLoc;
336 FIDInfo = getFIDInfo(Loc.getFileID());
337 }
Chris Lattnerf6fd68a2006-06-26 01:48:23 +0000338
339 return getFilePos(Loc);
340}
341
Chris Lattner22eb9722006-06-18 05:43:12 +0000342/// PrintStats - Print statistics to stderr.
343///
344void SourceManager::PrintStats() const {
345 std::cerr << "\n*** Source Manager Stats:\n";
346 std::cerr << FileInfos.size() << " files mapped, " << MemBufferInfos.size()
347 << " mem buffers mapped, " << FileIDs.size()
348 << " file ID's allocated.\n";
Chris Lattner30709b032006-06-21 03:01:55 +0000349 unsigned NumBuffers = 0, NumMacros = 0;
350 for (unsigned i = 0, e = FileIDs.size(); i != e; ++i) {
351 if (FileIDs[i].IDType == FileIDInfo::NormalBuffer)
352 ++NumBuffers;
353 else if (FileIDs[i].IDType == FileIDInfo::MacroExpansion)
354 ++NumMacros;
355 else
356 assert(0 && "Unknown FileID!");
357 }
358 std::cerr << " " << NumBuffers << " normal buffer FileID's, "
359 << NumMacros << " macro expansion FileID's.\n";
360
361
Chris Lattner22eb9722006-06-18 05:43:12 +0000362
363 unsigned NumLineNumsComputed = 0;
364 unsigned NumFileBytesMapped = 0;
365 for (std::map<const FileEntry *, FileInfo>::const_iterator I =
366 FileInfos.begin(), E = FileInfos.end(); I != E; ++I) {
367 NumLineNumsComputed += I->second.SourceLineCache != 0;
368 NumFileBytesMapped += I->second.Buffer->getBufferSize();
369 }
370 std::cerr << NumFileBytesMapped << " bytes of files mapped, "
371 << NumLineNumsComputed << " files with line #'s computed.\n";
372}