blob: 3e2623c8e441445510aacb5411e6c164e6241514 [file] [log] [blame]
Chris Lattner22eb9722006-06-18 05:43:12 +00001//===--- SourceManager.cpp - Track and cache source files -----------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file was developed by Chris Lattner and is distributed under
6// the University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the SourceManager interface.
11//
12//===----------------------------------------------------------------------===//
13
14#include "clang/Basic/SourceManager.h"
15#include "clang/Basic/FileManager.h"
Chris Lattner739e7392007-04-29 07:12:06 +000016#include "llvm/Support/MemoryBuffer.h"
Chris Lattner22eb9722006-06-18 05:43:12 +000017#include "llvm/System/Path.h"
18#include <algorithm>
19#include <iostream>
Gabor Greifffc337b2007-07-12 16:00:00 +000020#include <fcntl.h>
Chris Lattner22eb9722006-06-18 05:43:12 +000021using namespace clang;
Chris Lattner5f4b1ff2006-06-20 05:02:40 +000022using namespace SrcMgr;
Chris Lattner23b7eb62007-06-15 23:05:46 +000023using llvm::MemoryBuffer;
Chris Lattner22eb9722006-06-18 05:43:12 +000024
25SourceManager::~SourceManager() {
26 for (std::map<const FileEntry *, FileInfo>::iterator I = FileInfos.begin(),
27 E = FileInfos.end(); I != E; ++I) {
28 delete I->second.Buffer;
29 delete[] I->second.SourceLineCache;
30 }
31
32 for (std::list<InfoRec>::iterator I = MemBufferInfos.begin(),
33 E = MemBufferInfos.end(); I != E; ++I) {
34 delete I->second.Buffer;
35 delete[] I->second.SourceLineCache;
36 }
37}
38
Chris Lattnere92976d2007-04-29 06:44:41 +000039
40// FIXME: REMOVE THESE
41#include <unistd.h>
42#include <sys/types.h>
43#include <sys/uio.h>
44#include <sys/fcntl.h>
45#include <cerrno>
46
Chris Lattner739e7392007-04-29 07:12:06 +000047static const MemoryBuffer *ReadFileFast(const FileEntry *FileEnt) {
Chris Lattnere92976d2007-04-29 06:44:41 +000048#if 0
49 // FIXME: Reintroduce this and zap this function once the common llvm stuff
50 // is fast for the small case.
Chris Lattner739e7392007-04-29 07:12:06 +000051 return MemoryBuffer::getFile(FileEnt->getName(), strlen(FileEnt->getName()),
Chris Lattnere92976d2007-04-29 06:44:41 +000052 FileEnt->getSize());
53#endif
54
55 // If the file is larger than some threshold, use 'read', otherwise use mmap.
56 if (FileEnt->getSize() >= 4096*4)
Chris Lattner739e7392007-04-29 07:12:06 +000057 return MemoryBuffer::getFile(FileEnt->getName(), strlen(FileEnt->getName()),
Chris Lattner776050e2007-05-06 23:34:12 +000058 0, FileEnt->getSize());
Chris Lattnere92976d2007-04-29 06:44:41 +000059
Chris Lattner739e7392007-04-29 07:12:06 +000060 MemoryBuffer *SB = MemoryBuffer::getNewUninitMemBuffer(FileEnt->getSize(),
Chris Lattnere92976d2007-04-29 06:44:41 +000061 FileEnt->getName());
62 char *BufPtr = const_cast<char*>(SB->getBufferStart());
63
64 int FD = ::open(FileEnt->getName(), O_RDONLY);
65 if (FD == -1) {
66 delete SB;
67 return 0;
68 }
69
70 unsigned BytesLeft = FileEnt->getSize();
71 while (BytesLeft) {
72 ssize_t NumRead = ::read(FD, BufPtr, BytesLeft);
73 if (NumRead != -1) {
74 BytesLeft -= NumRead;
75 BufPtr += NumRead;
76 } else if (errno == EINTR) {
77 // try again
78 } else {
79 // error reading.
80 close(FD);
81 delete SB;
82 return 0;
83 }
84 }
85 close(FD);
86
87 return SB;
88}
89
90
Chris Lattner22eb9722006-06-18 05:43:12 +000091/// getFileInfo - Create or return a cached FileInfo for the specified file.
92///
Chris Lattner5f4b1ff2006-06-20 05:02:40 +000093const InfoRec *
Chris Lattner22eb9722006-06-18 05:43:12 +000094SourceManager::getInfoRec(const FileEntry *FileEnt) {
95 assert(FileEnt && "Didn't specify a file entry to use?");
96 // Do we already have information about this file?
97 std::map<const FileEntry *, FileInfo>::iterator I =
98 FileInfos.lower_bound(FileEnt);
99 if (I != FileInfos.end() && I->first == FileEnt)
100 return &*I;
101
102 // Nope, get information.
Chris Lattner739e7392007-04-29 07:12:06 +0000103 const MemoryBuffer *File = ReadFileFast(FileEnt);
Chris Lattner35f99852007-04-29 06:08:57 +0000104 if (File == 0)
Chris Lattner22eb9722006-06-18 05:43:12 +0000105 return 0;
Chris Lattner22eb9722006-06-18 05:43:12 +0000106
107 const InfoRec &Entry =
108 *FileInfos.insert(I, std::make_pair(FileEnt, FileInfo()));
109 FileInfo &Info = const_cast<FileInfo &>(Entry.second);
110
111 Info.Buffer = File;
112 Info.SourceLineCache = 0;
113 Info.NumLines = 0;
114 return &Entry;
115}
116
117
118/// createMemBufferInfoRec - Create a new info record for the specified memory
119/// buffer. This does no caching.
Chris Lattner5f4b1ff2006-06-20 05:02:40 +0000120const InfoRec *
Chris Lattner739e7392007-04-29 07:12:06 +0000121SourceManager::createMemBufferInfoRec(const MemoryBuffer *Buffer) {
Chris Lattner22eb9722006-06-18 05:43:12 +0000122 // Add a new info record to the MemBufferInfos list and return it.
123 FileInfo FI;
124 FI.Buffer = Buffer;
125 FI.SourceLineCache = 0;
126 FI.NumLines = 0;
127 MemBufferInfos.push_back(InfoRec(0, FI));
128 return &MemBufferInfos.back();
129}
130
131
132/// createFileID - Create a new fileID for the specified InfoRec and include
133/// position. This works regardless of whether the InfoRec corresponds to a
134/// file or some other input source.
135unsigned SourceManager::createFileID(const InfoRec *File,
136 SourceLocation IncludePos) {
137 // If FileEnt is really large (e.g. it's a large .i file), we may not be able
138 // to fit an arbitrary position in the file in the FilePos field. To handle
139 // this, we create one FileID for each chunk of the file that fits in a
140 // FilePos field.
141 unsigned FileSize = File->second.Buffer->getBufferSize();
142 if (FileSize+1 < (1 << SourceLocation::FilePosBits)) {
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000143 FileIDs.push_back(FileIDInfo::get(IncludePos, 0, File));
Chris Lattner2a904d02006-10-22 06:33:42 +0000144 assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) &&
145 "Ran out of file ID's!");
Chris Lattner22eb9722006-06-18 05:43:12 +0000146 return FileIDs.size();
147 }
148
149 // Create one FileID for each chunk of the file.
150 unsigned Result = FileIDs.size()+1;
151
152 unsigned ChunkNo = 0;
153 while (1) {
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000154 FileIDs.push_back(FileIDInfo::get(IncludePos, ChunkNo++, File));
Chris Lattner22eb9722006-06-18 05:43:12 +0000155
156 if (FileSize+1 < (1 << SourceLocation::FilePosBits)) break;
157 FileSize -= (1 << SourceLocation::FilePosBits);
158 }
159
Chris Lattner2a904d02006-10-22 06:33:42 +0000160 assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) &&
161 "Ran out of file ID's!");
Chris Lattner22eb9722006-06-18 05:43:12 +0000162 return Result;
163}
164
Chris Lattner7d6a4f62006-06-30 06:10:08 +0000165/// getInstantiationLoc - Return a new SourceLocation that encodes the fact
166/// that a token from physloc PhysLoc should actually be referenced from
167/// InstantiationLoc.
Chris Lattner3fc74e22007-07-15 06:35:27 +0000168SourceLocation SourceManager::getInstantiationLoc(SourceLocation VirtLoc,
Chris Lattner7d6a4f62006-06-30 06:10:08 +0000169 SourceLocation InstantLoc) {
Chris Lattner3fc74e22007-07-15 06:35:27 +0000170 // The specified source location may be a mapped location, due to a macro
171 // instantiation or #line directive. Strip off this information to find out
172 // where the characters are actually located.
173 SourceLocation PhysLoc = getPhysicalLoc(VirtLoc);
Chris Lattner351050b2006-07-16 18:05:08 +0000174
Chris Lattner4c37a8c2006-06-30 06:15:08 +0000175 // Resolve InstantLoc down to a real logical location.
176 InstantLoc = getLogicalLoc(InstantLoc);
Chris Lattner7fa8c882006-07-20 06:48:52 +0000177
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000178 // FIXME: intelligently cache macroid's.
179 MacroIDs.push_back(MacroIDInfo::get(InstantLoc, PhysLoc));
180
181 return SourceLocation::getMacroLoc(MacroIDs.size()-1, 0, 0);
182
183#if 0
Chris Lattner7fa8c882006-07-20 06:48:52 +0000184 unsigned InstantiationFileID;
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000185
Chris Lattner7fa8c882006-07-20 06:48:52 +0000186 // If this is the same instantiation as was requested last time, return this
187 // immediately.
188 if (PhysLoc.getFileID() == LastInstantiationLoc_MacroFID &&
189 InstantLoc == LastInstantiationLoc_InstantLoc) {
190 InstantiationFileID = LastInstantiationLoc_Result;
191 } else {
192 // Add a FileID for this. FIXME: should cache these!
193 FileIDs.push_back(FileIDInfo::getMacroExpansion(InstantLoc,
194 PhysLoc.getFileID()));
195 InstantiationFileID = FileIDs.size();
196
197 // Remember this in the single-entry cache for next time.
198 LastInstantiationLoc_MacroFID = PhysLoc.getFileID();
199 LastInstantiationLoc_InstantLoc = InstantLoc;
200 LastInstantiationLoc_Result = InstantiationFileID;
201 }
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000202 return SourceLocation::getMacroLoc(InstantiationFileID,
203 PhysLoc.getRawFilePos());
204#endif
Chris Lattner7d6a4f62006-06-30 06:10:08 +0000205}
206
207
Chris Lattner30709b032006-06-21 03:01:55 +0000208
Chris Lattnerd01e2912006-06-18 16:22:51 +0000209/// getCharacterData - Return a pointer to the start of the specified location
Chris Lattner739e7392007-04-29 07:12:06 +0000210/// in the appropriate MemoryBuffer.
Chris Lattnerd01e2912006-06-18 16:22:51 +0000211const char *SourceManager::getCharacterData(SourceLocation SL) const {
Chris Lattnerd3a15f72006-07-04 23:01:03 +0000212 // Note that this is a hot function in the getSpelling() path, which is
213 // heavily used by -E mode.
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000214 SL = getPhysicalLoc(SL);
Chris Lattnerd3a15f72006-07-04 23:01:03 +0000215
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000216 return getFileInfo(SL.getFileID())->Buffer->getBufferStart() +
217 getFullFilePos(SL);
Chris Lattnerd01e2912006-06-18 16:22:51 +0000218}
219
Chris Lattner685730f2006-06-26 01:36:22 +0000220
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000221/// getColumnNumber - Return the column # for the specified file position.
Chris Lattner22eb9722006-06-18 05:43:12 +0000222/// this is significantly cheaper to compute than the line number. This returns
223/// zero if the column number isn't known.
Chris Lattner9a13bde2006-06-21 04:57:09 +0000224unsigned SourceManager::getColumnNumber(SourceLocation Loc) const {
225 unsigned FileID = Loc.getFileID();
Chris Lattner22eb9722006-06-18 05:43:12 +0000226 if (FileID == 0) return 0;
Chris Lattner30709b032006-06-21 03:01:55 +0000227
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000228 unsigned FilePos = getFullFilePos(Loc);
Chris Lattner739e7392007-04-29 07:12:06 +0000229 const MemoryBuffer *Buffer = getBuffer(FileID);
Chris Lattner22eb9722006-06-18 05:43:12 +0000230 const char *Buf = Buffer->getBufferStart();
231
232 unsigned LineStart = FilePos;
233 while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r')
234 --LineStart;
235 return FilePos-LineStart+1;
236}
237
Chris Lattner9a13bde2006-06-21 04:57:09 +0000238/// getSourceName - This method returns the name of the file or buffer that
239/// the SourceLocation specifies. This can be modified with #line directives,
240/// etc.
241std::string SourceManager::getSourceName(SourceLocation Loc) {
242 unsigned FileID = Loc.getFileID();
243 if (FileID == 0) return "";
Chris Lattner2dffd2b2006-06-29 16:44:08 +0000244 return getFileInfo(FileID)->Buffer->getBufferIdentifier();
Chris Lattner9a13bde2006-06-21 04:57:09 +0000245}
246
247
Chris Lattner22eb9722006-06-18 05:43:12 +0000248/// getLineNumber - Given a SourceLocation, return the physical line number
249/// for the position indicated. This requires building and caching a table of
Chris Lattner739e7392007-04-29 07:12:06 +0000250/// line offsets for the MemoryBuffer, so this is not cheap: use only when
Chris Lattner22eb9722006-06-18 05:43:12 +0000251/// about to emit a diagnostic.
Chris Lattner9a13bde2006-06-21 04:57:09 +0000252unsigned SourceManager::getLineNumber(SourceLocation Loc) {
Chris Lattnera85a9d22006-07-02 20:07:52 +0000253 unsigned FileID = Loc.getFileID();
254 if (FileID == 0) return 0;
255 FileInfo *FileInfo = getFileInfo(FileID);
Chris Lattner22eb9722006-06-18 05:43:12 +0000256
257 // If this is the first use of line information for this buffer, compute the
258 /// SourceLineCache for it on demand.
259 if (FileInfo->SourceLineCache == 0) {
Chris Lattner739e7392007-04-29 07:12:06 +0000260 const MemoryBuffer *Buffer = FileInfo->Buffer;
Chris Lattner22eb9722006-06-18 05:43:12 +0000261
262 // Find the file offsets of all of the *physical* source lines. This does
263 // not look at trigraphs, escaped newlines, or anything else tricky.
264 std::vector<unsigned> LineOffsets;
265
266 // Line #1 starts at char 0.
267 LineOffsets.push_back(0);
268
269 const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart();
270 const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd();
271 unsigned Offs = 0;
272 while (1) {
273 // Skip over the contents of the line.
274 // TODO: Vectorize this? This is very performance sensitive for programs
Chris Lattnerd5da3ea2006-07-04 21:11:41 +0000275 // with lots of diagnostics and in -E mode.
Chris Lattner22eb9722006-06-18 05:43:12 +0000276 const unsigned char *NextBuf = (const unsigned char *)Buf;
277 while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0')
278 ++NextBuf;
279 Offs += NextBuf-Buf;
280 Buf = NextBuf;
281
282 if (Buf[0] == '\n' || Buf[0] == '\r') {
283 // If this is \n\r or \r\n, skip both characters.
284 if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1])
285 ++Offs, ++Buf;
286 ++Offs, ++Buf;
287 LineOffsets.push_back(Offs);
288 } else {
289 // Otherwise, this is a null. If end of file, exit.
290 if (Buf == End) break;
291 // Otherwise, skip the null.
292 ++Offs, ++Buf;
293 }
294 }
295 LineOffsets.push_back(Offs);
296
297 // Copy the offsets into the FileInfo structure.
298 FileInfo->NumLines = LineOffsets.size();
299 FileInfo->SourceLineCache = new unsigned[LineOffsets.size()];
300 std::copy(LineOffsets.begin(), LineOffsets.end(),
301 FileInfo->SourceLineCache);
302 }
303
304 // Okay, we know we have a line number table. Do a binary search to find the
305 // line number that this character position lands on.
306 unsigned NumLines = FileInfo->NumLines;
307 unsigned *SourceLineCache = FileInfo->SourceLineCache;
308
309 // TODO: If this is performance sensitive, we could try doing simple radix
310 // type approaches to make good (tight?) initial guesses based on the
311 // assumption that all lines are the same average size.
312 unsigned *Pos = std::lower_bound(SourceLineCache, SourceLineCache+NumLines,
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000313 getFullFilePos(Loc)+1);
Chris Lattner22eb9722006-06-18 05:43:12 +0000314 return Pos-SourceLineCache;
315}
316
317/// PrintStats - Print statistics to stderr.
318///
319void SourceManager::PrintStats() const {
320 std::cerr << "\n*** Source Manager Stats:\n";
321 std::cerr << FileInfos.size() << " files mapped, " << MemBufferInfos.size()
322 << " mem buffers mapped, " << FileIDs.size()
323 << " file ID's allocated.\n";
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000324 std::cerr << " " << FileIDs.size() << " normal buffer FileID's, "
325 << MacroIDs.size() << " macro expansion FileID's.\n";
Chris Lattner30709b032006-06-21 03:01:55 +0000326
327
Chris Lattner22eb9722006-06-18 05:43:12 +0000328
329 unsigned NumLineNumsComputed = 0;
330 unsigned NumFileBytesMapped = 0;
331 for (std::map<const FileEntry *, FileInfo>::const_iterator I =
332 FileInfos.begin(), E = FileInfos.end(); I != E; ++I) {
333 NumLineNumsComputed += I->second.SourceLineCache != 0;
334 NumFileBytesMapped += I->second.Buffer->getBufferSize();
335 }
336 std::cerr << NumFileBytesMapped << " bytes of files mapped, "
337 << NumLineNumsComputed << " files with line #'s computed.\n";
338}