blob: 3e2623c8e441445510aacb5411e6c164e6241514 [file] [log] [blame]
Reid Spencer5f016e22007-07-11 17:01:13 +00001//===--- SourceManager.cpp - Track and cache source files -----------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file was developed by Chris Lattner and is distributed under
6// the University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the SourceManager interface.
11//
12//===----------------------------------------------------------------------===//
13
14#include "clang/Basic/SourceManager.h"
15#include "clang/Basic/FileManager.h"
16#include "llvm/Support/MemoryBuffer.h"
17#include "llvm/System/Path.h"
18#include <algorithm>
19#include <iostream>
Gabor Greif15012182007-07-12 16:00:00 +000020#include <fcntl.h>
Reid Spencer5f016e22007-07-11 17:01:13 +000021using namespace clang;
22using namespace SrcMgr;
23using llvm::MemoryBuffer;
24
25SourceManager::~SourceManager() {
26 for (std::map<const FileEntry *, FileInfo>::iterator I = FileInfos.begin(),
27 E = FileInfos.end(); I != E; ++I) {
28 delete I->second.Buffer;
29 delete[] I->second.SourceLineCache;
30 }
31
32 for (std::list<InfoRec>::iterator I = MemBufferInfos.begin(),
33 E = MemBufferInfos.end(); I != E; ++I) {
34 delete I->second.Buffer;
35 delete[] I->second.SourceLineCache;
36 }
37}
38
39
40// FIXME: REMOVE THESE
41#include <unistd.h>
42#include <sys/types.h>
43#include <sys/uio.h>
44#include <sys/fcntl.h>
45#include <cerrno>
46
47static const MemoryBuffer *ReadFileFast(const FileEntry *FileEnt) {
48#if 0
49 // FIXME: Reintroduce this and zap this function once the common llvm stuff
50 // is fast for the small case.
51 return MemoryBuffer::getFile(FileEnt->getName(), strlen(FileEnt->getName()),
52 FileEnt->getSize());
53#endif
54
55 // If the file is larger than some threshold, use 'read', otherwise use mmap.
56 if (FileEnt->getSize() >= 4096*4)
57 return MemoryBuffer::getFile(FileEnt->getName(), strlen(FileEnt->getName()),
58 0, FileEnt->getSize());
59
60 MemoryBuffer *SB = MemoryBuffer::getNewUninitMemBuffer(FileEnt->getSize(),
61 FileEnt->getName());
62 char *BufPtr = const_cast<char*>(SB->getBufferStart());
63
64 int FD = ::open(FileEnt->getName(), O_RDONLY);
65 if (FD == -1) {
66 delete SB;
67 return 0;
68 }
69
70 unsigned BytesLeft = FileEnt->getSize();
71 while (BytesLeft) {
72 ssize_t NumRead = ::read(FD, BufPtr, BytesLeft);
73 if (NumRead != -1) {
74 BytesLeft -= NumRead;
75 BufPtr += NumRead;
76 } else if (errno == EINTR) {
77 // try again
78 } else {
79 // error reading.
80 close(FD);
81 delete SB;
82 return 0;
83 }
84 }
85 close(FD);
86
87 return SB;
88}
89
90
91/// getFileInfo - Create or return a cached FileInfo for the specified file.
92///
93const InfoRec *
94SourceManager::getInfoRec(const FileEntry *FileEnt) {
95 assert(FileEnt && "Didn't specify a file entry to use?");
96 // Do we already have information about this file?
97 std::map<const FileEntry *, FileInfo>::iterator I =
98 FileInfos.lower_bound(FileEnt);
99 if (I != FileInfos.end() && I->first == FileEnt)
100 return &*I;
101
102 // Nope, get information.
103 const MemoryBuffer *File = ReadFileFast(FileEnt);
104 if (File == 0)
105 return 0;
106
107 const InfoRec &Entry =
108 *FileInfos.insert(I, std::make_pair(FileEnt, FileInfo()));
109 FileInfo &Info = const_cast<FileInfo &>(Entry.second);
110
111 Info.Buffer = File;
112 Info.SourceLineCache = 0;
113 Info.NumLines = 0;
114 return &Entry;
115}
116
117
118/// createMemBufferInfoRec - Create a new info record for the specified memory
119/// buffer. This does no caching.
120const InfoRec *
121SourceManager::createMemBufferInfoRec(const MemoryBuffer *Buffer) {
122 // Add a new info record to the MemBufferInfos list and return it.
123 FileInfo FI;
124 FI.Buffer = Buffer;
125 FI.SourceLineCache = 0;
126 FI.NumLines = 0;
127 MemBufferInfos.push_back(InfoRec(0, FI));
128 return &MemBufferInfos.back();
129}
130
131
132/// createFileID - Create a new fileID for the specified InfoRec and include
133/// position. This works regardless of whether the InfoRec corresponds to a
134/// file or some other input source.
135unsigned SourceManager::createFileID(const InfoRec *File,
136 SourceLocation IncludePos) {
137 // If FileEnt is really large (e.g. it's a large .i file), we may not be able
138 // to fit an arbitrary position in the file in the FilePos field. To handle
139 // this, we create one FileID for each chunk of the file that fits in a
140 // FilePos field.
141 unsigned FileSize = File->second.Buffer->getBufferSize();
142 if (FileSize+1 < (1 << SourceLocation::FilePosBits)) {
Chris Lattner9dc1f532007-07-20 16:37:10 +0000143 FileIDs.push_back(FileIDInfo::get(IncludePos, 0, File));
Reid Spencer5f016e22007-07-11 17:01:13 +0000144 assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) &&
145 "Ran out of file ID's!");
146 return FileIDs.size();
147 }
148
149 // Create one FileID for each chunk of the file.
150 unsigned Result = FileIDs.size()+1;
151
152 unsigned ChunkNo = 0;
153 while (1) {
Chris Lattner9dc1f532007-07-20 16:37:10 +0000154 FileIDs.push_back(FileIDInfo::get(IncludePos, ChunkNo++, File));
Reid Spencer5f016e22007-07-11 17:01:13 +0000155
156 if (FileSize+1 < (1 << SourceLocation::FilePosBits)) break;
157 FileSize -= (1 << SourceLocation::FilePosBits);
158 }
159
160 assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) &&
161 "Ran out of file ID's!");
162 return Result;
163}
164
165/// getInstantiationLoc - Return a new SourceLocation that encodes the fact
166/// that a token from physloc PhysLoc should actually be referenced from
167/// InstantiationLoc.
Chris Lattnerabca2bb2007-07-15 06:35:27 +0000168SourceLocation SourceManager::getInstantiationLoc(SourceLocation VirtLoc,
Reid Spencer5f016e22007-07-11 17:01:13 +0000169 SourceLocation InstantLoc) {
Chris Lattnerabca2bb2007-07-15 06:35:27 +0000170 // The specified source location may be a mapped location, due to a macro
171 // instantiation or #line directive. Strip off this information to find out
172 // where the characters are actually located.
173 SourceLocation PhysLoc = getPhysicalLoc(VirtLoc);
Reid Spencer5f016e22007-07-11 17:01:13 +0000174
175 // Resolve InstantLoc down to a real logical location.
176 InstantLoc = getLogicalLoc(InstantLoc);
177
Chris Lattner9dc1f532007-07-20 16:37:10 +0000178 // FIXME: intelligently cache macroid's.
179 MacroIDs.push_back(MacroIDInfo::get(InstantLoc, PhysLoc));
180
181 return SourceLocation::getMacroLoc(MacroIDs.size()-1, 0, 0);
182
183#if 0
Reid Spencer5f016e22007-07-11 17:01:13 +0000184 unsigned InstantiationFileID;
Chris Lattner9dc1f532007-07-20 16:37:10 +0000185
Reid Spencer5f016e22007-07-11 17:01:13 +0000186 // If this is the same instantiation as was requested last time, return this
187 // immediately.
188 if (PhysLoc.getFileID() == LastInstantiationLoc_MacroFID &&
189 InstantLoc == LastInstantiationLoc_InstantLoc) {
190 InstantiationFileID = LastInstantiationLoc_Result;
191 } else {
192 // Add a FileID for this. FIXME: should cache these!
193 FileIDs.push_back(FileIDInfo::getMacroExpansion(InstantLoc,
194 PhysLoc.getFileID()));
195 InstantiationFileID = FileIDs.size();
196
197 // Remember this in the single-entry cache for next time.
198 LastInstantiationLoc_MacroFID = PhysLoc.getFileID();
199 LastInstantiationLoc_InstantLoc = InstantLoc;
200 LastInstantiationLoc_Result = InstantiationFileID;
201 }
Chris Lattner9dc1f532007-07-20 16:37:10 +0000202 return SourceLocation::getMacroLoc(InstantiationFileID,
203 PhysLoc.getRawFilePos());
204#endif
Reid Spencer5f016e22007-07-11 17:01:13 +0000205}
206
207
208
209/// getCharacterData - Return a pointer to the start of the specified location
210/// in the appropriate MemoryBuffer.
211const char *SourceManager::getCharacterData(SourceLocation SL) const {
212 // Note that this is a hot function in the getSpelling() path, which is
213 // heavily used by -E mode.
Chris Lattner9dc1f532007-07-20 16:37:10 +0000214 SL = getPhysicalLoc(SL);
Reid Spencer5f016e22007-07-11 17:01:13 +0000215
Chris Lattner9dc1f532007-07-20 16:37:10 +0000216 return getFileInfo(SL.getFileID())->Buffer->getBufferStart() +
217 getFullFilePos(SL);
Reid Spencer5f016e22007-07-11 17:01:13 +0000218}
219
Reid Spencer5f016e22007-07-11 17:01:13 +0000220
Chris Lattner9dc1f532007-07-20 16:37:10 +0000221/// getColumnNumber - Return the column # for the specified file position.
Reid Spencer5f016e22007-07-11 17:01:13 +0000222/// this is significantly cheaper to compute than the line number. This returns
223/// zero if the column number isn't known.
224unsigned SourceManager::getColumnNumber(SourceLocation Loc) const {
Reid Spencer5f016e22007-07-11 17:01:13 +0000225 unsigned FileID = Loc.getFileID();
226 if (FileID == 0) return 0;
227
Chris Lattner9dc1f532007-07-20 16:37:10 +0000228 unsigned FilePos = getFullFilePos(Loc);
Reid Spencer5f016e22007-07-11 17:01:13 +0000229 const MemoryBuffer *Buffer = getBuffer(FileID);
230 const char *Buf = Buffer->getBufferStart();
231
232 unsigned LineStart = FilePos;
233 while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r')
234 --LineStart;
235 return FilePos-LineStart+1;
236}
237
238/// getSourceName - This method returns the name of the file or buffer that
239/// the SourceLocation specifies. This can be modified with #line directives,
240/// etc.
241std::string SourceManager::getSourceName(SourceLocation Loc) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000242 unsigned FileID = Loc.getFileID();
243 if (FileID == 0) return "";
244 return getFileInfo(FileID)->Buffer->getBufferIdentifier();
245}
246
247
248/// getLineNumber - Given a SourceLocation, return the physical line number
249/// for the position indicated. This requires building and caching a table of
250/// line offsets for the MemoryBuffer, so this is not cheap: use only when
251/// about to emit a diagnostic.
252unsigned SourceManager::getLineNumber(SourceLocation Loc) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000253 unsigned FileID = Loc.getFileID();
254 if (FileID == 0) return 0;
255 FileInfo *FileInfo = getFileInfo(FileID);
256
257 // If this is the first use of line information for this buffer, compute the
258 /// SourceLineCache for it on demand.
259 if (FileInfo->SourceLineCache == 0) {
260 const MemoryBuffer *Buffer = FileInfo->Buffer;
261
262 // Find the file offsets of all of the *physical* source lines. This does
263 // not look at trigraphs, escaped newlines, or anything else tricky.
264 std::vector<unsigned> LineOffsets;
265
266 // Line #1 starts at char 0.
267 LineOffsets.push_back(0);
268
269 const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart();
270 const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd();
271 unsigned Offs = 0;
272 while (1) {
273 // Skip over the contents of the line.
274 // TODO: Vectorize this? This is very performance sensitive for programs
275 // with lots of diagnostics and in -E mode.
276 const unsigned char *NextBuf = (const unsigned char *)Buf;
277 while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0')
278 ++NextBuf;
279 Offs += NextBuf-Buf;
280 Buf = NextBuf;
281
282 if (Buf[0] == '\n' || Buf[0] == '\r') {
283 // If this is \n\r or \r\n, skip both characters.
284 if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1])
285 ++Offs, ++Buf;
286 ++Offs, ++Buf;
287 LineOffsets.push_back(Offs);
288 } else {
289 // Otherwise, this is a null. If end of file, exit.
290 if (Buf == End) break;
291 // Otherwise, skip the null.
292 ++Offs, ++Buf;
293 }
294 }
295 LineOffsets.push_back(Offs);
296
297 // Copy the offsets into the FileInfo structure.
298 FileInfo->NumLines = LineOffsets.size();
299 FileInfo->SourceLineCache = new unsigned[LineOffsets.size()];
300 std::copy(LineOffsets.begin(), LineOffsets.end(),
301 FileInfo->SourceLineCache);
302 }
303
304 // Okay, we know we have a line number table. Do a binary search to find the
305 // line number that this character position lands on.
306 unsigned NumLines = FileInfo->NumLines;
307 unsigned *SourceLineCache = FileInfo->SourceLineCache;
308
309 // TODO: If this is performance sensitive, we could try doing simple radix
310 // type approaches to make good (tight?) initial guesses based on the
311 // assumption that all lines are the same average size.
312 unsigned *Pos = std::lower_bound(SourceLineCache, SourceLineCache+NumLines,
Chris Lattner9dc1f532007-07-20 16:37:10 +0000313 getFullFilePos(Loc)+1);
Reid Spencer5f016e22007-07-11 17:01:13 +0000314 return Pos-SourceLineCache;
315}
316
Reid Spencer5f016e22007-07-11 17:01:13 +0000317/// PrintStats - Print statistics to stderr.
318///
319void SourceManager::PrintStats() const {
320 std::cerr << "\n*** Source Manager Stats:\n";
321 std::cerr << FileInfos.size() << " files mapped, " << MemBufferInfos.size()
322 << " mem buffers mapped, " << FileIDs.size()
323 << " file ID's allocated.\n";
Chris Lattner9dc1f532007-07-20 16:37:10 +0000324 std::cerr << " " << FileIDs.size() << " normal buffer FileID's, "
325 << MacroIDs.size() << " macro expansion FileID's.\n";
Reid Spencer5f016e22007-07-11 17:01:13 +0000326
327
328
329 unsigned NumLineNumsComputed = 0;
330 unsigned NumFileBytesMapped = 0;
331 for (std::map<const FileEntry *, FileInfo>::const_iterator I =
332 FileInfos.begin(), E = FileInfos.end(); I != E; ++I) {
333 NumLineNumsComputed += I->second.SourceLineCache != 0;
334 NumFileBytesMapped += I->second.Buffer->getBufferSize();
335 }
336 std::cerr << NumFileBytesMapped << " bytes of files mapped, "
337 << NumLineNumsComputed << " files with line #'s computed.\n";
338}