blob: 0adcf410dff8323d463586e2069166b03ff26874 [file] [log] [blame]
Ted Kremenek274b2082008-11-12 21:37:15 +00001//===--- PTHLexer.cpp - Lex from a token stream ---------------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the PTHLexer interface.
11//
12//===----------------------------------------------------------------------===//
13
Ted Kremenek0c6a77b2008-12-03 00:38:03 +000014#include "clang/Basic/TokenKinds.h"
15#include "clang/Basic/FileManager.h"
16#include "clang/Basic/IdentifierTable.h"
Ted Kremenek274b2082008-11-12 21:37:15 +000017#include "clang/Lex/PTHLexer.h"
18#include "clang/Lex/Preprocessor.h"
Ted Kremenek0c6a77b2008-12-03 00:38:03 +000019#include "clang/Lex/PTHManager.h"
20#include "clang/Lex/Token.h"
21#include "clang/Lex/Preprocessor.h"
22#include "llvm/Support/Compiler.h"
23#include "llvm/Support/MemoryBuffer.h"
24#include "llvm/ADT/StringMap.h"
25#include "llvm/ADT/OwningPtr.h"
Ted Kremenek0c6a77b2008-12-03 00:38:03 +000026
Ted Kremenek274b2082008-11-12 21:37:15 +000027using namespace clang;
28
Ted Kremenek0c6a77b2008-12-03 00:38:03 +000029PTHLexer::PTHLexer(Preprocessor& pp, SourceLocation fileloc, const char* D,
30 PTHManager& PM)
Ted Kremenekcd223442008-12-11 22:41:47 +000031 : PreprocessorLexer(&pp, fileloc), TokBuf(D), CurPtr(D), LastHashTokPtr(0),
32 PTHMgr(PM),
Ted Kremenek0c6a77b2008-12-03 00:38:03 +000033 NeedsFetching(true) {
34 // Make sure the EofToken is completely clean.
35 EofToken.startToken();
36 }
Ted Kremenek274b2082008-11-12 21:37:15 +000037
Ted Kremenek0c6a77b2008-12-03 00:38:03 +000038Token PTHLexer::GetToken() {
39 // Read the next token, or if we haven't advanced yet, get the last
40 // token read.
41 if (NeedsFetching) {
42 NeedsFetching = false;
43 ReadToken(LastFetched);
44 }
45
46 Token Tok = LastFetched;
Ted Kremenek89d7ee92008-11-20 19:49:00 +000047
48 // If we are in raw mode, zero out identifier pointers. This is
49 // needed for 'pragma poison'. Note that this requires that the Preprocessor
50 // can go back to the original source when it calls getSpelling().
51 if (LexingRawMode && Tok.is(tok::identifier))
52 Tok.setIdentifierInfo(0);
53
54 return Tok;
55}
56
Ted Kremenek274b2082008-11-12 21:37:15 +000057void PTHLexer::Lex(Token& Tok) {
Ted Kremenekd6f53dc2008-11-20 07:58:05 +000058LexNextToken:
Ted Kremenek31aba422008-11-20 16:32:22 +000059 Tok = GetToken();
Ted Kremenek274b2082008-11-12 21:37:15 +000060
Ted Kremenekcd4e2ae2008-11-21 00:58:35 +000061 if (AtLastToken()) {
62 Preprocessor *PPCache = PP;
63
64 if (LexEndOfFile(Tok))
65 return;
66
67 assert(PPCache && "Raw buffer::LexEndOfFile should return a token");
68 return PPCache->Lex(Tok);
69 }
70
Ted Kremenekd6f53dc2008-11-20 07:58:05 +000071 // Don't advance to the next token yet. Check if we are at the
72 // start of a new line and we're processing a directive. If so, we
73 // consume this token twice, once as an tok::eom.
74 if (Tok.isAtStartOfLine() && ParsingPreprocessorDirective) {
75 ParsingPreprocessorDirective = false;
76 Tok.setKind(tok::eom);
Ted Kremenek274b2082008-11-12 21:37:15 +000077 MIOpt.ReadToken();
Ted Kremenek274b2082008-11-12 21:37:15 +000078 return;
79 }
Ted Kremenekd6f53dc2008-11-20 07:58:05 +000080
81 // Advance to the next token.
Ted Kremenek31aba422008-11-20 16:32:22 +000082 AdvanceToken();
Ted Kremenek274b2082008-11-12 21:37:15 +000083
Ted Kremenekd6f53dc2008-11-20 07:58:05 +000084 if (Tok.is(tok::hash)) {
85 if (Tok.isAtStartOfLine() && !LexingRawMode) {
Ted Kremenekcd223442008-12-11 22:41:47 +000086 LastHashTokPtr = CurPtr;
87
Ted Kremenekd6f53dc2008-11-20 07:58:05 +000088 PP->HandleDirective(Tok);
89
90 if (PP->isCurrentLexer(this))
91 goto LexNextToken;
92
93 return PP->Lex(Tok);
94 }
95 }
96
Ted Kremenek274b2082008-11-12 21:37:15 +000097 MIOpt.ReadToken();
Ted Kremenekd6f53dc2008-11-20 07:58:05 +000098
99 if (Tok.is(tok::identifier)) {
100 if (LexingRawMode) return;
101 return PP->HandleIdentifier(Tok);
102 }
Ted Kremenek274b2082008-11-12 21:37:15 +0000103}
104
Ted Kremenekcd4e2ae2008-11-21 00:58:35 +0000105bool PTHLexer::LexEndOfFile(Token &Tok) {
106
107 if (ParsingPreprocessorDirective) {
108 ParsingPreprocessorDirective = false;
109 Tok.setKind(tok::eom);
110 MIOpt.ReadToken();
111 return true; // Have a token.
112 }
113
114 if (LexingRawMode) {
115 MIOpt.ReadToken();
116 return true; // Have an eof token.
117 }
118
119 // FIXME: Issue diagnostics similar to Lexer.
120 return PP->HandleEndOfFile(Tok, false);
121}
122
Ted Kremenek274b2082008-11-12 21:37:15 +0000123void PTHLexer::setEOF(Token& Tok) {
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000124 assert(!EofToken.is(tok::eof));
125 Tok = EofToken;
Ted Kremenek274b2082008-11-12 21:37:15 +0000126}
Ted Kremenek17ff58a2008-11-19 22:21:33 +0000127
128void PTHLexer::DiscardToEndOfLine() {
129 assert(ParsingPreprocessorDirective && ParsingFilename == false &&
130 "Must be in a preprocessing directive!");
Ted Kremenek4d35da22008-11-20 01:16:50 +0000131
132 // Already at end-of-file?
Ted Kremenek31aba422008-11-20 16:32:22 +0000133 if (AtLastToken())
Ted Kremenek4d35da22008-11-20 01:16:50 +0000134 return;
135
136 // Find the first token that is not the start of the *current* line.
Ted Kremenekd2bdeed2008-11-21 23:28:56 +0000137 Token T;
138 for (Lex(T); !AtLastToken(); Lex(T))
Ted Kremenek31aba422008-11-20 16:32:22 +0000139 if (GetToken().isAtStartOfLine())
Ted Kremenek4d35da22008-11-20 01:16:50 +0000140 return;
Ted Kremenek17ff58a2008-11-19 22:21:33 +0000141}
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000142
143//===----------------------------------------------------------------------===//
144// Utility methods for reading from the mmap'ed PTH file.
145//===----------------------------------------------------------------------===//
146
/// Read one byte at \p data as an unsigned 8-bit value and advance \p data
/// past it.
static inline uint8_t Read8(const char*& data) {
  const char Byte = *data;
  ++data;
  return static_cast<uint8_t>(Byte);
}
150
/// Read a 32-bit value stored least-significant byte first at \p data and
/// advance \p data by four bytes.
static inline uint32_t Read32(const char*& data) {
  uint32_t Value = 0;
  for (unsigned Shift = 0; Shift != 32; Shift += 8) {
    // Go through uint8_t so that a negative char does not sign-extend.
    const uint8_t Byte = static_cast<uint8_t>(*data);
    ++data;
    Value |= static_cast<uint32_t>(Byte) << Shift;
  }
  return Value;
}
158
159//===----------------------------------------------------------------------===//
160// Token reconstruction from the PTH file.
161//===----------------------------------------------------------------------===//
162
163void PTHLexer::ReadToken(Token& T) {
164 // Clear the token.
165 // FIXME: Setting the flags directly should obviate this step.
166 T.startToken();
167
168 // Read the type of the token.
Ted Kremenekcd223442008-12-11 22:41:47 +0000169 T.setKind((tok::TokenKind) Read8(CurPtr));
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000170
171 // Set flags. This is gross, since we are really setting multiple flags.
Ted Kremenekcd223442008-12-11 22:41:47 +0000172 T.setFlag((Token::TokenFlags) Read8(CurPtr));
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000173
174 // Set the IdentifierInfo* (if any).
Ted Kremenekcd223442008-12-11 22:41:47 +0000175 T.setIdentifierInfo(PTHMgr.ReadIdentifierInfo(CurPtr));
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000176
177 // Set the SourceLocation. Since all tokens are constructed using a
178 // raw lexer, they will all be offseted from the same FileID.
Ted Kremenekcd223442008-12-11 22:41:47 +0000179 T.setLocation(SourceLocation::getFileLoc(FileID, Read32(CurPtr)));
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000180
181 // Finally, read and set the length of the token.
Ted Kremenekcd223442008-12-11 22:41:47 +0000182 T.setLength(Read32(CurPtr));
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000183}
184
185//===----------------------------------------------------------------------===//
186// Internal Data Structures for PTH file lookup and resolving identifiers.
187//===----------------------------------------------------------------------===//
188
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000189
190/// PTHFileLookup - This internal data structure is used by the PTHManager
191/// to map from FileEntry objects managed by FileManager to offsets within
192/// the PTH file.
193namespace {
194class VISIBILITY_HIDDEN PTHFileLookup {
195public:
196 class Val {
Ted Kremenekfb645b62008-12-11 23:36:38 +0000197 uint32_t TokenOff;
198 uint32_t PPCondOff;
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000199
200 public:
Ted Kremenekfb645b62008-12-11 23:36:38 +0000201 Val() : TokenOff(~0) {}
202 Val(uint32_t toff, uint32_t poff) : TokenOff(toff), PPCondOff(poff) {}
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000203
Ted Kremenekfb645b62008-12-11 23:36:38 +0000204 uint32_t getTokenOffset() const {
205 assert(TokenOff != ~((uint32_t)0) && "PTHFileLookup entry initialized.");
206 return TokenOff;
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000207 }
208
Ted Kremenekfb645b62008-12-11 23:36:38 +0000209 uint32_t gettPPCondOffset() const {
210 assert(TokenOff != ~((uint32_t)0) && "PTHFileLookup entry initialized.");
211 return PPCondOff;
212 }
213
214 bool isValid() const { return TokenOff != ~((uint32_t)0); }
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000215 };
216
217private:
218 llvm::StringMap<Val> FileMap;
219
220public:
221 PTHFileLookup() {};
222
223 Val Lookup(const FileEntry* FE) {
224 const char* s = FE->getName();
225 unsigned size = strlen(s);
226 return FileMap.GetOrCreateValue(s, s+size).getValue();
227 }
228
229 void ReadTable(const char* D) {
230 uint32_t N = Read32(D); // Read the length of the table.
231
232 for ( ; N > 0; --N) { // The rest of the data is the table itself.
233 uint32_t len = Read32(D);
234 const char* s = D;
235 D += len;
Ted Kremenekfb645b62008-12-11 23:36:38 +0000236 uint32_t TokenOff = Read32(D);
237 FileMap.GetOrCreateValue(s, s+len).getValue() = Val(TokenOff, Read32(D));
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000238 }
239 }
240};
241} // end anonymous namespace
242
243//===----------------------------------------------------------------------===//
244// PTHManager methods.
245//===----------------------------------------------------------------------===//
246
247PTHManager::PTHManager(const llvm::MemoryBuffer* buf, void* fileLookup,
Ted Kremenekcf58e622008-12-10 19:40:23 +0000248 const char* idDataTable, IdentifierInfo** perIDCache,
Ted Kremenek6183e482008-12-03 01:16:39 +0000249 Preprocessor& pp)
250: Buf(buf), PerIDCache(perIDCache), FileLookup(fileLookup),
251 IdDataTable(idDataTable), ITable(pp.getIdentifierTable()), PP(pp) {}
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000252
253PTHManager::~PTHManager() {
254 delete Buf;
255 delete (PTHFileLookup*) FileLookup;
Ted Kremenek0e50b6e2008-12-04 22:47:11 +0000256 free(PerIDCache);
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000257}
258
259PTHManager* PTHManager::Create(const std::string& file, Preprocessor& PP) {
260
261 // Memory map the PTH file.
262 llvm::OwningPtr<llvm::MemoryBuffer>
263 File(llvm::MemoryBuffer::getFile(file.c_str()));
264
265 if (!File)
266 return 0;
267
268 // Get the buffer ranges and check if there are at least three 32-bit
269 // words at the end of the file.
270 const char* BufBeg = File->getBufferStart();
271 const char* BufEnd = File->getBufferEnd();
272
273 if(!(BufEnd > BufBeg + sizeof(uint32_t)*3)) {
274 assert(false && "Invalid PTH file.");
275 return 0; // FIXME: Proper error diagnostic?
276 }
277
278 // Compute the address of the index table at the end of the PTH file.
279 // This table contains the offset of the file lookup table, the
280 // persistent ID -> identifer data table.
281 const char* EndTable = BufEnd - sizeof(uint32_t)*3;
282
283 // Construct the file lookup table. This will be used for mapping from
284 // FileEntry*'s to cached tokens.
285 const char* FileTableOffset = EndTable + sizeof(uint32_t)*2;
286 const char* FileTable = BufBeg + Read32(FileTableOffset);
287
288 if (!(FileTable > BufBeg && FileTable < BufEnd)) {
289 assert(false && "Invalid PTH file.");
290 return 0; // FIXME: Proper error diagnostic?
291 }
292
293 llvm::OwningPtr<PTHFileLookup> FL(new PTHFileLookup());
294 FL->ReadTable(FileTable);
295
296 // Get the location of the table mapping from persistent ids to the
297 // data needed to reconstruct identifiers.
298 const char* IDTableOffset = EndTable + sizeof(uint32_t)*1;
299 const char* IData = BufBeg + Read32(IDTableOffset);
300 if (!(IData > BufBeg && IData < BufEnd)) {
301 assert(false && "Invalid PTH file.");
302 return 0; // FIXME: Proper error diagnostic?
303 }
304
Ted Kremenek6183e482008-12-03 01:16:39 +0000305 // Get the number of IdentifierInfos and pre-allocate the identifier cache.
306 uint32_t NumIds = Read32(IData);
307
308 // Pre-allocate the peristent ID -> IdentifierInfo* cache. We use calloc()
309 // so that we in the best case only zero out memory once when the OS returns
310 // us new pages.
311 IdentifierInfo** PerIDCache =
312 (IdentifierInfo**) calloc(NumIds, sizeof(*PerIDCache));
313
314 if (!PerIDCache) {
315 assert(false && "Could not allocate Persistent ID cache.");
316 return 0;
317 }
318
319 // Create the new lexer.
320 return new PTHManager(File.take(), FL.take(), IData, PerIDCache, PP);
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000321}
322
323IdentifierInfo* PTHManager::ReadIdentifierInfo(const char*& D) {
324 // Read the persistent ID from the PTH file.
325 uint32_t persistentID = Read32(D);
326
327 // A persistent ID of '0' always maps to NULL.
328 if (!persistentID)
329 return 0;
330
331 // Adjust the persistent ID by subtracting '1' so that it can be used
332 // as an index within a table in the PTH file.
333 --persistentID;
334
335 // Check if the IdentifierInfo has already been resolved.
Ted Kremenekcf58e622008-12-10 19:40:23 +0000336 IdentifierInfo*& II = PerIDCache[persistentID];
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000337 if (II) return II;
338
339 // Look in the PTH file for the string data for the IdentifierInfo object.
340 const char* TableEntry = IdDataTable + sizeof(uint32_t) * persistentID;
341 const char* IDData = Buf->getBufferStart() + Read32(TableEntry);
342 assert(IDData < Buf->getBufferEnd());
343
344 // Read the length of the string.
345 uint32_t len = Read32(IDData);
346
347 // Get the IdentifierInfo* with the specified string.
348 II = &ITable.get(IDData, IDData+len);
349 return II;
350}
351
352PTHLexer* PTHManager::CreateLexer(unsigned FileID, const FileEntry* FE) {
353
354 if (!FE)
355 return 0;
356
357 // Lookup the FileEntry object in our file lookup data structure. It will
358 // return a variant that indicates whether or not there is an offset within
359 // the PTH file that contains cached tokens.
Ted Kremenekfb645b62008-12-11 23:36:38 +0000360 PTHFileLookup::Val FileData = ((PTHFileLookup*) FileLookup)->Lookup(FE);
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000361
Ted Kremenekfb645b62008-12-11 23:36:38 +0000362 if (!FileData.isValid()) // No tokens available.
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000363 return 0;
364
365 // Compute the offset of the token data within the buffer.
Ted Kremenekfb645b62008-12-11 23:36:38 +0000366 const char* data = Buf->getBufferStart() + FileData.getTokenOffset();
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000367 assert(data < Buf->getBufferEnd());
368 return new PTHLexer(PP, SourceLocation::getFileLoc(FileID, 0), data, *this);
369}