blob: 65f27c5bab33f0fb18572ba42e657087b465d456 [file] [log] [blame]
Ted Kremenekca820862008-11-12 21:37:15 +00001//===--- PTHLexer.cpp - Lex from a token stream ---------------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the PTHLexer interface.
11//
12//===----------------------------------------------------------------------===//
13
Ted Kremenek325cd302008-12-03 00:38:03 +000014#include "clang/Basic/TokenKinds.h"
15#include "clang/Basic/FileManager.h"
16#include "clang/Basic/IdentifierTable.h"
Ted Kremenekca820862008-11-12 21:37:15 +000017#include "clang/Lex/PTHLexer.h"
18#include "clang/Lex/Preprocessor.h"
Ted Kremenek325cd302008-12-03 00:38:03 +000019#include "clang/Lex/PTHManager.h"
20#include "clang/Lex/Token.h"
21#include "clang/Lex/Preprocessor.h"
22#include "llvm/Support/Compiler.h"
23#include "llvm/Support/MemoryBuffer.h"
24#include "llvm/ADT/StringMap.h"
25#include "llvm/ADT/OwningPtr.h"
Ted Kremenek325cd302008-12-03 00:38:03 +000026
Ted Kremenekca820862008-11-12 21:37:15 +000027using namespace clang;
28
Ted Kremenekc07091c2008-12-12 18:34:08 +000029#define DISK_TOKEN_SIZE (2+3*4)
30
Ted Kremenek325cd302008-12-03 00:38:03 +000031PTHLexer::PTHLexer(Preprocessor& pp, SourceLocation fileloc, const char* D,
Ted Kremenekc07091c2008-12-12 18:34:08 +000032 const char* ppcond, PTHManager& PM)
Ted Kremenekfe5c62d2008-12-11 22:41:47 +000033 : PreprocessorLexer(&pp, fileloc), TokBuf(D), CurPtr(D), LastHashTokPtr(0),
Ted Kremenekc07091c2008-12-12 18:34:08 +000034 PPCond(ppcond), CurPPCondPtr(ppcond), PTHMgr(PM), NeedsFetching(true) {
Ted Kremenek325cd302008-12-03 00:38:03 +000035 // Make sure the EofToken is completely clean.
36 EofToken.startToken();
37 }
Ted Kremenekca820862008-11-12 21:37:15 +000038
Ted Kremenek325cd302008-12-03 00:38:03 +000039Token PTHLexer::GetToken() {
40 // Read the next token, or if we haven't advanced yet, get the last
41 // token read.
42 if (NeedsFetching) {
43 NeedsFetching = false;
44 ReadToken(LastFetched);
45 }
46
47 Token Tok = LastFetched;
Ted Kremenek444b6bf2008-11-20 19:49:00 +000048
49 // If we are in raw mode, zero out identifier pointers. This is
50 // needed for 'pragma poison'. Note that this requires that the Preprocessor
51 // can go back to the original source when it calls getSpelling().
52 if (LexingRawMode && Tok.is(tok::identifier))
53 Tok.setIdentifierInfo(0);
54
55 return Tok;
56}
57
Ted Kremenekca820862008-11-12 21:37:15 +000058void PTHLexer::Lex(Token& Tok) {
Ted Kremeneke7cdb0a2008-11-20 07:58:05 +000059LexNextToken:
Ted Kremenekc37a3a22008-11-20 16:32:22 +000060 Tok = GetToken();
Ted Kremenekca820862008-11-12 21:37:15 +000061
Ted Kremenek4491ce52008-11-21 00:58:35 +000062 if (AtLastToken()) {
63 Preprocessor *PPCache = PP;
64
65 if (LexEndOfFile(Tok))
66 return;
67
68 assert(PPCache && "Raw buffer::LexEndOfFile should return a token");
69 return PPCache->Lex(Tok);
70 }
71
Ted Kremeneke7cdb0a2008-11-20 07:58:05 +000072 // Don't advance to the next token yet. Check if we are at the
73 // start of a new line and we're processing a directive. If so, we
74 // consume this token twice, once as an tok::eom.
75 if (Tok.isAtStartOfLine() && ParsingPreprocessorDirective) {
76 ParsingPreprocessorDirective = false;
77 Tok.setKind(tok::eom);
Ted Kremenekca820862008-11-12 21:37:15 +000078 MIOpt.ReadToken();
Ted Kremenekca820862008-11-12 21:37:15 +000079 return;
80 }
Ted Kremeneke7cdb0a2008-11-20 07:58:05 +000081
82 // Advance to the next token.
Ted Kremenekc37a3a22008-11-20 16:32:22 +000083 AdvanceToken();
Ted Kremenekca820862008-11-12 21:37:15 +000084
Ted Kremeneke7cdb0a2008-11-20 07:58:05 +000085 if (Tok.is(tok::hash)) {
Ted Kremenekc07091c2008-12-12 18:34:08 +000086 if (Tok.isAtStartOfLine()) {
87 LastHashTokPtr = CurPtr - DISK_TOKEN_SIZE;
88 if (!LexingRawMode) {
89 PP->HandleDirective(Tok);
Ted Kremeneke7cdb0a2008-11-20 07:58:05 +000090
Ted Kremenekc07091c2008-12-12 18:34:08 +000091 if (PP->isCurrentLexer(this))
92 goto LexNextToken;
93
94 return PP->Lex(Tok);
95 }
Ted Kremeneke7cdb0a2008-11-20 07:58:05 +000096 }
97 }
98
Ted Kremenekca820862008-11-12 21:37:15 +000099 MIOpt.ReadToken();
Ted Kremeneke7cdb0a2008-11-20 07:58:05 +0000100
101 if (Tok.is(tok::identifier)) {
102 if (LexingRawMode) return;
103 return PP->HandleIdentifier(Tok);
104 }
Ted Kremenekca820862008-11-12 21:37:15 +0000105}
106
Ted Kremenek4491ce52008-11-21 00:58:35 +0000107bool PTHLexer::LexEndOfFile(Token &Tok) {
108
109 if (ParsingPreprocessorDirective) {
110 ParsingPreprocessorDirective = false;
111 Tok.setKind(tok::eom);
112 MIOpt.ReadToken();
113 return true; // Have a token.
114 }
115
116 if (LexingRawMode) {
117 MIOpt.ReadToken();
118 return true; // Have an eof token.
119 }
120
121 // FIXME: Issue diagnostics similar to Lexer.
122 return PP->HandleEndOfFile(Tok, false);
123}
124
Ted Kremenekca820862008-11-12 21:37:15 +0000125void PTHLexer::setEOF(Token& Tok) {
Ted Kremenek325cd302008-12-03 00:38:03 +0000126 assert(!EofToken.is(tok::eof));
127 Tok = EofToken;
Ted Kremenekca820862008-11-12 21:37:15 +0000128}
Ted Kremenekb53b1f42008-11-19 22:21:33 +0000129
130void PTHLexer::DiscardToEndOfLine() {
131 assert(ParsingPreprocessorDirective && ParsingFilename == false &&
132 "Must be in a preprocessing directive!");
Ted Kremeneka295ef42008-11-20 01:16:50 +0000133
134 // Already at end-of-file?
Ted Kremenekc37a3a22008-11-20 16:32:22 +0000135 if (AtLastToken())
Ted Kremeneka295ef42008-11-20 01:16:50 +0000136 return;
137
138 // Find the first token that is not the start of the *current* line.
Ted Kremenek5464c8c2008-11-21 23:28:56 +0000139 Token T;
140 for (Lex(T); !AtLastToken(); Lex(T))
Ted Kremenekc37a3a22008-11-20 16:32:22 +0000141 if (GetToken().isAtStartOfLine())
Ted Kremeneka295ef42008-11-20 01:16:50 +0000142 return;
Ted Kremenekb53b1f42008-11-19 22:21:33 +0000143}
Ted Kremenek325cd302008-12-03 00:38:03 +0000144
145//===----------------------------------------------------------------------===//
146// Utility methods for reading from the mmap'ed PTH file.
147//===----------------------------------------------------------------------===//
148
/// Read8 - Return the byte at \p data as an unsigned value and advance
/// \p data past it.
static inline uint8_t Read8(const char*& data) {
  const uint8_t Byte = static_cast<uint8_t>(*data);
  ++data;
  return Byte;
}

/// Read32 - Assemble a 32-bit value from the four bytes at \p data, least
/// significant byte first, and advance \p data past them.
static inline uint32_t Read32(const char*& data) {
  uint32_t Value = 0;

  // Byte i contributes bits [8*i, 8*i+7].
  for (unsigned Shift = 0; Shift != 32; Shift += 8)
    Value |= static_cast<uint32_t>(Read8(data)) << Shift;

  return Value;
}
160
Ted Kremenekc07091c2008-12-12 18:34:08 +0000161/// SkipBlock - Used by Preprocessor to skip the current conditional block.
162bool PTHLexer::SkipBlock() {
163 assert(CurPPCondPtr && "No cached PP conditional information.");
164 assert(LastHashTokPtr && "No known '#' token.");
165
166 const char* Next = 0;
167 uint32_t Offset;
168 uint32_t TableIdx;
169
170 do {
171 Offset = Read32(CurPPCondPtr);
172 TableIdx = Read32(CurPPCondPtr);
173 Next = TokBuf + Offset;
174 }
175 while (Next < LastHashTokPtr);
176 assert(Next == LastHashTokPtr && "No PP-cond entry found for '#'");
177 assert(TableIdx && "No jumping from #endifs.");
178
179 // Update our side-table iterator.
180 const char* NextPPCondPtr = PPCond + TableIdx*(sizeof(uint32_t)*2);
181 assert(NextPPCondPtr >= CurPPCondPtr);
182 CurPPCondPtr = NextPPCondPtr;
183
184 // Read where we should jump to.
185 Next = TokBuf + Read32(NextPPCondPtr);
186 uint32_t NextIdx = Read32(NextPPCondPtr);
187
188 // By construction NextIdx will be zero if this is a #endif. This is useful
189 // to know to obviate lexing another token.
190 bool isEndif = NextIdx == 0;
191 NeedsFetching = true;
192
193 // This case can occur when we see something like this:
194 //
195 // #if ...
196 // /* a comment or nothing */
197 // #elif
198 //
199 // If we are skipping the first #if block it will be the case that CurPtr
200 // already points 'elif'. Just return.
201
202 if (CurPtr > Next) {
203 assert(CurPtr == Next + DISK_TOKEN_SIZE);
204 // Did we reach a #endif? If so, go ahead and consume that token as well.
205 if (isEndif)
206 CurPtr += DISK_TOKEN_SIZE;
207 else
208 LastHashTokPtr = Next;
209
210 return isEndif;
211 }
212
213 // Otherwise, we need to advance. Update CurPtr to point to the '#' token.
214 CurPtr = Next;
215
216 // Update the location of the last observed '#'. This is useful if we
217 // are skipping multiple blocks.
218 LastHashTokPtr = CurPtr;
219
220#ifndef DEBUG
221 // In a debug build we should verify that the token is really a '#' that
222 // appears at the start of the line.
223 Token Tok;
224 ReadToken(Tok);
225 assert(Tok.isAtStartOfLine() && Tok.is(tok::hash));
226#else
227 // In a full release build we can just skip the token entirely.
228 CurPtr += DISK_TOKEN_SIZE;
229#endif
230
231 // Did we reach a #endif? If so, go ahead and consume that token as well.
232 if (isEndif) { CurPtr += DISK_TOKEN_SIZE; }
233
234 return isEndif;
235}
236
Ted Kremenek325cd302008-12-03 00:38:03 +0000237//===----------------------------------------------------------------------===//
238// Token reconstruction from the PTH file.
239//===----------------------------------------------------------------------===//
240
241void PTHLexer::ReadToken(Token& T) {
242 // Clear the token.
243 // FIXME: Setting the flags directly should obviate this step.
244 T.startToken();
245
246 // Read the type of the token.
Ted Kremenekfe5c62d2008-12-11 22:41:47 +0000247 T.setKind((tok::TokenKind) Read8(CurPtr));
Ted Kremenek325cd302008-12-03 00:38:03 +0000248
249 // Set flags. This is gross, since we are really setting multiple flags.
Ted Kremenekfe5c62d2008-12-11 22:41:47 +0000250 T.setFlag((Token::TokenFlags) Read8(CurPtr));
Ted Kremenek325cd302008-12-03 00:38:03 +0000251
252 // Set the IdentifierInfo* (if any).
Ted Kremenekfe5c62d2008-12-11 22:41:47 +0000253 T.setIdentifierInfo(PTHMgr.ReadIdentifierInfo(CurPtr));
Ted Kremenek325cd302008-12-03 00:38:03 +0000254
255 // Set the SourceLocation. Since all tokens are constructed using a
256 // raw lexer, they will all be offseted from the same FileID.
Ted Kremenekfe5c62d2008-12-11 22:41:47 +0000257 T.setLocation(SourceLocation::getFileLoc(FileID, Read32(CurPtr)));
Ted Kremenek325cd302008-12-03 00:38:03 +0000258
259 // Finally, read and set the length of the token.
Ted Kremenekc07091c2008-12-12 18:34:08 +0000260 T.setLength(Read32(CurPtr));
Ted Kremenek325cd302008-12-03 00:38:03 +0000261}
262
263//===----------------------------------------------------------------------===//
264// Internal Data Structures for PTH file lookup and resolving identifiers.
265//===----------------------------------------------------------------------===//
266
Ted Kremenek325cd302008-12-03 00:38:03 +0000267
268/// PTHFileLookup - This internal data structure is used by the PTHManager
269/// to map from FileEntry objects managed by FileManager to offsets within
270/// the PTH file.
271namespace {
272class VISIBILITY_HIDDEN PTHFileLookup {
273public:
274 class Val {
Ted Kremenek8309c922008-12-11 23:36:38 +0000275 uint32_t TokenOff;
276 uint32_t PPCondOff;
Ted Kremenek325cd302008-12-03 00:38:03 +0000277
278 public:
Ted Kremenek8309c922008-12-11 23:36:38 +0000279 Val() : TokenOff(~0) {}
280 Val(uint32_t toff, uint32_t poff) : TokenOff(toff), PPCondOff(poff) {}
Ted Kremenek325cd302008-12-03 00:38:03 +0000281
Ted Kremenek8309c922008-12-11 23:36:38 +0000282 uint32_t getTokenOffset() const {
283 assert(TokenOff != ~((uint32_t)0) && "PTHFileLookup entry initialized.");
284 return TokenOff;
Ted Kremenek325cd302008-12-03 00:38:03 +0000285 }
286
Ted Kremenek8309c922008-12-11 23:36:38 +0000287 uint32_t gettPPCondOffset() const {
288 assert(TokenOff != ~((uint32_t)0) && "PTHFileLookup entry initialized.");
289 return PPCondOff;
290 }
291
292 bool isValid() const { return TokenOff != ~((uint32_t)0); }
Ted Kremenek325cd302008-12-03 00:38:03 +0000293 };
294
295private:
296 llvm::StringMap<Val> FileMap;
297
298public:
299 PTHFileLookup() {};
300
301 Val Lookup(const FileEntry* FE) {
302 const char* s = FE->getName();
303 unsigned size = strlen(s);
304 return FileMap.GetOrCreateValue(s, s+size).getValue();
305 }
306
307 void ReadTable(const char* D) {
308 uint32_t N = Read32(D); // Read the length of the table.
309
310 for ( ; N > 0; --N) { // The rest of the data is the table itself.
311 uint32_t len = Read32(D);
312 const char* s = D;
313 D += len;
Ted Kremenek8309c922008-12-11 23:36:38 +0000314 uint32_t TokenOff = Read32(D);
315 FileMap.GetOrCreateValue(s, s+len).getValue() = Val(TokenOff, Read32(D));
Ted Kremenek325cd302008-12-03 00:38:03 +0000316 }
317 }
318};
319} // end anonymous namespace
320
321//===----------------------------------------------------------------------===//
322// PTHManager methods.
323//===----------------------------------------------------------------------===//
324
325PTHManager::PTHManager(const llvm::MemoryBuffer* buf, void* fileLookup,
Ted Kremenek802fbd82008-12-10 19:40:23 +0000326 const char* idDataTable, IdentifierInfo** perIDCache,
Ted Kremenekdb4c8e82008-12-03 01:16:39 +0000327 Preprocessor& pp)
328: Buf(buf), PerIDCache(perIDCache), FileLookup(fileLookup),
329 IdDataTable(idDataTable), ITable(pp.getIdentifierTable()), PP(pp) {}
Ted Kremenek325cd302008-12-03 00:38:03 +0000330
331PTHManager::~PTHManager() {
332 delete Buf;
333 delete (PTHFileLookup*) FileLookup;
Ted Kremenek93bdc492008-12-04 22:47:11 +0000334 free(PerIDCache);
Ted Kremenek325cd302008-12-03 00:38:03 +0000335}
336
337PTHManager* PTHManager::Create(const std::string& file, Preprocessor& PP) {
338
339 // Memory map the PTH file.
340 llvm::OwningPtr<llvm::MemoryBuffer>
341 File(llvm::MemoryBuffer::getFile(file.c_str()));
342
343 if (!File)
344 return 0;
345
346 // Get the buffer ranges and check if there are at least three 32-bit
347 // words at the end of the file.
348 const char* BufBeg = File->getBufferStart();
349 const char* BufEnd = File->getBufferEnd();
350
351 if(!(BufEnd > BufBeg + sizeof(uint32_t)*3)) {
352 assert(false && "Invalid PTH file.");
353 return 0; // FIXME: Proper error diagnostic?
354 }
355
356 // Compute the address of the index table at the end of the PTH file.
357 // This table contains the offset of the file lookup table, the
358 // persistent ID -> identifer data table.
359 const char* EndTable = BufEnd - sizeof(uint32_t)*3;
360
361 // Construct the file lookup table. This will be used for mapping from
362 // FileEntry*'s to cached tokens.
363 const char* FileTableOffset = EndTable + sizeof(uint32_t)*2;
364 const char* FileTable = BufBeg + Read32(FileTableOffset);
365
366 if (!(FileTable > BufBeg && FileTable < BufEnd)) {
367 assert(false && "Invalid PTH file.");
368 return 0; // FIXME: Proper error diagnostic?
369 }
370
371 llvm::OwningPtr<PTHFileLookup> FL(new PTHFileLookup());
372 FL->ReadTable(FileTable);
373
374 // Get the location of the table mapping from persistent ids to the
375 // data needed to reconstruct identifiers.
376 const char* IDTableOffset = EndTable + sizeof(uint32_t)*1;
377 const char* IData = BufBeg + Read32(IDTableOffset);
378 if (!(IData > BufBeg && IData < BufEnd)) {
379 assert(false && "Invalid PTH file.");
380 return 0; // FIXME: Proper error diagnostic?
381 }
382
Ted Kremenekdb4c8e82008-12-03 01:16:39 +0000383 // Get the number of IdentifierInfos and pre-allocate the identifier cache.
384 uint32_t NumIds = Read32(IData);
385
386 // Pre-allocate the peristent ID -> IdentifierInfo* cache. We use calloc()
387 // so that we in the best case only zero out memory once when the OS returns
388 // us new pages.
389 IdentifierInfo** PerIDCache =
390 (IdentifierInfo**) calloc(NumIds, sizeof(*PerIDCache));
391
392 if (!PerIDCache) {
393 assert(false && "Could not allocate Persistent ID cache.");
394 return 0;
395 }
396
397 // Create the new lexer.
398 return new PTHManager(File.take(), FL.take(), IData, PerIDCache, PP);
Ted Kremenek325cd302008-12-03 00:38:03 +0000399}
400
401IdentifierInfo* PTHManager::ReadIdentifierInfo(const char*& D) {
402 // Read the persistent ID from the PTH file.
403 uint32_t persistentID = Read32(D);
404
405 // A persistent ID of '0' always maps to NULL.
406 if (!persistentID)
407 return 0;
408
409 // Adjust the persistent ID by subtracting '1' so that it can be used
410 // as an index within a table in the PTH file.
411 --persistentID;
412
413 // Check if the IdentifierInfo has already been resolved.
Ted Kremenek802fbd82008-12-10 19:40:23 +0000414 IdentifierInfo*& II = PerIDCache[persistentID];
Ted Kremenek325cd302008-12-03 00:38:03 +0000415 if (II) return II;
416
417 // Look in the PTH file for the string data for the IdentifierInfo object.
418 const char* TableEntry = IdDataTable + sizeof(uint32_t) * persistentID;
419 const char* IDData = Buf->getBufferStart() + Read32(TableEntry);
420 assert(IDData < Buf->getBufferEnd());
421
422 // Read the length of the string.
423 uint32_t len = Read32(IDData);
424
425 // Get the IdentifierInfo* with the specified string.
426 II = &ITable.get(IDData, IDData+len);
427 return II;
428}
429
430PTHLexer* PTHManager::CreateLexer(unsigned FileID, const FileEntry* FE) {
431
432 if (!FE)
433 return 0;
434
435 // Lookup the FileEntry object in our file lookup data structure. It will
436 // return a variant that indicates whether or not there is an offset within
437 // the PTH file that contains cached tokens.
Ted Kremenek8309c922008-12-11 23:36:38 +0000438 PTHFileLookup::Val FileData = ((PTHFileLookup*) FileLookup)->Lookup(FE);
Ted Kremenek325cd302008-12-03 00:38:03 +0000439
Ted Kremenek8309c922008-12-11 23:36:38 +0000440 if (!FileData.isValid()) // No tokens available.
Ted Kremenek325cd302008-12-03 00:38:03 +0000441 return 0;
442
443 // Compute the offset of the token data within the buffer.
Ted Kremenek8309c922008-12-11 23:36:38 +0000444 const char* data = Buf->getBufferStart() + FileData.getTokenOffset();
Ted Kremenekc07091c2008-12-12 18:34:08 +0000445
446 // Get the location of pp-conditional table.
447 const char* ppcond = Buf->getBufferStart() + FileData.gettPPCondOffset();
448 uint32_t len = Read32(ppcond);
449 if (len == 0) ppcond = 0;
450
Ted Kremenek325cd302008-12-03 00:38:03 +0000451 assert(data < Buf->getBufferEnd());
Ted Kremenekc07091c2008-12-12 18:34:08 +0000452 return new PTHLexer(PP, SourceLocation::getFileLoc(FileID, 0), data, ppcond,
453 *this);
Ted Kremenek325cd302008-12-03 00:38:03 +0000454}