blob: dc8f842cec23d1f42516b5f16d73bde2db0cf3c8 [file] [log] [blame]
Ted Kremenek274b2082008-11-12 21:37:15 +00001//===--- PTHLexer.cpp - Lex from a token stream ---------------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the PTHLexer interface.
11//
12//===----------------------------------------------------------------------===//
13
Ted Kremenek0c6a77b2008-12-03 00:38:03 +000014#include "clang/Basic/TokenKinds.h"
15#include "clang/Basic/FileManager.h"
16#include "clang/Basic/IdentifierTable.h"
Ted Kremenek274b2082008-11-12 21:37:15 +000017#include "clang/Lex/PTHLexer.h"
18#include "clang/Lex/Preprocessor.h"
Ted Kremenek0c6a77b2008-12-03 00:38:03 +000019#include "clang/Lex/PTHManager.h"
20#include "clang/Lex/Token.h"
21#include "clang/Lex/Preprocessor.h"
22#include "llvm/Support/Compiler.h"
23#include "llvm/Support/MemoryBuffer.h"
24#include "llvm/ADT/StringMap.h"
25#include "llvm/ADT/OwningPtr.h"
Ted Kremenek0c6a77b2008-12-03 00:38:03 +000026
Ted Kremenek274b2082008-11-12 21:37:15 +000027using namespace clang;
28
/// DISK_TOKEN_SIZE - Number of bytes one token record occupies in the cached
/// token stream, matching the fields PTHLexer::Lex() decodes:
/// kind (1) + flags (1) + persistent identifier ID (4) + file offset (4) +
/// length (2).
#define DISK_TOKEN_SIZE (1 + 1 + 4 + 4 + 2)
Ted Kremenek268ee702008-12-12 18:34:08 +000030
Ted Kremenek0c6a77b2008-12-03 00:38:03 +000031//===----------------------------------------------------------------------===//
32// Utility methods for reading from the mmap'ed PTH file.
33//===----------------------------------------------------------------------===//
34
/// Read8 - Return the byte 'data' points at, interpreted as unsigned, and
/// advance 'data' by one byte.
static inline uint8_t Read8(const char*& data) {
  const uint8_t Byte = (uint8_t) *data;
  ++data;
  return Byte;
}
38
39static inline uint32_t Read32(const char*& data) {
40 uint32_t V = (uint32_t) Read8(data);
41 V |= (((uint32_t) Read8(data)) << 8);
42 V |= (((uint32_t) Read8(data)) << 16);
43 V |= (((uint32_t) Read8(data)) << 24);
44 return V;
45}
46
Ted Kremeneke5680f32008-12-23 01:30:52 +000047//===----------------------------------------------------------------------===//
48// PTHLexer methods.
49//===----------------------------------------------------------------------===//
50
51PTHLexer::PTHLexer(Preprocessor& pp, SourceLocation fileloc, const char* D,
52 const char* ppcond, PTHManager& PM)
53 : PreprocessorLexer(&pp, fileloc), TokBuf(D), CurPtr(D), LastHashTokPtr(0),
54 PPCond(ppcond), CurPPCondPtr(ppcond), PTHMgr(PM) {}
55
56void PTHLexer::Lex(Token& Tok) {
57LexNextToken:
Ted Kremeneke5680f32008-12-23 01:30:52 +000058
Ted Kremenek866bdf72008-12-23 02:30:15 +000059 //===--------------------------------------==//
60 // Read the raw token data.
61 //===--------------------------------------==//
62
63 // Shadow CurPtr into an automatic variable.
64 const unsigned char *CurPtrShadow = (const unsigned char*) CurPtr;
65
66 // Read in the data for the token. 14 bytes in total.
67 tok::TokenKind k = (tok::TokenKind) CurPtrShadow[0];
68 Token::TokenFlags flags = (Token::TokenFlags) CurPtrShadow[1];
69
Ted Kremenek6b1c9702008-12-23 18:27:26 +000070 uint32_t perID = ((uint32_t) CurPtrShadow[2])
Ted Kremenek866bdf72008-12-23 02:30:15 +000071 | (((uint32_t) CurPtrShadow[3]) << 8)
72 | (((uint32_t) CurPtrShadow[4]) << 16)
73 | (((uint32_t) CurPtrShadow[5]) << 24);
74
Ted Kremenek866bdf72008-12-23 02:30:15 +000075 uint32_t FileOffset = ((uint32_t) CurPtrShadow[6])
76 | (((uint32_t) CurPtrShadow[7]) << 8)
77 | (((uint32_t) CurPtrShadow[8]) << 16)
78 | (((uint32_t) CurPtrShadow[9]) << 24);
79
80 uint32_t Len = ((uint32_t) CurPtrShadow[10])
Ted Kremenek8f174e12008-12-23 02:52:12 +000081 | (((uint32_t) CurPtrShadow[11]) << 8);
Ted Kremenek866bdf72008-12-23 02:30:15 +000082
83 CurPtr = (const char*) (CurPtrShadow + DISK_TOKEN_SIZE);
84
85 //===--------------------------------------==//
86 // Construct the token itself.
87 //===--------------------------------------==//
88
89 Tok.startToken();
90 Tok.setKind(k);
91 Tok.setFlag(flags);
Ted Kremenek6b1c9702008-12-23 18:27:26 +000092 Tok.setIdentifierInfo(perID ? PTHMgr.GetIdentifierInfo(perID-1) : 0);
Ted Kremenek866bdf72008-12-23 02:30:15 +000093 Tok.setLocation(SourceLocation::getFileLoc(FileID, FileOffset));
94 Tok.setLength(Len);
95
96 //===--------------------------------------==//
97 // Process the token.
98 //===--------------------------------------==//
Ted Kremeneke5680f32008-12-23 01:30:52 +000099
100 if (Tok.is(tok::eof)) {
101 // Save the end-of-file token.
102 EofToken = Tok;
103
104 Preprocessor *PPCache = PP;
105
106 if (LexEndOfFile(Tok))
107 return;
108
109 assert(PPCache && "Raw buffer::LexEndOfFile should return a token");
110 return PPCache->Lex(Tok);
111 }
112
113 MIOpt.ReadToken();
114
115 if (Tok.is(tok::eom)) {
116 ParsingPreprocessorDirective = false;
117 return;
118 }
119
120#if 0
121 SourceManager& SM = PP->getSourceManager();
122 SourceLocation L = Tok.getLocation();
123
124 static const char* last = 0;
125 const char* next = SM.getContentCacheForLoc(L)->Entry->getName();
126 if (next != last) {
127 last = next;
128 llvm::cerr << next << '\n';
129 }
130
131 llvm::cerr << "line " << SM.getLogicalLineNumber(L) << " col " <<
132 SM.getLogicalColumnNumber(L) << '\n';
133#endif
134
135 if (Tok.is(tok::hash)) {
136 if (Tok.isAtStartOfLine()) {
137 LastHashTokPtr = CurPtr - DISK_TOKEN_SIZE;
138 if (!LexingRawMode) {
139 PP->HandleDirective(Tok);
140
141 if (PP->isCurrentLexer(this))
142 goto LexNextToken;
143
144 return PP->Lex(Tok);
145 }
146 }
147 }
148
149 if (Tok.is(tok::identifier)) {
150 if (LexingRawMode) {
151 Tok.setIdentifierInfo(0);
152 return;
153 }
154
155 return PP->HandleIdentifier(Tok);
156 }
157
158
159 assert(!Tok.is(tok::eom) || ParsingPreprocessorDirective);
160}
161
162// FIXME: This method can just be inlined into Lex().
163bool PTHLexer::LexEndOfFile(Token &Tok) {
164 assert(!ParsingPreprocessorDirective);
165 assert(!LexingRawMode);
166
167 // FIXME: Issue diagnostics similar to Lexer.
168 return PP->HandleEndOfFile(Tok, false);
169}
170
171// FIXME: We can just grab the last token instead of storing a copy
172// into EofToken.
173void PTHLexer::setEOF(Token& Tok) {
174 assert(!EofToken.is(tok::eof));
175 Tok = EofToken;
176}
177
178void PTHLexer::DiscardToEndOfLine() {
179 assert(ParsingPreprocessorDirective && ParsingFilename == false &&
180 "Must be in a preprocessing directive!");
181
182 // We assume that if the preprocessor wishes to discard to the end of
183 // the line that it also means to end the current preprocessor directive.
184 ParsingPreprocessorDirective = false;
185
186 // Skip tokens by only peeking at their token kind and the flags.
187 // We don't need to actually reconstruct full tokens from the token buffer.
188 // This saves some copies and it also reduces IdentifierInfo* lookup.
189 const char* p = CurPtr;
190 while (1) {
191 // Read the token kind. Are we at the end of the file?
192 tok::TokenKind x = (tok::TokenKind) (uint8_t) *p;
193 if (x == tok::eof) break;
194
195 // Read the token flags. Are we at the start of the next line?
196 Token::TokenFlags y = (Token::TokenFlags) (uint8_t) p[1];
197 if (y & Token::StartOfLine) break;
198
199 // Skip to the next token.
200 p += DISK_TOKEN_SIZE;
201 }
202
203 CurPtr = p;
204}
205
/// SkipBlock - Used by Preprocessor to skip the current conditional block.
///
/// Uses the preprocessor-conditional side table to move CurPtr directly to
/// the '#' token that ends the block being skipped.  Each side-table entry is
/// two 32-bit words: the offset (relative to TokBuf) of a '#' token, and the
/// index of the side-table entry to jump to from it (zero for a #endif).
///
/// Returns true if the block ended at a #endif, in which case the tokens
/// following the '#' have been consumed as well; returns false otherwise,
/// leaving CurPtr just past the '#' of the next directive.
bool PTHLexer::SkipBlock() {
  assert(CurPPCondPtr && "No cached PP conditional information.");
  assert(LastHashTokPtr && "No known '#' token.");

  const char* HashEntryI = 0;
  uint32_t Offset;
  uint32_t TableIdx;

  // Walk the side table forward until we reach the entry that describes the
  // most recently lexed '#' token (LastHashTokPtr).
  do {
    // Read the token offset from the side-table.
    Offset = Read32(CurPPCondPtr);

    // Read the target table index from the side-table.
    TableIdx = Read32(CurPPCondPtr);

    // Compute the actual memory address of the '#' token data for this entry.
    HashEntryI = TokBuf + Offset;

    // Optimization: "Sibling jumping".  #if...#else...#endif blocks can
    // contain nested blocks.  In the side-table we can jump over these
    // nested blocks instead of doing a linear search if the next "sibling"
    // entry is not at a location greater than LastHashTokPtr.
    if (HashEntryI < LastHashTokPtr && TableIdx) {
      // In the side-table we are still at an entry for a '#' token that
      // is earlier than the last one we saw.  Check if the location we would
      // stride gets us closer.
      const char* NextPPCondPtr = PPCond + TableIdx*(sizeof(uint32_t)*2);
      assert(NextPPCondPtr >= CurPPCondPtr);
      // Read where we should jump to.
      uint32_t TmpOffset = Read32(NextPPCondPtr);
      const char* HashEntryJ = TokBuf + TmpOffset;

      if (HashEntryJ <= LastHashTokPtr) {
        // Jump directly to the next entry in the side table.  Both words of
        // the sibling entry get consumed, so CurPPCondPtr ends up just past
        // it, exactly as if the loop had read that entry itself.
        HashEntryI = HashEntryJ;
        Offset = TmpOffset;
        TableIdx = Read32(NextPPCondPtr);
        CurPPCondPtr = NextPPCondPtr;
      }
    }
  }
  while (HashEntryI < LastHashTokPtr);
  assert(HashEntryI == LastHashTokPtr && "No PP-cond entry found for '#'");
  assert(TableIdx && "No jumping from #endifs.");

  // Update our side-table iterator.  CurPPCondPtr is left pointing at the
  // start of the target entry; the reads below advance only the local copy.
  const char* NextPPCondPtr = PPCond + TableIdx*(sizeof(uint32_t)*2);
  assert(NextPPCondPtr >= CurPPCondPtr);
  CurPPCondPtr = NextPPCondPtr;

  // Read where we should jump to.
  HashEntryI = TokBuf + Read32(NextPPCondPtr);
  uint32_t NextIdx = Read32(NextPPCondPtr);

  // By construction NextIdx will be zero if this is a #endif.  This is useful
  // to know to obviate lexing another token.
  bool isEndif = NextIdx == 0;

  // This case can occur when we see something like this:
  //
  //   #if ...
  //   /* a comment or nothing */
  //   #elif
  //
  // If we are skipping the first #if block it will be the case that CurPtr
  // already points at 'elif'.  Just return.

  if (CurPtr > HashEntryI) {
    // CurPtr can only be exactly one record past the target '#'.
    assert(CurPtr == HashEntryI + DISK_TOKEN_SIZE);
    // Did we reach a #endif?  If so, go ahead and consume that token as well.
    // NOTE(review): two records are skipped here -- presumably the 'endif'
    // identifier and the end-of-directive token; confirm against the writer.
    if (isEndif)
      CurPtr += DISK_TOKEN_SIZE*2;
    else
      LastHashTokPtr = HashEntryI;

    return isEndif;
  }

  // Otherwise, we need to advance.  Update CurPtr to point to the '#' token.
  CurPtr = HashEntryI;

  // Update the location of the last observed '#'.  This is useful if we
  // are skipping multiple blocks.
  LastHashTokPtr = CurPtr;

  // Skip the '#' token.
  assert(((tok::TokenKind) (unsigned char) *CurPtr) == tok::hash);
  CurPtr += DISK_TOKEN_SIZE;

  // Did we reach a #endif?  If so, go ahead and consume that token as well.
  if (isEndif) { CurPtr += DISK_TOKEN_SIZE*2; }

  return isEndif;
}
301
Ted Kremenek30a12ec2008-12-17 23:36:32 +0000302SourceLocation PTHLexer::getSourceLocation() {
303 // getLocation is not on the hot path. It is used to get the location of
304 // the next token when transitioning back to this lexer when done
305 // handling a #included file. Just read the necessary data from the token
306 // data buffer to construct the SourceLocation object.
307 // NOTE: This is a virtual function; hence it is defined out-of-line.
308 const char* p = CurPtr + (1 + 1 + 4);
309 uint32_t offset =
310 ((uint32_t) ((uint8_t) p[0]))
311 | (((uint32_t) ((uint8_t) p[1])) << 8)
312 | (((uint32_t) ((uint8_t) p[2])) << 16)
313 | (((uint32_t) ((uint8_t) p[3])) << 24);
314 return SourceLocation::getFileLoc(FileID, offset);
315}
316
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000317//===----------------------------------------------------------------------===//
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000318// Internal Data Structures for PTH file lookup and resolving identifiers.
319//===----------------------------------------------------------------------===//
320
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000321
322/// PTHFileLookup - This internal data structure is used by the PTHManager
323/// to map from FileEntry objects managed by FileManager to offsets within
324/// the PTH file.
325namespace {
326class VISIBILITY_HIDDEN PTHFileLookup {
327public:
328 class Val {
Ted Kremenekfb645b62008-12-11 23:36:38 +0000329 uint32_t TokenOff;
330 uint32_t PPCondOff;
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000331
332 public:
Ted Kremenekfb645b62008-12-11 23:36:38 +0000333 Val() : TokenOff(~0) {}
334 Val(uint32_t toff, uint32_t poff) : TokenOff(toff), PPCondOff(poff) {}
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000335
Ted Kremenekfb645b62008-12-11 23:36:38 +0000336 uint32_t getTokenOffset() const {
337 assert(TokenOff != ~((uint32_t)0) && "PTHFileLookup entry initialized.");
338 return TokenOff;
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000339 }
340
Ted Kremenekfb645b62008-12-11 23:36:38 +0000341 uint32_t gettPPCondOffset() const {
342 assert(TokenOff != ~((uint32_t)0) && "PTHFileLookup entry initialized.");
343 return PPCondOff;
344 }
345
346 bool isValid() const { return TokenOff != ~((uint32_t)0); }
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000347 };
348
349private:
350 llvm::StringMap<Val> FileMap;
351
352public:
353 PTHFileLookup() {};
354
355 Val Lookup(const FileEntry* FE) {
356 const char* s = FE->getName();
357 unsigned size = strlen(s);
358 return FileMap.GetOrCreateValue(s, s+size).getValue();
359 }
360
361 void ReadTable(const char* D) {
362 uint32_t N = Read32(D); // Read the length of the table.
363
364 for ( ; N > 0; --N) { // The rest of the data is the table itself.
365 uint32_t len = Read32(D);
366 const char* s = D;
367 D += len;
Ted Kremenekfb645b62008-12-11 23:36:38 +0000368 uint32_t TokenOff = Read32(D);
369 FileMap.GetOrCreateValue(s, s+len).getValue() = Val(TokenOff, Read32(D));
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000370 }
371 }
372};
373} // end anonymous namespace
374
375//===----------------------------------------------------------------------===//
376// PTHManager methods.
377//===----------------------------------------------------------------------===//
378
379PTHManager::PTHManager(const llvm::MemoryBuffer* buf, void* fileLookup,
Ted Kremenekcf58e622008-12-10 19:40:23 +0000380 const char* idDataTable, IdentifierInfo** perIDCache,
Ted Kremenek6183e482008-12-03 01:16:39 +0000381 Preprocessor& pp)
382: Buf(buf), PerIDCache(perIDCache), FileLookup(fileLookup),
383 IdDataTable(idDataTable), ITable(pp.getIdentifierTable()), PP(pp) {}
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000384
385PTHManager::~PTHManager() {
386 delete Buf;
387 delete (PTHFileLookup*) FileLookup;
Ted Kremenek0e50b6e2008-12-04 22:47:11 +0000388 free(PerIDCache);
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000389}
390
391PTHManager* PTHManager::Create(const std::string& file, Preprocessor& PP) {
392
393 // Memory map the PTH file.
394 llvm::OwningPtr<llvm::MemoryBuffer>
395 File(llvm::MemoryBuffer::getFile(file.c_str()));
396
397 if (!File)
398 return 0;
399
400 // Get the buffer ranges and check if there are at least three 32-bit
401 // words at the end of the file.
402 const char* BufBeg = File->getBufferStart();
403 const char* BufEnd = File->getBufferEnd();
404
405 if(!(BufEnd > BufBeg + sizeof(uint32_t)*3)) {
406 assert(false && "Invalid PTH file.");
407 return 0; // FIXME: Proper error diagnostic?
408 }
409
410 // Compute the address of the index table at the end of the PTH file.
411 // This table contains the offset of the file lookup table, the
412 // persistent ID -> identifer data table.
413 const char* EndTable = BufEnd - sizeof(uint32_t)*3;
414
415 // Construct the file lookup table. This will be used for mapping from
416 // FileEntry*'s to cached tokens.
417 const char* FileTableOffset = EndTable + sizeof(uint32_t)*2;
418 const char* FileTable = BufBeg + Read32(FileTableOffset);
419
420 if (!(FileTable > BufBeg && FileTable < BufEnd)) {
421 assert(false && "Invalid PTH file.");
422 return 0; // FIXME: Proper error diagnostic?
423 }
424
425 llvm::OwningPtr<PTHFileLookup> FL(new PTHFileLookup());
426 FL->ReadTable(FileTable);
427
428 // Get the location of the table mapping from persistent ids to the
429 // data needed to reconstruct identifiers.
430 const char* IDTableOffset = EndTable + sizeof(uint32_t)*1;
431 const char* IData = BufBeg + Read32(IDTableOffset);
432 if (!(IData > BufBeg && IData < BufEnd)) {
433 assert(false && "Invalid PTH file.");
434 return 0; // FIXME: Proper error diagnostic?
435 }
436
Ted Kremenek6183e482008-12-03 01:16:39 +0000437 // Get the number of IdentifierInfos and pre-allocate the identifier cache.
438 uint32_t NumIds = Read32(IData);
439
440 // Pre-allocate the peristent ID -> IdentifierInfo* cache. We use calloc()
441 // so that we in the best case only zero out memory once when the OS returns
442 // us new pages.
443 IdentifierInfo** PerIDCache =
444 (IdentifierInfo**) calloc(NumIds, sizeof(*PerIDCache));
445
446 if (!PerIDCache) {
447 assert(false && "Could not allocate Persistent ID cache.");
448 return 0;
449 }
450
451 // Create the new lexer.
452 return new PTHManager(File.take(), FL.take(), IData, PerIDCache, PP);
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000453}
454
Ted Kremenek866bdf72008-12-23 02:30:15 +0000455IdentifierInfo* PTHManager::GetIdentifierInfo(unsigned persistentID) {
456
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000457 // Check if the IdentifierInfo has already been resolved.
Ted Kremenekcf58e622008-12-10 19:40:23 +0000458 IdentifierInfo*& II = PerIDCache[persistentID];
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000459 if (II) return II;
460
461 // Look in the PTH file for the string data for the IdentifierInfo object.
462 const char* TableEntry = IdDataTable + sizeof(uint32_t) * persistentID;
463 const char* IDData = Buf->getBufferStart() + Read32(TableEntry);
464 assert(IDData < Buf->getBufferEnd());
465
466 // Read the length of the string.
467 uint32_t len = Read32(IDData);
468
469 // Get the IdentifierInfo* with the specified string.
470 II = &ITable.get(IDData, IDData+len);
471 return II;
472}
473
474PTHLexer* PTHManager::CreateLexer(unsigned FileID, const FileEntry* FE) {
475
476 if (!FE)
477 return 0;
478
479 // Lookup the FileEntry object in our file lookup data structure. It will
480 // return a variant that indicates whether or not there is an offset within
481 // the PTH file that contains cached tokens.
Ted Kremenekfb645b62008-12-11 23:36:38 +0000482 PTHFileLookup::Val FileData = ((PTHFileLookup*) FileLookup)->Lookup(FE);
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000483
Ted Kremenekfb645b62008-12-11 23:36:38 +0000484 if (!FileData.isValid()) // No tokens available.
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000485 return 0;
486
487 // Compute the offset of the token data within the buffer.
Ted Kremenekfb645b62008-12-11 23:36:38 +0000488 const char* data = Buf->getBufferStart() + FileData.getTokenOffset();
Ted Kremenek268ee702008-12-12 18:34:08 +0000489
490 // Get the location of pp-conditional table.
491 const char* ppcond = Buf->getBufferStart() + FileData.gettPPCondOffset();
492 uint32_t len = Read32(ppcond);
493 if (len == 0) ppcond = 0;
494
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000495 assert(data < Buf->getBufferEnd());
Ted Kremenek268ee702008-12-12 18:34:08 +0000496 return new PTHLexer(PP, SourceLocation::getFileLoc(FileID, 0), data, ppcond,
497 *this);
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000498}