blob: 3afbb5b71763380da08a659bb6a4f98ff70b886f [file] [log] [blame]
Ted Kremenek274b2082008-11-12 21:37:15 +00001//===--- PTHLexer.cpp - Lex from a token stream ---------------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the PTHLexer interface.
11//
12//===----------------------------------------------------------------------===//
13
Ted Kremenek0c6a77b2008-12-03 00:38:03 +000014#include "clang/Basic/TokenKinds.h"
15#include "clang/Basic/FileManager.h"
16#include "clang/Basic/IdentifierTable.h"
Ted Kremenek274b2082008-11-12 21:37:15 +000017#include "clang/Lex/PTHLexer.h"
18#include "clang/Lex/Preprocessor.h"
Ted Kremenek0c6a77b2008-12-03 00:38:03 +000019#include "clang/Lex/PTHManager.h"
20#include "clang/Lex/Token.h"
21#include "clang/Lex/Preprocessor.h"
22#include "llvm/Support/Compiler.h"
23#include "llvm/Support/MemoryBuffer.h"
24#include "llvm/ADT/StringMap.h"
25#include "llvm/ADT/OwningPtr.h"
Ted Kremenek0c6a77b2008-12-03 00:38:03 +000026
Ted Kremenek274b2082008-11-12 21:37:15 +000027using namespace clang;
28
Ted Kremenek0c6a77b2008-12-03 00:38:03 +000029PTHLexer::PTHLexer(Preprocessor& pp, SourceLocation fileloc, const char* D,
30 PTHManager& PM)
Ted Kremenekcd223442008-12-11 22:41:47 +000031 : PreprocessorLexer(&pp, fileloc), TokBuf(D), CurPtr(D), LastHashTokPtr(0),
32 PTHMgr(PM),
Ted Kremenek0c6a77b2008-12-03 00:38:03 +000033 NeedsFetching(true) {
34 // Make sure the EofToken is completely clean.
35 EofToken.startToken();
36 }
Ted Kremenek274b2082008-11-12 21:37:15 +000037
Ted Kremenek0c6a77b2008-12-03 00:38:03 +000038Token PTHLexer::GetToken() {
39 // Read the next token, or if we haven't advanced yet, get the last
40 // token read.
41 if (NeedsFetching) {
42 NeedsFetching = false;
43 ReadToken(LastFetched);
44 }
45
46 Token Tok = LastFetched;
Ted Kremenek89d7ee92008-11-20 19:49:00 +000047
48 // If we are in raw mode, zero out identifier pointers. This is
49 // needed for 'pragma poison'. Note that this requires that the Preprocessor
50 // can go back to the original source when it calls getSpelling().
51 if (LexingRawMode && Tok.is(tok::identifier))
52 Tok.setIdentifierInfo(0);
53
54 return Tok;
55}
56
Ted Kremenek274b2082008-11-12 21:37:15 +000057void PTHLexer::Lex(Token& Tok) {
Ted Kremenekd6f53dc2008-11-20 07:58:05 +000058LexNextToken:
Ted Kremenek31aba422008-11-20 16:32:22 +000059 Tok = GetToken();
Ted Kremenek274b2082008-11-12 21:37:15 +000060
Ted Kremenekcd4e2ae2008-11-21 00:58:35 +000061 if (AtLastToken()) {
62 Preprocessor *PPCache = PP;
63
64 if (LexEndOfFile(Tok))
65 return;
66
67 assert(PPCache && "Raw buffer::LexEndOfFile should return a token");
68 return PPCache->Lex(Tok);
69 }
70
Ted Kremenekd6f53dc2008-11-20 07:58:05 +000071 // Don't advance to the next token yet. Check if we are at the
72 // start of a new line and we're processing a directive. If so, we
73 // consume this token twice, once as an tok::eom.
74 if (Tok.isAtStartOfLine() && ParsingPreprocessorDirective) {
75 ParsingPreprocessorDirective = false;
76 Tok.setKind(tok::eom);
Ted Kremenek274b2082008-11-12 21:37:15 +000077 MIOpt.ReadToken();
Ted Kremenek274b2082008-11-12 21:37:15 +000078 return;
79 }
Ted Kremenekd6f53dc2008-11-20 07:58:05 +000080
81 // Advance to the next token.
Ted Kremenek31aba422008-11-20 16:32:22 +000082 AdvanceToken();
Ted Kremenek274b2082008-11-12 21:37:15 +000083
Ted Kremenekd6f53dc2008-11-20 07:58:05 +000084 if (Tok.is(tok::hash)) {
85 if (Tok.isAtStartOfLine() && !LexingRawMode) {
Ted Kremenekcd223442008-12-11 22:41:47 +000086 LastHashTokPtr = CurPtr;
87
Ted Kremenekd6f53dc2008-11-20 07:58:05 +000088 PP->HandleDirective(Tok);
89
90 if (PP->isCurrentLexer(this))
91 goto LexNextToken;
92
93 return PP->Lex(Tok);
94 }
95 }
96
Ted Kremenek274b2082008-11-12 21:37:15 +000097 MIOpt.ReadToken();
Ted Kremenekd6f53dc2008-11-20 07:58:05 +000098
99 if (Tok.is(tok::identifier)) {
100 if (LexingRawMode) return;
101 return PP->HandleIdentifier(Tok);
102 }
Ted Kremenek274b2082008-11-12 21:37:15 +0000103}
104
Ted Kremenekcd4e2ae2008-11-21 00:58:35 +0000105bool PTHLexer::LexEndOfFile(Token &Tok) {
106
107 if (ParsingPreprocessorDirective) {
108 ParsingPreprocessorDirective = false;
109 Tok.setKind(tok::eom);
110 MIOpt.ReadToken();
111 return true; // Have a token.
112 }
113
114 if (LexingRawMode) {
115 MIOpt.ReadToken();
116 return true; // Have an eof token.
117 }
118
119 // FIXME: Issue diagnostics similar to Lexer.
120 return PP->HandleEndOfFile(Tok, false);
121}
122
Ted Kremenek274b2082008-11-12 21:37:15 +0000123void PTHLexer::setEOF(Token& Tok) {
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000124 assert(!EofToken.is(tok::eof));
125 Tok = EofToken;
Ted Kremenek274b2082008-11-12 21:37:15 +0000126}
Ted Kremenek17ff58a2008-11-19 22:21:33 +0000127
128void PTHLexer::DiscardToEndOfLine() {
129 assert(ParsingPreprocessorDirective && ParsingFilename == false &&
130 "Must be in a preprocessing directive!");
Ted Kremenek4d35da22008-11-20 01:16:50 +0000131
132 // Already at end-of-file?
Ted Kremenek31aba422008-11-20 16:32:22 +0000133 if (AtLastToken())
Ted Kremenek4d35da22008-11-20 01:16:50 +0000134 return;
135
136 // Find the first token that is not the start of the *current* line.
Ted Kremenekd2bdeed2008-11-21 23:28:56 +0000137 Token T;
138 for (Lex(T); !AtLastToken(); Lex(T))
Ted Kremenek31aba422008-11-20 16:32:22 +0000139 if (GetToken().isAtStartOfLine())
Ted Kremenek4d35da22008-11-20 01:16:50 +0000140 return;
Ted Kremenek17ff58a2008-11-19 22:21:33 +0000141}
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000142
143//===----------------------------------------------------------------------===//
144// Utility methods for reading from the mmap'ed PTH file.
145//===----------------------------------------------------------------------===//
146
/// Read one unsigned byte at \p data and advance the pointer past it.
static inline uint8_t Read8(const char*& data) {
  const char Byte = *data;
  ++data;
  return static_cast<uint8_t>(Byte);
}

/// Read a 32-bit little-endian value at \p data and advance the pointer by
/// four bytes.
static inline uint32_t Read32(const char*& data) {
  uint32_t Value = 0;
  // Least-significant byte comes first.
  for (unsigned Shift = 0; Shift != 32; Shift += 8)
    Value |= static_cast<uint32_t>(Read8(data)) << Shift;
  return Value;
}
158
159//===----------------------------------------------------------------------===//
160// Token reconstruction from the PTH file.
161//===----------------------------------------------------------------------===//
162
163void PTHLexer::ReadToken(Token& T) {
164 // Clear the token.
165 // FIXME: Setting the flags directly should obviate this step.
166 T.startToken();
167
168 // Read the type of the token.
Ted Kremenekcd223442008-12-11 22:41:47 +0000169 T.setKind((tok::TokenKind) Read8(CurPtr));
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000170
171 // Set flags. This is gross, since we are really setting multiple flags.
Ted Kremenekcd223442008-12-11 22:41:47 +0000172 T.setFlag((Token::TokenFlags) Read8(CurPtr));
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000173
174 // Set the IdentifierInfo* (if any).
Ted Kremenekcd223442008-12-11 22:41:47 +0000175 T.setIdentifierInfo(PTHMgr.ReadIdentifierInfo(CurPtr));
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000176
177 // Set the SourceLocation. Since all tokens are constructed using a
178 // raw lexer, they will all be offseted from the same FileID.
Ted Kremenekcd223442008-12-11 22:41:47 +0000179 T.setLocation(SourceLocation::getFileLoc(FileID, Read32(CurPtr)));
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000180
181 // Finally, read and set the length of the token.
Ted Kremenekcd223442008-12-11 22:41:47 +0000182 T.setLength(Read32(CurPtr));
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000183}
184
185//===----------------------------------------------------------------------===//
186// Internal Data Structures for PTH file lookup and resolving identifiers.
187//===----------------------------------------------------------------------===//
188
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000189
190/// PTHFileLookup - This internal data structure is used by the PTHManager
191/// to map from FileEntry objects managed by FileManager to offsets within
192/// the PTH file.
193namespace {
194class VISIBILITY_HIDDEN PTHFileLookup {
195public:
196 class Val {
197 uint32_t v;
198
199 public:
200 Val() : v(~0) {}
201 Val(uint32_t x) : v(x) {}
202
203 operator uint32_t() const {
204 assert(v != ~((uint32_t)0) && "PTHFileLookup entry initialized.");
205 return v;
206 }
207
208 Val& operator=(uint32_t x) { v = x; return *this; }
209 bool isValid() const { return v != ~((uint32_t)0); }
210 };
211
212private:
213 llvm::StringMap<Val> FileMap;
214
215public:
216 PTHFileLookup() {};
217
218 Val Lookup(const FileEntry* FE) {
219 const char* s = FE->getName();
220 unsigned size = strlen(s);
221 return FileMap.GetOrCreateValue(s, s+size).getValue();
222 }
223
224 void ReadTable(const char* D) {
225 uint32_t N = Read32(D); // Read the length of the table.
226
227 for ( ; N > 0; --N) { // The rest of the data is the table itself.
228 uint32_t len = Read32(D);
229 const char* s = D;
230 D += len;
231 FileMap.GetOrCreateValue(s, s+len).getValue() = Read32(D);
232 }
233 }
234};
235} // end anonymous namespace
236
237//===----------------------------------------------------------------------===//
238// PTHManager methods.
239//===----------------------------------------------------------------------===//
240
241PTHManager::PTHManager(const llvm::MemoryBuffer* buf, void* fileLookup,
Ted Kremenekcf58e622008-12-10 19:40:23 +0000242 const char* idDataTable, IdentifierInfo** perIDCache,
Ted Kremenek6183e482008-12-03 01:16:39 +0000243 Preprocessor& pp)
244: Buf(buf), PerIDCache(perIDCache), FileLookup(fileLookup),
245 IdDataTable(idDataTable), ITable(pp.getIdentifierTable()), PP(pp) {}
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000246
247PTHManager::~PTHManager() {
248 delete Buf;
249 delete (PTHFileLookup*) FileLookup;
Ted Kremenek0e50b6e2008-12-04 22:47:11 +0000250 free(PerIDCache);
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000251}
252
253PTHManager* PTHManager::Create(const std::string& file, Preprocessor& PP) {
254
255 // Memory map the PTH file.
256 llvm::OwningPtr<llvm::MemoryBuffer>
257 File(llvm::MemoryBuffer::getFile(file.c_str()));
258
259 if (!File)
260 return 0;
261
262 // Get the buffer ranges and check if there are at least three 32-bit
263 // words at the end of the file.
264 const char* BufBeg = File->getBufferStart();
265 const char* BufEnd = File->getBufferEnd();
266
267 if(!(BufEnd > BufBeg + sizeof(uint32_t)*3)) {
268 assert(false && "Invalid PTH file.");
269 return 0; // FIXME: Proper error diagnostic?
270 }
271
272 // Compute the address of the index table at the end of the PTH file.
273 // This table contains the offset of the file lookup table, the
274 // persistent ID -> identifer data table.
275 const char* EndTable = BufEnd - sizeof(uint32_t)*3;
276
277 // Construct the file lookup table. This will be used for mapping from
278 // FileEntry*'s to cached tokens.
279 const char* FileTableOffset = EndTable + sizeof(uint32_t)*2;
280 const char* FileTable = BufBeg + Read32(FileTableOffset);
281
282 if (!(FileTable > BufBeg && FileTable < BufEnd)) {
283 assert(false && "Invalid PTH file.");
284 return 0; // FIXME: Proper error diagnostic?
285 }
286
287 llvm::OwningPtr<PTHFileLookup> FL(new PTHFileLookup());
288 FL->ReadTable(FileTable);
289
290 // Get the location of the table mapping from persistent ids to the
291 // data needed to reconstruct identifiers.
292 const char* IDTableOffset = EndTable + sizeof(uint32_t)*1;
293 const char* IData = BufBeg + Read32(IDTableOffset);
294 if (!(IData > BufBeg && IData < BufEnd)) {
295 assert(false && "Invalid PTH file.");
296 return 0; // FIXME: Proper error diagnostic?
297 }
298
Ted Kremenek6183e482008-12-03 01:16:39 +0000299 // Get the number of IdentifierInfos and pre-allocate the identifier cache.
300 uint32_t NumIds = Read32(IData);
301
302 // Pre-allocate the peristent ID -> IdentifierInfo* cache. We use calloc()
303 // so that we in the best case only zero out memory once when the OS returns
304 // us new pages.
305 IdentifierInfo** PerIDCache =
306 (IdentifierInfo**) calloc(NumIds, sizeof(*PerIDCache));
307
308 if (!PerIDCache) {
309 assert(false && "Could not allocate Persistent ID cache.");
310 return 0;
311 }
312
313 // Create the new lexer.
314 return new PTHManager(File.take(), FL.take(), IData, PerIDCache, PP);
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000315}
316
317IdentifierInfo* PTHManager::ReadIdentifierInfo(const char*& D) {
318 // Read the persistent ID from the PTH file.
319 uint32_t persistentID = Read32(D);
320
321 // A persistent ID of '0' always maps to NULL.
322 if (!persistentID)
323 return 0;
324
325 // Adjust the persistent ID by subtracting '1' so that it can be used
326 // as an index within a table in the PTH file.
327 --persistentID;
328
329 // Check if the IdentifierInfo has already been resolved.
Ted Kremenekcf58e622008-12-10 19:40:23 +0000330 IdentifierInfo*& II = PerIDCache[persistentID];
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000331 if (II) return II;
332
333 // Look in the PTH file for the string data for the IdentifierInfo object.
334 const char* TableEntry = IdDataTable + sizeof(uint32_t) * persistentID;
335 const char* IDData = Buf->getBufferStart() + Read32(TableEntry);
336 assert(IDData < Buf->getBufferEnd());
337
338 // Read the length of the string.
339 uint32_t len = Read32(IDData);
340
341 // Get the IdentifierInfo* with the specified string.
342 II = &ITable.get(IDData, IDData+len);
343 return II;
344}
345
346PTHLexer* PTHManager::CreateLexer(unsigned FileID, const FileEntry* FE) {
347
348 if (!FE)
349 return 0;
350
351 // Lookup the FileEntry object in our file lookup data structure. It will
352 // return a variant that indicates whether or not there is an offset within
353 // the PTH file that contains cached tokens.
354 PTHFileLookup::Val Off = ((PTHFileLookup*) FileLookup)->Lookup(FE);
355
356 if (!Off.isValid()) // No tokens available.
357 return 0;
358
359 // Compute the offset of the token data within the buffer.
360 const char* data = Buf->getBufferStart() + Off;
361 assert(data < Buf->getBufferEnd());
362 return new PTHLexer(PP, SourceLocation::getFileLoc(FileID, 0), data, *this);
363}