blob: ebbc96801f6eaf6e1ce4a4295bd9d456d5588abd [file] [log] [blame]
//===--- PTHLexer.cpp - Lex from a token stream ---------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PTHLexer interface.
//
//===----------------------------------------------------------------------===//
13
Ted Kremenek0c6a77b2008-12-03 00:38:03 +000014#include "clang/Basic/TokenKinds.h"
15#include "clang/Basic/FileManager.h"
16#include "clang/Basic/IdentifierTable.h"
Ted Kremenek274b2082008-11-12 21:37:15 +000017#include "clang/Lex/PTHLexer.h"
18#include "clang/Lex/Preprocessor.h"
Ted Kremenek0c6a77b2008-12-03 00:38:03 +000019#include "clang/Lex/PTHManager.h"
20#include "clang/Lex/Token.h"
21#include "clang/Lex/Preprocessor.h"
22#include "llvm/Support/Compiler.h"
23#include "llvm/Support/MemoryBuffer.h"
24#include "llvm/ADT/StringMap.h"
25#include "llvm/ADT/OwningPtr.h"
Ted Kremenek0c6a77b2008-12-03 00:38:03 +000026
Ted Kremenek274b2082008-11-12 21:37:15 +000027using namespace clang;
28
Ted Kremenek0c6a77b2008-12-03 00:38:03 +000029PTHLexer::PTHLexer(Preprocessor& pp, SourceLocation fileloc, const char* D,
30 PTHManager& PM)
Ted Kremenek74153262008-12-11 20:39:48 +000031 : PreprocessorLexer(&pp, fileloc), TokBuf(D), PTHMgr(PM),
Ted Kremenek0c6a77b2008-12-03 00:38:03 +000032 NeedsFetching(true) {
33 // Make sure the EofToken is completely clean.
34 EofToken.startToken();
35 }
Ted Kremenek274b2082008-11-12 21:37:15 +000036
Ted Kremenek0c6a77b2008-12-03 00:38:03 +000037Token PTHLexer::GetToken() {
38 // Read the next token, or if we haven't advanced yet, get the last
39 // token read.
40 if (NeedsFetching) {
41 NeedsFetching = false;
42 ReadToken(LastFetched);
43 }
44
45 Token Tok = LastFetched;
Ted Kremenek89d7ee92008-11-20 19:49:00 +000046
47 // If we are in raw mode, zero out identifier pointers. This is
48 // needed for 'pragma poison'. Note that this requires that the Preprocessor
49 // can go back to the original source when it calls getSpelling().
50 if (LexingRawMode && Tok.is(tok::identifier))
51 Tok.setIdentifierInfo(0);
52
53 return Tok;
54}
55
Ted Kremenek274b2082008-11-12 21:37:15 +000056void PTHLexer::Lex(Token& Tok) {
Ted Kremenekd6f53dc2008-11-20 07:58:05 +000057LexNextToken:
Ted Kremenek31aba422008-11-20 16:32:22 +000058 Tok = GetToken();
Ted Kremenek274b2082008-11-12 21:37:15 +000059
Ted Kremenekcd4e2ae2008-11-21 00:58:35 +000060 if (AtLastToken()) {
61 Preprocessor *PPCache = PP;
62
63 if (LexEndOfFile(Tok))
64 return;
65
66 assert(PPCache && "Raw buffer::LexEndOfFile should return a token");
67 return PPCache->Lex(Tok);
68 }
69
Ted Kremenekd6f53dc2008-11-20 07:58:05 +000070 // Don't advance to the next token yet. Check if we are at the
71 // start of a new line and we're processing a directive. If so, we
72 // consume this token twice, once as an tok::eom.
73 if (Tok.isAtStartOfLine() && ParsingPreprocessorDirective) {
74 ParsingPreprocessorDirective = false;
75 Tok.setKind(tok::eom);
Ted Kremenek274b2082008-11-12 21:37:15 +000076 MIOpt.ReadToken();
Ted Kremenek274b2082008-11-12 21:37:15 +000077 return;
78 }
Ted Kremenekd6f53dc2008-11-20 07:58:05 +000079
80 // Advance to the next token.
Ted Kremenek31aba422008-11-20 16:32:22 +000081 AdvanceToken();
Ted Kremenek274b2082008-11-12 21:37:15 +000082
Ted Kremenekd6f53dc2008-11-20 07:58:05 +000083 if (Tok.is(tok::hash)) {
84 if (Tok.isAtStartOfLine() && !LexingRawMode) {
85 PP->HandleDirective(Tok);
86
87 if (PP->isCurrentLexer(this))
88 goto LexNextToken;
89
90 return PP->Lex(Tok);
91 }
92 }
93
Ted Kremenek274b2082008-11-12 21:37:15 +000094 MIOpt.ReadToken();
Ted Kremenekd6f53dc2008-11-20 07:58:05 +000095
96 if (Tok.is(tok::identifier)) {
97 if (LexingRawMode) return;
98 return PP->HandleIdentifier(Tok);
99 }
Ted Kremenek274b2082008-11-12 21:37:15 +0000100}
101
Ted Kremenekcd4e2ae2008-11-21 00:58:35 +0000102bool PTHLexer::LexEndOfFile(Token &Tok) {
103
104 if (ParsingPreprocessorDirective) {
105 ParsingPreprocessorDirective = false;
106 Tok.setKind(tok::eom);
107 MIOpt.ReadToken();
108 return true; // Have a token.
109 }
110
111 if (LexingRawMode) {
112 MIOpt.ReadToken();
113 return true; // Have an eof token.
114 }
115
116 // FIXME: Issue diagnostics similar to Lexer.
117 return PP->HandleEndOfFile(Tok, false);
118}
119
Ted Kremenek274b2082008-11-12 21:37:15 +0000120void PTHLexer::setEOF(Token& Tok) {
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000121 assert(!EofToken.is(tok::eof));
122 Tok = EofToken;
Ted Kremenek274b2082008-11-12 21:37:15 +0000123}
Ted Kremenek17ff58a2008-11-19 22:21:33 +0000124
125void PTHLexer::DiscardToEndOfLine() {
126 assert(ParsingPreprocessorDirective && ParsingFilename == false &&
127 "Must be in a preprocessing directive!");
Ted Kremenek4d35da22008-11-20 01:16:50 +0000128
129 // Already at end-of-file?
Ted Kremenek31aba422008-11-20 16:32:22 +0000130 if (AtLastToken())
Ted Kremenek4d35da22008-11-20 01:16:50 +0000131 return;
132
133 // Find the first token that is not the start of the *current* line.
Ted Kremenekd2bdeed2008-11-21 23:28:56 +0000134 Token T;
135 for (Lex(T); !AtLastToken(); Lex(T))
Ted Kremenek31aba422008-11-20 16:32:22 +0000136 if (GetToken().isAtStartOfLine())
Ted Kremenek4d35da22008-11-20 01:16:50 +0000137 return;
Ted Kremenek17ff58a2008-11-19 22:21:33 +0000138}
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000139
//===----------------------------------------------------------------------===//
// Utility methods for reading from the mmap'ed PTH file.
//===----------------------------------------------------------------------===//
143
/// Read one byte at \p data as an unsigned value and advance \p data past it.
static inline uint8_t Read8(const char*& data) {
  const unsigned char byte = static_cast<unsigned char>(*data);
  ++data;
  return byte;
}
147
/// Read a 32-bit little-endian unsigned integer at \p data and advance
/// \p data by four bytes.  The first byte read is the least significant.
static inline uint32_t Read32(const char*& data) {
  uint32_t V = 0;
  for (unsigned shift = 0; shift != 32; shift += 8) {
    // Go through unsigned char so high-bit bytes are not sign-extended.
    V |= static_cast<uint32_t>(static_cast<unsigned char>(*data)) << shift;
    ++data;
  }
  return V;
}
155
//===----------------------------------------------------------------------===//
// Token reconstruction from the PTH file.
//===----------------------------------------------------------------------===//
159
160void PTHLexer::ReadToken(Token& T) {
161 // Clear the token.
162 // FIXME: Setting the flags directly should obviate this step.
163 T.startToken();
164
165 // Read the type of the token.
166 T.setKind((tok::TokenKind) Read8(TokBuf));
167
168 // Set flags. This is gross, since we are really setting multiple flags.
169 T.setFlag((Token::TokenFlags) Read8(TokBuf));
170
171 // Set the IdentifierInfo* (if any).
172 T.setIdentifierInfo(PTHMgr.ReadIdentifierInfo(TokBuf));
173
174 // Set the SourceLocation. Since all tokens are constructed using a
175 // raw lexer, they will all be offseted from the same FileID.
176 T.setLocation(SourceLocation::getFileLoc(FileID, Read32(TokBuf)));
177
178 // Finally, read and set the length of the token.
179 T.setLength(Read32(TokBuf));
180}
181
//===----------------------------------------------------------------------===//
// Internal Data Structures for PTH file lookup and resolving identifiers.
//===----------------------------------------------------------------------===//
185
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000186
187/// PTHFileLookup - This internal data structure is used by the PTHManager
188/// to map from FileEntry objects managed by FileManager to offsets within
189/// the PTH file.
190namespace {
191class VISIBILITY_HIDDEN PTHFileLookup {
192public:
193 class Val {
194 uint32_t v;
195
196 public:
197 Val() : v(~0) {}
198 Val(uint32_t x) : v(x) {}
199
200 operator uint32_t() const {
201 assert(v != ~((uint32_t)0) && "PTHFileLookup entry initialized.");
202 return v;
203 }
204
205 Val& operator=(uint32_t x) { v = x; return *this; }
206 bool isValid() const { return v != ~((uint32_t)0); }
207 };
208
209private:
210 llvm::StringMap<Val> FileMap;
211
212public:
213 PTHFileLookup() {};
214
215 Val Lookup(const FileEntry* FE) {
216 const char* s = FE->getName();
217 unsigned size = strlen(s);
218 return FileMap.GetOrCreateValue(s, s+size).getValue();
219 }
220
221 void ReadTable(const char* D) {
222 uint32_t N = Read32(D); // Read the length of the table.
223
224 for ( ; N > 0; --N) { // The rest of the data is the table itself.
225 uint32_t len = Read32(D);
226 const char* s = D;
227 D += len;
228 FileMap.GetOrCreateValue(s, s+len).getValue() = Read32(D);
229 }
230 }
231};
232} // end anonymous namespace
233
//===----------------------------------------------------------------------===//
// PTHManager methods.
//===----------------------------------------------------------------------===//
237
238PTHManager::PTHManager(const llvm::MemoryBuffer* buf, void* fileLookup,
Ted Kremenekcf58e622008-12-10 19:40:23 +0000239 const char* idDataTable, IdentifierInfo** perIDCache,
Ted Kremenek6183e482008-12-03 01:16:39 +0000240 Preprocessor& pp)
241: Buf(buf), PerIDCache(perIDCache), FileLookup(fileLookup),
242 IdDataTable(idDataTable), ITable(pp.getIdentifierTable()), PP(pp) {}
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000243
244PTHManager::~PTHManager() {
245 delete Buf;
246 delete (PTHFileLookup*) FileLookup;
Ted Kremenek0e50b6e2008-12-04 22:47:11 +0000247 free(PerIDCache);
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000248}
249
250PTHManager* PTHManager::Create(const std::string& file, Preprocessor& PP) {
251
252 // Memory map the PTH file.
253 llvm::OwningPtr<llvm::MemoryBuffer>
254 File(llvm::MemoryBuffer::getFile(file.c_str()));
255
256 if (!File)
257 return 0;
258
259 // Get the buffer ranges and check if there are at least three 32-bit
260 // words at the end of the file.
261 const char* BufBeg = File->getBufferStart();
262 const char* BufEnd = File->getBufferEnd();
263
264 if(!(BufEnd > BufBeg + sizeof(uint32_t)*3)) {
265 assert(false && "Invalid PTH file.");
266 return 0; // FIXME: Proper error diagnostic?
267 }
268
269 // Compute the address of the index table at the end of the PTH file.
270 // This table contains the offset of the file lookup table, the
271 // persistent ID -> identifer data table.
272 const char* EndTable = BufEnd - sizeof(uint32_t)*3;
273
274 // Construct the file lookup table. This will be used for mapping from
275 // FileEntry*'s to cached tokens.
276 const char* FileTableOffset = EndTable + sizeof(uint32_t)*2;
277 const char* FileTable = BufBeg + Read32(FileTableOffset);
278
279 if (!(FileTable > BufBeg && FileTable < BufEnd)) {
280 assert(false && "Invalid PTH file.");
281 return 0; // FIXME: Proper error diagnostic?
282 }
283
284 llvm::OwningPtr<PTHFileLookup> FL(new PTHFileLookup());
285 FL->ReadTable(FileTable);
286
287 // Get the location of the table mapping from persistent ids to the
288 // data needed to reconstruct identifiers.
289 const char* IDTableOffset = EndTable + sizeof(uint32_t)*1;
290 const char* IData = BufBeg + Read32(IDTableOffset);
291 if (!(IData > BufBeg && IData < BufEnd)) {
292 assert(false && "Invalid PTH file.");
293 return 0; // FIXME: Proper error diagnostic?
294 }
295
Ted Kremenek6183e482008-12-03 01:16:39 +0000296 // Get the number of IdentifierInfos and pre-allocate the identifier cache.
297 uint32_t NumIds = Read32(IData);
298
299 // Pre-allocate the peristent ID -> IdentifierInfo* cache. We use calloc()
300 // so that we in the best case only zero out memory once when the OS returns
301 // us new pages.
302 IdentifierInfo** PerIDCache =
303 (IdentifierInfo**) calloc(NumIds, sizeof(*PerIDCache));
304
305 if (!PerIDCache) {
306 assert(false && "Could not allocate Persistent ID cache.");
307 return 0;
308 }
309
310 // Create the new lexer.
311 return new PTHManager(File.take(), FL.take(), IData, PerIDCache, PP);
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000312}
313
314IdentifierInfo* PTHManager::ReadIdentifierInfo(const char*& D) {
315 // Read the persistent ID from the PTH file.
316 uint32_t persistentID = Read32(D);
317
318 // A persistent ID of '0' always maps to NULL.
319 if (!persistentID)
320 return 0;
321
322 // Adjust the persistent ID by subtracting '1' so that it can be used
323 // as an index within a table in the PTH file.
324 --persistentID;
325
326 // Check if the IdentifierInfo has already been resolved.
Ted Kremenekcf58e622008-12-10 19:40:23 +0000327 IdentifierInfo*& II = PerIDCache[persistentID];
Ted Kremenek0c6a77b2008-12-03 00:38:03 +0000328 if (II) return II;
329
330 // Look in the PTH file for the string data for the IdentifierInfo object.
331 const char* TableEntry = IdDataTable + sizeof(uint32_t) * persistentID;
332 const char* IDData = Buf->getBufferStart() + Read32(TableEntry);
333 assert(IDData < Buf->getBufferEnd());
334
335 // Read the length of the string.
336 uint32_t len = Read32(IDData);
337
338 // Get the IdentifierInfo* with the specified string.
339 II = &ITable.get(IDData, IDData+len);
340 return II;
341}
342
343PTHLexer* PTHManager::CreateLexer(unsigned FileID, const FileEntry* FE) {
344
345 if (!FE)
346 return 0;
347
348 // Lookup the FileEntry object in our file lookup data structure. It will
349 // return a variant that indicates whether or not there is an offset within
350 // the PTH file that contains cached tokens.
351 PTHFileLookup::Val Off = ((PTHFileLookup*) FileLookup)->Lookup(FE);
352
353 if (!Off.isValid()) // No tokens available.
354 return 0;
355
356 // Compute the offset of the token data within the buffer.
357 const char* data = Buf->getBufferStart() + Off;
358 assert(data < Buf->getBufferEnd());
359 return new PTHLexer(PP, SourceLocation::getFileLoc(FileID, 0), data, *this);
360}