blob: 846a17e8daf12c8b3fb6a5eafcd25d9c22d83cb9 [file] [log] [blame]
//===--- PTHLexer.cpp - Lex from a token stream ---------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PTHLexer interface.
//
//===----------------------------------------------------------------------===//
13
Ted Kremenek325cd302008-12-03 00:38:03 +000014#include "clang/Basic/TokenKinds.h"
15#include "clang/Basic/FileManager.h"
16#include "clang/Basic/IdentifierTable.h"
Ted Kremenekca820862008-11-12 21:37:15 +000017#include "clang/Lex/PTHLexer.h"
18#include "clang/Lex/Preprocessor.h"
Ted Kremenek325cd302008-12-03 00:38:03 +000019#include "clang/Lex/PTHManager.h"
20#include "clang/Lex/Token.h"
21#include "clang/Lex/Preprocessor.h"
22#include "llvm/Support/Compiler.h"
23#include "llvm/Support/MemoryBuffer.h"
24#include "llvm/ADT/StringMap.h"
25#include "llvm/ADT/OwningPtr.h"
26#include "llvm/ADT/DenseMap.h"
27
Ted Kremenekca820862008-11-12 21:37:15 +000028using namespace clang;
29
Ted Kremenek325cd302008-12-03 00:38:03 +000030PTHLexer::PTHLexer(Preprocessor& pp, SourceLocation fileloc, const char* D,
31 PTHManager& PM)
32 : TokBuf(D), PreprocessorLexer(&pp, fileloc), CurTokenIdx(0), PTHMgr(PM),
33 NeedsFetching(true) {
34 // Make sure the EofToken is completely clean.
35 EofToken.startToken();
36 }
Ted Kremenekca820862008-11-12 21:37:15 +000037
Ted Kremenek325cd302008-12-03 00:38:03 +000038Token PTHLexer::GetToken() {
39 // Read the next token, or if we haven't advanced yet, get the last
40 // token read.
41 if (NeedsFetching) {
42 NeedsFetching = false;
43 ReadToken(LastFetched);
44 }
45
46 Token Tok = LastFetched;
Ted Kremenek444b6bf2008-11-20 19:49:00 +000047
48 // If we are in raw mode, zero out identifier pointers. This is
49 // needed for 'pragma poison'. Note that this requires that the Preprocessor
50 // can go back to the original source when it calls getSpelling().
51 if (LexingRawMode && Tok.is(tok::identifier))
52 Tok.setIdentifierInfo(0);
53
54 return Tok;
55}
56
Ted Kremenekca820862008-11-12 21:37:15 +000057void PTHLexer::Lex(Token& Tok) {
Ted Kremeneke7cdb0a2008-11-20 07:58:05 +000058LexNextToken:
Ted Kremenekc37a3a22008-11-20 16:32:22 +000059 Tok = GetToken();
Ted Kremenekca820862008-11-12 21:37:15 +000060
Ted Kremenek4491ce52008-11-21 00:58:35 +000061 if (AtLastToken()) {
62 Preprocessor *PPCache = PP;
63
64 if (LexEndOfFile(Tok))
65 return;
66
67 assert(PPCache && "Raw buffer::LexEndOfFile should return a token");
68 return PPCache->Lex(Tok);
69 }
70
Ted Kremeneke7cdb0a2008-11-20 07:58:05 +000071 // Don't advance to the next token yet. Check if we are at the
72 // start of a new line and we're processing a directive. If so, we
73 // consume this token twice, once as an tok::eom.
74 if (Tok.isAtStartOfLine() && ParsingPreprocessorDirective) {
75 ParsingPreprocessorDirective = false;
76 Tok.setKind(tok::eom);
Ted Kremenekca820862008-11-12 21:37:15 +000077 MIOpt.ReadToken();
Ted Kremenekca820862008-11-12 21:37:15 +000078 return;
79 }
Ted Kremeneke7cdb0a2008-11-20 07:58:05 +000080
81 // Advance to the next token.
Ted Kremenekc37a3a22008-11-20 16:32:22 +000082 AdvanceToken();
Ted Kremenekca820862008-11-12 21:37:15 +000083
Ted Kremeneke7cdb0a2008-11-20 07:58:05 +000084 if (Tok.is(tok::hash)) {
85 if (Tok.isAtStartOfLine() && !LexingRawMode) {
86 PP->HandleDirective(Tok);
87
88 if (PP->isCurrentLexer(this))
89 goto LexNextToken;
90
91 return PP->Lex(Tok);
92 }
93 }
94
Ted Kremenekca820862008-11-12 21:37:15 +000095 MIOpt.ReadToken();
Ted Kremeneke7cdb0a2008-11-20 07:58:05 +000096
97 if (Tok.is(tok::identifier)) {
98 if (LexingRawMode) return;
99 return PP->HandleIdentifier(Tok);
100 }
Ted Kremenekca820862008-11-12 21:37:15 +0000101}
102
Ted Kremenek4491ce52008-11-21 00:58:35 +0000103bool PTHLexer::LexEndOfFile(Token &Tok) {
104
105 if (ParsingPreprocessorDirective) {
106 ParsingPreprocessorDirective = false;
107 Tok.setKind(tok::eom);
108 MIOpt.ReadToken();
109 return true; // Have a token.
110 }
111
112 if (LexingRawMode) {
113 MIOpt.ReadToken();
114 return true; // Have an eof token.
115 }
116
117 // FIXME: Issue diagnostics similar to Lexer.
118 return PP->HandleEndOfFile(Tok, false);
119}
120
Ted Kremenekca820862008-11-12 21:37:15 +0000121void PTHLexer::setEOF(Token& Tok) {
Ted Kremenek325cd302008-12-03 00:38:03 +0000122 assert(!EofToken.is(tok::eof));
123 Tok = EofToken;
Ted Kremenekca820862008-11-12 21:37:15 +0000124}
Ted Kremenekb53b1f42008-11-19 22:21:33 +0000125
126void PTHLexer::DiscardToEndOfLine() {
127 assert(ParsingPreprocessorDirective && ParsingFilename == false &&
128 "Must be in a preprocessing directive!");
Ted Kremeneka295ef42008-11-20 01:16:50 +0000129
130 // Already at end-of-file?
Ted Kremenekc37a3a22008-11-20 16:32:22 +0000131 if (AtLastToken())
Ted Kremeneka295ef42008-11-20 01:16:50 +0000132 return;
133
134 // Find the first token that is not the start of the *current* line.
Ted Kremenek5464c8c2008-11-21 23:28:56 +0000135 Token T;
136 for (Lex(T); !AtLastToken(); Lex(T))
Ted Kremenekc37a3a22008-11-20 16:32:22 +0000137 if (GetToken().isAtStartOfLine())
Ted Kremeneka295ef42008-11-20 01:16:50 +0000138 return;
Ted Kremenekb53b1f42008-11-19 22:21:33 +0000139}
Ted Kremenek325cd302008-12-03 00:38:03 +0000140
//===----------------------------------------------------------------------===//
// Utility methods for reading from the mmap'ed PTH file.
//===----------------------------------------------------------------------===//
144
/// Read8 - Return the byte \p data points at as an unsigned 8-bit value and
/// advance \p data by one.
static inline uint8_t Read8(const char*& data) {
  const uint8_t Byte = static_cast<uint8_t>(*data);
  ++data;
  return Byte;
}
148
/// Read32 - Assemble a 32-bit value from the four bytes at \p data, least
/// significant byte first, and advance \p data by four.
static inline uint32_t Read32(const char*& data) {
  uint32_t V = 0;
  // Byte i lands in bits [8*i, 8*i+7].  Each byte is widened from an
  // unsigned 8-bit value so a high bit never sign-extends.
  for (unsigned Shift = 0; Shift != 32; Shift += 8) {
    const uint8_t Byte = static_cast<uint8_t>(*data);
    ++data;
    V |= static_cast<uint32_t>(Byte) << Shift;
  }
  return V;
}
156
//===----------------------------------------------------------------------===//
// Token reconstruction from the PTH file.
//===----------------------------------------------------------------------===//
160
161void PTHLexer::ReadToken(Token& T) {
162 // Clear the token.
163 // FIXME: Setting the flags directly should obviate this step.
164 T.startToken();
165
166 // Read the type of the token.
167 T.setKind((tok::TokenKind) Read8(TokBuf));
168
169 // Set flags. This is gross, since we are really setting multiple flags.
170 T.setFlag((Token::TokenFlags) Read8(TokBuf));
171
172 // Set the IdentifierInfo* (if any).
173 T.setIdentifierInfo(PTHMgr.ReadIdentifierInfo(TokBuf));
174
175 // Set the SourceLocation. Since all tokens are constructed using a
176 // raw lexer, they will all be offseted from the same FileID.
177 T.setLocation(SourceLocation::getFileLoc(FileID, Read32(TokBuf)));
178
179 // Finally, read and set the length of the token.
180 T.setLength(Read32(TokBuf));
181}
182
//===----------------------------------------------------------------------===//
// Internal Data Structures for PTH file lookup and resolving identifiers.
//===----------------------------------------------------------------------===//
186
187typedef llvm::DenseMap<uint32_t, IdentifierInfo*> IDCache;
188
189/// PTHFileLookup - This internal data structure is used by the PTHManager
190/// to map from FileEntry objects managed by FileManager to offsets within
191/// the PTH file.
192namespace {
193class VISIBILITY_HIDDEN PTHFileLookup {
194public:
195 class Val {
196 uint32_t v;
197
198 public:
199 Val() : v(~0) {}
200 Val(uint32_t x) : v(x) {}
201
202 operator uint32_t() const {
203 assert(v != ~((uint32_t)0) && "PTHFileLookup entry initialized.");
204 return v;
205 }
206
207 Val& operator=(uint32_t x) { v = x; return *this; }
208 bool isValid() const { return v != ~((uint32_t)0); }
209 };
210
211private:
212 llvm::StringMap<Val> FileMap;
213
214public:
215 PTHFileLookup() {};
216
217 Val Lookup(const FileEntry* FE) {
218 const char* s = FE->getName();
219 unsigned size = strlen(s);
220 return FileMap.GetOrCreateValue(s, s+size).getValue();
221 }
222
223 void ReadTable(const char* D) {
224 uint32_t N = Read32(D); // Read the length of the table.
225
226 for ( ; N > 0; --N) { // The rest of the data is the table itself.
227 uint32_t len = Read32(D);
228 const char* s = D;
229 D += len;
230 FileMap.GetOrCreateValue(s, s+len).getValue() = Read32(D);
231 }
232 }
233};
234} // end anonymous namespace
235
//===----------------------------------------------------------------------===//
// PTHManager methods.
//===----------------------------------------------------------------------===//
239
240PTHManager::PTHManager(const llvm::MemoryBuffer* buf, void* fileLookup,
241 const char* idDataTable, Preprocessor& pp)
242: Buf(buf), PersistentIDCache(0), FileLookup(fileLookup),
243IdDataTable(idDataTable), ITable(pp.getIdentifierTable()), PP(pp) {}
244
245PTHManager::~PTHManager() {
246 delete Buf;
247 delete (PTHFileLookup*) FileLookup;
248 delete (IDCache*) PersistentIDCache;
249}
250
251PTHManager* PTHManager::Create(const std::string& file, Preprocessor& PP) {
252
253 // Memory map the PTH file.
254 llvm::OwningPtr<llvm::MemoryBuffer>
255 File(llvm::MemoryBuffer::getFile(file.c_str()));
256
257 if (!File)
258 return 0;
259
260 // Get the buffer ranges and check if there are at least three 32-bit
261 // words at the end of the file.
262 const char* BufBeg = File->getBufferStart();
263 const char* BufEnd = File->getBufferEnd();
264
265 if(!(BufEnd > BufBeg + sizeof(uint32_t)*3)) {
266 assert(false && "Invalid PTH file.");
267 return 0; // FIXME: Proper error diagnostic?
268 }
269
270 // Compute the address of the index table at the end of the PTH file.
271 // This table contains the offset of the file lookup table, the
272 // persistent ID -> identifer data table.
273 const char* EndTable = BufEnd - sizeof(uint32_t)*3;
274
275 // Construct the file lookup table. This will be used for mapping from
276 // FileEntry*'s to cached tokens.
277 const char* FileTableOffset = EndTable + sizeof(uint32_t)*2;
278 const char* FileTable = BufBeg + Read32(FileTableOffset);
279
280 if (!(FileTable > BufBeg && FileTable < BufEnd)) {
281 assert(false && "Invalid PTH file.");
282 return 0; // FIXME: Proper error diagnostic?
283 }
284
285 llvm::OwningPtr<PTHFileLookup> FL(new PTHFileLookup());
286 FL->ReadTable(FileTable);
287
288 // Get the location of the table mapping from persistent ids to the
289 // data needed to reconstruct identifiers.
290 const char* IDTableOffset = EndTable + sizeof(uint32_t)*1;
291 const char* IData = BufBeg + Read32(IDTableOffset);
292 if (!(IData > BufBeg && IData < BufEnd)) {
293 assert(false && "Invalid PTH file.");
294 return 0; // FIXME: Proper error diagnostic?
295 }
296
297 return new PTHManager(File.take(), FL.take(), IData, PP);
298}
299
300IdentifierInfo* PTHManager::ReadIdentifierInfo(const char*& D) {
301 // Read the persistent ID from the PTH file.
302 uint32_t persistentID = Read32(D);
303
304 // A persistent ID of '0' always maps to NULL.
305 if (!persistentID)
306 return 0;
307
308 // Adjust the persistent ID by subtracting '1' so that it can be used
309 // as an index within a table in the PTH file.
310 --persistentID;
311
312 // Check if the IdentifierInfo has already been resolved.
313 if (!PersistentIDCache)
314 PersistentIDCache = new IDCache();
315
316 // FIXME: We can make this an array, but what is the performance tradeoff?
317 IdentifierInfo*& II = (*((IDCache*) PersistentIDCache))[persistentID];
318 if (II) return II;
319
320 // Look in the PTH file for the string data for the IdentifierInfo object.
321 const char* TableEntry = IdDataTable + sizeof(uint32_t) * persistentID;
322 const char* IDData = Buf->getBufferStart() + Read32(TableEntry);
323 assert(IDData < Buf->getBufferEnd());
324
325 // Read the length of the string.
326 uint32_t len = Read32(IDData);
327
328 // Get the IdentifierInfo* with the specified string.
329 II = &ITable.get(IDData, IDData+len);
330 return II;
331}
332
333PTHLexer* PTHManager::CreateLexer(unsigned FileID, const FileEntry* FE) {
334
335 if (!FE)
336 return 0;
337
338 // Lookup the FileEntry object in our file lookup data structure. It will
339 // return a variant that indicates whether or not there is an offset within
340 // the PTH file that contains cached tokens.
341 PTHFileLookup::Val Off = ((PTHFileLookup*) FileLookup)->Lookup(FE);
342
343 if (!Off.isValid()) // No tokens available.
344 return 0;
345
346 // Compute the offset of the token data within the buffer.
347 const char* data = Buf->getBufferStart() + Off;
348 assert(data < Buf->getBufferEnd());
349 return new PTHLexer(PP, SourceLocation::getFileLoc(FileID, 0), data, *this);
350}