blob: d27073b4dcfde902325bb0a341c108a2282d5315 [file] [log] [blame]
//===--- CacheTokens.cpp - Caching of lexer tokens for PCH support --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This provides a possible implementation of PCH support for Clang that is
// based on caching lexed tokens and identifiers.
//
//===----------------------------------------------------------------------===//
14
15#include "clang.h"
16#include "clang/Basic/FileManager.h"
17#include "clang/Basic/SourceManager.h"
18#include "clang/Basic/IdentifierTable.h"
19#include "clang/Basic/Diagnostic.h"
20#include "clang/Lex/Lexer.h"
21#include "clang/Lex/Preprocessor.h"
22#include "llvm/Support/MemoryBuffer.h"
23#include "llvm/Support/raw_ostream.h"
24
25using namespace clang;
26
27typedef llvm::DenseMap<const FileEntry*,uint64_t> PCHMap;
28typedef llvm::DenseMap<const IdentifierInfo*,uint64_t> IDMap;
29
30static void Emit32(llvm::raw_ostream& Out, uint32_t V) {
Ted Kremeneka3d764c2008-11-26 03:36:26 +000031#if 0
Ted Kremenek85888962008-10-21 00:54:44 +000032 Out << (unsigned char)(V);
33 Out << (unsigned char)(V >> 8);
34 Out << (unsigned char)(V >> 16);
35 Out << (unsigned char)(V >> 24);
Ted Kremeneka3d764c2008-11-26 03:36:26 +000036#else
37 Out << V;
38#endif
39}
40
41static void EmitOffset(llvm::raw_ostream& Out, uint64_t V) {
42 assert(((uint32_t) V) == V && "Offset exceeds 32 bits.");
43 Emit32(Out, (uint32_t) V);
Ted Kremenek85888962008-10-21 00:54:44 +000044}
45
46static void Emit8(llvm::raw_ostream& Out, uint32_t V) {
47 Out << (unsigned char)(V);
48}
49
50static void EmitBuf(llvm::raw_ostream& Out, const char* I, const char* E) {
51 for ( ; I != E ; ++I) Out << *I;
52}
53
54static uint32_t ResolveID(IDMap& IM, uint32_t& idx, const IdentifierInfo* II) {
55 IDMap::iterator I = IM.find(II);
56
57 if (I == IM.end()) {
58 IM[II] = idx;
59 return idx++;
60 }
61
62 return I->second;
63}
64
65static void EmitToken(llvm::raw_ostream& Out, const Token& T,
66 uint32_t& idcount, IDMap& IM) {
67 Emit8(Out, T.getKind());
68 Emit8(Out, T.getFlags());
69 Emit32(Out, ResolveID(IM, idcount, T.getIdentifierInfo()));
70 Emit32(Out, T.getLocation().getRawEncoding());
71 Emit32(Out, T.getLength());
72}
73
74
75static void EmitIdentifier(llvm::raw_ostream& Out, const IdentifierInfo& II) {
76 uint32_t X = (uint32_t) II.getTokenID() << 19;
77 X |= (uint32_t) II.getBuiltinID() << 9;
78 X |= (uint32_t) II.getObjCKeywordID() << 4;
79 if (II.hasMacroDefinition()) X |= 0x8;
80 if (II.isExtensionToken()) X |= 0x4;
81 if (II.isPoisoned()) X |= 0x2;
82 if (II.isCPlusPlusOperatorKeyword()) X |= 0x1;
83
84 Emit32(Out, X);
85}
86
Ted Kremeneka3d764c2008-11-26 03:36:26 +000087static uint64_t EmitIdentifierTable(llvm::raw_fd_ostream& Out,
88 const IdentifierTable& T, const IDMap& IM) {
89
90 // Record the location within the PTH file.
91 uint64_t Off = Out.tell();
Ted Kremenek85888962008-10-21 00:54:44 +000092
93 for (IdentifierTable::const_iterator I=T.begin(), E=T.end(); I!=E; ++I) {
94 const IdentifierInfo& II = I->getValue();
95
96 // Write out the persistent identifier.
97 IDMap::const_iterator IItr = IM.find(&II);
98 if (IItr == IM.end()) continue;
99 Emit32(Out, IItr->second);
100 EmitIdentifier(Out, II);
101
102 // Write out the keyword.
103 unsigned len = I->getKeyLength();
104 Emit32(Out, len);
105 const char* buf = I->getKeyData();
Ted Kremeneka3d764c2008-11-26 03:36:26 +0000106 EmitBuf(Out, buf, buf+len);
Ted Kremenek85888962008-10-21 00:54:44 +0000107 }
Ted Kremeneka3d764c2008-11-26 03:36:26 +0000108
109 return Off;
Ted Kremenek85888962008-10-21 00:54:44 +0000110}
111
Ted Kremeneka3d764c2008-11-26 03:36:26 +0000112static uint64_t EmitFileTable(llvm::raw_fd_ostream& Out, SourceManager& SM,
113 PCHMap& PM) {
114
115 uint64_t off = Out.tell();
116 assert (0 && "Write out the table.");
117 return off;
118}
119
120static uint64_t LexTokens(llvm::raw_fd_ostream& Out, Lexer& L, Preprocessor& PP,
121 uint32_t& idcount, IDMap& IM) {
122
123 // Record the location within the token file.
124 uint64_t off = Out.tell();
125
126 Token Tok;
127
128 do {
129 L.LexFromRawLexer(Tok);
130
131 if (Tok.is(tok::identifier)) {
132 Tok.setIdentifierInfo(PP.LookUpIdentifierInfo(Tok));
133 }
134 else if (Tok.is(tok::hash) && Tok.isAtStartOfLine()) {
135 // Special processing for #include. Store the '#' token and lex
136 // the next token.
137 EmitToken(Out, Tok, idcount, IM);
138 L.LexFromRawLexer(Tok);
139
140 // Did we see 'include'/'import'/'include_next'?
141 if (!Tok.is(tok::identifier))
142 continue;
143
144 IdentifierInfo* II = PP.LookUpIdentifierInfo(Tok);
145 Tok.setIdentifierInfo(II);
146 tok::PPKeywordKind K = II->getPPKeywordID();
147
148 if (K == tok::pp_include || K == tok::pp_import ||
149 K == tok::pp_include_next) {
150
151 // Save the 'include' token.
152 EmitToken(Out, Tok, idcount, IM);
153
154 // Lex the next token as an include string.
155 L.setParsingPreprocessorDirective(true);
156 L.LexIncludeFilename(Tok);
157 L.setParsingPreprocessorDirective(false);
158
159 if (Tok.is(tok::identifier))
160 Tok.setIdentifierInfo(PP.LookUpIdentifierInfo(Tok));
161 }
162 }
163 }
164 while (EmitToken(Out, Tok, idcount, IM), Tok.isNot(tok::eof));
165
166 return off;
167}
Ted Kremenek85888962008-10-21 00:54:44 +0000168
169void clang::CacheTokens(Preprocessor& PP, const std::string& OutFile) {
170 // Lex through the entire file. This will populate SourceManager with
171 // all of the header information.
172 Token Tok;
173 PP.EnterMainSourceFile();
174 do { PP.Lex(Tok); } while (Tok.isNot(tok::eof));
175
176 // Iterate over all the files in SourceManager. Create a lexer
177 // for each file and cache the tokens.
178 SourceManager& SM = PP.getSourceManager();
179 const LangOptions& LOpts = PP.getLangOptions();
180 llvm::raw_ostream& os = llvm::errs();
181
182 PCHMap PM;
183 IDMap IM;
Ted Kremenek85888962008-10-21 00:54:44 +0000184 uint32_t idcount = 0;
185
186 std::string ErrMsg;
Daniel Dunbar26fb2722008-11-13 05:09:21 +0000187 llvm::raw_fd_ostream Out(OutFile.c_str(), true, ErrMsg);
Ted Kremenek85888962008-10-21 00:54:44 +0000188
189 if (!ErrMsg.empty()) {
190 os << "PCH error: " << ErrMsg << "\n";
191 return;
192 }
193
194 for (SourceManager::fileid_iterator I=SM.fileid_begin(), E=SM.fileid_end();
195 I!=E; ++I) {
196
197 const SrcMgr::ContentCache* C = I.getFileIDInfo().getContentCache();
Ted Kremeneka3d764c2008-11-26 03:36:26 +0000198 if (!C) continue;
Ted Kremenek85888962008-10-21 00:54:44 +0000199
Ted Kremeneka3d764c2008-11-26 03:36:26 +0000200 const FileEntry* FE = C->Entry; // Does this entry correspond to a file?
201 if (!FE) continue;
Ted Kremenek85888962008-10-21 00:54:44 +0000202
Ted Kremeneka3d764c2008-11-26 03:36:26 +0000203 PCHMap::iterator PI = PM.find(FE); // Have we already processed this file?
204 if (PI != PM.end()) continue;
205
206 const llvm::MemoryBuffer* B = C->Buffer;
207 if (!B) continue;
208
Ted Kremenek85888962008-10-21 00:54:44 +0000209 Lexer L(SourceLocation::getFileLoc(I.getFileID(), 0), LOpts,
210 B->getBufferStart(), B->getBufferEnd(), B);
Daniel Dunbar31309ab2008-11-26 02:18:33 +0000211
Ted Kremeneka3d764c2008-11-26 03:36:26 +0000212 PM[FE] = LexTokens(Out, L, PP, idcount, IM);
Daniel Dunbar31309ab2008-11-26 02:18:33 +0000213 }
Ted Kremeneka3d764c2008-11-26 03:36:26 +0000214
215 // Write out the identifier table.
216 uint64_t IdTableOff = EmitIdentifierTable(Out, PP.getIdentifierTable(), IM);
Ted Kremenek85888962008-10-21 00:54:44 +0000217
Ted Kremeneka3d764c2008-11-26 03:36:26 +0000218 // Write out the file table.
219 uint64_t FileTableOff = EmitFileTable(Out, SM, PM);
220
221 // Finally, write out the offset table at the end.
222 EmitOffset(Out, IdTableOff);
223 EmitOffset(Out, FileTableOff);
Ted Kremenek85888962008-10-21 00:54:44 +0000224}