blob: 2ca1225b176fc5df879aaffdd240ebc0b8639a51 [file] [log] [blame]
Chris Lattner4b009652007-07-25 00:24:17 +00001//===--- IdentifierTable.cpp - Hash table for identifier lookup -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file was developed by Chris Lattner and is distributed under
6// the University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the IdentifierInfo, IdentifierVisitor, and
11// IdentifierTable interfaces.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Lex/IdentifierTable.h"
16#include "clang/Lex/MacroInfo.h"
17#include "clang/Basic/LangOptions.h"
Steve Narofff9e80db2007-10-05 18:42:47 +000018#include "llvm/ADT/FoldingSet.h"
Chris Lattner4b009652007-07-25 00:24:17 +000019using namespace clang;
20
21//===----------------------------------------------------------------------===//
Steve Naroff87c329f2007-08-23 18:16:40 +000022// Token Implementation
23//===----------------------------------------------------------------------===//
24
25/// isObjCAtKeyword - Return true if we have an ObjC keyword identifier.
26bool Token::isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const {
27 return getKind() == tok::identifier &&
28 getIdentifierInfo()->getObjCKeywordID() == objcKey;
29}
30
31/// getObjCKeywordID - Return the ObjC keyword kind.
32tok::ObjCKeywordKind Token::getObjCKeywordID() const {
33 IdentifierInfo *specId = getIdentifierInfo();
34 return specId ? specId->getObjCKeywordID() : tok::objc_not_keyword;
35}
36
37//===----------------------------------------------------------------------===//
Chris Lattner4b009652007-07-25 00:24:17 +000038// IdentifierInfo Implementation
39//===----------------------------------------------------------------------===//
40
41IdentifierInfo::IdentifierInfo() {
42 Macro = 0;
43 TokenID = tok::identifier;
44 PPID = tok::pp_not_keyword;
45 ObjCID = tok::objc_not_keyword;
46 BuiltinID = 0;
47 IsExtension = false;
48 IsPoisoned = false;
49 IsOtherTargetMacro = false;
50 IsCPPOperatorKeyword = false;
51 IsNonPortableBuiltin = false;
52 FETokenInfo = 0;
53}
54
55IdentifierInfo::~IdentifierInfo() {
56 delete Macro;
57}
58
59//===----------------------------------------------------------------------===//
60// IdentifierTable Implementation
61//===----------------------------------------------------------------------===//
62
63IdentifierTable::IdentifierTable(const LangOptions &LangOpts)
64 // Start with space for 8K identifiers.
65 : HashTable(8192) {
66
67 // Populate the identifier table with info about keywords for the current
68 // language.
69 AddKeywords(LangOpts);
70}
71
72//===----------------------------------------------------------------------===//
73// Language Keyword Implementation
74//===----------------------------------------------------------------------===//
75
76/// AddKeyword - This method is used to associate a token ID with specific
77/// identifiers because they are language keywords. This causes the lexer to
78/// automatically map matching identifiers to specialized token codes.
79///
80/// The C90/C99/CPP/CPP0x flags are set to 0 if the token should be
81/// enabled in the specified langauge, set to 1 if it is an extension
82/// in the specified language, and set to 2 if disabled in the
83/// specified language.
84static void AddKeyword(const char *Keyword, unsigned KWLen,
85 tok::TokenKind TokenCode,
86 int C90, int C99, int CXX, int CXX0x,
87 const LangOptions &LangOpts, IdentifierTable &Table) {
88 int Flags = LangOpts.CPlusPlus ? (LangOpts.CPlusPlus0x? CXX0x : CXX)
89 : (LangOpts.C99 ? C99 : C90);
90
91 // Don't add this keyword if disabled in this language or if an extension
92 // and extensions are disabled.
93 if (Flags + LangOpts.NoExtensions >= 2) return;
94
95 IdentifierInfo &Info = Table.get(Keyword, Keyword+KWLen);
96 Info.setTokenID(TokenCode);
97 Info.setIsExtensionToken(Flags == 1);
98}
99
100static void AddAlias(const char *Keyword, unsigned KWLen,
101 const char *AliaseeKeyword, unsigned AliaseeKWLen,
102 const LangOptions &LangOpts, IdentifierTable &Table) {
103 IdentifierInfo &AliasInfo = Table.get(Keyword, Keyword+KWLen);
104 IdentifierInfo &AliaseeInfo = Table.get(AliaseeKeyword,
105 AliaseeKeyword+AliaseeKWLen);
106 AliasInfo.setTokenID(AliaseeInfo.getTokenID());
107 AliasInfo.setIsExtensionToken(AliaseeInfo.isExtensionToken());
108}
109
110/// AddPPKeyword - Register a preprocessor keyword like "define" "undef" or
111/// "elif".
112static void AddPPKeyword(tok::PPKeywordKind PPID,
113 const char *Name, unsigned NameLen,
114 IdentifierTable &Table) {
115 Table.get(Name, Name+NameLen).setPPKeywordID(PPID);
116}
117
118/// AddCXXOperatorKeyword - Register a C++ operator keyword alternative
119/// representations.
120static void AddCXXOperatorKeyword(const char *Keyword, unsigned KWLen,
121 tok::TokenKind TokenCode,
122 IdentifierTable &Table) {
123 IdentifierInfo &Info = Table.get(Keyword, Keyword + KWLen);
124 Info.setTokenID(TokenCode);
125 Info.setIsCPlusplusOperatorKeyword();
126}
127
128/// AddObjCKeyword - Register an Objective-C @keyword like "class" "selector" or
129/// "property".
130static void AddObjCKeyword(tok::ObjCKeywordKind ObjCID,
131 const char *Name, unsigned NameLen,
132 IdentifierTable &Table) {
133 Table.get(Name, Name+NameLen).setObjCKeywordID(ObjCID);
134}
135
136/// AddKeywords - Add all keywords to the symbol table.
137///
138void IdentifierTable::AddKeywords(const LangOptions &LangOpts) {
139 enum {
140 C90Shift = 0,
141 EXTC90 = 1 << C90Shift,
142 NOTC90 = 2 << C90Shift,
143 C99Shift = 2,
144 EXTC99 = 1 << C99Shift,
145 NOTC99 = 2 << C99Shift,
146 CPPShift = 4,
147 EXTCPP = 1 << CPPShift,
148 NOTCPP = 2 << CPPShift,
149 CPP0xShift = 6,
150 EXTCPP0x = 1 << CPP0xShift,
151 NOTCPP0x = 2 << CPP0xShift,
152 Mask = 3
153 };
154
155 // Add keywords and tokens for the current language.
156#define KEYWORD(NAME, FLAGS) \
157 AddKeyword(#NAME, strlen(#NAME), tok::kw_ ## NAME, \
158 ((FLAGS) >> C90Shift) & Mask, \
159 ((FLAGS) >> C99Shift) & Mask, \
160 ((FLAGS) >> CPPShift) & Mask, \
161 ((FLAGS) >> CPP0xShift) & Mask, LangOpts, *this);
162#define ALIAS(NAME, TOK) \
163 AddAlias(NAME, strlen(NAME), #TOK, strlen(#TOK), LangOpts, *this);
164#define PPKEYWORD(NAME) \
165 AddPPKeyword(tok::pp_##NAME, #NAME, strlen(#NAME), *this);
166#define CXX_KEYWORD_OPERATOR(NAME, ALIAS) \
167 if (LangOpts.CXXOperatorNames) \
168 AddCXXOperatorKeyword(#NAME, strlen(#NAME), tok::ALIAS, *this);
169#define OBJC1_AT_KEYWORD(NAME) \
170 if (LangOpts.ObjC1) \
171 AddObjCKeyword(tok::objc_##NAME, #NAME, strlen(#NAME), *this);
172#define OBJC2_AT_KEYWORD(NAME) \
173 if (LangOpts.ObjC2) \
174 AddObjCKeyword(tok::objc_##NAME, #NAME, strlen(#NAME), *this);
175#include "clang/Basic/TokenKinds.def"
176}
177
178
179//===----------------------------------------------------------------------===//
180// Stats Implementation
181//===----------------------------------------------------------------------===//
182
183/// PrintStats - Print statistics about how well the identifier table is doing
184/// at hashing identifiers.
185void IdentifierTable::PrintStats() const {
186 unsigned NumBuckets = HashTable.getNumBuckets();
187 unsigned NumIdentifiers = HashTable.getNumItems();
188 unsigned NumEmptyBuckets = NumBuckets-NumIdentifiers;
189 unsigned AverageIdentifierSize = 0;
190 unsigned MaxIdentifierLength = 0;
191
192 // TODO: Figure out maximum times an identifier had to probe for -stats.
193 for (llvm::StringMap<IdentifierInfo, llvm::BumpPtrAllocator>::const_iterator
194 I = HashTable.begin(), E = HashTable.end(); I != E; ++I) {
195 unsigned IdLen = I->getKeyLength();
196 AverageIdentifierSize += IdLen;
197 if (MaxIdentifierLength < IdLen)
198 MaxIdentifierLength = IdLen;
199 }
200
201 fprintf(stderr, "\n*** Identifier Table Stats:\n");
202 fprintf(stderr, "# Identifiers: %d\n", NumIdentifiers);
203 fprintf(stderr, "# Empty Buckets: %d\n", NumEmptyBuckets);
204 fprintf(stderr, "Hash density (#identifiers per bucket): %f\n",
205 NumIdentifiers/(double)NumBuckets);
206 fprintf(stderr, "Ave identifier length: %f\n",
207 (AverageIdentifierSize/(double)NumIdentifiers));
208 fprintf(stderr, "Max identifier length: %d\n", MaxIdentifierLength);
209
210 // Compute statistics about the memory allocated for identifiers.
211 HashTable.getAllocator().PrintStats();
212}
Steve Naroff4ed9d662007-09-27 14:38:14 +0000213
Steve Narofff9e80db2007-10-05 18:42:47 +0000214//===----------------------------------------------------------------------===//
215// SelectorTable Implementation
216//===----------------------------------------------------------------------===//
217
218/// MultiKeywordSelector - One of these variable length records is kept for each
219/// selector containing more than one keyword. We use a folding set
220/// to unique aggregate names (keyword selectors in ObjC parlance). Access to
221/// this class is provided strictly through Selector.
222class MultiKeywordSelector : public llvm::FoldingSetNode {
223public:
224 unsigned NumArgs;
225
226 // Constructor for keyword selectors.
227 MultiKeywordSelector(unsigned nKeys, IdentifierInfo **IIV) {
228 assert((nKeys > 1) && "not a multi-keyword selector");
229 NumArgs = nKeys;
230 // Fill in the trailing keyword array.
231 IdentifierInfo **KeyInfo = reinterpret_cast<IdentifierInfo **>(this+1);
232 for (unsigned i = 0; i != nKeys; ++i)
233 KeyInfo[i] = IIV[i];
234 }
235 // Derive the full selector name, placing the result into methodBuffer.
236 // As a convenience, a pointer to the first character is returned.
237 // Example usage: llvm::SmallString<128> mbuf; Selector->getName(mbuf);
238 char *getName(llvm::SmallVectorImpl<char> &methodBuffer);
239
240 unsigned getNumArgs() const { return NumArgs; }
241
242 typedef IdentifierInfo *const *keyword_iterator;
243 keyword_iterator keyword_begin() const {
244 return reinterpret_cast<keyword_iterator>(this+1);
245 }
246 keyword_iterator keyword_end() const {
247 return keyword_begin()+NumArgs;
248 }
249 IdentifierInfo *getIdentifierInfoForSlot(unsigned i) {
250 assert((i < NumArgs) && "getIdentifierInfoForSlot(): illegal index");
251 return keyword_begin()[i];
252 }
253 static void Profile(llvm::FoldingSetNodeID &ID,
254 keyword_iterator ArgTys, unsigned NumArgs) {
255 ID.AddInteger(NumArgs);
256 if (NumArgs) { // handle keyword selector.
257 for (unsigned i = 0; i != NumArgs; ++i)
258 ID.AddPointer(ArgTys[i]);
259 } else // handle unary selector.
260 ID.AddPointer(ArgTys[0]);
261 }
262 void Profile(llvm::FoldingSetNodeID &ID) {
263 Profile(ID, keyword_begin(), NumArgs);
264 }
265};
266
267unsigned Selector::getNumArgs() const {
268 unsigned IIF = getIdentifierInfoFlag();
269 if (IIF == ZeroArg)
270 return 0;
271 if (IIF == OneArg)
272 return 1;
273 // We point to a MultiKeywordSelector (pointer doesn't contain any flags).
274 MultiKeywordSelector *SI = reinterpret_cast<MultiKeywordSelector *>(InfoPtr);
275 return SI->getNumArgs();
276}
277
278IdentifierInfo *Selector::getIdentifierInfoForSlot(unsigned argIndex) {
279 IdentifierInfo *II = getAsIdentifierInfo();
280 if (II) {
281 assert(((argIndex == 0) || (argIndex == 1)) && "illegal keyword index");
282 return II;
283 }
284 // We point to a MultiKeywordSelector (pointer doesn't contain any flags).
285 MultiKeywordSelector *SI = reinterpret_cast<MultiKeywordSelector *>(InfoPtr);
286 return SI->getIdentifierInfoForSlot(argIndex);
287}
288
289char *MultiKeywordSelector::getName(llvm::SmallVectorImpl<char> &methodName) {
290 methodName[0] = '\0';
291 keyword_iterator KeyIter = keyword_begin();
292 for (unsigned int i = 0; i < NumArgs; i++) {
293 if (KeyIter[i]) {
294 unsigned KeyLen = KeyIter[i]->getLength();
295 methodName.append(KeyIter[i]->getName(), KeyIter[i]->getName()+KeyLen);
296 }
297 methodName.push_back(':');
298 }
299 methodName.push_back('\0');
300 return &methodName[0];
301}
302
303char *Selector::getName(llvm::SmallVectorImpl<char> &methodName) {
304 methodName[0] = '\0';
305 IdentifierInfo *II = getAsIdentifierInfo();
306 if (II) {
307 unsigned NameLen = II->getLength();
308 methodName.append(II->getName(), II->getName()+NameLen);
309 if (getNumArgs() == 1)
310 methodName.push_back(':');
311 methodName.push_back('\0');
312 } else { // We have a multiple keyword selector (no embedded flags).
313 MultiKeywordSelector *SI = reinterpret_cast<MultiKeywordSelector *>(InfoPtr);
314 SI->getName(methodName);
315 }
316 return &methodName[0];
317}
318
319
320Selector SelectorTable::getKeywordSelector(unsigned nKeys, IdentifierInfo **IIV)
321{
322 llvm::FoldingSet<MultiKeywordSelector> *SelTab;
323
324 SelTab = static_cast<llvm::FoldingSet<MultiKeywordSelector> *>(Impl);
325
326 // Unique selector, to guarantee there is one per name.
327 llvm::FoldingSetNodeID ID;
328 MultiKeywordSelector::Profile(ID, IIV, nKeys);
329
330 void *InsertPos = 0;
331 if (MultiKeywordSelector *SI = SelTab->FindNodeOrInsertPos(ID, InsertPos)) {
332 return Selector(SI);
333 }
334 // MultiKeywordSelector objects are not allocated with new because they have a
335 // variable size array (for parameter types) at the end of them.
336 MultiKeywordSelector *SI =
337 (MultiKeywordSelector*)malloc(sizeof(MultiKeywordSelector) +
338 nKeys*sizeof(IdentifierInfo *));
339 new (SI) MultiKeywordSelector(nKeys, IIV);
340 SelTab->InsertNode(SI, InsertPos);
341 return Selector(SI);
342}
343
344Selector SelectorTable::getUnarySelector(IdentifierInfo *ID) {
345 return Selector(ID, 1);
346}
347
348Selector SelectorTable::getNullarySelector(IdentifierInfo *ID) {
349 return Selector(ID, 0);
350}
351
352SelectorTable::SelectorTable() {
353 Impl = new llvm::FoldingSet<MultiKeywordSelector>;
354}
355
356SelectorTable::~SelectorTable() {
357 delete static_cast<llvm::FoldingSet<MultiKeywordSelector> *>(Impl);
358}
359
360