blob: 865356c7f3d89d68c2a2a39d7c600a046f3c7975 [file] [log] [blame]
Reid Spencer5f016e22007-07-11 17:01:13 +00001//===--- IdentifierTable.cpp - Hash table for identifier lookup -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file was developed by Chris Lattner and is distributed under
6// the University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the IdentifierInfo, IdentifierVisitor, and
11// IdentifierTable interfaces.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Lex/IdentifierTable.h"
16#include "clang/Lex/MacroInfo.h"
17#include "clang/Basic/LangOptions.h"
Steve Naroff29238a02007-10-05 18:42:47 +000018#include "llvm/ADT/FoldingSet.h"
Chris Lattner85994262007-10-05 20:15:24 +000019#include "llvm/ADT/DenseMap.h"
Reid Spencer5f016e22007-07-11 17:01:13 +000020using namespace clang;
21
Chris Lattner4365a7e2007-10-07 07:09:52 +000022static llvm::DenseMap<const IdentifierInfo*, MacroInfo*> Macros;
23
24MacroInfo *IdentifierInfo::getMacroInfoInternal() const {
25 return Macros[this];
26}
27void IdentifierInfo::setMacroInfo(MacroInfo *I) {
28 if (I == 0) {
29 if (HasMacro) {
30 Macros.erase(this);
31 HasMacro = false;
32 }
33 } else {
34 Macros[this] = I;
35 HasMacro = true;
36 }
37}
38
39
Reid Spencer5f016e22007-07-11 17:01:13 +000040//===----------------------------------------------------------------------===//
Steve Naroff861cf3e2007-08-23 18:16:40 +000041// Token Implementation
42//===----------------------------------------------------------------------===//
43
44/// isObjCAtKeyword - Return true if we have an ObjC keyword identifier.
45bool Token::isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const {
46 return getKind() == tok::identifier &&
47 getIdentifierInfo()->getObjCKeywordID() == objcKey;
48}
49
50/// getObjCKeywordID - Return the ObjC keyword kind.
51tok::ObjCKeywordKind Token::getObjCKeywordID() const {
52 IdentifierInfo *specId = getIdentifierInfo();
53 return specId ? specId->getObjCKeywordID() : tok::objc_not_keyword;
54}
55
56//===----------------------------------------------------------------------===//
Reid Spencer5f016e22007-07-11 17:01:13 +000057// IdentifierInfo Implementation
58//===----------------------------------------------------------------------===//
59
60IdentifierInfo::IdentifierInfo() {
Reid Spencer5f016e22007-07-11 17:01:13 +000061 TokenID = tok::identifier;
Reid Spencer5f016e22007-07-11 17:01:13 +000062 ObjCID = tok::objc_not_keyword;
63 BuiltinID = 0;
Chris Lattner4365a7e2007-10-07 07:09:52 +000064 HasMacro = false;
Reid Spencer5f016e22007-07-11 17:01:13 +000065 IsExtension = false;
66 IsPoisoned = false;
67 IsOtherTargetMacro = false;
68 IsCPPOperatorKeyword = false;
Chris Lattner938867c2007-07-19 00:11:19 +000069 IsNonPortableBuiltin = false;
Reid Spencer5f016e22007-07-11 17:01:13 +000070 FETokenInfo = 0;
71}
72
73IdentifierInfo::~IdentifierInfo() {
Chris Lattner4365a7e2007-10-07 07:09:52 +000074 if (MacroInfo *Macro = getMacroInfo())
75 delete Macro;
Reid Spencer5f016e22007-07-11 17:01:13 +000076}
77
78//===----------------------------------------------------------------------===//
79// IdentifierTable Implementation
80//===----------------------------------------------------------------------===//
81
82IdentifierTable::IdentifierTable(const LangOptions &LangOpts)
83 // Start with space for 8K identifiers.
84 : HashTable(8192) {
85
86 // Populate the identifier table with info about keywords for the current
87 // language.
88 AddKeywords(LangOpts);
89}
90
91//===----------------------------------------------------------------------===//
92// Language Keyword Implementation
93//===----------------------------------------------------------------------===//
94
95/// AddKeyword - This method is used to associate a token ID with specific
96/// identifiers because they are language keywords. This causes the lexer to
97/// automatically map matching identifiers to specialized token codes.
98///
Chris Lattnerd4b80f12007-07-16 04:18:29 +000099/// The C90/C99/CPP/CPP0x flags are set to 0 if the token should be
100/// enabled in the specified langauge, set to 1 if it is an extension
101/// in the specified language, and set to 2 if disabled in the
102/// specified language.
Reid Spencer5f016e22007-07-11 17:01:13 +0000103static void AddKeyword(const char *Keyword, unsigned KWLen,
104 tok::TokenKind TokenCode,
Chris Lattnerd4b80f12007-07-16 04:18:29 +0000105 int C90, int C99, int CXX, int CXX0x,
Reid Spencer5f016e22007-07-11 17:01:13 +0000106 const LangOptions &LangOpts, IdentifierTable &Table) {
Chris Lattnerd4b80f12007-07-16 04:18:29 +0000107 int Flags = LangOpts.CPlusPlus ? (LangOpts.CPlusPlus0x? CXX0x : CXX)
108 : (LangOpts.C99 ? C99 : C90);
Reid Spencer5f016e22007-07-11 17:01:13 +0000109
110 // Don't add this keyword if disabled in this language or if an extension
111 // and extensions are disabled.
112 if (Flags + LangOpts.NoExtensions >= 2) return;
113
114 IdentifierInfo &Info = Table.get(Keyword, Keyword+KWLen);
115 Info.setTokenID(TokenCode);
116 Info.setIsExtensionToken(Flags == 1);
117}
118
119static void AddAlias(const char *Keyword, unsigned KWLen,
120 const char *AliaseeKeyword, unsigned AliaseeKWLen,
121 const LangOptions &LangOpts, IdentifierTable &Table) {
122 IdentifierInfo &AliasInfo = Table.get(Keyword, Keyword+KWLen);
123 IdentifierInfo &AliaseeInfo = Table.get(AliaseeKeyword,
124 AliaseeKeyword+AliaseeKWLen);
125 AliasInfo.setTokenID(AliaseeInfo.getTokenID());
126 AliasInfo.setIsExtensionToken(AliaseeInfo.isExtensionToken());
127}
128
Reid Spencer5f016e22007-07-11 17:01:13 +0000129/// AddCXXOperatorKeyword - Register a C++ operator keyword alternative
130/// representations.
131static void AddCXXOperatorKeyword(const char *Keyword, unsigned KWLen,
132 tok::TokenKind TokenCode,
133 IdentifierTable &Table) {
134 IdentifierInfo &Info = Table.get(Keyword, Keyword + KWLen);
135 Info.setTokenID(TokenCode);
136 Info.setIsCPlusplusOperatorKeyword();
137}
138
139/// AddObjCKeyword - Register an Objective-C @keyword like "class" "selector" or
140/// "property".
141static void AddObjCKeyword(tok::ObjCKeywordKind ObjCID,
142 const char *Name, unsigned NameLen,
143 IdentifierTable &Table) {
144 Table.get(Name, Name+NameLen).setObjCKeywordID(ObjCID);
145}
146
147/// AddKeywords - Add all keywords to the symbol table.
148///
149void IdentifierTable::AddKeywords(const LangOptions &LangOpts) {
150 enum {
151 C90Shift = 0,
152 EXTC90 = 1 << C90Shift,
153 NOTC90 = 2 << C90Shift,
154 C99Shift = 2,
155 EXTC99 = 1 << C99Shift,
156 NOTC99 = 2 << C99Shift,
157 CPPShift = 4,
158 EXTCPP = 1 << CPPShift,
159 NOTCPP = 2 << CPPShift,
Chris Lattnerd4b80f12007-07-16 04:18:29 +0000160 CPP0xShift = 6,
161 EXTCPP0x = 1 << CPP0xShift,
162 NOTCPP0x = 2 << CPP0xShift,
Reid Spencer5f016e22007-07-11 17:01:13 +0000163 Mask = 3
164 };
165
166 // Add keywords and tokens for the current language.
167#define KEYWORD(NAME, FLAGS) \
168 AddKeyword(#NAME, strlen(#NAME), tok::kw_ ## NAME, \
169 ((FLAGS) >> C90Shift) & Mask, \
170 ((FLAGS) >> C99Shift) & Mask, \
Chris Lattnerd4b80f12007-07-16 04:18:29 +0000171 ((FLAGS) >> CPPShift) & Mask, \
172 ((FLAGS) >> CPP0xShift) & Mask, LangOpts, *this);
Reid Spencer5f016e22007-07-11 17:01:13 +0000173#define ALIAS(NAME, TOK) \
174 AddAlias(NAME, strlen(NAME), #TOK, strlen(#TOK), LangOpts, *this);
Reid Spencer5f016e22007-07-11 17:01:13 +0000175#define CXX_KEYWORD_OPERATOR(NAME, ALIAS) \
176 if (LangOpts.CXXOperatorNames) \
177 AddCXXOperatorKeyword(#NAME, strlen(#NAME), tok::ALIAS, *this);
178#define OBJC1_AT_KEYWORD(NAME) \
179 if (LangOpts.ObjC1) \
180 AddObjCKeyword(tok::objc_##NAME, #NAME, strlen(#NAME), *this);
181#define OBJC2_AT_KEYWORD(NAME) \
182 if (LangOpts.ObjC2) \
183 AddObjCKeyword(tok::objc_##NAME, #NAME, strlen(#NAME), *this);
184#include "clang/Basic/TokenKinds.def"
185}
186
Chris Lattner387b98d2007-10-07 07:52:34 +0000187tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const {
188 // We use a perfect hash function here involving the length of the keyword,
189 // the first and third character. For preprocessor ID's there are no
190 // collisions (if there were, the switch below would complain about duplicate
191 // case values). Note that this depends on 'if' being null terminated.
192
193#define HASH(LEN, FIRST, THIRD) \
194 (LEN << 5) + (((FIRST-'a') + (THIRD-'a')) & 31)
195#define CASE(LEN, FIRST, THIRD, NAME) \
196 case HASH(LEN, FIRST, THIRD): \
197 return memcmp(Name, #NAME, LEN) ? tok::pp_not_keyword : tok::pp_ ## NAME
198
199 unsigned Len = getLength();
200 const char *Name = getName();
201 switch (HASH(Len, Name[0], Name[2])) {
202 default: return tok::pp_not_keyword;
203 CASE( 2, 'i', '\0', if);
204 CASE( 4, 'e', 'i', elif);
205 CASE( 4, 'e', 's', else);
206 CASE( 4, 'l', 'n', line);
207 CASE( 4, 's', 'c', sccs);
208 CASE( 5, 'e', 'd', endif);
209 CASE( 5, 'e', 'r', error);
210 CASE( 5, 'i', 'e', ident);
211 CASE( 5, 'i', 'd', ifdef);
212 CASE( 5, 'u', 'd', undef);
213
214 CASE( 6, 'a', 's', assert);
215 CASE( 6, 'd', 'f', define);
216 CASE( 6, 'i', 'n', ifndef);
217 CASE( 6, 'i', 'p', import);
218 CASE( 6, 'p', 'a', pragma);
219
220 CASE( 7, 'd', 'f', defined);
221 CASE( 7, 'i', 'c', include);
222 CASE( 7, 'w', 'r', warning);
223
224 CASE( 8, 'u', 'a', unassert);
225 CASE(12, 'i', 'c', include_next);
226 CASE(13, 'd', 'f', define_target);
227 CASE(19, 'd', 'f', define_other_target);
228#undef CASE
229#undef HASH
230 }
231}
Reid Spencer5f016e22007-07-11 17:01:13 +0000232
233//===----------------------------------------------------------------------===//
234// Stats Implementation
235//===----------------------------------------------------------------------===//
236
237/// PrintStats - Print statistics about how well the identifier table is doing
238/// at hashing identifiers.
239void IdentifierTable::PrintStats() const {
240 unsigned NumBuckets = HashTable.getNumBuckets();
241 unsigned NumIdentifiers = HashTable.getNumItems();
242 unsigned NumEmptyBuckets = NumBuckets-NumIdentifiers;
243 unsigned AverageIdentifierSize = 0;
244 unsigned MaxIdentifierLength = 0;
245
246 // TODO: Figure out maximum times an identifier had to probe for -stats.
247 for (llvm::StringMap<IdentifierInfo, llvm::BumpPtrAllocator>::const_iterator
248 I = HashTable.begin(), E = HashTable.end(); I != E; ++I) {
249 unsigned IdLen = I->getKeyLength();
250 AverageIdentifierSize += IdLen;
251 if (MaxIdentifierLength < IdLen)
252 MaxIdentifierLength = IdLen;
253 }
254
255 fprintf(stderr, "\n*** Identifier Table Stats:\n");
256 fprintf(stderr, "# Identifiers: %d\n", NumIdentifiers);
257 fprintf(stderr, "# Empty Buckets: %d\n", NumEmptyBuckets);
258 fprintf(stderr, "Hash density (#identifiers per bucket): %f\n",
259 NumIdentifiers/(double)NumBuckets);
260 fprintf(stderr, "Ave identifier length: %f\n",
261 (AverageIdentifierSize/(double)NumIdentifiers));
262 fprintf(stderr, "Max identifier length: %d\n", MaxIdentifierLength);
263
264 // Compute statistics about the memory allocated for identifiers.
265 HashTable.getAllocator().PrintStats();
266}
Steve Naroff68d331a2007-09-27 14:38:14 +0000267
Steve Naroff29238a02007-10-05 18:42:47 +0000268//===----------------------------------------------------------------------===//
269// SelectorTable Implementation
270//===----------------------------------------------------------------------===//
271
Chris Lattner85994262007-10-05 20:15:24 +0000272unsigned llvm::DenseMapInfo<clang::Selector>::getHashValue(clang::Selector S) {
273 return DenseMapInfo<void*>::getHashValue(S.getAsOpaquePtr());
274}
275
276
Steve Naroff29238a02007-10-05 18:42:47 +0000277/// MultiKeywordSelector - One of these variable length records is kept for each
278/// selector containing more than one keyword. We use a folding set
279/// to unique aggregate names (keyword selectors in ObjC parlance). Access to
280/// this class is provided strictly through Selector.
Chris Lattner85994262007-10-05 20:15:24 +0000281namespace clang {
Steve Naroff29238a02007-10-05 18:42:47 +0000282class MultiKeywordSelector : public llvm::FoldingSetNode {
283public:
284 unsigned NumArgs;
285
286 // Constructor for keyword selectors.
287 MultiKeywordSelector(unsigned nKeys, IdentifierInfo **IIV) {
288 assert((nKeys > 1) && "not a multi-keyword selector");
289 NumArgs = nKeys;
290 // Fill in the trailing keyword array.
291 IdentifierInfo **KeyInfo = reinterpret_cast<IdentifierInfo **>(this+1);
292 for (unsigned i = 0; i != nKeys; ++i)
293 KeyInfo[i] = IIV[i];
294 }
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000295 // getName - Derive the full selector name and return it.
296 std::string getName() const;
297
Steve Naroff29238a02007-10-05 18:42:47 +0000298 unsigned getNumArgs() const { return NumArgs; }
299
300 typedef IdentifierInfo *const *keyword_iterator;
301 keyword_iterator keyword_begin() const {
302 return reinterpret_cast<keyword_iterator>(this+1);
303 }
304 keyword_iterator keyword_end() const {
305 return keyword_begin()+NumArgs;
306 }
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000307 IdentifierInfo *getIdentifierInfoForSlot(unsigned i) const {
308 assert(i < NumArgs && "getIdentifierInfoForSlot(): illegal index");
Steve Naroff29238a02007-10-05 18:42:47 +0000309 return keyword_begin()[i];
310 }
311 static void Profile(llvm::FoldingSetNodeID &ID,
312 keyword_iterator ArgTys, unsigned NumArgs) {
313 ID.AddInteger(NumArgs);
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000314 for (unsigned i = 0; i != NumArgs; ++i)
315 ID.AddPointer(ArgTys[i]);
Steve Naroff29238a02007-10-05 18:42:47 +0000316 }
317 void Profile(llvm::FoldingSetNodeID &ID) {
318 Profile(ID, keyword_begin(), NumArgs);
319 }
320};
Chris Lattner85994262007-10-05 20:15:24 +0000321} // end namespace clang.
Steve Naroff29238a02007-10-05 18:42:47 +0000322
323unsigned Selector::getNumArgs() const {
324 unsigned IIF = getIdentifierInfoFlag();
325 if (IIF == ZeroArg)
326 return 0;
327 if (IIF == OneArg)
328 return 1;
329 // We point to a MultiKeywordSelector (pointer doesn't contain any flags).
330 MultiKeywordSelector *SI = reinterpret_cast<MultiKeywordSelector *>(InfoPtr);
331 return SI->getNumArgs();
332}
333
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000334IdentifierInfo *Selector::getIdentifierInfoForSlot(unsigned argIndex) const {
335 if (IdentifierInfo *II = getAsIdentifierInfo()) {
336 assert(argIndex == 0 && "illegal keyword index");
Steve Naroff29238a02007-10-05 18:42:47 +0000337 return II;
338 }
339 // We point to a MultiKeywordSelector (pointer doesn't contain any flags).
340 MultiKeywordSelector *SI = reinterpret_cast<MultiKeywordSelector *>(InfoPtr);
341 return SI->getIdentifierInfoForSlot(argIndex);
342}
343
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000344std::string MultiKeywordSelector::getName() const {
345 std::string Result;
346 unsigned Length = 0;
347 for (keyword_iterator I = keyword_begin(), E = keyword_end(); I != E; ++I) {
348 if (*I)
349 Length += (*I)->getLength();
350 ++Length; // :
Steve Naroff29238a02007-10-05 18:42:47 +0000351 }
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000352
353 Result.reserve(Length);
354
355 for (keyword_iterator I = keyword_begin(), E = keyword_end(); I != E; ++I) {
356 if (*I)
357 Result.insert(Result.end(), (*I)->getName(),
358 (*I)->getName()+(*I)->getLength());
359 Result.push_back(':');
360 }
361
362 return Result;
Steve Naroff29238a02007-10-05 18:42:47 +0000363}
364
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000365std::string Selector::getName() const {
366 if (IdentifierInfo *II = getAsIdentifierInfo()) {
367 if (getNumArgs() == 0)
368 return II->getName();
369
370 std::string Res = II->getName();
371 Res += ":";
372 return Res;
Steve Naroff29238a02007-10-05 18:42:47 +0000373 }
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000374
375 // We have a multiple keyword selector (no embedded flags).
376 return reinterpret_cast<MultiKeywordSelector *>(InfoPtr)->getName();
Steve Naroff29238a02007-10-05 18:42:47 +0000377}
378
379
Chris Lattnerff384912007-10-07 02:00:24 +0000380Selector SelectorTable::getSelector(unsigned nKeys, IdentifierInfo **IIV) {
381 if (nKeys < 2)
382 return Selector(IIV[0], nKeys);
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000383
Steve Naroff29238a02007-10-05 18:42:47 +0000384 llvm::FoldingSet<MultiKeywordSelector> *SelTab;
385
386 SelTab = static_cast<llvm::FoldingSet<MultiKeywordSelector> *>(Impl);
387
388 // Unique selector, to guarantee there is one per name.
389 llvm::FoldingSetNodeID ID;
390 MultiKeywordSelector::Profile(ID, IIV, nKeys);
391
392 void *InsertPos = 0;
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000393 if (MultiKeywordSelector *SI = SelTab->FindNodeOrInsertPos(ID, InsertPos))
Steve Naroff29238a02007-10-05 18:42:47 +0000394 return Selector(SI);
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000395
Steve Naroff29238a02007-10-05 18:42:47 +0000396 // MultiKeywordSelector objects are not allocated with new because they have a
397 // variable size array (for parameter types) at the end of them.
398 MultiKeywordSelector *SI =
399 (MultiKeywordSelector*)malloc(sizeof(MultiKeywordSelector) +
400 nKeys*sizeof(IdentifierInfo *));
401 new (SI) MultiKeywordSelector(nKeys, IIV);
402 SelTab->InsertNode(SI, InsertPos);
403 return Selector(SI);
404}
405
Steve Naroff29238a02007-10-05 18:42:47 +0000406SelectorTable::SelectorTable() {
407 Impl = new llvm::FoldingSet<MultiKeywordSelector>;
408}
409
410SelectorTable::~SelectorTable() {
411 delete static_cast<llvm::FoldingSet<MultiKeywordSelector> *>(Impl);
412}
413
414