blob: b1c2c63f233fe6e756b85cfe2a3553039fe74c64 [file] [log] [blame]
Reid Spencer5f016e22007-07-11 17:01:13 +00001//===--- IdentifierTable.cpp - Hash table for identifier lookup -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file was developed by Chris Lattner and is distributed under
6// the University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the IdentifierInfo, IdentifierVisitor, and
11// IdentifierTable interfaces.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Lex/IdentifierTable.h"
16#include "clang/Lex/MacroInfo.h"
17#include "clang/Basic/LangOptions.h"
Steve Naroff29238a02007-10-05 18:42:47 +000018#include "llvm/ADT/FoldingSet.h"
Chris Lattner85994262007-10-05 20:15:24 +000019#include "llvm/ADT/DenseMap.h"
Reid Spencer5f016e22007-07-11 17:01:13 +000020using namespace clang;
21
Chris Lattner4365a7e2007-10-07 07:09:52 +000022static llvm::DenseMap<const IdentifierInfo*, MacroInfo*> Macros;
23
24MacroInfo *IdentifierInfo::getMacroInfoInternal() const {
25 return Macros[this];
26}
27void IdentifierInfo::setMacroInfo(MacroInfo *I) {
28 if (I == 0) {
29 if (HasMacro) {
30 Macros.erase(this);
31 HasMacro = false;
32 }
33 } else {
34 Macros[this] = I;
35 HasMacro = true;
36 }
37}
38
39
Reid Spencer5f016e22007-07-11 17:01:13 +000040//===----------------------------------------------------------------------===//
Steve Naroff861cf3e2007-08-23 18:16:40 +000041// Token Implementation
42//===----------------------------------------------------------------------===//
43
44/// isObjCAtKeyword - Return true if we have an ObjC keyword identifier.
45bool Token::isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const {
46 return getKind() == tok::identifier &&
47 getIdentifierInfo()->getObjCKeywordID() == objcKey;
48}
49
50/// getObjCKeywordID - Return the ObjC keyword kind.
51tok::ObjCKeywordKind Token::getObjCKeywordID() const {
52 IdentifierInfo *specId = getIdentifierInfo();
53 return specId ? specId->getObjCKeywordID() : tok::objc_not_keyword;
54}
55
56//===----------------------------------------------------------------------===//
Reid Spencer5f016e22007-07-11 17:01:13 +000057// IdentifierInfo Implementation
58//===----------------------------------------------------------------------===//
59
60IdentifierInfo::IdentifierInfo() {
Reid Spencer5f016e22007-07-11 17:01:13 +000061 TokenID = tok::identifier;
62 PPID = tok::pp_not_keyword;
63 ObjCID = tok::objc_not_keyword;
64 BuiltinID = 0;
Chris Lattner4365a7e2007-10-07 07:09:52 +000065 HasMacro = false;
Reid Spencer5f016e22007-07-11 17:01:13 +000066 IsExtension = false;
67 IsPoisoned = false;
68 IsOtherTargetMacro = false;
69 IsCPPOperatorKeyword = false;
Chris Lattner938867c2007-07-19 00:11:19 +000070 IsNonPortableBuiltin = false;
Reid Spencer5f016e22007-07-11 17:01:13 +000071 FETokenInfo = 0;
72}
73
74IdentifierInfo::~IdentifierInfo() {
Chris Lattner4365a7e2007-10-07 07:09:52 +000075 if (MacroInfo *Macro = getMacroInfo())
76 delete Macro;
Reid Spencer5f016e22007-07-11 17:01:13 +000077}
78
79//===----------------------------------------------------------------------===//
80// IdentifierTable Implementation
81//===----------------------------------------------------------------------===//
82
83IdentifierTable::IdentifierTable(const LangOptions &LangOpts)
84 // Start with space for 8K identifiers.
85 : HashTable(8192) {
86
87 // Populate the identifier table with info about keywords for the current
88 // language.
89 AddKeywords(LangOpts);
90}
91
92//===----------------------------------------------------------------------===//
93// Language Keyword Implementation
94//===----------------------------------------------------------------------===//
95
96/// AddKeyword - This method is used to associate a token ID with specific
97/// identifiers because they are language keywords. This causes the lexer to
98/// automatically map matching identifiers to specialized token codes.
99///
Chris Lattnerd4b80f12007-07-16 04:18:29 +0000100/// The C90/C99/CPP/CPP0x flags are set to 0 if the token should be
101/// enabled in the specified langauge, set to 1 if it is an extension
102/// in the specified language, and set to 2 if disabled in the
103/// specified language.
Reid Spencer5f016e22007-07-11 17:01:13 +0000104static void AddKeyword(const char *Keyword, unsigned KWLen,
105 tok::TokenKind TokenCode,
Chris Lattnerd4b80f12007-07-16 04:18:29 +0000106 int C90, int C99, int CXX, int CXX0x,
Reid Spencer5f016e22007-07-11 17:01:13 +0000107 const LangOptions &LangOpts, IdentifierTable &Table) {
Chris Lattnerd4b80f12007-07-16 04:18:29 +0000108 int Flags = LangOpts.CPlusPlus ? (LangOpts.CPlusPlus0x? CXX0x : CXX)
109 : (LangOpts.C99 ? C99 : C90);
Reid Spencer5f016e22007-07-11 17:01:13 +0000110
111 // Don't add this keyword if disabled in this language or if an extension
112 // and extensions are disabled.
113 if (Flags + LangOpts.NoExtensions >= 2) return;
114
115 IdentifierInfo &Info = Table.get(Keyword, Keyword+KWLen);
116 Info.setTokenID(TokenCode);
117 Info.setIsExtensionToken(Flags == 1);
118}
119
120static void AddAlias(const char *Keyword, unsigned KWLen,
121 const char *AliaseeKeyword, unsigned AliaseeKWLen,
122 const LangOptions &LangOpts, IdentifierTable &Table) {
123 IdentifierInfo &AliasInfo = Table.get(Keyword, Keyword+KWLen);
124 IdentifierInfo &AliaseeInfo = Table.get(AliaseeKeyword,
125 AliaseeKeyword+AliaseeKWLen);
126 AliasInfo.setTokenID(AliaseeInfo.getTokenID());
127 AliasInfo.setIsExtensionToken(AliaseeInfo.isExtensionToken());
128}
129
130/// AddPPKeyword - Register a preprocessor keyword like "define" "undef" or
131/// "elif".
132static void AddPPKeyword(tok::PPKeywordKind PPID,
133 const char *Name, unsigned NameLen,
134 IdentifierTable &Table) {
135 Table.get(Name, Name+NameLen).setPPKeywordID(PPID);
136}
137
138/// AddCXXOperatorKeyword - Register a C++ operator keyword alternative
139/// representations.
140static void AddCXXOperatorKeyword(const char *Keyword, unsigned KWLen,
141 tok::TokenKind TokenCode,
142 IdentifierTable &Table) {
143 IdentifierInfo &Info = Table.get(Keyword, Keyword + KWLen);
144 Info.setTokenID(TokenCode);
145 Info.setIsCPlusplusOperatorKeyword();
146}
147
148/// AddObjCKeyword - Register an Objective-C @keyword like "class" "selector" or
149/// "property".
150static void AddObjCKeyword(tok::ObjCKeywordKind ObjCID,
151 const char *Name, unsigned NameLen,
152 IdentifierTable &Table) {
153 Table.get(Name, Name+NameLen).setObjCKeywordID(ObjCID);
154}
155
/// AddKeywords - Add all keywords to the symbol table.
///
/// The work is driven by including clang/Basic/TokenKinds.def with the macros
/// below defined; each macro expands to a call to one of the static Add*
/// helpers in this file, passing *this as the table.
void IdentifierTable::AddKeywords(const LangOptions &LangOpts) {
  // Bit layout of a keyword's FLAGS value: one two-bit field per language
  // (C90, C99, C++, C++0x).  Within a field, 1 (EXT*) marks an extension and
  // 2 (NOT*) marks the keyword as disabled, matching the 0/1/2 convention
  // that AddKeyword expects.
  // NOTE(review): the EXT*/NOT* enumerators are not used in this function;
  // presumably the FLAGS expressions in TokenKinds.def refer to them - confirm.
  enum {
    C90Shift = 0,
    EXTC90   = 1 << C90Shift,
    NOTC90   = 2 << C90Shift,
    C99Shift = 2,
    EXTC99   = 1 << C99Shift,
    NOTC99   = 2 << C99Shift,
    CPPShift = 4,
    EXTCPP   = 1 << CPPShift,
    NOTCPP   = 2 << CPPShift,
    CPP0xShift = 6,
    EXTCPP0x   = 1 << CPP0xShift,
    NOTCPP0x   = 2 << CPP0xShift,
    Mask     = 3
  };

  // Add keywords and tokens for the current language.

  // KEYWORD: split FLAGS into its four per-language two-bit fields and hand
  // them to AddKeyword together with the tok::kw_NAME token code.
#define KEYWORD(NAME, FLAGS) \
  AddKeyword(#NAME, strlen(#NAME), tok::kw_ ## NAME,  \
             ((FLAGS) >> C90Shift) & Mask, \
             ((FLAGS) >> C99Shift) & Mask, \
             ((FLAGS) >> CPPShift) & Mask, \
             ((FLAGS) >> CPP0xShift) & Mask, LangOpts, *this);
  // ALIAS: NAME is used as-is (not stringized), while TOK is stringized to
  // name the aliasee.
#define ALIAS(NAME, TOK) \
  AddAlias(NAME, strlen(NAME), #TOK, strlen(#TOK), LangOpts, *this);
  // PPKEYWORD: registered unconditionally with the tok::pp_NAME ID.
#define PPKEYWORD(NAME) \
  AddPPKeyword(tok::pp_##NAME, #NAME, strlen(#NAME), *this);
  // CXX_KEYWORD_OPERATOR: only registered when LangOpts.CXXOperatorNames is set.
#define CXX_KEYWORD_OPERATOR(NAME, ALIAS) \
  if (LangOpts.CXXOperatorNames)          \
    AddCXXOperatorKeyword(#NAME, strlen(#NAME), tok::ALIAS, *this);
  // OBJC1_AT_KEYWORD: only registered when LangOpts.ObjC1 is set.
#define OBJC1_AT_KEYWORD(NAME) \
  if (LangOpts.ObjC1)          \
    AddObjCKeyword(tok::objc_##NAME, #NAME, strlen(#NAME), *this);
  // OBJC2_AT_KEYWORD: only registered when LangOpts.ObjC2 is set.
#define OBJC2_AT_KEYWORD(NAME) \
  if (LangOpts.ObjC2)          \
    AddObjCKeyword(tok::objc_##NAME, #NAME, strlen(#NAME), *this);
#include "clang/Basic/TokenKinds.def"
}
197
198
199//===----------------------------------------------------------------------===//
200// Stats Implementation
201//===----------------------------------------------------------------------===//
202
203/// PrintStats - Print statistics about how well the identifier table is doing
204/// at hashing identifiers.
205void IdentifierTable::PrintStats() const {
206 unsigned NumBuckets = HashTable.getNumBuckets();
207 unsigned NumIdentifiers = HashTable.getNumItems();
208 unsigned NumEmptyBuckets = NumBuckets-NumIdentifiers;
209 unsigned AverageIdentifierSize = 0;
210 unsigned MaxIdentifierLength = 0;
211
212 // TODO: Figure out maximum times an identifier had to probe for -stats.
213 for (llvm::StringMap<IdentifierInfo, llvm::BumpPtrAllocator>::const_iterator
214 I = HashTable.begin(), E = HashTable.end(); I != E; ++I) {
215 unsigned IdLen = I->getKeyLength();
216 AverageIdentifierSize += IdLen;
217 if (MaxIdentifierLength < IdLen)
218 MaxIdentifierLength = IdLen;
219 }
220
221 fprintf(stderr, "\n*** Identifier Table Stats:\n");
222 fprintf(stderr, "# Identifiers: %d\n", NumIdentifiers);
223 fprintf(stderr, "# Empty Buckets: %d\n", NumEmptyBuckets);
224 fprintf(stderr, "Hash density (#identifiers per bucket): %f\n",
225 NumIdentifiers/(double)NumBuckets);
226 fprintf(stderr, "Ave identifier length: %f\n",
227 (AverageIdentifierSize/(double)NumIdentifiers));
228 fprintf(stderr, "Max identifier length: %d\n", MaxIdentifierLength);
229
230 // Compute statistics about the memory allocated for identifiers.
231 HashTable.getAllocator().PrintStats();
232}
Steve Naroff68d331a2007-09-27 14:38:14 +0000233
Steve Naroff29238a02007-10-05 18:42:47 +0000234//===----------------------------------------------------------------------===//
235// SelectorTable Implementation
236//===----------------------------------------------------------------------===//
237
Chris Lattner85994262007-10-05 20:15:24 +0000238unsigned llvm::DenseMapInfo<clang::Selector>::getHashValue(clang::Selector S) {
239 return DenseMapInfo<void*>::getHashValue(S.getAsOpaquePtr());
240}
241
242
Steve Naroff29238a02007-10-05 18:42:47 +0000243/// MultiKeywordSelector - One of these variable length records is kept for each
244/// selector containing more than one keyword. We use a folding set
245/// to unique aggregate names (keyword selectors in ObjC parlance). Access to
246/// this class is provided strictly through Selector.
Chris Lattner85994262007-10-05 20:15:24 +0000247namespace clang {
Steve Naroff29238a02007-10-05 18:42:47 +0000248class MultiKeywordSelector : public llvm::FoldingSetNode {
249public:
250 unsigned NumArgs;
251
252 // Constructor for keyword selectors.
253 MultiKeywordSelector(unsigned nKeys, IdentifierInfo **IIV) {
254 assert((nKeys > 1) && "not a multi-keyword selector");
255 NumArgs = nKeys;
256 // Fill in the trailing keyword array.
257 IdentifierInfo **KeyInfo = reinterpret_cast<IdentifierInfo **>(this+1);
258 for (unsigned i = 0; i != nKeys; ++i)
259 KeyInfo[i] = IIV[i];
260 }
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000261 // getName - Derive the full selector name and return it.
262 std::string getName() const;
263
Steve Naroff29238a02007-10-05 18:42:47 +0000264 unsigned getNumArgs() const { return NumArgs; }
265
266 typedef IdentifierInfo *const *keyword_iterator;
267 keyword_iterator keyword_begin() const {
268 return reinterpret_cast<keyword_iterator>(this+1);
269 }
270 keyword_iterator keyword_end() const {
271 return keyword_begin()+NumArgs;
272 }
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000273 IdentifierInfo *getIdentifierInfoForSlot(unsigned i) const {
274 assert(i < NumArgs && "getIdentifierInfoForSlot(): illegal index");
Steve Naroff29238a02007-10-05 18:42:47 +0000275 return keyword_begin()[i];
276 }
277 static void Profile(llvm::FoldingSetNodeID &ID,
278 keyword_iterator ArgTys, unsigned NumArgs) {
279 ID.AddInteger(NumArgs);
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000280 for (unsigned i = 0; i != NumArgs; ++i)
281 ID.AddPointer(ArgTys[i]);
Steve Naroff29238a02007-10-05 18:42:47 +0000282 }
283 void Profile(llvm::FoldingSetNodeID &ID) {
284 Profile(ID, keyword_begin(), NumArgs);
285 }
286};
Chris Lattner85994262007-10-05 20:15:24 +0000287} // end namespace clang.
Steve Naroff29238a02007-10-05 18:42:47 +0000288
289unsigned Selector::getNumArgs() const {
290 unsigned IIF = getIdentifierInfoFlag();
291 if (IIF == ZeroArg)
292 return 0;
293 if (IIF == OneArg)
294 return 1;
295 // We point to a MultiKeywordSelector (pointer doesn't contain any flags).
296 MultiKeywordSelector *SI = reinterpret_cast<MultiKeywordSelector *>(InfoPtr);
297 return SI->getNumArgs();
298}
299
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000300IdentifierInfo *Selector::getIdentifierInfoForSlot(unsigned argIndex) const {
301 if (IdentifierInfo *II = getAsIdentifierInfo()) {
302 assert(argIndex == 0 && "illegal keyword index");
Steve Naroff29238a02007-10-05 18:42:47 +0000303 return II;
304 }
305 // We point to a MultiKeywordSelector (pointer doesn't contain any flags).
306 MultiKeywordSelector *SI = reinterpret_cast<MultiKeywordSelector *>(InfoPtr);
307 return SI->getIdentifierInfoForSlot(argIndex);
308}
309
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000310std::string MultiKeywordSelector::getName() const {
311 std::string Result;
312 unsigned Length = 0;
313 for (keyword_iterator I = keyword_begin(), E = keyword_end(); I != E; ++I) {
314 if (*I)
315 Length += (*I)->getLength();
316 ++Length; // :
Steve Naroff29238a02007-10-05 18:42:47 +0000317 }
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000318
319 Result.reserve(Length);
320
321 for (keyword_iterator I = keyword_begin(), E = keyword_end(); I != E; ++I) {
322 if (*I)
323 Result.insert(Result.end(), (*I)->getName(),
324 (*I)->getName()+(*I)->getLength());
325 Result.push_back(':');
326 }
327
328 return Result;
Steve Naroff29238a02007-10-05 18:42:47 +0000329}
330
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000331std::string Selector::getName() const {
332 if (IdentifierInfo *II = getAsIdentifierInfo()) {
333 if (getNumArgs() == 0)
334 return II->getName();
335
336 std::string Res = II->getName();
337 Res += ":";
338 return Res;
Steve Naroff29238a02007-10-05 18:42:47 +0000339 }
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000340
341 // We have a multiple keyword selector (no embedded flags).
342 return reinterpret_cast<MultiKeywordSelector *>(InfoPtr)->getName();
Steve Naroff29238a02007-10-05 18:42:47 +0000343}
344
345
Chris Lattnerff384912007-10-07 02:00:24 +0000346Selector SelectorTable::getSelector(unsigned nKeys, IdentifierInfo **IIV) {
347 if (nKeys < 2)
348 return Selector(IIV[0], nKeys);
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000349
Steve Naroff29238a02007-10-05 18:42:47 +0000350 llvm::FoldingSet<MultiKeywordSelector> *SelTab;
351
352 SelTab = static_cast<llvm::FoldingSet<MultiKeywordSelector> *>(Impl);
353
354 // Unique selector, to guarantee there is one per name.
355 llvm::FoldingSetNodeID ID;
356 MultiKeywordSelector::Profile(ID, IIV, nKeys);
357
358 void *InsertPos = 0;
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000359 if (MultiKeywordSelector *SI = SelTab->FindNodeOrInsertPos(ID, InsertPos))
Steve Naroff29238a02007-10-05 18:42:47 +0000360 return Selector(SI);
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000361
Steve Naroff29238a02007-10-05 18:42:47 +0000362 // MultiKeywordSelector objects are not allocated with new because they have a
363 // variable size array (for parameter types) at the end of them.
364 MultiKeywordSelector *SI =
365 (MultiKeywordSelector*)malloc(sizeof(MultiKeywordSelector) +
366 nKeys*sizeof(IdentifierInfo *));
367 new (SI) MultiKeywordSelector(nKeys, IIV);
368 SelTab->InsertNode(SI, InsertPos);
369 return Selector(SI);
370}
371
Steve Naroff29238a02007-10-05 18:42:47 +0000372SelectorTable::SelectorTable() {
373 Impl = new llvm::FoldingSet<MultiKeywordSelector>;
374}
375
376SelectorTable::~SelectorTable() {
377 delete static_cast<llvm::FoldingSet<MultiKeywordSelector> *>(Impl);
378}
379
380