blob: aba3f9d1f2624e5a7a92a4d9bbade6d2769d978c [file] [log] [blame]
Reid Spencer5f016e22007-07-11 17:01:13 +00001//===--- IdentifierTable.cpp - Hash table for identifier lookup -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file was developed by Chris Lattner and is distributed under
6// the University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the IdentifierInfo, IdentifierVisitor, and
11// IdentifierTable interfaces.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Lex/IdentifierTable.h"
16#include "clang/Lex/MacroInfo.h"
17#include "clang/Basic/LangOptions.h"
Steve Naroff29238a02007-10-05 18:42:47 +000018#include "llvm/ADT/FoldingSet.h"
Chris Lattner85994262007-10-05 20:15:24 +000019#include "llvm/ADT/DenseMap.h"
Reid Spencer5f016e22007-07-11 17:01:13 +000020using namespace clang;
21
22//===----------------------------------------------------------------------===//
Steve Naroff861cf3e2007-08-23 18:16:40 +000023// Token Implementation
24//===----------------------------------------------------------------------===//
25
26/// isObjCAtKeyword - Return true if we have an ObjC keyword identifier.
27bool Token::isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const {
28 return getKind() == tok::identifier &&
29 getIdentifierInfo()->getObjCKeywordID() == objcKey;
30}
31
32/// getObjCKeywordID - Return the ObjC keyword kind.
33tok::ObjCKeywordKind Token::getObjCKeywordID() const {
34 IdentifierInfo *specId = getIdentifierInfo();
35 return specId ? specId->getObjCKeywordID() : tok::objc_not_keyword;
36}
37
38//===----------------------------------------------------------------------===//
Reid Spencer5f016e22007-07-11 17:01:13 +000039// IdentifierInfo Implementation
40//===----------------------------------------------------------------------===//
41
42IdentifierInfo::IdentifierInfo() {
43 Macro = 0;
44 TokenID = tok::identifier;
45 PPID = tok::pp_not_keyword;
46 ObjCID = tok::objc_not_keyword;
47 BuiltinID = 0;
48 IsExtension = false;
49 IsPoisoned = false;
50 IsOtherTargetMacro = false;
51 IsCPPOperatorKeyword = false;
Chris Lattner938867c2007-07-19 00:11:19 +000052 IsNonPortableBuiltin = false;
Reid Spencer5f016e22007-07-11 17:01:13 +000053 FETokenInfo = 0;
54}
55
56IdentifierInfo::~IdentifierInfo() {
57 delete Macro;
58}
59
60//===----------------------------------------------------------------------===//
61// IdentifierTable Implementation
62//===----------------------------------------------------------------------===//
63
64IdentifierTable::IdentifierTable(const LangOptions &LangOpts)
65 // Start with space for 8K identifiers.
66 : HashTable(8192) {
67
68 // Populate the identifier table with info about keywords for the current
69 // language.
70 AddKeywords(LangOpts);
71}
72
73//===----------------------------------------------------------------------===//
74// Language Keyword Implementation
75//===----------------------------------------------------------------------===//
76
77/// AddKeyword - This method is used to associate a token ID with specific
78/// identifiers because they are language keywords. This causes the lexer to
79/// automatically map matching identifiers to specialized token codes.
80///
Chris Lattnerd4b80f12007-07-16 04:18:29 +000081/// The C90/C99/CPP/CPP0x flags are set to 0 if the token should be
82/// enabled in the specified langauge, set to 1 if it is an extension
83/// in the specified language, and set to 2 if disabled in the
84/// specified language.
Reid Spencer5f016e22007-07-11 17:01:13 +000085static void AddKeyword(const char *Keyword, unsigned KWLen,
86 tok::TokenKind TokenCode,
Chris Lattnerd4b80f12007-07-16 04:18:29 +000087 int C90, int C99, int CXX, int CXX0x,
Reid Spencer5f016e22007-07-11 17:01:13 +000088 const LangOptions &LangOpts, IdentifierTable &Table) {
Chris Lattnerd4b80f12007-07-16 04:18:29 +000089 int Flags = LangOpts.CPlusPlus ? (LangOpts.CPlusPlus0x? CXX0x : CXX)
90 : (LangOpts.C99 ? C99 : C90);
Reid Spencer5f016e22007-07-11 17:01:13 +000091
92 // Don't add this keyword if disabled in this language or if an extension
93 // and extensions are disabled.
94 if (Flags + LangOpts.NoExtensions >= 2) return;
95
96 IdentifierInfo &Info = Table.get(Keyword, Keyword+KWLen);
97 Info.setTokenID(TokenCode);
98 Info.setIsExtensionToken(Flags == 1);
99}
100
101static void AddAlias(const char *Keyword, unsigned KWLen,
102 const char *AliaseeKeyword, unsigned AliaseeKWLen,
103 const LangOptions &LangOpts, IdentifierTable &Table) {
104 IdentifierInfo &AliasInfo = Table.get(Keyword, Keyword+KWLen);
105 IdentifierInfo &AliaseeInfo = Table.get(AliaseeKeyword,
106 AliaseeKeyword+AliaseeKWLen);
107 AliasInfo.setTokenID(AliaseeInfo.getTokenID());
108 AliasInfo.setIsExtensionToken(AliaseeInfo.isExtensionToken());
109}
110
111/// AddPPKeyword - Register a preprocessor keyword like "define" "undef" or
112/// "elif".
113static void AddPPKeyword(tok::PPKeywordKind PPID,
114 const char *Name, unsigned NameLen,
115 IdentifierTable &Table) {
116 Table.get(Name, Name+NameLen).setPPKeywordID(PPID);
117}
118
119/// AddCXXOperatorKeyword - Register a C++ operator keyword alternative
120/// representations.
121static void AddCXXOperatorKeyword(const char *Keyword, unsigned KWLen,
122 tok::TokenKind TokenCode,
123 IdentifierTable &Table) {
124 IdentifierInfo &Info = Table.get(Keyword, Keyword + KWLen);
125 Info.setTokenID(TokenCode);
126 Info.setIsCPlusplusOperatorKeyword();
127}
128
129/// AddObjCKeyword - Register an Objective-C @keyword like "class" "selector" or
130/// "property".
131static void AddObjCKeyword(tok::ObjCKeywordKind ObjCID,
132 const char *Name, unsigned NameLen,
133 IdentifierTable &Table) {
134 Table.get(Name, Name+NameLen).setObjCKeywordID(ObjCID);
135}
136
137/// AddKeywords - Add all keywords to the symbol table.
138///
139void IdentifierTable::AddKeywords(const LangOptions &LangOpts) {
140 enum {
141 C90Shift = 0,
142 EXTC90 = 1 << C90Shift,
143 NOTC90 = 2 << C90Shift,
144 C99Shift = 2,
145 EXTC99 = 1 << C99Shift,
146 NOTC99 = 2 << C99Shift,
147 CPPShift = 4,
148 EXTCPP = 1 << CPPShift,
149 NOTCPP = 2 << CPPShift,
Chris Lattnerd4b80f12007-07-16 04:18:29 +0000150 CPP0xShift = 6,
151 EXTCPP0x = 1 << CPP0xShift,
152 NOTCPP0x = 2 << CPP0xShift,
Reid Spencer5f016e22007-07-11 17:01:13 +0000153 Mask = 3
154 };
155
156 // Add keywords and tokens for the current language.
157#define KEYWORD(NAME, FLAGS) \
158 AddKeyword(#NAME, strlen(#NAME), tok::kw_ ## NAME, \
159 ((FLAGS) >> C90Shift) & Mask, \
160 ((FLAGS) >> C99Shift) & Mask, \
Chris Lattnerd4b80f12007-07-16 04:18:29 +0000161 ((FLAGS) >> CPPShift) & Mask, \
162 ((FLAGS) >> CPP0xShift) & Mask, LangOpts, *this);
Reid Spencer5f016e22007-07-11 17:01:13 +0000163#define ALIAS(NAME, TOK) \
164 AddAlias(NAME, strlen(NAME), #TOK, strlen(#TOK), LangOpts, *this);
165#define PPKEYWORD(NAME) \
166 AddPPKeyword(tok::pp_##NAME, #NAME, strlen(#NAME), *this);
167#define CXX_KEYWORD_OPERATOR(NAME, ALIAS) \
168 if (LangOpts.CXXOperatorNames) \
169 AddCXXOperatorKeyword(#NAME, strlen(#NAME), tok::ALIAS, *this);
170#define OBJC1_AT_KEYWORD(NAME) \
171 if (LangOpts.ObjC1) \
172 AddObjCKeyword(tok::objc_##NAME, #NAME, strlen(#NAME), *this);
173#define OBJC2_AT_KEYWORD(NAME) \
174 if (LangOpts.ObjC2) \
175 AddObjCKeyword(tok::objc_##NAME, #NAME, strlen(#NAME), *this);
176#include "clang/Basic/TokenKinds.def"
177}
178
179
180//===----------------------------------------------------------------------===//
181// Stats Implementation
182//===----------------------------------------------------------------------===//
183
184/// PrintStats - Print statistics about how well the identifier table is doing
185/// at hashing identifiers.
186void IdentifierTable::PrintStats() const {
187 unsigned NumBuckets = HashTable.getNumBuckets();
188 unsigned NumIdentifiers = HashTable.getNumItems();
189 unsigned NumEmptyBuckets = NumBuckets-NumIdentifiers;
190 unsigned AverageIdentifierSize = 0;
191 unsigned MaxIdentifierLength = 0;
192
193 // TODO: Figure out maximum times an identifier had to probe for -stats.
194 for (llvm::StringMap<IdentifierInfo, llvm::BumpPtrAllocator>::const_iterator
195 I = HashTable.begin(), E = HashTable.end(); I != E; ++I) {
196 unsigned IdLen = I->getKeyLength();
197 AverageIdentifierSize += IdLen;
198 if (MaxIdentifierLength < IdLen)
199 MaxIdentifierLength = IdLen;
200 }
201
202 fprintf(stderr, "\n*** Identifier Table Stats:\n");
203 fprintf(stderr, "# Identifiers: %d\n", NumIdentifiers);
204 fprintf(stderr, "# Empty Buckets: %d\n", NumEmptyBuckets);
205 fprintf(stderr, "Hash density (#identifiers per bucket): %f\n",
206 NumIdentifiers/(double)NumBuckets);
207 fprintf(stderr, "Ave identifier length: %f\n",
208 (AverageIdentifierSize/(double)NumIdentifiers));
209 fprintf(stderr, "Max identifier length: %d\n", MaxIdentifierLength);
210
211 // Compute statistics about the memory allocated for identifiers.
212 HashTable.getAllocator().PrintStats();
213}
Steve Naroff68d331a2007-09-27 14:38:14 +0000214
Steve Naroff29238a02007-10-05 18:42:47 +0000215//===----------------------------------------------------------------------===//
216// SelectorTable Implementation
217//===----------------------------------------------------------------------===//
218
Chris Lattner85994262007-10-05 20:15:24 +0000219unsigned llvm::DenseMapInfo<clang::Selector>::getHashValue(clang::Selector S) {
220 return DenseMapInfo<void*>::getHashValue(S.getAsOpaquePtr());
221}
222
223
Steve Naroff29238a02007-10-05 18:42:47 +0000224/// MultiKeywordSelector - One of these variable length records is kept for each
225/// selector containing more than one keyword. We use a folding set
226/// to unique aggregate names (keyword selectors in ObjC parlance). Access to
227/// this class is provided strictly through Selector.
Chris Lattner85994262007-10-05 20:15:24 +0000228namespace clang {
Steve Naroff29238a02007-10-05 18:42:47 +0000229class MultiKeywordSelector : public llvm::FoldingSetNode {
230public:
231 unsigned NumArgs;
232
233 // Constructor for keyword selectors.
234 MultiKeywordSelector(unsigned nKeys, IdentifierInfo **IIV) {
235 assert((nKeys > 1) && "not a multi-keyword selector");
236 NumArgs = nKeys;
237 // Fill in the trailing keyword array.
238 IdentifierInfo **KeyInfo = reinterpret_cast<IdentifierInfo **>(this+1);
239 for (unsigned i = 0; i != nKeys; ++i)
240 KeyInfo[i] = IIV[i];
241 }
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000242 // getName - Derive the full selector name and return it.
243 std::string getName() const;
244
Steve Naroff29238a02007-10-05 18:42:47 +0000245 unsigned getNumArgs() const { return NumArgs; }
246
247 typedef IdentifierInfo *const *keyword_iterator;
248 keyword_iterator keyword_begin() const {
249 return reinterpret_cast<keyword_iterator>(this+1);
250 }
251 keyword_iterator keyword_end() const {
252 return keyword_begin()+NumArgs;
253 }
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000254 IdentifierInfo *getIdentifierInfoForSlot(unsigned i) const {
255 assert(i < NumArgs && "getIdentifierInfoForSlot(): illegal index");
Steve Naroff29238a02007-10-05 18:42:47 +0000256 return keyword_begin()[i];
257 }
258 static void Profile(llvm::FoldingSetNodeID &ID,
259 keyword_iterator ArgTys, unsigned NumArgs) {
260 ID.AddInteger(NumArgs);
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000261 for (unsigned i = 0; i != NumArgs; ++i)
262 ID.AddPointer(ArgTys[i]);
Steve Naroff29238a02007-10-05 18:42:47 +0000263 }
264 void Profile(llvm::FoldingSetNodeID &ID) {
265 Profile(ID, keyword_begin(), NumArgs);
266 }
267};
Chris Lattner85994262007-10-05 20:15:24 +0000268} // end namespace clang.
Steve Naroff29238a02007-10-05 18:42:47 +0000269
270unsigned Selector::getNumArgs() const {
271 unsigned IIF = getIdentifierInfoFlag();
272 if (IIF == ZeroArg)
273 return 0;
274 if (IIF == OneArg)
275 return 1;
276 // We point to a MultiKeywordSelector (pointer doesn't contain any flags).
277 MultiKeywordSelector *SI = reinterpret_cast<MultiKeywordSelector *>(InfoPtr);
278 return SI->getNumArgs();
279}
280
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000281IdentifierInfo *Selector::getIdentifierInfoForSlot(unsigned argIndex) const {
282 if (IdentifierInfo *II = getAsIdentifierInfo()) {
283 assert(argIndex == 0 && "illegal keyword index");
Steve Naroff29238a02007-10-05 18:42:47 +0000284 return II;
285 }
286 // We point to a MultiKeywordSelector (pointer doesn't contain any flags).
287 MultiKeywordSelector *SI = reinterpret_cast<MultiKeywordSelector *>(InfoPtr);
288 return SI->getIdentifierInfoForSlot(argIndex);
289}
290
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000291std::string MultiKeywordSelector::getName() const {
292 std::string Result;
293 unsigned Length = 0;
294 for (keyword_iterator I = keyword_begin(), E = keyword_end(); I != E; ++I) {
295 if (*I)
296 Length += (*I)->getLength();
297 ++Length; // :
Steve Naroff29238a02007-10-05 18:42:47 +0000298 }
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000299
300 Result.reserve(Length);
301
302 for (keyword_iterator I = keyword_begin(), E = keyword_end(); I != E; ++I) {
303 if (*I)
304 Result.insert(Result.end(), (*I)->getName(),
305 (*I)->getName()+(*I)->getLength());
306 Result.push_back(':');
307 }
308
309 return Result;
Steve Naroff29238a02007-10-05 18:42:47 +0000310}
311
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000312std::string Selector::getName() const {
313 if (IdentifierInfo *II = getAsIdentifierInfo()) {
314 if (getNumArgs() == 0)
315 return II->getName();
316
317 std::string Res = II->getName();
318 Res += ":";
319 return Res;
Steve Naroff29238a02007-10-05 18:42:47 +0000320 }
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000321
322 // We have a multiple keyword selector (no embedded flags).
323 return reinterpret_cast<MultiKeywordSelector *>(InfoPtr)->getName();
Steve Naroff29238a02007-10-05 18:42:47 +0000324}
325
326
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000327Selector SelectorTable::getKeywordSelector(unsigned nKeys,
328 IdentifierInfo **IIV) {
329 if (nKeys == 1)
330 return Selector(IIV[0], 1);
331
Steve Naroff29238a02007-10-05 18:42:47 +0000332 llvm::FoldingSet<MultiKeywordSelector> *SelTab;
333
334 SelTab = static_cast<llvm::FoldingSet<MultiKeywordSelector> *>(Impl);
335
336 // Unique selector, to guarantee there is one per name.
337 llvm::FoldingSetNodeID ID;
338 MultiKeywordSelector::Profile(ID, IIV, nKeys);
339
340 void *InsertPos = 0;
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000341 if (MultiKeywordSelector *SI = SelTab->FindNodeOrInsertPos(ID, InsertPos))
Steve Naroff29238a02007-10-05 18:42:47 +0000342 return Selector(SI);
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000343
Steve Naroff29238a02007-10-05 18:42:47 +0000344 // MultiKeywordSelector objects are not allocated with new because they have a
345 // variable size array (for parameter types) at the end of them.
346 MultiKeywordSelector *SI =
347 (MultiKeywordSelector*)malloc(sizeof(MultiKeywordSelector) +
348 nKeys*sizeof(IdentifierInfo *));
349 new (SI) MultiKeywordSelector(nKeys, IIV);
350 SelTab->InsertNode(SI, InsertPos);
351 return Selector(SI);
352}
353
354Selector SelectorTable::getUnarySelector(IdentifierInfo *ID) {
355 return Selector(ID, 1);
356}
357
358Selector SelectorTable::getNullarySelector(IdentifierInfo *ID) {
359 return Selector(ID, 0);
360}
361
362SelectorTable::SelectorTable() {
363 Impl = new llvm::FoldingSet<MultiKeywordSelector>;
364}
365
366SelectorTable::~SelectorTable() {
367 delete static_cast<llvm::FoldingSet<MultiKeywordSelector> *>(Impl);
368}
369
370