blob: 51abcd22b58f2f360ecb6492ecf532dd9fbdc901 [file] [log] [blame]
Reid Spencer5f016e22007-07-11 17:01:13 +00001//===--- IdentifierTable.cpp - Hash table for identifier lookup -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file was developed by Chris Lattner and is distributed under
6// the University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the IdentifierInfo, IdentifierVisitor, and
11// IdentifierTable interfaces.
12//
13//===----------------------------------------------------------------------===//
14
Chris Lattnerc7229c32007-10-07 08:58:51 +000015#include "clang/Basic/IdentifierTable.h"
Reid Spencer5f016e22007-07-11 17:01:13 +000016#include "clang/Basic/LangOptions.h"
Steve Naroff29238a02007-10-05 18:42:47 +000017#include "llvm/ADT/FoldingSet.h"
Chris Lattner85994262007-10-05 20:15:24 +000018#include "llvm/ADT/DenseMap.h"
Reid Spencer5f016e22007-07-11 17:01:13 +000019using namespace clang;
20
21//===----------------------------------------------------------------------===//
22// IdentifierInfo Implementation
23//===----------------------------------------------------------------------===//
24
25IdentifierInfo::IdentifierInfo() {
Reid Spencer5f016e22007-07-11 17:01:13 +000026 TokenID = tok::identifier;
Reid Spencer5f016e22007-07-11 17:01:13 +000027 ObjCID = tok::objc_not_keyword;
28 BuiltinID = 0;
Chris Lattner4365a7e2007-10-07 07:09:52 +000029 HasMacro = false;
Reid Spencer5f016e22007-07-11 17:01:13 +000030 IsExtension = false;
31 IsPoisoned = false;
32 IsOtherTargetMacro = false;
33 IsCPPOperatorKeyword = false;
Chris Lattner938867c2007-07-19 00:11:19 +000034 IsNonPortableBuiltin = false;
Reid Spencer5f016e22007-07-11 17:01:13 +000035 FETokenInfo = 0;
36}
37
Reid Spencer5f016e22007-07-11 17:01:13 +000038//===----------------------------------------------------------------------===//
39// IdentifierTable Implementation
40//===----------------------------------------------------------------------===//
41
42IdentifierTable::IdentifierTable(const LangOptions &LangOpts)
43 // Start with space for 8K identifiers.
44 : HashTable(8192) {
45
46 // Populate the identifier table with info about keywords for the current
47 // language.
48 AddKeywords(LangOpts);
49}
50
51//===----------------------------------------------------------------------===//
52// Language Keyword Implementation
53//===----------------------------------------------------------------------===//
54
55/// AddKeyword - This method is used to associate a token ID with specific
56/// identifiers because they are language keywords. This causes the lexer to
57/// automatically map matching identifiers to specialized token codes.
58///
Chris Lattnerd4b80f12007-07-16 04:18:29 +000059/// The C90/C99/CPP/CPP0x flags are set to 0 if the token should be
60/// enabled in the specified langauge, set to 1 if it is an extension
61/// in the specified language, and set to 2 if disabled in the
62/// specified language.
Reid Spencer5f016e22007-07-11 17:01:13 +000063static void AddKeyword(const char *Keyword, unsigned KWLen,
64 tok::TokenKind TokenCode,
Chris Lattnerd4b80f12007-07-16 04:18:29 +000065 int C90, int C99, int CXX, int CXX0x,
Reid Spencer5f016e22007-07-11 17:01:13 +000066 const LangOptions &LangOpts, IdentifierTable &Table) {
Chris Lattnerd4b80f12007-07-16 04:18:29 +000067 int Flags = LangOpts.CPlusPlus ? (LangOpts.CPlusPlus0x? CXX0x : CXX)
68 : (LangOpts.C99 ? C99 : C90);
Reid Spencer5f016e22007-07-11 17:01:13 +000069
70 // Don't add this keyword if disabled in this language or if an extension
71 // and extensions are disabled.
72 if (Flags + LangOpts.NoExtensions >= 2) return;
73
74 IdentifierInfo &Info = Table.get(Keyword, Keyword+KWLen);
75 Info.setTokenID(TokenCode);
76 Info.setIsExtensionToken(Flags == 1);
77}
78
79static void AddAlias(const char *Keyword, unsigned KWLen,
80 const char *AliaseeKeyword, unsigned AliaseeKWLen,
81 const LangOptions &LangOpts, IdentifierTable &Table) {
82 IdentifierInfo &AliasInfo = Table.get(Keyword, Keyword+KWLen);
83 IdentifierInfo &AliaseeInfo = Table.get(AliaseeKeyword,
84 AliaseeKeyword+AliaseeKWLen);
85 AliasInfo.setTokenID(AliaseeInfo.getTokenID());
86 AliasInfo.setIsExtensionToken(AliaseeInfo.isExtensionToken());
87}
88
Reid Spencer5f016e22007-07-11 17:01:13 +000089/// AddCXXOperatorKeyword - Register a C++ operator keyword alternative
90/// representations.
91static void AddCXXOperatorKeyword(const char *Keyword, unsigned KWLen,
92 tok::TokenKind TokenCode,
93 IdentifierTable &Table) {
94 IdentifierInfo &Info = Table.get(Keyword, Keyword + KWLen);
95 Info.setTokenID(TokenCode);
96 Info.setIsCPlusplusOperatorKeyword();
97}
98
99/// AddObjCKeyword - Register an Objective-C @keyword like "class" "selector" or
100/// "property".
101static void AddObjCKeyword(tok::ObjCKeywordKind ObjCID,
102 const char *Name, unsigned NameLen,
103 IdentifierTable &Table) {
104 Table.get(Name, Name+NameLen).setObjCKeywordID(ObjCID);
105}
106
107/// AddKeywords - Add all keywords to the symbol table.
108///
109void IdentifierTable::AddKeywords(const LangOptions &LangOpts) {
110 enum {
111 C90Shift = 0,
112 EXTC90 = 1 << C90Shift,
113 NOTC90 = 2 << C90Shift,
114 C99Shift = 2,
115 EXTC99 = 1 << C99Shift,
116 NOTC99 = 2 << C99Shift,
117 CPPShift = 4,
118 EXTCPP = 1 << CPPShift,
119 NOTCPP = 2 << CPPShift,
Chris Lattnerd4b80f12007-07-16 04:18:29 +0000120 CPP0xShift = 6,
121 EXTCPP0x = 1 << CPP0xShift,
122 NOTCPP0x = 2 << CPP0xShift,
Reid Spencer5f016e22007-07-11 17:01:13 +0000123 Mask = 3
124 };
125
126 // Add keywords and tokens for the current language.
127#define KEYWORD(NAME, FLAGS) \
128 AddKeyword(#NAME, strlen(#NAME), tok::kw_ ## NAME, \
129 ((FLAGS) >> C90Shift) & Mask, \
130 ((FLAGS) >> C99Shift) & Mask, \
Chris Lattnerd4b80f12007-07-16 04:18:29 +0000131 ((FLAGS) >> CPPShift) & Mask, \
132 ((FLAGS) >> CPP0xShift) & Mask, LangOpts, *this);
Reid Spencer5f016e22007-07-11 17:01:13 +0000133#define ALIAS(NAME, TOK) \
134 AddAlias(NAME, strlen(NAME), #TOK, strlen(#TOK), LangOpts, *this);
Reid Spencer5f016e22007-07-11 17:01:13 +0000135#define CXX_KEYWORD_OPERATOR(NAME, ALIAS) \
136 if (LangOpts.CXXOperatorNames) \
137 AddCXXOperatorKeyword(#NAME, strlen(#NAME), tok::ALIAS, *this);
138#define OBJC1_AT_KEYWORD(NAME) \
139 if (LangOpts.ObjC1) \
140 AddObjCKeyword(tok::objc_##NAME, #NAME, strlen(#NAME), *this);
141#define OBJC2_AT_KEYWORD(NAME) \
142 if (LangOpts.ObjC2) \
143 AddObjCKeyword(tok::objc_##NAME, #NAME, strlen(#NAME), *this);
144#include "clang/Basic/TokenKinds.def"
145}
146
Chris Lattner387b98d2007-10-07 07:52:34 +0000147tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const {
148 // We use a perfect hash function here involving the length of the keyword,
149 // the first and third character. For preprocessor ID's there are no
150 // collisions (if there were, the switch below would complain about duplicate
151 // case values). Note that this depends on 'if' being null terminated.
152
153#define HASH(LEN, FIRST, THIRD) \
154 (LEN << 5) + (((FIRST-'a') + (THIRD-'a')) & 31)
155#define CASE(LEN, FIRST, THIRD, NAME) \
156 case HASH(LEN, FIRST, THIRD): \
157 return memcmp(Name, #NAME, LEN) ? tok::pp_not_keyword : tok::pp_ ## NAME
158
159 unsigned Len = getLength();
160 const char *Name = getName();
161 switch (HASH(Len, Name[0], Name[2])) {
162 default: return tok::pp_not_keyword;
163 CASE( 2, 'i', '\0', if);
164 CASE( 4, 'e', 'i', elif);
165 CASE( 4, 'e', 's', else);
166 CASE( 4, 'l', 'n', line);
167 CASE( 4, 's', 'c', sccs);
168 CASE( 5, 'e', 'd', endif);
169 CASE( 5, 'e', 'r', error);
170 CASE( 5, 'i', 'e', ident);
171 CASE( 5, 'i', 'd', ifdef);
172 CASE( 5, 'u', 'd', undef);
173
174 CASE( 6, 'a', 's', assert);
175 CASE( 6, 'd', 'f', define);
176 CASE( 6, 'i', 'n', ifndef);
177 CASE( 6, 'i', 'p', import);
178 CASE( 6, 'p', 'a', pragma);
179
180 CASE( 7, 'd', 'f', defined);
181 CASE( 7, 'i', 'c', include);
182 CASE( 7, 'w', 'r', warning);
183
184 CASE( 8, 'u', 'a', unassert);
185 CASE(12, 'i', 'c', include_next);
186 CASE(13, 'd', 'f', define_target);
187 CASE(19, 'd', 'f', define_other_target);
188#undef CASE
189#undef HASH
190 }
191}
Reid Spencer5f016e22007-07-11 17:01:13 +0000192
193//===----------------------------------------------------------------------===//
194// Stats Implementation
195//===----------------------------------------------------------------------===//
196
197/// PrintStats - Print statistics about how well the identifier table is doing
198/// at hashing identifiers.
199void IdentifierTable::PrintStats() const {
200 unsigned NumBuckets = HashTable.getNumBuckets();
201 unsigned NumIdentifiers = HashTable.getNumItems();
202 unsigned NumEmptyBuckets = NumBuckets-NumIdentifiers;
203 unsigned AverageIdentifierSize = 0;
204 unsigned MaxIdentifierLength = 0;
205
206 // TODO: Figure out maximum times an identifier had to probe for -stats.
207 for (llvm::StringMap<IdentifierInfo, llvm::BumpPtrAllocator>::const_iterator
208 I = HashTable.begin(), E = HashTable.end(); I != E; ++I) {
209 unsigned IdLen = I->getKeyLength();
210 AverageIdentifierSize += IdLen;
211 if (MaxIdentifierLength < IdLen)
212 MaxIdentifierLength = IdLen;
213 }
214
215 fprintf(stderr, "\n*** Identifier Table Stats:\n");
216 fprintf(stderr, "# Identifiers: %d\n", NumIdentifiers);
217 fprintf(stderr, "# Empty Buckets: %d\n", NumEmptyBuckets);
218 fprintf(stderr, "Hash density (#identifiers per bucket): %f\n",
219 NumIdentifiers/(double)NumBuckets);
220 fprintf(stderr, "Ave identifier length: %f\n",
221 (AverageIdentifierSize/(double)NumIdentifiers));
222 fprintf(stderr, "Max identifier length: %d\n", MaxIdentifierLength);
223
224 // Compute statistics about the memory allocated for identifiers.
225 HashTable.getAllocator().PrintStats();
226}
Steve Naroff68d331a2007-09-27 14:38:14 +0000227
Steve Naroff29238a02007-10-05 18:42:47 +0000228//===----------------------------------------------------------------------===//
229// SelectorTable Implementation
230//===----------------------------------------------------------------------===//
231
Chris Lattner85994262007-10-05 20:15:24 +0000232unsigned llvm::DenseMapInfo<clang::Selector>::getHashValue(clang::Selector S) {
233 return DenseMapInfo<void*>::getHashValue(S.getAsOpaquePtr());
234}
235
236
Steve Naroff29238a02007-10-05 18:42:47 +0000237/// MultiKeywordSelector - One of these variable length records is kept for each
238/// selector containing more than one keyword. We use a folding set
239/// to unique aggregate names (keyword selectors in ObjC parlance). Access to
240/// this class is provided strictly through Selector.
Chris Lattner85994262007-10-05 20:15:24 +0000241namespace clang {
Steve Naroff29238a02007-10-05 18:42:47 +0000242class MultiKeywordSelector : public llvm::FoldingSetNode {
243public:
244 unsigned NumArgs;
245
246 // Constructor for keyword selectors.
247 MultiKeywordSelector(unsigned nKeys, IdentifierInfo **IIV) {
248 assert((nKeys > 1) && "not a multi-keyword selector");
249 NumArgs = nKeys;
250 // Fill in the trailing keyword array.
251 IdentifierInfo **KeyInfo = reinterpret_cast<IdentifierInfo **>(this+1);
252 for (unsigned i = 0; i != nKeys; ++i)
253 KeyInfo[i] = IIV[i];
254 }
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000255 // getName - Derive the full selector name and return it.
256 std::string getName() const;
257
Steve Naroff29238a02007-10-05 18:42:47 +0000258 unsigned getNumArgs() const { return NumArgs; }
259
260 typedef IdentifierInfo *const *keyword_iterator;
261 keyword_iterator keyword_begin() const {
262 return reinterpret_cast<keyword_iterator>(this+1);
263 }
264 keyword_iterator keyword_end() const {
265 return keyword_begin()+NumArgs;
266 }
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000267 IdentifierInfo *getIdentifierInfoForSlot(unsigned i) const {
268 assert(i < NumArgs && "getIdentifierInfoForSlot(): illegal index");
Steve Naroff29238a02007-10-05 18:42:47 +0000269 return keyword_begin()[i];
270 }
271 static void Profile(llvm::FoldingSetNodeID &ID,
272 keyword_iterator ArgTys, unsigned NumArgs) {
273 ID.AddInteger(NumArgs);
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000274 for (unsigned i = 0; i != NumArgs; ++i)
275 ID.AddPointer(ArgTys[i]);
Steve Naroff29238a02007-10-05 18:42:47 +0000276 }
277 void Profile(llvm::FoldingSetNodeID &ID) {
278 Profile(ID, keyword_begin(), NumArgs);
279 }
280};
Chris Lattner85994262007-10-05 20:15:24 +0000281} // end namespace clang.
Steve Naroff29238a02007-10-05 18:42:47 +0000282
283unsigned Selector::getNumArgs() const {
284 unsigned IIF = getIdentifierInfoFlag();
285 if (IIF == ZeroArg)
286 return 0;
287 if (IIF == OneArg)
288 return 1;
289 // We point to a MultiKeywordSelector (pointer doesn't contain any flags).
290 MultiKeywordSelector *SI = reinterpret_cast<MultiKeywordSelector *>(InfoPtr);
291 return SI->getNumArgs();
292}
293
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000294IdentifierInfo *Selector::getIdentifierInfoForSlot(unsigned argIndex) const {
295 if (IdentifierInfo *II = getAsIdentifierInfo()) {
296 assert(argIndex == 0 && "illegal keyword index");
Steve Naroff29238a02007-10-05 18:42:47 +0000297 return II;
298 }
299 // We point to a MultiKeywordSelector (pointer doesn't contain any flags).
300 MultiKeywordSelector *SI = reinterpret_cast<MultiKeywordSelector *>(InfoPtr);
301 return SI->getIdentifierInfoForSlot(argIndex);
302}
303
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000304std::string MultiKeywordSelector::getName() const {
305 std::string Result;
306 unsigned Length = 0;
307 for (keyword_iterator I = keyword_begin(), E = keyword_end(); I != E; ++I) {
308 if (*I)
309 Length += (*I)->getLength();
310 ++Length; // :
Steve Naroff29238a02007-10-05 18:42:47 +0000311 }
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000312
313 Result.reserve(Length);
314
315 for (keyword_iterator I = keyword_begin(), E = keyword_end(); I != E; ++I) {
316 if (*I)
317 Result.insert(Result.end(), (*I)->getName(),
318 (*I)->getName()+(*I)->getLength());
319 Result.push_back(':');
320 }
321
322 return Result;
Steve Naroff29238a02007-10-05 18:42:47 +0000323}
324
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000325std::string Selector::getName() const {
326 if (IdentifierInfo *II = getAsIdentifierInfo()) {
327 if (getNumArgs() == 0)
328 return II->getName();
329
330 std::string Res = II->getName();
331 Res += ":";
332 return Res;
Steve Naroff29238a02007-10-05 18:42:47 +0000333 }
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000334
335 // We have a multiple keyword selector (no embedded flags).
336 return reinterpret_cast<MultiKeywordSelector *>(InfoPtr)->getName();
Steve Naroff29238a02007-10-05 18:42:47 +0000337}
338
339
Chris Lattnerff384912007-10-07 02:00:24 +0000340Selector SelectorTable::getSelector(unsigned nKeys, IdentifierInfo **IIV) {
341 if (nKeys < 2)
342 return Selector(IIV[0], nKeys);
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000343
Steve Naroff29238a02007-10-05 18:42:47 +0000344 llvm::FoldingSet<MultiKeywordSelector> *SelTab;
345
346 SelTab = static_cast<llvm::FoldingSet<MultiKeywordSelector> *>(Impl);
347
348 // Unique selector, to guarantee there is one per name.
349 llvm::FoldingSetNodeID ID;
350 MultiKeywordSelector::Profile(ID, IIV, nKeys);
351
352 void *InsertPos = 0;
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000353 if (MultiKeywordSelector *SI = SelTab->FindNodeOrInsertPos(ID, InsertPos))
Steve Naroff29238a02007-10-05 18:42:47 +0000354 return Selector(SI);
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000355
Steve Naroff29238a02007-10-05 18:42:47 +0000356 // MultiKeywordSelector objects are not allocated with new because they have a
357 // variable size array (for parameter types) at the end of them.
358 MultiKeywordSelector *SI =
359 (MultiKeywordSelector*)malloc(sizeof(MultiKeywordSelector) +
360 nKeys*sizeof(IdentifierInfo *));
361 new (SI) MultiKeywordSelector(nKeys, IIV);
362 SelTab->InsertNode(SI, InsertPos);
363 return Selector(SI);
364}
365
Steve Naroff29238a02007-10-05 18:42:47 +0000366SelectorTable::SelectorTable() {
367 Impl = new llvm::FoldingSet<MultiKeywordSelector>;
368}
369
370SelectorTable::~SelectorTable() {
371 delete static_cast<llvm::FoldingSet<MultiKeywordSelector> *>(Impl);
372}
373
374