blob: 7c2681615fc2afb745350bd588e09d3cbe0f694e [file] [log] [blame]
Reid Spencer5f016e22007-07-11 17:01:13 +00001//===--- IdentifierTable.cpp - Hash table for identifier lookup -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file was developed by Chris Lattner and is distributed under
6// the University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the IdentifierInfo, IdentifierVisitor, and
11// IdentifierTable interfaces.
12//
13//===----------------------------------------------------------------------===//
14
Chris Lattnerc7229c32007-10-07 08:58:51 +000015#include "clang/Basic/IdentifierTable.h"
Reid Spencer5f016e22007-07-11 17:01:13 +000016#include "clang/Basic/LangOptions.h"
Steve Naroff29238a02007-10-05 18:42:47 +000017#include "llvm/ADT/FoldingSet.h"
Chris Lattner85994262007-10-05 20:15:24 +000018#include "llvm/ADT/DenseMap.h"
Reid Spencer5f016e22007-07-11 17:01:13 +000019using namespace clang;
20
21//===----------------------------------------------------------------------===//
22// IdentifierInfo Implementation
23//===----------------------------------------------------------------------===//
24
25IdentifierInfo::IdentifierInfo() {
Reid Spencer5f016e22007-07-11 17:01:13 +000026 TokenID = tok::identifier;
Reid Spencer5f016e22007-07-11 17:01:13 +000027 ObjCID = tok::objc_not_keyword;
28 BuiltinID = 0;
Chris Lattner4365a7e2007-10-07 07:09:52 +000029 HasMacro = false;
Reid Spencer5f016e22007-07-11 17:01:13 +000030 IsExtension = false;
31 IsPoisoned = false;
32 IsOtherTargetMacro = false;
33 IsCPPOperatorKeyword = false;
Chris Lattner938867c2007-07-19 00:11:19 +000034 IsNonPortableBuiltin = false;
Reid Spencer5f016e22007-07-11 17:01:13 +000035 FETokenInfo = 0;
36}
37
Reid Spencer5f016e22007-07-11 17:01:13 +000038//===----------------------------------------------------------------------===//
39// IdentifierTable Implementation
40//===----------------------------------------------------------------------===//
41
42IdentifierTable::IdentifierTable(const LangOptions &LangOpts)
43 // Start with space for 8K identifiers.
44 : HashTable(8192) {
45
46 // Populate the identifier table with info about keywords for the current
47 // language.
48 AddKeywords(LangOpts);
49}
50
51//===----------------------------------------------------------------------===//
52// Language Keyword Implementation
53//===----------------------------------------------------------------------===//
54
55/// AddKeyword - This method is used to associate a token ID with specific
56/// identifiers because they are language keywords. This causes the lexer to
57/// automatically map matching identifiers to specialized token codes.
58///
Chris Lattnerd4b80f12007-07-16 04:18:29 +000059/// The C90/C99/CPP/CPP0x flags are set to 0 if the token should be
60/// enabled in the specified langauge, set to 1 if it is an extension
61/// in the specified language, and set to 2 if disabled in the
62/// specified language.
Reid Spencer5f016e22007-07-11 17:01:13 +000063static void AddKeyword(const char *Keyword, unsigned KWLen,
64 tok::TokenKind TokenCode,
Chris Lattnerd4b80f12007-07-16 04:18:29 +000065 int C90, int C99, int CXX, int CXX0x,
Reid Spencer5f016e22007-07-11 17:01:13 +000066 const LangOptions &LangOpts, IdentifierTable &Table) {
Chris Lattnerd4b80f12007-07-16 04:18:29 +000067 int Flags = LangOpts.CPlusPlus ? (LangOpts.CPlusPlus0x? CXX0x : CXX)
68 : (LangOpts.C99 ? C99 : C90);
Reid Spencer5f016e22007-07-11 17:01:13 +000069
70 // Don't add this keyword if disabled in this language or if an extension
71 // and extensions are disabled.
72 if (Flags + LangOpts.NoExtensions >= 2) return;
73
74 IdentifierInfo &Info = Table.get(Keyword, Keyword+KWLen);
75 Info.setTokenID(TokenCode);
76 Info.setIsExtensionToken(Flags == 1);
77}
78
79static void AddAlias(const char *Keyword, unsigned KWLen,
80 const char *AliaseeKeyword, unsigned AliaseeKWLen,
81 const LangOptions &LangOpts, IdentifierTable &Table) {
82 IdentifierInfo &AliasInfo = Table.get(Keyword, Keyword+KWLen);
83 IdentifierInfo &AliaseeInfo = Table.get(AliaseeKeyword,
84 AliaseeKeyword+AliaseeKWLen);
85 AliasInfo.setTokenID(AliaseeInfo.getTokenID());
86 AliasInfo.setIsExtensionToken(AliaseeInfo.isExtensionToken());
87}
88
Reid Spencer5f016e22007-07-11 17:01:13 +000089/// AddCXXOperatorKeyword - Register a C++ operator keyword alternative
90/// representations.
91static void AddCXXOperatorKeyword(const char *Keyword, unsigned KWLen,
92 tok::TokenKind TokenCode,
93 IdentifierTable &Table) {
94 IdentifierInfo &Info = Table.get(Keyword, Keyword + KWLen);
95 Info.setTokenID(TokenCode);
96 Info.setIsCPlusplusOperatorKeyword();
97}
98
99/// AddObjCKeyword - Register an Objective-C @keyword like "class" "selector" or
100/// "property".
101static void AddObjCKeyword(tok::ObjCKeywordKind ObjCID,
102 const char *Name, unsigned NameLen,
103 IdentifierTable &Table) {
104 Table.get(Name, Name+NameLen).setObjCKeywordID(ObjCID);
105}
106
107/// AddKeywords - Add all keywords to the symbol table.
108///
109void IdentifierTable::AddKeywords(const LangOptions &LangOpts) {
110 enum {
111 C90Shift = 0,
112 EXTC90 = 1 << C90Shift,
113 NOTC90 = 2 << C90Shift,
114 C99Shift = 2,
115 EXTC99 = 1 << C99Shift,
116 NOTC99 = 2 << C99Shift,
117 CPPShift = 4,
118 EXTCPP = 1 << CPPShift,
119 NOTCPP = 2 << CPPShift,
Chris Lattnerd4b80f12007-07-16 04:18:29 +0000120 CPP0xShift = 6,
121 EXTCPP0x = 1 << CPP0xShift,
122 NOTCPP0x = 2 << CPP0xShift,
Reid Spencer5f016e22007-07-11 17:01:13 +0000123 Mask = 3
124 };
125
126 // Add keywords and tokens for the current language.
127#define KEYWORD(NAME, FLAGS) \
128 AddKeyword(#NAME, strlen(#NAME), tok::kw_ ## NAME, \
129 ((FLAGS) >> C90Shift) & Mask, \
130 ((FLAGS) >> C99Shift) & Mask, \
Chris Lattnerd4b80f12007-07-16 04:18:29 +0000131 ((FLAGS) >> CPPShift) & Mask, \
132 ((FLAGS) >> CPP0xShift) & Mask, LangOpts, *this);
Reid Spencer5f016e22007-07-11 17:01:13 +0000133#define ALIAS(NAME, TOK) \
134 AddAlias(NAME, strlen(NAME), #TOK, strlen(#TOK), LangOpts, *this);
Reid Spencer5f016e22007-07-11 17:01:13 +0000135#define CXX_KEYWORD_OPERATOR(NAME, ALIAS) \
136 if (LangOpts.CXXOperatorNames) \
137 AddCXXOperatorKeyword(#NAME, strlen(#NAME), tok::ALIAS, *this);
138#define OBJC1_AT_KEYWORD(NAME) \
139 if (LangOpts.ObjC1) \
140 AddObjCKeyword(tok::objc_##NAME, #NAME, strlen(#NAME), *this);
141#define OBJC2_AT_KEYWORD(NAME) \
142 if (LangOpts.ObjC2) \
143 AddObjCKeyword(tok::objc_##NAME, #NAME, strlen(#NAME), *this);
144#include "clang/Basic/TokenKinds.def"
145}
146
Chris Lattner387b98d2007-10-07 07:52:34 +0000147tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const {
148 // We use a perfect hash function here involving the length of the keyword,
149 // the first and third character. For preprocessor ID's there are no
150 // collisions (if there were, the switch below would complain about duplicate
151 // case values). Note that this depends on 'if' being null terminated.
152
153#define HASH(LEN, FIRST, THIRD) \
154 (LEN << 5) + (((FIRST-'a') + (THIRD-'a')) & 31)
155#define CASE(LEN, FIRST, THIRD, NAME) \
156 case HASH(LEN, FIRST, THIRD): \
157 return memcmp(Name, #NAME, LEN) ? tok::pp_not_keyword : tok::pp_ ## NAME
158
159 unsigned Len = getLength();
Chris Lattnera31f0302007-10-10 20:59:57 +0000160 if (Len < 2) return tok::pp_not_keyword;
Chris Lattner387b98d2007-10-07 07:52:34 +0000161 const char *Name = getName();
162 switch (HASH(Len, Name[0], Name[2])) {
163 default: return tok::pp_not_keyword;
164 CASE( 2, 'i', '\0', if);
165 CASE( 4, 'e', 'i', elif);
166 CASE( 4, 'e', 's', else);
167 CASE( 4, 'l', 'n', line);
168 CASE( 4, 's', 'c', sccs);
169 CASE( 5, 'e', 'd', endif);
170 CASE( 5, 'e', 'r', error);
171 CASE( 5, 'i', 'e', ident);
172 CASE( 5, 'i', 'd', ifdef);
173 CASE( 5, 'u', 'd', undef);
174
175 CASE( 6, 'a', 's', assert);
176 CASE( 6, 'd', 'f', define);
177 CASE( 6, 'i', 'n', ifndef);
178 CASE( 6, 'i', 'p', import);
179 CASE( 6, 'p', 'a', pragma);
180
181 CASE( 7, 'd', 'f', defined);
182 CASE( 7, 'i', 'c', include);
183 CASE( 7, 'w', 'r', warning);
184
185 CASE( 8, 'u', 'a', unassert);
186 CASE(12, 'i', 'c', include_next);
187 CASE(13, 'd', 'f', define_target);
188 CASE(19, 'd', 'f', define_other_target);
189#undef CASE
190#undef HASH
191 }
192}
Reid Spencer5f016e22007-07-11 17:01:13 +0000193
194//===----------------------------------------------------------------------===//
195// Stats Implementation
196//===----------------------------------------------------------------------===//
197
198/// PrintStats - Print statistics about how well the identifier table is doing
199/// at hashing identifiers.
200void IdentifierTable::PrintStats() const {
201 unsigned NumBuckets = HashTable.getNumBuckets();
202 unsigned NumIdentifiers = HashTable.getNumItems();
203 unsigned NumEmptyBuckets = NumBuckets-NumIdentifiers;
204 unsigned AverageIdentifierSize = 0;
205 unsigned MaxIdentifierLength = 0;
206
207 // TODO: Figure out maximum times an identifier had to probe for -stats.
208 for (llvm::StringMap<IdentifierInfo, llvm::BumpPtrAllocator>::const_iterator
209 I = HashTable.begin(), E = HashTable.end(); I != E; ++I) {
210 unsigned IdLen = I->getKeyLength();
211 AverageIdentifierSize += IdLen;
212 if (MaxIdentifierLength < IdLen)
213 MaxIdentifierLength = IdLen;
214 }
215
216 fprintf(stderr, "\n*** Identifier Table Stats:\n");
217 fprintf(stderr, "# Identifiers: %d\n", NumIdentifiers);
218 fprintf(stderr, "# Empty Buckets: %d\n", NumEmptyBuckets);
219 fprintf(stderr, "Hash density (#identifiers per bucket): %f\n",
220 NumIdentifiers/(double)NumBuckets);
221 fprintf(stderr, "Ave identifier length: %f\n",
222 (AverageIdentifierSize/(double)NumIdentifiers));
223 fprintf(stderr, "Max identifier length: %d\n", MaxIdentifierLength);
224
225 // Compute statistics about the memory allocated for identifiers.
226 HashTable.getAllocator().PrintStats();
227}
Steve Naroff68d331a2007-09-27 14:38:14 +0000228
Steve Naroff29238a02007-10-05 18:42:47 +0000229//===----------------------------------------------------------------------===//
230// SelectorTable Implementation
231//===----------------------------------------------------------------------===//
232
Chris Lattner85994262007-10-05 20:15:24 +0000233unsigned llvm::DenseMapInfo<clang::Selector>::getHashValue(clang::Selector S) {
234 return DenseMapInfo<void*>::getHashValue(S.getAsOpaquePtr());
235}
236
237
Steve Naroff29238a02007-10-05 18:42:47 +0000238/// MultiKeywordSelector - One of these variable length records is kept for each
239/// selector containing more than one keyword. We use a folding set
240/// to unique aggregate names (keyword selectors in ObjC parlance). Access to
241/// this class is provided strictly through Selector.
Chris Lattner85994262007-10-05 20:15:24 +0000242namespace clang {
Steve Naroff29238a02007-10-05 18:42:47 +0000243class MultiKeywordSelector : public llvm::FoldingSetNode {
244public:
245 unsigned NumArgs;
246
247 // Constructor for keyword selectors.
248 MultiKeywordSelector(unsigned nKeys, IdentifierInfo **IIV) {
249 assert((nKeys > 1) && "not a multi-keyword selector");
250 NumArgs = nKeys;
251 // Fill in the trailing keyword array.
252 IdentifierInfo **KeyInfo = reinterpret_cast<IdentifierInfo **>(this+1);
253 for (unsigned i = 0; i != nKeys; ++i)
254 KeyInfo[i] = IIV[i];
255 }
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000256 // getName - Derive the full selector name and return it.
257 std::string getName() const;
258
Steve Naroff29238a02007-10-05 18:42:47 +0000259 unsigned getNumArgs() const { return NumArgs; }
260
261 typedef IdentifierInfo *const *keyword_iterator;
262 keyword_iterator keyword_begin() const {
263 return reinterpret_cast<keyword_iterator>(this+1);
264 }
265 keyword_iterator keyword_end() const {
266 return keyword_begin()+NumArgs;
267 }
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000268 IdentifierInfo *getIdentifierInfoForSlot(unsigned i) const {
269 assert(i < NumArgs && "getIdentifierInfoForSlot(): illegal index");
Steve Naroff29238a02007-10-05 18:42:47 +0000270 return keyword_begin()[i];
271 }
272 static void Profile(llvm::FoldingSetNodeID &ID,
273 keyword_iterator ArgTys, unsigned NumArgs) {
274 ID.AddInteger(NumArgs);
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000275 for (unsigned i = 0; i != NumArgs; ++i)
276 ID.AddPointer(ArgTys[i]);
Steve Naroff29238a02007-10-05 18:42:47 +0000277 }
278 void Profile(llvm::FoldingSetNodeID &ID) {
279 Profile(ID, keyword_begin(), NumArgs);
280 }
281};
Chris Lattner85994262007-10-05 20:15:24 +0000282} // end namespace clang.
Steve Naroff29238a02007-10-05 18:42:47 +0000283
284unsigned Selector::getNumArgs() const {
285 unsigned IIF = getIdentifierInfoFlag();
286 if (IIF == ZeroArg)
287 return 0;
288 if (IIF == OneArg)
289 return 1;
290 // We point to a MultiKeywordSelector (pointer doesn't contain any flags).
291 MultiKeywordSelector *SI = reinterpret_cast<MultiKeywordSelector *>(InfoPtr);
292 return SI->getNumArgs();
293}
294
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000295IdentifierInfo *Selector::getIdentifierInfoForSlot(unsigned argIndex) const {
296 if (IdentifierInfo *II = getAsIdentifierInfo()) {
297 assert(argIndex == 0 && "illegal keyword index");
Steve Naroff29238a02007-10-05 18:42:47 +0000298 return II;
299 }
300 // We point to a MultiKeywordSelector (pointer doesn't contain any flags).
301 MultiKeywordSelector *SI = reinterpret_cast<MultiKeywordSelector *>(InfoPtr);
302 return SI->getIdentifierInfoForSlot(argIndex);
303}
304
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000305std::string MultiKeywordSelector::getName() const {
306 std::string Result;
307 unsigned Length = 0;
308 for (keyword_iterator I = keyword_begin(), E = keyword_end(); I != E; ++I) {
309 if (*I)
310 Length += (*I)->getLength();
311 ++Length; // :
Steve Naroff29238a02007-10-05 18:42:47 +0000312 }
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000313
314 Result.reserve(Length);
315
316 for (keyword_iterator I = keyword_begin(), E = keyword_end(); I != E; ++I) {
317 if (*I)
318 Result.insert(Result.end(), (*I)->getName(),
319 (*I)->getName()+(*I)->getLength());
320 Result.push_back(':');
321 }
322
323 return Result;
Steve Naroff29238a02007-10-05 18:42:47 +0000324}
325
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000326std::string Selector::getName() const {
327 if (IdentifierInfo *II = getAsIdentifierInfo()) {
328 if (getNumArgs() == 0)
329 return II->getName();
330
331 std::string Res = II->getName();
332 Res += ":";
333 return Res;
Steve Naroff29238a02007-10-05 18:42:47 +0000334 }
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000335
336 // We have a multiple keyword selector (no embedded flags).
337 return reinterpret_cast<MultiKeywordSelector *>(InfoPtr)->getName();
Steve Naroff29238a02007-10-05 18:42:47 +0000338}
339
340
Chris Lattnerff384912007-10-07 02:00:24 +0000341Selector SelectorTable::getSelector(unsigned nKeys, IdentifierInfo **IIV) {
342 if (nKeys < 2)
343 return Selector(IIV[0], nKeys);
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000344
Steve Naroff29238a02007-10-05 18:42:47 +0000345 llvm::FoldingSet<MultiKeywordSelector> *SelTab;
346
347 SelTab = static_cast<llvm::FoldingSet<MultiKeywordSelector> *>(Impl);
348
349 // Unique selector, to guarantee there is one per name.
350 llvm::FoldingSetNodeID ID;
351 MultiKeywordSelector::Profile(ID, IIV, nKeys);
352
353 void *InsertPos = 0;
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000354 if (MultiKeywordSelector *SI = SelTab->FindNodeOrInsertPos(ID, InsertPos))
Steve Naroff29238a02007-10-05 18:42:47 +0000355 return Selector(SI);
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000356
Steve Naroff29238a02007-10-05 18:42:47 +0000357 // MultiKeywordSelector objects are not allocated with new because they have a
358 // variable size array (for parameter types) at the end of them.
359 MultiKeywordSelector *SI =
360 (MultiKeywordSelector*)malloc(sizeof(MultiKeywordSelector) +
361 nKeys*sizeof(IdentifierInfo *));
362 new (SI) MultiKeywordSelector(nKeys, IIV);
363 SelTab->InsertNode(SI, InsertPos);
364 return Selector(SI);
365}
366
Steve Naroff29238a02007-10-05 18:42:47 +0000367SelectorTable::SelectorTable() {
368 Impl = new llvm::FoldingSet<MultiKeywordSelector>;
369}
370
371SelectorTable::~SelectorTable() {
372 delete static_cast<llvm::FoldingSet<MultiKeywordSelector> *>(Impl);
373}
374
375