blob: 760dcf4349815670efaa1ee0a4568dfa768e4258 [file] [log] [blame]
Reid Spencer5f016e22007-07-11 17:01:13 +00001//===--- IdentifierTable.cpp - Hash table for identifier lookup -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file was developed by Chris Lattner and is distributed under
6// the University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the IdentifierInfo, IdentifierVisitor, and
11// IdentifierTable interfaces.
12//
13//===----------------------------------------------------------------------===//
14
Chris Lattnerc7229c32007-10-07 08:58:51 +000015#include "clang/Basic/IdentifierTable.h"
Reid Spencer5f016e22007-07-11 17:01:13 +000016#include "clang/Basic/LangOptions.h"
Steve Naroff29238a02007-10-05 18:42:47 +000017#include "llvm/ADT/FoldingSet.h"
Chris Lattner85994262007-10-05 20:15:24 +000018#include "llvm/ADT/DenseMap.h"
Ted Kremenekc637e6b2007-10-23 22:18:37 +000019#include "llvm/Bitcode/Serialization.h"
20
Reid Spencer5f016e22007-07-11 17:01:13 +000021using namespace clang;
22
23//===----------------------------------------------------------------------===//
24// IdentifierInfo Implementation
25//===----------------------------------------------------------------------===//
26
27IdentifierInfo::IdentifierInfo() {
Reid Spencer5f016e22007-07-11 17:01:13 +000028 TokenID = tok::identifier;
Reid Spencer5f016e22007-07-11 17:01:13 +000029 ObjCID = tok::objc_not_keyword;
30 BuiltinID = 0;
Chris Lattner4365a7e2007-10-07 07:09:52 +000031 HasMacro = false;
Reid Spencer5f016e22007-07-11 17:01:13 +000032 IsExtension = false;
33 IsPoisoned = false;
34 IsOtherTargetMacro = false;
35 IsCPPOperatorKeyword = false;
Chris Lattner938867c2007-07-19 00:11:19 +000036 IsNonPortableBuiltin = false;
Reid Spencer5f016e22007-07-11 17:01:13 +000037 FETokenInfo = 0;
38}
39
Reid Spencer5f016e22007-07-11 17:01:13 +000040//===----------------------------------------------------------------------===//
41// IdentifierTable Implementation
42//===----------------------------------------------------------------------===//
43
44IdentifierTable::IdentifierTable(const LangOptions &LangOpts)
45 // Start with space for 8K identifiers.
46 : HashTable(8192) {
47
48 // Populate the identifier table with info about keywords for the current
49 // language.
50 AddKeywords(LangOpts);
51}
52
Ted Kremenekc637e6b2007-10-23 22:18:37 +000053// This cstor is intended to be used only for serialization.
54IdentifierTable::IdentifierTable() : HashTable(8192) {}
55
Reid Spencer5f016e22007-07-11 17:01:13 +000056//===----------------------------------------------------------------------===//
57// Language Keyword Implementation
58//===----------------------------------------------------------------------===//
59
60/// AddKeyword - This method is used to associate a token ID with specific
61/// identifiers because they are language keywords. This causes the lexer to
62/// automatically map matching identifiers to specialized token codes.
63///
Chris Lattnerd4b80f12007-07-16 04:18:29 +000064/// The C90/C99/CPP/CPP0x flags are set to 0 if the token should be
65/// enabled in the specified langauge, set to 1 if it is an extension
66/// in the specified language, and set to 2 if disabled in the
67/// specified language.
Reid Spencer5f016e22007-07-11 17:01:13 +000068static void AddKeyword(const char *Keyword, unsigned KWLen,
69 tok::TokenKind TokenCode,
Chris Lattnerd4b80f12007-07-16 04:18:29 +000070 int C90, int C99, int CXX, int CXX0x,
Reid Spencer5f016e22007-07-11 17:01:13 +000071 const LangOptions &LangOpts, IdentifierTable &Table) {
Chris Lattnerd4b80f12007-07-16 04:18:29 +000072 int Flags = LangOpts.CPlusPlus ? (LangOpts.CPlusPlus0x? CXX0x : CXX)
73 : (LangOpts.C99 ? C99 : C90);
Reid Spencer5f016e22007-07-11 17:01:13 +000074
75 // Don't add this keyword if disabled in this language or if an extension
76 // and extensions are disabled.
77 if (Flags + LangOpts.NoExtensions >= 2) return;
78
79 IdentifierInfo &Info = Table.get(Keyword, Keyword+KWLen);
80 Info.setTokenID(TokenCode);
81 Info.setIsExtensionToken(Flags == 1);
82}
83
84static void AddAlias(const char *Keyword, unsigned KWLen,
85 const char *AliaseeKeyword, unsigned AliaseeKWLen,
86 const LangOptions &LangOpts, IdentifierTable &Table) {
87 IdentifierInfo &AliasInfo = Table.get(Keyword, Keyword+KWLen);
88 IdentifierInfo &AliaseeInfo = Table.get(AliaseeKeyword,
89 AliaseeKeyword+AliaseeKWLen);
90 AliasInfo.setTokenID(AliaseeInfo.getTokenID());
91 AliasInfo.setIsExtensionToken(AliaseeInfo.isExtensionToken());
92}
93
Reid Spencer5f016e22007-07-11 17:01:13 +000094/// AddCXXOperatorKeyword - Register a C++ operator keyword alternative
95/// representations.
96static void AddCXXOperatorKeyword(const char *Keyword, unsigned KWLen,
97 tok::TokenKind TokenCode,
98 IdentifierTable &Table) {
99 IdentifierInfo &Info = Table.get(Keyword, Keyword + KWLen);
100 Info.setTokenID(TokenCode);
Ted Kremenekc637e6b2007-10-23 22:18:37 +0000101 Info.setIsCPlusPlusOperatorKeyword();
Reid Spencer5f016e22007-07-11 17:01:13 +0000102}
103
104/// AddObjCKeyword - Register an Objective-C @keyword like "class" "selector" or
105/// "property".
106static void AddObjCKeyword(tok::ObjCKeywordKind ObjCID,
107 const char *Name, unsigned NameLen,
108 IdentifierTable &Table) {
109 Table.get(Name, Name+NameLen).setObjCKeywordID(ObjCID);
110}
111
112/// AddKeywords - Add all keywords to the symbol table.
113///
114void IdentifierTable::AddKeywords(const LangOptions &LangOpts) {
115 enum {
116 C90Shift = 0,
117 EXTC90 = 1 << C90Shift,
118 NOTC90 = 2 << C90Shift,
119 C99Shift = 2,
120 EXTC99 = 1 << C99Shift,
121 NOTC99 = 2 << C99Shift,
122 CPPShift = 4,
123 EXTCPP = 1 << CPPShift,
124 NOTCPP = 2 << CPPShift,
Chris Lattnerd4b80f12007-07-16 04:18:29 +0000125 CPP0xShift = 6,
126 EXTCPP0x = 1 << CPP0xShift,
127 NOTCPP0x = 2 << CPP0xShift,
Reid Spencer5f016e22007-07-11 17:01:13 +0000128 Mask = 3
129 };
130
131 // Add keywords and tokens for the current language.
132#define KEYWORD(NAME, FLAGS) \
133 AddKeyword(#NAME, strlen(#NAME), tok::kw_ ## NAME, \
134 ((FLAGS) >> C90Shift) & Mask, \
135 ((FLAGS) >> C99Shift) & Mask, \
Chris Lattnerd4b80f12007-07-16 04:18:29 +0000136 ((FLAGS) >> CPPShift) & Mask, \
137 ((FLAGS) >> CPP0xShift) & Mask, LangOpts, *this);
Reid Spencer5f016e22007-07-11 17:01:13 +0000138#define ALIAS(NAME, TOK) \
139 AddAlias(NAME, strlen(NAME), #TOK, strlen(#TOK), LangOpts, *this);
Reid Spencer5f016e22007-07-11 17:01:13 +0000140#define CXX_KEYWORD_OPERATOR(NAME, ALIAS) \
141 if (LangOpts.CXXOperatorNames) \
142 AddCXXOperatorKeyword(#NAME, strlen(#NAME), tok::ALIAS, *this);
143#define OBJC1_AT_KEYWORD(NAME) \
144 if (LangOpts.ObjC1) \
145 AddObjCKeyword(tok::objc_##NAME, #NAME, strlen(#NAME), *this);
146#define OBJC2_AT_KEYWORD(NAME) \
147 if (LangOpts.ObjC2) \
148 AddObjCKeyword(tok::objc_##NAME, #NAME, strlen(#NAME), *this);
149#include "clang/Basic/TokenKinds.def"
150}
151
Chris Lattner387b98d2007-10-07 07:52:34 +0000152tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const {
153 // We use a perfect hash function here involving the length of the keyword,
154 // the first and third character. For preprocessor ID's there are no
155 // collisions (if there were, the switch below would complain about duplicate
156 // case values). Note that this depends on 'if' being null terminated.
157
158#define HASH(LEN, FIRST, THIRD) \
159 (LEN << 5) + (((FIRST-'a') + (THIRD-'a')) & 31)
160#define CASE(LEN, FIRST, THIRD, NAME) \
161 case HASH(LEN, FIRST, THIRD): \
162 return memcmp(Name, #NAME, LEN) ? tok::pp_not_keyword : tok::pp_ ## NAME
163
164 unsigned Len = getLength();
Chris Lattnera31f0302007-10-10 20:59:57 +0000165 if (Len < 2) return tok::pp_not_keyword;
Chris Lattner387b98d2007-10-07 07:52:34 +0000166 const char *Name = getName();
167 switch (HASH(Len, Name[0], Name[2])) {
168 default: return tok::pp_not_keyword;
169 CASE( 2, 'i', '\0', if);
170 CASE( 4, 'e', 'i', elif);
171 CASE( 4, 'e', 's', else);
172 CASE( 4, 'l', 'n', line);
173 CASE( 4, 's', 'c', sccs);
174 CASE( 5, 'e', 'd', endif);
175 CASE( 5, 'e', 'r', error);
176 CASE( 5, 'i', 'e', ident);
177 CASE( 5, 'i', 'd', ifdef);
178 CASE( 5, 'u', 'd', undef);
179
180 CASE( 6, 'a', 's', assert);
181 CASE( 6, 'd', 'f', define);
182 CASE( 6, 'i', 'n', ifndef);
183 CASE( 6, 'i', 'p', import);
184 CASE( 6, 'p', 'a', pragma);
185
186 CASE( 7, 'd', 'f', defined);
187 CASE( 7, 'i', 'c', include);
188 CASE( 7, 'w', 'r', warning);
189
190 CASE( 8, 'u', 'a', unassert);
191 CASE(12, 'i', 'c', include_next);
192 CASE(13, 'd', 'f', define_target);
193 CASE(19, 'd', 'f', define_other_target);
194#undef CASE
195#undef HASH
196 }
197}
Reid Spencer5f016e22007-07-11 17:01:13 +0000198
199//===----------------------------------------------------------------------===//
200// Stats Implementation
201//===----------------------------------------------------------------------===//
202
203/// PrintStats - Print statistics about how well the identifier table is doing
204/// at hashing identifiers.
205void IdentifierTable::PrintStats() const {
206 unsigned NumBuckets = HashTable.getNumBuckets();
207 unsigned NumIdentifiers = HashTable.getNumItems();
208 unsigned NumEmptyBuckets = NumBuckets-NumIdentifiers;
209 unsigned AverageIdentifierSize = 0;
210 unsigned MaxIdentifierLength = 0;
211
212 // TODO: Figure out maximum times an identifier had to probe for -stats.
213 for (llvm::StringMap<IdentifierInfo, llvm::BumpPtrAllocator>::const_iterator
214 I = HashTable.begin(), E = HashTable.end(); I != E; ++I) {
215 unsigned IdLen = I->getKeyLength();
216 AverageIdentifierSize += IdLen;
217 if (MaxIdentifierLength < IdLen)
218 MaxIdentifierLength = IdLen;
219 }
220
221 fprintf(stderr, "\n*** Identifier Table Stats:\n");
222 fprintf(stderr, "# Identifiers: %d\n", NumIdentifiers);
223 fprintf(stderr, "# Empty Buckets: %d\n", NumEmptyBuckets);
224 fprintf(stderr, "Hash density (#identifiers per bucket): %f\n",
225 NumIdentifiers/(double)NumBuckets);
226 fprintf(stderr, "Ave identifier length: %f\n",
227 (AverageIdentifierSize/(double)NumIdentifiers));
228 fprintf(stderr, "Max identifier length: %d\n", MaxIdentifierLength);
229
230 // Compute statistics about the memory allocated for identifiers.
231 HashTable.getAllocator().PrintStats();
232}
Steve Naroff68d331a2007-09-27 14:38:14 +0000233
Steve Naroff29238a02007-10-05 18:42:47 +0000234//===----------------------------------------------------------------------===//
235// SelectorTable Implementation
236//===----------------------------------------------------------------------===//
237
Chris Lattner85994262007-10-05 20:15:24 +0000238unsigned llvm::DenseMapInfo<clang::Selector>::getHashValue(clang::Selector S) {
239 return DenseMapInfo<void*>::getHashValue(S.getAsOpaquePtr());
240}
241
242
Steve Naroff29238a02007-10-05 18:42:47 +0000243/// MultiKeywordSelector - One of these variable length records is kept for each
244/// selector containing more than one keyword. We use a folding set
245/// to unique aggregate names (keyword selectors in ObjC parlance). Access to
246/// this class is provided strictly through Selector.
Chris Lattner85994262007-10-05 20:15:24 +0000247namespace clang {
Steve Naroff29238a02007-10-05 18:42:47 +0000248class MultiKeywordSelector : public llvm::FoldingSetNode {
249public:
250 unsigned NumArgs;
251
252 // Constructor for keyword selectors.
253 MultiKeywordSelector(unsigned nKeys, IdentifierInfo **IIV) {
254 assert((nKeys > 1) && "not a multi-keyword selector");
255 NumArgs = nKeys;
256 // Fill in the trailing keyword array.
257 IdentifierInfo **KeyInfo = reinterpret_cast<IdentifierInfo **>(this+1);
258 for (unsigned i = 0; i != nKeys; ++i)
259 KeyInfo[i] = IIV[i];
260 }
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000261 // getName - Derive the full selector name and return it.
262 std::string getName() const;
263
Steve Naroff29238a02007-10-05 18:42:47 +0000264 unsigned getNumArgs() const { return NumArgs; }
265
266 typedef IdentifierInfo *const *keyword_iterator;
267 keyword_iterator keyword_begin() const {
268 return reinterpret_cast<keyword_iterator>(this+1);
269 }
270 keyword_iterator keyword_end() const {
271 return keyword_begin()+NumArgs;
272 }
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000273 IdentifierInfo *getIdentifierInfoForSlot(unsigned i) const {
274 assert(i < NumArgs && "getIdentifierInfoForSlot(): illegal index");
Steve Naroff29238a02007-10-05 18:42:47 +0000275 return keyword_begin()[i];
276 }
277 static void Profile(llvm::FoldingSetNodeID &ID,
278 keyword_iterator ArgTys, unsigned NumArgs) {
279 ID.AddInteger(NumArgs);
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000280 for (unsigned i = 0; i != NumArgs; ++i)
281 ID.AddPointer(ArgTys[i]);
Steve Naroff29238a02007-10-05 18:42:47 +0000282 }
283 void Profile(llvm::FoldingSetNodeID &ID) {
284 Profile(ID, keyword_begin(), NumArgs);
285 }
286};
Chris Lattner85994262007-10-05 20:15:24 +0000287} // end namespace clang.
Steve Naroff29238a02007-10-05 18:42:47 +0000288
289unsigned Selector::getNumArgs() const {
290 unsigned IIF = getIdentifierInfoFlag();
291 if (IIF == ZeroArg)
292 return 0;
293 if (IIF == OneArg)
294 return 1;
295 // We point to a MultiKeywordSelector (pointer doesn't contain any flags).
296 MultiKeywordSelector *SI = reinterpret_cast<MultiKeywordSelector *>(InfoPtr);
297 return SI->getNumArgs();
298}
299
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000300IdentifierInfo *Selector::getIdentifierInfoForSlot(unsigned argIndex) const {
301 if (IdentifierInfo *II = getAsIdentifierInfo()) {
302 assert(argIndex == 0 && "illegal keyword index");
Steve Naroff29238a02007-10-05 18:42:47 +0000303 return II;
304 }
305 // We point to a MultiKeywordSelector (pointer doesn't contain any flags).
306 MultiKeywordSelector *SI = reinterpret_cast<MultiKeywordSelector *>(InfoPtr);
307 return SI->getIdentifierInfoForSlot(argIndex);
308}
309
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000310std::string MultiKeywordSelector::getName() const {
311 std::string Result;
312 unsigned Length = 0;
313 for (keyword_iterator I = keyword_begin(), E = keyword_end(); I != E; ++I) {
314 if (*I)
315 Length += (*I)->getLength();
316 ++Length; // :
Steve Naroff29238a02007-10-05 18:42:47 +0000317 }
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000318
319 Result.reserve(Length);
320
321 for (keyword_iterator I = keyword_begin(), E = keyword_end(); I != E; ++I) {
322 if (*I)
323 Result.insert(Result.end(), (*I)->getName(),
324 (*I)->getName()+(*I)->getLength());
325 Result.push_back(':');
326 }
327
328 return Result;
Steve Naroff29238a02007-10-05 18:42:47 +0000329}
330
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000331std::string Selector::getName() const {
332 if (IdentifierInfo *II = getAsIdentifierInfo()) {
333 if (getNumArgs() == 0)
334 return II->getName();
335
336 std::string Res = II->getName();
337 Res += ":";
338 return Res;
Steve Naroff29238a02007-10-05 18:42:47 +0000339 }
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000340
341 // We have a multiple keyword selector (no embedded flags).
342 return reinterpret_cast<MultiKeywordSelector *>(InfoPtr)->getName();
Steve Naroff29238a02007-10-05 18:42:47 +0000343}
344
345
Chris Lattnerff384912007-10-07 02:00:24 +0000346Selector SelectorTable::getSelector(unsigned nKeys, IdentifierInfo **IIV) {
347 if (nKeys < 2)
348 return Selector(IIV[0], nKeys);
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000349
Steve Naroff29238a02007-10-05 18:42:47 +0000350 llvm::FoldingSet<MultiKeywordSelector> *SelTab;
351
352 SelTab = static_cast<llvm::FoldingSet<MultiKeywordSelector> *>(Impl);
353
354 // Unique selector, to guarantee there is one per name.
355 llvm::FoldingSetNodeID ID;
356 MultiKeywordSelector::Profile(ID, IIV, nKeys);
357
358 void *InsertPos = 0;
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000359 if (MultiKeywordSelector *SI = SelTab->FindNodeOrInsertPos(ID, InsertPos))
Steve Naroff29238a02007-10-05 18:42:47 +0000360 return Selector(SI);
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000361
Steve Naroff29238a02007-10-05 18:42:47 +0000362 // MultiKeywordSelector objects are not allocated with new because they have a
363 // variable size array (for parameter types) at the end of them.
364 MultiKeywordSelector *SI =
365 (MultiKeywordSelector*)malloc(sizeof(MultiKeywordSelector) +
366 nKeys*sizeof(IdentifierInfo *));
367 new (SI) MultiKeywordSelector(nKeys, IIV);
368 SelTab->InsertNode(SI, InsertPos);
369 return Selector(SI);
370}
371
Steve Naroff29238a02007-10-05 18:42:47 +0000372SelectorTable::SelectorTable() {
373 Impl = new llvm::FoldingSet<MultiKeywordSelector>;
374}
375
376SelectorTable::~SelectorTable() {
377 delete static_cast<llvm::FoldingSet<MultiKeywordSelector> *>(Impl);
378}
379
Ted Kremenekc637e6b2007-10-23 22:18:37 +0000380//===----------------------------------------------------------------------===//
381// Serialization for IdentifierInfo and IdentifierTable.
382//===----------------------------------------------------------------------===//
383
384void llvm::SerializeTrait<IdentifierInfo>::Serialize(llvm::Serializer& S,
385 const IdentifierInfo& I) {
386
387 S.Emit<tok::TokenKind>(I.getTokenID());
388 S.EmitInt(I.getBuiltinID(),9);
389 S.Emit<tok::ObjCKeywordKind>(I.getObjCKeywordID());
390 S.Emit(I.hasMacroDefinition());
391 S.Emit(I.isExtensionToken());
392 S.Emit(I.isPoisoned());
393 S.Emit(I.isOtherTargetMacro());
394 S.Emit(I.isCPlusPlusOperatorKeyword());
395 S.Emit(I.isNonPortableBuiltin());
396}
397
398void llvm::SerializeTrait<IdentifierInfo>::Deserialize(llvm::Deserializer& D,
399 IdentifierInfo& I) {
400 tok::TokenKind X;
401 I.setTokenID(D.Read<tok::TokenKind>(X));
402
403 I.setBuiltinID(D.ReadInt(9));
404
405 tok::ObjCKeywordKind Y;
406 I.setObjCKeywordID(D.Read<tok::ObjCKeywordKind>(Y));
407
408 I.setHasMacroDefinition(D.ReadBool());
409 I.setIsExtensionToken(D.ReadBool());
410 I.setIsPoisoned(D.ReadBool());
411 I.setIsOtherTargetMacro(D.ReadBool());
412 I.setIsCPlusPlusOperatorKeyword(D.ReadBool());
413 I.setNonPortableBuiltin(D.ReadBool());
414}
415
416void llvm::SerializeTrait<IdentifierTable>::Serialize(llvm::Serializer& S,
417 const IdentifierTable& T){
418 S.Emit<unsigned>(T.size());
419
420 for (clang::IdentifierTable::iterator I=T.begin(), E=T.end(); I != E; ++I) {
421 S.EmitCString(I->getKeyData());
422 S.Emit(I->getValue());
423 }
424}
425
426void llvm::SerializeTrait<IdentifierTable>::Deserialize(llvm::Deserializer& D,
427 IdentifierTable& T) {
428 unsigned len = D.ReadInt();
429 std::vector<char> buff;
430 buff.reserve(200);
431
432 for (unsigned i = 0; i < len; ++i) {
433 D.ReadCString(buff);
434 IdentifierInfo& Info = T.get(&buff[0],&buff[0]+buff.size());
435 D.Read(Info);
436 }
437}
438
Steve Naroff29238a02007-10-05 18:42:47 +0000439