blob: cf78da986926042092e0e3ad8238a88b3079a222 [file] [log] [blame]
Reid Spencer5f016e22007-07-11 17:01:13 +00001//===--- IdentifierTable.cpp - Hash table for identifier lookup -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
Chris Lattner0bc735f2007-12-29 19:59:25 +00005// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
Reid Spencer5f016e22007-07-11 17:01:13 +00007//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the IdentifierInfo, IdentifierVisitor, and
11// IdentifierTable interfaces.
12//
13//===----------------------------------------------------------------------===//
14
Chris Lattnerc7229c32007-10-07 08:58:51 +000015#include "clang/Basic/IdentifierTable.h"
Reid Spencer5f016e22007-07-11 17:01:13 +000016#include "clang/Basic/LangOptions.h"
Steve Naroff29238a02007-10-05 18:42:47 +000017#include "llvm/ADT/FoldingSet.h"
Chris Lattner85994262007-10-05 20:15:24 +000018#include "llvm/ADT/DenseMap.h"
Chris Lattner3daed522009-03-02 22:20:04 +000019#include <cstdio>
Ted Kremenekc637e6b2007-10-23 22:18:37 +000020
Reid Spencer5f016e22007-07-11 17:01:13 +000021using namespace clang;
22
23//===----------------------------------------------------------------------===//
24// IdentifierInfo Implementation
25//===----------------------------------------------------------------------===//
26
Ted Kremenekea9c26b2009-01-20 23:28:34 +000027IdentifierInfo::IdentifierInfo() {
Reid Spencer5f016e22007-07-11 17:01:13 +000028 TokenID = tok::identifier;
Douglas Gregor5142af32008-11-06 16:32:23 +000029 ObjCOrBuiltinID = 0;
Chris Lattner4365a7e2007-10-07 07:09:52 +000030 HasMacro = false;
Reid Spencer5f016e22007-07-11 17:01:13 +000031 IsExtension = false;
32 IsPoisoned = false;
Reid Spencer5f016e22007-07-11 17:01:13 +000033 IsCPPOperatorKeyword = false;
Chris Lattner6a170eb2009-01-21 07:43:11 +000034 NeedsHandleIdentifier = false;
Reid Spencer5f016e22007-07-11 17:01:13 +000035 FETokenInfo = 0;
Ted Kremenekea9c26b2009-01-20 23:28:34 +000036 Entry = 0;
Reid Spencer5f016e22007-07-11 17:01:13 +000037}
38
Reid Spencer5f016e22007-07-11 17:01:13 +000039//===----------------------------------------------------------------------===//
40// IdentifierTable Implementation
41//===----------------------------------------------------------------------===//
42
Ted Kremenek72b1b152009-01-15 18:47:46 +000043IdentifierInfoLookup::~IdentifierInfoLookup() {}
44
Douglas Gregor8c5a7602009-04-25 23:30:02 +000045ExternalIdentifierLookup::~ExternalIdentifierLookup() {}
46
Ted Kremenek72b1b152009-01-15 18:47:46 +000047IdentifierTable::IdentifierTable(const LangOptions &LangOpts,
48 IdentifierInfoLookup* externalLookup)
49 : HashTable(8192), // Start with space for 8K identifiers.
50 ExternalLookup(externalLookup) {
Reid Spencer5f016e22007-07-11 17:01:13 +000051
52 // Populate the identifier table with info about keywords for the current
53 // language.
54 AddKeywords(LangOpts);
55}
56
57//===----------------------------------------------------------------------===//
58// Language Keyword Implementation
59//===----------------------------------------------------------------------===//
60
// Bit flags used as the FLAGS argument of the KEYWORD/ALIAS entries in
// TokenKinds.def.  Each flag occupies its own bit so they can be or'ed
// together; AddKeyword tests them against the active language options.
namespace {
  enum {
    KEYALL   = 1 << 0,
    KEYC99   = 1 << 1,
    KEYCXX   = 1 << 2,
    KEYCXX0X = 1 << 3,
    KEYGNU   = 1 << 4,
    KEYMS    = 1 << 5
  };
}
72
Reid Spencer5f016e22007-07-11 17:01:13 +000073/// AddKeyword - This method is used to associate a token ID with specific
74/// identifiers because they are language keywords. This causes the lexer to
75/// automatically map matching identifiers to specialized token codes.
76///
Chris Lattnerd4b80f12007-07-16 04:18:29 +000077/// The C90/C99/CPP/CPP0x flags are set to 0 if the token should be
78/// enabled in the specified langauge, set to 1 if it is an extension
79/// in the specified language, and set to 2 if disabled in the
80/// specified language.
Reid Spencer5f016e22007-07-11 17:01:13 +000081static void AddKeyword(const char *Keyword, unsigned KWLen,
Eli Friedmaneb32fde2009-04-28 03:13:54 +000082 tok::TokenKind TokenCode, unsigned Flags,
Reid Spencer5f016e22007-07-11 17:01:13 +000083 const LangOptions &LangOpts, IdentifierTable &Table) {
Eli Friedmaneb32fde2009-04-28 03:13:54 +000084 unsigned AddResult = 0;
85 if (Flags & KEYALL) AddResult = 2;
86 else if (LangOpts.CPlusPlus && (Flags & KEYCXX)) AddResult = 2;
87 else if (LangOpts.CPlusPlus0x && (Flags & KEYCXX0X)) AddResult = 2;
88 else if (LangOpts.C99 && (Flags & KEYC99)) AddResult = 2;
89 else if (LangOpts.GNUMode && (Flags & KEYGNU)) AddResult = 1;
90 else if (LangOpts.Microsoft && (Flags & KEYMS)) AddResult = 1;
91
92 // Don't add this keyword if disabled in this language.
93 if (AddResult == 0) return;
94
Reid Spencer5f016e22007-07-11 17:01:13 +000095 IdentifierInfo &Info = Table.get(Keyword, Keyword+KWLen);
96 Info.setTokenID(TokenCode);
Eli Friedmaneb32fde2009-04-28 03:13:54 +000097 Info.setIsExtensionToken(AddResult == 1);
Reid Spencer5f016e22007-07-11 17:01:13 +000098}
99
Reid Spencer5f016e22007-07-11 17:01:13 +0000100/// AddCXXOperatorKeyword - Register a C++ operator keyword alternative
101/// representations.
102static void AddCXXOperatorKeyword(const char *Keyword, unsigned KWLen,
103 tok::TokenKind TokenCode,
104 IdentifierTable &Table) {
105 IdentifierInfo &Info = Table.get(Keyword, Keyword + KWLen);
106 Info.setTokenID(TokenCode);
Ted Kremenekc637e6b2007-10-23 22:18:37 +0000107 Info.setIsCPlusPlusOperatorKeyword();
Reid Spencer5f016e22007-07-11 17:01:13 +0000108}
109
110/// AddObjCKeyword - Register an Objective-C @keyword like "class" "selector" or
111/// "property".
112static void AddObjCKeyword(tok::ObjCKeywordKind ObjCID,
113 const char *Name, unsigned NameLen,
114 IdentifierTable &Table) {
115 Table.get(Name, Name+NameLen).setObjCKeywordID(ObjCID);
116}
117
118/// AddKeywords - Add all keywords to the symbol table.
119///
120void IdentifierTable::AddKeywords(const LangOptions &LangOpts) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000121 // Add keywords and tokens for the current language.
122#define KEYWORD(NAME, FLAGS) \
123 AddKeyword(#NAME, strlen(#NAME), tok::kw_ ## NAME, \
Eli Friedmaneb32fde2009-04-28 03:13:54 +0000124 FLAGS, LangOpts, *this);
125#define ALIAS(NAME, TOK, FLAGS) \
126 AddKeyword(NAME, strlen(NAME), tok::kw_ ## TOK, \
127 FLAGS, LangOpts, *this);
Reid Spencer5f016e22007-07-11 17:01:13 +0000128#define CXX_KEYWORD_OPERATOR(NAME, ALIAS) \
129 if (LangOpts.CXXOperatorNames) \
130 AddCXXOperatorKeyword(#NAME, strlen(#NAME), tok::ALIAS, *this);
131#define OBJC1_AT_KEYWORD(NAME) \
132 if (LangOpts.ObjC1) \
133 AddObjCKeyword(tok::objc_##NAME, #NAME, strlen(#NAME), *this);
134#define OBJC2_AT_KEYWORD(NAME) \
135 if (LangOpts.ObjC2) \
136 AddObjCKeyword(tok::objc_##NAME, #NAME, strlen(#NAME), *this);
137#include "clang/Basic/TokenKinds.def"
138}
139
Chris Lattner387b98d2007-10-07 07:52:34 +0000140tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const {
141 // We use a perfect hash function here involving the length of the keyword,
142 // the first and third character. For preprocessor ID's there are no
143 // collisions (if there were, the switch below would complain about duplicate
144 // case values). Note that this depends on 'if' being null terminated.
145
146#define HASH(LEN, FIRST, THIRD) \
147 (LEN << 5) + (((FIRST-'a') + (THIRD-'a')) & 31)
148#define CASE(LEN, FIRST, THIRD, NAME) \
149 case HASH(LEN, FIRST, THIRD): \
150 return memcmp(Name, #NAME, LEN) ? tok::pp_not_keyword : tok::pp_ ## NAME
151
152 unsigned Len = getLength();
Chris Lattnera31f0302007-10-10 20:59:57 +0000153 if (Len < 2) return tok::pp_not_keyword;
Chris Lattner387b98d2007-10-07 07:52:34 +0000154 const char *Name = getName();
155 switch (HASH(Len, Name[0], Name[2])) {
156 default: return tok::pp_not_keyword;
157 CASE( 2, 'i', '\0', if);
158 CASE( 4, 'e', 'i', elif);
159 CASE( 4, 'e', 's', else);
160 CASE( 4, 'l', 'n', line);
161 CASE( 4, 's', 'c', sccs);
162 CASE( 5, 'e', 'd', endif);
163 CASE( 5, 'e', 'r', error);
164 CASE( 5, 'i', 'e', ident);
165 CASE( 5, 'i', 'd', ifdef);
166 CASE( 5, 'u', 'd', undef);
167
168 CASE( 6, 'a', 's', assert);
169 CASE( 6, 'd', 'f', define);
170 CASE( 6, 'i', 'n', ifndef);
171 CASE( 6, 'i', 'p', import);
172 CASE( 6, 'p', 'a', pragma);
173
174 CASE( 7, 'd', 'f', defined);
175 CASE( 7, 'i', 'c', include);
176 CASE( 7, 'w', 'r', warning);
177
178 CASE( 8, 'u', 'a', unassert);
179 CASE(12, 'i', 'c', include_next);
Chris Lattnerb8e240e2009-04-08 18:24:34 +0000180
181 CASE(16, '_', 'i', __include_macros);
Chris Lattner387b98d2007-10-07 07:52:34 +0000182#undef CASE
183#undef HASH
184 }
185}
Reid Spencer5f016e22007-07-11 17:01:13 +0000186
187//===----------------------------------------------------------------------===//
188// Stats Implementation
189//===----------------------------------------------------------------------===//
190
191/// PrintStats - Print statistics about how well the identifier table is doing
192/// at hashing identifiers.
193void IdentifierTable::PrintStats() const {
194 unsigned NumBuckets = HashTable.getNumBuckets();
195 unsigned NumIdentifiers = HashTable.getNumItems();
196 unsigned NumEmptyBuckets = NumBuckets-NumIdentifiers;
197 unsigned AverageIdentifierSize = 0;
198 unsigned MaxIdentifierLength = 0;
199
200 // TODO: Figure out maximum times an identifier had to probe for -stats.
Ted Kremenekea9c26b2009-01-20 23:28:34 +0000201 for (llvm::StringMap<IdentifierInfo*, llvm::BumpPtrAllocator>::const_iterator
Reid Spencer5f016e22007-07-11 17:01:13 +0000202 I = HashTable.begin(), E = HashTable.end(); I != E; ++I) {
203 unsigned IdLen = I->getKeyLength();
204 AverageIdentifierSize += IdLen;
205 if (MaxIdentifierLength < IdLen)
206 MaxIdentifierLength = IdLen;
207 }
208
209 fprintf(stderr, "\n*** Identifier Table Stats:\n");
210 fprintf(stderr, "# Identifiers: %d\n", NumIdentifiers);
211 fprintf(stderr, "# Empty Buckets: %d\n", NumEmptyBuckets);
212 fprintf(stderr, "Hash density (#identifiers per bucket): %f\n",
213 NumIdentifiers/(double)NumBuckets);
214 fprintf(stderr, "Ave identifier length: %f\n",
215 (AverageIdentifierSize/(double)NumIdentifiers));
216 fprintf(stderr, "Max identifier length: %d\n", MaxIdentifierLength);
217
218 // Compute statistics about the memory allocated for identifiers.
219 HashTable.getAllocator().PrintStats();
220}
Steve Naroff68d331a2007-09-27 14:38:14 +0000221
Steve Naroff29238a02007-10-05 18:42:47 +0000222//===----------------------------------------------------------------------===//
223// SelectorTable Implementation
224//===----------------------------------------------------------------------===//
225
Chris Lattner85994262007-10-05 20:15:24 +0000226unsigned llvm::DenseMapInfo<clang::Selector>::getHashValue(clang::Selector S) {
227 return DenseMapInfo<void*>::getHashValue(S.getAsOpaquePtr());
228}
229
Douglas Gregor2e1cd422008-11-17 14:58:09 +0000230namespace clang {
Steve Naroff29238a02007-10-05 18:42:47 +0000231/// MultiKeywordSelector - One of these variable length records is kept for each
232/// selector containing more than one keyword. We use a folding set
233/// to unique aggregate names (keyword selectors in ObjC parlance). Access to
234/// this class is provided strictly through Selector.
Douglas Gregor2e1cd422008-11-17 14:58:09 +0000235class MultiKeywordSelector
236 : public DeclarationNameExtra, public llvm::FoldingSetNode {
Douglas Gregor2e1cd422008-11-17 14:58:09 +0000237 MultiKeywordSelector(unsigned nKeys) {
238 ExtraKindOrNumArgs = NUM_EXTRA_KINDS + nKeys;
239 }
Steve Naroff29238a02007-10-05 18:42:47 +0000240public:
Steve Naroff29238a02007-10-05 18:42:47 +0000241 // Constructor for keyword selectors.
242 MultiKeywordSelector(unsigned nKeys, IdentifierInfo **IIV) {
243 assert((nKeys > 1) && "not a multi-keyword selector");
Douglas Gregor2e1cd422008-11-17 14:58:09 +0000244 ExtraKindOrNumArgs = NUM_EXTRA_KINDS + nKeys;
Ted Kremenekbdbb2852007-11-30 22:46:56 +0000245
Steve Naroff29238a02007-10-05 18:42:47 +0000246 // Fill in the trailing keyword array.
247 IdentifierInfo **KeyInfo = reinterpret_cast<IdentifierInfo **>(this+1);
248 for (unsigned i = 0; i != nKeys; ++i)
249 KeyInfo[i] = IIV[i];
Ted Kremenekbdbb2852007-11-30 22:46:56 +0000250 }
251
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000252 // getName - Derive the full selector name and return it.
253 std::string getName() const;
254
Douglas Gregor2e1cd422008-11-17 14:58:09 +0000255 unsigned getNumArgs() const { return ExtraKindOrNumArgs - NUM_EXTRA_KINDS; }
Steve Naroff29238a02007-10-05 18:42:47 +0000256
257 typedef IdentifierInfo *const *keyword_iterator;
258 keyword_iterator keyword_begin() const {
259 return reinterpret_cast<keyword_iterator>(this+1);
260 }
261 keyword_iterator keyword_end() const {
Douglas Gregor2e1cd422008-11-17 14:58:09 +0000262 return keyword_begin()+getNumArgs();
Steve Naroff29238a02007-10-05 18:42:47 +0000263 }
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000264 IdentifierInfo *getIdentifierInfoForSlot(unsigned i) const {
Douglas Gregor2e1cd422008-11-17 14:58:09 +0000265 assert(i < getNumArgs() && "getIdentifierInfoForSlot(): illegal index");
Steve Naroff29238a02007-10-05 18:42:47 +0000266 return keyword_begin()[i];
267 }
268 static void Profile(llvm::FoldingSetNodeID &ID,
269 keyword_iterator ArgTys, unsigned NumArgs) {
270 ID.AddInteger(NumArgs);
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000271 for (unsigned i = 0; i != NumArgs; ++i)
272 ID.AddPointer(ArgTys[i]);
Steve Naroff29238a02007-10-05 18:42:47 +0000273 }
274 void Profile(llvm::FoldingSetNodeID &ID) {
Douglas Gregor2e1cd422008-11-17 14:58:09 +0000275 Profile(ID, keyword_begin(), getNumArgs());
Steve Naroff29238a02007-10-05 18:42:47 +0000276 }
277};
Chris Lattner85994262007-10-05 20:15:24 +0000278} // end namespace clang.
Steve Naroff29238a02007-10-05 18:42:47 +0000279
280unsigned Selector::getNumArgs() const {
281 unsigned IIF = getIdentifierInfoFlag();
282 if (IIF == ZeroArg)
283 return 0;
284 if (IIF == OneArg)
285 return 1;
286 // We point to a MultiKeywordSelector (pointer doesn't contain any flags).
287 MultiKeywordSelector *SI = reinterpret_cast<MultiKeywordSelector *>(InfoPtr);
288 return SI->getNumArgs();
289}
290
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000291IdentifierInfo *Selector::getIdentifierInfoForSlot(unsigned argIndex) const {
Douglas Gregor405bad02009-04-26 22:20:50 +0000292 if (getIdentifierInfoFlag()) {
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000293 assert(argIndex == 0 && "illegal keyword index");
Douglas Gregor405bad02009-04-26 22:20:50 +0000294 return getAsIdentifierInfo();
Steve Naroff29238a02007-10-05 18:42:47 +0000295 }
296 // We point to a MultiKeywordSelector (pointer doesn't contain any flags).
297 MultiKeywordSelector *SI = reinterpret_cast<MultiKeywordSelector *>(InfoPtr);
298 return SI->getIdentifierInfoForSlot(argIndex);
299}
300
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000301std::string MultiKeywordSelector::getName() const {
302 std::string Result;
303 unsigned Length = 0;
304 for (keyword_iterator I = keyword_begin(), E = keyword_end(); I != E; ++I) {
305 if (*I)
306 Length += (*I)->getLength();
307 ++Length; // :
Steve Naroff29238a02007-10-05 18:42:47 +0000308 }
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000309
310 Result.reserve(Length);
311
312 for (keyword_iterator I = keyword_begin(), E = keyword_end(); I != E; ++I) {
313 if (*I)
314 Result.insert(Result.end(), (*I)->getName(),
315 (*I)->getName()+(*I)->getLength());
316 Result.push_back(':');
317 }
318
319 return Result;
Steve Naroff29238a02007-10-05 18:42:47 +0000320}
321
Chris Lattner077bf5e2008-11-24 03:33:13 +0000322std::string Selector::getAsString() const {
Douglas Gregor405bad02009-04-26 22:20:50 +0000323 if (InfoPtr == 0)
324 return "<null selector>";
325
Ted Kremenekf5ed3962009-03-06 23:36:28 +0000326 if (InfoPtr & ArgFlags) {
327 IdentifierInfo *II = getAsIdentifierInfo();
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000328
Ted Kremenek150ec292009-03-07 01:22:02 +0000329 // If the number of arguments is 0 then II is guaranteed to not be null.
Ted Kremenekf5ed3962009-03-06 23:36:28 +0000330 if (getNumArgs() == 0)
Ted Kremenek150ec292009-03-07 01:22:02 +0000331 return II->getName();
Ted Kremenekf5ed3962009-03-06 23:36:28 +0000332
333 std::string Res = II ? II->getName() : "";
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000334 Res += ":";
335 return Res;
Steve Naroff29238a02007-10-05 18:42:47 +0000336 }
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000337
338 // We have a multiple keyword selector (no embedded flags).
339 return reinterpret_cast<MultiKeywordSelector *>(InfoPtr)->getName();
Steve Naroff29238a02007-10-05 18:42:47 +0000340}
341
342
Chris Lattner5f7d2282009-03-04 05:35:38 +0000343namespace {
344 struct SelectorTableImpl {
345 llvm::FoldingSet<MultiKeywordSelector> Table;
346 llvm::BumpPtrAllocator Allocator;
347 };
348} // end anonymous namespace.
349
350static SelectorTableImpl &getSelectorTableImpl(void *P) {
351 return *static_cast<SelectorTableImpl*>(P);
352}
353
354
Chris Lattnerff384912007-10-07 02:00:24 +0000355Selector SelectorTable::getSelector(unsigned nKeys, IdentifierInfo **IIV) {
356 if (nKeys < 2)
357 return Selector(IIV[0], nKeys);
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000358
Chris Lattner5f7d2282009-03-04 05:35:38 +0000359 SelectorTableImpl &SelTabImpl = getSelectorTableImpl(Impl);
Steve Naroff29238a02007-10-05 18:42:47 +0000360
361 // Unique selector, to guarantee there is one per name.
362 llvm::FoldingSetNodeID ID;
363 MultiKeywordSelector::Profile(ID, IIV, nKeys);
364
365 void *InsertPos = 0;
Chris Lattner5f7d2282009-03-04 05:35:38 +0000366 if (MultiKeywordSelector *SI =
367 SelTabImpl.Table.FindNodeOrInsertPos(ID, InsertPos))
Steve Naroff29238a02007-10-05 18:42:47 +0000368 return Selector(SI);
Chris Lattnerf836e3f2007-10-07 01:33:16 +0000369
Steve Naroff29238a02007-10-05 18:42:47 +0000370 // MultiKeywordSelector objects are not allocated with new because they have a
371 // variable size array (for parameter types) at the end of them.
Chris Lattner5f7d2282009-03-04 05:35:38 +0000372 unsigned Size = sizeof(MultiKeywordSelector) + nKeys*sizeof(IdentifierInfo *);
373 MultiKeywordSelector *SI =
374 (MultiKeywordSelector*)SelTabImpl.Allocator.Allocate(Size,
375 llvm::alignof<MultiKeywordSelector>());
Steve Naroff29238a02007-10-05 18:42:47 +0000376 new (SI) MultiKeywordSelector(nKeys, IIV);
Chris Lattner5f7d2282009-03-04 05:35:38 +0000377 SelTabImpl.Table.InsertNode(SI, InsertPos);
Steve Naroff29238a02007-10-05 18:42:47 +0000378 return Selector(SI);
379}
380
Steve Naroff29238a02007-10-05 18:42:47 +0000381SelectorTable::SelectorTable() {
Chris Lattner5f7d2282009-03-04 05:35:38 +0000382 Impl = new SelectorTableImpl();
Steve Naroff29238a02007-10-05 18:42:47 +0000383}
384
385SelectorTable::~SelectorTable() {
Chris Lattner5f7d2282009-03-04 05:35:38 +0000386 delete &getSelectorTableImpl(Impl);
Steve Naroff29238a02007-10-05 18:42:47 +0000387}
388