blob: 9a03d3f72d063e8c81e193a35418327fa108fea7 [file] [log] [blame]
//===--- IdentifierTable.cpp - Hash table for identifier lookup -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the IdentifierInfo, IdentifierVisitor, and
// IdentifierTable interfaces.
//
//===----------------------------------------------------------------------===//
14
15#include "clang/Lex/IdentifierTable.h"
16#include "clang/Lex/MacroInfo.h"
17#include "clang/Basic/LangOptions.h"
18using namespace clang;
19
//===----------------------------------------------------------------------===//
// Token Implementation
//===----------------------------------------------------------------------===//
23
24/// isObjCAtKeyword - Return true if we have an ObjC keyword identifier.
25bool Token::isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const {
26 return getKind() == tok::identifier &&
27 getIdentifierInfo()->getObjCKeywordID() == objcKey;
28}
29
30/// getObjCKeywordID - Return the ObjC keyword kind.
31tok::ObjCKeywordKind Token::getObjCKeywordID() const {
32 IdentifierInfo *specId = getIdentifierInfo();
33 return specId ? specId->getObjCKeywordID() : tok::objc_not_keyword;
34}
35
Steve Naroff6cb1d362007-09-28 22:22:11 +000036char *MultiKeywordSelector::getName(llvm::SmallVectorImpl<char> &methodName) {
Steve Naroff4ed9d662007-09-27 14:38:14 +000037 methodName[0] = '\0';
Steve Naroff6cb1d362007-09-28 22:22:11 +000038 keyword_iterator KeyIter = keyword_begin();
39 for (unsigned int i = 0; i < NumArgs; i++) {
40 if (KeyIter[i]) {
41 unsigned KeyLen = strlen(KeyIter[i]->getName());
42 methodName.append(KeyIter[i]->getName(), KeyIter[i]->getName()+KeyLen);
Steve Naroff4ed9d662007-09-27 14:38:14 +000043 }
Steve Naroff6cb1d362007-09-28 22:22:11 +000044 methodName.push_back(':');
Steve Naroff4ed9d662007-09-27 14:38:14 +000045 }
Steve Naroff6cb1d362007-09-28 22:22:11 +000046 methodName.push_back('\0');
47 return &methodName[0];
48}
49
50char *Selector::getName(llvm::SmallVectorImpl<char> &methodName) {
51 methodName[0] = '\0';
52 IdentifierInfo *II = getAsIdentifierInfo();
53 if (II) {
54 unsigned NameLen = strlen(II->getName());
55 methodName.append(II->getName(), II->getName()+NameLen);
56 if (getNumArgs() == 1)
57 methodName.push_back(':');
58 methodName.push_back('\0');
59 } else { // We have a multiple keyword selector (no embedded flags).
60 MultiKeywordSelector *SI = reinterpret_cast<MultiKeywordSelector *>(InfoPtr);
61 SI->getName(methodName);
62 }
Steve Naroff4ed9d662007-09-27 14:38:14 +000063 return &methodName[0];
64}
65
//===----------------------------------------------------------------------===//
// IdentifierInfo Implementation
//===----------------------------------------------------------------------===//
69
70IdentifierInfo::IdentifierInfo() {
71 Macro = 0;
72 TokenID = tok::identifier;
73 PPID = tok::pp_not_keyword;
74 ObjCID = tok::objc_not_keyword;
75 BuiltinID = 0;
76 IsExtension = false;
77 IsPoisoned = false;
78 IsOtherTargetMacro = false;
79 IsCPPOperatorKeyword = false;
80 IsNonPortableBuiltin = false;
81 FETokenInfo = 0;
82}
83
84IdentifierInfo::~IdentifierInfo() {
85 delete Macro;
86}
87
//===----------------------------------------------------------------------===//
// IdentifierTable Implementation
//===----------------------------------------------------------------------===//
91
92IdentifierTable::IdentifierTable(const LangOptions &LangOpts)
93 // Start with space for 8K identifiers.
94 : HashTable(8192) {
95
96 // Populate the identifier table with info about keywords for the current
97 // language.
98 AddKeywords(LangOpts);
99}
100
//===----------------------------------------------------------------------===//
// Language Keyword Implementation
//===----------------------------------------------------------------------===//
104
105/// AddKeyword - This method is used to associate a token ID with specific
106/// identifiers because they are language keywords. This causes the lexer to
107/// automatically map matching identifiers to specialized token codes.
108///
109/// The C90/C99/CPP/CPP0x flags are set to 0 if the token should be
110/// enabled in the specified langauge, set to 1 if it is an extension
111/// in the specified language, and set to 2 if disabled in the
112/// specified language.
113static void AddKeyword(const char *Keyword, unsigned KWLen,
114 tok::TokenKind TokenCode,
115 int C90, int C99, int CXX, int CXX0x,
116 const LangOptions &LangOpts, IdentifierTable &Table) {
117 int Flags = LangOpts.CPlusPlus ? (LangOpts.CPlusPlus0x? CXX0x : CXX)
118 : (LangOpts.C99 ? C99 : C90);
119
120 // Don't add this keyword if disabled in this language or if an extension
121 // and extensions are disabled.
122 if (Flags + LangOpts.NoExtensions >= 2) return;
123
124 IdentifierInfo &Info = Table.get(Keyword, Keyword+KWLen);
125 Info.setTokenID(TokenCode);
126 Info.setIsExtensionToken(Flags == 1);
127}
128
129static void AddAlias(const char *Keyword, unsigned KWLen,
130 const char *AliaseeKeyword, unsigned AliaseeKWLen,
131 const LangOptions &LangOpts, IdentifierTable &Table) {
132 IdentifierInfo &AliasInfo = Table.get(Keyword, Keyword+KWLen);
133 IdentifierInfo &AliaseeInfo = Table.get(AliaseeKeyword,
134 AliaseeKeyword+AliaseeKWLen);
135 AliasInfo.setTokenID(AliaseeInfo.getTokenID());
136 AliasInfo.setIsExtensionToken(AliaseeInfo.isExtensionToken());
137}
138
139/// AddPPKeyword - Register a preprocessor keyword like "define" "undef" or
140/// "elif".
141static void AddPPKeyword(tok::PPKeywordKind PPID,
142 const char *Name, unsigned NameLen,
143 IdentifierTable &Table) {
144 Table.get(Name, Name+NameLen).setPPKeywordID(PPID);
145}
146
147/// AddCXXOperatorKeyword - Register a C++ operator keyword alternative
148/// representations.
149static void AddCXXOperatorKeyword(const char *Keyword, unsigned KWLen,
150 tok::TokenKind TokenCode,
151 IdentifierTable &Table) {
152 IdentifierInfo &Info = Table.get(Keyword, Keyword + KWLen);
153 Info.setTokenID(TokenCode);
154 Info.setIsCPlusplusOperatorKeyword();
155}
156
157/// AddObjCKeyword - Register an Objective-C @keyword like "class" "selector" or
158/// "property".
159static void AddObjCKeyword(tok::ObjCKeywordKind ObjCID,
160 const char *Name, unsigned NameLen,
161 IdentifierTable &Table) {
162 Table.get(Name, Name+NameLen).setObjCKeywordID(ObjCID);
163}
164
165/// AddKeywords - Add all keywords to the symbol table.
166///
167void IdentifierTable::AddKeywords(const LangOptions &LangOpts) {
168 enum {
169 C90Shift = 0,
170 EXTC90 = 1 << C90Shift,
171 NOTC90 = 2 << C90Shift,
172 C99Shift = 2,
173 EXTC99 = 1 << C99Shift,
174 NOTC99 = 2 << C99Shift,
175 CPPShift = 4,
176 EXTCPP = 1 << CPPShift,
177 NOTCPP = 2 << CPPShift,
178 CPP0xShift = 6,
179 EXTCPP0x = 1 << CPP0xShift,
180 NOTCPP0x = 2 << CPP0xShift,
181 Mask = 3
182 };
183
184 // Add keywords and tokens for the current language.
185#define KEYWORD(NAME, FLAGS) \
186 AddKeyword(#NAME, strlen(#NAME), tok::kw_ ## NAME, \
187 ((FLAGS) >> C90Shift) & Mask, \
188 ((FLAGS) >> C99Shift) & Mask, \
189 ((FLAGS) >> CPPShift) & Mask, \
190 ((FLAGS) >> CPP0xShift) & Mask, LangOpts, *this);
191#define ALIAS(NAME, TOK) \
192 AddAlias(NAME, strlen(NAME), #TOK, strlen(#TOK), LangOpts, *this);
193#define PPKEYWORD(NAME) \
194 AddPPKeyword(tok::pp_##NAME, #NAME, strlen(#NAME), *this);
195#define CXX_KEYWORD_OPERATOR(NAME, ALIAS) \
196 if (LangOpts.CXXOperatorNames) \
197 AddCXXOperatorKeyword(#NAME, strlen(#NAME), tok::ALIAS, *this);
198#define OBJC1_AT_KEYWORD(NAME) \
199 if (LangOpts.ObjC1) \
200 AddObjCKeyword(tok::objc_##NAME, #NAME, strlen(#NAME), *this);
201#define OBJC2_AT_KEYWORD(NAME) \
202 if (LangOpts.ObjC2) \
203 AddObjCKeyword(tok::objc_##NAME, #NAME, strlen(#NAME), *this);
204#include "clang/Basic/TokenKinds.def"
205}
206
207
//===----------------------------------------------------------------------===//
// Stats Implementation
//===----------------------------------------------------------------------===//
211
212/// PrintStats - Print statistics about how well the identifier table is doing
213/// at hashing identifiers.
214void IdentifierTable::PrintStats() const {
215 unsigned NumBuckets = HashTable.getNumBuckets();
216 unsigned NumIdentifiers = HashTable.getNumItems();
217 unsigned NumEmptyBuckets = NumBuckets-NumIdentifiers;
218 unsigned AverageIdentifierSize = 0;
219 unsigned MaxIdentifierLength = 0;
220
221 // TODO: Figure out maximum times an identifier had to probe for -stats.
222 for (llvm::StringMap<IdentifierInfo, llvm::BumpPtrAllocator>::const_iterator
223 I = HashTable.begin(), E = HashTable.end(); I != E; ++I) {
224 unsigned IdLen = I->getKeyLength();
225 AverageIdentifierSize += IdLen;
226 if (MaxIdentifierLength < IdLen)
227 MaxIdentifierLength = IdLen;
228 }
229
230 fprintf(stderr, "\n*** Identifier Table Stats:\n");
231 fprintf(stderr, "# Identifiers: %d\n", NumIdentifiers);
232 fprintf(stderr, "# Empty Buckets: %d\n", NumEmptyBuckets);
233 fprintf(stderr, "Hash density (#identifiers per bucket): %f\n",
234 NumIdentifiers/(double)NumBuckets);
235 fprintf(stderr, "Ave identifier length: %f\n",
236 (AverageIdentifierSize/(double)NumIdentifiers));
237 fprintf(stderr, "Max identifier length: %d\n", MaxIdentifierLength);
238
239 // Compute statistics about the memory allocated for identifiers.
240 HashTable.getAllocator().PrintStats();
241}
Steve Naroff4ed9d662007-09-27 14:38:14 +0000242