blob: de68435549498525e15a42bd603dd1a5ea142f3f [file] [log] [blame]
Chris Lattner4b009652007-07-25 00:24:17 +00001//===--- IdentifierTable.cpp - Hash table for identifier lookup -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file was developed by Chris Lattner and is distributed under
6// the University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the IdentifierInfo, IdentifierVisitor, and
11// IdentifierTable interfaces.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Lex/IdentifierTable.h"
16#include "clang/Lex/MacroInfo.h"
17#include "clang/Basic/LangOptions.h"
18using namespace clang;
19
20//===----------------------------------------------------------------------===//
Steve Naroff87c329f2007-08-23 18:16:40 +000021// Token Implementation
22//===----------------------------------------------------------------------===//
23
24/// isObjCAtKeyword - Return true if we have an ObjC keyword identifier.
25bool Token::isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const {
26 return getKind() == tok::identifier &&
27 getIdentifierInfo()->getObjCKeywordID() == objcKey;
28}
29
30/// getObjCKeywordID - Return the ObjC keyword kind.
31tok::ObjCKeywordKind Token::getObjCKeywordID() const {
32 IdentifierInfo *specId = getIdentifierInfo();
33 return specId ? specId->getObjCKeywordID() : tok::objc_not_keyword;
34}
35
Steve Naroffe41de352007-09-27 18:52:21 +000036char *SelectorInfo::getName(llvm::SmallVectorImpl<char> &methodName) {
Steve Naroff4ed9d662007-09-27 14:38:14 +000037 int len=0;
38 methodName[0] = '\0';
39 if (NumArgs) {
40 keyword_iterator KeyIter = keyword_begin();
41 for (unsigned int i = 0; i < NumArgs; i++) {
42 if (KeyIter[i]) {
Steve Naroffe41de352007-09-27 18:52:21 +000043 unsigned KeyLen = strlen(KeyIter[i]->getName());
44 methodName.append(KeyIter[i]->getName(), KeyIter[i]->getName()+KeyLen);
45 len += KeyLen;
Steve Naroff4ed9d662007-09-27 14:38:14 +000046 }
Steve Naroffe41de352007-09-27 18:52:21 +000047 methodName.push_back(':');
Steve Naroff4ed9d662007-09-27 14:38:14 +000048 len++;
49 }
50 } else {
51 IdentifierInfo **UnaryInfo = reinterpret_cast<IdentifierInfo **>(this+1);
Steve Naroffe41de352007-09-27 18:52:21 +000052 unsigned NameLen = strlen(UnaryInfo[0]->getName());
53 methodName.append(UnaryInfo[0]->getName(), UnaryInfo[0]->getName()+NameLen);
54 len += NameLen;
Steve Naroff4ed9d662007-09-27 14:38:14 +000055 }
56 methodName[len] = '\0';
57 return &methodName[0];
58}
59
Steve Naroff87c329f2007-08-23 18:16:40 +000060//===----------------------------------------------------------------------===//
Chris Lattner4b009652007-07-25 00:24:17 +000061// IdentifierInfo Implementation
62//===----------------------------------------------------------------------===//
63
64IdentifierInfo::IdentifierInfo() {
65 Macro = 0;
66 TokenID = tok::identifier;
67 PPID = tok::pp_not_keyword;
68 ObjCID = tok::objc_not_keyword;
69 BuiltinID = 0;
70 IsExtension = false;
71 IsPoisoned = false;
72 IsOtherTargetMacro = false;
73 IsCPPOperatorKeyword = false;
74 IsNonPortableBuiltin = false;
75 FETokenInfo = 0;
76}
77
78IdentifierInfo::~IdentifierInfo() {
79 delete Macro;
80}
81
82//===----------------------------------------------------------------------===//
83// IdentifierTable Implementation
84//===----------------------------------------------------------------------===//
85
86IdentifierTable::IdentifierTable(const LangOptions &LangOpts)
87 // Start with space for 8K identifiers.
88 : HashTable(8192) {
89
90 // Populate the identifier table with info about keywords for the current
91 // language.
92 AddKeywords(LangOpts);
93}
94
95//===----------------------------------------------------------------------===//
96// Language Keyword Implementation
97//===----------------------------------------------------------------------===//
98
99/// AddKeyword - This method is used to associate a token ID with specific
100/// identifiers because they are language keywords. This causes the lexer to
101/// automatically map matching identifiers to specialized token codes.
102///
103/// The C90/C99/CPP/CPP0x flags are set to 0 if the token should be
104/// enabled in the specified langauge, set to 1 if it is an extension
105/// in the specified language, and set to 2 if disabled in the
106/// specified language.
107static void AddKeyword(const char *Keyword, unsigned KWLen,
108 tok::TokenKind TokenCode,
109 int C90, int C99, int CXX, int CXX0x,
110 const LangOptions &LangOpts, IdentifierTable &Table) {
111 int Flags = LangOpts.CPlusPlus ? (LangOpts.CPlusPlus0x? CXX0x : CXX)
112 : (LangOpts.C99 ? C99 : C90);
113
114 // Don't add this keyword if disabled in this language or if an extension
115 // and extensions are disabled.
116 if (Flags + LangOpts.NoExtensions >= 2) return;
117
118 IdentifierInfo &Info = Table.get(Keyword, Keyword+KWLen);
119 Info.setTokenID(TokenCode);
120 Info.setIsExtensionToken(Flags == 1);
121}
122
123static void AddAlias(const char *Keyword, unsigned KWLen,
124 const char *AliaseeKeyword, unsigned AliaseeKWLen,
125 const LangOptions &LangOpts, IdentifierTable &Table) {
126 IdentifierInfo &AliasInfo = Table.get(Keyword, Keyword+KWLen);
127 IdentifierInfo &AliaseeInfo = Table.get(AliaseeKeyword,
128 AliaseeKeyword+AliaseeKWLen);
129 AliasInfo.setTokenID(AliaseeInfo.getTokenID());
130 AliasInfo.setIsExtensionToken(AliaseeInfo.isExtensionToken());
131}
132
133/// AddPPKeyword - Register a preprocessor keyword like "define" "undef" or
134/// "elif".
135static void AddPPKeyword(tok::PPKeywordKind PPID,
136 const char *Name, unsigned NameLen,
137 IdentifierTable &Table) {
138 Table.get(Name, Name+NameLen).setPPKeywordID(PPID);
139}
140
141/// AddCXXOperatorKeyword - Register a C++ operator keyword alternative
142/// representations.
143static void AddCXXOperatorKeyword(const char *Keyword, unsigned KWLen,
144 tok::TokenKind TokenCode,
145 IdentifierTable &Table) {
146 IdentifierInfo &Info = Table.get(Keyword, Keyword + KWLen);
147 Info.setTokenID(TokenCode);
148 Info.setIsCPlusplusOperatorKeyword();
149}
150
151/// AddObjCKeyword - Register an Objective-C @keyword like "class" "selector" or
152/// "property".
153static void AddObjCKeyword(tok::ObjCKeywordKind ObjCID,
154 const char *Name, unsigned NameLen,
155 IdentifierTable &Table) {
156 Table.get(Name, Name+NameLen).setObjCKeywordID(ObjCID);
157}
158
159/// AddKeywords - Add all keywords to the symbol table.
160///
161void IdentifierTable::AddKeywords(const LangOptions &LangOpts) {
162 enum {
163 C90Shift = 0,
164 EXTC90 = 1 << C90Shift,
165 NOTC90 = 2 << C90Shift,
166 C99Shift = 2,
167 EXTC99 = 1 << C99Shift,
168 NOTC99 = 2 << C99Shift,
169 CPPShift = 4,
170 EXTCPP = 1 << CPPShift,
171 NOTCPP = 2 << CPPShift,
172 CPP0xShift = 6,
173 EXTCPP0x = 1 << CPP0xShift,
174 NOTCPP0x = 2 << CPP0xShift,
175 Mask = 3
176 };
177
178 // Add keywords and tokens for the current language.
179#define KEYWORD(NAME, FLAGS) \
180 AddKeyword(#NAME, strlen(#NAME), tok::kw_ ## NAME, \
181 ((FLAGS) >> C90Shift) & Mask, \
182 ((FLAGS) >> C99Shift) & Mask, \
183 ((FLAGS) >> CPPShift) & Mask, \
184 ((FLAGS) >> CPP0xShift) & Mask, LangOpts, *this);
185#define ALIAS(NAME, TOK) \
186 AddAlias(NAME, strlen(NAME), #TOK, strlen(#TOK), LangOpts, *this);
187#define PPKEYWORD(NAME) \
188 AddPPKeyword(tok::pp_##NAME, #NAME, strlen(#NAME), *this);
189#define CXX_KEYWORD_OPERATOR(NAME, ALIAS) \
190 if (LangOpts.CXXOperatorNames) \
191 AddCXXOperatorKeyword(#NAME, strlen(#NAME), tok::ALIAS, *this);
192#define OBJC1_AT_KEYWORD(NAME) \
193 if (LangOpts.ObjC1) \
194 AddObjCKeyword(tok::objc_##NAME, #NAME, strlen(#NAME), *this);
195#define OBJC2_AT_KEYWORD(NAME) \
196 if (LangOpts.ObjC2) \
197 AddObjCKeyword(tok::objc_##NAME, #NAME, strlen(#NAME), *this);
198#include "clang/Basic/TokenKinds.def"
199}
200
201
202//===----------------------------------------------------------------------===//
203// Stats Implementation
204//===----------------------------------------------------------------------===//
205
206/// PrintStats - Print statistics about how well the identifier table is doing
207/// at hashing identifiers.
208void IdentifierTable::PrintStats() const {
209 unsigned NumBuckets = HashTable.getNumBuckets();
210 unsigned NumIdentifiers = HashTable.getNumItems();
211 unsigned NumEmptyBuckets = NumBuckets-NumIdentifiers;
212 unsigned AverageIdentifierSize = 0;
213 unsigned MaxIdentifierLength = 0;
214
215 // TODO: Figure out maximum times an identifier had to probe for -stats.
216 for (llvm::StringMap<IdentifierInfo, llvm::BumpPtrAllocator>::const_iterator
217 I = HashTable.begin(), E = HashTable.end(); I != E; ++I) {
218 unsigned IdLen = I->getKeyLength();
219 AverageIdentifierSize += IdLen;
220 if (MaxIdentifierLength < IdLen)
221 MaxIdentifierLength = IdLen;
222 }
223
224 fprintf(stderr, "\n*** Identifier Table Stats:\n");
225 fprintf(stderr, "# Identifiers: %d\n", NumIdentifiers);
226 fprintf(stderr, "# Empty Buckets: %d\n", NumEmptyBuckets);
227 fprintf(stderr, "Hash density (#identifiers per bucket): %f\n",
228 NumIdentifiers/(double)NumBuckets);
229 fprintf(stderr, "Ave identifier length: %f\n",
230 (AverageIdentifierSize/(double)NumIdentifiers));
231 fprintf(stderr, "Max identifier length: %d\n", MaxIdentifierLength);
232
233 // Compute statistics about the memory allocated for identifiers.
234 HashTable.getAllocator().PrintStats();
235}
Steve Naroff4ed9d662007-09-27 14:38:14 +0000236