blob: fa48bccd150c5b973106ae53a3a44e0cb01697c2 [file] [log] [blame]
Chris Lattner22eb9722006-06-18 05:43:12 +00001//===--- Preprocess.cpp - C Language Family Preprocessor Implementation ---===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file was developed by Chris Lattner and is distributed under
6// the University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
Chris Lattnerc79f6fb2006-07-04 17:53:21 +000010// This file implements the IdentifierInfo, IdentifierVisitor, and
Chris Lattner91cbf112006-07-03 04:28:52 +000011// IdentifierTable interfaces.
Chris Lattner22eb9722006-06-18 05:43:12 +000012//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Lex/IdentifierTable.h"
16#include "clang/Lex/MacroInfo.h"
Chris Lattner25e0d542006-10-18 06:07:05 +000017#include "clang/Basic/LangOptions.h"
Chris Lattner22eb9722006-06-18 05:43:12 +000018#include <iostream>
19using namespace llvm;
20using namespace clang;
21
22//===----------------------------------------------------------------------===//
Chris Lattnerc79f6fb2006-07-04 17:53:21 +000023// IdentifierInfo Implementation
Chris Lattner22eb9722006-06-18 05:43:12 +000024//===----------------------------------------------------------------------===//
25
Chris Lattnerc79f6fb2006-07-04 17:53:21 +000026void IdentifierInfo::Destroy() {
Chris Lattner22eb9722006-06-18 05:43:12 +000027 delete Macro;
28}
29
Chris Lattner91cbf112006-07-03 04:28:52 +000030//===----------------------------------------------------------------------===//
31// IdentifierVisitor Implementation
32//===----------------------------------------------------------------------===//
33
34IdentifierVisitor::~IdentifierVisitor() {
35}
Chris Lattner22eb9722006-06-18 05:43:12 +000036
37//===----------------------------------------------------------------------===//
38// Memory Allocation Support
39//===----------------------------------------------------------------------===//
40
41/// The identifier table has a very simple memory allocation pattern: it just
42/// keeps allocating identifiers, then never frees them unless it frees them
43/// all. As such, we use a simple bump-pointer memory allocator to make
44/// allocation speedy. Shark showed that malloc was 27% of the time spent in
45/// IdentifierTable::getIdentifier with malloc, and takes a 4.3% time with this.
46#define USE_ALLOCATOR 1
47#if USE_ALLOCATOR
48
49namespace {
50class MemRegion {
51 unsigned RegionSize;
52 MemRegion *Next;
53 char *NextPtr;
54public:
55 void Init(unsigned size, MemRegion *next) {
56 RegionSize = size;
57 Next = next;
58 NextPtr = (char*)(this+1);
59
60 // FIXME: uses GCC extension.
Chris Lattnerc79f6fb2006-07-04 17:53:21 +000061 unsigned Alignment = __alignof__(IdentifierInfo);
Chris Lattner22eb9722006-06-18 05:43:12 +000062 NextPtr = (char*)((intptr_t)(NextPtr+Alignment-1) &
63 ~(intptr_t)(Alignment-1));
64 }
65
66 const MemRegion *getNext() const { return Next; }
67 unsigned getNumBytesAllocated() const {
68 return NextPtr-(const char*)this;
69 }
70
71 /// Allocate - Allocate and return at least the specified number of bytes.
72 ///
73 void *Allocate(unsigned AllocSize, MemRegion **RegPtr) {
74 // FIXME: uses GCC extension.
Chris Lattnerc79f6fb2006-07-04 17:53:21 +000075 unsigned Alignment = __alignof__(IdentifierInfo);
Chris Lattner22eb9722006-06-18 05:43:12 +000076 // Round size up to an even multiple of the alignment.
77 AllocSize = (AllocSize+Alignment-1) & ~(Alignment-1);
78
79 // If there is space in this region for the identifier, return it.
80 if (unsigned(NextPtr+AllocSize-(char*)this) <= RegionSize) {
81 void *Result = NextPtr;
82 NextPtr += AllocSize;
83 return Result;
84 }
85
86 // Otherwise, we have to allocate a new chunk. Create one twice as big as
87 // this one.
88 MemRegion *NewRegion = (MemRegion *)malloc(RegionSize*2);
89 NewRegion->Init(RegionSize*2, this);
90
91 // Update the current "first region" pointer to point to the new region.
92 *RegPtr = NewRegion;
93
94 // Try allocating from it now.
95 return NewRegion->Allocate(AllocSize, RegPtr);
96 }
97
98 /// Deallocate - Release all memory for this region to the system.
99 ///
100 void Deallocate() {
101 MemRegion *next = Next;
102 free(this);
103 if (next)
104 next->Deallocate();
105 }
106};
107}
108
109#endif
110
111//===----------------------------------------------------------------------===//
112// IdentifierTable Implementation
113//===----------------------------------------------------------------------===//
114
115
Chris Lattnerc79f6fb2006-07-04 17:53:21 +0000116/// IdentifierLink - There is one of these allocated by IdentifierInfo.
Chris Lattner22eb9722006-06-18 05:43:12 +0000117/// These form the linked list of buckets for the hash table.
118struct IdentifierBucket {
119 /// Next - This is the next bucket in the linked list.
120 IdentifierBucket *Next;
121
Chris Lattnerc79f6fb2006-07-04 17:53:21 +0000122 IdentifierInfo TokInfo;
Chris Lattner22eb9722006-06-18 05:43:12 +0000123 // NOTE: TokInfo must be the last element in this structure, as the string
124 // information for the identifier is allocated right after it.
125};
126
127// FIXME: start hashtablesize off at 8K entries, GROW when density gets to 3.
Chris Lattner3c98fd32006-10-22 17:48:27 +0000128/// HASH_TABLE_SIZE - The current size of the hash table. Note that this must
129/// always be a power of two!
Chris Lattner0ba3dc42006-10-25 03:38:23 +0000130static unsigned HASH_TABLE_SIZE = 8096*4;
Chris Lattner22eb9722006-06-18 05:43:12 +0000131
Chris Lattner25e0d542006-10-18 06:07:05 +0000132IdentifierTable::IdentifierTable(const LangOptions &LangOpts) {
Chris Lattner22eb9722006-06-18 05:43:12 +0000133 IdentifierBucket **TableArray = new IdentifierBucket*[HASH_TABLE_SIZE]();
134 TheTable = TableArray;
135 NumIdentifiers = 0;
136#if USE_ALLOCATOR
137 TheMemory = malloc(8*4096);
138 ((MemRegion*)TheMemory)->Init(8*4096, 0);
139#endif
140
141 memset(TheTable, 0, HASH_TABLE_SIZE*sizeof(IdentifierBucket*));
Chris Lattner25e0d542006-10-18 06:07:05 +0000142
143 AddKeywords(LangOpts);
Chris Lattner22eb9722006-06-18 05:43:12 +0000144}
145
146IdentifierTable::~IdentifierTable() {
147 IdentifierBucket **TableArray = (IdentifierBucket**)TheTable;
148 for (unsigned i = 0, e = HASH_TABLE_SIZE; i != e; ++i) {
149 IdentifierBucket *Id = TableArray[i];
150 while (Id) {
151 // Free memory referenced by the identifier (e.g. macro info).
152 Id->TokInfo.Destroy();
153
154 IdentifierBucket *Next = Id->Next;
155#if !USE_ALLOCATOR
156 free(Id);
157#endif
158 Id = Next;
159 }
160 }
161#if USE_ALLOCATOR
162 ((MemRegion*)TheMemory)->Deallocate();
163#endif
164 delete [] TableArray;
165}
166
167/// HashString - Compute a hash code for the specified string.
168///
169static unsigned HashString(const char *Start, const char *End) {
170 unsigned int Result = 0;
171 // Perl hash function.
172 while (Start != End)
173 Result = Result * 33 + *Start++;
174 Result = Result + (Result >> 5);
175 return Result;
176}
177
Chris Lattnerc79f6fb2006-07-04 17:53:21 +0000178IdentifierInfo &IdentifierTable::get(const char *NameStart,
Chris Lattner0e1cf1f2006-07-04 18:53:52 +0000179 const char *NameEnd) {
Chris Lattner22eb9722006-06-18 05:43:12 +0000180 IdentifierBucket **TableArray = (IdentifierBucket**)TheTable;
181
Chris Lattner893f2722006-10-26 05:12:31 +0000182 unsigned FullHash = HashString(NameStart, NameEnd);
183 unsigned Hash = FullHash & (HASH_TABLE_SIZE-1);
Chris Lattner22eb9722006-06-18 05:43:12 +0000184 unsigned Length = NameEnd-NameStart;
185
186 IdentifierBucket *IdentHead = TableArray[Hash];
Chris Lattnerd0a96ba2006-07-10 06:10:51 +0000187 for (IdentifierBucket *Identifier = IdentHead, *LastID = 0; Identifier;
188 LastID = Identifier, Identifier = Identifier->Next) {
Chris Lattner22eb9722006-06-18 05:43:12 +0000189 if (Identifier->TokInfo.getNameLength() == Length &&
Chris Lattner341fd062006-10-26 05:18:38 +0000190 Identifier->TokInfo.HashValue == FullHash &&
Chris Lattnerd0a96ba2006-07-10 06:10:51 +0000191 memcmp(Identifier->TokInfo.getName(), NameStart, Length) == 0) {
192 // If found identifier wasn't at start of bucket, move it there so
193 // that frequently searched for identifiers are found earlier, even if
194 // they first occur late in the source file.
195 if (LastID) {
196 LastID->Next = Identifier->Next;
197 Identifier->Next = IdentHead;
198 TableArray[Hash] = Identifier;
199 }
200
Chris Lattner22eb9722006-06-18 05:43:12 +0000201 return Identifier->TokInfo;
Chris Lattnerd0a96ba2006-07-10 06:10:51 +0000202 }
Chris Lattner22eb9722006-06-18 05:43:12 +0000203 }
204
205 // Allocate a new identifier, with space for the null-terminated string at the
206 // end.
207 unsigned AllocSize = sizeof(IdentifierBucket)+Length+1;
208#if USE_ALLOCATOR
209 IdentifierBucket *Identifier = (IdentifierBucket*)
210 ((MemRegion*)TheMemory)->Allocate(AllocSize, (MemRegion**)&TheMemory);
211#else
212 IdentifierBucket *Identifier = (IdentifierBucket*)malloc(AllocSize);
213#endif
214 Identifier->TokInfo.NameLen = Length;
215 Identifier->TokInfo.Macro = 0;
216 Identifier->TokInfo.TokenID = tok::identifier;
Chris Lattner87d3bec2006-10-17 03:44:32 +0000217 Identifier->TokInfo.PPID = tok::pp_not_keyword;
Chris Lattner720f2702006-10-17 04:03:44 +0000218 Identifier->TokInfo.ObjCID = tok::objc_not_keyword;
Chris Lattner22eb9722006-06-18 05:43:12 +0000219 Identifier->TokInfo.IsExtension = false;
Chris Lattner17862172006-06-24 22:12:56 +0000220 Identifier->TokInfo.IsPoisoned = false;
Chris Lattner063400e2006-10-14 19:54:15 +0000221 Identifier->TokInfo.IsOtherTargetMacro = false;
Chris Lattner22eb9722006-06-18 05:43:12 +0000222 Identifier->TokInfo.FETokenInfo = 0;
Chris Lattner893f2722006-10-26 05:12:31 +0000223 Identifier->TokInfo.HashValue = FullHash;
Chris Lattner22eb9722006-06-18 05:43:12 +0000224
225 // Copy the string information.
226 char *StrBuffer = (char*)(Identifier+1);
227 memcpy(StrBuffer, NameStart, Length);
228 StrBuffer[Length] = 0; // Null terminate string.
229
Chris Lattnerd0a96ba2006-07-10 06:10:51 +0000230 // Link it into the hash table. Adding it to the start of the hash table is
231 // useful for buckets with lots of entries. This means that more recently
232 // referenced identifiers will be near the head of the bucket.
Chris Lattner22eb9722006-06-18 05:43:12 +0000233 Identifier->Next = IdentHead;
234 TableArray[Hash] = Identifier;
235 return Identifier->TokInfo;
236}
237
Chris Lattnerc79f6fb2006-07-04 17:53:21 +0000238IdentifierInfo &IdentifierTable::get(const std::string &Name) {
Chris Lattner22eb9722006-06-18 05:43:12 +0000239 // Don't use c_str() here: no need to be null terminated.
240 const char *NameBytes = &Name[0];
241 unsigned Size = Name.size();
242 return get(NameBytes, NameBytes+Size);
243}
244
Chris Lattner91cbf112006-07-03 04:28:52 +0000245/// VisitIdentifiers - This method walks through all of the identifiers,
246/// invoking IV->VisitIdentifier for each of them.
247void IdentifierTable::VisitIdentifiers(const IdentifierVisitor &IV) {
248 IdentifierBucket **TableArray = (IdentifierBucket**)TheTable;
249 for (unsigned i = 0, e = HASH_TABLE_SIZE; i != e; ++i) {
250 for (IdentifierBucket *Id = TableArray[i]; Id; Id = Id->Next)
251 IV.VisitIdentifier(Id->TokInfo);
252 }
253}
Chris Lattner22eb9722006-06-18 05:43:12 +0000254
Chris Lattner25e0d542006-10-18 06:07:05 +0000255//===----------------------------------------------------------------------===//
256// Language Keyword Implementation
257//===----------------------------------------------------------------------===//
258
259/// AddKeyword - This method is used to associate a token ID with specific
260/// identifiers because they are language keywords. This causes the lexer to
261/// automatically map matching identifiers to specialized token codes.
262///
263/// The C90/C99/CPP flags are set to 0 if the token should be enabled in the
264/// specified langauge, set to 1 if it is an extension in the specified
265/// language, and set to 2 if disabled in the specified language.
266static void AddKeyword(const std::string &Keyword, tok::TokenKind TokenCode,
Chris Lattnera4271e42006-10-20 06:13:26 +0000267 int C90, int C99, int CXX,
Chris Lattner25e0d542006-10-18 06:07:05 +0000268 const LangOptions &LangOpts, IdentifierTable &Table) {
Chris Lattnera4271e42006-10-20 06:13:26 +0000269 int Flags = LangOpts.CPlusPlus ? CXX : (LangOpts.C99 ? C99 : C90);
Chris Lattner25e0d542006-10-18 06:07:05 +0000270
271 // Don't add this keyword if disabled in this language or if an extension
272 // and extensions are disabled.
273 if (Flags + LangOpts.NoExtensions >= 2) return;
274
275 const char *Str = &Keyword[0];
276 IdentifierInfo &Info = Table.get(Str, Str+Keyword.size());
277 Info.setTokenID(TokenCode);
278 Info.setIsExtensionToken(Flags == 1);
279}
280
281/// AddPPKeyword - Register a preprocessor keyword like "define" "undef" or
282/// "elif".
283static void AddPPKeyword(tok::PPKeywordKind PPID,
284 const char *Name, unsigned NameLen,
285 IdentifierTable &Table) {
286 Table.get(Name, Name+NameLen).setPPKeywordID(PPID);
287}
288
289/// AddObjCKeyword - Register an Objective-C @keyword like "class" "selector" or
290/// "property".
291static void AddObjCKeyword(tok::ObjCKeywordKind ObjCID,
292 const char *Name, unsigned NameLen,
293 IdentifierTable &Table) {
294 Table.get(Name, Name+NameLen).setObjCKeywordID(ObjCID);
295}
296
297/// AddKeywords - Add all keywords to the symbol table.
298///
299void IdentifierTable::AddKeywords(const LangOptions &LangOpts) {
300 enum {
301 C90Shift = 0,
302 EXTC90 = 1 << C90Shift,
303 NOTC90 = 2 << C90Shift,
304 C99Shift = 2,
305 EXTC99 = 1 << C99Shift,
306 NOTC99 = 2 << C99Shift,
307 CPPShift = 4,
308 EXTCPP = 1 << CPPShift,
309 NOTCPP = 2 << CPPShift,
310 Mask = 3
311 };
312
313 // Add keywords and tokens for the current language.
314#define KEYWORD(NAME, FLAGS) \
315 AddKeyword(#NAME, tok::kw_ ## NAME, \
316 ((FLAGS) >> C90Shift) & Mask, \
317 ((FLAGS) >> C99Shift) & Mask, \
318 ((FLAGS) >> CPPShift) & Mask, LangOpts, *this);
319#define ALIAS(NAME, TOK) \
320 AddKeyword(NAME, tok::kw_ ## TOK, 0, 0, 0, LangOpts, *this);
321#define PPKEYWORD(NAME) \
322 AddPPKeyword(tok::pp_##NAME, #NAME, strlen(#NAME), *this);
323#define OBJC1_AT_KEYWORD(NAME) \
324 if (LangOpts.ObjC1) \
325 AddObjCKeyword(tok::objc_##NAME, #NAME, strlen(#NAME), *this);
326#define OBJC2_AT_KEYWORD(NAME) \
327 if (LangOpts.ObjC2) \
328 AddObjCKeyword(tok::objc_##NAME, #NAME, strlen(#NAME), *this);
329#include "clang/Basic/TokenKinds.def"
330}
331
332
333//===----------------------------------------------------------------------===//
334// Stats Implementation
335//===----------------------------------------------------------------------===//
336
Chris Lattner22eb9722006-06-18 05:43:12 +0000337/// PrintStats - Print statistics about how well the identifier table is doing
338/// at hashing identifiers.
339void IdentifierTable::PrintStats() const {
340 unsigned NumIdentifiers = 0;
341 unsigned NumEmptyBuckets = 0;
342 unsigned MaxBucketLength = 0;
343 unsigned AverageIdentifierSize = 0;
344 unsigned MaxIdentifierLength = 0;
345
346 IdentifierBucket **TableArray = (IdentifierBucket**)TheTable;
347 for (unsigned i = 0, e = HASH_TABLE_SIZE; i != e; ++i) {
348
349 unsigned NumIdentifiersInBucket = 0;
350 for (IdentifierBucket *Id = TableArray[i]; Id; Id = Id->Next) {
351 AverageIdentifierSize += Id->TokInfo.getNameLength();
352 if (MaxIdentifierLength < Id->TokInfo.getNameLength())
353 MaxIdentifierLength = Id->TokInfo.getNameLength();
354 ++NumIdentifiersInBucket;
355 }
Chris Lattner893f2722006-10-26 05:12:31 +0000356 if (NumIdentifiersInBucket > MaxBucketLength) {
Chris Lattner22eb9722006-06-18 05:43:12 +0000357 MaxBucketLength = NumIdentifiersInBucket;
Chris Lattner893f2722006-10-26 05:12:31 +0000358
359#if 0 // This code can be enabled to see (with -stats) a sample of some of the
360 // longest buckets in the hash table. Useful for inspecting density of
361 // buckets etc.
362 std::cerr << "Bucket length " << MaxBucketLength << ":\n";
363 for (IdentifierBucket *Id = TableArray[i]; Id; Id = Id->Next) {
364 std::cerr << " " << Id->TokInfo.getName() << " hash = "
365 << Id->TokInfo.HashValue << "\n";
366 }
367#endif
368 }
Chris Lattner22eb9722006-06-18 05:43:12 +0000369 if (NumIdentifiersInBucket == 0)
370 ++NumEmptyBuckets;
371
372 NumIdentifiers += NumIdentifiersInBucket;
373 }
374
375 std::cerr << "\n*** Identifier Table Stats:\n";
376 std::cerr << "# Identifiers: " << NumIdentifiers << "\n";
377 std::cerr << "# Empty Buckets: " << NumEmptyBuckets << "\n";
378 std::cerr << "Max identifiers in one bucket: " << MaxBucketLength << "\n";
379 std::cerr << "Hash density (#identifiers per bucket): "
380 << NumIdentifiers/(double)HASH_TABLE_SIZE << "\n";
381 std::cerr << "Nonempty hash density (average chain length): "
382 << NumIdentifiers/(double)(HASH_TABLE_SIZE-NumEmptyBuckets) << "\n";
383 std::cerr << "Ave identifier length: "
384 << (AverageIdentifierSize/(double)NumIdentifiers) << "\n";
385 std::cerr << "Max identifier length: " << MaxIdentifierLength << "\n";
386
387 // Compute statistics about the memory allocated for identifiers.
388#if USE_ALLOCATOR
389 unsigned BytesUsed = 0;
390 unsigned NumRegions = 0;
391 const MemRegion *R = (MemRegion*)TheMemory;
392 for (; R; R = R->getNext(), ++NumRegions) {
393 BytesUsed += R->getNumBytesAllocated();
394 }
395 std::cerr << "\nNumber of memory regions: " << NumRegions << "\n";
396 std::cerr << "Bytes allocated for identifiers: " << BytesUsed << "\n";
397#endif
398}
399
400