blob: 99c43422ebbc7600986ed3e7b4c26ccb20edf204 [file] [log] [blame]
Chris Lattner22eb9722006-06-18 05:43:12 +00001//===--- Preprocess.cpp - C Language Family Preprocessor Implementation ---===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file was developed by Chris Lattner and is distributed under
6// the University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
Chris Lattnerc79f6fb2006-07-04 17:53:21 +000010// This file implements the IdentifierInfo, IdentifierVisitor, and
Chris Lattner91cbf112006-07-03 04:28:52 +000011// IdentifierTable interfaces.
Chris Lattner22eb9722006-06-18 05:43:12 +000012//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Lex/IdentifierTable.h"
16#include "clang/Lex/MacroInfo.h"
Chris Lattner25e0d542006-10-18 06:07:05 +000017#include "clang/Basic/LangOptions.h"
Chris Lattner22eb9722006-06-18 05:43:12 +000018#include <iostream>
19using namespace llvm;
20using namespace clang;
21
22//===----------------------------------------------------------------------===//
Chris Lattnerc79f6fb2006-07-04 17:53:21 +000023// IdentifierInfo Implementation
Chris Lattner22eb9722006-06-18 05:43:12 +000024//===----------------------------------------------------------------------===//
25
Chris Lattnerc79f6fb2006-07-04 17:53:21 +000026void IdentifierInfo::Destroy() {
Chris Lattner22eb9722006-06-18 05:43:12 +000027 delete Macro;
28}
29
Chris Lattner91cbf112006-07-03 04:28:52 +000030//===----------------------------------------------------------------------===//
31// IdentifierVisitor Implementation
32//===----------------------------------------------------------------------===//
33
34IdentifierVisitor::~IdentifierVisitor() {
35}
Chris Lattner22eb9722006-06-18 05:43:12 +000036
37//===----------------------------------------------------------------------===//
38// Memory Allocation Support
39//===----------------------------------------------------------------------===//
40
41/// The identifier table has a very simple memory allocation pattern: it just
42/// keeps allocating identifiers, then never frees them unless it frees them
43/// all. As such, we use a simple bump-pointer memory allocator to make
44/// allocation speedy. Shark showed that malloc was 27% of the time spent in
45/// IdentifierTable::getIdentifier with malloc, and takes a 4.3% time with this.
46#define USE_ALLOCATOR 1
47#if USE_ALLOCATOR
48
49namespace {
50class MemRegion {
51 unsigned RegionSize;
52 MemRegion *Next;
53 char *NextPtr;
54public:
55 void Init(unsigned size, MemRegion *next) {
56 RegionSize = size;
57 Next = next;
58 NextPtr = (char*)(this+1);
59
60 // FIXME: uses GCC extension.
Chris Lattnerc79f6fb2006-07-04 17:53:21 +000061 unsigned Alignment = __alignof__(IdentifierInfo);
Chris Lattner22eb9722006-06-18 05:43:12 +000062 NextPtr = (char*)((intptr_t)(NextPtr+Alignment-1) &
63 ~(intptr_t)(Alignment-1));
64 }
65
66 const MemRegion *getNext() const { return Next; }
67 unsigned getNumBytesAllocated() const {
68 return NextPtr-(const char*)this;
69 }
70
71 /// Allocate - Allocate and return at least the specified number of bytes.
72 ///
73 void *Allocate(unsigned AllocSize, MemRegion **RegPtr) {
74 // FIXME: uses GCC extension.
Chris Lattnerc79f6fb2006-07-04 17:53:21 +000075 unsigned Alignment = __alignof__(IdentifierInfo);
Chris Lattner22eb9722006-06-18 05:43:12 +000076 // Round size up to an even multiple of the alignment.
77 AllocSize = (AllocSize+Alignment-1) & ~(Alignment-1);
78
79 // If there is space in this region for the identifier, return it.
80 if (unsigned(NextPtr+AllocSize-(char*)this) <= RegionSize) {
81 void *Result = NextPtr;
82 NextPtr += AllocSize;
83 return Result;
84 }
85
86 // Otherwise, we have to allocate a new chunk. Create one twice as big as
87 // this one.
88 MemRegion *NewRegion = (MemRegion *)malloc(RegionSize*2);
89 NewRegion->Init(RegionSize*2, this);
90
91 // Update the current "first region" pointer to point to the new region.
92 *RegPtr = NewRegion;
93
94 // Try allocating from it now.
95 return NewRegion->Allocate(AllocSize, RegPtr);
96 }
97
98 /// Deallocate - Release all memory for this region to the system.
99 ///
100 void Deallocate() {
101 MemRegion *next = Next;
102 free(this);
103 if (next)
104 next->Deallocate();
105 }
106};
107}
108
109#endif
110
111//===----------------------------------------------------------------------===//
112// IdentifierTable Implementation
113//===----------------------------------------------------------------------===//
114
115
Chris Lattnerf2e3ac32006-10-27 03:59:10 +0000116/// IdentifierBucket - The hash table consists of an array of these. If Info is
117/// non-null, this is an extant entry, otherwise, it is a hole.
Chris Lattner22eb9722006-06-18 05:43:12 +0000118struct IdentifierBucket {
Chris Lattnerf2e3ac32006-10-27 03:59:10 +0000119 /// FullHashValue - This remembers the full hash value of the identifier for
120 /// easy scanning.
121 unsigned FullHashValue;
Chris Lattner22eb9722006-06-18 05:43:12 +0000122
Chris Lattnerf2e3ac32006-10-27 03:59:10 +0000123 /// Info - This is a pointer to the actual identifier info object.
124 IdentifierInfo *Info;
Chris Lattner22eb9722006-06-18 05:43:12 +0000125};
126
Chris Lattner25e0d542006-10-18 06:07:05 +0000127IdentifierTable::IdentifierTable(const LangOptions &LangOpts) {
Chris Lattnerf2e3ac32006-10-27 03:59:10 +0000128 HashTableSize = 8192; // Start with space for 8K identifiers.
129 IdentifierBucket *TableArray = new IdentifierBucket[HashTableSize]();
130 memset(TableArray, 0, HashTableSize*sizeof(IdentifierBucket));
131
Chris Lattner22eb9722006-06-18 05:43:12 +0000132 TheTable = TableArray;
133 NumIdentifiers = 0;
134#if USE_ALLOCATOR
135 TheMemory = malloc(8*4096);
136 ((MemRegion*)TheMemory)->Init(8*4096, 0);
137#endif
138
Chris Lattnerf2e3ac32006-10-27 03:59:10 +0000139 // Populate the identifier table with info about keywords for the current
140 // language.
Chris Lattner25e0d542006-10-18 06:07:05 +0000141 AddKeywords(LangOpts);
Chris Lattner22eb9722006-06-18 05:43:12 +0000142}
143
144IdentifierTable::~IdentifierTable() {
Chris Lattnerf2e3ac32006-10-27 03:59:10 +0000145 IdentifierBucket *TableArray = (IdentifierBucket*)TheTable;
146 for (unsigned i = 0, e = HashTableSize; i != e; ++i) {
147 if (IdentifierInfo *Id = TableArray[i].Info) {
Chris Lattner22eb9722006-06-18 05:43:12 +0000148 // Free memory referenced by the identifier (e.g. macro info).
Chris Lattnerf2e3ac32006-10-27 03:59:10 +0000149 Id->Destroy();
Chris Lattner22eb9722006-06-18 05:43:12 +0000150
Chris Lattner22eb9722006-06-18 05:43:12 +0000151#if !USE_ALLOCATOR
Chris Lattnerf2e3ac32006-10-27 03:59:10 +0000152 // Free the memory for the identifier itself.
Chris Lattner22eb9722006-06-18 05:43:12 +0000153 free(Id);
154#endif
Chris Lattner22eb9722006-06-18 05:43:12 +0000155 }
156 }
157#if USE_ALLOCATOR
158 ((MemRegion*)TheMemory)->Deallocate();
159#endif
160 delete [] TableArray;
161}
162
163/// HashString - Compute a hash code for the specified string.
164///
165static unsigned HashString(const char *Start, const char *End) {
166 unsigned int Result = 0;
167 // Perl hash function.
168 while (Start != End)
169 Result = Result * 33 + *Start++;
170 Result = Result + (Result >> 5);
171 return Result;
172}
173
Chris Lattnerc79f6fb2006-07-04 17:53:21 +0000174IdentifierInfo &IdentifierTable::get(const char *NameStart,
Chris Lattner0e1cf1f2006-07-04 18:53:52 +0000175 const char *NameEnd) {
Chris Lattnerf2e3ac32006-10-27 03:59:10 +0000176 IdentifierBucket *TableArray = (IdentifierBucket*)TheTable;
Chris Lattner22eb9722006-06-18 05:43:12 +0000177
Chris Lattnerf2e3ac32006-10-27 03:59:10 +0000178 unsigned HTSize = HashTableSize;
179 unsigned FullHashValue = HashString(NameStart, NameEnd);
180 unsigned BucketNo = FullHashValue & (HTSize-1);
Chris Lattner22eb9722006-06-18 05:43:12 +0000181 unsigned Length = NameEnd-NameStart;
182
Chris Lattnerf2e3ac32006-10-27 03:59:10 +0000183 unsigned ProbeAmt = 1;
184 while (1) {
185 IdentifierBucket &Bucket = TableArray[BucketNo];
186 IdentifierInfo *BucketII = Bucket.Info;
187 // If we found an empty bucket, this identifier isn't in the table yet.
188 if (BucketII == 0) break;
Chris Lattner22eb9722006-06-18 05:43:12 +0000189
Chris Lattnerf2e3ac32006-10-27 03:59:10 +0000190 // If the full hash value matches, check deeply for a match. The common
191 // case here is that we are only looking at the buckets (for identifier info
192 // being non-null and for the full hash value) not at the identifiers. This
193 // is important for cache locality.
194 if (Bucket.FullHashValue == FullHashValue &&
Chris Lattnerf2e3ac32006-10-27 03:59:10 +0000195 memcmp(BucketII->getName(), NameStart, Length) == 0)
196 // We found a match!
197 return *BucketII;
198
199 // Okay, we didn't find the identifier. Probe to the next bucket.
200 BucketNo = (BucketNo+ProbeAmt) & (HashTableSize-1);
201
202 // Use quadratic probing, it has fewer clumping artifacts than linear
203 // probing and has good cache behavior in the common case.
204 ++ProbeAmt;
205 }
206
207 // Okay, the identifier doesn't already exist, and BucketNo is the bucket to
208 // fill in. Allocate a new identifier with space for the null-terminated
209 // string at the end.
210 unsigned AllocSize = sizeof(IdentifierInfo)+Length+1;
Chris Lattner22eb9722006-06-18 05:43:12 +0000211#if USE_ALLOCATOR
Chris Lattnerf2e3ac32006-10-27 03:59:10 +0000212 IdentifierInfo *Identifier = (IdentifierInfo*)
Chris Lattner22eb9722006-06-18 05:43:12 +0000213 ((MemRegion*)TheMemory)->Allocate(AllocSize, (MemRegion**)&TheMemory);
214#else
Chris Lattnerf2e3ac32006-10-27 03:59:10 +0000215 IdentifierInfo *Identifier = (IdentifierInfo*)malloc(AllocSize);
Chris Lattner22eb9722006-06-18 05:43:12 +0000216#endif
Chris Lattnerf2e3ac32006-10-27 03:59:10 +0000217 Identifier->Macro = 0;
218 Identifier->TokenID = tok::identifier;
219 Identifier->PPID = tok::pp_not_keyword;
220 Identifier->ObjCID = tok::objc_not_keyword;
221 Identifier->IsExtension = false;
222 Identifier->IsPoisoned = false;
223 Identifier->IsOtherTargetMacro = false;
224 Identifier->FETokenInfo = 0;
225 ++NumIdentifiers;
Chris Lattner22eb9722006-06-18 05:43:12 +0000226
227 // Copy the string information.
228 char *StrBuffer = (char*)(Identifier+1);
229 memcpy(StrBuffer, NameStart, Length);
230 StrBuffer[Length] = 0; // Null terminate string.
231
Chris Lattnerf2e3ac32006-10-27 03:59:10 +0000232 // Fill in the bucket for the hash table.
233 TableArray[BucketNo].Info = Identifier;
234 TableArray[BucketNo].FullHashValue = FullHashValue;
235
236 // If the hash table is now more than 3/4 full, rehash into a larger table.
237 if (NumIdentifiers > HashTableSize*3/4)
238 RehashTable();
239
240 return *Identifier;
Chris Lattner22eb9722006-06-18 05:43:12 +0000241}
242
Chris Lattnerc79f6fb2006-07-04 17:53:21 +0000243IdentifierInfo &IdentifierTable::get(const std::string &Name) {
Chris Lattner22eb9722006-06-18 05:43:12 +0000244 // Don't use c_str() here: no need to be null terminated.
245 const char *NameBytes = &Name[0];
246 unsigned Size = Name.size();
247 return get(NameBytes, NameBytes+Size);
248}
249
Chris Lattnerf2e3ac32006-10-27 03:59:10 +0000250void IdentifierTable::RehashTable() {
251 unsigned NewSize = HashTableSize*2;
252 IdentifierBucket *NewTableArray = new IdentifierBucket[NewSize]();
253 memset(NewTableArray, 0, NewSize*sizeof(IdentifierBucket));
254
255 // Rehash all the identifier into their new buckets. Luckily we already have
256 // the hash values available :).
257 IdentifierBucket *CurTable = (IdentifierBucket *)TheTable;
258 for (IdentifierBucket *IB = CurTable, *E = CurTable+HashTableSize;
259 IB != E; ++IB) {
260 if (IB->Info) {
261 // Fast case, bucket available.
262 unsigned FullHash = IB->FullHashValue;
263 unsigned NewBucket = FullHash & (NewSize-1);
264 if (NewTableArray[NewBucket].Info == 0) {
265 NewTableArray[FullHash & (NewSize-1)].Info = IB->Info;
266 NewTableArray[FullHash & (NewSize-1)].FullHashValue = FullHash;
267 continue;
268 }
269
270 unsigned ProbeSize = 1;
271 do {
272 NewBucket = (NewBucket + ProbeSize++) & (NewSize-1);
273 } while (NewTableArray[NewBucket].Info);
274
275 // Finally found a slot. Fill it in.
Chris Lattnera8831162006-10-27 04:54:47 +0000276 NewTableArray[NewBucket].Info = IB->Info;
277 NewTableArray[NewBucket].FullHashValue = FullHash;
Chris Lattnerf2e3ac32006-10-27 03:59:10 +0000278 }
279 }
280
281 delete[] CurTable;
282
283 TheTable = NewTableArray;
284 HashTableSize = NewSize;
285}
286
287
Chris Lattner91cbf112006-07-03 04:28:52 +0000288/// VisitIdentifiers - This method walks through all of the identifiers,
289/// invoking IV->VisitIdentifier for each of them.
290void IdentifierTable::VisitIdentifiers(const IdentifierVisitor &IV) {
Chris Lattnerf2e3ac32006-10-27 03:59:10 +0000291 IdentifierBucket *TableArray = (IdentifierBucket*)TheTable;
292 for (unsigned i = 0, e = HashTableSize; i != e; ++i) {
293 if (IdentifierInfo *Id = TableArray[i].Info)
294 IV.VisitIdentifier(*Id);
Chris Lattner91cbf112006-07-03 04:28:52 +0000295 }
296}
Chris Lattner22eb9722006-06-18 05:43:12 +0000297
Chris Lattner25e0d542006-10-18 06:07:05 +0000298//===----------------------------------------------------------------------===//
299// Language Keyword Implementation
300//===----------------------------------------------------------------------===//
301
302/// AddKeyword - This method is used to associate a token ID with specific
303/// identifiers because they are language keywords. This causes the lexer to
304/// automatically map matching identifiers to specialized token codes.
305///
306/// The C90/C99/CPP flags are set to 0 if the token should be enabled in the
307/// specified langauge, set to 1 if it is an extension in the specified
308/// language, and set to 2 if disabled in the specified language.
309static void AddKeyword(const std::string &Keyword, tok::TokenKind TokenCode,
Chris Lattnera4271e42006-10-20 06:13:26 +0000310 int C90, int C99, int CXX,
Chris Lattner25e0d542006-10-18 06:07:05 +0000311 const LangOptions &LangOpts, IdentifierTable &Table) {
Chris Lattnera4271e42006-10-20 06:13:26 +0000312 int Flags = LangOpts.CPlusPlus ? CXX : (LangOpts.C99 ? C99 : C90);
Chris Lattner25e0d542006-10-18 06:07:05 +0000313
314 // Don't add this keyword if disabled in this language or if an extension
315 // and extensions are disabled.
316 if (Flags + LangOpts.NoExtensions >= 2) return;
317
318 const char *Str = &Keyword[0];
319 IdentifierInfo &Info = Table.get(Str, Str+Keyword.size());
320 Info.setTokenID(TokenCode);
321 Info.setIsExtensionToken(Flags == 1);
322}
323
324/// AddPPKeyword - Register a preprocessor keyword like "define" "undef" or
325/// "elif".
326static void AddPPKeyword(tok::PPKeywordKind PPID,
327 const char *Name, unsigned NameLen,
328 IdentifierTable &Table) {
329 Table.get(Name, Name+NameLen).setPPKeywordID(PPID);
330}
331
332/// AddObjCKeyword - Register an Objective-C @keyword like "class" "selector" or
333/// "property".
334static void AddObjCKeyword(tok::ObjCKeywordKind ObjCID,
335 const char *Name, unsigned NameLen,
336 IdentifierTable &Table) {
337 Table.get(Name, Name+NameLen).setObjCKeywordID(ObjCID);
338}
339
340/// AddKeywords - Add all keywords to the symbol table.
341///
342void IdentifierTable::AddKeywords(const LangOptions &LangOpts) {
343 enum {
344 C90Shift = 0,
345 EXTC90 = 1 << C90Shift,
346 NOTC90 = 2 << C90Shift,
347 C99Shift = 2,
348 EXTC99 = 1 << C99Shift,
349 NOTC99 = 2 << C99Shift,
350 CPPShift = 4,
351 EXTCPP = 1 << CPPShift,
352 NOTCPP = 2 << CPPShift,
353 Mask = 3
354 };
355
356 // Add keywords and tokens for the current language.
357#define KEYWORD(NAME, FLAGS) \
358 AddKeyword(#NAME, tok::kw_ ## NAME, \
359 ((FLAGS) >> C90Shift) & Mask, \
360 ((FLAGS) >> C99Shift) & Mask, \
361 ((FLAGS) >> CPPShift) & Mask, LangOpts, *this);
362#define ALIAS(NAME, TOK) \
363 AddKeyword(NAME, tok::kw_ ## TOK, 0, 0, 0, LangOpts, *this);
364#define PPKEYWORD(NAME) \
365 AddPPKeyword(tok::pp_##NAME, #NAME, strlen(#NAME), *this);
366#define OBJC1_AT_KEYWORD(NAME) \
367 if (LangOpts.ObjC1) \
368 AddObjCKeyword(tok::objc_##NAME, #NAME, strlen(#NAME), *this);
369#define OBJC2_AT_KEYWORD(NAME) \
370 if (LangOpts.ObjC2) \
371 AddObjCKeyword(tok::objc_##NAME, #NAME, strlen(#NAME), *this);
372#include "clang/Basic/TokenKinds.def"
373}
374
375
376//===----------------------------------------------------------------------===//
377// Stats Implementation
378//===----------------------------------------------------------------------===//
379
Chris Lattner22eb9722006-06-18 05:43:12 +0000380/// PrintStats - Print statistics about how well the identifier table is doing
381/// at hashing identifiers.
382void IdentifierTable::PrintStats() const {
Chris Lattner22eb9722006-06-18 05:43:12 +0000383 unsigned NumEmptyBuckets = 0;
Chris Lattner22eb9722006-06-18 05:43:12 +0000384 unsigned AverageIdentifierSize = 0;
385 unsigned MaxIdentifierLength = 0;
Chris Lattnerf2e3ac32006-10-27 03:59:10 +0000386 unsigned NumProbed = 0;
Chris Lattner22eb9722006-06-18 05:43:12 +0000387
Chris Lattnerf2e3ac32006-10-27 03:59:10 +0000388 IdentifierBucket *TableArray = (IdentifierBucket*)TheTable;
389 for (unsigned i = 0, e = HashTableSize; i != e; ++i) {
390 if (TableArray[i].Info == 0) {
Chris Lattner22eb9722006-06-18 05:43:12 +0000391 ++NumEmptyBuckets;
Chris Lattnerf2e3ac32006-10-27 03:59:10 +0000392 continue;
393 }
394 IdentifierInfo *Id = TableArray[i].Info;
Chris Lattner56bdb9a2006-10-27 05:06:38 +0000395 unsigned IdLen = strlen(Id->getName());
396 AverageIdentifierSize += IdLen;
397 if (MaxIdentifierLength < IdLen)
398 MaxIdentifierLength = IdLen;
Chris Lattner22eb9722006-06-18 05:43:12 +0000399
Chris Lattnerf2e3ac32006-10-27 03:59:10 +0000400 // Count the number of times something was probed.
401 if ((TableArray[i].FullHashValue & (e-1)) != i)
402 ++NumProbed;
403
404 // TODO: Figure out maximum times an identifier had to probe for -stats.
Chris Lattner22eb9722006-06-18 05:43:12 +0000405 }
406
407 std::cerr << "\n*** Identifier Table Stats:\n";
408 std::cerr << "# Identifiers: " << NumIdentifiers << "\n";
409 std::cerr << "# Empty Buckets: " << NumEmptyBuckets << "\n";
Chris Lattner22eb9722006-06-18 05:43:12 +0000410 std::cerr << "Hash density (#identifiers per bucket): "
Chris Lattnerf2e3ac32006-10-27 03:59:10 +0000411 << NumIdentifiers/(double)HashTableSize << "\n";
412 std::cerr << "Num probed identifiers: " << NumProbed << " ("
413 << NumProbed*100.0/NumIdentifiers << "%)\n";
Chris Lattner22eb9722006-06-18 05:43:12 +0000414 std::cerr << "Ave identifier length: "
415 << (AverageIdentifierSize/(double)NumIdentifiers) << "\n";
416 std::cerr << "Max identifier length: " << MaxIdentifierLength << "\n";
417
418 // Compute statistics about the memory allocated for identifiers.
419#if USE_ALLOCATOR
420 unsigned BytesUsed = 0;
421 unsigned NumRegions = 0;
422 const MemRegion *R = (MemRegion*)TheMemory;
423 for (; R; R = R->getNext(), ++NumRegions) {
424 BytesUsed += R->getNumBytesAllocated();
425 }
426 std::cerr << "\nNumber of memory regions: " << NumRegions << "\n";
427 std::cerr << "Bytes allocated for identifiers: " << BytesUsed << "\n";
428#endif
429}
430
431