blob: a12ef9833537772d4c3b7861433b740d4d9e20c6 [file] [log] [blame]
Chris Lattner22eb9722006-06-18 05:43:12 +00001//===--- Preprocess.cpp - C Language Family Preprocessor Implementation ---===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file was developed by Chris Lattner and is distributed under
6// the University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
Chris Lattnerc79f6fb2006-07-04 17:53:21 +000010// This file implements the IdentifierInfo, IdentifierVisitor, and
Chris Lattner91cbf112006-07-03 04:28:52 +000011// IdentifierTable interfaces.
Chris Lattner22eb9722006-06-18 05:43:12 +000012//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Lex/IdentifierTable.h"
16#include "clang/Lex/MacroInfo.h"
17#include <iostream>
18using namespace llvm;
19using namespace clang;
20
21//===----------------------------------------------------------------------===//
Chris Lattnerc79f6fb2006-07-04 17:53:21 +000022// IdentifierInfo Implementation
Chris Lattner22eb9722006-06-18 05:43:12 +000023//===----------------------------------------------------------------------===//
24
Chris Lattnerc79f6fb2006-07-04 17:53:21 +000025void IdentifierInfo::Destroy() {
Chris Lattner22eb9722006-06-18 05:43:12 +000026 delete Macro;
27}
28
Chris Lattner91cbf112006-07-03 04:28:52 +000029//===----------------------------------------------------------------------===//
30// IdentifierVisitor Implementation
31//===----------------------------------------------------------------------===//
32
33IdentifierVisitor::~IdentifierVisitor() {
34}
Chris Lattner22eb9722006-06-18 05:43:12 +000035
36//===----------------------------------------------------------------------===//
37// Memory Allocation Support
38//===----------------------------------------------------------------------===//
39
40/// The identifier table has a very simple memory allocation pattern: it just
41/// keeps allocating identifiers, then never frees them unless it frees them
42/// all. As such, we use a simple bump-pointer memory allocator to make
43/// allocation speedy. Shark showed that malloc was 27% of the time spent in
44/// IdentifierTable::getIdentifier with malloc, and takes a 4.3% time with this.
45#define USE_ALLOCATOR 1
46#if USE_ALLOCATOR
47
48namespace {
49class MemRegion {
50 unsigned RegionSize;
51 MemRegion *Next;
52 char *NextPtr;
53public:
54 void Init(unsigned size, MemRegion *next) {
55 RegionSize = size;
56 Next = next;
57 NextPtr = (char*)(this+1);
58
59 // FIXME: uses GCC extension.
Chris Lattnerc79f6fb2006-07-04 17:53:21 +000060 unsigned Alignment = __alignof__(IdentifierInfo);
Chris Lattner22eb9722006-06-18 05:43:12 +000061 NextPtr = (char*)((intptr_t)(NextPtr+Alignment-1) &
62 ~(intptr_t)(Alignment-1));
63 }
64
65 const MemRegion *getNext() const { return Next; }
66 unsigned getNumBytesAllocated() const {
67 return NextPtr-(const char*)this;
68 }
69
70 /// Allocate - Allocate and return at least the specified number of bytes.
71 ///
72 void *Allocate(unsigned AllocSize, MemRegion **RegPtr) {
73 // FIXME: uses GCC extension.
Chris Lattnerc79f6fb2006-07-04 17:53:21 +000074 unsigned Alignment = __alignof__(IdentifierInfo);
Chris Lattner22eb9722006-06-18 05:43:12 +000075 // Round size up to an even multiple of the alignment.
76 AllocSize = (AllocSize+Alignment-1) & ~(Alignment-1);
77
78 // If there is space in this region for the identifier, return it.
79 if (unsigned(NextPtr+AllocSize-(char*)this) <= RegionSize) {
80 void *Result = NextPtr;
81 NextPtr += AllocSize;
82 return Result;
83 }
84
85 // Otherwise, we have to allocate a new chunk. Create one twice as big as
86 // this one.
87 MemRegion *NewRegion = (MemRegion *)malloc(RegionSize*2);
88 NewRegion->Init(RegionSize*2, this);
89
90 // Update the current "first region" pointer to point to the new region.
91 *RegPtr = NewRegion;
92
93 // Try allocating from it now.
94 return NewRegion->Allocate(AllocSize, RegPtr);
95 }
96
97 /// Deallocate - Release all memory for this region to the system.
98 ///
99 void Deallocate() {
100 MemRegion *next = Next;
101 free(this);
102 if (next)
103 next->Deallocate();
104 }
105};
106}
107
108#endif
109
110//===----------------------------------------------------------------------===//
111// IdentifierTable Implementation
112//===----------------------------------------------------------------------===//
113
114
Chris Lattnerc79f6fb2006-07-04 17:53:21 +0000115/// IdentifierLink - There is one of these allocated by IdentifierInfo.
Chris Lattner22eb9722006-06-18 05:43:12 +0000116/// These form the linked list of buckets for the hash table.
117struct IdentifierBucket {
118 /// Next - This is the next bucket in the linked list.
119 IdentifierBucket *Next;
120
Chris Lattnerc79f6fb2006-07-04 17:53:21 +0000121 IdentifierInfo TokInfo;
Chris Lattner22eb9722006-06-18 05:43:12 +0000122 // NOTE: TokInfo must be the last element in this structure, as the string
123 // information for the identifier is allocated right after it.
124};
125
126// FIXME: start hashtablesize off at 8K entries, GROW when density gets to 3.
127static unsigned HASH_TABLE_SIZE = 8096;
128
129IdentifierTable::IdentifierTable() {
130 IdentifierBucket **TableArray = new IdentifierBucket*[HASH_TABLE_SIZE]();
131 TheTable = TableArray;
132 NumIdentifiers = 0;
133#if USE_ALLOCATOR
134 TheMemory = malloc(8*4096);
135 ((MemRegion*)TheMemory)->Init(8*4096, 0);
136#endif
137
138 memset(TheTable, 0, HASH_TABLE_SIZE*sizeof(IdentifierBucket*));
139}
140
141IdentifierTable::~IdentifierTable() {
142 IdentifierBucket **TableArray = (IdentifierBucket**)TheTable;
143 for (unsigned i = 0, e = HASH_TABLE_SIZE; i != e; ++i) {
144 IdentifierBucket *Id = TableArray[i];
145 while (Id) {
146 // Free memory referenced by the identifier (e.g. macro info).
147 Id->TokInfo.Destroy();
148
149 IdentifierBucket *Next = Id->Next;
150#if !USE_ALLOCATOR
151 free(Id);
152#endif
153 Id = Next;
154 }
155 }
156#if USE_ALLOCATOR
157 ((MemRegion*)TheMemory)->Deallocate();
158#endif
159 delete [] TableArray;
160}
161
162/// HashString - Compute a hash code for the specified string.
163///
164static unsigned HashString(const char *Start, const char *End) {
165 unsigned int Result = 0;
166 // Perl hash function.
167 while (Start != End)
168 Result = Result * 33 + *Start++;
169 Result = Result + (Result >> 5);
170 return Result;
171}
172
Chris Lattnerc79f6fb2006-07-04 17:53:21 +0000173IdentifierInfo &IdentifierTable::get(const char *NameStart,
Chris Lattner0e1cf1f2006-07-04 18:53:52 +0000174 const char *NameEnd) {
Chris Lattner22eb9722006-06-18 05:43:12 +0000175 IdentifierBucket **TableArray = (IdentifierBucket**)TheTable;
176
177 unsigned Hash = HashString(NameStart, NameEnd) % HASH_TABLE_SIZE;
178 unsigned Length = NameEnd-NameStart;
179
180 IdentifierBucket *IdentHead = TableArray[Hash];
Chris Lattnerd0a96ba2006-07-10 06:10:51 +0000181 for (IdentifierBucket *Identifier = IdentHead, *LastID = 0; Identifier;
182 LastID = Identifier, Identifier = Identifier->Next) {
Chris Lattner22eb9722006-06-18 05:43:12 +0000183 if (Identifier->TokInfo.getNameLength() == Length &&
Chris Lattnerd0a96ba2006-07-10 06:10:51 +0000184 memcmp(Identifier->TokInfo.getName(), NameStart, Length) == 0) {
185 // If found identifier wasn't at start of bucket, move it there so
186 // that frequently searched for identifiers are found earlier, even if
187 // they first occur late in the source file.
188 if (LastID) {
189 LastID->Next = Identifier->Next;
190 Identifier->Next = IdentHead;
191 TableArray[Hash] = Identifier;
192 }
193
Chris Lattner22eb9722006-06-18 05:43:12 +0000194 return Identifier->TokInfo;
Chris Lattnerd0a96ba2006-07-10 06:10:51 +0000195 }
Chris Lattner22eb9722006-06-18 05:43:12 +0000196 }
197
198 // Allocate a new identifier, with space for the null-terminated string at the
199 // end.
200 unsigned AllocSize = sizeof(IdentifierBucket)+Length+1;
201#if USE_ALLOCATOR
202 IdentifierBucket *Identifier = (IdentifierBucket*)
203 ((MemRegion*)TheMemory)->Allocate(AllocSize, (MemRegion**)&TheMemory);
204#else
205 IdentifierBucket *Identifier = (IdentifierBucket*)malloc(AllocSize);
206#endif
207 Identifier->TokInfo.NameLen = Length;
208 Identifier->TokInfo.Macro = 0;
209 Identifier->TokInfo.TokenID = tok::identifier;
Chris Lattner87d3bec2006-10-17 03:44:32 +0000210 Identifier->TokInfo.PPID = tok::pp_not_keyword;
Chris Lattner22eb9722006-06-18 05:43:12 +0000211 Identifier->TokInfo.IsExtension = false;
Chris Lattner17862172006-06-24 22:12:56 +0000212 Identifier->TokInfo.IsPoisoned = false;
Chris Lattner063400e2006-10-14 19:54:15 +0000213 Identifier->TokInfo.IsOtherTargetMacro = false;
Chris Lattner22eb9722006-06-18 05:43:12 +0000214 Identifier->TokInfo.FETokenInfo = 0;
215
216 // Copy the string information.
217 char *StrBuffer = (char*)(Identifier+1);
218 memcpy(StrBuffer, NameStart, Length);
219 StrBuffer[Length] = 0; // Null terminate string.
220
Chris Lattnerd0a96ba2006-07-10 06:10:51 +0000221 // Link it into the hash table. Adding it to the start of the hash table is
222 // useful for buckets with lots of entries. This means that more recently
223 // referenced identifiers will be near the head of the bucket.
Chris Lattner22eb9722006-06-18 05:43:12 +0000224 Identifier->Next = IdentHead;
225 TableArray[Hash] = Identifier;
226 return Identifier->TokInfo;
227}
228
Chris Lattnerc79f6fb2006-07-04 17:53:21 +0000229IdentifierInfo &IdentifierTable::get(const std::string &Name) {
Chris Lattner22eb9722006-06-18 05:43:12 +0000230 // Don't use c_str() here: no need to be null terminated.
231 const char *NameBytes = &Name[0];
232 unsigned Size = Name.size();
233 return get(NameBytes, NameBytes+Size);
234}
235
Chris Lattner91cbf112006-07-03 04:28:52 +0000236/// VisitIdentifiers - This method walks through all of the identifiers,
237/// invoking IV->VisitIdentifier for each of them.
238void IdentifierTable::VisitIdentifiers(const IdentifierVisitor &IV) {
239 IdentifierBucket **TableArray = (IdentifierBucket**)TheTable;
240 for (unsigned i = 0, e = HASH_TABLE_SIZE; i != e; ++i) {
241 for (IdentifierBucket *Id = TableArray[i]; Id; Id = Id->Next)
242 IV.VisitIdentifier(Id->TokInfo);
243 }
244}
Chris Lattner22eb9722006-06-18 05:43:12 +0000245
246/// PrintStats - Print statistics about how well the identifier table is doing
247/// at hashing identifiers.
248void IdentifierTable::PrintStats() const {
249 unsigned NumIdentifiers = 0;
250 unsigned NumEmptyBuckets = 0;
251 unsigned MaxBucketLength = 0;
252 unsigned AverageIdentifierSize = 0;
253 unsigned MaxIdentifierLength = 0;
254
255 IdentifierBucket **TableArray = (IdentifierBucket**)TheTable;
256 for (unsigned i = 0, e = HASH_TABLE_SIZE; i != e; ++i) {
257
258 unsigned NumIdentifiersInBucket = 0;
259 for (IdentifierBucket *Id = TableArray[i]; Id; Id = Id->Next) {
260 AverageIdentifierSize += Id->TokInfo.getNameLength();
261 if (MaxIdentifierLength < Id->TokInfo.getNameLength())
262 MaxIdentifierLength = Id->TokInfo.getNameLength();
263 ++NumIdentifiersInBucket;
264 }
265 if (NumIdentifiersInBucket > MaxBucketLength)
266 MaxBucketLength = NumIdentifiersInBucket;
267 if (NumIdentifiersInBucket == 0)
268 ++NumEmptyBuckets;
269
270 NumIdentifiers += NumIdentifiersInBucket;
271 }
272
273 std::cerr << "\n*** Identifier Table Stats:\n";
274 std::cerr << "# Identifiers: " << NumIdentifiers << "\n";
275 std::cerr << "# Empty Buckets: " << NumEmptyBuckets << "\n";
276 std::cerr << "Max identifiers in one bucket: " << MaxBucketLength << "\n";
277 std::cerr << "Hash density (#identifiers per bucket): "
278 << NumIdentifiers/(double)HASH_TABLE_SIZE << "\n";
279 std::cerr << "Nonempty hash density (average chain length): "
280 << NumIdentifiers/(double)(HASH_TABLE_SIZE-NumEmptyBuckets) << "\n";
281 std::cerr << "Ave identifier length: "
282 << (AverageIdentifierSize/(double)NumIdentifiers) << "\n";
283 std::cerr << "Max identifier length: " << MaxIdentifierLength << "\n";
284
285 // Compute statistics about the memory allocated for identifiers.
286#if USE_ALLOCATOR
287 unsigned BytesUsed = 0;
288 unsigned NumRegions = 0;
289 const MemRegion *R = (MemRegion*)TheMemory;
290 for (; R; R = R->getNext(), ++NumRegions) {
291 BytesUsed += R->getNumBytesAllocated();
292 }
293 std::cerr << "\nNumber of memory regions: " << NumRegions << "\n";
294 std::cerr << "Bytes allocated for identifiers: " << BytesUsed << "\n";
295#endif
296}
297
298