blob: e81ad57e87462b58e35a5da098cf6fd04cce6c86 [file] [log] [blame]
Chris Lattner22eb9722006-06-18 05:43:12 +00001//===--- Preprocess.cpp - C Language Family Preprocessor Implementation ---===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file was developed by Chris Lattner and is distributed under
6// the University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the IdentifierTokenInfo and IdentifierTable interfaces.
11//
12//===----------------------------------------------------------------------===//
13
14#include "clang/Lex/IdentifierTable.h"
15#include "clang/Lex/MacroInfo.h"
16#include <iostream>
17using namespace llvm;
18using namespace clang;
19
20//===----------------------------------------------------------------------===//
21// IdentifierTokenInfo Implementation
22//===----------------------------------------------------------------------===//
23
24void IdentifierTokenInfo::Destroy() {
25 delete Macro;
26}
27
28
29//===----------------------------------------------------------------------===//
30// Memory Allocation Support
31//===----------------------------------------------------------------------===//
32
33/// The identifier table has a very simple memory allocation pattern: it just
34/// keeps allocating identifiers, then never frees them unless it frees them
35/// all. As such, we use a simple bump-pointer memory allocator to make
36/// allocation speedy. Shark showed that malloc was 27% of the time spent in
37/// IdentifierTable::getIdentifier with malloc, and takes a 4.3% time with this.
38#define USE_ALLOCATOR 1
39#if USE_ALLOCATOR
40
41namespace {
42class MemRegion {
43 unsigned RegionSize;
44 MemRegion *Next;
45 char *NextPtr;
46public:
47 void Init(unsigned size, MemRegion *next) {
48 RegionSize = size;
49 Next = next;
50 NextPtr = (char*)(this+1);
51
52 // FIXME: uses GCC extension.
53 unsigned Alignment = __alignof__(IdentifierTokenInfo);
54 NextPtr = (char*)((intptr_t)(NextPtr+Alignment-1) &
55 ~(intptr_t)(Alignment-1));
56 }
57
58 const MemRegion *getNext() const { return Next; }
59 unsigned getNumBytesAllocated() const {
60 return NextPtr-(const char*)this;
61 }
62
63 /// Allocate - Allocate and return at least the specified number of bytes.
64 ///
65 void *Allocate(unsigned AllocSize, MemRegion **RegPtr) {
66 // FIXME: uses GCC extension.
67 unsigned Alignment = __alignof__(IdentifierTokenInfo);
68 // Round size up to an even multiple of the alignment.
69 AllocSize = (AllocSize+Alignment-1) & ~(Alignment-1);
70
71 // If there is space in this region for the identifier, return it.
72 if (unsigned(NextPtr+AllocSize-(char*)this) <= RegionSize) {
73 void *Result = NextPtr;
74 NextPtr += AllocSize;
75 return Result;
76 }
77
78 // Otherwise, we have to allocate a new chunk. Create one twice as big as
79 // this one.
80 MemRegion *NewRegion = (MemRegion *)malloc(RegionSize*2);
81 NewRegion->Init(RegionSize*2, this);
82
83 // Update the current "first region" pointer to point to the new region.
84 *RegPtr = NewRegion;
85
86 // Try allocating from it now.
87 return NewRegion->Allocate(AllocSize, RegPtr);
88 }
89
90 /// Deallocate - Release all memory for this region to the system.
91 ///
92 void Deallocate() {
93 MemRegion *next = Next;
94 free(this);
95 if (next)
96 next->Deallocate();
97 }
98};
99}
100
101#endif
102
103//===----------------------------------------------------------------------===//
104// IdentifierTable Implementation
105//===----------------------------------------------------------------------===//
106
107
108/// IdentifierLink - There is one of these allocated by IdentifierTokenInfo.
109/// These form the linked list of buckets for the hash table.
110struct IdentifierBucket {
111 /// Next - This is the next bucket in the linked list.
112 IdentifierBucket *Next;
113
114 IdentifierTokenInfo TokInfo;
115 // NOTE: TokInfo must be the last element in this structure, as the string
116 // information for the identifier is allocated right after it.
117};
118
119// FIXME: start hashtablesize off at 8K entries, GROW when density gets to 3.
120static unsigned HASH_TABLE_SIZE = 8096;
121
122IdentifierTable::IdentifierTable() {
123 IdentifierBucket **TableArray = new IdentifierBucket*[HASH_TABLE_SIZE]();
124 TheTable = TableArray;
125 NumIdentifiers = 0;
126#if USE_ALLOCATOR
127 TheMemory = malloc(8*4096);
128 ((MemRegion*)TheMemory)->Init(8*4096, 0);
129#endif
130
131 memset(TheTable, 0, HASH_TABLE_SIZE*sizeof(IdentifierBucket*));
132}
133
134IdentifierTable::~IdentifierTable() {
135 IdentifierBucket **TableArray = (IdentifierBucket**)TheTable;
136 for (unsigned i = 0, e = HASH_TABLE_SIZE; i != e; ++i) {
137 IdentifierBucket *Id = TableArray[i];
138 while (Id) {
139 // Free memory referenced by the identifier (e.g. macro info).
140 Id->TokInfo.Destroy();
141
142 IdentifierBucket *Next = Id->Next;
143#if !USE_ALLOCATOR
144 free(Id);
145#endif
146 Id = Next;
147 }
148 }
149#if USE_ALLOCATOR
150 ((MemRegion*)TheMemory)->Deallocate();
151#endif
152 delete [] TableArray;
153}
154
155/// HashString - Compute a hash code for the specified string.
156///
157static unsigned HashString(const char *Start, const char *End) {
158 unsigned int Result = 0;
159 // Perl hash function.
160 while (Start != End)
161 Result = Result * 33 + *Start++;
162 Result = Result + (Result >> 5);
163 return Result;
164}
165
166IdentifierTokenInfo &IdentifierTable::get(const char *NameStart,
167 const char *NameEnd) {
168 IdentifierBucket **TableArray = (IdentifierBucket**)TheTable;
169
170 unsigned Hash = HashString(NameStart, NameEnd) % HASH_TABLE_SIZE;
171 unsigned Length = NameEnd-NameStart;
172
173 IdentifierBucket *IdentHead = TableArray[Hash];
174 for (IdentifierBucket *Identifier = IdentHead; Identifier;
175 Identifier = Identifier->Next) {
176 if (Identifier->TokInfo.getNameLength() == Length &&
177 memcmp(Identifier->TokInfo.getName(), NameStart, Length) == 0)
178 return Identifier->TokInfo;
179 }
180
181 // Allocate a new identifier, with space for the null-terminated string at the
182 // end.
183 unsigned AllocSize = sizeof(IdentifierBucket)+Length+1;
184#if USE_ALLOCATOR
185 IdentifierBucket *Identifier = (IdentifierBucket*)
186 ((MemRegion*)TheMemory)->Allocate(AllocSize, (MemRegion**)&TheMemory);
187#else
188 IdentifierBucket *Identifier = (IdentifierBucket*)malloc(AllocSize);
189#endif
190 Identifier->TokInfo.NameLen = Length;
191 Identifier->TokInfo.Macro = 0;
192 Identifier->TokInfo.TokenID = tok::identifier;
193 Identifier->TokInfo.IsExtension = false;
194 Identifier->TokInfo.FETokenInfo = 0;
195
196 // Copy the string information.
197 char *StrBuffer = (char*)(Identifier+1);
198 memcpy(StrBuffer, NameStart, Length);
199 StrBuffer[Length] = 0; // Null terminate string.
200
201 // Link it into the hash table.
202 Identifier->Next = IdentHead;
203 TableArray[Hash] = Identifier;
204 return Identifier->TokInfo;
205}
206
207IdentifierTokenInfo &IdentifierTable::get(const std::string &Name) {
208 // Don't use c_str() here: no need to be null terminated.
209 const char *NameBytes = &Name[0];
210 unsigned Size = Name.size();
211 return get(NameBytes, NameBytes+Size);
212}
213
214
215
216/// PrintStats - Print statistics about how well the identifier table is doing
217/// at hashing identifiers.
218void IdentifierTable::PrintStats() const {
219 unsigned NumIdentifiers = 0;
220 unsigned NumEmptyBuckets = 0;
221 unsigned MaxBucketLength = 0;
222 unsigned AverageIdentifierSize = 0;
223 unsigned MaxIdentifierLength = 0;
224
225 IdentifierBucket **TableArray = (IdentifierBucket**)TheTable;
226 for (unsigned i = 0, e = HASH_TABLE_SIZE; i != e; ++i) {
227
228 unsigned NumIdentifiersInBucket = 0;
229 for (IdentifierBucket *Id = TableArray[i]; Id; Id = Id->Next) {
230 AverageIdentifierSize += Id->TokInfo.getNameLength();
231 if (MaxIdentifierLength < Id->TokInfo.getNameLength())
232 MaxIdentifierLength = Id->TokInfo.getNameLength();
233 ++NumIdentifiersInBucket;
234 }
235 if (NumIdentifiersInBucket > MaxBucketLength)
236 MaxBucketLength = NumIdentifiersInBucket;
237 if (NumIdentifiersInBucket == 0)
238 ++NumEmptyBuckets;
239
240 NumIdentifiers += NumIdentifiersInBucket;
241 }
242
243 std::cerr << "\n*** Identifier Table Stats:\n";
244 std::cerr << "# Identifiers: " << NumIdentifiers << "\n";
245 std::cerr << "# Empty Buckets: " << NumEmptyBuckets << "\n";
246 std::cerr << "Max identifiers in one bucket: " << MaxBucketLength << "\n";
247 std::cerr << "Hash density (#identifiers per bucket): "
248 << NumIdentifiers/(double)HASH_TABLE_SIZE << "\n";
249 std::cerr << "Nonempty hash density (average chain length): "
250 << NumIdentifiers/(double)(HASH_TABLE_SIZE-NumEmptyBuckets) << "\n";
251 std::cerr << "Ave identifier length: "
252 << (AverageIdentifierSize/(double)NumIdentifiers) << "\n";
253 std::cerr << "Max identifier length: " << MaxIdentifierLength << "\n";
254
255 // Compute statistics about the memory allocated for identifiers.
256#if USE_ALLOCATOR
257 unsigned BytesUsed = 0;
258 unsigned NumRegions = 0;
259 const MemRegion *R = (MemRegion*)TheMemory;
260 for (; R; R = R->getNext(), ++NumRegions) {
261 BytesUsed += R->getNumBytesAllocated();
262 }
263 std::cerr << "\nNumber of memory regions: " << NumRegions << "\n";
264 std::cerr << "Bytes allocated for identifiers: " << BytesUsed << "\n";
265#endif
266}
267
268