| //===--- Preprocess.cpp - C Language Family Preprocessor Implementation ---===// |
| // |
| // The LLVM Compiler Infrastructure |
| // |
| // This file was developed by Chris Lattner and is distributed under |
| // the University of Illinois Open Source License. See LICENSE.TXT for details. |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file implements the IdentifierInfo, IdentifierVisitor, and |
| // IdentifierTable interfaces. |
| // |
| //===----------------------------------------------------------------------===// |
| |
#include "clang/Lex/IdentifierTable.h"
#include "clang/Lex/MacroInfo.h"
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <stdint.h>
using namespace llvm;
using namespace clang;
| |
| //===----------------------------------------------------------------------===// |
| // IdentifierInfo Implementation |
| //===----------------------------------------------------------------------===// |
| |
| void IdentifierInfo::Destroy() { |
| delete Macro; |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // IdentifierVisitor Implementation |
| //===----------------------------------------------------------------------===// |
| |
| IdentifierVisitor::~IdentifierVisitor() { |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // Memory Allocation Support |
| //===----------------------------------------------------------------------===// |
| |
/// The identifier table has a very simple memory allocation pattern: it just
/// keeps allocating identifiers, and never frees them unless it frees them
/// all.  As such, we use a simple bump-pointer memory allocator to make
/// allocation speedy.  Shark showed that malloc accounted for 27% of the time
/// spent in IdentifierTable::getIdentifier, versus 4.3% with this allocator.
| #define USE_ALLOCATOR 1 |
| #if USE_ALLOCATOR |
| |
| namespace { |
| class MemRegion { |
| unsigned RegionSize; |
| MemRegion *Next; |
| char *NextPtr; |
| public: |
| void Init(unsigned size, MemRegion *next) { |
| RegionSize = size; |
| Next = next; |
| NextPtr = (char*)(this+1); |
| |
| // FIXME: uses GCC extension. |
| unsigned Alignment = __alignof__(IdentifierInfo); |
| NextPtr = (char*)((intptr_t)(NextPtr+Alignment-1) & |
| ~(intptr_t)(Alignment-1)); |
| } |
| |
| const MemRegion *getNext() const { return Next; } |
| unsigned getNumBytesAllocated() const { |
| return NextPtr-(const char*)this; |
| } |
| |
| /// Allocate - Allocate and return at least the specified number of bytes. |
| /// |
| void *Allocate(unsigned AllocSize, MemRegion **RegPtr) { |
| // FIXME: uses GCC extension. |
| unsigned Alignment = __alignof__(IdentifierInfo); |
| // Round size up to an even multiple of the alignment. |
| AllocSize = (AllocSize+Alignment-1) & ~(Alignment-1); |
| |
| // If there is space in this region for the identifier, return it. |
| if (unsigned(NextPtr+AllocSize-(char*)this) <= RegionSize) { |
| void *Result = NextPtr; |
| NextPtr += AllocSize; |
| return Result; |
| } |
| |
| // Otherwise, we have to allocate a new chunk. Create one twice as big as |
| // this one. |
| MemRegion *NewRegion = (MemRegion *)malloc(RegionSize*2); |
| NewRegion->Init(RegionSize*2, this); |
| |
| // Update the current "first region" pointer to point to the new region. |
| *RegPtr = NewRegion; |
| |
| // Try allocating from it now. |
| return NewRegion->Allocate(AllocSize, RegPtr); |
| } |
| |
| /// Deallocate - Release all memory for this region to the system. |
| /// |
| void Deallocate() { |
| MemRegion *next = Next; |
| free(this); |
| if (next) |
| next->Deallocate(); |
| } |
| }; |
| } |
| |
| #endif |
| |
| //===----------------------------------------------------------------------===// |
| // IdentifierTable Implementation |
| //===----------------------------------------------------------------------===// |
| |
| |
| /// IdentifierLink - There is one of these allocated by IdentifierInfo. |
| /// These form the linked list of buckets for the hash table. |
| struct IdentifierBucket { |
| /// Next - This is the next bucket in the linked list. |
| IdentifierBucket *Next; |
| |
| IdentifierInfo TokInfo; |
| // NOTE: TokInfo must be the last element in this structure, as the string |
| // information for the identifier is allocated right after it. |
| }; |
| |
// Number of buckets in the identifier hash table.  Note that the current value
// is 8096, which is slightly less than 8K (8192).
// FIXME: start the table off at 8K entries, and GROW it when the density
// (identifiers per bucket) gets to 3.
static unsigned HASH_TABLE_SIZE = 8096;
| |
| IdentifierTable::IdentifierTable() { |
| IdentifierBucket **TableArray = new IdentifierBucket*[HASH_TABLE_SIZE](); |
| TheTable = TableArray; |
| NumIdentifiers = 0; |
| #if USE_ALLOCATOR |
| TheMemory = malloc(8*4096); |
| ((MemRegion*)TheMemory)->Init(8*4096, 0); |
| #endif |
| |
| memset(TheTable, 0, HASH_TABLE_SIZE*sizeof(IdentifierBucket*)); |
| } |
| |
| IdentifierTable::~IdentifierTable() { |
| IdentifierBucket **TableArray = (IdentifierBucket**)TheTable; |
| for (unsigned i = 0, e = HASH_TABLE_SIZE; i != e; ++i) { |
| IdentifierBucket *Id = TableArray[i]; |
| while (Id) { |
| // Free memory referenced by the identifier (e.g. macro info). |
| Id->TokInfo.Destroy(); |
| |
| IdentifierBucket *Next = Id->Next; |
| #if !USE_ALLOCATOR |
| free(Id); |
| #endif |
| Id = Next; |
| } |
| } |
| #if USE_ALLOCATOR |
| ((MemRegion*)TheMemory)->Deallocate(); |
| #endif |
| delete [] TableArray; |
| } |
| |
/// HashString - Compute a hash code for the characters in [Start, End).
/// An empty range hashes to 0.
static unsigned HashString(const char *Start, const char *End) {
  unsigned Hash = 0;
  // Perl hash function: multiply by 33 and add each character in turn.
  for (const char *Ptr = Start; Ptr != End; ++Ptr)
    Hash = Hash * 33 + *Ptr;
  // Fold some of the higher bits back into the result.
  Hash += Hash >> 5;
  return Hash;
}
| |
| IdentifierInfo &IdentifierTable::get(const char *NameStart, |
| const char *NameEnd) { |
| IdentifierBucket **TableArray = (IdentifierBucket**)TheTable; |
| |
| unsigned Hash = HashString(NameStart, NameEnd) % HASH_TABLE_SIZE; |
| unsigned Length = NameEnd-NameStart; |
| |
| IdentifierBucket *IdentHead = TableArray[Hash]; |
| for (IdentifierBucket *Identifier = IdentHead; Identifier; |
| Identifier = Identifier->Next) { |
| if (Identifier->TokInfo.getNameLength() == Length && |
| memcmp(Identifier->TokInfo.getName(), NameStart, Length) == 0) |
| return Identifier->TokInfo; |
| } |
| |
| // Allocate a new identifier, with space for the null-terminated string at the |
| // end. |
| unsigned AllocSize = sizeof(IdentifierBucket)+Length+1; |
| #if USE_ALLOCATOR |
| IdentifierBucket *Identifier = (IdentifierBucket*) |
| ((MemRegion*)TheMemory)->Allocate(AllocSize, (MemRegion**)&TheMemory); |
| #else |
| IdentifierBucket *Identifier = (IdentifierBucket*)malloc(AllocSize); |
| #endif |
| Identifier->TokInfo.NameLen = Length; |
| Identifier->TokInfo.Macro = 0; |
| Identifier->TokInfo.TokenID = tok::identifier; |
| Identifier->TokInfo.IsExtension = false; |
| Identifier->TokInfo.IsPoisoned = false; |
| Identifier->TokInfo.FETokenInfo = 0; |
| |
| // Copy the string information. |
| char *StrBuffer = (char*)(Identifier+1); |
| memcpy(StrBuffer, NameStart, Length); |
| StrBuffer[Length] = 0; // Null terminate string. |
| |
| // Link it into the hash table. |
| Identifier->Next = IdentHead; |
| TableArray[Hash] = Identifier; |
| return Identifier->TokInfo; |
| } |
| |
| IdentifierInfo &IdentifierTable::get(const std::string &Name) { |
| // Don't use c_str() here: no need to be null terminated. |
| const char *NameBytes = &Name[0]; |
| unsigned Size = Name.size(); |
| return get(NameBytes, NameBytes+Size); |
| } |
| |
| /// VisitIdentifiers - This method walks through all of the identifiers, |
| /// invoking IV->VisitIdentifier for each of them. |
| void IdentifierTable::VisitIdentifiers(const IdentifierVisitor &IV) { |
| IdentifierBucket **TableArray = (IdentifierBucket**)TheTable; |
| for (unsigned i = 0, e = HASH_TABLE_SIZE; i != e; ++i) { |
| for (IdentifierBucket *Id = TableArray[i]; Id; Id = Id->Next) |
| IV.VisitIdentifier(Id->TokInfo); |
| } |
| } |
| |
| /// PrintStats - Print statistics about how well the identifier table is doing |
| /// at hashing identifiers. |
| void IdentifierTable::PrintStats() const { |
| unsigned NumIdentifiers = 0; |
| unsigned NumEmptyBuckets = 0; |
| unsigned MaxBucketLength = 0; |
| unsigned AverageIdentifierSize = 0; |
| unsigned MaxIdentifierLength = 0; |
| |
| IdentifierBucket **TableArray = (IdentifierBucket**)TheTable; |
| for (unsigned i = 0, e = HASH_TABLE_SIZE; i != e; ++i) { |
| |
| unsigned NumIdentifiersInBucket = 0; |
| for (IdentifierBucket *Id = TableArray[i]; Id; Id = Id->Next) { |
| AverageIdentifierSize += Id->TokInfo.getNameLength(); |
| if (MaxIdentifierLength < Id->TokInfo.getNameLength()) |
| MaxIdentifierLength = Id->TokInfo.getNameLength(); |
| ++NumIdentifiersInBucket; |
| } |
| if (NumIdentifiersInBucket > MaxBucketLength) |
| MaxBucketLength = NumIdentifiersInBucket; |
| if (NumIdentifiersInBucket == 0) |
| ++NumEmptyBuckets; |
| |
| NumIdentifiers += NumIdentifiersInBucket; |
| } |
| |
| std::cerr << "\n*** Identifier Table Stats:\n"; |
| std::cerr << "# Identifiers: " << NumIdentifiers << "\n"; |
| std::cerr << "# Empty Buckets: " << NumEmptyBuckets << "\n"; |
| std::cerr << "Max identifiers in one bucket: " << MaxBucketLength << "\n"; |
| std::cerr << "Hash density (#identifiers per bucket): " |
| << NumIdentifiers/(double)HASH_TABLE_SIZE << "\n"; |
| std::cerr << "Nonempty hash density (average chain length): " |
| << NumIdentifiers/(double)(HASH_TABLE_SIZE-NumEmptyBuckets) << "\n"; |
| std::cerr << "Ave identifier length: " |
| << (AverageIdentifierSize/(double)NumIdentifiers) << "\n"; |
| std::cerr << "Max identifier length: " << MaxIdentifierLength << "\n"; |
| |
| // Compute statistics about the memory allocated for identifiers. |
| #if USE_ALLOCATOR |
| unsigned BytesUsed = 0; |
| unsigned NumRegions = 0; |
| const MemRegion *R = (MemRegion*)TheMemory; |
| for (; R; R = R->getNext(), ++NumRegions) { |
| BytesUsed += R->getNumBytesAllocated(); |
| } |
| std::cerr << "\nNumber of memory regions: " << NumRegions << "\n"; |
| std::cerr << "Bytes allocated for identifiers: " << BytesUsed << "\n"; |
| #endif |
| } |
| |
| |