Blame - clang/Lex/IdentifierTable.cpp - toolchain/llvm-project

blob: 049521768917e7247541ed276ebfea3bfad72580 [file] [log] [blame]

Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	1	//===--- IdentifierTable.cpp - Hash table for identifier lookup -----------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file was developed by Chris Lattner and is distributed under
				6	// the University of Illinois Open Source License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
Chris Lattner	c79f6fb	2006-07-04 17:53:21 +0000	[diff] [blame]	10	// This file implements the IdentifierInfo, IdentifierVisitor, and
Chris Lattner	91cbf11	2006-07-03 04:28:52 +0000	[diff] [blame]	11	// IdentifierTable interfaces.
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	12	//
				13	//===----------------------------------------------------------------------===//
				14
				15	#include "clang/Lex/IdentifierTable.h"
				16	#include "clang/Lex/MacroInfo.h"
Chris Lattner	25e0d54	2006-10-18 06:07:05 +0000	[diff] [blame^]	17	#include "clang/Basic/LangOptions.h"
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	18	#include <iostream>
				19	using namespace llvm;
				20	using namespace clang;
				21
				22	//===----------------------------------------------------------------------===//
Chris Lattner	c79f6fb	2006-07-04 17:53:21 +0000	[diff] [blame]	23	// IdentifierInfo Implementation
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	24	//===----------------------------------------------------------------------===//
				25
Chris Lattner	c79f6fb	2006-07-04 17:53:21 +0000	[diff] [blame]	26	void IdentifierInfo::Destroy() {
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	27	delete Macro;
				28	}
				29
Chris Lattner	91cbf11	2006-07-03 04:28:52 +0000	[diff] [blame]	30	//===----------------------------------------------------------------------===//
				31	// IdentifierVisitor Implementation
				32	//===----------------------------------------------------------------------===//
				33
				34	IdentifierVisitor::~IdentifierVisitor() {
				35	}
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	36
				37	//===----------------------------------------------------------------------===//
				38	// Memory Allocation Support
				39	//===----------------------------------------------------------------------===//
				40
				41	/// The identifier table has a very simple memory allocation pattern: it just
				42	/// keeps allocating identifiers, then never frees them unless it frees them
				43	/// all. As such, we use a simple bump-pointer memory allocator to make
				44	/// allocation speedy. Shark showed that malloc was 27% of the time spent in
				45	/// IdentifierTable::getIdentifier with malloc, and takes a 4.3% time with this.
				46	#define USE_ALLOCATOR 1
				47	#if USE_ALLOCATOR
				48
				49	namespace {
				50	class MemRegion {
				51	unsigned RegionSize;
				52	MemRegion *Next;
				53	char *NextPtr;
				54	public:
				55	void Init(unsigned size, MemRegion *next) {
				56	RegionSize = size;
				57	Next = next;
				58	NextPtr = (char*)(this+1);
				59
				60	// FIXME: uses GCC extension.
Chris Lattner	c79f6fb	2006-07-04 17:53:21 +0000	[diff] [blame]	61	unsigned Alignment = __alignof__(IdentifierInfo);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	62	NextPtr = (char*)((intptr_t)(NextPtr+Alignment-1) &
				63	~(intptr_t)(Alignment-1));
				64	}
				65
				66	const MemRegion *getNext() const { return Next; }
				67	unsigned getNumBytesAllocated() const {
				68	return NextPtr-(const char*)this;
				69	}
				70
				71	/// Allocate - Allocate and return at least the specified number of bytes.
				72	///
				73	void Allocate(unsigned AllocSize, MemRegion *RegPtr) {
				74	// FIXME: uses GCC extension.
Chris Lattner	c79f6fb	2006-07-04 17:53:21 +0000	[diff] [blame]	75	unsigned Alignment = __alignof__(IdentifierInfo);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	76	// Round size up to an even multiple of the alignment.
				77	AllocSize = (AllocSize+Alignment-1) & ~(Alignment-1);
				78
				79	// If there is space in this region for the identifier, return it.
				80	if (unsigned(NextPtr+AllocSize-(char*)this) <= RegionSize) {
				81	void *Result = NextPtr;
				82	NextPtr += AllocSize;
				83	return Result;
				84	}
				85
				86	// Otherwise, we have to allocate a new chunk. Create one twice as big as
				87	// this one.
				88	MemRegion NewRegion = (MemRegion )malloc(RegionSize*2);
				89	NewRegion->Init(RegionSize*2, this);
				90
				91	// Update the current "first region" pointer to point to the new region.
				92	*RegPtr = NewRegion;
				93
				94	// Try allocating from it now.
				95	return NewRegion->Allocate(AllocSize, RegPtr);
				96	}
				97
				98	/// Deallocate - Release all memory for this region to the system.
				99	///
				100	void Deallocate() {
				101	MemRegion *next = Next;
				102	free(this);
				103	if (next)
				104	next->Deallocate();
				105	}
				106	};
				107	}
				108
				109	#endif
				110
				111	//===----------------------------------------------------------------------===//
				112	// IdentifierTable Implementation
				113	//===----------------------------------------------------------------------===//
				114
				115
Chris Lattner	c79f6fb	2006-07-04 17:53:21 +0000	[diff] [blame]	116	/// IdentifierLink - There is one of these allocated by IdentifierInfo.
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	117	/// These form the linked list of buckets for the hash table.
				118	struct IdentifierBucket {
				119	/// Next - This is the next bucket in the linked list.
				120	IdentifierBucket *Next;
				121
Chris Lattner	c79f6fb	2006-07-04 17:53:21 +0000	[diff] [blame]	122	IdentifierInfo TokInfo;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	123	// NOTE: TokInfo must be the last element in this structure, as the string
				124	// information for the identifier is allocated right after it.
				125	};
				126
				127	// FIXME: start hashtablesize off at 8K entries, GROW when density gets to 3.
				128	static unsigned HASH_TABLE_SIZE = 8096;
				129
Chris Lattner	25e0d54	2006-10-18 06:07:05 +0000	[diff] [blame^]	130	IdentifierTable::IdentifierTable(const LangOptions &LangOpts) {
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	131	IdentifierBucket *TableArray = new IdentifierBucket[HASH_TABLE_SIZE]();
				132	TheTable = TableArray;
				133	NumIdentifiers = 0;
				134	#if USE_ALLOCATOR
				135	TheMemory = malloc(8*4096);
				136	((MemRegion)TheMemory)->Init(84096, 0);
				137	#endif
				138
				139	memset(TheTable, 0, HASH_TABLE_SIZEsizeof(IdentifierBucket));
Chris Lattner	25e0d54	2006-10-18 06:07:05 +0000	[diff] [blame^]	140
				141	AddKeywords(LangOpts);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	142	}
				143
				144	IdentifierTable::~IdentifierTable() {
				145	IdentifierBucket TableArray = (IdentifierBucket)TheTable;
				146	for (unsigned i = 0, e = HASH_TABLE_SIZE; i != e; ++i) {
				147	IdentifierBucket *Id = TableArray[i];
				148	while (Id) {
				149	// Free memory referenced by the identifier (e.g. macro info).
				150	Id->TokInfo.Destroy();
				151
				152	IdentifierBucket *Next = Id->Next;
				153	#if !USE_ALLOCATOR
				154	free(Id);
				155	#endif
				156	Id = Next;
				157	}
				158	}
				159	#if USE_ALLOCATOR
				160	((MemRegion*)TheMemory)->Deallocate();
				161	#endif
				162	delete [] TableArray;
				163	}
				164
				165	/// HashString - Compute a hash code for the specified string.
				166	///
				167	static unsigned HashString(const char Start, const char End) {
				168	unsigned int Result = 0;
				169	// Perl hash function.
				170	while (Start != End)
				171	Result = Result * 33 + *Start++;
				172	Result = Result + (Result >> 5);
				173	return Result;
				174	}
				175
Chris Lattner	c79f6fb	2006-07-04 17:53:21 +0000	[diff] [blame]	176	IdentifierInfo &IdentifierTable::get(const char *NameStart,
Chris Lattner	0e1cf1f	2006-07-04 18:53:52 +0000	[diff] [blame]	177	const char *NameEnd) {
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	178	IdentifierBucket TableArray = (IdentifierBucket)TheTable;
				179
				180	unsigned Hash = HashString(NameStart, NameEnd) % HASH_TABLE_SIZE;
				181	unsigned Length = NameEnd-NameStart;
				182
				183	IdentifierBucket *IdentHead = TableArray[Hash];
Chris Lattner	d0a96ba	2006-07-10 06:10:51 +0000	[diff] [blame]	184	for (IdentifierBucket Identifier = IdentHead, LastID = 0; Identifier;
				185	LastID = Identifier, Identifier = Identifier->Next) {
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	186	if (Identifier->TokInfo.getNameLength() == Length &&
Chris Lattner	d0a96ba	2006-07-10 06:10:51 +0000	[diff] [blame]	187	memcmp(Identifier->TokInfo.getName(), NameStart, Length) == 0) {
				188	// If found identifier wasn't at start of bucket, move it there so
				189	// that frequently searched for identifiers are found earlier, even if
				190	// they first occur late in the source file.
				191	if (LastID) {
				192	LastID->Next = Identifier->Next;
				193	Identifier->Next = IdentHead;
				194	TableArray[Hash] = Identifier;
				195	}
				196
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	197	return Identifier->TokInfo;
Chris Lattner	d0a96ba	2006-07-10 06:10:51 +0000	[diff] [blame]	198	}
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	199	}
				200
				201	// Allocate a new identifier, with space for the null-terminated string at the
				202	// end.
				203	unsigned AllocSize = sizeof(IdentifierBucket)+Length+1;
				204	#if USE_ALLOCATOR
				205	IdentifierBucket Identifier = (IdentifierBucket)
				206	((MemRegion)TheMemory)->Allocate(AllocSize, (MemRegion*)&TheMemory);
				207	#else
				208	IdentifierBucket Identifier = (IdentifierBucket)malloc(AllocSize);
				209	#endif
				210	Identifier->TokInfo.NameLen = Length;
				211	Identifier->TokInfo.Macro = 0;
				212	Identifier->TokInfo.TokenID = tok::identifier;
Chris Lattner	87d3bec	2006-10-17 03:44:32 +0000	[diff] [blame]	213	Identifier->TokInfo.PPID = tok::pp_not_keyword;
Chris Lattner	720f270	2006-10-17 04:03:44 +0000	[diff] [blame]	214	Identifier->TokInfo.ObjCID = tok::objc_not_keyword;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	215	Identifier->TokInfo.IsExtension = false;
Chris Lattner	1786217	2006-06-24 22:12:56 +0000	[diff] [blame]	216	Identifier->TokInfo.IsPoisoned = false;
Chris Lattner	063400e	2006-10-14 19:54:15 +0000	[diff] [blame]	217	Identifier->TokInfo.IsOtherTargetMacro = false;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	218	Identifier->TokInfo.FETokenInfo = 0;
				219
				220	// Copy the string information.
				221	char StrBuffer = (char)(Identifier+1);
				222	memcpy(StrBuffer, NameStart, Length);
				223	StrBuffer[Length] = 0; // Null terminate string.
				224
Chris Lattner	d0a96ba	2006-07-10 06:10:51 +0000	[diff] [blame]	225	// Link it into the hash table. Adding it to the start of the hash table is
				226	// useful for buckets with lots of entries. This means that more recently
				227	// referenced identifiers will be near the head of the bucket.
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	228	Identifier->Next = IdentHead;
				229	TableArray[Hash] = Identifier;
				230	return Identifier->TokInfo;
				231	}
				232
Chris Lattner	c79f6fb	2006-07-04 17:53:21 +0000	[diff] [blame]	233	IdentifierInfo &IdentifierTable::get(const std::string &Name) {
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	234	// Don't use c_str() here: no need to be null terminated.
				235	const char *NameBytes = &Name[0];
				236	unsigned Size = Name.size();
				237	return get(NameBytes, NameBytes+Size);
				238	}
				239
Chris Lattner	91cbf11	2006-07-03 04:28:52 +0000	[diff] [blame]	240	/// VisitIdentifiers - This method walks through all of the identifiers,
				241	/// invoking IV->VisitIdentifier for each of them.
				242	void IdentifierTable::VisitIdentifiers(const IdentifierVisitor &IV) {
				243	IdentifierBucket TableArray = (IdentifierBucket)TheTable;
				244	for (unsigned i = 0, e = HASH_TABLE_SIZE; i != e; ++i) {
				245	for (IdentifierBucket *Id = TableArray[i]; Id; Id = Id->Next)
				246	IV.VisitIdentifier(Id->TokInfo);
				247	}
				248	}
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	249
Chris Lattner	25e0d54	2006-10-18 06:07:05 +0000	[diff] [blame^]	250	//===----------------------------------------------------------------------===//
				251	// Language Keyword Implementation
				252	//===----------------------------------------------------------------------===//
				253
				254	/// AddKeyword - This method is used to associate a token ID with specific
				255	/// identifiers because they are language keywords. This causes the lexer to
				256	/// automatically map matching identifiers to specialized token codes.
				257	///
				258	/// The C90/C99/CPP flags are set to 0 if the token should be enabled in the
				259	/// specified langauge, set to 1 if it is an extension in the specified
				260	/// language, and set to 2 if disabled in the specified language.
				261	static void AddKeyword(const std::string &Keyword, tok::TokenKind TokenCode,
				262	int C90, int C99, int CPP,
				263	const LangOptions &LangOpts, IdentifierTable &Table) {
				264	int Flags = LangOpts.CPlusPlus ? CPP : (LangOpts.C99 ? C99 : C90);
				265
				266	// Don't add this keyword if disabled in this language or if an extension
				267	// and extensions are disabled.
				268	if (Flags + LangOpts.NoExtensions >= 2) return;
				269
				270	const char *Str = &Keyword[0];
				271	IdentifierInfo &Info = Table.get(Str, Str+Keyword.size());
				272	Info.setTokenID(TokenCode);
				273	Info.setIsExtensionToken(Flags == 1);
				274	}
				275
				276	/// AddPPKeyword - Register a preprocessor keyword like "define" "undef" or
				277	/// "elif".
				278	static void AddPPKeyword(tok::PPKeywordKind PPID,
				279	const char *Name, unsigned NameLen,
				280	IdentifierTable &Table) {
				281	Table.get(Name, Name+NameLen).setPPKeywordID(PPID);
				282	}
				283
				284	/// AddObjCKeyword - Register an Objective-C @keyword like "class" "selector" or
				285	/// "property".
				286	static void AddObjCKeyword(tok::ObjCKeywordKind ObjCID,
				287	const char *Name, unsigned NameLen,
				288	IdentifierTable &Table) {
				289	Table.get(Name, Name+NameLen).setObjCKeywordID(ObjCID);
				290	}
				291
				292	/// AddKeywords - Add all keywords to the symbol table.
				293	///
				294	void IdentifierTable::AddKeywords(const LangOptions &LangOpts) {
				295	enum {
				296	C90Shift = 0,
				297	EXTC90 = 1 << C90Shift,
				298	NOTC90 = 2 << C90Shift,
				299	C99Shift = 2,
				300	EXTC99 = 1 << C99Shift,
				301	NOTC99 = 2 << C99Shift,
				302	CPPShift = 4,
				303	EXTCPP = 1 << CPPShift,
				304	NOTCPP = 2 << CPPShift,
				305	Mask = 3
				306	};
				307
				308	// Add keywords and tokens for the current language.
				309	#define KEYWORD(NAME, FLAGS) \
				310	AddKeyword(#NAME, tok::kw_ ## NAME, \
				311	((FLAGS) >> C90Shift) & Mask, \
				312	((FLAGS) >> C99Shift) & Mask, \
				313	((FLAGS) >> CPPShift) & Mask, LangOpts, *this);
				314	#define ALIAS(NAME, TOK) \
				315	AddKeyword(NAME, tok::kw_ ## TOK, 0, 0, 0, LangOpts, *this);
				316	#define PPKEYWORD(NAME) \
				317	AddPPKeyword(tok::pp_##NAME, #NAME, strlen(#NAME), *this);
				318	#define OBJC1_AT_KEYWORD(NAME) \
				319	if (LangOpts.ObjC1) \
				320	AddObjCKeyword(tok::objc_##NAME, #NAME, strlen(#NAME), *this);
				321	#define OBJC2_AT_KEYWORD(NAME) \
				322	if (LangOpts.ObjC2) \
				323	AddObjCKeyword(tok::objc_##NAME, #NAME, strlen(#NAME), *this);
				324	#include "clang/Basic/TokenKinds.def"
				325	}
				326
				327
				328	//===----------------------------------------------------------------------===//
				329	// Stats Implementation
				330	//===----------------------------------------------------------------------===//
				331
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	332	/// PrintStats - Print statistics about how well the identifier table is doing
				333	/// at hashing identifiers.
				334	void IdentifierTable::PrintStats() const {
				335	unsigned NumIdentifiers = 0;
				336	unsigned NumEmptyBuckets = 0;
				337	unsigned MaxBucketLength = 0;
				338	unsigned AverageIdentifierSize = 0;
				339	unsigned MaxIdentifierLength = 0;
				340
				341	IdentifierBucket TableArray = (IdentifierBucket)TheTable;
				342	for (unsigned i = 0, e = HASH_TABLE_SIZE; i != e; ++i) {
				343
				344	unsigned NumIdentifiersInBucket = 0;
				345	for (IdentifierBucket *Id = TableArray[i]; Id; Id = Id->Next) {
				346	AverageIdentifierSize += Id->TokInfo.getNameLength();
				347	if (MaxIdentifierLength < Id->TokInfo.getNameLength())
				348	MaxIdentifierLength = Id->TokInfo.getNameLength();
				349	++NumIdentifiersInBucket;
				350	}
				351	if (NumIdentifiersInBucket > MaxBucketLength)
				352	MaxBucketLength = NumIdentifiersInBucket;
				353	if (NumIdentifiersInBucket == 0)
				354	++NumEmptyBuckets;
				355
				356	NumIdentifiers += NumIdentifiersInBucket;
				357	}
				358
				359	std::cerr << "\n*** Identifier Table Stats:\n";
				360	std::cerr << "# Identifiers: " << NumIdentifiers << "\n";
				361	std::cerr << "# Empty Buckets: " << NumEmptyBuckets << "\n";
				362	std::cerr << "Max identifiers in one bucket: " << MaxBucketLength << "\n";
				363	std::cerr << "Hash density (#identifiers per bucket): "
				364	<< NumIdentifiers/(double)HASH_TABLE_SIZE << "\n";
				365	std::cerr << "Nonempty hash density (average chain length): "
				366	<< NumIdentifiers/(double)(HASH_TABLE_SIZE-NumEmptyBuckets) << "\n";
				367	std::cerr << "Ave identifier length: "
				368	<< (AverageIdentifierSize/(double)NumIdentifiers) << "\n";
				369	std::cerr << "Max identifier length: " << MaxIdentifierLength << "\n";
				370
				371	// Compute statistics about the memory allocated for identifiers.
				372	#if USE_ALLOCATOR
				373	unsigned BytesUsed = 0;
				374	unsigned NumRegions = 0;
				375	const MemRegion R = (MemRegion)TheMemory;
				376	for (; R; R = R->getNext(), ++NumRegions) {
				377	BytesUsed += R->getNumBytesAllocated();
				378	}
				379	std::cerr << "\nNumber of memory regions: " << NumRegions << "\n";
				380	std::cerr << "Bytes allocated for identifiers: " << BytesUsed << "\n";
				381	#endif
				382	}
				383
				384