Blame - clang/Lex/IdentifierTable.cpp - toolchain/llvm-project

blob: d4210c83a1a2a5f282182ddca45c602a4c09a70b [file] [log] [blame]

Chris Lattner	ec659fc	2006-10-29 22:09:44 +0000	[diff] [blame^]	1	//===--- IdentifierTable.cpp - Hash table for identifier lookup -----------===//
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file was developed by Chris Lattner and is distributed under
				6	// the University of Illinois Open Source License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
Chris Lattner	c79f6fb	2006-07-04 17:53:21 +0000	[diff] [blame]	10	// This file implements the IdentifierInfo, IdentifierVisitor, and
Chris Lattner	91cbf11	2006-07-03 04:28:52 +0000	[diff] [blame]	11	// IdentifierTable interfaces.
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	12	//
				13	//===----------------------------------------------------------------------===//
				14
				15	#include "clang/Lex/IdentifierTable.h"
				16	#include "clang/Lex/MacroInfo.h"
Chris Lattner	25e0d54	2006-10-18 06:07:05 +0000	[diff] [blame]	17	#include "clang/Basic/LangOptions.h"
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	18	#include <iostream>
				19	using namespace llvm;
				20	using namespace clang;
				21
				22	//===----------------------------------------------------------------------===//
Chris Lattner	c79f6fb	2006-07-04 17:53:21 +0000	[diff] [blame]	23	// IdentifierInfo Implementation
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	24	//===----------------------------------------------------------------------===//
				25
Chris Lattner	3bc804e	2006-10-28 23:46:24 +0000	[diff] [blame]	26	IdentifierInfo::IdentifierInfo() {
				27	Macro = 0;
				28	TokenID = tok::identifier;
				29	PPID = tok::pp_not_keyword;
				30	ObjCID = tok::objc_not_keyword;
				31	IsExtension = false;
				32	IsPoisoned = false;
				33	IsOtherTargetMacro = false;
				34	FETokenInfo = 0;
				35	}
				36
				37	IdentifierInfo::~IdentifierInfo() {
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	38	delete Macro;
				39	}
				40
Chris Lattner	91cbf11	2006-07-03 04:28:52 +0000	[diff] [blame]	41	//===----------------------------------------------------------------------===//
				42	// IdentifierVisitor Implementation
				43	//===----------------------------------------------------------------------===//
				44
				45	IdentifierVisitor::~IdentifierVisitor() {
				46	}
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	47
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	48
				49	//===----------------------------------------------------------------------===//
				50	// IdentifierTable Implementation
				51	//===----------------------------------------------------------------------===//
				52
Chris Lattner	f2e3ac3	2006-10-27 03:59:10 +0000	[diff] [blame]	53	/// IdentifierBucket - The hash table consists of an array of these. If Info is
				54	/// non-null, this is an extant entry, otherwise, it is a hole.
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	55	struct IdentifierBucket {
Chris Lattner	f2e3ac3	2006-10-27 03:59:10 +0000	[diff] [blame]	56	/// FullHashValue - This remembers the full hash value of the identifier for
				57	/// easy scanning.
				58	unsigned FullHashValue;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	59
Chris Lattner	f2e3ac3	2006-10-27 03:59:10 +0000	[diff] [blame]	60	/// Info - This is a pointer to the actual identifier info object.
				61	IdentifierInfo *Info;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	62	};
				63
Chris Lattner	25e0d54	2006-10-18 06:07:05 +0000	[diff] [blame]	64	IdentifierTable::IdentifierTable(const LangOptions &LangOpts) {
Chris Lattner	f2e3ac3	2006-10-27 03:59:10 +0000	[diff] [blame]	65	HashTableSize = 8192; // Start with space for 8K identifiers.
				66	IdentifierBucket *TableArray = new IdentifierBucket[HashTableSize]();
				67	memset(TableArray, 0, HashTableSize*sizeof(IdentifierBucket));
				68
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	69	TheTable = TableArray;
				70	NumIdentifiers = 0;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	71
Chris Lattner	f2e3ac3	2006-10-27 03:59:10 +0000	[diff] [blame]	72	// Populate the identifier table with info about keywords for the current
				73	// language.
Chris Lattner	25e0d54	2006-10-18 06:07:05 +0000	[diff] [blame]	74	AddKeywords(LangOpts);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	75	}
				76
				77	IdentifierTable::~IdentifierTable() {
Chris Lattner	f2e3ac3	2006-10-27 03:59:10 +0000	[diff] [blame]	78	IdentifierBucket TableArray = (IdentifierBucket)TheTable;
				79	for (unsigned i = 0, e = HashTableSize; i != e; ++i) {
				80	if (IdentifierInfo *Id = TableArray[i].Info) {
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	81	// Free memory referenced by the identifier (e.g. macro info).
Chris Lattner	3bc804e	2006-10-28 23:46:24 +0000	[diff] [blame]	82	Id->~IdentifierInfo();
Chris Lattner	ec659fc	2006-10-29 22:09:44 +0000	[diff] [blame^]	83	Allocator.Deallocate(Id);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	84	}
				85	}
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	86	delete [] TableArray;
				87	}
				88
				89	/// HashString - Compute a hash code for the specified string.
				90	///
				91	static unsigned HashString(const char Start, const char End) {
				92	unsigned int Result = 0;
				93	// Perl hash function.
				94	while (Start != End)
				95	Result = Result * 33 + *Start++;
				96	Result = Result + (Result >> 5);
				97	return Result;
				98	}
				99
Chris Lattner	c79f6fb	2006-07-04 17:53:21 +0000	[diff] [blame]	100	IdentifierInfo &IdentifierTable::get(const char *NameStart,
Chris Lattner	0e1cf1f	2006-07-04 18:53:52 +0000	[diff] [blame]	101	const char *NameEnd) {
Chris Lattner	f2e3ac3	2006-10-27 03:59:10 +0000	[diff] [blame]	102	IdentifierBucket TableArray = (IdentifierBucket)TheTable;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	103
Chris Lattner	f2e3ac3	2006-10-27 03:59:10 +0000	[diff] [blame]	104	unsigned HTSize = HashTableSize;
				105	unsigned FullHashValue = HashString(NameStart, NameEnd);
				106	unsigned BucketNo = FullHashValue & (HTSize-1);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	107	unsigned Length = NameEnd-NameStart;
				108
Chris Lattner	f2e3ac3	2006-10-27 03:59:10 +0000	[diff] [blame]	109	unsigned ProbeAmt = 1;
				110	while (1) {
				111	IdentifierBucket &Bucket = TableArray[BucketNo];
				112	IdentifierInfo *BucketII = Bucket.Info;
				113	// If we found an empty bucket, this identifier isn't in the table yet.
				114	if (BucketII == 0) break;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	115
Chris Lattner	f2e3ac3	2006-10-27 03:59:10 +0000	[diff] [blame]	116	// If the full hash value matches, check deeply for a match. The common
				117	// case here is that we are only looking at the buckets (for identifier info
				118	// being non-null and for the full hash value) not at the identifiers. This
				119	// is important for cache locality.
				120	if (Bucket.FullHashValue == FullHashValue &&
Chris Lattner	f2e3ac3	2006-10-27 03:59:10 +0000	[diff] [blame]	121	memcmp(BucketII->getName(), NameStart, Length) == 0)
				122	// We found a match!
				123	return *BucketII;
				124
				125	// Okay, we didn't find the identifier. Probe to the next bucket.
				126	BucketNo = (BucketNo+ProbeAmt) & (HashTableSize-1);
				127
				128	// Use quadratic probing, it has fewer clumping artifacts than linear
				129	// probing and has good cache behavior in the common case.
				130	++ProbeAmt;
				131	}
				132
				133	// Okay, the identifier doesn't already exist, and BucketNo is the bucket to
				134	// fill in. Allocate a new identifier with space for the null-terminated
				135	// string at the end.
				136	unsigned AllocSize = sizeof(IdentifierInfo)+Length+1;
Chris Lattner	ec659fc	2006-10-29 22:09:44 +0000	[diff] [blame^]	137
				138	// FIXME: uses GCC extension.
				139	unsigned Alignment = __alignof__(IdentifierInfo);
				140	IdentifierInfo *Identifier =
				141	(IdentifierInfo*)Allocator.Allocate(AllocSize, Alignment);
Chris Lattner	3bc804e	2006-10-28 23:46:24 +0000	[diff] [blame]	142	new (Identifier) IdentifierInfo();
Chris Lattner	f2e3ac3	2006-10-27 03:59:10 +0000	[diff] [blame]	143	++NumIdentifiers;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	144
				145	// Copy the string information.
				146	char StrBuffer = (char)(Identifier+1);
				147	memcpy(StrBuffer, NameStart, Length);
				148	StrBuffer[Length] = 0; // Null terminate string.
				149
Chris Lattner	f2e3ac3	2006-10-27 03:59:10 +0000	[diff] [blame]	150	// Fill in the bucket for the hash table.
				151	TableArray[BucketNo].Info = Identifier;
				152	TableArray[BucketNo].FullHashValue = FullHashValue;
				153
				154	// If the hash table is now more than 3/4 full, rehash into a larger table.
				155	if (NumIdentifiers > HashTableSize*3/4)
				156	RehashTable();
				157
				158	return *Identifier;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	159	}
				160
Chris Lattner	c79f6fb	2006-07-04 17:53:21 +0000	[diff] [blame]	161	IdentifierInfo &IdentifierTable::get(const std::string &Name) {
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	162	// Don't use c_str() here: no need to be null terminated.
				163	const char *NameBytes = &Name[0];
				164	unsigned Size = Name.size();
				165	return get(NameBytes, NameBytes+Size);
				166	}
				167
Chris Lattner	f2e3ac3	2006-10-27 03:59:10 +0000	[diff] [blame]	168	void IdentifierTable::RehashTable() {
				169	unsigned NewSize = HashTableSize*2;
				170	IdentifierBucket *NewTableArray = new IdentifierBucket[NewSize]();
				171	memset(NewTableArray, 0, NewSize*sizeof(IdentifierBucket));
				172
				173	// Rehash all the identifier into their new buckets. Luckily we already have
				174	// the hash values available :).
				175	IdentifierBucket CurTable = (IdentifierBucket )TheTable;
				176	for (IdentifierBucket IB = CurTable, E = CurTable+HashTableSize;
				177	IB != E; ++IB) {
				178	if (IB->Info) {
				179	// Fast case, bucket available.
				180	unsigned FullHash = IB->FullHashValue;
				181	unsigned NewBucket = FullHash & (NewSize-1);
				182	if (NewTableArray[NewBucket].Info == 0) {
				183	NewTableArray[FullHash & (NewSize-1)].Info = IB->Info;
				184	NewTableArray[FullHash & (NewSize-1)].FullHashValue = FullHash;
				185	continue;
				186	}
				187
				188	unsigned ProbeSize = 1;
				189	do {
				190	NewBucket = (NewBucket + ProbeSize++) & (NewSize-1);
				191	} while (NewTableArray[NewBucket].Info);
				192
				193	// Finally found a slot. Fill it in.
Chris Lattner	a883116	2006-10-27 04:54:47 +0000	[diff] [blame]	194	NewTableArray[NewBucket].Info = IB->Info;
				195	NewTableArray[NewBucket].FullHashValue = FullHash;
Chris Lattner	f2e3ac3	2006-10-27 03:59:10 +0000	[diff] [blame]	196	}
				197	}
				198
				199	delete[] CurTable;
				200
				201	TheTable = NewTableArray;
				202	HashTableSize = NewSize;
				203	}
				204
				205
Chris Lattner	91cbf11	2006-07-03 04:28:52 +0000	[diff] [blame]	206	/// VisitIdentifiers - This method walks through all of the identifiers,
				207	/// invoking IV->VisitIdentifier for each of them.
				208	void IdentifierTable::VisitIdentifiers(const IdentifierVisitor &IV) {
Chris Lattner	f2e3ac3	2006-10-27 03:59:10 +0000	[diff] [blame]	209	IdentifierBucket TableArray = (IdentifierBucket)TheTable;
				210	for (unsigned i = 0, e = HashTableSize; i != e; ++i) {
				211	if (IdentifierInfo *Id = TableArray[i].Info)
				212	IV.VisitIdentifier(*Id);
Chris Lattner	91cbf11	2006-07-03 04:28:52 +0000	[diff] [blame]	213	}
				214	}
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	215
Chris Lattner	25e0d54	2006-10-18 06:07:05 +0000	[diff] [blame]	216	//===----------------------------------------------------------------------===//
				217	// Language Keyword Implementation
				218	//===----------------------------------------------------------------------===//
				219
				220	/// AddKeyword - This method is used to associate a token ID with specific
				221	/// identifiers because they are language keywords. This causes the lexer to
				222	/// automatically map matching identifiers to specialized token codes.
				223	///
				224	/// The C90/C99/CPP flags are set to 0 if the token should be enabled in the
				225	/// specified langauge, set to 1 if it is an extension in the specified
				226	/// language, and set to 2 if disabled in the specified language.
				227	static void AddKeyword(const std::string &Keyword, tok::TokenKind TokenCode,
Chris Lattner	a4271e4	2006-10-20 06:13:26 +0000	[diff] [blame]	228	int C90, int C99, int CXX,
Chris Lattner	25e0d54	2006-10-18 06:07:05 +0000	[diff] [blame]	229	const LangOptions &LangOpts, IdentifierTable &Table) {
Chris Lattner	a4271e4	2006-10-20 06:13:26 +0000	[diff] [blame]	230	int Flags = LangOpts.CPlusPlus ? CXX : (LangOpts.C99 ? C99 : C90);
Chris Lattner	25e0d54	2006-10-18 06:07:05 +0000	[diff] [blame]	231
				232	// Don't add this keyword if disabled in this language or if an extension
				233	// and extensions are disabled.
				234	if (Flags + LangOpts.NoExtensions >= 2) return;
				235
				236	const char *Str = &Keyword[0];
				237	IdentifierInfo &Info = Table.get(Str, Str+Keyword.size());
				238	Info.setTokenID(TokenCode);
				239	Info.setIsExtensionToken(Flags == 1);
				240	}
				241
				242	/// AddPPKeyword - Register a preprocessor keyword like "define" "undef" or
				243	/// "elif".
				244	static void AddPPKeyword(tok::PPKeywordKind PPID,
				245	const char *Name, unsigned NameLen,
				246	IdentifierTable &Table) {
				247	Table.get(Name, Name+NameLen).setPPKeywordID(PPID);
				248	}
				249
				250	/// AddObjCKeyword - Register an Objective-C @keyword like "class" "selector" or
				251	/// "property".
				252	static void AddObjCKeyword(tok::ObjCKeywordKind ObjCID,
				253	const char *Name, unsigned NameLen,
				254	IdentifierTable &Table) {
				255	Table.get(Name, Name+NameLen).setObjCKeywordID(ObjCID);
				256	}
				257
				258	/// AddKeywords - Add all keywords to the symbol table.
				259	///
				260	void IdentifierTable::AddKeywords(const LangOptions &LangOpts) {
				261	enum {
				262	C90Shift = 0,
				263	EXTC90 = 1 << C90Shift,
				264	NOTC90 = 2 << C90Shift,
				265	C99Shift = 2,
				266	EXTC99 = 1 << C99Shift,
				267	NOTC99 = 2 << C99Shift,
				268	CPPShift = 4,
				269	EXTCPP = 1 << CPPShift,
				270	NOTCPP = 2 << CPPShift,
				271	Mask = 3
				272	};
				273
				274	// Add keywords and tokens for the current language.
				275	#define KEYWORD(NAME, FLAGS) \
				276	AddKeyword(#NAME, tok::kw_ ## NAME, \
				277	((FLAGS) >> C90Shift) & Mask, \
				278	((FLAGS) >> C99Shift) & Mask, \
				279	((FLAGS) >> CPPShift) & Mask, LangOpts, *this);
				280	#define ALIAS(NAME, TOK) \
				281	AddKeyword(NAME, tok::kw_ ## TOK, 0, 0, 0, LangOpts, *this);
				282	#define PPKEYWORD(NAME) \
				283	AddPPKeyword(tok::pp_##NAME, #NAME, strlen(#NAME), *this);
				284	#define OBJC1_AT_KEYWORD(NAME) \
				285	if (LangOpts.ObjC1) \
				286	AddObjCKeyword(tok::objc_##NAME, #NAME, strlen(#NAME), *this);
				287	#define OBJC2_AT_KEYWORD(NAME) \
				288	if (LangOpts.ObjC2) \
				289	AddObjCKeyword(tok::objc_##NAME, #NAME, strlen(#NAME), *this);
				290	#include "clang/Basic/TokenKinds.def"
				291	}
				292
				293
				294	//===----------------------------------------------------------------------===//
				295	// Stats Implementation
				296	//===----------------------------------------------------------------------===//
				297
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	298	/// PrintStats - Print statistics about how well the identifier table is doing
				299	/// at hashing identifiers.
				300	void IdentifierTable::PrintStats() const {
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	301	unsigned NumEmptyBuckets = 0;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	302	unsigned AverageIdentifierSize = 0;
				303	unsigned MaxIdentifierLength = 0;
Chris Lattner	f2e3ac3	2006-10-27 03:59:10 +0000	[diff] [blame]	304	unsigned NumProbed = 0;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	305
Chris Lattner	f2e3ac3	2006-10-27 03:59:10 +0000	[diff] [blame]	306	IdentifierBucket TableArray = (IdentifierBucket)TheTable;
				307	for (unsigned i = 0, e = HashTableSize; i != e; ++i) {
				308	if (TableArray[i].Info == 0) {
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	309	++NumEmptyBuckets;
Chris Lattner	f2e3ac3	2006-10-27 03:59:10 +0000	[diff] [blame]	310	continue;
				311	}
				312	IdentifierInfo *Id = TableArray[i].Info;
Chris Lattner	56bdb9a	2006-10-27 05:06:38 +0000	[diff] [blame]	313	unsigned IdLen = strlen(Id->getName());
				314	AverageIdentifierSize += IdLen;
				315	if (MaxIdentifierLength < IdLen)
				316	MaxIdentifierLength = IdLen;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	317
Chris Lattner	f2e3ac3	2006-10-27 03:59:10 +0000	[diff] [blame]	318	// Count the number of times something was probed.
				319	if ((TableArray[i].FullHashValue & (e-1)) != i)
				320	++NumProbed;
				321
				322	// TODO: Figure out maximum times an identifier had to probe for -stats.
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	323	}
				324
				325	std::cerr << "\n*** Identifier Table Stats:\n";
				326	std::cerr << "# Identifiers: " << NumIdentifiers << "\n";
				327	std::cerr << "# Empty Buckets: " << NumEmptyBuckets << "\n";
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	328	std::cerr << "Hash density (#identifiers per bucket): "
Chris Lattner	f2e3ac3	2006-10-27 03:59:10 +0000	[diff] [blame]	329	<< NumIdentifiers/(double)HashTableSize << "\n";
				330	std::cerr << "Num probed identifiers: " << NumProbed << " ("
				331	<< NumProbed*100.0/NumIdentifiers << "%)\n";
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	332	std::cerr << "Ave identifier length: "
				333	<< (AverageIdentifierSize/(double)NumIdentifiers) << "\n";
				334	std::cerr << "Max identifier length: " << MaxIdentifierLength << "\n";
				335
				336	// Compute statistics about the memory allocated for identifiers.
Chris Lattner	ec659fc	2006-10-29 22:09:44 +0000	[diff] [blame^]	337	Allocator.PrintStats();
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	338	}
				339
				340