Blame - utils/TableGen/TGLexer.cpp - fp2-dev/platform/external/llvm

blob: e49af0423393826f5f2c4c51bf603571c34c9770 [file] [log] [blame]

Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	1	//===- TGLexer.cpp - Lexer for TableGen -----------------------------------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file was developed by Chris Lattner and is distributed under
				6	// the University of Illinois Open Source License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	// Implement the Lexer for TableGen.
				11	//
				12	//===----------------------------------------------------------------------===//
				13
Chris Lattner	6aaca04	2007-11-18 05:25:45 +0000	[diff] [blame^]	14	#include "TGLexer.h"
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	15	#include "Record.h"
				16	#include "llvm/Support/Streams.h"
				17	#include "Record.h"
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	18	#include "llvm/Support/MemoryBuffer.h"
				19	typedef std::pair<llvm::Record, std::vector<llvm::Init>*> SubClassRefTy;
				20	#include "FileParser.h"
				21	#include <cctype>
				22	using namespace llvm;
				23
				// FIXME: REMOVE THIS.
				// Token codes returned by the lexer for "end of input" and "lexing failed".
				#define YYEOF   (0)
				#define YYERROR (-2)
				27
				28	TGLexer::TGLexer(MemoryBuffer *StartBuf) : CurLineNo(1), CurBuf(StartBuf) {
				29	CurPtr = CurBuf->getBufferStart();
				30	}
				31
				32	TGLexer::~TGLexer() {
				33	while (!IncludeStack.empty()) {
				34	delete IncludeStack.back().Buffer;
				35	IncludeStack.pop_back();
				36	}
				37	delete CurBuf;
				38	}
				39
				40
				41	std::ostream &TGLexer::err() {
				42	PrintIncludeStack(*cerr.stream());
				43	return *cerr.stream();
				44	}
				45
				46
				47	void TGLexer::PrintIncludeStack(std::ostream &OS) {
				48	for (unsigned i = 0, e = IncludeStack.size(); i != e; ++i)
				49	OS << "Included from " << IncludeStack[i].Buffer->getBufferIdentifier()
				50	<< ":" << IncludeStack[i].LineNo << ":\n";
				51	OS << "Parsing " << CurBuf->getBufferIdentifier() << ":"
				52	<< CurLineNo << ": ";
				53	}
				54
				55	int TGLexer::getNextChar() {
				56	char CurChar = *CurPtr++;
				57	switch (CurChar) {
				58	default:
				59	return CurChar;
				60	case 0:
				61	// A nul character in the stream is either the end of the current buffer or
				62	// a random nul in the file. Disambiguate that here.
				63	if (CurPtr-1 != CurBuf->getBufferEnd())
				64	return 0; // Just whitespace.
				65
				66	// If this is the end of an included file, pop the parent file off the
				67	// include stack.
				68	if (!IncludeStack.empty()) {
				69	delete CurBuf;
				70	CurBuf = IncludeStack.back().Buffer;
				71	CurLineNo = IncludeStack.back().LineNo;
				72	CurPtr = IncludeStack.back().CurPtr;
				73	IncludeStack.pop_back();
				74	return getNextChar();
				75	}
				76
				77	// Otherwise, return end of file.
				78	--CurPtr; // Another call to lex will return EOF again.
				79	return EOF;
				80	case '\n':
				81	case '\r':
				82	// Handle the newline character by ignoring it and incrementing the line
				83	// count. However, be careful about 'dos style' files with \n\r in them.
				84	// Only treat a \n\r or \r\n as a single line.
				85	if ((CurPtr == '\n' \|\| (CurPtr == '\r')) &&
				86	*CurPtr != CurChar)
				87	++CurPtr; // Each the two char newline sequence.
				88
				89	++CurLineNo;
				90	return '\n';
				91	}
				92	}
				93
				94	int TGLexer::LexToken() {
				95	// This always consumes at least one character.
				96	int CurChar = getNextChar();
				97
				98	switch (CurChar) {
				99	default:
				100	// Handle letters: [a-zA-Z_]
				101	if (isalpha(CurChar) \|\| CurChar == '_')
				102	return LexIdentifier();
				103
				104	// Unknown character, return the char itself.
				105	return (unsigned char)CurChar;
				106	case EOF: return YYEOF;
				107	case 0:
				108	case ' ':
				109	case '\t':
				110	case '\n':
				111	case '\r':
				112	// Ignore whitespace.
				113	return LexToken();
				114	case '/':
				115	// If this is the start of a // comment, skip until the end of the line or
				116	// the end of the buffer.
				117	if (*CurPtr == '/')
				118	SkipBCPLComment();
				119	else if (CurPtr == '') {
				120	if (SkipCComment())
				121	return YYERROR;
				122	} else // Otherwise, return this / as a token.
				123	return CurChar;
				124	return LexToken();
				125	case '-': case '+':
				126	case '0': case '1': case '2': case '3': case '4': case '5': case '6':
				127	case '7': case '8': case '9':
				128	return LexNumber();
				129	case '"': return LexString();
				130	case '$': return LexVarName();
				131	case '[': return LexBracket();
				132	case '!': return LexExclaim();
				133	}
				134	}
				135
				136	/// LexString - Lex "[^"]*"
				137	int TGLexer::LexString() {
				138	const char *StrStart = CurPtr;
				139
				140	while (*CurPtr != '"') {
				141	// If we hit the end of the buffer, report an error.
				142	if (*CurPtr == 0 && CurPtr == CurBuf->getBufferEnd()) {
				143	TheError = "End of file in string literal";
				144	return YYERROR;
				145	} else if (CurPtr == '\n' \|\| CurPtr == '\r') {
				146	TheError = "End of line in string literal";
				147	return YYERROR;
				148	}
				149
				150	++CurPtr;
				151	}
				152
				153	Filelval.StrVal = new std::string(StrStart, CurPtr);
				154	++CurPtr;
				155	return STRVAL;
				156	}
				157
				158	int TGLexer::LexVarName() {
				159	if (!isalpha(CurPtr[0]) && CurPtr[0] != '_')
				160	return '$'; // Invalid varname.
				161
				162	// Otherwise, we're ok, consume the rest of the characters.
				163	const char *VarNameStart = CurPtr++;
				164
				165	while (isalpha(CurPtr) \|\| isdigit(CurPtr) \|\| *CurPtr == '_')
				166	++CurPtr;
				167
				168	Filelval.StrVal = new std::string(VarNameStart, CurPtr);
				169	return VARNAME;
				170	}
				171
				172
				173	int TGLexer::LexIdentifier() {
				174	// The first letter is [a-zA-Z_].
				175	const char *IdentStart = CurPtr-1;
				176
				177	// Match the rest of the identifier regex: [0-9a-zA-Z_]*
				178	while (isalpha(CurPtr) \|\| isdigit(CurPtr) \|\| *CurPtr == '_')
				179	++CurPtr;
				180
				181	// Check to see if this identifier is a keyword.
				182	unsigned Len = CurPtr-IdentStart;
				183
				184	if (Len == 3 && !memcmp(IdentStart, "int", 3)) return INT;
				185	if (Len == 3 && !memcmp(IdentStart, "bit", 3)) return BIT;
				186	if (Len == 4 && !memcmp(IdentStart, "bits", 4)) return BITS;
				187	if (Len == 6 && !memcmp(IdentStart, "string", 6)) return STRING;
				188	if (Len == 4 && !memcmp(IdentStart, "list", 4)) return LIST;
				189	if (Len == 4 && !memcmp(IdentStart, "code", 4)) return CODE;
				190	if (Len == 3 && !memcmp(IdentStart, "dag", 3)) return DAG;
				191
				192	if (Len == 5 && !memcmp(IdentStart, "class", 5)) return CLASS;
				193	if (Len == 3 && !memcmp(IdentStart, "def", 3)) return DEF;
				194	if (Len == 4 && !memcmp(IdentStart, "defm", 4)) return DEFM;
				195	if (Len == 10 && !memcmp(IdentStart, "multiclass", 10)) return MULTICLASS;
				196	if (Len == 5 && !memcmp(IdentStart, "field", 5)) return FIELD;
				197	if (Len == 3 && !memcmp(IdentStart, "let", 3)) return LET;
				198	if (Len == 2 && !memcmp(IdentStart, "in", 2)) return IN;
				199
				200	if (Len == 7 && !memcmp(IdentStart, "include", 7)) {
				201	if (LexInclude()) return YYERROR;
				202	return LexToken();
				203	}
				204
				205	Filelval.StrVal = new std::string(IdentStart, CurPtr);
				206	return ID;
				207	}
				208
				209	/// LexInclude - We just read the "include" token. Get the string token that
				210	/// comes next and enter the include.
				211	bool TGLexer::LexInclude() {
				212	// The token after the include must be a string.
				213	int Tok = LexToken();
				214	if (Tok == YYERROR) return true;
				215	if (Tok != STRVAL) {
				216	TheError = "Expected filename after include";
				217	return true;
				218	}
				219
				220	// Get the string.
				221	std::string Filename = *Filelval.StrVal;
				222	delete Filelval.StrVal;
				223
				224	// Try to find the file.
				225	MemoryBuffer *NewBuf = MemoryBuffer::getFile(&Filename[0], Filename.size());
				226
				227	// If the file didn't exist directly, see if it's in an include path.
				228	for (unsigned i = 0, e = IncludeDirectories.size(); i != e && !NewBuf; ++i) {
				229	std::string IncFile = IncludeDirectories[i] + "/" + Filename;
				230	NewBuf = MemoryBuffer::getFile(&IncFile[0], IncFile.size());
				231	}
				232
				233	if (NewBuf == 0) {
				234	TheError = "Could not find include file '" + Filename + "'";
				235	return true;
				236	}
				237
				238	// Save the line number and lex buffer of the includer.
				239	IncludeStack.push_back(IncludeRec(CurBuf, CurPtr, CurLineNo));
				240
				241	CurLineNo = 1; // Reset line numbering.
				242	CurBuf = NewBuf;
				243	CurPtr = CurBuf->getBufferStart();
				244	return false;
				245	}
				246
				247	void TGLexer::SkipBCPLComment() {
				248	++CurPtr; // skip the second slash.
				249	while (1) {
				250	switch (*CurPtr) {
				251	case '\n':
				252	case '\r':
				253	return; // Newline is end of comment.
				254	case 0:
				255	// If this is the end of the buffer, end the comment.
				256	if (CurPtr == CurBuf->getBufferEnd())
				257	return;
				258	break;
				259	}
				260	// Otherwise, skip the character.
				261	++CurPtr;
				262	}
				263	}
				264
				265	/// SkipCComment - This skips C-style /**/ comments. The only difference from C
				266	/// is that we allow nesting.
				267	bool TGLexer::SkipCComment() {
				268	++CurPtr; // skip the star.
				269	unsigned CommentDepth = 1;
				270
				271	while (1) {
				272	int CurChar = getNextChar();
				273	switch (CurChar) {
				274	case EOF:
				275	TheError = "Unterminated comment!";
				276	return true;
				277	case '*':
				278	// End of the comment?
				279	if (CurPtr[0] != '/') break;
				280
				281	++CurPtr; // End the */.
				282	if (--CommentDepth == 0)
				283	return false;
				284	break;
				285	case '/':
				286	// Start of a nested comment?
				287	if (CurPtr[0] != '*') break;
				288	++CurPtr;
				289	++CommentDepth;
				290	break;
				291	}
				292	}
				293	}
				294
				295	/// LexNumber - Lex:
				296	/// [-+]?[0-9]+
				297	/// 0x[0-9a-fA-F]+
				298	/// 0b[01]+
				299	int TGLexer::LexNumber() {
				300	const char *NumStart = CurPtr-1;
				301
				302	if (CurPtr[-1] == '0') {
				303	if (CurPtr[0] == 'x') {
				304	++CurPtr;
				305	NumStart = CurPtr;
				306	while (isxdigit(CurPtr[0]))
				307	++CurPtr;
				308
				309	if (CurPtr == NumStart) {
				310	TheError = "Invalid hexadecimal number";
				311	return YYERROR;
				312	}
				313	Filelval.IntVal = strtoll(NumStart, 0, 16);
				314	return INTVAL;
				315	} else if (CurPtr[0] == 'b') {
				316	++CurPtr;
				317	NumStart = CurPtr;
				318	while (CurPtr[0] == '0' \|\| CurPtr[0] == '1')
				319	++CurPtr;
				320
				321	if (CurPtr == NumStart) {
				322	TheError = "Invalid binary number";
				323	return YYERROR;
				324	}
				325	Filelval.IntVal = strtoll(NumStart, 0, 2);
				326	return INTVAL;
				327	}
				328	}
				329
				330	// Check for a sign without a digit.
				331	if (CurPtr[-1] == '-' \|\| CurPtr[-1] == '+') {
				332	if (!isdigit(CurPtr[0]))
				333	return CurPtr[-1];
				334	}
				335
				336	while (isdigit(CurPtr[0]))
				337	++CurPtr;
				338	Filelval.IntVal = strtoll(NumStart, 0, 10);
				339	return INTVAL;
				340	}
				341
				342	/// LexBracket - We just read '['. If this is a code block, return it,
				343	/// otherwise return the bracket. Match: '[' and '[{ ( [^}]+ \| }[^]] )* }]'
				344	int TGLexer::LexBracket() {
				345	if (CurPtr[0] != '{')
				346	return '[';
				347	++CurPtr;
				348	const char *CodeStart = CurPtr;
				349	while (1) {
				350	int Char = getNextChar();
				351	if (Char == EOF) break;
				352
				353	if (Char != '}') continue;
				354
				355	Char = getNextChar();
				356	if (Char == EOF) break;
				357	if (Char == ']') {
				358	Filelval.StrVal = new std::string(CodeStart, CurPtr-2);
				359	return CODEFRAGMENT;
				360	}
				361	}
				362
				363	TheError = "Invalid Code Block";
				364	return YYERROR;
				365	}
				366
				367	/// LexExclaim - Lex '!' and '![a-zA-Z]+'.
				368	int TGLexer::LexExclaim() {
				369	if (!isalpha(*CurPtr))
				370	return '!';
				371
				372	const char *Start = CurPtr++;
				373	while (isalpha(*CurPtr))
				374	++CurPtr;
				375
				376	// Check to see which operator this is.
				377	unsigned Len = CurPtr-Start;
				378
				379	if (Len == 3 && !memcmp(Start, "con", 3)) return CONCATTOK;
				380	if (Len == 3 && !memcmp(Start, "sra", 3)) return SRATOK;
				381	if (Len == 3 && !memcmp(Start, "srl", 3)) return SRLTOK;
				382	if (Len == 3 && !memcmp(Start, "shl", 3)) return SHLTOK;
				383	if (Len == 9 && !memcmp(Start, "strconcat", 9)) return STRCONCATTOK;
				384
				385	TheError = "Unknown operator";
				386	return YYERROR;
				387	}
				388
				389	//===----------------------------------------------------------------------===//
				390	// Interfaces used by the Bison parser.
				391	//===----------------------------------------------------------------------===//
				392
				393	int Fileparse();
				394	static TGLexer *TheLexer;
				395
				396	namespace llvm {
				397
				398	std::ostream &err() {
				399	return TheLexer->err();
				400	}
				401
				402	/// ParseFile - this function begins the parsing of the specified tablegen
				403	/// file.
				404	///
				405	void ParseFile(const std::string &Filename,
				406	const std::vector<std::string> &IncludeDirs) {
				407	std::string ErrorStr;
				408	MemoryBuffer *F = MemoryBuffer::getFileOrSTDIN(&Filename[0], Filename.size(),
				409	&ErrorStr);
				410	if (F == 0) {
				411	cerr << "Could not open input file '" + Filename + "': " << ErrorStr <<"\n";
				412	exit(1);
				413	}
				414
				415	assert(!TheLexer && "Lexer isn't reentrant yet!");
				416	TheLexer = new TGLexer(F);
				417
				418	// Record the location of the include directory so that the lexer can find
				419	// it later.
				420	TheLexer->setIncludeDirs(IncludeDirs);
				421
				422	Fileparse();
				423
				424	// Cleanup
				425	delete TheLexer;
				426	TheLexer = 0;
				427	}
				428	} // End llvm namespace
				429
				430
				431	int Filelex() {
				432	assert(TheLexer && "No lexer setup yet!");
				433	int Tok = TheLexer->LexToken();
				434	if (Tok == YYERROR) {
				435	err() << TheLexer->getError() << "\n";
				436	exit(1);
				437	}
				438	return Tok;
				439	}