Blame - utils/TableGen/TGLexer.cpp - fp2-dev/platform/external/llvm

blob: 562f0ee0f728f13b44d5e39428b8f53db07dbff3 [file] [log] [blame]

Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	1	//===- TGLexer.cpp - Lexer for TableGen -----------------------------------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file was developed by Chris Lattner and is distributed under
				6	// the University of Illinois Open Source License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	// Implement the Lexer for TableGen.
				11	//
				12	//===----------------------------------------------------------------------===//
				13
Chris Lattner	6aaca04	2007-11-18 05:25:45 +0000	[diff] [blame]	14	#include "TGLexer.h"
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	15	#include "Record.h"
				16	#include "llvm/Support/Streams.h"
				17	#include "Record.h"
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	18	#include "llvm/Support/MemoryBuffer.h"
				19	typedef std::pair<llvm::Record, std::vector<llvm::Init>*> SubClassRefTy;
				20	#include "FileParser.h"
				21	#include <cctype>
				22	using namespace llvm;
				23
				24	// FIXME: REMOVE THIS.
				25	#define YYEOF 0
				26	#define YYERROR -2
				27
				28	TGLexer::TGLexer(MemoryBuffer *StartBuf) : CurLineNo(1), CurBuf(StartBuf) {
				29	CurPtr = CurBuf->getBufferStart();
Chris Lattner	56a9fcf	2007-11-19 07:43:52 +0000	[diff] [blame]	30	TokStart = 0;
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	31	}
				32
				33	TGLexer::~TGLexer() {
				34	while (!IncludeStack.empty()) {
				35	delete IncludeStack.back().Buffer;
				36	IncludeStack.pop_back();
				37	}
				38	delete CurBuf;
				39	}
				40
Chris Lattner	c8a9bbc	2007-11-19 07:38:58 +0000	[diff] [blame]	41	/// ReturnError - Set the error to the specified string at the specified
				42	/// location. This is defined to always return YYERROR.
				43	int TGLexer::ReturnError(const char *Loc, const std::string &Msg) {
				44	PrintError(Loc, Msg);
				45	return YYERROR;
				46	}
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	47
Chris Lattner	c8a9bbc	2007-11-19 07:38:58 +0000	[diff] [blame]	48	std::ostream &TGLexer::err() const {
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	49	PrintIncludeStack(*cerr.stream());
				50	return *cerr.stream();
				51	}
				52
				53
Chris Lattner	c8a9bbc	2007-11-19 07:38:58 +0000	[diff] [blame]	54	void TGLexer::PrintIncludeStack(std::ostream &OS) const {
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	55	for (unsigned i = 0, e = IncludeStack.size(); i != e; ++i)
				56	OS << "Included from " << IncludeStack[i].Buffer->getBufferIdentifier()
				57	<< ":" << IncludeStack[i].LineNo << ":\n";
				58	OS << "Parsing " << CurBuf->getBufferIdentifier() << ":"
				59	<< CurLineNo << ": ";
				60	}
				61
Chris Lattner	c8a9bbc	2007-11-19 07:38:58 +0000	[diff] [blame]	62	/// PrintError - Print the error at the specified location.
				63	void TGLexer::PrintError(const char *ErrorLoc, const std::string &Msg) const {
				64	err() << Msg << "\n";
				65	assert(ErrorLoc && "Location not specified!");
				66
				67	// Scan backward to find the start of the line.
				68	const char *LineStart = ErrorLoc;
				69	while (LineStart != CurBuf->getBufferStart() &&
				70	LineStart[-1] != '\n' && LineStart[-1] != '\r')
				71	--LineStart;
				72	// Get the end of the line.
				73	const char *LineEnd = ErrorLoc;
				74	while (LineEnd != CurBuf->getBufferEnd() &&
				75	LineEnd[0] != '\n' && LineEnd[0] != '\r')
				76	++LineEnd;
				77	// Print out the line.
				78	cerr << std::string(LineStart, LineEnd) << "\n";
				79	// Print out spaces before the carat.
Chris Lattner	56a9fcf	2007-11-19 07:43:52 +0000	[diff] [blame]	80	for (const char *Pos = LineStart; Pos != ErrorLoc; ++Pos)
Chris Lattner	c8a9bbc	2007-11-19 07:38:58 +0000	[diff] [blame]	81	cerr << (*Pos == '\t' ? '\t' : ' ');
				82	cerr << "^\n";
				83	}
				84
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	85	int TGLexer::getNextChar() {
				86	char CurChar = *CurPtr++;
				87	switch (CurChar) {
				88	default:
Chris Lattner	c181918	2007-11-18 05:48:46 +0000	[diff] [blame]	89	return (unsigned char)CurChar;
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	90	case 0:
				91	// A nul character in the stream is either the end of the current buffer or
				92	// a random nul in the file. Disambiguate that here.
				93	if (CurPtr-1 != CurBuf->getBufferEnd())
				94	return 0; // Just whitespace.
				95
				96	// If this is the end of an included file, pop the parent file off the
				97	// include stack.
				98	if (!IncludeStack.empty()) {
				99	delete CurBuf;
				100	CurBuf = IncludeStack.back().Buffer;
				101	CurLineNo = IncludeStack.back().LineNo;
				102	CurPtr = IncludeStack.back().CurPtr;
				103	IncludeStack.pop_back();
				104	return getNextChar();
				105	}
				106
				107	// Otherwise, return end of file.
				108	--CurPtr; // Another call to lex will return EOF again.
				109	return EOF;
				110	case '\n':
				111	case '\r':
				112	// Handle the newline character by ignoring it and incrementing the line
				113	// count. However, be careful about 'dos style' files with \n\r in them.
				114	// Only treat a \n\r or \r\n as a single line.
				115	if ((CurPtr == '\n' \|\| (CurPtr == '\r')) &&
				116	*CurPtr != CurChar)
Chris Lattner	c181918	2007-11-18 05:48:46 +0000	[diff] [blame]	117	++CurPtr; // Eat the two char newline sequence.
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	118
				119	++CurLineNo;
				120	return '\n';
				121	}
				122	}
				123
				124	int TGLexer::LexToken() {
Chris Lattner	56a9fcf	2007-11-19 07:43:52 +0000	[diff] [blame]	125	TokStart = CurPtr;
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	126	// This always consumes at least one character.
				127	int CurChar = getNextChar();
				128
				129	switch (CurChar) {
				130	default:
				131	// Handle letters: [a-zA-Z_]
				132	if (isalpha(CurChar) \|\| CurChar == '_')
				133	return LexIdentifier();
				134
				135	// Unknown character, return the char itself.
				136	return (unsigned char)CurChar;
				137	case EOF: return YYEOF;
				138	case 0:
				139	case ' ':
				140	case '\t':
				141	case '\n':
				142	case '\r':
				143	// Ignore whitespace.
				144	return LexToken();
				145	case '/':
				146	// If this is the start of a // comment, skip until the end of the line or
				147	// the end of the buffer.
				148	if (*CurPtr == '/')
				149	SkipBCPLComment();
				150	else if (CurPtr == '') {
				151	if (SkipCComment())
				152	return YYERROR;
				153	} else // Otherwise, return this / as a token.
				154	return CurChar;
				155	return LexToken();
				156	case '-': case '+':
				157	case '0': case '1': case '2': case '3': case '4': case '5': case '6':
				158	case '7': case '8': case '9':
				159	return LexNumber();
				160	case '"': return LexString();
				161	case '$': return LexVarName();
				162	case '[': return LexBracket();
				163	case '!': return LexExclaim();
				164	}
				165	}
				166
				167	/// LexString - Lex "[^"]*"
				168	int TGLexer::LexString() {
				169	const char *StrStart = CurPtr;
				170
				171	while (*CurPtr != '"') {
				172	// If we hit the end of the buffer, report an error.
Chris Lattner	c8a9bbc	2007-11-19 07:38:58 +0000	[diff] [blame]	173	if (*CurPtr == 0 && CurPtr == CurBuf->getBufferEnd())
				174	return ReturnError(StrStart, "End of file in string literal");
				175
				176	if (CurPtr == '\n' \|\| CurPtr == '\r')
				177	return ReturnError(StrStart, "End of line in string literal");
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	178
				179	++CurPtr;
				180	}
				181
				182	Filelval.StrVal = new std::string(StrStart, CurPtr);
				183	++CurPtr;
				184	return STRVAL;
				185	}
				186
				187	int TGLexer::LexVarName() {
				188	if (!isalpha(CurPtr[0]) && CurPtr[0] != '_')
				189	return '$'; // Invalid varname.
				190
				191	// Otherwise, we're ok, consume the rest of the characters.
				192	const char *VarNameStart = CurPtr++;
				193
				194	while (isalpha(CurPtr) \|\| isdigit(CurPtr) \|\| *CurPtr == '_')
				195	++CurPtr;
				196
				197	Filelval.StrVal = new std::string(VarNameStart, CurPtr);
				198	return VARNAME;
				199	}
				200
				201
				202	int TGLexer::LexIdentifier() {
				203	// The first letter is [a-zA-Z_].
				204	const char *IdentStart = CurPtr-1;
				205
				206	// Match the rest of the identifier regex: [0-9a-zA-Z_]*
				207	while (isalpha(CurPtr) \|\| isdigit(CurPtr) \|\| *CurPtr == '_')
				208	++CurPtr;
				209
				210	// Check to see if this identifier is a keyword.
				211	unsigned Len = CurPtr-IdentStart;
				212
				213	if (Len == 3 && !memcmp(IdentStart, "int", 3)) return INT;
				214	if (Len == 3 && !memcmp(IdentStart, "bit", 3)) return BIT;
				215	if (Len == 4 && !memcmp(IdentStart, "bits", 4)) return BITS;
				216	if (Len == 6 && !memcmp(IdentStart, "string", 6)) return STRING;
				217	if (Len == 4 && !memcmp(IdentStart, "list", 4)) return LIST;
				218	if (Len == 4 && !memcmp(IdentStart, "code", 4)) return CODE;
				219	if (Len == 3 && !memcmp(IdentStart, "dag", 3)) return DAG;
				220
				221	if (Len == 5 && !memcmp(IdentStart, "class", 5)) return CLASS;
				222	if (Len == 3 && !memcmp(IdentStart, "def", 3)) return DEF;
				223	if (Len == 4 && !memcmp(IdentStart, "defm", 4)) return DEFM;
				224	if (Len == 10 && !memcmp(IdentStart, "multiclass", 10)) return MULTICLASS;
				225	if (Len == 5 && !memcmp(IdentStart, "field", 5)) return FIELD;
				226	if (Len == 3 && !memcmp(IdentStart, "let", 3)) return LET;
				227	if (Len == 2 && !memcmp(IdentStart, "in", 2)) return IN;
				228
				229	if (Len == 7 && !memcmp(IdentStart, "include", 7)) {
				230	if (LexInclude()) return YYERROR;
				231	return LexToken();
				232	}
				233
				234	Filelval.StrVal = new std::string(IdentStart, CurPtr);
				235	return ID;
				236	}
				237
				238	/// LexInclude - We just read the "include" token. Get the string token that
				239	/// comes next and enter the include.
				240	bool TGLexer::LexInclude() {
				241	// The token after the include must be a string.
				242	int Tok = LexToken();
				243	if (Tok == YYERROR) return true;
				244	if (Tok != STRVAL) {
Chris Lattner	56a9fcf	2007-11-19 07:43:52 +0000	[diff] [blame]	245	PrintError(getTokenStart(), "Expected filename after include");
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	246	return true;
				247	}
				248
				249	// Get the string.
				250	std::string Filename = *Filelval.StrVal;
				251	delete Filelval.StrVal;
				252
				253	// Try to find the file.
				254	MemoryBuffer *NewBuf = MemoryBuffer::getFile(&Filename[0], Filename.size());
				255
				256	// If the file didn't exist directly, see if it's in an include path.
				257	for (unsigned i = 0, e = IncludeDirectories.size(); i != e && !NewBuf; ++i) {
				258	std::string IncFile = IncludeDirectories[i] + "/" + Filename;
				259	NewBuf = MemoryBuffer::getFile(&IncFile[0], IncFile.size());
				260	}
				261
				262	if (NewBuf == 0) {
Chris Lattner	56a9fcf	2007-11-19 07:43:52 +0000	[diff] [blame]	263	PrintError(getTokenStart(),
				264	"Could not find include file '" + Filename + "'");
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	265	return true;
				266	}
				267
				268	// Save the line number and lex buffer of the includer.
				269	IncludeStack.push_back(IncludeRec(CurBuf, CurPtr, CurLineNo));
				270
				271	CurLineNo = 1; // Reset line numbering.
				272	CurBuf = NewBuf;
				273	CurPtr = CurBuf->getBufferStart();
				274	return false;
				275	}
				276
				277	void TGLexer::SkipBCPLComment() {
				278	++CurPtr; // skip the second slash.
				279	while (1) {
				280	switch (*CurPtr) {
				281	case '\n':
				282	case '\r':
				283	return; // Newline is end of comment.
				284	case 0:
				285	// If this is the end of the buffer, end the comment.
				286	if (CurPtr == CurBuf->getBufferEnd())
				287	return;
				288	break;
				289	}
				290	// Otherwise, skip the character.
				291	++CurPtr;
				292	}
				293	}
				294
				295	/// SkipCComment - This skips C-style /**/ comments. The only difference from C
				296	/// is that we allow nesting.
				297	bool TGLexer::SkipCComment() {
Chris Lattner	c8a9bbc	2007-11-19 07:38:58 +0000	[diff] [blame]	298	const char *CommentStart = CurPtr-1;
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	299	++CurPtr; // skip the star.
				300	unsigned CommentDepth = 1;
				301
				302	while (1) {
				303	int CurChar = getNextChar();
				304	switch (CurChar) {
				305	case EOF:
Chris Lattner	c8a9bbc	2007-11-19 07:38:58 +0000	[diff] [blame]	306	PrintError(CommentStart, "Unterminated comment!");
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	307	return true;
				308	case '*':
				309	// End of the comment?
				310	if (CurPtr[0] != '/') break;
				311
				312	++CurPtr; // End the */.
				313	if (--CommentDepth == 0)
				314	return false;
				315	break;
				316	case '/':
				317	// Start of a nested comment?
				318	if (CurPtr[0] != '*') break;
				319	++CurPtr;
				320	++CommentDepth;
				321	break;
				322	}
				323	}
				324	}
				325
				326	/// LexNumber - Lex:
				327	/// [-+]?[0-9]+
				328	/// 0x[0-9a-fA-F]+
				329	/// 0b[01]+
				330	int TGLexer::LexNumber() {
				331	const char *NumStart = CurPtr-1;
				332
				333	if (CurPtr[-1] == '0') {
				334	if (CurPtr[0] == 'x') {
				335	++CurPtr;
				336	NumStart = CurPtr;
				337	while (isxdigit(CurPtr[0]))
				338	++CurPtr;
				339
Chris Lattner	c8a9bbc	2007-11-19 07:38:58 +0000	[diff] [blame]	340	// Requires at least one hex digit.
				341	if (CurPtr == NumStart)
				342	return ReturnError(CurPtr-2, "Invalid hexadecimal number");
				343
Chuck Rose III	0ccb930	2007-11-21 00:37:56 +0000	[diff] [blame]	344	Filelval.IntVal = strtol(NumStart, 0, 16);
				345
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	346	return INTVAL;
				347	} else if (CurPtr[0] == 'b') {
				348	++CurPtr;
				349	NumStart = CurPtr;
				350	while (CurPtr[0] == '0' \|\| CurPtr[0] == '1')
				351	++CurPtr;
Chris Lattner	c8a9bbc	2007-11-19 07:38:58 +0000	[diff] [blame]	352
				353	// Requires at least one binary digit.
				354	if (CurPtr == NumStart)
				355	return ReturnError(CurPtr-2, "Invalid binary number");
Chuck Rose III	0ccb930	2007-11-21 00:37:56 +0000	[diff] [blame]	356
				357	Filelval.IntVal = strtol(NumStart, 0, 2);
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	358	return INTVAL;
				359	}
				360	}
				361
				362	// Check for a sign without a digit.
				363	if (CurPtr[-1] == '-' \|\| CurPtr[-1] == '+') {
				364	if (!isdigit(CurPtr[0]))
				365	return CurPtr[-1];
				366	}
				367
				368	while (isdigit(CurPtr[0]))
				369	++CurPtr;
Chuck Rose III	0ccb930	2007-11-21 00:37:56 +0000	[diff] [blame]	370
				371	Filelval.IntVal = strtol(NumStart, 0, 10);
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	372	return INTVAL;
				373	}
				374
				375	/// LexBracket - We just read '['. If this is a code block, return it,
				376	/// otherwise return the bracket. Match: '[' and '[{ ( [^}]+ \| }[^]] )* }]'
				377	int TGLexer::LexBracket() {
				378	if (CurPtr[0] != '{')
				379	return '[';
				380	++CurPtr;
				381	const char *CodeStart = CurPtr;
				382	while (1) {
				383	int Char = getNextChar();
				384	if (Char == EOF) break;
				385
				386	if (Char != '}') continue;
				387
				388	Char = getNextChar();
				389	if (Char == EOF) break;
				390	if (Char == ']') {
				391	Filelval.StrVal = new std::string(CodeStart, CurPtr-2);
				392	return CODEFRAGMENT;
				393	}
				394	}
				395
Chris Lattner	c8a9bbc	2007-11-19 07:38:58 +0000	[diff] [blame]	396	return ReturnError(CodeStart-2, "Unterminated Code Block");
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	397	}
				398
				399	/// LexExclaim - Lex '!' and '![a-zA-Z]+'.
				400	int TGLexer::LexExclaim() {
				401	if (!isalpha(*CurPtr))
				402	return '!';
				403
				404	const char *Start = CurPtr++;
				405	while (isalpha(*CurPtr))
				406	++CurPtr;
				407
				408	// Check to see which operator this is.
				409	unsigned Len = CurPtr-Start;
				410
				411	if (Len == 3 && !memcmp(Start, "con", 3)) return CONCATTOK;
				412	if (Len == 3 && !memcmp(Start, "sra", 3)) return SRATOK;
				413	if (Len == 3 && !memcmp(Start, "srl", 3)) return SRLTOK;
				414	if (Len == 3 && !memcmp(Start, "shl", 3)) return SHLTOK;
				415	if (Len == 9 && !memcmp(Start, "strconcat", 9)) return STRCONCATTOK;
				416
Chris Lattner	c8a9bbc	2007-11-19 07:38:58 +0000	[diff] [blame]	417	return ReturnError(Start-1, "Unknown operator");
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	418	}
				419
				420	//===----------------------------------------------------------------------===//
				421	// Interfaces used by the Bison parser.
				422	//===----------------------------------------------------------------------===//
				423
				424	int Fileparse();
				425	static TGLexer *TheLexer;
				426
				427	namespace llvm {
				428
				429	std::ostream &err() {
				430	return TheLexer->err();
				431	}
				432
				433	/// ParseFile - this function begins the parsing of the specified tablegen
				434	/// file.
				435	///
				436	void ParseFile(const std::string &Filename,
				437	const std::vector<std::string> &IncludeDirs) {
				438	std::string ErrorStr;
				439	MemoryBuffer *F = MemoryBuffer::getFileOrSTDIN(&Filename[0], Filename.size(),
				440	&ErrorStr);
				441	if (F == 0) {
				442	cerr << "Could not open input file '" + Filename + "': " << ErrorStr <<"\n";
				443	exit(1);
				444	}
				445
				446	assert(!TheLexer && "Lexer isn't reentrant yet!");
				447	TheLexer = new TGLexer(F);
				448
				449	// Record the location of the include directory so that the lexer can find
				450	// it later.
				451	TheLexer->setIncludeDirs(IncludeDirs);
				452
				453	Fileparse();
				454
				455	// Cleanup
				456	delete TheLexer;
				457	TheLexer = 0;
				458	}
				459	} // End llvm namespace
				460
				461
				462	int Filelex() {
				463	assert(TheLexer && "No lexer setup yet!");
				464	int Tok = TheLexer->LexToken();
Chris Lattner	c8a9bbc	2007-11-19 07:38:58 +0000	[diff] [blame]	465	if (Tok == YYERROR)
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	466	exit(1);
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	467	return Tok;
				468	}