Blame - utils/TableGen/TGLexer.cpp - fp2-dev/platform/external/llvm

blob: dd7ad6ceb101df796e6e2c7b3f040e39dabb0004 [file] [log] [blame]

Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	1	//===- TGLexer.cpp - Lexer for TableGen -----------------------------------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file was developed by Chris Lattner and is distributed under
				6	// the University of Illinois Open Source License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	// Implement the Lexer for TableGen.
				11	//
				12	//===----------------------------------------------------------------------===//
				13
Chris Lattner	6aaca04	2007-11-18 05:25:45 +0000	[diff] [blame]	14	#include "TGLexer.h"
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	15	#include "Record.h"
				16	#include "llvm/Support/Streams.h"
				17	#include "Record.h"
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	18	#include "llvm/Support/MemoryBuffer.h"
				19	typedef std::pair<llvm::Record, std::vector<llvm::Init>*> SubClassRefTy;
				20	#include "FileParser.h"
				21	#include <cctype>
				22	using namespace llvm;
				23
				24	// FIXME: REMOVE THIS.
				25	#define YYEOF 0
				26	#define YYERROR -2
				27
				28	TGLexer::TGLexer(MemoryBuffer *StartBuf) : CurLineNo(1), CurBuf(StartBuf) {
				29	CurPtr = CurBuf->getBufferStart();
				30	}
				31
				32	TGLexer::~TGLexer() {
				33	while (!IncludeStack.empty()) {
				34	delete IncludeStack.back().Buffer;
				35	IncludeStack.pop_back();
				36	}
				37	delete CurBuf;
				38	}
				39
Chris Lattner	c8a9bbc	2007-11-19 07:38:58 +0000	[diff] [blame^]	40	/// ReturnError - Set the error to the specified string at the specified
				41	/// location. This is defined to always return YYERROR.
				42	int TGLexer::ReturnError(const char *Loc, const std::string &Msg) {
				43	PrintError(Loc, Msg);
				44	return YYERROR;
				45	}
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	46
Chris Lattner	c8a9bbc	2007-11-19 07:38:58 +0000	[diff] [blame^]	47	std::ostream &TGLexer::err() const {
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	48	PrintIncludeStack(*cerr.stream());
				49	return *cerr.stream();
				50	}
				51
				52
Chris Lattner	c8a9bbc	2007-11-19 07:38:58 +0000	[diff] [blame^]	53	void TGLexer::PrintIncludeStack(std::ostream &OS) const {
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	54	for (unsigned i = 0, e = IncludeStack.size(); i != e; ++i)
				55	OS << "Included from " << IncludeStack[i].Buffer->getBufferIdentifier()
				56	<< ":" << IncludeStack[i].LineNo << ":\n";
				57	OS << "Parsing " << CurBuf->getBufferIdentifier() << ":"
				58	<< CurLineNo << ": ";
				59	}
				60
Chris Lattner	c8a9bbc	2007-11-19 07:38:58 +0000	[diff] [blame^]	61	/// PrintError - Print the error at the specified location.
				62	void TGLexer::PrintError(const char *ErrorLoc, const std::string &Msg) const {
				63	err() << Msg << "\n";
				64	assert(ErrorLoc && "Location not specified!");
				65
				66	// Scan backward to find the start of the line.
				67	const char *LineStart = ErrorLoc;
				68	while (LineStart != CurBuf->getBufferStart() &&
				69	LineStart[-1] != '\n' && LineStart[-1] != '\r')
				70	--LineStart;
				71	// Get the end of the line.
				72	const char *LineEnd = ErrorLoc;
				73	while (LineEnd != CurBuf->getBufferEnd() &&
				74	LineEnd[0] != '\n' && LineEnd[0] != '\r')
				75	++LineEnd;
				76	// Print out the line.
				77	cerr << std::string(LineStart, LineEnd) << "\n";
				78	// Print out spaces before the carat.
				79	const char *Pos = LineStart;
				80	while (Pos != ErrorLoc)
				81	cerr << (*Pos == '\t' ? '\t' : ' ');
				82	cerr << "^\n";
				83	}
				84
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	85	int TGLexer::getNextChar() {
				86	char CurChar = *CurPtr++;
				87	switch (CurChar) {
				88	default:
Chris Lattner	c181918	2007-11-18 05:48:46 +0000	[diff] [blame]	89	return (unsigned char)CurChar;
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	90	case 0:
				91	// A nul character in the stream is either the end of the current buffer or
				92	// a random nul in the file. Disambiguate that here.
				93	if (CurPtr-1 != CurBuf->getBufferEnd())
				94	return 0; // Just whitespace.
				95
				96	// If this is the end of an included file, pop the parent file off the
				97	// include stack.
				98	if (!IncludeStack.empty()) {
				99	delete CurBuf;
				100	CurBuf = IncludeStack.back().Buffer;
				101	CurLineNo = IncludeStack.back().LineNo;
				102	CurPtr = IncludeStack.back().CurPtr;
				103	IncludeStack.pop_back();
				104	return getNextChar();
				105	}
				106
				107	// Otherwise, return end of file.
				108	--CurPtr; // Another call to lex will return EOF again.
				109	return EOF;
				110	case '\n':
				111	case '\r':
				112	// Handle the newline character by ignoring it and incrementing the line
				113	// count. However, be careful about 'dos style' files with \n\r in them.
				114	// Only treat a \n\r or \r\n as a single line.
				115	if ((CurPtr == '\n' \|\| (CurPtr == '\r')) &&
				116	*CurPtr != CurChar)
Chris Lattner	c181918	2007-11-18 05:48:46 +0000	[diff] [blame]	117	++CurPtr; // Eat the two char newline sequence.
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	118
				119	++CurLineNo;
				120	return '\n';
				121	}
				122	}
				123
				124	int TGLexer::LexToken() {
				125	// This always consumes at least one character.
				126	int CurChar = getNextChar();
				127
				128	switch (CurChar) {
				129	default:
				130	// Handle letters: [a-zA-Z_]
				131	if (isalpha(CurChar) \|\| CurChar == '_')
				132	return LexIdentifier();
				133
				134	// Unknown character, return the char itself.
				135	return (unsigned char)CurChar;
				136	case EOF: return YYEOF;
				137	case 0:
				138	case ' ':
				139	case '\t':
				140	case '\n':
				141	case '\r':
				142	// Ignore whitespace.
				143	return LexToken();
				144	case '/':
				145	// If this is the start of a // comment, skip until the end of the line or
				146	// the end of the buffer.
				147	if (*CurPtr == '/')
				148	SkipBCPLComment();
				149	else if (CurPtr == '') {
				150	if (SkipCComment())
				151	return YYERROR;
				152	} else // Otherwise, return this / as a token.
				153	return CurChar;
				154	return LexToken();
				155	case '-': case '+':
				156	case '0': case '1': case '2': case '3': case '4': case '5': case '6':
				157	case '7': case '8': case '9':
				158	return LexNumber();
				159	case '"': return LexString();
				160	case '$': return LexVarName();
				161	case '[': return LexBracket();
				162	case '!': return LexExclaim();
				163	}
				164	}
				165
				166	/// LexString - Lex "[^"]*"
				167	int TGLexer::LexString() {
				168	const char *StrStart = CurPtr;
				169
				170	while (*CurPtr != '"') {
				171	// If we hit the end of the buffer, report an error.
Chris Lattner	c8a9bbc	2007-11-19 07:38:58 +0000	[diff] [blame^]	172	if (*CurPtr == 0 && CurPtr == CurBuf->getBufferEnd())
				173	return ReturnError(StrStart, "End of file in string literal");
				174
				175	if (CurPtr == '\n' \|\| CurPtr == '\r')
				176	return ReturnError(StrStart, "End of line in string literal");
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	177
				178	++CurPtr;
				179	}
				180
				181	Filelval.StrVal = new std::string(StrStart, CurPtr);
				182	++CurPtr;
				183	return STRVAL;
				184	}
				185
				186	int TGLexer::LexVarName() {
				187	if (!isalpha(CurPtr[0]) && CurPtr[0] != '_')
				188	return '$'; // Invalid varname.
				189
				190	// Otherwise, we're ok, consume the rest of the characters.
				191	const char *VarNameStart = CurPtr++;
				192
				193	while (isalpha(CurPtr) \|\| isdigit(CurPtr) \|\| *CurPtr == '_')
				194	++CurPtr;
				195
				196	Filelval.StrVal = new std::string(VarNameStart, CurPtr);
				197	return VARNAME;
				198	}
				199
				200
				201	int TGLexer::LexIdentifier() {
				202	// The first letter is [a-zA-Z_].
				203	const char *IdentStart = CurPtr-1;
				204
				205	// Match the rest of the identifier regex: [0-9a-zA-Z_]*
				206	while (isalpha(CurPtr) \|\| isdigit(CurPtr) \|\| *CurPtr == '_')
				207	++CurPtr;
				208
				209	// Check to see if this identifier is a keyword.
				210	unsigned Len = CurPtr-IdentStart;
				211
				212	if (Len == 3 && !memcmp(IdentStart, "int", 3)) return INT;
				213	if (Len == 3 && !memcmp(IdentStart, "bit", 3)) return BIT;
				214	if (Len == 4 && !memcmp(IdentStart, "bits", 4)) return BITS;
				215	if (Len == 6 && !memcmp(IdentStart, "string", 6)) return STRING;
				216	if (Len == 4 && !memcmp(IdentStart, "list", 4)) return LIST;
				217	if (Len == 4 && !memcmp(IdentStart, "code", 4)) return CODE;
				218	if (Len == 3 && !memcmp(IdentStart, "dag", 3)) return DAG;
				219
				220	if (Len == 5 && !memcmp(IdentStart, "class", 5)) return CLASS;
				221	if (Len == 3 && !memcmp(IdentStart, "def", 3)) return DEF;
				222	if (Len == 4 && !memcmp(IdentStart, "defm", 4)) return DEFM;
				223	if (Len == 10 && !memcmp(IdentStart, "multiclass", 10)) return MULTICLASS;
				224	if (Len == 5 && !memcmp(IdentStart, "field", 5)) return FIELD;
				225	if (Len == 3 && !memcmp(IdentStart, "let", 3)) return LET;
				226	if (Len == 2 && !memcmp(IdentStart, "in", 2)) return IN;
				227
				228	if (Len == 7 && !memcmp(IdentStart, "include", 7)) {
				229	if (LexInclude()) return YYERROR;
				230	return LexToken();
				231	}
				232
				233	Filelval.StrVal = new std::string(IdentStart, CurPtr);
				234	return ID;
				235	}
				236
				237	/// LexInclude - We just read the "include" token. Get the string token that
				238	/// comes next and enter the include.
				239	bool TGLexer::LexInclude() {
				240	// The token after the include must be a string.
Chris Lattner	c8a9bbc	2007-11-19 07:38:58 +0000	[diff] [blame^]	241	const char *TokStart = CurPtr-7;
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	242	int Tok = LexToken();
				243	if (Tok == YYERROR) return true;
				244	if (Tok != STRVAL) {
Chris Lattner	c8a9bbc	2007-11-19 07:38:58 +0000	[diff] [blame^]	245	PrintError(TokStart, "Expected filename after include");
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	246	return true;
				247	}
				248
				249	// Get the string.
				250	std::string Filename = *Filelval.StrVal;
				251	delete Filelval.StrVal;
				252
				253	// Try to find the file.
				254	MemoryBuffer *NewBuf = MemoryBuffer::getFile(&Filename[0], Filename.size());
				255
				256	// If the file didn't exist directly, see if it's in an include path.
				257	for (unsigned i = 0, e = IncludeDirectories.size(); i != e && !NewBuf; ++i) {
				258	std::string IncFile = IncludeDirectories[i] + "/" + Filename;
				259	NewBuf = MemoryBuffer::getFile(&IncFile[0], IncFile.size());
				260	}
				261
				262	if (NewBuf == 0) {
Chris Lattner	c8a9bbc	2007-11-19 07:38:58 +0000	[diff] [blame^]	263	PrintError(TokStart, "Could not find include file '" + Filename + "'");
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	264	return true;
				265	}
				266
				267	// Save the line number and lex buffer of the includer.
				268	IncludeStack.push_back(IncludeRec(CurBuf, CurPtr, CurLineNo));
				269
				270	CurLineNo = 1; // Reset line numbering.
				271	CurBuf = NewBuf;
				272	CurPtr = CurBuf->getBufferStart();
				273	return false;
				274	}
				275
				276	void TGLexer::SkipBCPLComment() {
				277	++CurPtr; // skip the second slash.
				278	while (1) {
				279	switch (*CurPtr) {
				280	case '\n':
				281	case '\r':
				282	return; // Newline is end of comment.
				283	case 0:
				284	// If this is the end of the buffer, end the comment.
				285	if (CurPtr == CurBuf->getBufferEnd())
				286	return;
				287	break;
				288	}
				289	// Otherwise, skip the character.
				290	++CurPtr;
				291	}
				292	}
				293
				294	/// SkipCComment - This skips C-style /**/ comments. The only difference from C
				295	/// is that we allow nesting.
				296	bool TGLexer::SkipCComment() {
Chris Lattner	c8a9bbc	2007-11-19 07:38:58 +0000	[diff] [blame^]	297	const char *CommentStart = CurPtr-1;
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	298	++CurPtr; // skip the star.
				299	unsigned CommentDepth = 1;
				300
				301	while (1) {
				302	int CurChar = getNextChar();
				303	switch (CurChar) {
				304	case EOF:
Chris Lattner	c8a9bbc	2007-11-19 07:38:58 +0000	[diff] [blame^]	305	PrintError(CommentStart, "Unterminated comment!");
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	306	return true;
				307	case '*':
				308	// End of the comment?
				309	if (CurPtr[0] != '/') break;
				310
				311	++CurPtr; // End the */.
				312	if (--CommentDepth == 0)
				313	return false;
				314	break;
				315	case '/':
				316	// Start of a nested comment?
				317	if (CurPtr[0] != '*') break;
				318	++CurPtr;
				319	++CommentDepth;
				320	break;
				321	}
				322	}
				323	}
				324
				325	/// LexNumber - Lex:
				326	/// [-+]?[0-9]+
				327	/// 0x[0-9a-fA-F]+
				328	/// 0b[01]+
				329	int TGLexer::LexNumber() {
				330	const char *NumStart = CurPtr-1;
				331
				332	if (CurPtr[-1] == '0') {
				333	if (CurPtr[0] == 'x') {
				334	++CurPtr;
				335	NumStart = CurPtr;
				336	while (isxdigit(CurPtr[0]))
				337	++CurPtr;
				338
Chris Lattner	c8a9bbc	2007-11-19 07:38:58 +0000	[diff] [blame^]	339	// Requires at least one hex digit.
				340	if (CurPtr == NumStart)
				341	return ReturnError(CurPtr-2, "Invalid hexadecimal number");
				342
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	343	Filelval.IntVal = strtoll(NumStart, 0, 16);
				344	return INTVAL;
				345	} else if (CurPtr[0] == 'b') {
				346	++CurPtr;
				347	NumStart = CurPtr;
				348	while (CurPtr[0] == '0' \|\| CurPtr[0] == '1')
				349	++CurPtr;
Chris Lattner	c8a9bbc	2007-11-19 07:38:58 +0000	[diff] [blame^]	350
				351	// Requires at least one binary digit.
				352	if (CurPtr == NumStart)
				353	return ReturnError(CurPtr-2, "Invalid binary number");
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	354	Filelval.IntVal = strtoll(NumStart, 0, 2);
				355	return INTVAL;
				356	}
				357	}
				358
				359	// Check for a sign without a digit.
				360	if (CurPtr[-1] == '-' \|\| CurPtr[-1] == '+') {
				361	if (!isdigit(CurPtr[0]))
				362	return CurPtr[-1];
				363	}
				364
				365	while (isdigit(CurPtr[0]))
				366	++CurPtr;
				367	Filelval.IntVal = strtoll(NumStart, 0, 10);
				368	return INTVAL;
				369	}
				370
				371	/// LexBracket - We just read '['. If this is a code block, return it,
				372	/// otherwise return the bracket. Match: '[' and '[{ ( [^}]+ \| }[^]] )* }]'
				373	int TGLexer::LexBracket() {
				374	if (CurPtr[0] != '{')
				375	return '[';
				376	++CurPtr;
				377	const char *CodeStart = CurPtr;
				378	while (1) {
				379	int Char = getNextChar();
				380	if (Char == EOF) break;
				381
				382	if (Char != '}') continue;
				383
				384	Char = getNextChar();
				385	if (Char == EOF) break;
				386	if (Char == ']') {
				387	Filelval.StrVal = new std::string(CodeStart, CurPtr-2);
				388	return CODEFRAGMENT;
				389	}
				390	}
				391
Chris Lattner	c8a9bbc	2007-11-19 07:38:58 +0000	[diff] [blame^]	392	return ReturnError(CodeStart-2, "Unterminated Code Block");
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	393	}
				394
				395	/// LexExclaim - Lex '!' and '![a-zA-Z]+'.
				396	int TGLexer::LexExclaim() {
				397	if (!isalpha(*CurPtr))
				398	return '!';
				399
				400	const char *Start = CurPtr++;
				401	while (isalpha(*CurPtr))
				402	++CurPtr;
				403
				404	// Check to see which operator this is.
				405	unsigned Len = CurPtr-Start;
				406
				407	if (Len == 3 && !memcmp(Start, "con", 3)) return CONCATTOK;
				408	if (Len == 3 && !memcmp(Start, "sra", 3)) return SRATOK;
				409	if (Len == 3 && !memcmp(Start, "srl", 3)) return SRLTOK;
				410	if (Len == 3 && !memcmp(Start, "shl", 3)) return SHLTOK;
				411	if (Len == 9 && !memcmp(Start, "strconcat", 9)) return STRCONCATTOK;
				412
Chris Lattner	c8a9bbc	2007-11-19 07:38:58 +0000	[diff] [blame^]	413	return ReturnError(Start-1, "Unknown operator");
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	414	}
				415
				416	//===----------------------------------------------------------------------===//
				417	// Interfaces used by the Bison parser.
				418	//===----------------------------------------------------------------------===//
				419
				420	int Fileparse();
				421	static TGLexer *TheLexer;
				422
				423	namespace llvm {
				424
				425	std::ostream &err() {
				426	return TheLexer->err();
				427	}
				428
				429	/// ParseFile - this function begins the parsing of the specified tablegen
				430	/// file.
				431	///
				432	void ParseFile(const std::string &Filename,
				433	const std::vector<std::string> &IncludeDirs) {
				434	std::string ErrorStr;
				435	MemoryBuffer *F = MemoryBuffer::getFileOrSTDIN(&Filename[0], Filename.size(),
				436	&ErrorStr);
				437	if (F == 0) {
				438	cerr << "Could not open input file '" + Filename + "': " << ErrorStr <<"\n";
				439	exit(1);
				440	}
				441
				442	assert(!TheLexer && "Lexer isn't reentrant yet!");
				443	TheLexer = new TGLexer(F);
				444
				445	// Record the location of the include directory so that the lexer can find
				446	// it later.
				447	TheLexer->setIncludeDirs(IncludeDirs);
				448
				449	Fileparse();
				450
				451	// Cleanup
				452	delete TheLexer;
				453	TheLexer = 0;
				454	}
				455	} // End llvm namespace
				456
				457
				458	int Filelex() {
				459	assert(TheLexer && "No lexer setup yet!");
				460	int Tok = TheLexer->LexToken();
Chris Lattner	c8a9bbc	2007-11-19 07:38:58 +0000	[diff] [blame^]	461	if (Tok == YYERROR)
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	462	exit(1);
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	463	return Tok;
				464	}