Blame - src/LLVM/utils/TableGen/TGLexer.cpp - platform/external/swiftshader

blob: 2c7becc71824d3c4d58824296b224b393f7f8b80 [file] [log] [blame]

John Bauman	8940182	2014-05-06 15:04:28 -0400	[diff] [blame]	1	//===- TGLexer.cpp - Lexer for TableGen -----------------------------------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	// Implement the Lexer for TableGen.
				11	//
				12	//===----------------------------------------------------------------------===//
				13
				14	#include "TGLexer.h"
				15	#include "llvm/Support/SourceMgr.h"
				16	#include "llvm/Support/MemoryBuffer.h"
				17	#include "llvm/Config/config.h"
				18	#include <cctype>
				19	#include <cstdio>
				20	#include <cstdlib>
				21	#include <cstring>
				22	#include <cerrno>
				23	using namespace llvm;
				24
				25	TGLexer::TGLexer(SourceMgr &SM) : SrcMgr(SM) {
				26	CurBuffer = 0;
				27	CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
				28	CurPtr = CurBuf->getBufferStart();
				29	TokStart = 0;
				30	}
				31
				32	SMLoc TGLexer::getLoc() const {
				33	return SMLoc::getFromPointer(TokStart);
				34	}
				35
				36
				37	/// ReturnError - Set the error to the specified string at the specified
				38	/// location. This is defined to always return tgtok::Error.
				39	tgtok::TokKind TGLexer::ReturnError(const char *Loc, const std::string &Msg) {
				40	PrintError(Loc, Msg);
				41	return tgtok::Error;
				42	}
				43
				44
				45	void TGLexer::PrintError(const char *Loc, const std::string &Msg) const {
				46	SrcMgr.PrintMessage(SMLoc::getFromPointer(Loc), Msg, "error");
				47	}
				48
				49	void TGLexer::PrintError(SMLoc Loc, const std::string &Msg) const {
				50	SrcMgr.PrintMessage(Loc, Msg, "error");
				51	}
				52
				53
				54	int TGLexer::getNextChar() {
				55	char CurChar = *CurPtr++;
				56	switch (CurChar) {
				57	default:
				58	return (unsigned char)CurChar;
				59	case 0: {
				60	// A nul character in the stream is either the end of the current buffer or
				61	// a random nul in the file. Disambiguate that here.
				62	if (CurPtr-1 != CurBuf->getBufferEnd())
				63	return 0; // Just whitespace.
				64
				65	// If this is the end of an included file, pop the parent file off the
				66	// include stack.
				67	SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
				68	if (ParentIncludeLoc != SMLoc()) {
				69	CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc);
				70	CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
				71	CurPtr = ParentIncludeLoc.getPointer();
				72	return getNextChar();
				73	}
				74
				75	// Otherwise, return end of file.
				76	--CurPtr; // Another call to lex will return EOF again.
				77	return EOF;
				78	}
				79	case '\n':
				80	case '\r':
				81	// Handle the newline character by ignoring it and incrementing the line
				82	// count. However, be careful about 'dos style' files with \n\r in them.
				83	// Only treat a \n\r or \r\n as a single line.
				84	if ((CurPtr == '\n' \|\| (CurPtr == '\r')) &&
				85	*CurPtr != CurChar)
				86	++CurPtr; // Eat the two char newline sequence.
				87	return '\n';
				88	}
				89	}
				90
				91	tgtok::TokKind TGLexer::LexToken() {
				92	TokStart = CurPtr;
				93	// This always consumes at least one character.
				94	int CurChar = getNextChar();
				95
				96	switch (CurChar) {
				97	default:
				98	// Handle letters: [a-zA-Z_]
				99	if (isalpha(CurChar) \|\| CurChar == '_' \|\| CurChar == '#')
				100	return LexIdentifier();
				101
				102	// Unknown character, emit an error.
				103	return ReturnError(TokStart, "Unexpected character");
				104	case EOF: return tgtok::Eof;
				105	case ':': return tgtok::colon;
				106	case ';': return tgtok::semi;
				107	case '.': return tgtok::period;
				108	case ',': return tgtok::comma;
				109	case '<': return tgtok::less;
				110	case '>': return tgtok::greater;
				111	case ']': return tgtok::r_square;
				112	case '{': return tgtok::l_brace;
				113	case '}': return tgtok::r_brace;
				114	case '(': return tgtok::l_paren;
				115	case ')': return tgtok::r_paren;
				116	case '=': return tgtok::equal;
				117	case '?': return tgtok::question;
				118
				119	case 0:
				120	case ' ':
				121	case '\t':
				122	case '\n':
				123	case '\r':
				124	// Ignore whitespace.
				125	return LexToken();
				126	case '/':
				127	// If this is the start of a // comment, skip until the end of the line or
				128	// the end of the buffer.
				129	if (*CurPtr == '/')
				130	SkipBCPLComment();
				131	else if (CurPtr == '') {
				132	if (SkipCComment())
				133	return tgtok::Error;
				134	} else // Otherwise, this is an error.
				135	return ReturnError(TokStart, "Unexpected character");
				136	return LexToken();
				137	case '-': case '+':
				138	case '0': case '1': case '2': case '3': case '4': case '5': case '6':
				139	case '7': case '8': case '9':
				140	return LexNumber();
				141	case '"': return LexString();
				142	case '$': return LexVarName();
				143	case '[': return LexBracket();
				144	case '!': return LexExclaim();
				145	}
				146	}
				147
				148	/// LexString - Lex "[^"]*"
				149	tgtok::TokKind TGLexer::LexString() {
				150	const char *StrStart = CurPtr;
				151
				152	CurStrVal = "";
				153
				154	while (*CurPtr != '"') {
				155	// If we hit the end of the buffer, report an error.
				156	if (*CurPtr == 0 && CurPtr == CurBuf->getBufferEnd())
				157	return ReturnError(StrStart, "End of file in string literal");
				158
				159	if (CurPtr == '\n' \|\| CurPtr == '\r')
				160	return ReturnError(StrStart, "End of line in string literal");
				161
				162	if (*CurPtr != '\\') {
				163	CurStrVal += *CurPtr++;
				164	continue;
				165	}
				166
				167	++CurPtr;
				168
				169	switch (*CurPtr) {
				170	case '\\': case '\'': case '"':
				171	// These turn into their literal character.
				172	CurStrVal += *CurPtr++;
				173	break;
				174	case 't':
				175	CurStrVal += '\t';
				176	++CurPtr;
				177	break;
				178	case 'n':
				179	CurStrVal += '\n';
				180	++CurPtr;
				181	break;
				182
				183	case '\n':
				184	case '\r':
				185	return ReturnError(CurPtr, "escaped newlines not supported in tblgen");
				186
				187	// If we hit the end of the buffer, report an error.
				188	case '\0':
				189	if (CurPtr == CurBuf->getBufferEnd())
				190	return ReturnError(StrStart, "End of file in string literal");
				191	// FALL THROUGH
				192	default:
				193	return ReturnError(CurPtr, "invalid escape in string literal");
				194	}
				195	}
				196
				197	++CurPtr;
				198	return tgtok::StrVal;
				199	}
				200
				201	tgtok::TokKind TGLexer::LexVarName() {
				202	if (!isalpha(CurPtr[0]) && CurPtr[0] != '_')
				203	return ReturnError(TokStart, "Invalid variable name");
				204
				205	// Otherwise, we're ok, consume the rest of the characters.
				206	const char *VarNameStart = CurPtr++;
				207
				208	while (isalpha(CurPtr) \|\| isdigit(CurPtr) \|\| *CurPtr == '_')
				209	++CurPtr;
				210
				211	CurStrVal.assign(VarNameStart, CurPtr);
				212	return tgtok::VarName;
				213	}
				214
				215
				216	tgtok::TokKind TGLexer::LexIdentifier() {
				217	// The first letter is [a-zA-Z_].
				218	const char *IdentStart = TokStart;
				219
				220	// Match the rest of the identifier regex: [0-9a-zA-Z_]*
				221	while (isalpha(CurPtr) \|\| isdigit(CurPtr) \|\| *CurPtr == '_'
				222	\|\| *CurPtr == '#') {
				223	// If this contains a '#', make sure it's value
				224	if (*CurPtr == '#') {
				225	if (strncmp(CurPtr, "#NAME#", 6) != 0) {
				226	return tgtok::Error;
				227	}
				228	CurPtr += 6;
				229	}
				230	else {
				231	++CurPtr;
				232	}
				233	}
				234
				235
				236	// Check to see if this identifier is a keyword.
				237	unsigned Len = CurPtr-IdentStart;
				238
				239	if (Len == 3 && !memcmp(IdentStart, "int", 3)) return tgtok::Int;
				240	if (Len == 3 && !memcmp(IdentStart, "bit", 3)) return tgtok::Bit;
				241	if (Len == 4 && !memcmp(IdentStart, "bits", 4)) return tgtok::Bits;
				242	if (Len == 6 && !memcmp(IdentStart, "string", 6)) return tgtok::String;
				243	if (Len == 4 && !memcmp(IdentStart, "list", 4)) return tgtok::List;
				244	if (Len == 4 && !memcmp(IdentStart, "code", 4)) return tgtok::Code;
				245	if (Len == 3 && !memcmp(IdentStart, "dag", 3)) return tgtok::Dag;
				246
				247	if (Len == 5 && !memcmp(IdentStart, "class", 5)) return tgtok::Class;
				248	if (Len == 3 && !memcmp(IdentStart, "def", 3)) return tgtok::Def;
				249	if (Len == 4 && !memcmp(IdentStart, "defm", 4)) return tgtok::Defm;
				250	if (Len == 10 && !memcmp(IdentStart, "multiclass", 10))
				251	return tgtok::MultiClass;
				252	if (Len == 5 && !memcmp(IdentStart, "field", 5)) return tgtok::Field;
				253	if (Len == 3 && !memcmp(IdentStart, "let", 3)) return tgtok::Let;
				254	if (Len == 2 && !memcmp(IdentStart, "in", 2)) return tgtok::In;
				255
				256	if (Len == 7 && !memcmp(IdentStart, "include", 7)) {
				257	if (LexInclude()) return tgtok::Error;
				258	return Lex();
				259	}
				260
				261	CurStrVal.assign(IdentStart, CurPtr);
				262	return tgtok::Id;
				263	}
				264
				265	/// LexInclude - We just read the "include" token. Get the string token that
				266	/// comes next and enter the include.
				267	bool TGLexer::LexInclude() {
				268	// The token after the include must be a string.
				269	tgtok::TokKind Tok = LexToken();
				270	if (Tok == tgtok::Error) return true;
				271	if (Tok != tgtok::StrVal) {
				272	PrintError(getLoc(), "Expected filename after include");
				273	return true;
				274	}
				275
				276	// Get the string.
				277	std::string Filename = CurStrVal;
				278
				279
				280	CurBuffer = SrcMgr.AddIncludeFile(Filename, SMLoc::getFromPointer(CurPtr));
				281	if (CurBuffer == -1) {
				282	PrintError(getLoc(), "Could not find include file '" + Filename + "'");
				283	return true;
				284	}
				285
				286	// Save the line number and lex buffer of the includer.
				287	CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
				288	CurPtr = CurBuf->getBufferStart();
				289	return false;
				290	}
				291
				292	void TGLexer::SkipBCPLComment() {
				293	++CurPtr; // skip the second slash.
				294	while (1) {
				295	switch (*CurPtr) {
				296	case '\n':
				297	case '\r':
				298	return; // Newline is end of comment.
				299	case 0:
				300	// If this is the end of the buffer, end the comment.
				301	if (CurPtr == CurBuf->getBufferEnd())
				302	return;
				303	break;
				304	}
				305	// Otherwise, skip the character.
				306	++CurPtr;
				307	}
				308	}
				309
				310	/// SkipCComment - This skips C-style /**/ comments. The only difference from C
				311	/// is that we allow nesting.
				312	bool TGLexer::SkipCComment() {
				313	++CurPtr; // skip the star.
				314	unsigned CommentDepth = 1;
				315
				316	while (1) {
				317	int CurChar = getNextChar();
				318	switch (CurChar) {
				319	case EOF:
				320	PrintError(TokStart, "Unterminated comment!");
				321	return true;
				322	case '*':
				323	// End of the comment?
				324	if (CurPtr[0] != '/') break;
				325
				326	++CurPtr; // End the */.
				327	if (--CommentDepth == 0)
				328	return false;
				329	break;
				330	case '/':
				331	// Start of a nested comment?
				332	if (CurPtr[0] != '*') break;
				333	++CurPtr;
				334	++CommentDepth;
				335	break;
				336	}
				337	}
				338	}
				339
				340	/// LexNumber - Lex:
				341	/// [-+]?[0-9]+
				342	/// 0x[0-9a-fA-F]+
				343	/// 0b[01]+
				344	tgtok::TokKind TGLexer::LexNumber() {
				345	if (CurPtr[-1] == '0') {
				346	if (CurPtr[0] == 'x') {
				347	++CurPtr;
				348	const char *NumStart = CurPtr;
				349	while (isxdigit(CurPtr[0]))
				350	++CurPtr;
				351
				352	// Requires at least one hex digit.
				353	if (CurPtr == NumStart)
				354	return ReturnError(TokStart, "Invalid hexadecimal number");
				355
				356	errno = 0;
				357	CurIntVal = strtoll(NumStart, 0, 16);
				358	if (errno == EINVAL)
				359	return ReturnError(TokStart, "Invalid hexadecimal number");
				360	if (errno == ERANGE) {
				361	errno = 0;
				362	CurIntVal = (int64_t)strtoull(NumStart, 0, 16);
				363	if (errno == EINVAL)
				364	return ReturnError(TokStart, "Invalid hexadecimal number");
				365	if (errno == ERANGE)
				366	return ReturnError(TokStart, "Hexadecimal number out of range");
				367	}
				368	return tgtok::IntVal;
				369	} else if (CurPtr[0] == 'b') {
				370	++CurPtr;
				371	const char *NumStart = CurPtr;
				372	while (CurPtr[0] == '0' \|\| CurPtr[0] == '1')
				373	++CurPtr;
				374
				375	// Requires at least one binary digit.
				376	if (CurPtr == NumStart)
				377	return ReturnError(CurPtr-2, "Invalid binary number");
				378	CurIntVal = strtoll(NumStart, 0, 2);
				379	return tgtok::IntVal;
				380	}
				381	}
				382
				383	// Check for a sign without a digit.
				384	if (!isdigit(CurPtr[0])) {
				385	if (CurPtr[-1] == '-')
				386	return tgtok::minus;
				387	else if (CurPtr[-1] == '+')
				388	return tgtok::plus;
				389	}
				390
				391	while (isdigit(CurPtr[0]))
				392	++CurPtr;
				393	CurIntVal = strtoll(TokStart, 0, 10);
				394	return tgtok::IntVal;
				395	}
				396
				397	/// LexBracket - We just read '['. If this is a code block, return it,
				398	/// otherwise return the bracket. Match: '[' and '[{ ( [^}]+ \| }[^]] )* }]'
				399	tgtok::TokKind TGLexer::LexBracket() {
				400	if (CurPtr[0] != '{')
				401	return tgtok::l_square;
				402	++CurPtr;
				403	const char *CodeStart = CurPtr;
				404	while (1) {
				405	int Char = getNextChar();
				406	if (Char == EOF) break;
				407
				408	if (Char != '}') continue;
				409
				410	Char = getNextChar();
				411	if (Char == EOF) break;
				412	if (Char == ']') {
				413	CurStrVal.assign(CodeStart, CurPtr-2);
				414	return tgtok::CodeFragment;
				415	}
				416	}
				417
				418	return ReturnError(CodeStart-2, "Unterminated Code Block");
				419	}
				420
				421	/// LexExclaim - Lex '!' and '![a-zA-Z]+'.
				422	tgtok::TokKind TGLexer::LexExclaim() {
				423	if (!isalpha(*CurPtr))
				424	return ReturnError(CurPtr-1, "Invalid \"!operator\"");
				425
				426	const char *Start = CurPtr++;
				427	while (isalpha(*CurPtr))
				428	++CurPtr;
				429
				430	// Check to see which operator this is.
				431	unsigned Len = CurPtr-Start;
				432
				433	if (Len == 3 && !memcmp(Start, "con", 3)) return tgtok::XConcat;
				434	if (Len == 3 && !memcmp(Start, "sra", 3)) return tgtok::XSRA;
				435	if (Len == 3 && !memcmp(Start, "srl", 3)) return tgtok::XSRL;
				436	if (Len == 3 && !memcmp(Start, "shl", 3)) return tgtok::XSHL;
				437	if (Len == 2 && !memcmp(Start, "eq", 2)) return tgtok::XEq;
				438	if (Len == 9 && !memcmp(Start, "strconcat", 9)) return tgtok::XStrConcat;
				439	if (Len == 10 && !memcmp(Start, "nameconcat", 10)) return tgtok::XNameConcat;
				440	if (Len == 5 && !memcmp(Start, "subst", 5)) return tgtok::XSubst;
				441	if (Len == 7 && !memcmp(Start, "foreach", 7)) return tgtok::XForEach;
				442	if (Len == 4 && !memcmp(Start, "cast", 4)) return tgtok::XCast;
				443	if (Len == 3 && !memcmp(Start, "car", 3)) return tgtok::XCar;
				444	if (Len == 3 && !memcmp(Start, "cdr", 3)) return tgtok::XCdr;
				445	if (Len == 4 && !memcmp(Start, "null", 4)) return tgtok::XNull;
				446	if (Len == 2 && !memcmp(Start, "if", 2)) return tgtok::XIf;
				447
				448	return ReturnError(Start-1, "Unknown operator");
				449	}
				450