Blame - lib/Parser/Lexer.cpp - platform/external/tensorflow

blob: 17755e0291f917e7e02455bf7a9fcf70a7dccf9b [file] [log] [blame]

Chris Lattner	e79379a	2018-06-22 10:39:19 -0700	[diff] [blame]	1	//===- Lexer.cpp - MLIR Lexer Implementation ------------------------------===//
				2	//
				3	// Copyright 2019 The MLIR Authors.
				4	//
				5	// Licensed under the Apache License, Version 2.0 (the "License");
				6	// you may not use this file except in compliance with the License.
				7	// You may obtain a copy of the License at
				8	//
				9	// http://www.apache.org/licenses/LICENSE-2.0
				10	//
				11	// Unless required by applicable law or agreed to in writing, software
				12	// distributed under the License is distributed on an "AS IS" BASIS,
				13	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				14	// See the License for the specific language governing permissions and
				15	// limitations under the License.
				16	// =============================================================================
				17	//
				18	// This file implements the lexer for the MLIR textual form.
				19	//
				20	//===----------------------------------------------------------------------===//
				21
				22	#include "Lexer.h"
				23	#include "llvm/Support/SourceMgr.h"
				24	using namespace mlir;
				25	using llvm::SMLoc;
				26	using llvm::SourceMgr;
				27
/// Returns true if 'c' is an allowable punctuation character: [$._-]
/// Returns false otherwise.
static bool isPunct(char c) {
  return c == '$' || c == '.' || c == '_' || c == '-';
}
				33
Jacques Pienaar	9c411be	2018-06-24 19:17:35 -0700	[diff] [blame]	34	Lexer::Lexer(llvm::SourceMgr &sourceMgr,
				35	const SMDiagnosticHandlerTy &errorReporter)
				36	: sourceMgr(sourceMgr), errorReporter(errorReporter) {
Chris Lattner	e79379a	2018-06-22 10:39:19 -0700	[diff] [blame]	37	auto bufferID = sourceMgr.getMainFileID();
				38	curBuffer = sourceMgr.getMemoryBuffer(bufferID)->getBuffer();
				39	curPtr = curBuffer.begin();
				40	}
				41
				42	/// emitError - Emit an error message and return an Token::error token.
				43	Token Lexer::emitError(const char *loc, const Twine &message) {
Jacques Pienaar	9c411be	2018-06-24 19:17:35 -0700	[diff] [blame]	44	errorReporter(sourceMgr.GetMessage(SMLoc::getFromPointer(loc),
				45	SourceMgr::DK_Error, message));
Chris Lattner	e79379a	2018-06-22 10:39:19 -0700	[diff] [blame]	46	return formToken(Token::error, loc);
				47	}
				48
				49	Token Lexer::lexToken() {
				50	const char *tokStart = curPtr;
				51
				52	switch (*curPtr++) {
				53	default:
				54	// Handle bare identifiers.
				55	if (isalpha(curPtr[-1]))
				56	return lexBareIdentifierOrKeyword(tokStart);
				57
				58	// Unknown character, emit an error.
				59	return emitError(tokStart, "unexpected character");
				60
				61	case 0:
				62	// This may either be a nul character in the source file or may be the EOF
				63	// marker that llvm::MemoryBuffer guarantees will be there.
				64	if (curPtr-1 == curBuffer.end())
				65	return formToken(Token::eof, tokStart);
				66
				67	LLVM_FALLTHROUGH;
				68	case ' ':
				69	case '\t':
				70	case '\n':
				71	case '\r':
				72	// Ignore whitespace.
				73	return lexToken();
				74
Chris Lattner	4c95a50	2018-06-23 16:03:42 -0700	[diff] [blame]	75	case ':': return formToken(Token::colon, tokStart);
Chris Lattner	bb8fafc	2018-06-22 15:52:02 -0700	[diff] [blame]	76	case ',': return formToken(Token::comma, tokStart);
Chris Lattner	e79379a	2018-06-22 10:39:19 -0700	[diff] [blame]	77	case '(': return formToken(Token::l_paren, tokStart);
				78	case ')': return formToken(Token::r_paren, tokStart);
Chris Lattner	4c95a50	2018-06-23 16:03:42 -0700	[diff] [blame]	79	case '{': return formToken(Token::l_brace, tokStart);
				80	case '}': return formToken(Token::r_brace, tokStart);
Uday Bondhugula	faf37dd	2018-06-29 18:09:29 -0700	[diff] [blame^]	81	case '[': return formToken(Token::l_bracket, tokStart);
				82	case ']': return formToken(Token::r_bracket, tokStart);
Chris Lattner	e79379a	2018-06-22 10:39:19 -0700	[diff] [blame]	83	case '<': return formToken(Token::less, tokStart);
				84	case '>': return formToken(Token::greater, tokStart);
Uday Bondhugula	faf37dd	2018-06-29 18:09:29 -0700	[diff] [blame^]	85	case '=': return formToken(Token::equal, tokStart);
Chris Lattner	e79379a	2018-06-22 10:39:19 -0700	[diff] [blame]	86
Uday Bondhugula	faf37dd	2018-06-29 18:09:29 -0700	[diff] [blame^]	87	case '+': return formToken(Token::plus, tokStart);
				88	case '*': return formToken(Token::star, tokStart);
Chris Lattner	bb8fafc	2018-06-22 15:52:02 -0700	[diff] [blame]	89	case '-':
				90	if (*curPtr == '>') {
				91	++curPtr;
				92	return formToken(Token::arrow, tokStart);
				93	}
				94	return emitError(tokStart, "unexpected character");
				95
				96	case '?':
				97	if (*curPtr == '?') {
				98	++curPtr;
				99	return formToken(Token::questionquestion, tokStart);
				100	}
				101
				102	return formToken(Token::question, tokStart);
				103
Chris Lattner	e79379a	2018-06-22 10:39:19 -0700	[diff] [blame]	104	case ';': return lexComment();
				105	case '@': return lexAtIdentifier(tokStart);
MLIR Team	f85a626	2018-06-27 11:03:08 -0700	[diff] [blame]	106	case '#': return lexAffineMapId(tokStart);
Chris Lattner	ed65a73	2018-06-28 20:45:33 -0700	[diff] [blame]	107	case '"': return lexString(tokStart);
Chris Lattner	bb8fafc	2018-06-22 15:52:02 -0700	[diff] [blame]	108
				109	case '0': case '1': case '2': case '3': case '4':
				110	case '5': case '6': case '7': case '8': case '9':
				111	return lexNumber(tokStart);
Chris Lattner	e79379a	2018-06-22 10:39:19 -0700	[diff] [blame]	112	}
				113	}
				114
				115	/// Lex a comment line, starting with a semicolon.
				116	///
				117	/// TODO: add a regex for comments here and to the spec.
				118	///
				119	Token Lexer::lexComment() {
				120	while (true) {
				121	switch (*curPtr++) {
				122	case '\n':
				123	case '\r':
				124	// Newline is end of comment.
				125	return lexToken();
				126	case 0:
				127	// If this is the end of the buffer, end the comment.
				128	if (curPtr-1 == curBuffer.end()) {
				129	--curPtr;
				130	return lexToken();
				131	}
				132	LLVM_FALLTHROUGH;
				133	default:
				134	// Skip over other characters.
				135	break;
				136	}
				137	}
				138	}
				139
				140	/// Lex a bare identifier or keyword that starts with a letter.
				141	///
Chris Lattner	f6d80a0	2018-06-24 11:18:29 -0700	[diff] [blame]	142	/// bare-id ::= letter (letter\|digit\|[_])*
Chris Lattner	e79379a	2018-06-22 10:39:19 -0700	[diff] [blame]	143	///
				144	Token Lexer::lexBareIdentifierOrKeyword(const char *tokStart) {
Chris Lattner	f6d80a0	2018-06-24 11:18:29 -0700	[diff] [blame]	145	// Match the rest of the identifier regex: [0-9a-zA-Z_]*
				146	while (isalpha(curPtr) \|\| isdigit(curPtr) \|\| *curPtr == '_')
Chris Lattner	e79379a	2018-06-22 10:39:19 -0700	[diff] [blame]	147	++curPtr;
				148
				149	// Check to see if this identifier is a keyword.
				150	StringRef spelling(tokStart, curPtr-tokStart);
				151
Chris Lattner	8da0c28	2018-06-29 11:15:56 -0700	[diff] [blame]	152	Token::Kind kind = llvm::StringSwitch<Token::Kind>(spelling)
				153	#define TOK_KEYWORD(SPELLING) \
				154	.Case(#SPELLING, Token::kw_##SPELLING)
				155	#include "TokenKinds.def"
Chris Lattner	e79379a	2018-06-22 10:39:19 -0700	[diff] [blame]	156	.Default(Token::bare_identifier);
				157
				158	return Token(kind, spelling);
				159	}
				160
				161	/// Lex an '@foo' identifier.
				162	///
				163	/// function-id ::= `@` bare-id
				164	///
				165	Token Lexer::lexAtIdentifier(const char *tokStart) {
				166	// These always start with a letter.
				167	if (!isalpha(*curPtr++))
				168	return emitError(curPtr-1, "expected letter in @ identifier");
				169
Chris Lattner	f6d80a0	2018-06-24 11:18:29 -0700	[diff] [blame]	170	while (isalpha(curPtr) \|\| isdigit(curPtr) \|\| *curPtr == '_')
Chris Lattner	e79379a	2018-06-22 10:39:19 -0700	[diff] [blame]	171	++curPtr;
				172	return formToken(Token::at_identifier, tokStart);
				173	}
Chris Lattner	bb8fafc	2018-06-22 15:52:02 -0700	[diff] [blame]	174
MLIR Team	f85a626	2018-06-27 11:03:08 -0700	[diff] [blame]	175	/// Lex an '#foo' identifier.
				176	///
				177	/// affine-map-id ::= `#` suffix-id
				178	/// suffix-id ::= digit+ \| (letter\|id-punct) (letter\|id-punct\|digit)*
				179	///
				180	// TODO(andydavis) Consider moving suffix-id parsing to a shared function
				181	// so it can be re-used to parse %suffix-id.
				182	Token Lexer::lexAffineMapId(const char *tokStart) {
				183	// Parse suffix-id.
				184	if (isdigit(*curPtr)) {
				185	// If suffix-id starts with a digit, the rest must be digits.
				186	while (isdigit(*curPtr)) {
				187	++curPtr;
				188	}
				189	} else if (isalpha(curPtr) \|\| isPunct(curPtr)) {
				190	do {
				191	++curPtr;
				192	} while (isalpha(curPtr) \|\| isdigit(curPtr) \|\| isPunct(*curPtr));
				193	} else {
				194	return emitError(curPtr-1, "invalid affine map id");
				195	}
Chris Lattner	8da0c28	2018-06-29 11:15:56 -0700	[diff] [blame]	196	return formToken(Token::affine_map_identifier, tokStart);
MLIR Team	f85a626	2018-06-27 11:03:08 -0700	[diff] [blame]	197	}
				198
Chris Lattner	bb8fafc	2018-06-22 15:52:02 -0700	[diff] [blame]	199	/// Lex an integer literal.
				200	///
				201	/// integer-literal ::= digit+ \| `0x` hex_digit+
				202	///
				203	Token Lexer::lexNumber(const char *tokStart) {
				204	assert(isdigit(curPtr[-1]));
				205
				206	// Handle the hexadecimal case.
				207	if (curPtr[-1] == '0' && *curPtr == 'x') {
				208	++curPtr;
				209
				210	if (!isxdigit(*curPtr))
				211	return emitError(curPtr, "expected hexadecimal digit");
				212
				213	while (isxdigit(*curPtr))
				214	++curPtr;
				215
				216	return formToken(Token::integer, tokStart);
				217	}
				218
				219	// Handle the normal decimal case.
				220	while (isdigit(*curPtr))
				221	++curPtr;
				222
				223	return formToken(Token::integer, tokStart);
				224	}
Chris Lattner	ed65a73	2018-06-28 20:45:33 -0700	[diff] [blame]	225
				226	/// Lex a string literal.
				227	///
				228	/// string-literal ::= '"' [^"\n\f\v\r]* '"'
				229	///
				230	/// TODO: define escaping rules.
				231	Token Lexer::lexString(const char *tokStart) {
				232	assert(curPtr[-1] == '"');
				233
				234	while (1) {
				235	switch (*curPtr++) {
				236	case '"':
				237	return formToken(Token::string, tokStart);
				238	case '0':
				239	// If this is a random nul character in the middle of a string, just
				240	// include it. If it is the end of file, then it is an error.
				241	if (curPtr-1 != curBuffer.end())
				242	continue;
				243	LLVM_FALLTHROUGH;
				244	case '\n':
				245	case '\v':
				246	case '\f':
				247	return emitError(curPtr-1, "expected '\"' in string literal");
				248
				249	default:
				250	continue;
				251	}
				252	}
				253	}
Uday Bondhugula	faf37dd	2018-06-29 18:09:29 -0700	[diff] [blame^]	254