Blame - lib/Parser/Lexer.cpp - platform/external/tensorflow

blob: b6473f523ebd6e066b6b9ae46474da01d28d870a [file] [log] [blame]

Chris Lattner	e79379a	2018-06-22 10:39:19 -0700	[diff] [blame]	1	//===- Lexer.cpp - MLIR Lexer Implementation ------------------------------===//
				2	//
				3	// Copyright 2019 The MLIR Authors.
				4	//
				5	// Licensed under the Apache License, Version 2.0 (the "License");
				6	// you may not use this file except in compliance with the License.
				7	// You may obtain a copy of the License at
				8	//
				9	// http://www.apache.org/licenses/LICENSE-2.0
				10	//
				11	// Unless required by applicable law or agreed to in writing, software
				12	// distributed under the License is distributed on an "AS IS" BASIS,
				13	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				14	// See the License for the specific language governing permissions and
				15	// limitations under the License.
				16	// =============================================================================
				17	//
				18	// This file implements the lexer for the MLIR textual form.
				19	//
				20	//===----------------------------------------------------------------------===//
				21
				22	#include "Lexer.h"
				23	#include "llvm/Support/SourceMgr.h"
				24	using namespace mlir;
				25	using llvm::SMLoc;
				26	using llvm::SourceMgr;
				27
// Returns true if 'c' is an allowable punctuation character: [$._-]
// Returns false otherwise.
static bool isPunct(char c) {
  return c == '$' || c == '.' || c == '_' || c == '-';
}
				33
Jacques Pienaar	9c411be	2018-06-24 19:17:35 -0700	[diff] [blame]	34	Lexer::Lexer(llvm::SourceMgr &sourceMgr,
				35	const SMDiagnosticHandlerTy &errorReporter)
				36	: sourceMgr(sourceMgr), errorReporter(errorReporter) {
Chris Lattner	e79379a	2018-06-22 10:39:19 -0700	[diff] [blame]	37	auto bufferID = sourceMgr.getMainFileID();
				38	curBuffer = sourceMgr.getMemoryBuffer(bufferID)->getBuffer();
				39	curPtr = curBuffer.begin();
				40	}
				41
				42	/// emitError - Emit an error message and return an Token::error token.
				43	Token Lexer::emitError(const char *loc, const Twine &message) {
Jacques Pienaar	9c411be	2018-06-24 19:17:35 -0700	[diff] [blame]	44	errorReporter(sourceMgr.GetMessage(SMLoc::getFromPointer(loc),
				45	SourceMgr::DK_Error, message));
Chris Lattner	e79379a	2018-06-22 10:39:19 -0700	[diff] [blame]	46	return formToken(Token::error, loc);
				47	}
				48
				49	Token Lexer::lexToken() {
				50	const char *tokStart = curPtr;
				51
				52	switch (*curPtr++) {
				53	default:
				54	// Handle bare identifiers.
				55	if (isalpha(curPtr[-1]))
				56	return lexBareIdentifierOrKeyword(tokStart);
				57
				58	// Unknown character, emit an error.
				59	return emitError(tokStart, "unexpected character");
				60
				61	case 0:
				62	// This may either be a nul character in the source file or may be the EOF
				63	// marker that llvm::MemoryBuffer guarantees will be there.
				64	if (curPtr-1 == curBuffer.end())
				65	return formToken(Token::eof, tokStart);
				66
				67	LLVM_FALLTHROUGH;
				68	case ' ':
				69	case '\t':
				70	case '\n':
				71	case '\r':
				72	// Ignore whitespace.
				73	return lexToken();
				74
Chris Lattner	4c95a50	2018-06-23 16:03:42 -0700	[diff] [blame]	75	case ':': return formToken(Token::colon, tokStart);
Chris Lattner	bb8fafc	2018-06-22 15:52:02 -0700	[diff] [blame]	76	case ',': return formToken(Token::comma, tokStart);
Chris Lattner	e79379a	2018-06-22 10:39:19 -0700	[diff] [blame]	77	case '(': return formToken(Token::l_paren, tokStart);
				78	case ')': return formToken(Token::r_paren, tokStart);
Chris Lattner	4c95a50	2018-06-23 16:03:42 -0700	[diff] [blame]	79	case '{': return formToken(Token::l_brace, tokStart);
				80	case '}': return formToken(Token::r_brace, tokStart);
Chris Lattner	e79379a	2018-06-22 10:39:19 -0700	[diff] [blame]	81	case '<': return formToken(Token::less, tokStart);
				82	case '>': return formToken(Token::greater, tokStart);
				83
Chris Lattner	bb8fafc	2018-06-22 15:52:02 -0700	[diff] [blame]	84	case '-':
				85	if (*curPtr == '>') {
				86	++curPtr;
				87	return formToken(Token::arrow, tokStart);
				88	}
				89	return emitError(tokStart, "unexpected character");
				90
				91	case '?':
				92	if (*curPtr == '?') {
				93	++curPtr;
				94	return formToken(Token::questionquestion, tokStart);
				95	}
				96
				97	return formToken(Token::question, tokStart);
				98
Chris Lattner	e79379a	2018-06-22 10:39:19 -0700	[diff] [blame]	99	case ';': return lexComment();
				100	case '@': return lexAtIdentifier(tokStart);
MLIR Team	f85a626	2018-06-27 11:03:08 -0700	[diff] [blame]	101	case '#': return lexAffineMapId(tokStart);
Chris Lattner	ed65a73	2018-06-28 20:45:33 -0700	[diff] [blame^]	102	case '"': return lexString(tokStart);
Chris Lattner	bb8fafc	2018-06-22 15:52:02 -0700	[diff] [blame]	103
				104	case '0': case '1': case '2': case '3': case '4':
				105	case '5': case '6': case '7': case '8': case '9':
				106	return lexNumber(tokStart);
Chris Lattner	e79379a	2018-06-22 10:39:19 -0700	[diff] [blame]	107	}
				108	}
				109
				110	/// Lex a comment line, starting with a semicolon.
				111	///
				112	/// TODO: add a regex for comments here and to the spec.
				113	///
				114	Token Lexer::lexComment() {
				115	while (true) {
				116	switch (*curPtr++) {
				117	case '\n':
				118	case '\r':
				119	// Newline is end of comment.
				120	return lexToken();
				121	case 0:
				122	// If this is the end of the buffer, end the comment.
				123	if (curPtr-1 == curBuffer.end()) {
				124	--curPtr;
				125	return lexToken();
				126	}
				127	LLVM_FALLTHROUGH;
				128	default:
				129	// Skip over other characters.
				130	break;
				131	}
				132	}
				133	}
				134
				135	/// Lex a bare identifier or keyword that starts with a letter.
				136	///
Chris Lattner	f6d80a0	2018-06-24 11:18:29 -0700	[diff] [blame]	137	/// bare-id ::= letter (letter\|digit\|[_])*
Chris Lattner	e79379a	2018-06-22 10:39:19 -0700	[diff] [blame]	138	///
				139	Token Lexer::lexBareIdentifierOrKeyword(const char *tokStart) {
Chris Lattner	f6d80a0	2018-06-24 11:18:29 -0700	[diff] [blame]	140	// Match the rest of the identifier regex: [0-9a-zA-Z_]*
				141	while (isalpha(curPtr) \|\| isdigit(curPtr) \|\| *curPtr == '_')
Chris Lattner	e79379a	2018-06-22 10:39:19 -0700	[diff] [blame]	142	++curPtr;
				143
				144	// Check to see if this identifier is a keyword.
				145	StringRef spelling(tokStart, curPtr-tokStart);
				146
				147	Token::TokenKind kind = llvm::StringSwitch<Token::TokenKind>(spelling)
Chris Lattner	f6d80a0	2018-06-24 11:18:29 -0700	[diff] [blame]	148	.Case("bf16", Token::kw_bf16)
				149	.Case("br", Token::kw_br)
Chris Lattner	e79379a	2018-06-22 10:39:19 -0700	[diff] [blame]	150	.Case("cfgfunc", Token::kw_cfgfunc)
				151	.Case("extfunc", Token::kw_extfunc)
Chris Lattner	bb8fafc	2018-06-22 15:52:02 -0700	[diff] [blame]	152	.Case("f16", Token::kw_f16)
				153	.Case("f32", Token::kw_f32)
				154	.Case("f64", Token::kw_f64)
				155	.Case("i1", Token::kw_i1)
				156	.Case("i16", Token::kw_i16)
				157	.Case("i32", Token::kw_i32)
				158	.Case("i64", Token::kw_i64)
				159	.Case("i8", Token::kw_i8)
				160	.Case("int", Token::kw_int)
				161	.Case("memref", Token::kw_memref)
Chris Lattner	e79379a	2018-06-22 10:39:19 -0700	[diff] [blame]	162	.Case("mlfunc", Token::kw_mlfunc)
Chris Lattner	4c95a50	2018-06-23 16:03:42 -0700	[diff] [blame]	163	.Case("return", Token::kw_return)
Chris Lattner	bb8fafc	2018-06-22 15:52:02 -0700	[diff] [blame]	164	.Case("tensor", Token::kw_tensor)
				165	.Case("vector", Token::kw_vector)
Chris Lattner	e79379a	2018-06-22 10:39:19 -0700	[diff] [blame]	166	.Default(Token::bare_identifier);
				167
				168	return Token(kind, spelling);
				169	}
				170
				171	/// Lex an '@foo' identifier.
				172	///
				173	/// function-id ::= `@` bare-id
				174	///
				175	Token Lexer::lexAtIdentifier(const char *tokStart) {
				176	// These always start with a letter.
				177	if (!isalpha(*curPtr++))
				178	return emitError(curPtr-1, "expected letter in @ identifier");
				179
Chris Lattner	f6d80a0	2018-06-24 11:18:29 -0700	[diff] [blame]	180	while (isalpha(curPtr) \|\| isdigit(curPtr) \|\| *curPtr == '_')
Chris Lattner	e79379a	2018-06-22 10:39:19 -0700	[diff] [blame]	181	++curPtr;
				182	return formToken(Token::at_identifier, tokStart);
				183	}
Chris Lattner	bb8fafc	2018-06-22 15:52:02 -0700	[diff] [blame]	184
MLIR Team	f85a626	2018-06-27 11:03:08 -0700	[diff] [blame]	185	/// Lex an '#foo' identifier.
				186	///
				187	/// affine-map-id ::= `#` suffix-id
				188	/// suffix-id ::= digit+ \| (letter\|id-punct) (letter\|id-punct\|digit)*
				189	///
				190	// TODO(andydavis) Consider moving suffix-id parsing to a shared function
				191	// so it can be re-used to parse %suffix-id.
				192	Token Lexer::lexAffineMapId(const char *tokStart) {
				193	// Parse suffix-id.
				194	if (isdigit(*curPtr)) {
				195	// If suffix-id starts with a digit, the rest must be digits.
				196	while (isdigit(*curPtr)) {
				197	++curPtr;
				198	}
				199	} else if (isalpha(curPtr) \|\| isPunct(curPtr)) {
				200	do {
				201	++curPtr;
				202	} while (isalpha(curPtr) \|\| isdigit(curPtr) \|\| isPunct(*curPtr));
				203	} else {
				204	return emitError(curPtr-1, "invalid affine map id");
				205	}
				206	return formToken(Token::affine_map_id, tokStart);
				207	}
				208
Chris Lattner	bb8fafc	2018-06-22 15:52:02 -0700	[diff] [blame]	209	/// Lex an integer literal.
				210	///
				211	/// integer-literal ::= digit+ \| `0x` hex_digit+
				212	///
				213	Token Lexer::lexNumber(const char *tokStart) {
				214	assert(isdigit(curPtr[-1]));
				215
				216	// Handle the hexadecimal case.
				217	if (curPtr[-1] == '0' && *curPtr == 'x') {
				218	++curPtr;
				219
				220	if (!isxdigit(*curPtr))
				221	return emitError(curPtr, "expected hexadecimal digit");
				222
				223	while (isxdigit(*curPtr))
				224	++curPtr;
				225
				226	return formToken(Token::integer, tokStart);
				227	}
				228
				229	// Handle the normal decimal case.
				230	while (isdigit(*curPtr))
				231	++curPtr;
				232
				233	return formToken(Token::integer, tokStart);
				234	}
Chris Lattner	ed65a73	2018-06-28 20:45:33 -0700	[diff] [blame^]	235
				236	/// Lex a string literal.
				237	///
				238	/// string-literal ::= '"' [^"\n\f\v\r]* '"'
				239	///
				240	/// TODO: define escaping rules.
				241	Token Lexer::lexString(const char *tokStart) {
				242	assert(curPtr[-1] == '"');
				243
				244	while (1) {
				245	switch (*curPtr++) {
				246	case '"':
				247	return formToken(Token::string, tokStart);
				248	case '0':
				249	// If this is a random nul character in the middle of a string, just
				250	// include it. If it is the end of file, then it is an error.
				251	if (curPtr-1 != curBuffer.end())
				252	continue;
				253	LLVM_FALLTHROUGH;
				254	case '\n':
				255	case '\v':
				256	case '\f':
				257	return emitError(curPtr-1, "expected '\"' in string literal");
				258
				259	default:
				260	continue;
				261	}
				262	}
				263	}