Blame - lib/Parser/Lexer.cpp - platform/external/tensorflow

blob: 5958658b797a575aa21417195c7eedcbf25c8dbf [file] [log] [blame]

Chris Lattner	e79379a	2018-06-22 10:39:19 -0700	[diff] [blame^]	1	//===- Lexer.cpp - MLIR Lexer Implementation ------------------------------===//
				2	//
				3	// Copyright 2019 The MLIR Authors.
				4	//
				5	// Licensed under the Apache License, Version 2.0 (the "License");
				6	// you may not use this file except in compliance with the License.
				7	// You may obtain a copy of the License at
				8	//
				9	// http://www.apache.org/licenses/LICENSE-2.0
				10	//
				11	// Unless required by applicable law or agreed to in writing, software
				12	// distributed under the License is distributed on an "AS IS" BASIS,
				13	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				14	// See the License for the specific language governing permissions and
				15	// limitations under the License.
				16	// =============================================================================
				17	//
				18	// This file implements the lexer for the MLIR textual form.
				19	//
				20	//===----------------------------------------------------------------------===//
				21
				22	#include "Lexer.h"
				23	#include "llvm/Support/SourceMgr.h"
				24	using namespace mlir;
				25	using llvm::SMLoc;
				26	using llvm::SourceMgr;
				27
				28	Lexer::Lexer(llvm::SourceMgr &sourceMgr) : sourceMgr(sourceMgr) {
				29	auto bufferID = sourceMgr.getMainFileID();
				30	curBuffer = sourceMgr.getMemoryBuffer(bufferID)->getBuffer();
				31	curPtr = curBuffer.begin();
				32	}
				33
				34	/// emitError - Emit an error message and return an Token::error token.
				35	Token Lexer::emitError(const char *loc, const Twine &message) {
				36	// TODO(clattner): If/when we want to implement a -verify mode, this will need
				37	// to package up errors into SMDiagnostic and report them.
				38	sourceMgr.PrintMessage(SMLoc::getFromPointer(loc), SourceMgr::DK_Error,
				39	message);
				40	return formToken(Token::error, loc);
				41	}
				42
				43	Token Lexer::lexToken() {
				44	const char *tokStart = curPtr;
				45
				46	switch (*curPtr++) {
				47	default:
				48	// Handle bare identifiers.
				49	if (isalpha(curPtr[-1]))
				50	return lexBareIdentifierOrKeyword(tokStart);
				51
				52	// Unknown character, emit an error.
				53	return emitError(tokStart, "unexpected character");
				54
				55	case 0:
				56	// This may either be a nul character in the source file or may be the EOF
				57	// marker that llvm::MemoryBuffer guarantees will be there.
				58	if (curPtr-1 == curBuffer.end())
				59	return formToken(Token::eof, tokStart);
				60
				61	LLVM_FALLTHROUGH;
				62	case ' ':
				63	case '\t':
				64	case '\n':
				65	case '\r':
				66	// Ignore whitespace.
				67	return lexToken();
				68
				69	case '(': return formToken(Token::l_paren, tokStart);
				70	case ')': return formToken(Token::r_paren, tokStart);
				71	case '<': return formToken(Token::less, tokStart);
				72	case '>': return formToken(Token::greater, tokStart);
				73
				74	case ';': return lexComment();
				75	case '@': return lexAtIdentifier(tokStart);
				76	}
				77	}
				78
				79	/// Lex a comment line, starting with a semicolon.
				80	///
				81	/// TODO: add a regex for comments here and to the spec.
				82	///
				83	Token Lexer::lexComment() {
				84	while (true) {
				85	switch (*curPtr++) {
				86	case '\n':
				87	case '\r':
				88	// Newline is end of comment.
				89	return lexToken();
				90	case 0:
				91	// If this is the end of the buffer, end the comment.
				92	if (curPtr-1 == curBuffer.end()) {
				93	--curPtr;
				94	return lexToken();
				95	}
				96	LLVM_FALLTHROUGH;
				97	default:
				98	// Skip over other characters.
				99	break;
				100	}
				101	}
				102	}
				103
				104	/// Lex a bare identifier or keyword that starts with a letter.
				105	///
				106	/// bare-id ::= letter (letter\|digit)*
				107	///
				108	Token Lexer::lexBareIdentifierOrKeyword(const char *tokStart) {
				109	// Match the rest of the identifier regex: [0-9a-zA-Z]*
				110	while (isalpha(curPtr) \|\| isdigit(curPtr))
				111	++curPtr;
				112
				113	// Check to see if this identifier is a keyword.
				114	StringRef spelling(tokStart, curPtr-tokStart);
				115
				116	Token::TokenKind kind = llvm::StringSwitch<Token::TokenKind>(spelling)
				117	.Case("cfgfunc", Token::kw_cfgfunc)
				118	.Case("extfunc", Token::kw_extfunc)
				119	.Case("mlfunc", Token::kw_mlfunc)
				120	.Default(Token::bare_identifier);
				121
				122	return Token(kind, spelling);
				123	}
				124
				125	/// Lex an '@foo' identifier.
				126	///
				127	/// function-id ::= `@` bare-id
				128	///
				129	Token Lexer::lexAtIdentifier(const char *tokStart) {
				130	// These always start with a letter.
				131	if (!isalpha(*curPtr++))
				132	return emitError(curPtr-1, "expected letter in @ identifier");
				133
				134	while (isalpha(curPtr) \|\| isdigit(curPtr))
				135	++curPtr;
				136	return formToken(Token::at_identifier, tokStart);
				137	}