Refactor information about tokens out into a new TokenKinds.def file. Use this
to share code a bit more, and fix a diagnostic bug Uday pointed out where
parseCommaSeparatedList would print the wrong diagnostic when the end signifier
was not a ')'.
PiperOrigin-RevId: 202676858
diff --git a/lib/Parser/Lexer.cpp b/lib/Parser/Lexer.cpp
index b6473f5..e3e5e9e 100644
--- a/lib/Parser/Lexer.cpp
+++ b/lib/Parser/Lexer.cpp
@@ -144,25 +144,10 @@
// Check to see if this identifier is a keyword.
StringRef spelling(tokStart, curPtr-tokStart);
- Token::TokenKind kind = llvm::StringSwitch<Token::TokenKind>(spelling)
- .Case("bf16", Token::kw_bf16)
- .Case("br", Token::kw_br)
- .Case("cfgfunc", Token::kw_cfgfunc)
- .Case("extfunc", Token::kw_extfunc)
- .Case("f16", Token::kw_f16)
- .Case("f32", Token::kw_f32)
- .Case("f64", Token::kw_f64)
- .Case("i1", Token::kw_i1)
- .Case("i16", Token::kw_i16)
- .Case("i32", Token::kw_i32)
- .Case("i64", Token::kw_i64)
- .Case("i8", Token::kw_i8)
- .Case("int", Token::kw_int)
- .Case("memref", Token::kw_memref)
- .Case("mlfunc", Token::kw_mlfunc)
- .Case("return", Token::kw_return)
- .Case("tensor", Token::kw_tensor)
- .Case("vector", Token::kw_vector)
+ Token::Kind kind = llvm::StringSwitch<Token::Kind>(spelling)
+#define TOK_KEYWORD(SPELLING) \
+ .Case(#SPELLING, Token::kw_##SPELLING)
+#include "TokenKinds.def"
.Default(Token::bare_identifier);
return Token(kind, spelling);
@@ -203,7 +188,7 @@
} else {
return emitError(curPtr-1, "invalid affine map id");
}
- return formToken(Token::affine_map_id, tokStart);
+ return formToken(Token::affine_map_identifier, tokStart);
}
/// Lex an integer literal.
diff --git a/lib/Parser/Lexer.h b/lib/Parser/Lexer.h
index f0274fe..4bbd9b7 100644
--- a/lib/Parser/Lexer.h
+++ b/lib/Parser/Lexer.h
@@ -50,7 +50,7 @@
void resetPointer(const char *newPointer) { curPtr = newPointer; }
private:
// Helpers.
- Token formToken(Token::TokenKind kind, const char *tokStart) {
+ Token formToken(Token::Kind kind, const char *tokStart) {
return Token(kind, StringRef(tokStart, curPtr-tokStart));
}
diff --git a/lib/Parser/Parser.cpp b/lib/Parser/Parser.cpp
index c36d3b9..c62ee5d 100644
--- a/lib/Parser/Parser.cpp
+++ b/lib/Parser/Parser.cpp
@@ -94,21 +94,21 @@
/// Advance the current lexer onto the next token, asserting what the expected
/// current token is. This is preferred to the above method because it leads
/// to more self-documenting code with better checking.
- void consumeToken(Token::TokenKind kind) {
+ void consumeToken(Token::Kind kind) {
assert(curToken.is(kind) && "consumed an unexpected token");
consumeToken();
}
/// If the current token has the specified kind, consume it and return true.
/// If not, return false.
- bool consumeIf(Token::TokenKind kind) {
+ bool consumeIf(Token::Kind kind) {
if (curToken.isNot(kind))
return false;
consumeToken(kind);
return true;
}
- ParseResult parseCommaSeparatedList(Token::TokenKind rightToken,
+ ParseResult parseCommaSeparatedList(Token::Kind rightToken,
const std::function<ParseResult()> &parseElement,
bool allowEmptyList = true);
@@ -169,7 +169,7 @@
/// abstract-list ::= element (',' element)* rightToken
///
ParseResult Parser::
-parseCommaSeparatedList(Token::TokenKind rightToken,
+parseCommaSeparatedList(Token::Kind rightToken,
const std::function<ParseResult()> &parseElement,
bool allowEmptyList) {
// Handle the empty case.
@@ -192,7 +192,8 @@
// Consume the end character.
if (!consumeIf(rightToken))
- return emitError("expected ',' or ')'");
+ return emitError("expected ',' or '" + Token::getTokenSpelling(rightToken) +
+ "'");
return ParseSuccess;
}
@@ -487,7 +488,7 @@
/// dim-size ::= affine-expr | `min` `(` affine-expr ( `,` affine-expr)+ `)`
///
ParseResult Parser::parseAffineMapDef() {
- assert(curToken.is(Token::affine_map_id));
+ assert(curToken.is(Token::affine_map_identifier));
StringRef affineMapId = curToken.getSpelling().drop_front();
// Check that 'affineMapId' is unique.
@@ -495,7 +496,7 @@
if (affineMaps.count(affineMapId) > 0)
return emitError("redefinition of affine map id '" + affineMapId + "'");
- consumeToken(Token::affine_map_id);
+ consumeToken(Token::affine_map_identifier);
// TODO(andydavis,bondhugula) Parse affine map definition.
affineMaps[affineMapId].reset(new AffineMap(1, 0));
@@ -829,7 +830,7 @@
case Token::kw_cfgfunc:
if (parseCFGFunc()) return nullptr;
break;
- case Token::affine_map_id:
+ case Token::affine_map_identifier:
if (parseAffineMapDef()) return nullptr;
break;
diff --git a/lib/Parser/Token.cpp b/lib/Parser/Token.cpp
index a8affc7..ca88b06 100644
--- a/lib/Parser/Token.cpp
+++ b/lib/Parser/Token.cpp
@@ -56,3 +56,16 @@
// Just drop the quotes off for now.
return getSpelling().drop_front().drop_back().str();
}
+
+
+/// Given a punctuation or keyword token kind, return the spelling of the
+/// token as a string. Markers, identifiers and literal tokens have no fixed
+/// spelling: they assert, and yield an empty string when asserts are off.
+StringRef Token::getTokenSpelling(Kind kind) {
+ switch (kind) {
+ default: assert(0 && "This token kind has no fixed spelling"); return "";
+#define TOK_PUNCTUATION(NAME, SPELLING) case NAME: return SPELLING;
+#define TOK_KEYWORD(SPELLING) case kw_##SPELLING: return #SPELLING;
+#include "TokenKinds.def"
+ }
+}
diff --git a/lib/Parser/Token.h b/lib/Parser/Token.h
index 15ce015..9c4d4f9 100644
--- a/lib/Parser/Token.h
+++ b/lib/Parser/Token.h
@@ -27,78 +27,42 @@
/// This represents a token in the MLIR syntax.
class Token {
public:
- enum TokenKind {
- // Markers
- eof, error,
-
- // Identifiers.
- bare_identifier, // foo
- at_identifier, // @foo
- affine_map_id, // #foo
- // TODO: @@foo, etc.
-
- integer, // 42
- string, // "foo"
-
- // Punctuation.
- arrow, // ->
- colon, // :
- comma, // ,
- question, // ?
- questionquestion, // ??
- l_paren, r_paren, // ( )
- l_brace, r_brace, // { }
- less, greater, // < >
- // TODO: More punctuation.
-
- // Keywords.
- kw_bf16,
- kw_br,
- kw_cfgfunc,
- kw_extfunc,
- kw_f16,
- kw_f32,
- kw_f64,
- kw_i1,
- kw_i16,
- kw_i32,
- kw_i64,
- kw_i8,
- kw_int,
- kw_memref,
- kw_mlfunc,
- kw_return,
- kw_tensor,
- kw_vector,
+ enum Kind {
+#define TOK_MARKER(NAME) NAME,
+#define TOK_IDENTIFIER(NAME) NAME,
+#define TOK_LITERAL(NAME) NAME,
+#define TOK_PUNCTUATION(NAME, SPELLING) NAME,
+#define TOK_KEYWORD(SPELLING) kw_##SPELLING,
+#include "TokenKinds.def"
};
- Token(TokenKind kind, StringRef spelling)
+ Token(Kind kind, StringRef spelling)
: kind(kind), spelling(spelling) {}
// Return the bytes that make up this token.
StringRef getSpelling() const { return spelling; }
// Token classification.
- TokenKind getKind() const { return kind; }
- bool is(TokenKind K) const { return kind == K; }
+ Kind getKind() const { return kind; }
+ bool is(Kind K) const { return kind == K; }
- bool isAny(TokenKind k1, TokenKind k2) const {
+ bool isAny(Kind k1, Kind k2) const {
return is(k1) || is(k2);
}
/// Return true if this token is one of the specified kinds.
template <typename ...T>
- bool isAny(TokenKind k1, TokenKind k2, TokenKind k3, T... others) const {
+ bool isAny(Kind k1, Kind k2, Kind k3, T... others) const {
if (is(k1))
return true;
return isAny(k2, k3, others...);
}
- bool isNot(TokenKind k) const { return kind != k; }
+ bool isNot(Kind k) const { return kind != k; }
/// Return true if this token isn't one of the specified kinds.
template <typename ...T>
- bool isNot(TokenKind k1, TokenKind k2, T... others) const {
+ bool isNot(Kind k1, Kind k2, T... others) const {
return !isAny(k1, k2, others...);
}
@@ -117,9 +81,15 @@
llvm::SMLoc getEndLoc() const;
llvm::SMRange getLocRange() const;
+
+ /// Given a punctuation or keyword token kind, return the spelling of the
+ /// token as a string. Warning: This will abort on markers, identifiers and
+ /// literal tokens since they have no fixed spelling.
+ static StringRef getTokenSpelling(Kind kind);
+
private:
/// Discriminator that indicates the sort of token this is.
- TokenKind kind;
+ Kind kind;
/// A reference to the entire token contents; this is always a pointer into
/// a memory buffer owned by the source manager.
diff --git a/lib/Parser/TokenKinds.def b/lib/Parser/TokenKinds.def
new file mode 100644
index 0000000..7eae470
--- /dev/null
+++ b/lib/Parser/TokenKinds.def
@@ -0,0 +1,97 @@
+//===- TokenKinds.def - MLIR Token Description ------------------*- C++ -*-===//
+//
+// Copyright 2019 The MLIR Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// =============================================================================
+//
+// This file is intended to be #include'd multiple times to extract information
+// about tokens for various clients in the lexer.
+//
+//===----------------------------------------------------------------------===//
+
+#if !defined(TOK_MARKER) && !defined(TOK_IDENTIFIER) && !defined(TOK_LITERAL)&&\
+ !defined(TOK_PUNCTUATION) && !defined(TOK_KEYWORD)
+# error Must define one of the TOK_ macros.
+#endif
+
+#ifndef TOK_MARKER
+#define TOK_MARKER(X)
+#endif
+#ifndef TOK_IDENTIFIER
+#define TOK_IDENTIFIER(NAME)
+#endif
+#ifndef TOK_LITERAL
+#define TOK_LITERAL(NAME)
+#endif
+#ifndef TOK_PUNCTUATION
+#define TOK_PUNCTUATION(NAME, SPELLING)
+#endif
+#ifndef TOK_KEYWORD
+#define TOK_KEYWORD(SPELLING)
+#endif
+
+
+// Markers
+TOK_MARKER(eof)
+TOK_MARKER(error)
+
+// Identifiers.
+TOK_IDENTIFIER(bare_identifier) // foo
+TOK_IDENTIFIER(at_identifier) // @foo
+TOK_IDENTIFIER(affine_map_identifier) // #foo
+// TODO: @@foo, etc.
+
+// Literals
+TOK_LITERAL(integer) // 42
+TOK_LITERAL(string) // "foo"
+
+// Punctuation.
+TOK_PUNCTUATION(arrow, "->")
+TOK_PUNCTUATION(colon, ":")
+TOK_PUNCTUATION(comma, ",")
+TOK_PUNCTUATION(question, "?")
+TOK_PUNCTUATION(questionquestion, "??")
+TOK_PUNCTUATION(l_paren, "(")
+TOK_PUNCTUATION(r_paren, ")")
+TOK_PUNCTUATION(l_brace, "{")
+TOK_PUNCTUATION(r_brace, "}")
+TOK_PUNCTUATION(less, "<")
+TOK_PUNCTUATION(greater, ">")
+// TODO: More punctuation.
+
+// Keywords. These turn "foo" into Token::kw_foo enums.
+TOK_KEYWORD(bf16)
+TOK_KEYWORD(br)
+TOK_KEYWORD(cfgfunc)
+TOK_KEYWORD(extfunc)
+TOK_KEYWORD(f16)
+TOK_KEYWORD(f32)
+TOK_KEYWORD(f64)
+TOK_KEYWORD(i1)
+TOK_KEYWORD(i16)
+TOK_KEYWORD(i32)
+TOK_KEYWORD(i64)
+TOK_KEYWORD(i8)
+TOK_KEYWORD(int)
+TOK_KEYWORD(memref)
+TOK_KEYWORD(mlfunc)
+TOK_KEYWORD(return)
+TOK_KEYWORD(tensor)
+TOK_KEYWORD(vector)
+
+#undef TOK_MARKER
+#undef TOK_IDENTIFIER
+#undef TOK_LITERAL
+#undef TOK_PUNCTUATION
+#undef TOK_KEYWORD