Refactor information about tokens out into a new TokenKinds.def file. Use this to share code a bit more, and fix a diagnostic bug Uday pointed out where parseCommaSeparatedList would print the wrong diagnostic when the end signifier was not a ')'. PiperOrigin-RevId: 202676858

commit: 8da0c28d35fe3d613b5b10389ece5214160c2db0 [log] [tgz]
author: Chris Lattner <clattner@google.com> Fri Jun 29 11:15:56 2018 -0700
committer: jpienaar <jpienaar@google.com> Fri Mar 29 12:27:07 2019 -0700
tree: dfa87b64c429ba43e1c88b2fa2ca8facd91c3d2d
parent: ed65a73a07f09ba344f53c5f7719c39c315c010d [diff]
diff --git a/lib/Parser/Lexer.cpp b/lib/Parser/Lexer.cpp
index b6473f5..e3e5e9e 100644
--- a/lib/Parser/Lexer.cpp
+++ b/lib/Parser/Lexer.cpp

@@ -144,25 +144,10 @@
   // Check to see if this identifier is a keyword.
   StringRef spelling(tokStart, curPtr-tokStart);
 
-  Token::TokenKind kind = llvm::StringSwitch<Token::TokenKind>(spelling)
-    .Case("bf16", Token::kw_bf16)
-    .Case("br", Token::kw_br)
-    .Case("cfgfunc", Token::kw_cfgfunc)
-    .Case("extfunc", Token::kw_extfunc)
-    .Case("f16", Token::kw_f16)
-    .Case("f32", Token::kw_f32)
-    .Case("f64", Token::kw_f64)
-    .Case("i1", Token::kw_i1)
-    .Case("i16", Token::kw_i16)
-    .Case("i32", Token::kw_i32)
-    .Case("i64", Token::kw_i64)
-    .Case("i8", Token::kw_i8)
-    .Case("int", Token::kw_int)
-    .Case("memref", Token::kw_memref)
-    .Case("mlfunc", Token::kw_mlfunc)
-    .Case("return", Token::kw_return)
-    .Case("tensor", Token::kw_tensor)
-    .Case("vector", Token::kw_vector)
+  Token::Kind kind = llvm::StringSwitch<Token::Kind>(spelling)
+#define TOK_KEYWORD(SPELLING) \
+    .Case(#SPELLING, Token::kw_##SPELLING)
+#include "TokenKinds.def"
     .Default(Token::bare_identifier);
 
   return Token(kind, spelling);
@@ -203,7 +188,7 @@
   } else {
     return emitError(curPtr-1, "invalid affine map id");
   }
-  return formToken(Token::affine_map_id, tokStart);
+  return formToken(Token::affine_map_identifier, tokStart);
 }
 
 /// Lex an integer literal.

diff --git a/lib/Parser/Lexer.h b/lib/Parser/Lexer.h
index f0274fe..4bbd9b7 100644
--- a/lib/Parser/Lexer.h
+++ b/lib/Parser/Lexer.h

@@ -50,7 +50,7 @@
   void resetPointer(const char *newPointer) { curPtr = newPointer; }
 private:
   // Helpers.
-  Token formToken(Token::TokenKind kind, const char *tokStart) {
+  Token formToken(Token::Kind kind, const char *tokStart) {
     return Token(kind, StringRef(tokStart, curPtr-tokStart));
   }
 

diff --git a/lib/Parser/Parser.cpp b/lib/Parser/Parser.cpp
index c36d3b9..c62ee5d 100644
--- a/lib/Parser/Parser.cpp
+++ b/lib/Parser/Parser.cpp

@@ -94,21 +94,21 @@
   /// Advance the current lexer onto the next token, asserting what the expected
   /// current token is.  This is preferred to the above method because it leads
   /// to more self-documenting code with better checking.
-  void consumeToken(Token::TokenKind kind) {
+  void consumeToken(Token::Kind kind) {
     assert(curToken.is(kind) && "consumed an unexpected token");
     consumeToken();
   }
 
   /// If the current token has the specified kind, consume it and return true.
   /// If not, return false.
-  bool consumeIf(Token::TokenKind kind) {
+  bool consumeIf(Token::Kind kind) {
     if (curToken.isNot(kind))
       return false;
     consumeToken(kind);
     return true;
   }
 
-  ParseResult parseCommaSeparatedList(Token::TokenKind rightToken,
+  ParseResult parseCommaSeparatedList(Token::Kind rightToken,
                                const std::function<ParseResult()> &parseElement,
                                       bool allowEmptyList = true);
 
@@ -169,7 +169,7 @@
 ///   abstract-list ::= element (',' element)* rightToken
 ///
 ParseResult Parser::
-parseCommaSeparatedList(Token::TokenKind rightToken,
+parseCommaSeparatedList(Token::Kind rightToken,
                         const std::function<ParseResult()> &parseElement,
                         bool allowEmptyList) {
   // Handle the empty case.
@@ -192,7 +192,8 @@
 
   // Consume the end character.
   if (!consumeIf(rightToken))
-    return emitError("expected ',' or ')'");
+    return emitError("expected ',' or '" + Token::getTokenSpelling(rightToken) +
+                     "'");
 
   return ParseSuccess;
 }
@@ -487,7 +488,7 @@
 ///  dim-size ::= affine-expr | `min` `(` affine-expr ( `,` affine-expr)+ `)`
 ///
 ParseResult Parser::parseAffineMapDef() {
-  assert(curToken.is(Token::affine_map_id));
+  assert(curToken.is(Token::affine_map_identifier));
 
   StringRef affineMapId = curToken.getSpelling().drop_front();
   // Check that 'affineMapId' is unique.
@@ -495,7 +496,7 @@
   if (affineMaps.count(affineMapId) > 0)
     return emitError("redefinition of affine map id '" + affineMapId + "'");
 
-  consumeToken(Token::affine_map_id);
+  consumeToken(Token::affine_map_identifier);
 
   // TODO(andydavis,bondhugula) Parse affine map definition.
   affineMaps[affineMapId].reset(new AffineMap(1, 0));
@@ -829,7 +830,7 @@
     case Token::kw_cfgfunc:
       if (parseCFGFunc()) return nullptr;
       break;
-    case Token::affine_map_id:
+    case Token::affine_map_identifier:
       if (parseAffineMapDef()) return nullptr;
       break;
 

diff --git a/lib/Parser/Token.cpp b/lib/Parser/Token.cpp
index a8affc7..ca88b06 100644
--- a/lib/Parser/Token.cpp
+++ b/lib/Parser/Token.cpp

@@ -56,3 +56,16 @@
   // Just drop the quotes off for now.
   return getSpelling().drop_front().drop_back().str();
 }
+
+
+/// Given a punctuation or keyword token kind, return its fixed spelling; the
+/// cases are generated from TokenKinds.def.  Warning: markers, identifiers and
+/// literals have no fixed spelling and hit the assert in the default case.
+StringRef Token::getTokenSpelling(Kind kind) {
+   switch (kind) {
+   default: assert(0 && "This token kind has no fixed spelling");
+#define TOK_PUNCTUATION(NAME, SPELLING) case NAME: return SPELLING;
+#define TOK_KEYWORD(SPELLING) case kw_##SPELLING: return #SPELLING;
+#include "TokenKinds.def"
+   }
+}

diff --git a/lib/Parser/Token.h b/lib/Parser/Token.h
index 15ce015..9c4d4f9 100644
--- a/lib/Parser/Token.h
+++ b/lib/Parser/Token.h

@@ -27,78 +27,42 @@
 /// This represents a token in the MLIR syntax.
 class Token {
 public:
-  enum TokenKind {
-    // Markers
-    eof, error,
-
-    // Identifiers.
-    bare_identifier,    // foo
-    at_identifier,      // @foo
-    affine_map_id,      // #foo
-    // TODO: @@foo, etc.
-
-    integer,            // 42
-    string,             // "foo"
-
-    // Punctuation.
-    arrow,              // ->
-    colon,              // :
-    comma,              // ,
-    question,           // ?
-    questionquestion,   // ??
-    l_paren, r_paren,   // ( )
-    l_brace, r_brace,   // { }
-    less, greater,      // < >
-    // TODO: More punctuation.
-
-    // Keywords.
-    kw_bf16,
-    kw_br,
-    kw_cfgfunc,
-    kw_extfunc,
-    kw_f16,
-    kw_f32,
-    kw_f64,
-    kw_i1,
-    kw_i16,
-    kw_i32,
-    kw_i64,
-    kw_i8,
-    kw_int,
-    kw_memref,
-    kw_mlfunc,
-    kw_return,
-    kw_tensor,
-    kw_vector,
+  enum Kind {
+#define TOK_MARKER(NAME) NAME,
+#define TOK_IDENTIFIER(NAME) NAME,
+#define TOK_LITERAL(NAME) NAME,
+#define TOK_PUNCTUATION(NAME, SPELLING) NAME,
+#define TOK_KEYWORD(SPELLING) kw_##SPELLING,
+#include "TokenKinds.def"
   };
 
-  Token(TokenKind kind, StringRef spelling)
+  Token(Kind kind, StringRef spelling)
     : kind(kind), spelling(spelling) {}
 
   // Return the bytes that make up this token.
   StringRef getSpelling() const { return spelling; }
 
   // Token classification.
-  TokenKind getKind() const { return kind; }
-  bool is(TokenKind K) const { return kind == K; }
+  Kind getKind() const { return kind; }
+  bool is(Kind K) const { return kind == K; }
 
-  bool isAny(TokenKind k1, TokenKind k2) const {
+  bool isAny(Kind k1, Kind k2) const {
     return is(k1) || is(k2);
   }
 
   /// Return true if this token is one of the specified kinds.
   template <typename ...T>
-  bool isAny(TokenKind k1, TokenKind k2, TokenKind k3, T... others) const {
+  bool isAny(Kind k1, Kind k2, Kind k3, T... others) const {
     if (is(k1))
       return true;
     return isAny(k2, k3, others...);
   }
 
-  bool isNot(TokenKind k) const { return kind != k; }
+  bool isNot(Kind k) const { return kind != k; }
 
   /// Return true if this token isn't one of the specified kinds.
   template <typename ...T>
-  bool isNot(TokenKind k1, TokenKind k2, T... others) const {
+  bool isNot(Kind k1, Kind k2, T... others) const {
     return !isAny(k1, k2, others...);
   }
 
@@ -117,9 +81,15 @@
   llvm::SMLoc getEndLoc() const;
   llvm::SMRange getLocRange() const;
 
+
+  /// Given a punctuation or keyword token kind, return its fixed spelling.
+  /// Warning: markers, identifiers and literal tokens have no fixed spelling;
+  /// passing one of those kinds triggers an assertion failure.
+  static StringRef getTokenSpelling(Kind kind);
+
 private:
   /// Discriminator that indicates the sort of token this is.
-  TokenKind kind;
+  Kind kind;
 
   /// A reference to the entire token contents; this is always a pointer into
   /// a memory buffer owned by the source manager.

diff --git a/lib/Parser/TokenKinds.def b/lib/Parser/TokenKinds.def
new file mode 100644
index 0000000..7eae470
--- /dev/null
+++ b/lib/Parser/TokenKinds.def

@@ -0,0 +1,97 @@
+//===- TokenKinds.def - MLIR Token Description ------------------*- C++ -*-===//
+//
+// Copyright 2019 The MLIR Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// =============================================================================
+//
+// This file is #include'd multiple times, by clients that define one or more
+// of the TOK_* macros below to extract the token information they need.
+//
+//===----------------------------------------------------------------------===//
+
+#if !defined(TOK_MARKER) && !defined(TOK_IDENTIFIER) && !defined(TOK_LITERAL)&&\
+    !defined(TOK_PUNCTUATION) && !defined(TOK_KEYWORD)
+#  error Must define one of the TOK_ macros.
+#endif
+
+#ifndef TOK_MARKER
+#define TOK_MARKER(X)
+#endif
+#ifndef TOK_IDENTIFIER
+#define TOK_IDENTIFIER(NAME)
+#endif
+#ifndef TOK_LITERAL
+#define TOK_LITERAL(NAME)
+#endif
+#ifndef TOK_PUNCTUATION
+#define TOK_PUNCTUATION(NAME, SPELLING)
+#endif
+#ifndef TOK_KEYWORD
+#define TOK_KEYWORD(SPELLING)
+#endif
+
+
+// Markers
+TOK_MARKER(eof)
+TOK_MARKER(error)
+
+// Identifiers.
+TOK_IDENTIFIER(bare_identifier)         // foo
+TOK_IDENTIFIER(at_identifier)           // @foo
+TOK_IDENTIFIER(affine_map_identifier)   // #foo
+// TODO: @@foo, etc.
+
+// Literals
+TOK_LITERAL(integer)                    // 42
+TOK_LITERAL(string)                     // "foo"
+
+// Punctuation.
+TOK_PUNCTUATION(arrow,            "->")
+TOK_PUNCTUATION(colon,            ":")
+TOK_PUNCTUATION(comma,            ",")
+TOK_PUNCTUATION(question,         "?")
+TOK_PUNCTUATION(questionquestion, "??")
+TOK_PUNCTUATION(l_paren,          "(")
+TOK_PUNCTUATION(r_paren,          ")")
+TOK_PUNCTUATION(l_brace,          "{")
+TOK_PUNCTUATION(r_brace,          "}")
+TOK_PUNCTUATION(less,             "<")
+TOK_PUNCTUATION(greater,          ">")
+// TODO: More punctuation.
+
+// Keywords.  These turn "foo" into Token::kw_foo enums.
+TOK_KEYWORD(bf16)
+TOK_KEYWORD(br)
+TOK_KEYWORD(cfgfunc)
+TOK_KEYWORD(extfunc)
+TOK_KEYWORD(f16)
+TOK_KEYWORD(f32)
+TOK_KEYWORD(f64)
+TOK_KEYWORD(i1)
+TOK_KEYWORD(i16)
+TOK_KEYWORD(i32)
+TOK_KEYWORD(i64)
+TOK_KEYWORD(i8)
+TOK_KEYWORD(int)
+TOK_KEYWORD(memref)
+TOK_KEYWORD(mlfunc)
+TOK_KEYWORD(return)
+TOK_KEYWORD(tensor)
+TOK_KEYWORD(vector)
+
+#undef TOK_MARKER
+#undef TOK_IDENTIFIER
+#undef TOK_LITERAL
+#undef TOK_PUNCTUATION
+#undef TOK_KEYWORD
commit	8da0c28d35fe3d613b5b10389ece5214160c2db0	[log] [tgz]
author	Chris Lattner <clattner@google.com>	Fri Jun 29 11:15:56 2018 -0700
committer	jpienaar <jpienaar@google.com>	Fri Mar 29 12:27:07 2019 -0700
tree	dfa87b64c429ba43e1c88b2fa2ca8facd91c3d2d
parent	ed65a73a07f09ba344f53c5f7719c39c315c010d [diff]