[MC] Defer asm errors to post-statement failure

Allow errors to be deferred and emitted as part of clean up to simplify
and shorten Assembly parser code. This will allow error messages to be
emitted in helper functions and be modified by the caller which has
better context.

As part of this many minor cleanups to the Parser:

* Unify parser cleanup on error
* Add Workaround for incorrect return values in ParseDirective instances
* Tighten checks on error-signifying return values for parser functions
  and fix in-tree TargetParsers to be more consistent with the changes.
* Fix AArch64 test cases checking for spurious error messages that are
  now fixed.

These changes should be backwards compatible with current Target Parsers
so long as the error status are correctly returned in appropriate
functions.

Reviewers: rnk, majnemer

Subscribers: aemerson, jyknight, llvm-commits

Differential Revision: https://reviews.llvm.org/D24047

llvm-svn: 281249
diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp
index a75818e..6c11a82 100644
--- a/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -178,9 +178,6 @@
   /// \brief Keeps track of how many .macro's have been instantiated.
   unsigned NumOfMacroInstantiations;
 
-  /// Flag tracking whether any errors have been encountered.
-  unsigned HadError : 1;
-
   /// The values from the last parsed cpp hash file line comment if any.
   struct CppHashInfoTy {
     StringRef Filename;
@@ -247,12 +244,9 @@
     AssemblerDialect = i;
   }
 
-  void Note(SMLoc L, const Twine &Msg,
-            ArrayRef<SMRange> Ranges = None) override;
-  bool Warning(SMLoc L, const Twine &Msg,
-               ArrayRef<SMRange> Ranges = None) override;
-  bool Error(SMLoc L, const Twine &Msg,
-             ArrayRef<SMRange> Ranges = None) override;
+  void Note(SMLoc L, const Twine &Msg, SMRange Range = None) override;
+  bool Warning(SMLoc L, const Twine &Msg, SMRange Range = None) override;
+  bool printError(SMLoc L, const Twine &Msg, SMRange Range = None) override;
 
   const AsmToken &Lex() override;
 
@@ -337,7 +331,8 @@
 
   void printMacroInstantiations();
   void printMessage(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Msg,
-                    ArrayRef<SMRange> Ranges = None) const {
+                    SMRange Range = None) const {
+    ArrayRef<SMRange> Ranges(Range);
     SrcMgr.PrintMessage(Loc, Kind, Msg, Ranges);
   }
   static void DiagHandler(const SMDiagnostic &Diag, void *Context);
@@ -565,8 +560,8 @@
                      const MCAsmInfo &MAI)
     : Lexer(MAI), Ctx(Ctx), Out(Out), MAI(MAI), SrcMgr(SM),
       PlatformParser(nullptr), CurBuffer(SM.getMainFileID()),
-      MacrosEnabledFlag(true), HadError(false), CppHashInfo(),
-      AssemblerDialect(~0U), IsDarwin(false), ParsingInlineAsm(false) {
+      MacrosEnabledFlag(true), CppHashInfo(), AssemblerDialect(~0U),
+      IsDarwin(false), ParsingInlineAsm(false) {
   // Save the old handler.
   SavedDiagHandler = SrcMgr.getDiagHandler();
   SavedDiagContext = SrcMgr.getDiagContext();
@@ -609,24 +604,25 @@
                  "while in macro instantiation");
 }
 
-void AsmParser::Note(SMLoc L, const Twine &Msg, ArrayRef<SMRange> Ranges) {
-  printMessage(L, SourceMgr::DK_Note, Msg, Ranges);
+void AsmParser::Note(SMLoc L, const Twine &Msg, SMRange Range) {
+  printPendingErrors();
+  printMessage(L, SourceMgr::DK_Note, Msg, Range);
   printMacroInstantiations();
 }
 
-bool AsmParser::Warning(SMLoc L, const Twine &Msg, ArrayRef<SMRange> Ranges) {
+bool AsmParser::Warning(SMLoc L, const Twine &Msg, SMRange Range) {
   if(getTargetParser().getTargetOptions().MCNoWarn)
     return false;
   if (getTargetParser().getTargetOptions().MCFatalWarnings)
-    return Error(L, Msg, Ranges);
-  printMessage(L, SourceMgr::DK_Warning, Msg, Ranges);
+    return Error(L, Msg, Range);
+  printMessage(L, SourceMgr::DK_Warning, Msg, Range);
   printMacroInstantiations();
   return false;
 }
 
-bool AsmParser::Error(SMLoc L, const Twine &Msg, ArrayRef<SMRange> Ranges) {
+bool AsmParser::printError(SMLoc L, const Twine &Msg, SMRange Range) {
   HadError = true;
-  printMessage(L, SourceMgr::DK_Error, Msg, Ranges);
+  printMessage(L, SourceMgr::DK_Error, Msg, Range);
   printMacroInstantiations();
   return true;
 }
@@ -731,32 +727,38 @@
     if (!parseStatement(Info, nullptr))
       continue;
 
-    // If we've failed, but on a Error Token, but did not consume it in
-    // favor of a better message, emit it now.
-    if (Lexer.getTok().is(AsmToken::Error)) {
+    // If we have a Lexer Error we are on an Error Token. Load in Lexer Error
+    // for printing ErrMsg via Lex() only if no (presumably better) parser error
+    // exists.
+    if (!hasPendingError() && Lexer.getTok().is(AsmToken::Error)) {
       Lex();
     }
 
-    // We had an error, validate that one was emitted and recover by skipping to
-    // the next line.
-    assert(HadError && "Parse statement returned an error, but none emitted!");
-    eatToEndOfStatement();
+    // parseStatement returned true so may need to emit an error.
+    printPendingErrors();
+
+    // Skipping to the next line if needed.
+    if (!getLexer().isAtStartOfStatement())
+      eatToEndOfStatement();
   }
 
+  // All errors should have been emitted.
+  assert(!hasPendingError() && "unexpected error from parseStatement");
+
   getTargetParser().flushPendingInstructions(getStreamer());
 
   if (TheCondState.TheCond != StartingCondState.TheCond ||
       TheCondState.Ignore != StartingCondState.Ignore)
-    return TokError("unmatched .ifs or .elses");
-
+    printError(getTok().getLoc(), "unmatched .ifs or .elses");
   // Check to see there are no empty DwarfFile slots.
   const auto &LineTables = getContext().getMCDwarfLineTables();
   if (!LineTables.empty()) {
     unsigned Index = 0;
     for (const auto &File : LineTables.begin()->second.getMCDwarfFiles()) {
       if (File.Name.empty() && Index != 0)
-        TokError("unassigned file number: " + Twine(Index) +
-                 " for .file directives");
+        printError(getTok().getLoc(), "unassigned file number: " +
+                                          Twine(Index) +
+                                          " for .file directives");
       ++Index;
     }
   }
@@ -776,9 +778,8 @@
           // FIXME: We would really like to refer back to where the symbol was
           // first referenced for a source location. We need to add something
           // to track that. Currently, we just point to the end of the file.
-          HadError |=
-              Error(getTok().getLoc(), "assembler local symbol '" +
-                                           Sym->getName() + "' not defined");
+          printError(getTok().getLoc(), "assembler local symbol '" +
+                                            Sym->getName() + "' not defined");
       }
     }
 
@@ -789,7 +790,7 @@
         // Reset the state of any "# line file" directives we've seen to the
         // context as it was at the diagnostic site.
         CppHashInfo = std::get<1>(LocSym);
-        HadError |= Error(std::get<0>(LocSym), "directional label undefined");
+        printError(std::get<0>(LocSym), "directional label undefined");
       }
     }
   }
@@ -804,7 +805,8 @@
 
 void AsmParser::checkForValidSection() {
   if (!ParsingInlineAsm && !getStreamer().getCurrentSection().first) {
-    TokError("expected section directive before assembly directive");
+    printError(getTok().getLoc(),
+               "expected section directive before assembly directive");
     Out.InitSections(false);
   }
 }
@@ -1435,6 +1437,7 @@
 ///   ::= Label* Identifier OperandList* EndOfStatement
 bool AsmParser::parseStatement(ParseStatementInfo &Info,
                                MCAsmParserSemaCallback *SI) {
+  assert(!hasPendingError() && "parseStatement started with pending error");
   // Eat initial spaces and comments
   while (Lexer.is(AsmToken::Space))
     Lex();
@@ -1467,15 +1470,19 @@
   if (Lexer.is(AsmToken::Integer)) {
     LocalLabelVal = getTok().getIntVal();
     if (LocalLabelVal < 0) {
-      if (!TheCondState.Ignore)
-        return TokError("unexpected token at start of statement");
+      if (!TheCondState.Ignore) {
+        Lex(); // always eat a token
+        return Error(IDLoc, "unexpected token at start of statement");
+      }
       IDVal = "";
     } else {
       IDVal = getTok().getString();
       Lex(); // Consume the integer token to be used as an identifier token.
       if (Lexer.getKind() != AsmToken::Colon) {
-        if (!TheCondState.Ignore)
-          return TokError("unexpected token at start of statement");
+        if (!TheCondState.Ignore) {
+          Lex(); // always eat a token
+          return Error(IDLoc, "unexpected token at start of statement");
+        }
       }
     }
   } else if (Lexer.is(AsmToken::Dot)) {
@@ -1492,8 +1499,10 @@
     Lex();
     IDVal = "}";
   } else if (parseIdentifier(IDVal)) {
-    if (!TheCondState.Ignore)
-      return TokError("unexpected token at start of statement");
+    if (!TheCondState.Ignore) {
+      Lex(); // always eat a token
+      return Error(IDLoc, "unexpected token at start of statement");
+    }
     IDVal = "";
   }
 
@@ -1655,9 +1664,20 @@
 
     getTargetParser().flushPendingInstructions(getStreamer());
 
-    // First query the target-specific parser. It will return 'true' if it
-    // isn't interested in this directive.
-    if (!getTargetParser().ParseDirective(ID))
+    SMLoc StartTokLoc = getTok().getLoc();
+    bool TPDirectiveReturn = getTargetParser().ParseDirective(ID);
+
+    if (hasPendingError())
+      return true;
+    // Currently the return value should be true if we are
+    // uninterested but as this is at odds with the standard parsing
+    // convention (return true = error) we have instances of a parsed
+    // directive that fails returning true as an error. Catch these
+    // cases as best as possible errors here.
+    if (TPDirectiveReturn && StartTokLoc != getTok().getLoc())
+      return true;
+    // Return if we did some parsing or believe we succeeded.
+    if (!TPDirectiveReturn || StartTokLoc != getTok().getLoc())
       return false;
 
     // Next, check the extension directive map to see if any extension has
@@ -1912,9 +1932,9 @@
   // Canonicalize the opcode to lower case.
   std::string OpcodeStr = IDVal.lower();
   ParseInstructionInfo IInfo(Info.AsmRewrites);
-  bool HadError = getTargetParser().ParseInstruction(IInfo, OpcodeStr, ID,
-                                                     Info.ParsedOperands);
-  Info.ParseError = HadError;
+  bool ParseHadError = getTargetParser().ParseInstruction(IInfo, OpcodeStr, ID,
+                                                          Info.ParsedOperands);
+  Info.ParseError = ParseHadError;
 
   // Dump the parsed representation, if requested.
   if (getShowParsedOperands()) {
@@ -1931,9 +1951,13 @@
     printMessage(IDLoc, SourceMgr::DK_Note, OS.str());
   }
 
+  // Fail even if ParseInstruction erroneously returns false.
+  if (hasPendingError() || ParseHadError)
+    return true;
+
   // If we are generating dwarf for the current section then generate a .loc
   // directive for the instruction.
-  if (!HadError && getContext().getGenDwarfForAssembly() &&
+  if (!ParseHadError && getContext().getGenDwarfForAssembly() &&
       getContext().getGenDwarfSectionSyms().count(
           getStreamer().getCurrentSection().first)) {
     unsigned Line;
@@ -1975,15 +1999,13 @@
   }
 
   // If parsing succeeded, match the instruction.
-  if (!HadError) {
+  if (!ParseHadError) {
     uint64_t ErrorInfo;
-    getTargetParser().MatchAndEmitInstruction(IDLoc, Info.Opcode,
-                                              Info.ParsedOperands, Out,
-                                              ErrorInfo, ParsingInlineAsm);
+    if (getTargetParser().MatchAndEmitInstruction(IDLoc, Info.Opcode,
+                                                  Info.ParsedOperands, Out,
+                                                  ErrorInfo, ParsingInlineAsm))
+      return true;
   }
-
-  // Don't skip the rest of the line, the instruction parser is responsible for
-  // that.
   return false;
 }
 
@@ -2019,6 +2041,7 @@
          "Lexing Cpp line comment: Expected String");
   StringRef Filename = getTok().getString();
   Lex();
+
   // Get rid of the enclosing quotes.
   Filename = Filename.substr(1, Filename.size() - 2);
 
@@ -2353,27 +2376,19 @@
     MCAsmMacroParameter FA;
 
     if (Lexer.is(AsmToken::Identifier) && Lexer.peekTok().is(AsmToken::Equal)) {
-      if (parseIdentifier(FA.Name)) {
-        Error(IDLoc, "invalid argument identifier for formal argument");
-        eatToEndOfStatement();
-        return true;
-      }
+      if (parseIdentifier(FA.Name))
+        return Error(IDLoc, "invalid argument identifier for formal argument");
 
-      if (Lexer.isNot(AsmToken::Equal)) {
-        TokError("expected '=' after formal parameter identifier");
-        eatToEndOfStatement();
-        return true;
-      }
+      if (Lexer.isNot(AsmToken::Equal))
+        return TokError("expected '=' after formal parameter identifier");
+
       Lex();
 
       NamedParametersFound = true;
     }
 
-    if (NamedParametersFound && FA.Name.empty()) {
-      Error(IDLoc, "cannot mix positional and keyword arguments");
-      eatToEndOfStatement();
-      return true;
-    }
+    if (NamedParametersFound && FA.Name.empty())
+      return Error(IDLoc, "cannot mix positional and keyword arguments");
 
     bool Vararg = HasVararg && Parameter == (NParameters - 1);
     if (parseMacroArgument(FA.Value, Vararg))
@@ -2388,10 +2403,8 @@
 
       if (FAI >= NParameters) {
         assert(M && "expected macro to be defined");
-        Error(IDLoc,
-              "parameter named '" + FA.Name + "' does not exist for macro '" +
-              M->Name + "'");
-        return true;
+        return Error(IDLoc, "parameter named '" + FA.Name +
+                                "' does not exist for macro '" + M->Name + "'");
       }
       PI = FAI;
     }
@@ -2992,11 +3005,14 @@
   if (!HasFillExpr)
     FillExpr = 0;
 
+  // Always emit an alignment here even if we thrown an error.
+  bool ReturnVal = false;
+
   // Compute alignment in bytes.
   if (IsPow2) {
     // FIXME: Diagnose overflow.
     if (Alignment >= 32) {
-      Error(AlignmentLoc, "invalid alignment value");
+      ReturnVal |= Error(AlignmentLoc, "invalid alignment value");
       Alignment = 31;
     }
 
@@ -3008,13 +3024,14 @@
     if (Alignment == 0)
       Alignment = 1;
     if (!isPowerOf2_64(Alignment))
-      Error(AlignmentLoc, "alignment must be a power of 2");
+      ReturnVal |= Error(AlignmentLoc, "alignment must be a power of 2");
   }
 
   // Diagnose non-sensical max bytes to align.
   if (MaxBytesLoc.isValid()) {
     if (MaxBytesToFill < 1) {
-      Error(MaxBytesLoc, "alignment directive can never be satisfied in this "
+      ReturnVal |= Error(MaxBytesLoc,
+                         "alignment directive can never be satisfied in this "
                          "many bytes, ignoring maximum bytes expression");
       MaxBytesToFill = 0;
     }
@@ -3040,7 +3057,7 @@
                                        MaxBytesToFill);
   }
 
-  return false;
+  return ReturnVal;
 }
 
 /// parseDirectiveFile
@@ -3094,7 +3111,7 @@
       getContext().setGenDwarfForAssembly(false);
     else if (getStreamer().EmitDwarfFileDirective(FileNumber, Directory, Filename) ==
         0)
-      Error(FileNumberLoc, "file number already allocated");
+      return Error(FileNumberLoc, "file number already allocated");
   }
 
   return false;
@@ -3346,7 +3363,7 @@
 
   if (!getStreamer().EmitCVInlineSiteIdDirective(FunctionId, IAFunc, IAFile,
                                                  IALine, IACol, FunctionIdLoc))
-    Error(FunctionIdLoc, "function id already allocated");
+    return Error(FunctionIdLoc, "function id already allocated");
 
   return false;
 }
@@ -4340,9 +4357,9 @@
     return true;
 
   if (Str.empty())
-    Error(Loc, ".abort detected. Assembly stopping.");
+    return Error(Loc, ".abort detected. Assembly stopping.");
   else
-    Error(Loc, ".abort '" + Str + "' detected. Assembly stopping.");
+    return Error(Loc, ".abort '" + Str + "' detected. Assembly stopping.");
   // FIXME: Actually abort assembly here.
 
   return false;
@@ -4487,11 +4504,8 @@
 bool AsmParser::parseDirectiveIfeqs(SMLoc DirectiveLoc, bool ExpectEqual) {
   if (Lexer.isNot(AsmToken::String)) {
     if (ExpectEqual)
-      TokError("expected string parameter for '.ifeqs' directive");
-    else
-      TokError("expected string parameter for '.ifnes' directive");
-    eatToEndOfStatement();
-    return true;
+      return TokError("expected string parameter for '.ifeqs' directive");
+    return TokError("expected string parameter for '.ifnes' directive");
   }
 
   StringRef String1 = getTok().getStringContents();
@@ -4499,22 +4513,17 @@
 
   if (Lexer.isNot(AsmToken::Comma)) {
     if (ExpectEqual)
-      TokError("expected comma after first string for '.ifeqs' directive");
-    else
-      TokError("expected comma after first string for '.ifnes' directive");
-    eatToEndOfStatement();
-    return true;
+      return TokError(
+          "expected comma after first string for '.ifeqs' directive");
+    return TokError("expected comma after first string for '.ifnes' directive");
   }
 
   Lex();
 
   if (Lexer.isNot(AsmToken::String)) {
     if (ExpectEqual)
-      TokError("expected string parameter for '.ifeqs' directive");
-    else
-      TokError("expected string parameter for '.ifnes' directive");
-    eatToEndOfStatement();
-    return true;
+      return TokError("expected string parameter for '.ifeqs' directive");
+    return TokError("expected string parameter for '.ifnes' directive");
   }
 
   StringRef String2 = getTok().getStringContents();
@@ -4559,8 +4568,8 @@
 bool AsmParser::parseDirectiveElseIf(SMLoc DirectiveLoc) {
   if (TheCondState.TheCond != AsmCond::IfCond &&
       TheCondState.TheCond != AsmCond::ElseIfCond)
-    Error(DirectiveLoc, "Encountered a .elseif that doesn't follow a .if or "
-                        " an .elseif");
+    return Error(DirectiveLoc, "Encountered a .elseif that doesn't follow an"
+                               " .if or  an .elseif");
   TheCondState.TheCond = AsmCond::ElseIfCond;
 
   bool LastIgnoreState = false;
@@ -4594,8 +4603,8 @@
 
   if (TheCondState.TheCond != AsmCond::IfCond &&
       TheCondState.TheCond != AsmCond::ElseIfCond)
-    Error(DirectiveLoc, "Encountered a .else that doesn't follow a .if or an "
-                        ".elseif");
+    return Error(DirectiveLoc, "Encountered a .else that doesn't follow "
+                               " an .if or an .elseif");
   TheCondState.TheCond = AsmCond::ElseCond;
   bool LastIgnoreState = false;
   if (!TheCondStack.empty())
@@ -4637,18 +4646,14 @@
 
   StringRef Message = ".error directive invoked in source file";
   if (Lexer.isNot(AsmToken::EndOfStatement)) {
-    if (Lexer.isNot(AsmToken::String)) {
-      TokError(".error argument must be a string");
-      eatToEndOfStatement();
-      return true;
-    }
+    if (Lexer.isNot(AsmToken::String))
+      return TokError(".error argument must be a string");
 
     Message = getTok().getStringContents();
     Lex();
   }
 
-  Error(L, Message);
-  return true;
+  return Error(L, Message);
 }
 
 /// parseDirectiveWarning
@@ -4663,18 +4668,14 @@
 
   StringRef Message = ".warning directive invoked in source file";
   if (Lexer.isNot(AsmToken::EndOfStatement)) {
-    if (Lexer.isNot(AsmToken::String)) {
-      TokError(".warning argument must be a string");
-      eatToEndOfStatement();
-      return true;
-    }
+    if (Lexer.isNot(AsmToken::String))
+      return TokError(".warning argument must be a string");
 
     Message = getTok().getStringContents();
     Lex();
   }
 
-  Warning(L, Message);
-  return false;
+  return Warning(L, Message);
 }
 
 /// parseDirectiveEndIf
@@ -4685,8 +4686,8 @@
     return true;
 
   if ((TheCondState.TheCond == AsmCond::NoCond) || TheCondStack.empty())
-    Error(DirectiveLoc, "Encountered a .endif that doesn't follow a .if or "
-                        ".else");
+    return Error(DirectiveLoc, "Encountered a .endif that doesn't follow "
+                               "an .if or .else");
   if (!TheCondStack.empty()) {
     TheCondState = TheCondStack.back();
     TheCondStack.pop_back();
@@ -4838,7 +4839,7 @@
   while (true) {
     // Check whether we have reached the end of the file.
     if (getLexer().is(AsmToken::Eof)) {
-      Error(DirectiveLoc, "no matching '.endr' in definition");
+      printError(DirectiveLoc, "no matching '.endr' in definition");
       return nullptr;
     }
 
@@ -4855,7 +4856,8 @@
         EndToken = getTok();
         Lex();
         if (Lexer.isNot(AsmToken::EndOfStatement)) {
-          TokError("unexpected token in '.endr' directive");
+          printError(getTok().getLoc(),
+                     "unexpected token in '.endr' directive");
           return nullptr;
         }
         break;
@@ -4905,7 +4907,6 @@
 
   int64_t Count;
   if (!CountExpr->evaluateAsAbsolute(Count)) {
-    eatToEndOfStatement();
     return Error(CountLoc, "unexpected token in '" + Dir + "' directive");
   }
 
@@ -5108,11 +5109,16 @@
       continue;
 
     ParseStatementInfo Info(&AsmStrRewrites);
-    if (parseStatement(Info, &SI))
-      return true;
+    bool StatementErr = parseStatement(Info, &SI);
 
-    if (Info.ParseError)
+    if (StatementErr || Info.ParseError) {
+      // Emit pending errors if any exist.
+      printPendingErrors();
       return true;
+    }
+
+    // No pending error should exist here.
+    assert(!hasPendingError() && "unexpected error from parseStatement");
 
     if (Info.Opcode == ~0U)
       continue;
@@ -5339,7 +5345,6 @@
 
   if (Parser.parseExpression(Value)) {
     Parser.TokError("missing expression");
-    Parser.eatToEndOfStatement();
     return true;
   }