[MC] Fix floating-point literal lexing.

This patch has three related fixes to improve float literal lexing:

1. Make AsmLexer::LexDigit handle floats without a decimal point more consistently.
2. Make AsmLexer::LexFloatLiteral print an error for floats which are apparently missing an "e".
3. Make APFloat::convertFromString use binutils-compatible exponent parsing.

Together, this fixes some cases where a float would be incorrectly rejected, fixes some cases where the compiler would crash, and improves diagnostics in some cases.

Patch by Brandon Jones.

Differential Revision: https://reviews.llvm.org/D57321

llvm-svn: 357214

commit: 3dd72ea810dbb0c45c5815d2f43cc2b393d274a1 [log] [tgz]
author: Eli Friedman <efriedma@quicinc.com> Thu Mar 28 21:12:28 2019 +0000
committer: Eli Friedman <efriedma@quicinc.com> Thu Mar 28 21:12:28 2019 +0000
tree: fd8ab38e89d21640f9637a18c98c3e62f988e397
parent: ea626d8bdb402b2ea91483be49e870e75c76e5e0 [diff]
diff --git a/llvm/lib/MC/MCParser/AsmLexer.cpp b/llvm/lib/MC/MCParser/AsmLexer.cpp
index d21bc64..9155ae0 100644
--- a/llvm/lib/MC/MCParser/AsmLexer.cpp
+++ b/llvm/lib/MC/MCParser/AsmLexer.cpp

@@ -61,8 +61,6 @@
   return (unsigned char)*CurPtr++;
 }
 
-/// LexFloatLiteral: [0-9]*[.][0-9]*([eE][+-]?[0-9]*)?
-///
 /// The leading integral digit sequence and dot should have already been
 /// consumed, some or all of the fractional digit sequence *can* have been
 /// consumed.
@@ -71,13 +69,16 @@
   while (isDigit(*CurPtr))
     ++CurPtr;
 
-  // Check for exponent; we intentionally accept a slighlty wider set of
-  // literals here and rely on the upstream client to reject invalid ones (e.g.,
-  // "1e+").
-  if (*CurPtr == 'e' || *CurPtr == 'E') {
+  if (*CurPtr == '-' || *CurPtr == '+')
+    return ReturnError(CurPtr, "Invalid sign in float literal");
+
+  // Check for exponent
+  if ((*CurPtr == 'e' || *CurPtr == 'E')) {
     ++CurPtr;
+
     if (*CurPtr == '-' || *CurPtr == '+')
       ++CurPtr;
+
     while (isDigit(*CurPtr))
       ++CurPtr;
   }
@@ -145,8 +146,9 @@
     // Disambiguate a .1243foo identifier from a floating literal.
     while (isDigit(*CurPtr))
       ++CurPtr;
-    if (*CurPtr == 'e' || *CurPtr == 'E' ||
-        !IsIdentifierChar(*CurPtr, AllowAtInIdentifier))
+
+    if (!IsIdentifierChar(*CurPtr, AllowAtInIdentifier) ||
+        *CurPtr == 'e' || *CurPtr == 'E')
       return LexFloatLiteral();
   }
 
@@ -326,8 +328,9 @@
     unsigned Radix = doHexLookAhead(CurPtr, 10, LexMasmIntegers);
     bool isHex = Radix == 16;
     // Check for floating point literals.
-    if (!isHex && (*CurPtr == '.' || *CurPtr == 'e')) {
-      ++CurPtr;
+    if (!isHex && (*CurPtr == '.' || *CurPtr == 'e' || *CurPtr == 'E')) {
+      if (*CurPtr == '.')
+        ++CurPtr;
       return LexFloatLiteral();
     }
 

diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp
index 3ebed5b..208950d 100644
--- a/llvm/lib/Support/APFloat.cpp
+++ b/llvm/lib/Support/APFloat.cpp

@@ -198,7 +198,10 @@
   const unsigned int overlargeExponent = 24000;  /* FIXME.  */
   StringRef::iterator p = begin;
 
-  assert(p != end && "Exponent has no digits");
+  // Treat no exponent as 0 to match binutils
+  if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) {
+    return 0;
+  }
 
   isNegative = (*p == '-');
   if (*p == '-' || *p == '+') {
commit	3dd72ea810dbb0c45c5815d2f43cc2b393d274a1	[log] [tgz]
author	Eli Friedman <efriedma@quicinc.com>	Thu Mar 28 21:12:28 2019 +0000
committer	Eli Friedman <efriedma@quicinc.com>	Thu Mar 28 21:12:28 2019 +0000
tree	fd8ab38e89d21640f9637a18c98c3e62f988e397
parent	ea626d8bdb402b2ea91483be49e870e75c76e5e0 [diff]