Improve isHexaLiteral to work with escaped newlines and trigraphs.
Patch by Francois Pichet!


git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@112602 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index 98277a4..917829b 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -921,13 +921,14 @@
 }
 
 /// isHexaLiteral - Return true if Start points to a hex constant.
-/// FIXME: This isn't correct, it will mislex:
-///     0\       <- escaped newline.
-///     x1234e+1
 /// in microsoft mode (where this is supposed to be several different tokens).
-static inline bool isHexaLiteral(const char *Start, const char *End) {
-  return ((End - Start > 2) && Start[0] == '0' && 
-          (Start[1] == 'x' || Start[1] == 'X'));
+static bool isHexaLiteral(const char *Start, const LangOptions &Features) {
+  unsigned Size; // Out-param of the calls below; used to step past C1.
+  char C1 = Lexer::getCharAndSizeNoWarn(Start, Size, Features);
+  if (C1 != '0') // Anything not starting with '0' is not a hex literal.
+    return false;
+  char C2 = Lexer::getCharAndSizeNoWarn(Start + Size, Size, Features);
+  return (C2 == 'x' || C2 == 'X'); // Hex iff the prefix is "0x" or "0X".
+}
 
 /// LexNumericConstant - Lex the remainder of a integer or floating point
@@ -947,7 +948,7 @@
   if ((C == '-' || C == '+') && (PrevCh == 'E' || PrevCh == 'e')) {
     // If we are in Microsoft mode, don't continue if the constant is hex.
     // For example, MSVC will accept the following as 3 tokens: 0x1234567e+1
-    if (!Features.Microsoft || !isHexaLiteral(BufferPtr, CurPtr))
+    if (!Features.Microsoft || !isHexaLiteral(BufferPtr, Features))
       return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));
   }
 
diff --git a/test/Lexer/ms-extensions.c b/test/Lexer/ms-extensions.c
index 5b45ab4..8b7d2e1 100644
--- a/test/Lexer/ms-extensions.c
+++ b/test/Lexer/ms-extensions.c
@@ -30,4 +30,12 @@
   int var2 = 0X1111111e+1;
   int var3 = 0xe+1;
   int var4 = 0XE+1;
+
+  int var5=    0\
+x1234e+1;
+
+  int var6=
+  /*expected-warning {{backslash and newline separated by space}} */    0\       
+x1234e+1;                      
 }
+